Coverage Report

Created: 2026-01-25 06:28

/src/systemd/src/libsystemd/sd-event/sd-event.c
Line
Count
Source
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3
#include <linux/magic.h>
4
#include <malloc.h>
5
#include <stdlib.h>
6
#include <sys/timerfd.h>
7
#include <sys/wait.h>
8
#include <threads.h>
9
#include <unistd.h>
10
11
#include "sd-daemon.h"
12
#include "sd-event.h"
13
#include "sd-id128.h"
14
#include "sd-messages.h"
15
16
#include "alloc-util.h"
17
#include "errno-util.h"
18
#include "event-source.h"
19
#include "fd-util.h"
20
#include "format-util.h"
21
#include "glyph-util.h"
22
#include "hashmap.h"
23
#include "hexdecoct.h"
24
#include "list.h"
25
#include "log.h"
26
#include "logarithm.h"
27
#include "memory-util.h"
28
#include "origin-id.h"
29
#include "path-util.h"
30
#include "pidfd-util.h"
31
#include "prioq.h"
32
#include "process-util.h"
33
#include "psi-util.h"
34
#include "set.h"
35
#include "signal-util.h"
36
#include "siphash24.h"
37
#include "socket-util.h"
38
#include "stat-util.h"
39
#include "string-table.h"
40
#include "string-util.h"
41
#include "strv.h"
42
#include "strxcpyx.h"
43
#include "time-util.h"
44
45
28.1k
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
46
47
0
static bool EVENT_SOURCE_WATCH_PIDFD(const sd_event_source *s) {
48
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
49
0
        return s &&
50
0
                s->type == SOURCE_CHILD &&
51
0
                (s->child.options & ~WNOWAIT) == WEXITED;
52
0
}
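
A child event source qualifies for this pidfd/EPOLLIN path only when its options reduce to plain WEXITED (optionally with WNOWAIT). An illustrative sketch of the public API side (hypothetical handler, not part of the measured source):

#include <sys/wait.h>
#include <systemd/sd-event.h>

/* Hypothetical handler: stop the loop once the child exits. */
static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

/* SIGCHLD must already be blocked in this thread, or sd_event_add_child() fails. */
static int watch_child(sd_event *e, pid_t pid) {
        /* plain WEXITED is exactly the combination EVENT_SOURCE_WATCH_PIDFD() accepts */
        return sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
}
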
53
54
1.93M
static bool event_source_is_online(sd_event_source *s) {
55
1.93M
        assert(s);
56
1.93M
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
57
1.93M
}
58
59
16.6M
static bool event_source_is_offline(sd_event_source *s) {
60
16.6M
        assert(s);
61
16.6M
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
62
16.6M
}
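
Note that the two predicates are exact complements: a source counts as offline both when explicitly disabled and while rate-limited. From the caller's side the distinction looks roughly like this (illustrative sketch using the public getters):

#include <stdbool.h>
#include <systemd/sd-event.h>

/* Mirrors event_source_is_online(): the enabled state alone is not enough,
 * a rate-limited source is effectively offline until its window ends. */
static bool effectively_online(sd_event_source *s) {
        int enabled;
        if (sd_event_source_get_enabled(s, &enabled) < 0)
                return false;
        return enabled != SD_EVENT_OFF && sd_event_source_is_ratelimited(s) == 0;
}
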
63
64
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
65
        [SOURCE_IO]                  = "io",
66
        [SOURCE_TIME_REALTIME]       = "realtime",
67
        [SOURCE_TIME_BOOTTIME]       = "boottime",
68
        [SOURCE_TIME_MONOTONIC]      = "monotonic",
69
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
70
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
71
        [SOURCE_SIGNAL]              = "signal",
72
        [SOURCE_CHILD]               = "child",
73
        [SOURCE_DEFER]               = "defer",
74
        [SOURCE_POST]                = "post",
75
        [SOURCE_EXIT]                = "exit",
76
        [SOURCE_WATCHDOG]            = "watchdog",
77
        [SOURCE_INOTIFY]             = "inotify",
78
        [SOURCE_MEMORY_PRESSURE]     = "memory-pressure",
79
};
80
81
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
82
83
#define EVENT_SOURCE_IS_TIME(t)                 \
84
15.6M
        IN_SET((t),                             \
85
15.6M
               SOURCE_TIME_REALTIME,            \
86
15.6M
               SOURCE_TIME_BOOTTIME,            \
87
15.6M
               SOURCE_TIME_MONOTONIC,           \
88
15.6M
               SOURCE_TIME_REALTIME_ALARM,      \
89
15.6M
               SOURCE_TIME_BOOTTIME_ALARM)
90
91
#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
92
0
        IN_SET((t),                             \
93
               SOURCE_IO,                       \
94
               SOURCE_TIME_REALTIME,            \
95
               SOURCE_TIME_BOOTTIME,            \
96
               SOURCE_TIME_MONOTONIC,           \
97
               SOURCE_TIME_REALTIME_ALARM,      \
98
               SOURCE_TIME_BOOTTIME_ALARM,      \
99
               SOURCE_SIGNAL,                   \
100
               SOURCE_DEFER,                    \
101
               SOURCE_INOTIFY,                  \
102
               SOURCE_MEMORY_PRESSURE)
103
104
/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
105
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
106
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
107
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)
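
Only the source types listed in EVENT_SOURCE_CAN_RATE_LIMIT() accept a rate limit. An illustrative sketch of enabling one via the public API (not part of the measured source):

#include <systemd/sd-event.h>

/* Allow at most 10 dispatches per 1s window; once the burst is used up the
 * source goes offline until the window ends, which is what the "ratelimited"
 * ordering in the comparators below deals with. */
static int cap_source(sd_event_source *s) {
        return sd_event_source_set_rate_limit(s, 1000000 /* 1s in usec */, 10);
}
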
108
109
struct sd_event {
110
        unsigned n_ref;
111
112
        int epoll_fd;
113
        int watchdog_fd;
114
115
        Prioq *pending;
116
        Prioq *prepare;
117
118
        /* timerfd_create() only supports these five clocks so far. We
119
         * can add support for more clocks when the kernel learns to
120
         * deal with them, too. */
121
        struct clock_data realtime;
122
        struct clock_data boottime;
123
        struct clock_data monotonic;
124
        struct clock_data realtime_alarm;
125
        struct clock_data boottime_alarm;
126
127
        usec_t perturb;
128
129
        sd_event_source **signal_sources; /* indexed by signal number */
130
        Hashmap *signal_data; /* indexed by priority */
131
132
        Hashmap *child_sources;
133
        unsigned n_online_child_sources;
134
135
        Set *post_sources;
136
137
        Prioq *exit;
138
139
        Hashmap *inotify_data; /* indexed by priority */
140
141
        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
142
        LIST_HEAD(InodeData, inode_data_to_close_list);
143
144
        /* A list of inotify objects that already have events buffered which aren't processed yet */
145
        LIST_HEAD(InotifyData, buffered_inotify_data_list);
146
147
        /* A list of memory pressure event sources that still need their subscription string written */
148
        LIST_HEAD(sd_event_source, memory_pressure_write_list);
149
150
        uint64_t origin_id;
151
152
        uint64_t iteration;
153
        triple_timestamp timestamp;
154
        int state;
155
156
        bool exit_requested:1;
157
        bool need_process_child:1;
158
        bool watchdog:1;
159
        bool profile_delays:1;
160
        bool exit_on_idle:1;
161
162
        int exit_code;
163
164
        pid_t tid;
165
        sd_event **default_event_ptr;
166
167
        usec_t watchdog_last, watchdog_period;
168
169
        unsigned n_sources;
170
171
        struct epoll_event *event_queue;
172
173
        LIST_HEAD(sd_event_source, sources);
174
175
        sd_event_source *sigint_event_source, *sigterm_event_source;
176
177
        usec_t last_run_usec, last_log_usec;
178
        unsigned delays[sizeof(usec_t) * 8];
179
};
180
181
417M
DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_event, event);
(expansions of line 181: sd-event.c:origin_id_query, sd-event.c:event_origin_changed)
182
417M
183
417M
static thread_local sd_event *default_event = NULL;
184
417M
185
417M
static void source_disconnect(sd_event_source *s);
186
417M
static void event_gc_inode_data(sd_event *e, InodeData *d);
187
417M
188
417M
static sd_event* event_resolve(sd_event *e) {
189
14.9M
        return e == SD_EVENT_DEFAULT ? default_event : e;
190
14.9M
}
191
192
16.5M
static int pending_prioq_compare(const void *a, const void *b) {
193
16.5M
        const sd_event_source *x = a, *y = b;
194
16.5M
        int r;
195
196
16.5M
        assert(x->pending);
197
16.5M
        assert(y->pending);
198
199
        /* Enabled ones first */
200
16.5M
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
201
16.5M
        if (r != 0)
202
3.39M
                return r;
203
204
        /* Non rate-limited ones first. */
205
13.1M
        r = CMP(!!x->ratelimited, !!y->ratelimited);
206
13.1M
        if (r != 0)
207
0
                return r;
208
209
        /* Lower priority values first */
210
13.1M
        r = CMP(x->priority, y->priority);
211
13.1M
        if (r != 0)
212
5.06M
                return r;
213
214
        /* Older entries first */
215
8.07M
        return CMP(x->pending_iteration, y->pending_iteration);
216
13.1M
}
217
218
4.49M
static int prepare_prioq_compare(const void *a, const void *b) {
219
4.49M
        const sd_event_source *x = a, *y = b;
220
4.49M
        int r;
221
222
4.49M
        assert(x->prepare);
223
4.49M
        assert(y->prepare);
224
225
        /* Enabled ones first */
226
4.49M
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
227
4.49M
        if (r != 0)
228
6.53k
                return r;
229
230
        /* Non rate-limited ones first. */
231
4.48M
        r = CMP(!!x->ratelimited, !!y->ratelimited);
232
4.48M
        if (r != 0)
233
0
                return r;
234
235
        /* Move most recently prepared ones last, so that we can stop
236
         * preparing as soon as we hit one that has already been
237
         * prepared in the current iteration */
238
4.48M
        r = CMP(x->prepare_iteration, y->prepare_iteration);
239
4.48M
        if (r != 0)
240
2.98M
                return r;
241
242
        /* Lower priority values first */
243
1.50M
        return CMP(x->priority, y->priority);
244
4.48M
}
245
246
6.65M
static usec_t time_event_source_next(const sd_event_source *s) {
247
6.65M
        assert(s);
248
249
        /* We have two kinds of event sources that have expiry times associated with them: the actual
250
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
251
         * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
252
         * looking at here. */
253
254
6.65M
        if (s->ratelimited) { /* If rate-limited, the next expiry is when the ratelimit time window ends */
255
0
                assert(s->rate_limit.begin != 0);
256
0
                assert(s->rate_limit.interval != 0);
257
0
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
258
0
        }
259
260
        /* Otherwise this must be a time event source, if not ratelimited */
261
6.65M
        if (EVENT_SOURCE_IS_TIME(s->type))
262
6.65M
                return s->time.next;
263
264
0
        return USEC_INFINITY;
265
6.65M
}
266
267
1.56M
static usec_t time_event_source_latest(const sd_event_source *s) {
268
1.56M
        assert(s);
269
270
1.56M
        if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
271
                               * same, as we should avoid adding additional inaccuracy on an inaccuracy time
272
                               * window */
273
0
                assert(s->rate_limit.begin != 0);
274
0
                assert(s->rate_limit.interval != 0);
275
0
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
276
0
        }
277
278
        /* Must be a time event source, if not ratelimited */
279
1.56M
        if (EVENT_SOURCE_IS_TIME(s->type))
280
1.56M
                return usec_add(s->time.next, s->time.accuracy);
281
282
0
        return USEC_INFINITY;
283
1.56M
}
284
285
108k
static bool event_source_timer_candidate(const sd_event_source *s) {
286
108k
        assert(s);
287
288
        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
289
         * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
290
108k
        return !s->pending || s->ratelimited;
291
108k
}
292
293
3.09M
static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
294
3.09M
        const sd_event_source *x = a, *y = b;
295
3.09M
        int r;
296
297
        /* Enabled ones first */
298
3.09M
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
299
3.09M
        if (r != 0)
300
3.03M
                return r;
301
302
        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
303
54.2k
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
304
54.2k
        if (r != 0)
305
0
                return r;
306
307
        /* Order by time */
308
54.2k
        return CMP(time_func(x), time_func(y));
309
54.2k
}
310
311
1.54M
static int earliest_time_prioq_compare(const void *a, const void *b) {
312
1.54M
        return time_prioq_compare(a, b, time_event_source_next);
313
1.54M
}
314
315
1.54M
static int latest_time_prioq_compare(const void *a, const void *b) {
316
1.54M
        return time_prioq_compare(a, b, time_event_source_latest);
317
1.54M
}
318
319
13.7k
static int exit_prioq_compare(const void *a, const void *b) {
320
13.7k
        const sd_event_source *x = a, *y = b;
321
13.7k
        int r;
322
323
13.7k
        assert(x->type == SOURCE_EXIT);
324
13.7k
        assert(y->type == SOURCE_EXIT);
325
326
        /* Enabled ones first */
327
13.7k
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
328
13.7k
        if (r != 0)
329
8.50k
                return r;
330
331
        /* Lower priority values first */
332
5.25k
        return CMP(x->priority, y->priority);
333
13.7k
}
334
335
369k
static void free_clock_data(struct clock_data *d) {
336
369k
        assert(d);
337
369k
        assert(d->wakeup == WAKEUP_CLOCK_DATA);
338
339
369k
        safe_close(d->fd);
340
369k
        prioq_free(d->earliest);
341
369k
        prioq_free(d->latest);
342
369k
}
343
344
73.8k
static sd_event* event_free(sd_event *e) {
345
73.8k
        sd_event_source *s;
346
347
73.8k
        assert(e);
348
349
73.8k
        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
350
73.8k
        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
351
352
73.8k
        while ((s = e->sources)) {
353
0
                assert(s->floating);
354
0
                source_disconnect(s);
355
0
                sd_event_source_unref(s);
356
0
        }
357
358
73.8k
        assert(e->n_sources == 0);
359
360
73.8k
        if (e->default_event_ptr)
361
60.1k
                *(e->default_event_ptr) = NULL;
362
363
73.8k
        safe_close(e->epoll_fd);
364
73.8k
        safe_close(e->watchdog_fd);
365
366
73.8k
        free_clock_data(&e->realtime);
367
73.8k
        free_clock_data(&e->boottime);
368
73.8k
        free_clock_data(&e->monotonic);
369
73.8k
        free_clock_data(&e->realtime_alarm);
370
73.8k
        free_clock_data(&e->boottime_alarm);
371
372
73.8k
        prioq_free(e->pending);
373
73.8k
        prioq_free(e->prepare);
374
73.8k
        prioq_free(e->exit);
375
376
73.8k
        free(e->signal_sources);
377
73.8k
        hashmap_free(e->signal_data);
378
379
73.8k
        hashmap_free(e->inotify_data);
380
381
73.8k
        hashmap_free(e->child_sources);
382
73.8k
        set_free(e->post_sources);
383
384
73.8k
        free(e->event_queue);
385
386
73.8k
        return mfree(e);
387
73.8k
}
388
389
73.8k
_public_ int sd_event_new(sd_event** ret) {
390
73.8k
        sd_event *e;
391
73.8k
        int r;
392
393
73.8k
        assert_return(ret, -EINVAL);
394
395
73.8k
        e = new(sd_event, 1);
396
73.8k
        if (!e)
397
0
                return -ENOMEM;
398
399
73.8k
        *e = (sd_event) {
400
73.8k
                .n_ref = 1,
401
73.8k
                .epoll_fd = -EBADF,
402
73.8k
                .watchdog_fd = -EBADF,
403
73.8k
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
404
73.8k
                .realtime.fd = -EBADF,
405
73.8k
                .realtime.next = USEC_INFINITY,
406
73.8k
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
407
73.8k
                .boottime.fd = -EBADF,
408
73.8k
                .boottime.next = USEC_INFINITY,
409
73.8k
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
410
73.8k
                .monotonic.fd = -EBADF,
411
73.8k
                .monotonic.next = USEC_INFINITY,
412
73.8k
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
413
73.8k
                .realtime_alarm.fd = -EBADF,
414
73.8k
                .realtime_alarm.next = USEC_INFINITY,
415
73.8k
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
416
73.8k
                .boottime_alarm.fd = -EBADF,
417
73.8k
                .boottime_alarm.next = USEC_INFINITY,
418
73.8k
                .perturb = USEC_INFINITY,
419
73.8k
                .origin_id = origin_id_query(),
420
73.8k
        };
421
422
73.8k
        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
423
73.8k
        if (r < 0)
424
0
                goto fail;
425
426
73.8k
        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
427
73.8k
        if (e->epoll_fd < 0) {
428
0
                r = -errno;
429
0
                goto fail;
430
0
        }
431
432
73.8k
        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
433
434
73.8k
        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
435
0
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 %s 2^63 us will be logged every 5s.",
436
0
                          glyph(GLYPH_ELLIPSIS));
437
0
                e->profile_delays = true;
438
0
        }
439
440
73.8k
        *ret = e;
441
73.8k
        return 0;
442
443
0
fail:
444
0
        event_free(e);
445
0
        return r;
446
73.8k
}
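
An illustrative minimal user of the constructor above (sketch, not part of the measured source):

#include <systemd/sd-event.h>

static int setup_and_teardown(void) {
        sd_event *e = NULL;
        int r = sd_event_new(&e);
        if (r < 0)
                return r;
        /* ... add sources here, then run sd_event_loop(e) ... */
        sd_event_unref(e);
        return 0;
}
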
447
448
/* Define manually so we can add the origin check */
449
14.9M
_public_ sd_event* sd_event_ref(sd_event *e) {
450
14.9M
        if (!e)
451
0
                return NULL;
452
14.9M
        if (event_origin_changed(e))
453
0
                return NULL;
454
455
14.9M
        e->n_ref++;
456
457
14.9M
        return e;
458
14.9M
}
459
460
15.1M
_public_ sd_event* sd_event_unref(sd_event *e) {
461
15.1M
        if (!e)
462
164k
                return NULL;
463
14.9M
        if (event_origin_changed(e))
464
0
                return NULL;
465
466
14.9M
        assert(e->n_ref > 0);
467
14.9M
        if (--e->n_ref > 0)
468
14.9M
                return NULL;
469
470
73.8k
        return event_free(e);
471
14.9M
}
472
473
#define PROTECT_EVENT(e)                                                \
474
14.7M
        _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e);
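
PROTECT_EVENT() pins the loop for the current scope so that callbacks may safely drop the caller's last reference; spelled out without the internal helper macros it is roughly (sketch):

#include <systemd/sd-event.h>

static void with_pinned_loop(sd_event *e) {
        /* GCC/clang cleanup attribute; sd_event_unrefp() is the public helper */
        __attribute__((cleanup(sd_event_unrefp))) sd_event *pin = sd_event_ref(e);

        /* ... dispatch work; even if a callback unrefs 'e', 'pin' keeps
         * the loop alive until this scope exits ... */
        (void) pin;
}
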
475
476
949k
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
477
949k
        int r;
478
479
949k
        r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
480
949k
        if (r < 0)
481
949k
                log_debug_errno(r, "Failed to disable event source %p (%s): %m",
482
949k
                                s, strna(s->description));
483
484
949k
        return sd_event_source_unref(s);
485
949k
}
486
487
91.1k
static void source_io_unregister(sd_event_source *s) {
488
91.1k
        assert(s);
489
91.1k
        assert(s->type == SOURCE_IO);
490
491
91.1k
        if (event_origin_changed(s->event))
492
0
                return;
493
494
91.1k
        if (!s->io.registered)
495
47.5k
                return;
496
497
43.6k
        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
498
43.6k
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
499
43.6k
                                strna(s->description), event_source_type_to_string(s->type));
500
501
43.6k
        s->io.registered = false;
502
43.6k
}
503
504
static int source_io_register(
505
                sd_event_source *s,
506
                int enabled,
507
1.51M
                uint32_t events) {
508
509
1.51M
        assert(s);
510
1.51M
        assert(s->type == SOURCE_IO);
511
1.51M
        assert(enabled != SD_EVENT_OFF);
512
513
1.51M
        struct epoll_event ev = {
514
1.51M
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
515
1.51M
                .data.ptr = s,
516
1.51M
        };
517
518
1.51M
        if (epoll_ctl(s->event->epoll_fd,
519
1.51M
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
520
1.51M
                      s->io.fd, &ev) < 0)
521
5.81k
                return -errno;
522
523
1.50M
        s->io.registered = true;
524
525
1.50M
        return 0;
526
1.51M
}
527
528
0
static void source_child_pidfd_unregister(sd_event_source *s) {
529
0
        assert(s);
530
0
        assert(s->type == SOURCE_CHILD);
531
532
0
        if (event_origin_changed(s->event))
533
0
                return;
534
535
0
        if (!s->child.registered)
536
0
                return;
537
538
0
        if (EVENT_SOURCE_WATCH_PIDFD(s))
539
0
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
540
0
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
541
0
                                        strna(s->description), event_source_type_to_string(s->type));
542
543
0
        s->child.registered = false;
544
0
}
545
546
0
static int source_child_pidfd_register(sd_event_source *s, int enabled) {
547
0
        assert(s);
548
0
        assert(s->type == SOURCE_CHILD);
549
0
        assert(enabled != SD_EVENT_OFF);
550
551
0
        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
552
0
                struct epoll_event ev = {
553
0
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
554
0
                        .data.ptr = s,
555
0
                };
556
557
0
                if (epoll_ctl(s->event->epoll_fd,
558
0
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
559
0
                              s->child.pidfd, &ev) < 0)
560
0
                        return -errno;
561
0
        }
562
563
0
        s->child.registered = true;
564
0
        return 0;
565
0
}
566
567
0
static void source_memory_pressure_unregister(sd_event_source *s) {
568
0
        assert(s);
569
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
570
571
0
        if (event_origin_changed(s->event))
572
0
                return;
573
574
0
        if (!s->memory_pressure.registered)
575
0
                return;
576
577
0
        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0)
578
0
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
579
0
                                strna(s->description), event_source_type_to_string(s->type));
580
581
0
        s->memory_pressure.registered = false;
582
0
}
583
584
0
static int source_memory_pressure_register(sd_event_source *s, int enabled) {
585
0
        assert(s);
586
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
587
0
        assert(enabled != SD_EVENT_OFF);
588
589
0
        struct epoll_event ev = {
590
0
                .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT :
591
0
                          (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)),
592
0
                .data.ptr = s,
593
0
        };
594
595
0
        if (epoll_ctl(s->event->epoll_fd,
596
0
                      s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
597
0
                      s->memory_pressure.fd, &ev) < 0)
598
0
                return -errno;
599
600
0
        s->memory_pressure.registered = true;
601
0
        return 0;
602
0
}
603
604
0
static void source_memory_pressure_add_to_write_list(sd_event_source *s) {
605
0
        assert(s);
606
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
607
608
0
        if (s->memory_pressure.in_write_list)
609
0
                return;
610
611
0
        LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
612
0
        s->memory_pressure.in_write_list = true;
613
0
}
614
615
0
static void source_memory_pressure_remove_from_write_list(sd_event_source *s) {
616
0
        assert(s);
617
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
618
619
0
        if (!s->memory_pressure.in_write_list)
620
0
                return;
621
622
0
        LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
623
0
        s->memory_pressure.in_write_list = false;
624
0
}
625
626
8.04k
static clockid_t event_source_type_to_clock(EventSourceType t) {
627
628
8.04k
        switch (t) {
629
630
0
        case SOURCE_TIME_REALTIME:
631
0
                return CLOCK_REALTIME;
632
633
8.04k
        case SOURCE_TIME_BOOTTIME:
634
8.04k
                return CLOCK_BOOTTIME;
635
636
0
        case SOURCE_TIME_MONOTONIC:
637
0
                return CLOCK_MONOTONIC;
638
639
0
        case SOURCE_TIME_REALTIME_ALARM:
640
0
                return CLOCK_REALTIME_ALARM;
641
642
0
        case SOURCE_TIME_BOOTTIME_ALARM:
643
0
                return CLOCK_BOOTTIME_ALARM;
644
645
0
        default:
646
0
                return (clockid_t) -1;
647
8.04k
        }
648
8.04k
}
649
650
28.1k
static EventSourceType clock_to_event_source_type(clockid_t clock) {
651
652
28.1k
        switch (clock) {
653
654
0
        case CLOCK_REALTIME:
655
0
                return SOURCE_TIME_REALTIME;
656
657
17.6k
        case CLOCK_BOOTTIME:
658
17.6k
                return SOURCE_TIME_BOOTTIME;
659
660
10.5k
        case CLOCK_MONOTONIC:
661
10.5k
                return SOURCE_TIME_MONOTONIC;
662
663
0
        case CLOCK_REALTIME_ALARM:
664
0
                return SOURCE_TIME_REALTIME_ALARM;
665
666
0
        case CLOCK_BOOTTIME_ALARM:
667
0
                return SOURCE_TIME_BOOTTIME_ALARM;
668
669
0
        default:
670
0
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
671
28.1k
        }
672
28.1k
}
673
674
1.66M
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
675
1.66M
        assert(e);
676
677
1.66M
        switch (t) {
678
679
0
        case SOURCE_TIME_REALTIME:
680
0
                return &e->realtime;
681
682
130k
        case SOURCE_TIME_BOOTTIME:
683
130k
                return &e->boottime;
684
685
1.53M
        case SOURCE_TIME_MONOTONIC:
686
1.53M
                return &e->monotonic;
687
688
0
        case SOURCE_TIME_REALTIME_ALARM:
689
0
                return &e->realtime_alarm;
690
691
0
        case SOURCE_TIME_BOOTTIME_ALARM:
692
0
                return &e->boottime_alarm;
693
694
0
        default:
695
0
                return NULL;
696
1.66M
        }
697
1.66M
}
698
699
0
static void event_free_signal_data(sd_event *e, struct signal_data *d) {
700
0
        assert(e);
701
702
0
        if (!d)
703
0
                return;
704
705
0
        hashmap_remove(e->signal_data, &d->priority);
706
0
        safe_close(d->fd);
707
0
        free(d);
708
0
}
709
710
static int event_make_signal_data(
711
                sd_event *e,
712
                int sig,
713
0
                struct signal_data **ret) {
714
715
0
        struct signal_data *d;
716
0
        bool added = false;
717
0
        sigset_t ss_copy;
718
0
        int64_t priority;
719
0
        int r;
720
721
0
        assert(e);
722
723
0
        if (event_origin_changed(e))
724
0
                return -ECHILD;
725
726
0
        if (e->signal_sources && e->signal_sources[sig])
727
0
                priority = e->signal_sources[sig]->priority;
728
0
        else
729
0
                priority = SD_EVENT_PRIORITY_NORMAL;
730
731
0
        d = hashmap_get(e->signal_data, &priority);
732
0
        if (d) {
733
0
                if (sigismember(&d->sigset, sig) > 0) {
734
0
                        if (ret)
735
0
                                *ret = d;
736
0
                        return 0;
737
0
                }
738
0
        } else {
739
0
                d = new(struct signal_data, 1);
740
0
                if (!d)
741
0
                        return -ENOMEM;
742
743
0
                *d = (struct signal_data) {
744
0
                        .wakeup = WAKEUP_SIGNAL_DATA,
745
0
                        .fd = -EBADF,
746
0
                        .priority = priority,
747
0
                };
748
749
0
                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
750
0
                if (r < 0) {
751
0
                        free(d);
752
0
                        return r;
753
0
                }
754
755
0
                added = true;
756
0
        }
757
758
0
        ss_copy = d->sigset;
759
0
        assert_se(sigaddset(&ss_copy, sig) >= 0);
760
761
0
        r = signalfd(d->fd >= 0 ? d->fd : -1,   /* the first arg must be -1 or a valid signalfd */
762
0
                     &ss_copy,
763
0
                     SFD_NONBLOCK|SFD_CLOEXEC);
764
0
        if (r < 0) {
765
0
                r = -errno;
766
0
                goto fail;
767
0
        }
768
769
0
        d->sigset = ss_copy;
770
771
0
        if (d->fd >= 0) {
772
0
                if (ret)
773
0
                        *ret = d;
774
0
                return 0;
775
0
        }
776
777
0
        d->fd = fd_move_above_stdio(r);
778
779
0
        struct epoll_event ev = {
780
0
                .events = EPOLLIN,
781
0
                .data.ptr = d,
782
0
        };
783
784
0
        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
785
0
                r = -errno;
786
0
                goto fail;
787
0
        }
788
789
0
        if (ret)
790
0
                *ret = d;
791
792
0
        return 0;
793
794
0
fail:
795
0
        if (added)
796
0
                event_free_signal_data(e, d);
797
798
0
        return r;
799
0
}
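
Each signal_data object is, at the kernel level, one signalfd per priority registered for EPOLLIN. A standalone sketch of that underlying pattern (illustrative, not part of the measured source):

#include <signal.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <unistd.h>

static int make_sigterm_fd(int epoll_fd) {
        sigset_t ss;
        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);
        sigprocmask(SIG_BLOCK, &ss, NULL); /* must be blocked before signalfd() is useful */

        int fd = signalfd(-1, &ss, SFD_NONBLOCK | SFD_CLOEXEC);
        if (fd < 0)
                return -1;

        struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
        if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
                close(fd);
                return -1;
        }
        return fd;
}
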
800
801
0
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
802
0
        assert(e);
803
0
        assert(d);
804
805
        /* Turns off the specified signal in the signal data
806
         * object. If the signal mask of the object becomes empty that
807
         * way, the object is removed. */
808
809
0
        if (sigismember(&d->sigset, sig) == 0)
810
0
                return;
811
812
0
        assert_se(sigdelset(&d->sigset, sig) >= 0);
813
814
0
        if (sigisemptyset(&d->sigset)) {
815
                /* If the mask is now empty we can get rid of the structure */
816
0
                event_free_signal_data(e, d);
817
0
                return;
818
0
        }
819
820
0
        if (event_origin_changed(e))
821
0
                return;
822
823
0
        assert(d->fd >= 0);
824
825
0
        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
826
0
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
827
0
}
828
829
0
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
830
0
        struct signal_data *d;
831
0
        static const int64_t zero_priority = 0;
832
833
0
        assert(e);
834
835
        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
836
         * and possibly drop the signalfd for it. */
837
838
0
        if (sig == SIGCHLD &&
839
0
            e->n_online_child_sources > 0)
840
0
                return;
841
842
0
        if (e->signal_sources &&
843
0
            e->signal_sources[sig] &&
844
0
            event_source_is_online(e->signal_sources[sig]))
845
0
                return;
846
847
        /*
848
         * The specified signal might be enabled in three different queues:
849
         *
850
         * 1) the one that belongs to the priority passed (if it is non-NULL)
851
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
852
         * 3) the 0 priority (to cover the SIGCHLD case)
853
         *
854
         * Hence, let's remove it from all three here.
855
         */
856
857
0
        if (priority) {
858
0
                d = hashmap_get(e->signal_data, priority);
859
0
                if (d)
860
0
                        event_unmask_signal_data(e, d, sig);
861
0
        }
862
863
0
        if (e->signal_sources && e->signal_sources[sig]) {
864
0
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
865
0
                if (d)
866
0
                        event_unmask_signal_data(e, d, sig);
867
0
        }
868
869
0
        d = hashmap_get(e->signal_data, &zero_priority);
870
0
        if (d)
871
0
                event_unmask_signal_data(e, d, sig);
872
0
}
873
874
2.69M
static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
875
2.69M
        assert(s);
876
877
        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
878
         * they are enabled/disabled or marked pending and such. */
879
880
2.69M
        if (s->pending)
881
2.59M
                prioq_reshuffle(s->event->pending, s, &s->pending_index);
882
883
2.69M
        if (s->prepare)
884
10.5k
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
885
2.69M
}
886
887
2.63M
static void event_source_time_prioq_reshuffle(sd_event_source *s) {
888
2.63M
        struct clock_data *d;
889
890
2.63M
        assert(s);
891
892
        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
893
         * pending, enable state, and ratelimiting state. Makes sure the two prioqs are ordered
894
         * properly again. */
895
896
2.63M
        if (s->ratelimited)
897
0
                d = &s->event->monotonic;
898
2.63M
        else if (EVENT_SOURCE_IS_TIME(s->type))
899
2.63M
                assert_se(d = event_get_clock_data(s->event, s->type));
900
1.02M
        else
901
1.02M
                return; /* no-op for an event source which is neither a timer nor ratelimited. */
902
903
1.60M
        prioq_reshuffle(d->earliest, s, &s->earliest_index);
904
1.60M
        prioq_reshuffle(d->latest, s, &s->latest_index);
905
1.60M
        d->needs_rearm = true;
906
1.60M
}
907
908
static void event_source_time_prioq_remove(
909
                sd_event_source *s,
910
28.1k
                struct clock_data *d) {
911
912
28.1k
        assert(s);
913
28.1k
        assert(d);
914
915
28.1k
        prioq_remove(d->earliest, s, &s->earliest_index);
916
28.1k
        prioq_remove(d->latest, s, &s->latest_index);
917
28.1k
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
918
28.1k
        d->needs_rearm = true;
919
28.1k
}
920
921
160k
static void source_disconnect(sd_event_source *s) {
922
160k
        sd_event *event;
923
160k
        int r;
924
925
160k
        assert(s);
926
927
160k
        if (!s->event)
928
10.7k
                return;
929
930
160k
        assert(s->event->n_sources > 0);
931
932
149k
        switch (s->type) {
933
934
49.4k
        case SOURCE_IO:
935
49.4k
                if (s->io.fd >= 0)
936
49.4k
                        source_io_unregister(s);
937
938
49.4k
                break;
939
940
0
        case SOURCE_TIME_REALTIME:
941
17.6k
        case SOURCE_TIME_BOOTTIME:
942
28.1k
        case SOURCE_TIME_MONOTONIC:
943
28.1k
        case SOURCE_TIME_REALTIME_ALARM:
944
28.1k
        case SOURCE_TIME_BOOTTIME_ALARM:
945
                /* Only remove this event source from its time prioq here if it is not ratelimited. If
946
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
947
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */
948
949
28.1k
                if (!s->ratelimited) {
950
28.1k
                        struct clock_data *d;
951
28.1k
                        assert_se(d = event_get_clock_data(s->event, s->type));
952
28.1k
                        event_source_time_prioq_remove(s, d);
953
28.1k
                }
954
955
28.1k
                break;
956
957
0
        case SOURCE_SIGNAL:
958
0
                if (s->signal.sig > 0) {
959
960
0
                        if (s->event->signal_sources)
961
0
                                s->event->signal_sources[s->signal.sig] = NULL;
962
963
0
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
964
965
0
                        if (s->signal.unblock) {
966
0
                                sigset_t new_ss;
967
968
0
                                if (sigemptyset(&new_ss) < 0)
969
0
                                        log_debug_errno(errno, "Failed to reset signal set, ignoring: %m");
970
0
                                else if (sigaddset(&new_ss, s->signal.sig) < 0)
971
0
                                        log_debug_errno(errno, "Failed to add signal %i to signal mask, ignoring: %m", s->signal.sig);
972
0
                                else {
973
0
                                        r = pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
974
0
                                        if (r != 0)
975
0
                                                log_debug_errno(r, "Failed to unblock signal %i, ignoring: %m", s->signal.sig);
976
0
                                }
977
0
                        }
978
0
                }
979
980
0
                break;
981
982
0
        case SOURCE_CHILD:
983
0
                if (event_origin_changed(s->event))
984
0
                        s->child.process_owned = false;
985
986
0
                if (s->child.pid > 0) {
987
0
                        if (event_source_is_online(s)) {
988
0
                                assert(s->event->n_online_child_sources > 0);
989
0
                                s->event->n_online_child_sources--;
990
0
                        }
991
992
0
                        assert_se(hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid)));
993
0
                }
994
995
0
                if (EVENT_SOURCE_WATCH_PIDFD(s))
996
0
                        source_child_pidfd_unregister(s);
997
0
                else
998
0
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
999
1000
0
                break;
1001
1002
61.2k
        case SOURCE_DEFER:
1003
                /* nothing */
1004
61.2k
                break;
1005
1006
0
        case SOURCE_POST:
1007
0
                set_remove(s->event->post_sources, s);
1008
0
                break;
1009
1010
10.5k
        case SOURCE_EXIT:
1011
10.5k
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
1012
10.5k
                break;
1013
1014
0
        case SOURCE_INOTIFY: {
1015
0
                InodeData *inode_data;
1016
1017
0
                inode_data = s->inotify.inode_data;
1018
0
                if (inode_data) {
1019
0
                        InotifyData *inotify_data;
1020
0
                        assert_se(inotify_data = inode_data->inotify_data);
1021
1022
                        /* Detach this event source from the inode object */
1023
0
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
1024
0
                        s->inotify.inode_data = NULL;
1025
1026
0
                        if (s->pending) {
1027
0
                                assert(inotify_data->n_pending > 0);
1028
0
                                inotify_data->n_pending--;
1029
0
                        }
1030
1031
                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
1032
                         * continued to being watched. That's because inotify doesn't really have an API for that: we
1033
                         * can only change watch masks with access to the original inode either by fd or by path. But
1034
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
1035
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
1036
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
1037
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
1038
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
1039
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
1040
                         * anymore after reception. Yes, this sucks, but … Linux … */
1041
1042
                        /* Maybe release the inode data (and its inotify) */
1043
0
                        event_gc_inode_data(s->event, inode_data);
1044
0
                }
1045
1046
0
                break;
1047
0
        }
1048
1049
0
        case SOURCE_MEMORY_PRESSURE:
1050
0
                source_memory_pressure_remove_from_write_list(s);
1051
0
                source_memory_pressure_unregister(s);
1052
0
                break;
1053
1054
0
        default:
1055
0
                assert_not_reached();
1056
149k
        }
1057
1058
149k
        if (s->pending)
1059
61.2k
                prioq_remove(s->event->pending, s, &s->pending_index);
1060
1061
149k
        if (s->prepare)
1062
10.5k
                prioq_remove(s->event->prepare, s, &s->prepare_index);
1063
1064
149k
        if (s->ratelimited)
1065
0
                event_source_time_prioq_remove(s, &s->event->monotonic);
1066
1067
149k
        event = TAKE_PTR(s->event);
1068
298k
        LIST_REMOVE(sources, event->sources, s);
1069
298k
        event->n_sources--;
1070
1071
        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
1072
         * pidfd associated with this event source, which we'll do only on source_free(). */
1073
1074
298k
        if (!s->floating)
1075
149k
                sd_event_unref(event);
1076
298k
}
1077
1078
149k
static sd_event_source* source_free(sd_event_source *s) {
1079
149k
        int r;
1080
1081
149k
        assert(s);
1082
1083
149k
        source_disconnect(s);
1084
1085
149k
        if (s->type == SOURCE_IO && s->io.owned)
1086
0
                s->io.fd = safe_close(s->io.fd);
1087
1088
149k
        if (s->type == SOURCE_CHILD) {
1089
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
1090
1091
0
                if (s->child.process_owned) {
1092
0
                        assert(s->child.pid > 0);
1093
0
                        assert(s->child.pidfd >= 0);
1094
1095
0
                        if (!s->child.exited) {
1096
0
                                r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
1097
0
                                if (r < 0 && r != -ESRCH)
1098
0
                                        log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m",
1099
0
                                                        s->child.pid);
1100
0
                        }
1101
1102
0
                        if (!s->child.waited) {
1103
0
                                siginfo_t si = {};
1104
1105
                                /* Reap the child if we can */
1106
0
                                (void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
1107
0
                        }
1108
0
                }
1109
1110
0
                if (s->child.pidfd_owned)
1111
0
                        s->child.pidfd = safe_close(s->child.pidfd);
1112
0
        }
1113
1114
149k
        if (s->type == SOURCE_MEMORY_PRESSURE) {
1115
0
                s->memory_pressure.fd = safe_close(s->memory_pressure.fd);
1116
0
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
1117
0
        }
1118
1119
149k
        if (s->destroy_callback)
1120
0
                s->destroy_callback(s->userdata);
1121
1122
149k
        free(s->description);
1123
149k
        return mfree(s);
1124
149k
}
1125
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
1126
1127
12.1M
static int source_set_pending(sd_event_source *s, bool b) {
1128
12.1M
        int r;
1129
1130
12.1M
        assert(s);
1131
12.1M
        assert(s->type != SOURCE_EXIT);
1132
1133
12.1M
        if (s->pending == b)
1134
7.33M
                return 0;
1135
1136
4.82M
        s->pending = b;
1137
1138
4.82M
        if (b) {
1139
2.44M
                s->pending_iteration = s->event->iteration;
1140
1141
2.44M
                r = prioq_put(s->event->pending, s, &s->pending_index);
1142
2.44M
                if (r < 0) {
1143
0
                        s->pending = false;
1144
0
                        return r;
1145
0
                }
1146
2.44M
        } else
1147
4.82M
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1148
1149
4.82M
        if (EVENT_SOURCE_IS_TIME(s->type))
1150
30.2k
                event_source_time_prioq_reshuffle(s);
1151
1152
4.82M
        if (s->type == SOURCE_SIGNAL && !b) {
1153
0
                struct signal_data *d;
1154
1155
0
                d = hashmap_get(s->event->signal_data, &s->priority);
1156
0
                if (d && d->current == s)
1157
0
                        d->current = NULL;
1158
0
        }
1159
1160
4.82M
        if (s->type == SOURCE_INOTIFY) {
1161
1162
0
                assert(s->inotify.inode_data);
1163
0
                assert(s->inotify.inode_data->inotify_data);
1164
1165
0
                if (b)
1166
0
                        s->inotify.inode_data->inotify_data->n_pending++;
1167
0
                else {
1168
0
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1169
0
                        s->inotify.inode_data->inotify_data->n_pending--;
1170
0
                }
1171
0
        }
1172
1173
4.82M
        return 1;
1174
4.82M
}
1175
1176
149k
static sd_event_source* source_new(sd_event *e, bool floating, EventSourceType type) {
1177
1178
        /* Let's allocate exactly what we need. Note that the difference between the smallest event source
1179
         * structure and the largest is 144 bytes on x86-64 at the time of writing, i.e. more than two cache
1180
         * lines. */
1181
149k
        static const size_t size_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
1182
149k
                [SOURCE_IO]                  = endoffsetof_field(sd_event_source, io),
1183
149k
                [SOURCE_TIME_REALTIME]       = endoffsetof_field(sd_event_source, time),
1184
149k
                [SOURCE_TIME_BOOTTIME]       = endoffsetof_field(sd_event_source, time),
1185
149k
                [SOURCE_TIME_MONOTONIC]      = endoffsetof_field(sd_event_source, time),
1186
149k
                [SOURCE_TIME_REALTIME_ALARM] = endoffsetof_field(sd_event_source, time),
1187
149k
                [SOURCE_TIME_BOOTTIME_ALARM] = endoffsetof_field(sd_event_source, time),
1188
149k
                [SOURCE_SIGNAL]              = endoffsetof_field(sd_event_source, signal),
1189
149k
                [SOURCE_CHILD]               = endoffsetof_field(sd_event_source, child),
1190
149k
                [SOURCE_DEFER]               = endoffsetof_field(sd_event_source, defer),
1191
149k
                [SOURCE_POST]                = endoffsetof_field(sd_event_source, post),
1192
149k
                [SOURCE_EXIT]                = endoffsetof_field(sd_event_source, exit),
1193
149k
                [SOURCE_INOTIFY]             = endoffsetof_field(sd_event_source, inotify),
1194
149k
                [SOURCE_MEMORY_PRESSURE]     = endoffsetof_field(sd_event_source, memory_pressure),
1195
149k
        };
1196
1197
149k
        sd_event_source *s;
1198
1199
149k
        assert(e);
1200
149k
        assert(type >= 0);
1201
149k
        assert(type < _SOURCE_EVENT_SOURCE_TYPE_MAX);
1202
149k
        assert(size_table[type] > 0);
1203
1204
149k
        s = malloc0(size_table[type]);
1205
149k
        if (!s)
1206
0
                return NULL;
1207
        /* We use expand_to_usable() here to tell gcc that it should consider this an object of the full
1208
         * size, even if we only allocate the initial part we need. */
1209
149k
        s = expand_to_usable(s, sizeof(sd_event_source));
1210
1211
        /* Note: we cannot use compound initialization here, because sizeof(sd_event_source) is likely larger
1212
         * than what we allocated here. */
1213
149k
        s->n_ref = 1;
1214
149k
        s->event = e;
1215
149k
        s->floating = floating;
1216
149k
        s->type = type;
1217
149k
        s->pending_index = PRIOQ_IDX_NULL;
1218
149k
        s->prepare_index = PRIOQ_IDX_NULL;
1219
1220
149k
        if (!floating)
1221
149k
                sd_event_ref(e);
1222
1223
149k
        LIST_PREPEND(sources, e->sources, s);
1224
149k
        e->n_sources++;
1225
1226
149k
        return s;
1227
149k
}
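
The size_table[] above allocates only the prefix of sd_event_source that the given type actually uses. The same pattern with a hypothetical struct, for illustration:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct demo_source {
        int type;
        union {
                struct { int fd; uint32_t events; } io;
                struct { uint64_t next, accuracy; } time;
        };
};

/* Allocate only up to the end of the member the type uses, as
 * endoffsetof_field() does for the real sd_event_source. */
static struct demo_source* demo_new_io(void) {
        return calloc(1, offsetof(struct demo_source, io) + sizeof(((struct demo_source*) NULL)->io));
}
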
1228
1229
0
static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1230
0
        assert(s);
1231
1232
0
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1233
0
}
1234
1235
_public_ int sd_event_add_io(
1236
                sd_event *e,
1237
                sd_event_source **ret,
1238
                int fd,
1239
                uint32_t events,
1240
                sd_event_io_handler_t callback,
1241
49.4k
                void *userdata) {
1242
1243
49.4k
        _cleanup_(source_freep) sd_event_source *s = NULL;
1244
49.4k
        int r;
1245
1246
49.4k
        assert_return(e, -EINVAL);
1247
49.4k
        assert_return(e = event_resolve(e), -ENOPKG);
1248
49.4k
        assert_return(fd >= 0, -EBADF);
1249
49.4k
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1250
49.4k
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1251
49.4k
        assert_return(!event_origin_changed(e), -ECHILD);
1252
1253
49.4k
        if (!callback)
1254
0
                callback = io_exit_callback;
1255
1256
49.4k
        s = source_new(e, !ret, SOURCE_IO);
1257
49.4k
        if (!s)
1258
0
                return -ENOMEM;
1259
1260
49.4k
        s->wakeup = WAKEUP_EVENT_SOURCE;
1261
49.4k
        s->io.fd = fd;
1262
49.4k
        s->io.events = events;
1263
49.4k
        s->io.callback = callback;
1264
49.4k
        s->userdata = userdata;
1265
49.4k
        s->enabled = SD_EVENT_ON;
1266
1267
49.4k
        r = source_io_register(s, s->enabled, events);
1268
49.4k
        if (r < 0)
1269
5.81k
                return r;
1270
1271
43.6k
        if (ret)
1272
43.6k
                *ret = s;
1273
43.6k
        TAKE_PTR(s);
1274
1275
43.6k
        return 0;
1276
49.4k
}
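
An illustrative end-to-end user of sd_event_add_io() (hypothetical echo handler, sketch only):

#include <errno.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <systemd/sd-event.h>

static int on_stdin(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        char buf[4096];
        ssize_t n = read(fd, buf, sizeof(buf));
        if (n <= 0) /* EOF or error: stop the loop */
                return sd_event_exit(sd_event_source_get_event(s), n < 0 ? -errno : 0);
        (void) write(STDOUT_FILENO, buf, (size_t) n);
        return 0;
}

static int echo_stdin(void) {
        sd_event *e = NULL;
        int r = sd_event_new(&e);
        if (r >= 0) /* ret == NULL makes the source "floating", owned by the loop */
                r = sd_event_add_io(e, NULL, STDIN_FILENO, EPOLLIN, on_stdin, NULL);
        if (r >= 0)
                r = sd_event_loop(e);
        sd_event_unref(e);
        return r;
}
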
1277
1278
1.49M
static void initialize_perturb(sd_event *e) {
1279
1.49M
        sd_id128_t id = {};
1280
1281
        /* When we sleep for longer, we try to realign the wakeup to the same time within each
1282
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
1283
         * wakeup. However, let's take some system-specific randomness for this value, so that in a network
1284
         * of systems with synced clocks timer events are distributed a bit. Here, we calculate a
1285
         * perturbation usec offset from the boot ID (or machine ID if failed, e.g. /proc is not mounted). */
1286
1287
1.49M
        if (_likely_(e->perturb != USEC_INFINITY))
1288
1.49M
                return;
1289
1290
8.61k
        if (sd_id128_get_boot(&id) >= 0 || sd_id128_get_machine(&id) >= 0)
1291
8.61k
                e->perturb = (id.qwords[0] ^ id.qwords[1]) % USEC_PER_MINUTE;
1292
0
        else
1293
0
                e->perturb = 0; /* This is a super early process without /proc and /etc ?? */
1294
8.61k
}
1295
1296
static int event_setup_timer_fd(
1297
                sd_event *e,
1298
                struct clock_data *d,
1299
18.3k
                clockid_t clock) {
1300
1301
18.3k
        assert(e);
1302
18.3k
        assert(d);
1303
1304
18.3k
        if (_likely_(d->fd >= 0))
1305
0
                return 0;
1306
1307
18.3k
        _cleanup_close_ int fd = -EBADF;
1308
1309
18.3k
        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1310
18.3k
        if (fd < 0)
1311
0
                return -errno;
1312
1313
18.3k
        fd = fd_move_above_stdio(fd);
1314
1315
18.3k
        struct epoll_event ev = {
1316
18.3k
                .events = EPOLLIN,
1317
18.3k
                .data.ptr = d,
1318
18.3k
        };
1319
1320
18.3k
        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
1321
0
                return -errno;
1322
1323
18.3k
        d->fd = TAKE_FD(fd);
1324
18.3k
        return 0;
1325
18.3k
}
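
Stripped of the sd-event bookkeeping, the function above is the classic timerfd-in-epoll pattern; a standalone sketch (illustrative only):

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <unistd.h>

static int add_monotonic_timer(int epoll_fd) {
        int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        if (fd < 0)
                return -1;

        struct itimerspec its = { .it_value.tv_sec = 1 }; /* fire once, 1s out */
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };

        if (timerfd_settime(fd, 0, &its, NULL) < 0 ||
            epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
                close(fd);
                return -1;
        }
        return fd;
}
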
1326
1327
0
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1328
0
        assert(s);
1329
1330
0
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1331
0
}
1332
1333
28.1k
static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
1334
28.1k
        int r;
1335
1336
28.1k
        assert(d);
1337
1338
28.1k
        if (d->fd < 0) {
1339
18.3k
                r = event_setup_timer_fd(e, d, clock);
1340
18.3k
                if (r < 0)
1341
0
                        return r;
1342
18.3k
        }
1343
1344
28.1k
        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1345
28.1k
        if (r < 0)
1346
0
                return r;
1347
1348
28.1k
        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1349
28.1k
        if (r < 0)
1350
0
                return r;
1351
1352
28.1k
        return 0;
1353
28.1k
}
1354
1355
static int event_source_time_prioq_put(
1356
                sd_event_source *s,
1357
28.1k
                struct clock_data *d) {
1358
1359
28.1k
        int r;
1360
1361
28.1k
        assert(s);
1362
28.1k
        assert(d);
1363
28.1k
        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
1364
1365
28.1k
        r = prioq_put(d->earliest, s, &s->earliest_index);
1366
28.1k
        if (r < 0)
1367
0
                return r;
1368
1369
28.1k
        r = prioq_put(d->latest, s, &s->latest_index);
1370
28.1k
        if (r < 0) {
1371
0
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
1372
0
                s->earliest_index = PRIOQ_IDX_NULL;
1373
0
                return r;
1374
0
        }
1375
1376
28.1k
        d->needs_rearm = true;
1377
28.1k
        return 0;
1378
28.1k
}
1379
1380
_public_ int sd_event_add_time(
1381
                sd_event *e,
1382
                sd_event_source **ret,
1383
                clockid_t clock,
1384
                uint64_t usec,
1385
                uint64_t accuracy,
1386
                sd_event_time_handler_t callback,
1387
28.1k
                void *userdata) {
1388
1389
28.1k
        EventSourceType type;
1390
28.1k
        _cleanup_(source_freep) sd_event_source *s = NULL;
1391
28.1k
        struct clock_data *d;
1392
28.1k
        int r;
1393
1394
28.1k
        assert_return(e, -EINVAL);
1395
28.1k
        assert_return(e = event_resolve(e), -ENOPKG);
1396
28.1k
        assert_return(accuracy != UINT64_MAX, -EINVAL);
1397
28.1k
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1398
28.1k
        assert_return(!event_origin_changed(e), -ECHILD);
1399
1400
28.1k
        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1401
0
                return -EOPNOTSUPP;
1402
1403
28.1k
        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1404
28.1k
        if (type < 0)
1405
0
                return -EOPNOTSUPP;
1406
1407
28.1k
        if (!callback)
1408
0
                callback = time_exit_callback;
1409
1410
28.1k
        assert_se(d = event_get_clock_data(e, type));
1411
1412
28.1k
        r = setup_clock_data(e, d, clock);
1413
28.1k
        if (r < 0)
1414
0
                return r;
1415
1416
28.1k
        s = source_new(e, !ret, type);
1417
28.1k
        if (!s)
1418
0
                return -ENOMEM;
1419
1420
28.1k
        s->time.next = usec;
1421
28.1k
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1422
28.1k
        s->time.callback = callback;
1423
28.1k
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
1424
28.1k
        s->userdata = userdata;
1425
28.1k
        s->enabled = SD_EVENT_ONESHOT;
1426
1427
28.1k
        r = event_source_time_prioq_put(s, d);
1428
28.1k
        if (r < 0)
1429
0
                return r;
1430
1431
28.1k
        if (ret)
1432
28.1k
                *ret = s;
1433
28.1k
        TAKE_PTR(s);
1434
1435
28.1k
        return 0;
1436
28.1k
}
1437
1438
_public_ int sd_event_add_time_relative(
1439
                sd_event *e,
1440
                sd_event_source **ret,
1441
                clockid_t clock,
1442
                uint64_t usec,
1443
                uint64_t accuracy,
1444
                sd_event_time_handler_t callback,
1445
0
                void *userdata) {
1446
1447
0
        usec_t t;
1448
0
        int r;
1449
1450
        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
1451
         * checks for overflow. */
1452
1453
0
        r = sd_event_now(e, clock, &t);
1454
0
        if (r < 0)
1455
0
                return r;
1456
1457
0
        if (usec >= USEC_INFINITY - t)
1458
0
                return -EOVERFLOW;
1459
1460
0
        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
1461
0
}
1462
1463
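The relative helper performs the sd_event_now() lookup and the overflow check shown above internally. A sketch equivalent to the absolute example earlier (assumes the illustrative on_timer handler from that sketch):

static int arm_relative_timer(sd_event *e) {
        /* Same effect as the absolute variant, minus the manual arithmetic. */
        return sd_event_add_time_relative(e, NULL, CLOCK_MONOTONIC,
                                          5 * 1000000ULL, /* 5s from now */
                                          0,              /* default accuracy */
                                          on_timer, NULL);
}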
0
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1464
0
        assert(s);
1465
1466
0
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1467
0
}
1468
1469
_public_ int sd_event_add_signal(
1470
                sd_event *e,
1471
                sd_event_source **ret,
1472
                int sig,
1473
                sd_event_signal_handler_t callback,
1474
0
                void *userdata) {
1475
1476
0
        _cleanup_(source_freep) sd_event_source *s = NULL;
1477
0
        struct signal_data *d;
1478
0
        sigset_t new_ss;
1479
0
        bool block_it;
1480
0
        int r;
1481
1482
0
        assert_return(e, -EINVAL);
1483
0
        assert_return(e = event_resolve(e), -ENOPKG);
1484
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1485
0
        assert_return(!event_origin_changed(e), -ECHILD);
1486
1487
        /* Let's make sure our special flag stays outside of the valid signal range */
1488
0
        assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK);
1489
1490
0
        if (sig & SD_EVENT_SIGNAL_PROCMASK) {
1491
0
                sig &= ~SD_EVENT_SIGNAL_PROCMASK;
1492
0
                assert_return(SIGNAL_VALID(sig), -EINVAL);
1493
1494
0
                block_it = true;
1495
0
        } else {
1496
0
                assert_return(SIGNAL_VALID(sig), -EINVAL);
1497
1498
0
                r = signal_is_blocked(sig);
1499
0
                if (r < 0)
1500
0
                        return r;
1501
0
                if (r == 0)
1502
0
                        return -EBUSY;
1503
1504
0
                block_it = false;
1505
0
        }
1506
1507
0
        if (!callback)
1508
0
                callback = signal_exit_callback;
1509
1510
0
        if (!e->signal_sources) {
1511
0
                e->signal_sources = new0(sd_event_source*, _NSIG);
1512
0
                if (!e->signal_sources)
1513
0
                        return -ENOMEM;
1514
0
        } else if (e->signal_sources[sig])
1515
0
                return -EBUSY;
1516
1517
0
        s = source_new(e, !ret, SOURCE_SIGNAL);
1518
0
        if (!s)
1519
0
                return -ENOMEM;
1520
1521
0
        s->signal.sig = sig;
1522
0
        s->signal.callback = callback;
1523
0
        s->userdata = userdata;
1524
0
        s->enabled = SD_EVENT_ON;
1525
1526
0
        e->signal_sources[sig] = s;
1527
1528
0
        if (block_it) {
1529
0
                sigset_t old_ss;
1530
1531
0
                if (sigemptyset(&new_ss) < 0)
1532
0
                        return -errno;
1533
1534
0
                if (sigaddset(&new_ss, sig) < 0)
1535
0
                        return -errno;
1536
1537
0
                r = pthread_sigmask(SIG_BLOCK, &new_ss, &old_ss);
1538
0
                if (r != 0)
1539
0
                        return -r;
1540
1541
0
                r = sigismember(&old_ss, sig);
1542
0
                if (r < 0)
1543
0
                        return -errno;
1544
1545
0
                s->signal.unblock = !r;
1546
0
        } else
1547
0
                s->signal.unblock = false;
1548
1549
0
        r = event_make_signal_data(e, sig, &d);
1550
0
        if (r < 0) {
1551
0
                if (s->signal.unblock)
1552
0
                        (void) pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
1553
1554
0
                return r;
1555
0
        }
1556
1557
        /* Use the signal name as description for the event source by default */
1558
0
        (void) sd_event_source_set_description(s, signal_to_string(sig));
1559
1560
0
        if (ret)
1561
0
                *ret = s;
1562
0
        TAKE_PTR(s);
1563
1564
0
        return 0;
1565
0
}
1566
1567
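A usage sketch (illustrative name, not part of the instrumented source). With the SD_EVENT_SIGNAL_PROCMASK flag the loop blocks the signal on the caller's behalf; without it, signal_is_blocked() above insists the caller has already blocked it. A NULL callback selects signal_exit_callback(), i.e. the signal terminates the loop:

#include <signal.h>
#include <systemd/sd-event.h>

static int watch_sigterm(sd_event *e) {
        /* Block SIGTERM for us and exit the event loop when it arrives. */
        return sd_event_add_signal(e, NULL,
                                   SIGTERM | SD_EVENT_SIGNAL_PROCMASK,
                                   NULL, NULL);
}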
0
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
1568
0
        assert(s);
1569
1570
0
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1571
0
}
1572
1573
0
static int verify_sigchld(int options) {
1574
0
        int r;
1575
1576
0
        if ((options & (WSTOPPED|WCONTINUED)) != 0) {
1577
                /* Caller must block SIGCHLD before using us to watch for WSTOPPED or WCONTINUED. */
1578
1579
0
                r = signal_is_blocked(SIGCHLD);
1580
0
                if (r < 0)
1581
0
                        return r;
1582
0
                if (r == 0)
1583
0
                        return -EBUSY;
1584
0
        }
1585
1586
        /* We don't want the Linux autoreaping logic to take effect when we're watching for process exit, so
1587
         * check if it is enabled. */
1588
1589
0
        if (options & WEXITED) {
1590
0
                r = autoreaping_enabled();
1591
0
                if (r < 0)
1592
0
                        return r;
1593
0
                if (r > 0)
1594
0
                        return -EBUSY;
1595
0
        }
1596
1597
0
        return 0;
1598
0
}
1599
1600
_public_ int sd_event_add_child(
1601
                sd_event *e,
1602
                sd_event_source **ret,
1603
                pid_t pid,
1604
                int options,
1605
                sd_event_child_handler_t callback,
1606
0
                void *userdata) {
1607
1608
0
        _cleanup_(source_freep) sd_event_source *s = NULL;
1609
0
        int r;
1610
1611
0
        assert_return(e, -EINVAL);
1612
0
        assert_return(e = event_resolve(e), -ENOPKG);
1613
0
        assert_return(pid > 1, -EINVAL);
1614
0
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED|WNOWAIT)), -EINVAL);
1615
0
        assert_return(options != 0, -EINVAL);
1616
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1617
0
        assert_return(!event_origin_changed(e), -ECHILD);
1618
1619
0
        if (!callback)
1620
0
                callback = child_exit_callback;
1621
1622
0
        r = verify_sigchld(options);
1623
0
        if (r < 0)
1624
0
                return r;
1625
1626
0
        r = hashmap_ensure_allocated(&e->child_sources, NULL);
1627
0
        if (r < 0)
1628
0
                return r;
1629
1630
0
        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1631
0
                return -EBUSY;
1632
1633
0
        s = source_new(e, !ret, SOURCE_CHILD);
1634
0
        if (!s)
1635
0
                return -ENOMEM;
1636
1637
        /* We always take a pidfd here, even if we wait for something other than WEXITED, so that we pin the
1638
         * PID and make regular waitid() handling race-free. */
1639
1640
0
        s->child.pidfd = pidfd_open(pid, 0);
1641
0
        if (s->child.pidfd < 0)
1642
0
                return -errno;
1643
1644
0
        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1645
1646
0
        s->wakeup = WAKEUP_EVENT_SOURCE;
1647
0
        s->child.options = options;
1648
0
        s->child.callback = callback;
1649
0
        s->userdata = userdata;
1650
0
        s->enabled = SD_EVENT_ONESHOT;
1651
1652
0
        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1653
                /* We only want to watch for exit */
1654
0
                r = source_child_pidfd_register(s, s->enabled);
1655
0
                if (r < 0)
1656
0
                        return r;
1657
1658
0
        } else {
1659
                /* We wait for some event other than WEXITED */
1660
0
                r = event_make_signal_data(e, SIGCHLD, NULL);
1661
0
                if (r < 0)
1662
0
                        return r;
1663
1664
0
                e->need_process_child = true;
1665
0
        }
1666
1667
0
        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1668
0
        if (r < 0)
1669
0
                return r;
1670
1671
        /* These must be done after everything succeeds. */
1672
0
        s->child.pid = pid;
1673
0
        e->n_online_child_sources++;
1674
1675
0
        if (ret)
1676
0
                *ret = s;
1677
0
        TAKE_PTR(s);
1678
0
        return 0;
1679
0
}
1680
1681
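A sketch of the common case (illustrative names, error handling trimmed). With plain WEXITED the source takes the pidfd path (EVENT_SOURCE_WATCH_PIDFD()), so SIGCHLD need not be blocked; verify_sigchld() above only demands that for WSTOPPED/WCONTINUED:

#include <errno.h>
#include <sys/wait.h>
#include <unistd.h>
#include <systemd/sd-event.h>

static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* For CLD_EXITED, si_status carries the exit code. */
        return sd_event_exit(sd_event_source_get_event(s),
                             si->si_code == CLD_EXITED ? si->si_status : -1);
}

static int watch_forked_child(sd_event *e) {
        pid_t pid = fork();
        if (pid < 0)
                return -errno;
        if (pid == 0)
                _exit(0); /* child: exit immediately for the demo */

        return sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
}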
_public_ int sd_event_add_child_pidfd(
1682
                sd_event *e,
1683
                sd_event_source **ret,
1684
                int pidfd,
1685
                int options,
1686
                sd_event_child_handler_t callback,
1687
0
                void *userdata) {
1688
1689
0
        _cleanup_(source_freep) sd_event_source *s = NULL;
1690
0
        pid_t pid;
1691
0
        int r;
1692
1693
0
        assert_return(e, -EINVAL);
1694
0
        assert_return(e = event_resolve(e), -ENOPKG);
1695
0
        assert_return(pidfd >= 0, -EBADF);
1696
0
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED|WNOWAIT)), -EINVAL);
1697
0
        assert_return(options != 0, -EINVAL);
1698
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1699
0
        assert_return(!event_origin_changed(e), -ECHILD);
1700
1701
0
        if (!callback)
1702
0
                callback = child_exit_callback;
1703
1704
0
        r = verify_sigchld(options);
1705
0
        if (r < 0)
1706
0
                return r;
1707
1708
0
        r = hashmap_ensure_allocated(&e->child_sources, NULL);
1709
0
        if (r < 0)
1710
0
                return r;
1711
1712
0
        r = pidfd_get_pid(pidfd, &pid);
1713
0
        if (r < 0)
1714
0
                return r;
1715
1716
0
        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1717
0
                return -EBUSY;
1718
1719
0
        s = source_new(e, !ret, SOURCE_CHILD);
1720
0
        if (!s)
1721
0
                return -ENOMEM;
1722
1723
0
        s->wakeup = WAKEUP_EVENT_SOURCE;
1724
0
        s->child.pidfd = pidfd;
1725
0
        s->child.options = options;
1726
0
        s->child.callback = callback;
1727
0
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1728
0
        s->userdata = userdata;
1729
0
        s->enabled = SD_EVENT_ONESHOT;
1730
1731
0
        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1732
                /* We only want to watch for WEXITED */
1733
0
                r = source_child_pidfd_register(s, s->enabled);
1734
0
                if (r < 0)
1735
0
                        return r;
1736
0
        } else {
1737
                /* We wait for some event other than WEXITED */
1738
0
                r = event_make_signal_data(e, SIGCHLD, NULL);
1739
0
                if (r < 0)
1740
0
                        return r;
1741
1742
0
                e->need_process_child = true;
1743
0
        }
1744
1745
0
        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1746
0
        if (r < 0)
1747
0
                return r;
1748
1749
0
        s->child.pid = pid;
1750
0
        e->n_online_child_sources++;
1751
1752
0
        if (ret)
1753
0
                *ret = s;
1754
0
        TAKE_PTR(s);
1755
0
        return 0;
1756
0
}
1757
1758
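The pidfd flavour suits callers that already hold a pidfd, e.g. one received over a socket. A sketch reusing the illustrative on_child handler above; as noted in the code, the source does not own a passed-in fd by default, so ownership is handed over explicitly here (assuming the companion setter sd_event_source_set_child_pidfd_own() is available):

#include <errno.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <systemd/sd-event.h>

static int watch_child_by_pidfd(sd_event *e, pid_t pid) {
        sd_event_source *s;
        int pidfd, r;

        pidfd = (int) syscall(SYS_pidfd_open, pid, 0);
        if (pidfd < 0)
                return -errno;

        r = sd_event_add_child_pidfd(e, &s, pidfd, WEXITED, on_child, NULL);
        if (r < 0) {
                close(pidfd);
                return r;
        }

        /* Let the event source close the pidfd when it is freed. */
        return sd_event_source_set_child_pidfd_own(s, 1);
}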
0
static int generic_exit_callback(sd_event_source *s, void *userdata) {
1759
0
        assert(s);
1760
1761
0
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1762
0
}
1763
1764
_public_ int sd_event_add_defer(
1765
                sd_event *e,
1766
                sd_event_source **ret,
1767
                sd_event_handler_t callback,
1768
61.2k
                void *userdata) {
1769
1770
61.2k
        _cleanup_(source_freep) sd_event_source *s = NULL;
1771
61.2k
        int r;
1772
1773
61.2k
        assert_return(e, -EINVAL);
1774
61.2k
        assert_return(e = event_resolve(e), -ENOPKG);
1775
61.2k
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1776
61.2k
        assert_return(!event_origin_changed(e), -ECHILD);
1777
1778
61.2k
        if (!callback)
1779
0
                callback = generic_exit_callback;
1780
1781
61.2k
        s = source_new(e, !ret, SOURCE_DEFER);
1782
61.2k
        if (!s)
1783
0
                return -ENOMEM;
1784
1785
61.2k
        s->defer.callback = callback;
1786
61.2k
        s->userdata = userdata;
1787
61.2k
        s->enabled = SD_EVENT_ONESHOT;
1788
1789
61.2k
        r = source_set_pending(s, true);
1790
61.2k
        if (r < 0)
1791
0
                return r;
1792
1793
61.2k
        if (ret)
1794
61.2k
                *ret = s;
1795
61.2k
        TAKE_PTR(s);
1796
1797
61.2k
        return 0;
1798
61.2k
}
1799
1800
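Defer sources are created pending and SD_EVENT_ONESHOT (see the source_set_pending() call above), so the callback runs exactly once, on the next loop iteration. A sketch of the "run this soon, but not in the current call stack" idiom (illustrative names):

static int do_later(sd_event_source *s, void *userdata) {
        /* Runs once on the next iteration; since ret was NULL below, the
         * floating source is cleaned up by the loop afterwards. */
        return 0;
}

static int schedule_soon(sd_event *e) {
        return sd_event_add_defer(e, NULL, do_later, NULL);
}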
_public_ int sd_event_add_post(
1801
                sd_event *e,
1802
                sd_event_source **ret,
1803
                sd_event_handler_t callback,
1804
0
                void *userdata) {
1805
1806
0
        _cleanup_(source_freep) sd_event_source *s = NULL;
1807
0
        int r;
1808
1809
0
        assert_return(e, -EINVAL);
1810
0
        assert_return(e = event_resolve(e), -ENOPKG);
1811
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1812
0
        assert_return(!event_origin_changed(e), -ECHILD);
1813
1814
0
        if (!callback)
1815
0
                callback = generic_exit_callback;
1816
1817
0
        s = source_new(e, !ret, SOURCE_POST);
1818
0
        if (!s)
1819
0
                return -ENOMEM;
1820
1821
0
        s->post.callback = callback;
1822
0
        s->userdata = userdata;
1823
0
        s->enabled = SD_EVENT_ON;
1824
1825
0
        r = set_ensure_put(&e->post_sources, NULL, s);
1826
0
        if (r < 0)
1827
0
                return r;
1828
0
        assert(r > 0);
1829
1830
0
        if (ret)
1831
0
                *ret = s;
1832
0
        TAKE_PTR(s);
1833
1834
0
        return 0;
1835
0
}
1836
1837
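Post sources are enabled SD_EVENT_ON and are dispatched whenever some other (non-post) source was dispatched in the same iteration, which makes them a natural "flush accumulated state" hook. A sketch with illustrative names:

static int on_post(sd_event_source *s, void *userdata) {
        /* Called after other sources ran this iteration; batch work here. */
        return 0;
}

static int install_flush_hook(sd_event *e) {
        return sd_event_add_post(e, NULL, on_post, NULL);
}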
_public_ int sd_event_add_exit(
1838
                sd_event *e,
1839
                sd_event_source **ret,
1840
                sd_event_handler_t callback,
1841
10.5k
                void *userdata) {
1842
1843
10.5k
        _cleanup_(source_freep) sd_event_source *s = NULL;
1844
10.5k
        int r;
1845
1846
10.5k
        assert_return(e, -EINVAL);
1847
10.5k
        assert_return(e = event_resolve(e), -ENOPKG);
1848
10.5k
        assert_return(callback, -EINVAL);
1849
10.5k
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1850
10.5k
        assert_return(!event_origin_changed(e), -ECHILD);
1851
1852
10.5k
        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1853
10.5k
        if (r < 0)
1854
0
                return r;
1855
1856
10.5k
        s = source_new(e, !ret, SOURCE_EXIT);
1857
10.5k
        if (!s)
1858
0
                return -ENOMEM;
1859
1860
10.5k
        s->exit.callback = callback;
1861
10.5k
        s->userdata = userdata;
1862
10.5k
        s->exit.prioq_index = PRIOQ_IDX_NULL;
1863
10.5k
        s->enabled = SD_EVENT_ONESHOT;
1864
1865
10.5k
        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1866
10.5k
        if (r < 0)
1867
0
                return r;
1868
1869
10.5k
        if (ret)
1870
10.5k
                *ret = s;
1871
10.5k
        TAKE_PTR(s);
1872
1873
10.5k
        return 0;
1874
10.5k
}
1875
1876
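Exit sources run while the loop is terminating, i.e. once sd_event_exit() has been called, in priority order, which makes them the sd-event idiom for cleanup handlers. Unlike the other constructors, this one refuses a NULL callback. A sketch with illustrative names:

static int on_cleanup(sd_event_source *s, void *userdata) {
        /* Runs during exit processing, before sd_event_loop() returns. */
        return 0;
}

static int install_cleanup(sd_event *e) {
        return sd_event_add_exit(e, NULL, on_cleanup, NULL);
}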
0
_public_ int sd_event_trim_memory(void) {
1877
0
        int r;
1878
1879
        /* A default implementation of a memory pressure callback. Simply releases our own allocation caches
1880
         * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a
1881
         * NULL callback parameter. */
1882
1883
0
        log_debug("Memory pressure event, trimming malloc() memory.");
1884
1885
0
        struct mallinfo2 before_mallinfo = mallinfo2();
1886
1887
0
        usec_t before_timestamp = now(CLOCK_MONOTONIC);
1888
0
        hashmap_trim_pools();
1889
0
        r = malloc_trim(0);
1890
0
        usec_t after_timestamp = now(CLOCK_MONOTONIC);
1891
1892
0
        if (r > 0)
1893
0
                log_debug("Successfully trimmed some memory.");
1894
0
        else
1895
0
                log_debug("Couldn't trim any memory.");
1896
1897
0
        usec_t period = after_timestamp - before_timestamp;
1898
1899
0
        struct mallinfo2 after_mallinfo = mallinfo2();
1900
0
        size_t l = LESS_BY(before_mallinfo.hblkhd, after_mallinfo.hblkhd) +
1901
0
                LESS_BY(before_mallinfo.arena, after_mallinfo.arena);
1902
0
        log_struct(LOG_DEBUG,
1903
0
                   LOG_MESSAGE("Memory trimming took %s, returned %s to OS.",
1904
0
                               FORMAT_TIMESPAN(period, 0),
1905
0
                               FORMAT_BYTES(l)),
1906
0
                   LOG_MESSAGE_ID(SD_MESSAGE_MEMORY_TRIM_STR),
1907
0
                   LOG_ITEM("TRIMMED_BYTES=%zu", l),
1908
0
                   LOG_ITEM("TRIMMED_USEC=" USEC_FMT, period));
1909
1910
0
        return 0;
1911
0
}
1912
1913
0
static int memory_pressure_callback(sd_event_source *s, void *userdata) {
1914
0
        assert(s);
1915
1916
0
        sd_event_trim_memory();
1917
0
        return 0;
1918
0
}
1919
1920
_public_ int sd_event_add_memory_pressure(
1921
                sd_event *e,
1922
                sd_event_source **ret,
1923
                sd_event_handler_t callback,
1924
0
                void *userdata) {
1925
1926
0
        _cleanup_free_ char *w = NULL;
1927
0
        _cleanup_(source_freep) sd_event_source *s = NULL;
1928
0
        _cleanup_close_ int path_fd = -EBADF, fd = -EBADF;
1929
0
        _cleanup_free_ void *write_buffer = NULL;
1930
0
        const char *watch, *watch_fallback = NULL, *env;
1931
0
        size_t write_buffer_size = 0;
1932
0
        struct stat st;
1933
0
        uint32_t events;
1934
0
        bool locked;
1935
0
        int r;
1936
1937
0
        assert_return(e, -EINVAL);
1938
0
        assert_return(e = event_resolve(e), -ENOPKG);
1939
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1940
0
        assert_return(!event_origin_changed(e), -ECHILD);
1941
1942
0
        if (!callback)
1943
0
                callback = memory_pressure_callback;
1944
1945
0
        s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE);
1946
0
        if (!s)
1947
0
                return -ENOMEM;
1948
1949
0
        s->wakeup = WAKEUP_EVENT_SOURCE;
1950
0
        s->memory_pressure.callback = callback;
1951
0
        s->userdata = userdata;
1952
0
        s->enabled = SD_EVENT_ON;
1953
0
        s->memory_pressure.fd = -EBADF;
1954
1955
0
        env = secure_getenv("MEMORY_PRESSURE_WATCH");
1956
0
        if (env) {
1957
0
                if (isempty(env) || path_equal(env, "/dev/null"))
1958
0
                        return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
1959
0
                                               "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH.");
1960
1961
0
                if (!path_is_absolute(env) || !path_is_normalized(env))
1962
0
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
1963
0
                                               "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env);
1964
1965
0
                watch = env;
1966
1967
0
                env = secure_getenv("MEMORY_PRESSURE_WRITE");
1968
0
                if (env) {
1969
0
                        r = unbase64mem(env, &write_buffer, &write_buffer_size);
1970
0
                        if (r < 0)
1971
0
                                return r;
1972
0
                }
1973
1974
0
                locked = true;
1975
0
        } else {
1976
1977
0
                r = is_pressure_supported();
1978
0
                if (r < 0)
1979
0
                        return r;
1980
0
                if (r == 0)
1981
0
                        return -EOPNOTSUPP;
1982
1983
                /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on
1984
                 * the system wide pressure if for some reason we cannot (which could be: memory controller
1985
                 * not delegated to us, or PSI simply not available in the kernel). */
1986
1987
0
                _cleanup_free_ char *cg = NULL;
1988
0
                r = cg_pid_get_path(0, &cg);
1989
0
                if (r < 0)
1990
0
                        return r;
1991
1992
0
                w = path_join("/sys/fs/cgroup", cg, "memory.pressure");
1993
0
                if (!w)
1994
0
                        return -ENOMEM;
1995
1996
0
                watch = w;
1997
0
                watch_fallback = "/proc/pressure/memory";
1998
1999
                /* Android uses three levels in its userspace low memory killer logic:
2000
                 *     some  70000 1000000
2001
                 *     some 100000 1000000
2002
                 *     full  70000 1000000
2003
                 *
2004
                 * GNOME's low memory monitor uses:
2005
                 *     some  70000 1000000
2006
                 *     some 100000 1000000
2007
                 *     full 100000 1000000
2008
                 *
2009
                 * We'll default to the middle level that both agree on. Except we do it on a 2s window
2010
                 * (i.e. 200ms per 2s, rather than 100ms per 1s), because that's the window duration the
2011
                 * kernel will allow us to use unprivileged, also in the future. */
2012
0
                if (asprintf((char**) &write_buffer,
2013
0
                             "%s " USEC_FMT " " USEC_FMT,
2014
0
                             MEMORY_PRESSURE_DEFAULT_TYPE,
2015
0
                             MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC,
2016
0
                             MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
2017
0
                        return -ENOMEM;
2018
2019
0
                write_buffer_size = strlen(write_buffer) + 1;
2020
0
                locked = false;
2021
0
        }
2022
2023
0
        path_fd = open(watch, O_PATH|O_CLOEXEC);
2024
0
        if (path_fd < 0) {
2025
0
                if (errno != ENOENT)
2026
0
                        return -errno;
2027
2028
                /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as
2029
                 * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and
2030
                 * the PSI service apparently is not supported) */
2031
0
                if (!watch_fallback)
2032
0
                        return locked ? -ENOENT : -EOPNOTSUPP;
2033
2034
0
                path_fd = open(watch_fallback, O_PATH|O_CLOEXEC);
2035
0
                if (path_fd < 0) {
2036
0
                        if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */
2037
0
                                return -EOPNOTSUPP;
2038
0
                        return -errno;
2039
0
                }
2040
0
        }
2041
2042
0
        if (fstat(path_fd, &st) < 0)
2043
0
                return -errno;
2044
2045
0
        if (S_ISSOCK(st.st_mode)) {
2046
0
                fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
2047
0
                if (fd < 0)
2048
0
                        return -errno;
2049
2050
0
                r = connect_unix_path(fd, path_fd, NULL);
2051
0
                if (r < 0)
2052
0
                        return r;
2053
2054
0
                events = EPOLLIN;
2055
2056
0
        } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) {
2057
0
                fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2058
0
                if (fd < 0)
2059
0
                        return fd;
2060
2061
0
                if (S_ISREG(st.st_mode)) {
2062
0
                        struct statfs sfs;
2063
2064
                        /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */
2065
2066
0
                        if (fstatfs(fd, &sfs) < 0)
2067
0
                                return -errno;
2068
2069
0
                        if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) &&
2070
0
                            !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC))
2071
0
                                return -ENOTTY;
2072
2073
0
                        events = EPOLLPRI;
2074
0
                } else
2075
                        /* For fifos and char devices just watch for EPOLLIN */
2076
0
                        events = EPOLLIN;
2077
2078
0
        } else if (S_ISDIR(st.st_mode))
2079
0
                return -EISDIR;
2080
0
        else
2081
0
                return -EBADF;
2082
2083
0
        s->memory_pressure.fd = TAKE_FD(fd);
2084
0
        s->memory_pressure.write_buffer = TAKE_PTR(write_buffer);
2085
0
        s->memory_pressure.write_buffer_size = write_buffer_size;
2086
0
        s->memory_pressure.events = events;
2087
0
        s->memory_pressure.locked = locked;
2088
2089
        /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the
2090
         * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the
2091
         * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure
2092
         * event sources on which writes must be executed before the first event loop iteration is
2093
         * executed. (We could also write the data here, right away, but we want to give the caller the
2094
         * freedom to call sd_event_source_set_memory_pressure_type() and
2095
         * sd_event_source_set_memory_pressure_rate() before we write it.) */
2096
2097
0
        if (s->memory_pressure.write_buffer_size > 0)
2098
0
                source_memory_pressure_add_to_write_list(s);
2099
0
        else {
2100
0
                r = source_memory_pressure_register(s, s->enabled);
2101
0
                if (r < 0)
2102
0
                        return r;
2103
0
        }
2104
2105
0
        if (ret)
2106
0
                *ret = s;
2107
0
        TAKE_PTR(s);
2108
2109
0
        return 0;
2110
0
}
2111
2112
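A usage sketch (illustrative names; "full" is an arbitrary choice). A NULL callback installs sd_event_trim_memory() above as the handler, and per the comment above the pressure parameters may still be adjusted before the first loop iteration, since the watch string is only written then:

static int install_pressure_handler(sd_event *e) {
        sd_event_source *s;
        int r;

        r = sd_event_add_memory_pressure(e, &s, NULL, NULL);
        if (r < 0)
                return r; /* e.g. -EOPNOTSUPP if PSI is unavailable */

        /* Optional: trigger on "full" stalls instead of the default type. */
        return sd_event_source_set_memory_pressure_type(s, "full");
}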
0
static void event_free_inotify_data(sd_event *e, InotifyData *d) {
2113
0
        assert(e);
2114
2115
0
        if (!d)
2116
0
                return;
2117
2118
0
        assert(hashmap_isempty(d->inodes));
2119
0
        assert(hashmap_isempty(d->wd));
2120
2121
0
        if (d->buffer_filled > 0)
2122
0
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
2123
2124
0
        hashmap_free(d->inodes);
2125
0
        hashmap_free(d->wd);
2126
2127
0
        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
2128
2129
0
        if (d->fd >= 0) {
2130
0
                if (!event_origin_changed(e) &&
2131
0
                    epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
2132
0
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
2133
2134
0
                safe_close(d->fd);
2135
0
        }
2136
0
        free(d);
2137
0
}
2138
2139
0
static int event_make_inotify_data(sd_event *e, int64_t priority, InotifyData **ret) {
2140
0
        _cleanup_close_ int fd = -EBADF;
2141
0
        InotifyData *d;
2142
0
        int r;
2143
2144
0
        assert(e);
2145
2146
0
        d = hashmap_get(e->inotify_data, &priority);
2147
0
        if (d) {
2148
0
                if (ret)
2149
0
                        *ret = d;
2150
0
                return 0;
2151
0
        }
2152
2153
0
        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
2154
0
        if (fd < 0)
2155
0
                return -errno;
2156
2157
0
        fd = fd_move_above_stdio(fd);
2158
2159
0
        d = new(InotifyData, 1);
2160
0
        if (!d)
2161
0
                return -ENOMEM;
2162
2163
0
        *d = (InotifyData) {
2164
0
                .wakeup = WAKEUP_INOTIFY_DATA,
2165
0
                .fd = TAKE_FD(fd),
2166
0
                .priority = priority,
2167
0
        };
2168
2169
0
        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
2170
0
        if (r < 0) {
2171
0
                d->fd = safe_close(d->fd);
2172
0
                free(d);
2173
0
                return r;
2174
0
        }
2175
2176
0
        struct epoll_event ev = {
2177
0
                .events = EPOLLIN,
2178
0
                .data.ptr = d,
2179
0
        };
2180
2181
0
        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
2182
0
                r = -errno;
2183
0
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
2184
                                            * remove the fd from the epoll first, which we don't want as we couldn't
2185
                                            * add it in the first place. */
2186
0
                event_free_inotify_data(e, d);
2187
0
                return r;
2188
0
        }
2189
2190
0
        if (ret)
2191
0
                *ret = d;
2192
2193
0
        return 1;
2194
0
}
2195
2196
0
static int inode_data_compare(const InodeData *x, const InodeData *y) {
2197
0
        int r;
2198
2199
0
        assert(x);
2200
0
        assert(y);
2201
2202
0
        r = CMP(x->dev, y->dev);
2203
0
        if (r != 0)
2204
0
                return r;
2205
2206
0
        return CMP(x->ino, y->ino);
2207
0
}
2208
2209
0
static void inode_data_hash_func(const InodeData *d, struct siphash *state) {
2210
0
        assert(d);
2211
2212
0
        siphash24_compress_typesafe(d->dev, state);
2213
0
        siphash24_compress_typesafe(d->ino, state);
2214
0
}
2215
2216
DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, InodeData, inode_data_hash_func, inode_data_compare);
2217
2218
0
static void event_free_inode_data(sd_event *e, InodeData *d) {
2219
0
        assert(e);
2220
2221
0
        if (!d)
2222
0
                return;
2223
2224
0
        assert(!d->event_sources);
2225
2226
0
        if (d->fd >= 0) {
2227
0
                LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
2228
0
                safe_close(d->fd);
2229
0
        }
2230
2231
0
        if (d->inotify_data) {
2232
2233
0
                if (d->wd >= 0) {
2234
0
                        if (d->inotify_data->fd >= 0 && !event_origin_changed(e)) {
2235
                                /* So here's a problem. At the time this runs the watch descriptor might already be
2236
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
2237
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
2238
                                 * likely case to happen. */
2239
2240
0
                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
2241
0
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
2242
0
                        }
2243
2244
0
                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
2245
0
                }
2246
2247
0
                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
2248
0
        }
2249
2250
0
        free(d->path);
2251
0
        free(d);
2252
0
}
2253
2254
0
static void event_gc_inotify_data(sd_event *e, InotifyData *d) {
2255
0
        assert(e);
2256
2257
        /* Collects the InotifyData object if we don't need it anymore. That's the case if we don't want to
2258
         * watch any inode with it anymore, which in turn happens if no event source of this priority is
2259
         * interested in any inode any longer. That said, we maintain an extra busy counter: if non-zero
2260
         * we'll delay GC (under the expectation that the GC is called again once the counter is
2261
         * decremented). */
2262
2263
0
        if (!d)
2264
0
                return;
2265
2266
0
        if (!hashmap_isempty(d->inodes))
2267
0
                return;
2268
2269
0
        if (d->n_busy > 0)
2270
0
                return;
2271
2272
0
        event_free_inotify_data(e, d);
2273
0
}
2274
2275
0
static void event_gc_inode_data(sd_event *e, InodeData *d) {
2276
0
        InotifyData *inotify_data;
2277
2278
0
        assert(e);
2279
2280
0
        if (!d)
2281
0
                return;
2282
2283
0
        if (d->event_sources)
2284
0
                return;
2285
2286
0
        inotify_data = d->inotify_data;
2287
0
        event_free_inode_data(e, d);
2288
2289
0
        event_gc_inotify_data(e, inotify_data);
2290
0
}
2291
2292
static int event_make_inode_data(
2293
                sd_event *e,
2294
                InotifyData *inotify_data,
2295
                dev_t dev,
2296
                ino_t ino,
2297
0
                InodeData **ret) {
2298
2299
0
        InodeData *d, key;
2300
0
        int r;
2301
2302
0
        assert(e);
2303
0
        assert(inotify_data);
2304
2305
0
        key = (InodeData) {
2306
0
                .ino = ino,
2307
0
                .dev = dev,
2308
0
        };
2309
2310
0
        d = hashmap_get(inotify_data->inodes, &key);
2311
0
        if (d) {
2312
0
                if (ret)
2313
0
                        *ret = d;
2314
2315
0
                return 0;
2316
0
        }
2317
2318
0
        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
2319
0
        if (r < 0)
2320
0
                return r;
2321
2322
0
        d = new(InodeData, 1);
2323
0
        if (!d)
2324
0
                return -ENOMEM;
2325
2326
0
        *d = (InodeData) {
2327
0
                .dev = dev,
2328
0
                .ino = ino,
2329
0
                .wd = -1,
2330
0
                .fd = -EBADF,
2331
0
                .inotify_data = inotify_data,
2332
0
        };
2333
2334
0
        r = hashmap_put(inotify_data->inodes, d, d);
2335
0
        if (r < 0) {
2336
0
                free(d);
2337
0
                return r;
2338
0
        }
2339
2340
0
        if (ret)
2341
0
                *ret = d;
2342
2343
0
        return 1;
2344
0
}
2345
2346
0
static uint32_t inode_data_determine_mask(InodeData *d) {
2347
0
        bool excl_unlink = true;
2348
0
        uint32_t combined = 0;
2349
2350
0
        assert(d);
2351
2352
        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
2353
         * the IN_EXCL_UNLINK flag is ANDed instead.
2354
         *
2355
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
2356
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
2357
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
2358
         * events we don't care for client-side. */
2359
2360
0
        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
2361
2362
0
                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
2363
0
                        excl_unlink = false;
2364
2365
0
                combined |= s->inotify.mask;
2366
0
        }
2367
2368
0
        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
2369
0
}
2370
2371
0
static int inode_data_realize_watch(sd_event *e, InodeData *d) {
2372
0
        uint32_t combined_mask;
2373
0
        int wd, r;
2374
2375
0
        assert(d);
2376
0
        assert(d->fd >= 0);
2377
2378
0
        combined_mask = inode_data_determine_mask(d);
2379
2380
0
        if (d->wd >= 0 && combined_mask == d->combined_mask)
2381
0
                return 0;
2382
2383
0
        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
2384
0
        if (r < 0)
2385
0
                return r;
2386
2387
0
        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
2388
0
        if (wd < 0)
2389
0
                return wd;
2390
2391
0
        if (d->wd < 0) {
2392
0
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
2393
0
                if (r < 0) {
2394
0
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
2395
0
                        return r;
2396
0
                }
2397
2398
0
                d->wd = wd;
2399
2400
0
        } else if (d->wd != wd) {
2401
2402
0
                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
2403
0
                (void) inotify_rm_watch(d->fd, wd);
2404
0
                return -EINVAL;
2405
0
        }
2406
2407
0
        d->combined_mask = combined_mask;
2408
0
        return 1;
2409
0
}
2410
2411
0
static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
2412
0
        assert(s);
2413
2414
0
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
2415
0
}
2416
2417
static int event_add_inotify_fd_internal(
2418
                sd_event *e,
2419
                sd_event_source **ret,
2420
                int fd,
2421
                bool donate,
2422
                uint32_t mask,
2423
                sd_event_inotify_handler_t callback,
2424
0
                void *userdata) {
2425
2426
0
        _cleanup_close_ int donated_fd = donate ? fd : -EBADF;
2427
0
        _cleanup_(source_freep) sd_event_source *s = NULL;
2428
0
        InotifyData *inotify_data = NULL;
2429
0
        InodeData *inode_data = NULL;
2430
0
        struct stat st;
2431
0
        int r;
2432
2433
0
        assert_return(e, -EINVAL);
2434
0
        assert_return(e = event_resolve(e), -ENOPKG);
2435
0
        assert_return(fd >= 0, -EBADF);
2436
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2437
0
        assert_return(!event_origin_changed(e), -ECHILD);
2438
2439
0
        if (!callback)
2440
0
                callback = inotify_exit_callback;
2441
2442
        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
2443
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
2444
         * the caller may not use it directly. */
2445
0
        if (mask & IN_MASK_ADD)
2446
0
                return -EINVAL;
2447
2448
0
        if (fstat(fd, &st) < 0)
2449
0
                return -errno;
2450
2451
0
        s = source_new(e, !ret, SOURCE_INOTIFY);
2452
0
        if (!s)
2453
0
                return -ENOMEM;
2454
2455
0
        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
2456
0
        s->inotify.mask = mask;
2457
0
        s->inotify.callback = callback;
2458
0
        s->userdata = userdata;
2459
2460
        /* Allocate an inotify object for this priority, and an inode object within it */
2461
0
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
2462
0
        if (r < 0)
2463
0
                return r;
2464
2465
0
        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
2466
0
        if (r < 0) {
2467
0
                event_gc_inotify_data(e, inotify_data);
2468
0
                return r;
2469
0
        }
2470
2471
        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
2472
         * the event source until then, for which we need the original inode. */
2473
0
        if (inode_data->fd < 0) {
2474
0
                if (donated_fd >= 0)
2475
0
                        inode_data->fd = TAKE_FD(donated_fd);
2476
0
                else {
2477
0
                        inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2478
0
                        if (inode_data->fd < 0) {
2479
0
                                r = -errno;
2480
0
                                event_gc_inode_data(e, inode_data);
2481
0
                                return r;
2482
0
                        }
2483
0
                }
2484
2485
0
                LIST_PREPEND(to_close, e->inode_data_to_close_list, inode_data);
2486
2487
0
                _cleanup_free_ char *path = NULL;
2488
0
                r = fd_get_path(inode_data->fd, &path);
2489
0
                if (r < 0 && r != -ENOSYS) { /* The path is optional, hence ignore -ENOSYS. */
2490
0
                        event_gc_inode_data(e, inode_data);
2491
0
                        return r;
2492
0
                }
2493
2494
0
                free_and_replace(inode_data->path, path);
2495
0
        }
2496
2497
        /* Link our event source to the inode data object */
2498
0
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
2499
0
        s->inotify.inode_data = inode_data;
2500
2501
        /* Actually realize the watch now */
2502
0
        r = inode_data_realize_watch(e, inode_data);
2503
0
        if (r < 0)
2504
0
                return r;
2505
2506
0
        if (ret)
2507
0
                *ret = s;
2508
0
        TAKE_PTR(s);
2509
2510
0
        return 0;
2511
0
}
2512
2513
_public_ int sd_event_add_inotify_fd(
2514
                sd_event *e,
2515
                sd_event_source **ret,
2516
                int fd,
2517
                uint32_t mask,
2518
                sd_event_inotify_handler_t callback,
2519
0
                void *userdata) {
2520
2521
0
        return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
2522
0
}
2523
2524
_public_ int sd_event_add_inotify(
2525
                sd_event *e,
2526
                sd_event_source **ret,
2527
                const char *path,
2528
                uint32_t mask,
2529
                sd_event_inotify_handler_t callback,
2530
0
                void *userdata) {
2531
2532
0
        sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
2533
0
        int fd, r;
2534
2535
0
        assert_return(path, -EINVAL);
2536
2537
0
        fd = open(path, O_PATH | O_CLOEXEC |
2538
0
                        (mask & IN_ONLYDIR ? O_DIRECTORY : 0) |
2539
0
                        (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
2540
0
        if (fd < 0)
2541
0
                return -errno;
2542
2543
0
        r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
2544
0
        if (r < 0)
2545
0
                return r;
2546
2547
0
        (void) sd_event_source_set_description(s, path);
2548
2549
0
        if (ret)
2550
0
                *ret = s;
2551
2552
0
        return r;
2553
0
}
2554
2555
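A usage sketch (illustrative names). Watches on the same inode are coalesced per priority by the machinery above, which is also why IN_MASK_ADD is refused in event_add_inotify_fd_internal():

#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        /* ev->len > 0 means ev->name holds the child name (directory watch). */
        return 0;
}

static int watch_directory(sd_event *e, const char *path) {
        return sd_event_add_inotify(e, NULL, path,
                                    IN_CREATE | IN_MOVED_TO | IN_ONLYDIR,
                                    on_inotify, NULL);
}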
143k
static sd_event_source* event_source_free(sd_event_source *s) {
2556
143k
        if (!s)
2557
0
                return NULL;
2558
2559
        /* Here's a special hack: when we are called from a
2560
         * dispatch handler we won't free the event source
2561
         * immediately, but we will detach the fd from the
2562
         * epoll. This way it is safe for the caller to unref
2563
         * the event source and immediately close the fd, but
2564
         * we still retain a valid event source object after
2565
         * the callback. */
2566
2567
143k
        if (s->dispatching)
2568
10.7k
                source_disconnect(s);
2569
132k
        else
2570
132k
                source_free(s);
2571
2572
143k
        return NULL;
2573
143k
}
2574
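The "special hack" above is exactly what makes the following pattern safe: a handler may drop the last reference to its own source and close the underlying fd at once, because unref detaches the fd from epoll immediately while the source object is only reaped after dispatch. A sketch, assuming the source does not own the fd:

#include <unistd.h>
#include <systemd/sd-event.h>

static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        sd_event_source_unref(s); /* detaches fd from epoll right away */
        close(fd);                /* safe: epoll no longer references it */
        return 0;
}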
2575
2.54M
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
Unexecuted instantiation: sd_event_source_ref
2576
2577
134k
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
2578
134k
        assert_return(s, -EINVAL);
2579
134k
        assert_return(!event_origin_changed(s->event), -ECHILD);
2580
2581
134k
        return free_and_strdup(&s->description, description);
2582
134k
}
2583
2584
0
_public_ int sd_event_source_get_description(sd_event_source *s, const char **ret) {
2585
0
        assert_return(s, -EINVAL);
2586
0
        assert_return(ret, -EINVAL);
2587
2588
0
        if (!s->description)
2589
0
                return -ENXIO;
2590
2591
0
        *ret = s->description;
2592
0
        return 0;
2593
0
}
2594
2595
5.25k
_public_ sd_event* sd_event_source_get_event(sd_event_source *s) {
2596
5.25k
        assert_return(s, NULL);
2597
5.25k
        assert_return(!event_origin_changed(s->event), NULL);
2598
2599
5.25k
        return s->event;
2600
5.25k
}
2601
2602
0
_public_ int sd_event_source_get_pending(sd_event_source *s) {
2603
0
        assert_return(s, -EINVAL);
2604
0
        assert_return(s->type != SOURCE_EXIT, -EDOM);
2605
0
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2606
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2607
2608
0
        return s->pending;
2609
0
}
2610
2611
0
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2612
0
        assert_return(s, -EINVAL);
2613
0
        assert_return(s->type == SOURCE_IO, -EDOM);
2614
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2615
2616
0
        return s->io.fd;
2617
0
}
2618
2619
0
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2620
0
        int saved_fd, r;
2621
2622
0
        assert_return(s, -EINVAL);
2623
0
        assert_return(fd >= 0, -EBADF);
2624
0
        assert_return(s->type == SOURCE_IO, -EDOM);
2625
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2626
2627
0
        if (s->io.fd == fd)
2628
0
                return 0;
2629
2630
0
        saved_fd = s->io.fd;
2631
0
        s->io.fd = fd;
2632
2633
0
        assert(event_source_is_offline(s) == !s->io.registered);
2634
2635
0
        if (s->io.registered) {
2636
0
                s->io.registered = false;
2637
2638
0
                r = source_io_register(s, s->enabled, s->io.events);
2639
0
                if (r < 0) {
2640
0
                        s->io.fd = saved_fd;
2641
0
                        s->io.registered = true;
2642
0
                        return r;
2643
0
                }
2644
2645
0
                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2646
0
        }
2647
2648
0
        if (s->io.owned)
2649
0
                safe_close(saved_fd);
2650
2651
0
        return 0;
2652
0
}
2653
2654
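sd_event_source_set_io_fd() swaps the descriptor under a live source, re-registering it with the unchanged event mask, which suits reconnect logic. A sketch; connect_again() is a hypothetical helper returning a fresh fd or a negative errno:

static int reconnect(sd_event_source *s) {
        int fd = connect_again(); /* hypothetical */
        if (fd < 0)
                return fd;

        /* Re-registers on the new fd; if io-fd ownership is enabled,
         * the old fd is closed for us (see above). */
        return sd_event_source_set_io_fd(s, fd);
}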
0
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2655
0
        assert_return(s, -EINVAL);
2656
0
        assert_return(s->type == SOURCE_IO, -EDOM);
2657
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2658
2659
0
        return s->io.owned;
2660
0
}
2661
2662
0
_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2663
0
        assert_return(s, -EINVAL);
2664
0
        assert_return(s->type == SOURCE_IO, -EDOM);
2665
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2666
2667
0
        s->io.owned = own;
2668
0
        return 0;
2669
0
}
2670
2671
0
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t *ret) {
2672
0
        assert_return(s, -EINVAL);
2673
0
        assert_return(ret, -EINVAL);
2674
0
        assert_return(s->type == SOURCE_IO, -EDOM);
2675
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2676
2677
0
        *ret = s->io.events;
2678
0
        return 0;
2679
0
}
2680
2681
5.59M
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2682
5.59M
        int r;
2683
2684
5.59M
        assert_return(s, -EINVAL);
2685
5.59M
        assert_return(s->type == SOURCE_IO, -EDOM);
2686
5.59M
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2687
5.59M
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2688
5.59M
        assert_return(!event_origin_changed(s->event), -ECHILD);
2689
2690
        /* edge-triggered updates are never skipped, so we can reset edges */
2691
5.59M
        if (s->io.events == events && !(events & EPOLLET))
2692
4.12M
                return 0;
2693
2694
1.46M
        r = source_set_pending(s, false);
2695
1.46M
        if (r < 0)
2696
0
                return r;
2697
2698
1.46M
        if (event_source_is_online(s)) {
2699
1.46M
                r = source_io_register(s, s->enabled, events);
2700
1.46M
                if (r < 0)
2701
0
                        return r;
2702
1.46M
        }
2703
2704
1.46M
        s->io.events = events;
2705
2706
1.46M
        return 0;
2707
1.46M
}
2708
2709
0
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t *ret) {
2710
0
        assert_return(s, -EINVAL);
2711
0
        assert_return(ret, -EINVAL);
2712
0
        assert_return(s->type == SOURCE_IO, -EDOM);
2713
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2714
2715
0
        if (!s->pending)
2716
0
                return -ENODATA;
2717
2718
0
        *ret = s->io.revents;
2719
0
        return 0;
2720
0
}
2721
2722
0
_public_ int sd_event_source_get_signal(sd_event_source *s) {
2723
0
        assert_return(s, -EINVAL);
2724
0
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2725
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2726
2727
0
        return s->signal.sig;
2728
0
}
2729
2730
0
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *ret) {
2731
0
        assert_return(s, -EINVAL);
2732
0
        assert_return(ret, -EINVAL);
2733
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
2734
2735
0
        *ret = s->priority;
2736
0
        return 0;
2737
0
}
2738
2739
135k
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2740
135k
        bool rm_inotify = false, rm_inode = false;
2741
135k
        InotifyData *new_inotify_data = NULL;
2742
135k
        InodeData *new_inode_data = NULL;
2743
135k
        int r;
2744
2745
135k
        assert_return(s, -EINVAL);
2746
135k
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2747
135k
        assert_return(!event_origin_changed(s->event), -ECHILD);
2748
2749
135k
        if (s->priority == priority)
2750
89.2k
                return 0;
2751
2752
45.9k
        if (s->type == SOURCE_INOTIFY) {
2753
0
                InodeData *old_inode_data;
2754
2755
0
                assert(s->inotify.inode_data);
2756
0
                old_inode_data = s->inotify.inode_data;
2757
2758
                /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2759
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2760
                 * events we allow priority changes only until the first following iteration. */
2761
0
                if (old_inode_data->fd < 0)
2762
0
                        return -EOPNOTSUPP;
2763
2764
0
                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2765
0
                if (r < 0)
2766
0
                        return r;
2767
0
                rm_inotify = r > 0;
2768
2769
0
                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2770
0
                if (r < 0)
2771
0
                        goto fail;
2772
0
                rm_inode = r > 0;
2773
2774
0
                if (new_inode_data->fd < 0) {
2775
                        /* Duplicate the fd for the new inode object if we don't have any yet */
2776
0
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2777
0
                        if (new_inode_data->fd < 0) {
2778
0
                                r = -errno;
2779
0
                                goto fail;
2780
0
                        }
2781
2782
0
                        LIST_PREPEND(to_close, s->event->inode_data_to_close_list, new_inode_data);
2783
2784
0
                        _cleanup_free_ char *path = NULL;
2785
0
                        r = fd_get_path(new_inode_data->fd, &path);
2786
0
                        if (r < 0 && r != -ENOSYS)
2787
0
                                goto fail;
2788
2789
0
                        free_and_replace(new_inode_data->path, path);
2790
0
                }
2791
2792
                /* Move the event source to the new inode data structure */
2793
0
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2794
0
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2795
0
                s->inotify.inode_data = new_inode_data;
2796
2797
                /* Now create the new watch */
2798
0
                r = inode_data_realize_watch(s->event, new_inode_data);
2799
0
                if (r < 0) {
2800
                        /* Move it back */
2801
0
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2802
0
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2803
0
                        s->inotify.inode_data = old_inode_data;
2804
0
                        goto fail;
2805
0
                }
2806
2807
0
                s->priority = priority;
2808
2809
0
                event_gc_inode_data(s->event, old_inode_data);
2810
2811
45.9k
        } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
2812
0
                struct signal_data *old, *d;
2813
2814
                /* Move us from the signalfd belonging to the old
2815
                 * priority to the signalfd of the new priority */
2816
2817
0
                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2818
2819
0
                s->priority = priority;
2820
2821
0
                r = event_make_signal_data(s->event, s->signal.sig, &d);
2822
0
                if (r < 0) {
2823
0
                        s->priority = old->priority;
2824
0
                        return r;
2825
0
                }
2826
2827
0
                event_unmask_signal_data(s->event, old, s->signal.sig);
2828
0
        } else
2829
45.9k
                s->priority = priority;
2830
2831
45.9k
        event_source_pp_prioq_reshuffle(s);
2832
2833
45.9k
        if (s->type == SOURCE_EXIT)
2834
0
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2835
2836
45.9k
        return 0;
2837
2838
0
fail:
2839
0
        if (rm_inode)
2840
0
                event_free_inode_data(s->event, new_inode_data);
2841
2842
0
        if (rm_inotify)
2843
0
                event_free_inotify_data(s->event, new_inotify_data);
2844
2845
0
        return r;
2846
45.9k
}
2847
2848
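Priorities order dispatch among sources that are pending at the same time, with smaller values served first. A fragment using the predefined constants; s and r stand for caller state:

/* Serve this source ahead of SD_EVENT_PRIORITY_NORMAL traffic. */
r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
if (r < 0)
        return r;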
22.6k
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
2849
        /* Quick mode: the event source doesn't exist and we only want to query boolean enablement state. */
2850
22.6k
        if (!s && !ret)
2851
19.2k
                return false;
2852
2853
3.36k
        assert_return(s, -EINVAL);
2854
3.36k
        assert_return(!event_origin_changed(s->event), -ECHILD);
2855
2856
3.36k
        if (ret)
2857
0
                *ret = s->enabled;
2858
2859
3.36k
        return s->enabled != SD_EVENT_OFF;
2860
3.36k
}
2861
2862
static int event_source_offline(
2863
                sd_event_source *s,
2864
                int enabled,
2865
598k
                bool ratelimited) {
2866
2867
598k
        bool was_offline;
2868
598k
        int r;
2869
2870
598k
        assert(s);
2871
598k
        assert(enabled == SD_EVENT_OFF || ratelimited);
2872
2873
        /* Unset the pending flag when this event source is disabled */
2874
598k
        if (s->enabled != SD_EVENT_OFF &&
2875
598k
            enabled == SD_EVENT_OFF &&
2876
598k
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2877
77.9k
                r = source_set_pending(s, false);
2878
77.9k
                if (r < 0)
2879
0
                        return r;
2880
77.9k
        }
2881
2882
598k
        was_offline = event_source_is_offline(s);
2883
598k
        s->enabled = enabled;
2884
598k
        s->ratelimited = ratelimited;
2885
2886
598k
        switch (s->type) {
2887
2888
41.7k
        case SOURCE_IO:
2889
41.7k
                source_io_unregister(s);
2890
41.7k
                break;
2891
2892
0
        case SOURCE_SIGNAL:
2893
0
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2894
0
                break;
2895
2896
0
        case SOURCE_CHILD:
2897
0
                if (!was_offline) {
2898
0
                        assert(s->event->n_online_child_sources > 0);
2899
0
                        s->event->n_online_child_sources--;
2900
0
                }
2901
2902
0
                if (EVENT_SOURCE_WATCH_PIDFD(s))
2903
0
                        source_child_pidfd_unregister(s);
2904
0
                else
2905
0
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2906
0
                break;
2907
2908
10.5k
        case SOURCE_EXIT:
2909
10.5k
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2910
10.5k
                break;
2911
2912
0
        case SOURCE_MEMORY_PRESSURE:
2913
0
                source_memory_pressure_unregister(s);
2914
0
                break;
2915
2916
0
        case SOURCE_TIME_REALTIME:
2917
25.6k
        case SOURCE_TIME_BOOTTIME:
2918
36.2k
        case SOURCE_TIME_MONOTONIC:
2919
36.2k
        case SOURCE_TIME_REALTIME_ALARM:
2920
36.2k
        case SOURCE_TIME_BOOTTIME_ALARM:
2921
546k
        case SOURCE_DEFER:
2922
546k
        case SOURCE_POST:
2923
546k
        case SOURCE_INOTIFY:
2924
546k
                break;
2925
2926
0
        default:
2927
0
                assert_not_reached();
2928
598k
        }
2929
2930
        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2931
598k
        event_source_time_prioq_reshuffle(s);
2932
2933
598k
        return 1;
2934
598k
}
2935
2936
static int event_source_online(
2937
                sd_event_source *s,
2938
                int enabled,
2939
474k
                bool ratelimited) {
2940
2941
474k
        bool was_online;
2942
474k
        int r;
2943
2944
474k
        assert(s);
2945
474k
        assert(enabled != SD_EVENT_OFF || !ratelimited);
2946
2947
        /* Unset the pending flag when this event source is enabled */
2948
474k
        if (s->enabled == SD_EVENT_OFF &&
2949
463k
            enabled != SD_EVENT_OFF &&
2950
463k
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2951
8.05k
                r = source_set_pending(s, false);
2952
8.05k
                if (r < 0)
2953
0
                        return r;
2954
8.05k
        }
2955
2956
        /* Are we really ready for onlining? */
2957
474k
        if (enabled == SD_EVENT_OFF || ratelimited) {
2958
                /* Nope, we are not ready for onlining, then just update the precise state and exit */
2959
0
                s->enabled = enabled;
2960
0
                s->ratelimited = ratelimited;
2961
0
                return 0;
2962
0
        }
2963
2964
474k
        was_online = event_source_is_online(s);
2965
2966
474k
        switch (s->type) {
2967
10
        case SOURCE_IO:
2968
10
                r = source_io_register(s, enabled, s->io.events);
2969
10
                if (r < 0)
2970
0
                        return r;
2971
10
                break;
2972
2973
10
        case SOURCE_SIGNAL:
2974
0
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
2975
0
                if (r < 0) {
2976
0
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2977
0
                        return r;
2978
0
                }
2979
2980
0
                break;
2981
2982
0
        case SOURCE_CHILD:
2983
0
                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2984
                        /* yes, we can rely on pidfd */
2985
2986
0
                        r = source_child_pidfd_register(s, enabled);
2987
0
                        if (r < 0)
2988
0
                                return r;
2989
0
                } else {
2990
                        /* something other than WEXITED to watch for */
2991
2992
0
                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
2993
0
                        if (r < 0) {
2994
0
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2995
0
                                return r;
2996
0
                        }
2997
0
                }
2998
2999
0
                if (!was_online)
3000
0
                        s->event->n_online_child_sources++;
3001
0
                break;
3002
3003
0
        case SOURCE_MEMORY_PRESSURE:
3004
                /* As documented in sd_event_add_memory_pressure(), we can only register the PSI fd with
3005
                 * epoll after writing the watch string. */
3006
0
                if (s->memory_pressure.write_buffer_size == 0) {
3007
0
                        r = source_memory_pressure_register(s, enabled);
3008
0
                        if (r < 0)
3009
0
                                return r;
3010
0
                }
3011
3012
0
                break;
3013
3014
0
        case SOURCE_TIME_REALTIME:
3015
8.04k
        case SOURCE_TIME_BOOTTIME:
3016
13.3k
        case SOURCE_TIME_MONOTONIC:
3017
13.3k
        case SOURCE_TIME_REALTIME_ALARM:
3018
13.3k
        case SOURCE_TIME_BOOTTIME_ALARM:
3019
13.3k
        case SOURCE_EXIT:
3020
474k
        case SOURCE_DEFER:
3021
474k
        case SOURCE_POST:
3022
474k
        case SOURCE_INOTIFY:
3023
474k
                break;
3024
3025
0
        default:
3026
0
                assert_not_reached();
3027
474k
        }
3028
3029
474k
        s->enabled = enabled;
3030
474k
        s->ratelimited = ratelimited;
3031
3032
        /* Non-failing operations below */
3033
474k
        if (s->type == SOURCE_EXIT)
3034
0
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
3035
3036
        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
3037
474k
        event_source_time_prioq_reshuffle(s);
3038
3039
474k
        return 1;
3040
474k
}
3041
3042
8.20M
_public_ int sd_event_source_set_enabled(sd_event_source *s, int enabled) {
3043
8.20M
        int r;
3044
3045
8.20M
        assert_return(IN_SET(enabled, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
3046
3047
        /* Quick mode: if the source doesn't exist, SD_EVENT_OFF is a noop. */
3048
8.20M
        if (enabled == SD_EVENT_OFF && !s)
3049
945k
                return 0;
3050
3051
7.25M
        assert_return(s, -EINVAL);
3052
7.25M
        assert_return(!event_origin_changed(s->event), -ECHILD);
3053
3054
        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
3055
7.25M
        if (s->event->state == SD_EVENT_FINISHED)
3056
0
                return enabled == SD_EVENT_OFF ? 0 : -ESTALE;
3057
3058
7.25M
        if (s->enabled == enabled) /* No change? */
3059
6.18M
                return 0;
3060
3061
1.07M
        if (enabled == SD_EVENT_OFF)
3062
598k
                r = event_source_offline(s, enabled, s->ratelimited);
3063
474k
        else
3064
474k
                r = event_source_online(s, enabled, s->ratelimited);
3065
1.07M
        if (r < 0)
3066
0
                return r;
3067
3068
1.07M
        event_source_pp_prioq_reshuffle(s);
3069
1.07M
        return 0;
3070
1.07M
}
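
/* A usage sketch, not part of the original file: a self-re-arming oneshot
 * timer built on the call above. Callback name and interval are illustrative. */
static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        int r;

        /* Dispatching a SD_EVENT_ONESHOT source turned it SD_EVENT_OFF, so
         * re-arm the clock and flip it back on for the next tick. */
        r = sd_event_source_set_time_relative(s, USEC_PER_SEC);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}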
3071
3072
0
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *ret) {
3073
0
        assert_return(s, -EINVAL);
3074
0
        assert_return(ret, -EINVAL);
3075
0
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
3076
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3077
3078
0
        *ret = s->time.next;
3079
0
        return 0;
3080
0
}
3081
3082
1.50M
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
3083
1.50M
        int r;
3084
3085
1.50M
        assert_return(s, -EINVAL);
3086
1.50M
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
3087
1.50M
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
3088
1.50M
        assert_return(!event_origin_changed(s->event), -ECHILD);
3089
3090
1.50M
        r = source_set_pending(s, false);
3091
1.50M
        if (r < 0)
3092
0
                return r;
3093
3094
1.50M
        s->time.next = usec;
3095
3096
1.50M
        event_source_time_prioq_reshuffle(s);
3097
1.50M
        return 0;
3098
1.50M
}
3099
3100
0
_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
3101
0
        usec_t t;
3102
0
        int r;
3103
3104
0
        assert_return(s, -EINVAL);
3105
0
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
3106
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3107
3108
0
        if (usec == USEC_INFINITY)
3109
0
                return sd_event_source_set_time(s, USEC_INFINITY);
3110
3111
0
        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
3112
0
        if (r < 0)
3113
0
                return r;
3114
3115
0
        usec = usec_add(t, usec);
3116
0
        if (usec == USEC_INFINITY)
3117
0
                return -EOVERFLOW;
3118
3119
0
        return sd_event_source_set_time(s, usec);
3120
0
}
3121
3122
0
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *ret) {
3123
0
        assert_return(s, -EINVAL);
3124
0
        assert_return(ret, -EINVAL);
3125
0
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
3126
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3127
3128
0
        *ret = s->time.accuracy;
3129
0
        return 0;
3130
0
}
3131
3132
8.04k
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
3133
8.04k
        int r;
3134
3135
8.04k
        assert_return(s, -EINVAL);
3136
8.04k
        assert_return(usec != UINT64_MAX, -EINVAL);
3137
8.04k
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
3138
8.04k
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
3139
8.04k
        assert_return(!event_origin_changed(s->event), -ECHILD);
3140
3141
8.04k
        r = source_set_pending(s, false);
3142
8.04k
        if (r < 0)
3143
0
                return r;
3144
3145
8.04k
        if (usec == 0)
3146
7.86k
                usec = DEFAULT_ACCURACY_USEC;
3147
3148
8.04k
        s->time.accuracy = usec;
3149
3150
8.04k
        event_source_time_prioq_reshuffle(s);
3151
8.04k
        return 0;
3152
8.04k
}
3153
3154
8.04k
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *ret) {
3155
8.04k
        assert_return(s, -EINVAL);
3156
8.04k
        assert_return(ret, -EINVAL);
3157
8.04k
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
3158
8.04k
        assert_return(!event_origin_changed(s->event), -ECHILD);
3159
3160
8.04k
        *ret = event_source_type_to_clock(s->type);
3161
8.04k
        return 0;
3162
8.04k
}
3163
3164
0
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *ret) {
3165
0
        assert_return(s, -EINVAL);
3166
0
        assert_return(ret, -EINVAL);
3167
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3168
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3169
3170
0
        *ret = s->child.pid;
3171
0
        return 0;
3172
0
}
3173
3174
0
_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
3175
0
        assert_return(s, -EINVAL);
3176
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3177
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3178
3179
0
        return s->child.pidfd;
3180
0
}
3181
3182
0
_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
3183
0
        assert_return(s, -EINVAL);
3184
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3185
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3186
0
        assert_return(SIGNAL_VALID(sig), -EINVAL);
3187
0
        assert(s->child.pidfd >= 0);
3188
3189
        /* If we have already seen an indication that the process exited, refuse to send a signal early. */
3190
0
        if (s->child.exited)
3191
0
                return -ESRCH;
3192
0
        assert(!s->child.waited);
3193
3194
        /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the structure here. */
3195
0
        siginfo_t copy;
3196
0
        if (si)
3197
0
                copy = *si;
3198
3199
0
        return RET_NERRNO(pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, flags));
3200
0
}
3201
3202
0
_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
3203
0
        assert_return(s, -EINVAL);
3204
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3205
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3206
0
        assert(s->child.pidfd >= 0);
3207
3208
0
        return s->child.pidfd_owned;
3209
0
}
3210
3211
0
_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
3212
0
        assert_return(s, -EINVAL);
3213
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3214
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3215
0
        assert(s->child.pidfd >= 0);
3216
3217
0
        s->child.pidfd_owned = own;
3218
0
        return 0;
3219
0
}
3220
3221
0
_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
3222
0
        assert_return(s, -EINVAL);
3223
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3224
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3225
3226
0
        return s->child.process_owned;
3227
0
}
3228
3229
0
_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
3230
0
        assert_return(s, -EINVAL);
3231
0
        assert_return(s->type == SOURCE_CHILD, -EDOM);
3232
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3233
3234
0
        s->child.process_owned = own;
3235
0
        return 0;
3236
0
}
3237
3238
0
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret) {
3239
0
        assert_return(s, -EINVAL);
3240
0
        assert_return(ret, -EINVAL);
3241
0
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
3242
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3243
3244
0
        *ret = s->inotify.mask;
3245
0
        return 0;
3246
0
}
3247
3248
0
_public_ int sd_event_source_get_inotify_path(sd_event_source *s, const char **ret) {
3249
0
        assert_return(s, -EINVAL);
3250
0
        assert_return(ret, -EINVAL);
3251
0
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
3252
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
3253
3254
0
        if (!s->inotify.inode_data)
3255
0
                return -ESTALE; /* already disconnected. */
3256
3257
0
        if (!s->inotify.inode_data->path)
3258
0
                return -ENOSYS; /* /proc was not mounted? */
3259
3260
0
        *ret = s->inotify.inode_data->path;
3261
0
        return 0;
3262
0
}
3263
3264
10.5k
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
3265
10.5k
        int r;
3266
3267
10.5k
        assert_return(s, -EINVAL);
3268
10.5k
        assert_return(s->type != SOURCE_EXIT, -EDOM);
3269
10.5k
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
3270
10.5k
        assert_return(!event_origin_changed(s->event), -ECHILD);
3271
3272
10.5k
        if (s->prepare == callback)
3273
0
                return 0;
3274
3275
10.5k
        if (callback && s->prepare) {
3276
0
                s->prepare = callback;
3277
0
                return 0;
3278
0
        }
3279
3280
10.5k
        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
3281
10.5k
        if (r < 0)
3282
0
                return r;
3283
3284
10.5k
        s->prepare = callback;
3285
3286
10.5k
        if (callback) {
3287
10.5k
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
3288
10.5k
                if (r < 0)
3289
0
                        return r;
3290
10.5k
        } else
3291
0
                prioq_remove(s->event->prepare, s, &s->prepare_index);
3292
3293
10.5k
        return 0;
3294
10.5k
}
3295
3296
0
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
3297
0
        assert_return(s, NULL);
3298
0
        assert_return(!event_origin_changed(s->event), NULL);
3299
3300
0
        return s->userdata;
3301
0
}
3302
3303
8.04k
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
3304
8.04k
        void *ret;
3305
3306
8.04k
        assert_return(s, NULL);
3307
8.04k
        assert_return(!event_origin_changed(s->event), NULL);
3308
3309
8.04k
        ret = s->userdata;
3310
8.04k
        s->userdata = userdata;
3311
3312
8.04k
        return ret;
3313
8.04k
}
3314
3315
0
static int event_source_enter_ratelimited(sd_event_source *s) {
3316
0
        int r;
3317
3318
0
        assert(s);
3319
3320
        /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, keyed
3321
         * to the end of the rate limit time window, much as if it were a timer event source. */
3322
3323
0
        if (s->ratelimited)
3324
0
                return 0; /* Already ratelimited, this is a NOP hence */
3325
3326
        /* Make sure we can install a CLOCK_MONOTONIC event further down. */
3327
0
        r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
3328
0
        if (r < 0)
3329
0
                return r;
3330
3331
        /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
3332
         * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
3333
         * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
3334
0
        if (EVENT_SOURCE_IS_TIME(s->type))
3335
0
                event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
3336
3337
        /* Now, let's add the event source to the monotonic clock instead */
3338
0
        r = event_source_time_prioq_put(s, &s->event->monotonic);
3339
0
        if (r < 0)
3340
0
                goto fail;
3341
3342
        /* And let's take the event source officially offline */
3343
0
        r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
3344
0
        if (r < 0) {
3345
0
                event_source_time_prioq_remove(s, &s->event->monotonic);
3346
0
                goto fail;
3347
0
        }
3348
3349
0
        event_source_pp_prioq_reshuffle(s);
3350
3351
0
        log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
3352
0
        return 0;
3353
3354
0
fail:
3355
        /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
3356
         * space for it should already be allocated. */
3357
0
        if (EVENT_SOURCE_IS_TIME(s->type))
3358
0
                assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
3359
3360
0
        return r;
3361
0
}
3362
3363
0
static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
3364
0
        int r;
3365
3366
0
        assert(s);
3367
3368
0
        if (!s->ratelimited)
3369
0
                return 0;
3370
3371
        /* Let's take the event source out of the monotonic prioq first. */
3372
0
        event_source_time_prioq_remove(s, &s->event->monotonic);
3373
3374
        /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
3375
0
        if (EVENT_SOURCE_IS_TIME(s->type)) {
3376
0
                r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
3377
0
                if (r < 0)
3378
0
                        goto fail;
3379
0
        }
3380
3381
        /* Let's try to take it online again. */
3382
0
        r = event_source_online(s, s->enabled, /* ratelimited= */ false);
3383
0
        if (r < 0) {
3384
                /* Do something roughly sensible when this failed: undo the two prioq ops above */
3385
0
                if (EVENT_SOURCE_IS_TIME(s->type))
3386
0
                        event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
3387
3388
0
                goto fail;
3389
0
        }
3390
3391
0
        event_source_pp_prioq_reshuffle(s);
3392
0
        ratelimit_reset(&s->rate_limit);
3393
3394
0
        log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
3395
3396
0
        if (run_callback && s->ratelimit_expire_callback) {
3397
0
                s->dispatching = true;
3398
0
                r = s->ratelimit_expire_callback(s, s->userdata);
3399
0
                s->dispatching = false;
3400
3401
0
                if (r < 0) {
3402
0
                        log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
3403
0
                                        strna(s->description),
3404
0
                                        event_source_type_to_string(s->type),
3405
0
                                        s->exit_on_failure ? "exiting" : "disabling");
3406
3407
0
                        if (s->exit_on_failure)
3408
0
                                (void) sd_event_exit(s->event, r);
3409
0
                }
3410
3411
0
                if (s->n_ref == 0)
3412
0
                        source_free(s);
3413
0
                else if (r < 0)
3414
0
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
3415
3416
0
                return 1;
3417
0
        }
3418
3419
0
        return 0;
3420
3421
0
fail:
3422
        /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
3423
         * simply put it back in it, maybe we can then process it more successfully next iteration. */
3424
0
        assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
3425
3426
0
        return r;
3427
0
}
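
/* A usage sketch, not part of the original file: the enter/leave machinery
 * above is driven through the public rate limit API. Values are illustrative. */
static int setup_ratelimit(sd_event_source *s, sd_event_handler_t on_expire) {
        int r;

        /* Allow at most 10 dispatches per second; anything beyond that takes
         * the source offline until the window ends, at which point
         * event_source_leave_ratelimit() brings it back and runs 'on_expire'. */
        r = sd_event_source_set_ratelimit(s, USEC_PER_SEC, 10);
        if (r < 0)
                return r;

        return sd_event_source_set_ratelimit_expire_callback(s, on_expire);
}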
3428
3429
1.51M
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
3430
1.51M
        usec_t c;
3431
1.51M
        assert(e);
3432
1.51M
        assert(a <= b);
3433
3434
1.51M
        if (a <= 0)
3435
15.1k
                return 0;
3436
1.49M
        if (a >= USEC_INFINITY)
3437
0
                return USEC_INFINITY;
3438
3439
1.49M
        if (b <= a + 1)
3440
0
                return a;
3441
3442
1.49M
        initialize_perturb(e);
3443
3444
        /*
3445
          Find a good time to wake up again between times a and b. We
3446
          have two goals here:
3447
3448
          a) We want to wake up as seldom as possible, hence prefer
3449
             later times over earlier times.
3450
3451
          b) But if we have to wake up, then let's make sure to
3452
             dispatch as much as possible on the entire system.
3453
3454
          We implement this by waking up everywhere at the same time
3455
          within any given minute if we can, synchronised via the
3456
          perturbation value determined from the boot ID. If we can't,
3457
          then we try to find the same spot in every 10s, then 1s and
3458
          then 250ms step. Otherwise, we pick the last possible time
3459
          to wake up.
3460
        */
3461
3462
1.49M
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
3463
1.49M
        if (c >= b) {
3464
986k
                if (_unlikely_(c < USEC_PER_MINUTE))
3465
0
                        return b;
3466
3467
986k
                c -= USEC_PER_MINUTE;
3468
986k
        }
3469
3470
1.49M
        if (c >= a)
3471
4.35k
                return c;
3472
3473
1.49M
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
3474
1.49M
        if (c >= b) {
3475
1.26M
                if (_unlikely_(c < USEC_PER_SEC*10))
3476
0
                        return b;
3477
3478
1.26M
                c -= USEC_PER_SEC*10;
3479
1.26M
        }
3480
3481
1.49M
        if (c >= a)
3482
29.2k
                return c;
3483
3484
1.46M
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
3485
1.46M
        if (c >= b) {
3486
855k
                if (_unlikely_(c < USEC_PER_SEC))
3487
0
                        return b;
3488
3489
855k
                c -= USEC_PER_SEC;
3490
855k
        }
3491
3492
1.46M
        if (c >= a)
3493
355k
                return c;
3494
3495
1.10M
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
3496
1.10M
        if (c >= b) {
3497
390k
                if (_unlikely_(c < USEC_PER_MSEC*250))
3498
0
                        return b;
3499
3500
390k
                c -= USEC_PER_MSEC*250;
3501
390k
        }
3502
3503
1.10M
        if (c >= a)
3504
1.10M
                return c;
3505
3506
0
        return b;
3507
1.10M
}
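
/* A worked example of the coalescing above (illustrative numbers): with a
 * per-boot perturb of 17s and a window of a = 12:00:03 .. b = 12:00:45 within
 * some minute, the per-minute candidate lands on 12:00:17, inside [a, b], so
 * every loop on this machine that can wait that long wakes at :17 of the
 * minute, batching wakeups system-wide. A window too narrow for the minute
 * grid falls through to the 10s, 1s and 250ms grids in turn. */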
3508
3509
static int event_arm_timer(
3510
                sd_event *e,
3511
18.3M
                struct clock_data *d) {
3512
3513
18.3M
        struct itimerspec its = {};
3514
18.3M
        sd_event_source *a, *b;
3515
18.3M
        usec_t t;
3516
3517
18.3M
        assert(e);
3518
18.3M
        assert(d);
3519
3520
18.3M
        if (!d->needs_rearm)
3521
16.8M
                return 0;
3522
3523
1.52M
        d->needs_rearm = false;
3524
3525
1.52M
        a = prioq_peek(d->earliest);
3526
1.52M
        assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
3527
1.52M
        if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
3528
3529
8.46k
                if (d->fd < 0)
3530
0
                        return 0;
3531
3532
8.46k
                if (d->next == USEC_INFINITY)
3533
3.32k
                        return 0;
3534
3535
                /* disarm */
3536
5.14k
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3537
0
                        return -errno;
3538
3539
5.14k
                d->next = USEC_INFINITY;
3540
5.14k
                return 0;
3541
5.14k
        }
3542
3543
1.51M
        b = prioq_peek(d->latest);
3544
1.51M
        assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
3545
1.51M
        assert(b && b->enabled != SD_EVENT_OFF);
3546
3547
1.51M
        t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
3548
1.51M
        if (d->next == t)
3549
1.49M
                return 0;
3550
3551
19.8k
        assert_se(d->fd >= 0);
3552
3553
19.8k
        if (t == 0) {
3554
                /* We don't want to disarm here, just mean some time looooong ago. */
3555
11.2k
                its.it_value.tv_sec = 0;
3556
11.2k
                its.it_value.tv_nsec = 1;
3557
11.2k
        } else
3558
8.61k
                timespec_store(&its.it_value, t);
3559
3560
19.8k
        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3561
0
                return -errno;
3562
3563
19.8k
        d->next = t;
3564
19.8k
        return 0;
3565
19.8k
}
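
/* A sketch of the timerfd contract relied on above, not part of the original
 * file: an absolute expiry in the past fires immediately, but an all-zero
 * itimerspec disarms the timer, hence the 1 nsec special case. The helper name
 * is illustrative. */
static int arm_absolute(int fd, usec_t t) {
        struct itimerspec its = {};

        if (t == 0)
                its.it_value.tv_nsec = 1; /* "long ago": fires at once instead of disarming */
        else
                timespec_store(&its.it_value, t);

        return RET_NERRNO(timerfd_settime(fd, TFD_TIMER_ABSTIME, &its, NULL));
}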
3566
3567
6.91M
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
3568
6.91M
        assert(e);
3569
6.91M
        assert(s);
3570
6.91M
        assert(s->type == SOURCE_IO);
3571
3572
        /* If the event source was already pending, we just OR in the
3573
         * new revents, otherwise we reset the value. The ORing is
3574
         * necessary to handle EPOLLONESHOT events properly where
3575
         * readability might happen independently of writability, and
3576
         * we need to keep track of both */
3577
3578
6.91M
        if (s->pending)
3579
4.54M
                s->io.revents |= revents;
3580
2.36M
        else
3581
2.36M
                s->io.revents = revents;
3582
3583
6.91M
        return source_set_pending(s, true);
3584
6.91M
}
3585
3586
4.17k
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
3587
4.17k
        uint64_t x;
3588
4.17k
        ssize_t ss;
3589
3590
4.17k
        assert(e);
3591
4.17k
        assert(fd >= 0);
3592
3593
4.17k
        assert_return(events == EPOLLIN, -EIO);
3594
3595
4.17k
        ss = read(fd, &x, sizeof(x));
3596
4.17k
        if (ss < 0) {
3597
0
                if (ERRNO_IS_TRANSIENT(errno))
3598
0
                        return 0;
3599
3600
0
                return -errno;
3601
0
        }
3602
3603
4.17k
        if (_unlikely_(ss != sizeof(x)))
3604
0
                return -EIO;
3605
3606
4.17k
        if (next)
3607
4.17k
                *next = USEC_INFINITY;
3608
3609
4.17k
        return 0;
3610
4.17k
}
3611
3612
static int process_timer(
3613
                sd_event *e,
3614
                usec_t n,
3615
18.3M
                struct clock_data *d) {
3616
3617
18.3M
        sd_event_source *s;
3618
18.3M
        bool callback_invoked = false;
3619
18.3M
        int r;
3620
3621
18.3M
        assert(e);
3622
18.3M
        assert(d);
3623
3624
18.4M
        for (;;) {
3625
18.4M
                s = prioq_peek(d->earliest);
3626
18.4M
                assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
3627
3628
18.4M
                if (!s || time_event_source_next(s) > n)
3629
16.3M
                        break;
3630
3631
2.07M
                if (s->ratelimited) {
3632
                        /* This is an event source whose ratelimit window has ended. Let's turn it on
3633
                         * again. */
3634
0
                        assert(s->ratelimited);
3635
3636
0
                        r = event_source_leave_ratelimit(s, /* run_callback= */ true);
3637
0
                        if (r < 0)
3638
0
                                return r;
3639
0
                        else if (r == 1)
3640
0
                                callback_invoked = true;
3641
3642
0
                        continue;
3643
0
                }
3644
3645
2.07M
                if (s->enabled == SD_EVENT_OFF || s->pending)
3646
2.05M
                        break;
3647
3648
15.1k
                r = source_set_pending(s, true);
3649
15.1k
                if (r < 0)
3650
0
                        return r;
3651
3652
15.1k
                event_source_time_prioq_reshuffle(s);
3653
15.1k
        }
3654
3655
18.3M
        return callback_invoked;
3656
18.3M
}
3657
3658
3.67M
static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3659
3.67M
        int64_t min_priority = threshold;
3660
3.67M
        bool something_new = false;
3661
3.67M
        sd_event_source *s;
3662
3.67M
        int r;
3663
3664
3.67M
        assert(e);
3665
3.67M
        assert(ret_min_priority);
3666
3667
3.67M
        if (!e->need_process_child) {
3668
3.67M
                *ret_min_priority = min_priority;
3669
3.67M
                return 0;
3670
3.67M
        }
3671
3672
0
        e->need_process_child = false;
3673
3674
        /* So, this is ugly. We iteratively invoke waitid() + WNOHANG with each child process we shall wait for,
3675
         * instead of using P_ALL. This is because we only want to get child information of very specific
3676
         * child processes, and not all of them. We might not have processed the SIGCHLD event
3677
         * of a previous invocation and we don't want to maintain an unbounded *per-child* event queue,
3678
         * hence we really don't want anything flushed out of the kernel's queue that we don't care
3679
         * about. Since this is O(n) this means that if you have a lot of processes you probably want
3680
         * to handle SIGCHLD yourself.
3681
         *
3682
         * We do not reap the children here (by using WNOWAIT), this is only done after the event
3683
         * source is dispatched so that the callback still sees the process as a zombie. */
3684
3685
0
        HASHMAP_FOREACH(s, e->child_sources) {
3686
0
                assert(s->type == SOURCE_CHILD);
3687
0
                assert(s->child.pidfd >= 0);
3688
3689
0
                if (s->priority > threshold)
3690
0
                        continue;
3691
3692
0
                if (s->pending)
3693
0
                        continue;
3694
3695
0
                if (event_source_is_offline(s))
3696
0
                        continue;
3697
3698
0
                if (s->child.exited)
3699
0
                        continue;
3700
3701
0
                if (EVENT_SOURCE_WATCH_PIDFD(s))
3702
                        /* There's a usable pidfd known for this event source? Then don't waitid() for
3703
                         * it here */
3704
0
                        continue;
3705
3706
0
                zero(s->child.siginfo);
3707
0
                if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo,
3708
0
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | (s->child.options & ~WNOWAIT)) < 0)
3709
0
                        return negative_errno();
3710
3711
0
                if (s->child.siginfo.si_pid != 0) {
3712
0
                        bool zombie = SIGINFO_CODE_IS_DEAD(s->child.siginfo.si_code);
3713
3714
0
                        if (zombie)
3715
0
                                s->child.exited = true;
3716
0
                        else if (s->child.options & WEXITED) {
3717
                                /* If the child isn't dead then let's immediately remove the state change
3718
                                 * from the queue, since there's no benefit in leaving it queued. */
3719
3720
0
                                assert(s->child.options & (WSTOPPED|WCONTINUED));
3721
0
                                (void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
3722
0
                        }
3723
3724
0
                        r = source_set_pending(s, true);
3725
0
                        if (r < 0)
3726
0
                                return r;
3727
0
                        if (r > 0) {
3728
0
                                something_new = true;
3729
0
                                min_priority = MIN(min_priority, s->priority);
3730
0
                        }
3731
0
                }
3732
0
        }
3733
3734
0
        *ret_min_priority = min_priority;
3735
0
        return something_new;
3736
0
}
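
/* A sketch of the WNOWAIT peek used above, not part of the original file:
 * query a child's state without reaping it, so that a later dispatch still
 * sees the zombie. The helper name is illustrative. */
static int peek_child_state(int pidfd, siginfo_t *ret) {
        *ret = (siginfo_t) {};

        if (waitid(P_PIDFD, pidfd, ret, WEXITED|WNOHANG|WNOWAIT) < 0)
                return -errno;

        return ret->si_pid != 0; /* 0: no state change queued yet */
}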
3737
3738
0
static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
3739
0
        assert(e);
3740
0
        assert(s);
3741
0
        assert(s->type == SOURCE_CHILD);
3742
0
        assert(s->child.pidfd >= 0);
3743
3744
0
        if (s->pending)
3745
0
                return 0;
3746
3747
0
        if (event_source_is_offline(s))
3748
0
                return 0;
3749
3750
0
        if (!EVENT_SOURCE_WATCH_PIDFD(s))
3751
0
                return 0;
3752
3753
        /* Note that pidfd would also generate EPOLLHUP when the process gets reaped. But at this point we
3754
         * only permit EPOLLIN, under the assumption that upon EPOLLHUP the child source should already
3755
         * be set to pending, and we would have returned early above. */
3756
3757
0
        zero(s->child.siginfo);
3758
0
        if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3759
0
                return -errno;
3760
3761
0
        if (s->child.siginfo.si_pid == 0)
3762
0
                return 0;
3763
3764
0
        if (SIGINFO_CODE_IS_DEAD(s->child.siginfo.si_code))
3765
0
                s->child.exited = true;
3766
3767
0
        return source_set_pending(s, true);
3768
0
}
3769
3770
0
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
3771
0
        int r;
3772
3773
0
        assert(e);
3774
0
        assert(d);
3775
0
        assert_return(events == EPOLLIN, -EIO);
3776
0
        assert(min_priority);
3777
3778
        /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
3779
         * sure to recheck the children we watch. This is because we only ever dequeue the first signal
3780
         * per priority, and if we dequeue one, SIGCHLD might still be enqueued behind it and we wouldn't know,
3781
         * but we might have higher-priority children we care about, hence we need to check that
3782
         * explicitly. */
3783
3784
0
        if (sigismember(&d->sigset, SIGCHLD))
3785
0
                e->need_process_child = true;
3786
3787
        /* If there's already an event source pending for this priority we don't read another */
3788
0
        if (d->current)
3789
0
                return 0;
3790
3791
0
        for (;;) {
3792
0
                struct signalfd_siginfo si;
3793
0
                ssize_t n;
3794
0
                sd_event_source *s = NULL;
3795
3796
0
                n = read(d->fd, &si, sizeof(si));
3797
0
                if (n < 0) {
3798
0
                        if (ERRNO_IS_TRANSIENT(errno))
3799
0
                                return 0;
3800
3801
0
                        return -errno;
3802
0
                }
3803
3804
0
                if (_unlikely_(n != sizeof(si)))
3805
0
                        return -EIO;
3806
3807
0
                if (_unlikely_(!SIGNAL_VALID(si.ssi_signo)))
3808
0
                        return -EIO;
3809
3810
0
                if (e->signal_sources)
3811
0
                        s = e->signal_sources[si.ssi_signo];
3812
0
                if (!s)
3813
0
                        continue;
3814
0
                if (s->pending)
3815
0
                        continue;
3816
3817
0
                s->signal.siginfo = si;
3818
0
                d->current = s;
3819
3820
0
                r = source_set_pending(s, true);
3821
0
                if (r < 0)
3822
0
                        return r;
3823
0
                if (r > 0 && *min_priority >= s->priority) {
3824
0
                        *min_priority = s->priority;
3825
0
                        return 1; /* an event source with smaller priority is queued. */
3826
0
                }
3827
3828
0
                return 0;
3829
0
        }
3830
0
}
3831
3832
0
static int event_inotify_data_read(sd_event *e, InotifyData *d, uint32_t revents, int64_t threshold) {
3833
0
        ssize_t n;
3834
3835
0
        assert(e);
3836
0
        assert(d);
3837
3838
0
        assert_return(revents == EPOLLIN, -EIO);
3839
3840
        /* If there's already an event source pending for this priority, don't read another */
3841
0
        if (d->n_pending > 0)
3842
0
                return 0;
3843
3844
        /* Is the read buffer non-empty? If so, let's not read more */
3845
0
        if (d->buffer_filled > 0)
3846
0
                return 0;
3847
3848
0
        if (d->priority > threshold)
3849
0
                return 0;
3850
3851
0
        n = read(d->fd, &d->buffer, sizeof(d->buffer));
3852
0
        if (n < 0) {
3853
0
                if (ERRNO_IS_TRANSIENT(errno))
3854
0
                        return 0;
3855
3856
0
                return -errno;
3857
0
        }
3858
3859
0
        assert(n > 0);
3860
0
        d->buffer_filled = (size_t) n;
3861
0
        LIST_PREPEND(buffered, e->buffered_inotify_data_list, d);
3862
3863
0
        return 1;
3864
0
}
3865
3866
0
static void event_inotify_data_drop(sd_event *e, InotifyData *d, size_t sz) {
3867
0
        assert(e);
3868
0
        assert(d);
3869
0
        assert(sz <= d->buffer_filled);
3870
3871
0
        if (sz == 0)
3872
0
                return;
3873
3874
        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3875
0
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3876
0
        d->buffer_filled -= sz;
3877
3878
0
        if (d->buffer_filled == 0)
3879
0
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
3880
0
}
3881
3882
0
static int event_inotify_data_process(sd_event *e, InotifyData *d) {
3883
0
        int r;
3884
3885
0
        assert(e);
3886
0
        assert(d);
3887
3888
        /* If there's already an event source pending for this priority, don't read another */
3889
0
        if (d->n_pending > 0)
3890
0
                return 0;
3891
3892
0
        while (d->buffer_filled > 0) {
3893
0
                size_t sz;
3894
3895
                /* Let's validate that the event structures are complete */
3896
0
                if (d->buffer_filled < offsetof(struct inotify_event, name))
3897
0
                        return -EIO;
3898
3899
0
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3900
0
                if (d->buffer_filled < sz)
3901
0
                        return -EIO;
3902
3903
0
                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3904
0
                        InodeData *inode_data;
3905
3906
                        /* The queue overran, let's pass this event to all event sources connected to this inotify
3907
                         * object */
3908
3909
0
                        HASHMAP_FOREACH(inode_data, d->inodes)
3910
0
                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3911
3912
0
                                        if (event_source_is_offline(s))
3913
0
                                                continue;
3914
3915
0
                                        r = source_set_pending(s, true);
3916
0
                                        if (r < 0)
3917
0
                                                return r;
3918
0
                                }
3919
0
                } else {
3920
0
                        InodeData *inode_data;
3921
3922
                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3923
                         * our watch descriptor table. */
3924
0
                        if (d->buffer.ev.mask & IN_IGNORED) {
3925
3926
0
                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3927
0
                                if (!inode_data) {
3928
0
                                        event_inotify_data_drop(e, d, sz);
3929
0
                                        continue;
3930
0
                                }
3931
3932
                                /* The watch descriptor was removed by the kernel, let's drop it here too */
3933
0
                                inode_data->wd = -1;
3934
0
                        } else {
3935
0
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3936
0
                                if (!inode_data) {
3937
0
                                        event_inotify_data_drop(e, d, sz);
3938
0
                                        continue;
3939
0
                                }
3940
0
                        }
3941
3942
                        /* Trigger all event sources that are interested in these events. Also trigger all event
3943
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
3944
0
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3945
3946
0
                                if (event_source_is_offline(s))
3947
0
                                        continue;
3948
3949
0
                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3950
0
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3951
0
                                        continue;
3952
3953
0
                                r = source_set_pending(s, true);
3954
0
                                if (r < 0)
3955
0
                                        return r;
3956
0
                        }
3957
0
                }
3958
3959
                /* Something pending now? If so, let's finish. */
3960
0
                if (d->n_pending > 0)
3961
0
                        return 1;
3962
3963
                /* otherwise, drop the event and let's read more */
3964
0
                event_inotify_data_drop(e, d, sz);
3965
0
        }
3966
3967
0
        return 0;
3968
0
}
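
/* A sketch of the record layout consumed above, not part of the original file:
 * inotify events are variable length, so the buffer is walked front to back
 * using each record's 'len' field, mirroring the -EIO completeness checks. */
static void walk_inotify_buffer(const uint8_t *buf, size_t filled) {
        size_t off = 0;

        while (off + offsetof(struct inotify_event, name) <= filled) {
                const struct inotify_event *ev = (const struct inotify_event*) (buf + off);
                size_t sz = offsetof(struct inotify_event, name) + ev->len;

                if (off + sz > filled)
                        break; /* incomplete record */

                /* ... match ev->wd and ev->mask against the watch table here ... */

                off += sz;
        }
}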
3969
3970
3.67M
static int process_inotify(sd_event *e) {
3971
3.67M
        int r, done = 0;
3972
3973
3.67M
        assert(e);
3974
3975
3.67M
        LIST_FOREACH(buffered, d, e->buffered_inotify_data_list) {
3976
0
                r = event_inotify_data_process(e, d);
3977
0
                if (r < 0)
3978
0
                        return r;
3979
0
                if (r > 0)
3980
0
                        done++;
3981
0
        }
3982
3983
3.67M
        return done;
3984
3.67M
}
3985
3986
0
static int process_memory_pressure(sd_event_source *s, uint32_t revents) {
3987
0
        assert(s);
3988
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
3989
3990
0
        if (s->pending)
3991
0
                s->memory_pressure.revents |= revents;
3992
0
        else
3993
0
                s->memory_pressure.revents = revents;
3994
3995
0
        return source_set_pending(s, true);
3996
0
}
3997
3998
0
static int source_memory_pressure_write(sd_event_source *s) {
3999
0
        ssize_t n;
4000
0
        int r;
4001
4002
0
        assert(s);
4003
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
4004
4005
        /* once we start writing, the buffer is locked, we allow no further changes. */
4006
0
        s->memory_pressure.locked = true;
4007
4008
0
        if (s->memory_pressure.write_buffer_size > 0) {
4009
0
                n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size);
4010
0
                if (n < 0) {
4011
0
                        if (!ERRNO_IS_TRANSIENT(errno)) {
4012
                                /* If the kernel is built with CONFIG_PSI_DEFAULT_DISABLED it will expose PSI
4013
                                 * files, but then generates EOPNOTSUPP on read() and write() (instead of on
4014
                                 * open()!). This sucks hard, since we can only detect this kind of failure
4015
                                 * so late. Let's make the best of it, and turn off the event source like we
4016
                                 * do for failed event source handlers. */
4017
4018
0
                                log_debug_errno(errno, "Writing memory pressure settings to kernel failed, disabling memory pressure event source: %m");
4019
0
                                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
4020
0
                                return 0;
4021
0
                        }
4022
4023
0
                        n = 0;
4024
0
                }
4025
0
        } else
4026
0
                n = 0;
4027
4028
0
        assert(n >= 0);
4029
4030
0
        if ((size_t) n == s->memory_pressure.write_buffer_size) {
4031
0
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
4032
4033
0
                if (n > 0) {
4034
0
                        s->memory_pressure.write_buffer_size = 0;
4035
4036
                        /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */
4037
0
                        r = source_memory_pressure_register(s, s->enabled);
4038
0
                        if (r < 0)
4039
0
                                return r;
4040
0
                }
4041
0
        } else if (n > 0) {
4042
0
                _cleanup_free_ void *c = NULL;
4043
4044
0
                assert((size_t) n < s->memory_pressure.write_buffer_size);
4045
4046
0
                c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n);
4047
0
                if (!c)
4048
0
                        return -ENOMEM;
4049
4050
0
                free_and_replace(s->memory_pressure.write_buffer, c);
4051
0
                s->memory_pressure.write_buffer_size -= n;
4052
0
                return 1;
4053
0
        }
4054
4055
0
        return 0;
4056
0
}
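
/* A sketch of what typically sits in write_buffer above, not part of the
 * original file: a PSI trigger line as documented in the kernel's PSI
 * interface, here (illustratively) asking for a notification whenever tasks
 * are stalled on memory for 150ms within any 1s window. */
static const char example_psi_trigger[] = "some 150000 1000000";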
4057
4058
0
static int source_memory_pressure_initiate_dispatch(sd_event_source *s) {
4059
0
        int r;
4060
4061
0
        assert(s);
4062
0
        assert(s->type == SOURCE_MEMORY_PRESSURE);
4063
4064
0
        r = source_memory_pressure_write(s);
4065
0
        if (r < 0)
4066
0
                return r;
4067
0
        if (r > 0)
4068
0
                return 1; /* if we wrote something, then don't continue with dispatching the user
4069
                           * callback. Instead, shortcut it so that we wait for the next EPOLLOUT immediately. */
4070
4071
        /* No pending incoming IO? Then let's not continue further */
4072
0
        if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) {
4073
4074
                /* Treat IO errors on the notifier the same way as errors returned from a callback */
4075
0
                if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0)
4076
0
                        return -EIO;
4077
4078
0
                return 1; /* leave dispatch, we already processed everything */
4079
0
        }
4080
4081
0
        if (s->memory_pressure.revents & EPOLLIN) {
4082
0
                uint8_t pipe_buf[PIPE_BUF];
4083
0
                ssize_t n;
4084
4085
                /* If the fd is readable, then flush out anything that might be queued */
4086
4087
0
                n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf));
4088
0
                if (n < 0 && !ERRNO_IS_TRANSIENT(errno))
4089
0
                        return -errno;
4090
0
        }
4091
4092
0
        return 0; /* go on, dispatch to user callback */
4093
0
}
4094
4095
7.36M
static int maybe_mark_post_sources_pending(EventSourceType t, sd_event *e) {
4096
7.36M
        sd_event_source *z;
4097
7.36M
        int r;
4098
4099
7.36M
        if (t == SOURCE_POST)
4100
0
                return 0;
4101
4102
7.36M
        SET_FOREACH(z, e->post_sources) {
4103
0
                if (event_source_is_offline(z))
4104
0
                        continue;
4105
4106
0
                r = source_set_pending(z, true);
4107
0
                if (r < 0)
4108
0
                        return r;
4109
0
        }
4110
4111
7.36M
        return 0;
4112
7.36M
}
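
/* A usage sketch, not part of the original file: post sources are registered
 * via the public API and, per the helper above, run whenever any non-post
 * source was dispatched in an iteration. The callback name is illustrative. */
static int on_post(sd_event_source *s, void *userdata) {
        /* e.g. flush state dirtied by the sources dispatched before us */
        return 0;
}

/* ... sd_event_add_post(e, NULL, on_post, NULL); ... */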
4113
4114
3.68M
static int source_dispatch(sd_event_source *s) {
4115
3.68M
        EventSourceType saved_type;
4116
3.68M
        sd_event *saved_event;
4117
3.68M
        int r = 0;
4118
4119
3.68M
        assert(s);
4120
3.68M
        assert(s->pending || s->type == SOURCE_EXIT);
4121
4122
        /* Save the event source type, here, so that we still know it after the event callback which might
4123
         * invalidate the event. */
4124
3.68M
        saved_type = s->type;
4125
4126
        /* Similarly, store a reference to the event loop object, so that we can still access it after the
4127
         * callback might have invalidated/disconnected the event source. */
4128
3.68M
        saved_event = s->event;
4129
3.68M
        PROTECT_EVENT(saved_event);
4130
4131
        /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
4132
3.68M
        assert(!s->ratelimited);
4133
3.68M
        if (!ratelimit_below(&s->rate_limit)) {
4134
0
                r = event_source_enter_ratelimited(s);
4135
0
                if (r < 0)
4136
0
                        return r;
4137
4138
0
                return 1;
4139
0
        }
4140
4141
3.68M
        if (IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
4142
                /* Make sure this event source is moved to the end of the priority list now. We do this here
4143
                 * because defer and exit event sources are always pending from the moment they're added, so
4144
                 * the same logic in source_set_pending() is never triggered. */
4145
1.57M
                s->pending_iteration = s->event->iteration;
4146
1.57M
                event_source_pp_prioq_reshuffle(s);
4147
2.10M
        } else {
4148
2.10M
                r = source_set_pending(s, false);
4149
2.10M
                if (r < 0)
4150
0
                        return r;
4151
2.10M
        }
4152
4153
        /* If we execute a non-post source, let's mark all post sources as pending. */
4154
3.68M
        r = maybe_mark_post_sources_pending(s->type, s->event);
4155
3.68M
        if (r < 0)
4156
0
                return r;
4157
4158
3.68M
        if (s->type == SOURCE_MEMORY_PRESSURE) {
4159
0
                r = source_memory_pressure_initiate_dispatch(s);
4160
0
                if (r == -EIO) /* handle EIO errors similar to callback errors */
4161
0
                        goto finish;
4162
0
                if (r < 0)
4163
0
                        return r;
4164
0
                if (r > 0) /* already handled */
4165
0
                        return 1;
4166
0
        }
4167
4168
3.68M
        if (s->enabled == SD_EVENT_ONESHOT) {
4169
17.6k
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
4170
17.6k
                if (r < 0)
4171
0
                        return r;
4172
17.6k
        }
4173
4174
3.68M
        s->dispatching = true;
4175
4176
3.68M
        switch (s->type) {
4177
4178
2.10M
        case SOURCE_IO:
4179
2.10M
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
4180
2.10M
                break;
4181
4182
0
        case SOURCE_TIME_REALTIME:
4183
0
        case SOURCE_TIME_BOOTTIME:
4184
0
        case SOURCE_TIME_MONOTONIC:
4185
0
        case SOURCE_TIME_REALTIME_ALARM:
4186
0
        case SOURCE_TIME_BOOTTIME_ALARM:
4187
0
                r = s->time.callback(s, s->time.next, s->userdata);
4188
0
                break;
4189
4190
0
        case SOURCE_SIGNAL:
4191
0
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
4192
0
                break;
4193
4194
0
        case SOURCE_CHILD: {
4195
0
                bool zombie = SIGINFO_CODE_IS_DEAD(s->child.siginfo.si_code);
4196
4197
0
                r = s->child.callback(s, &s->child.siginfo, s->userdata);
4198
4199
                /* Now, reap the PID for good (unless WNOWAIT was specified by the caller). */
4200
0
                if (zombie) {
4201
0
                        (void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|WEXITED|(s->child.options & WNOWAIT));
4202
0
                        if (!FLAGS_SET(s->child.options, WNOWAIT))
4203
0
                                s->child.waited = true;
4204
0
                }
4205
4206
0
                break;
4207
0
        }
4208
4209
1.57M
        case SOURCE_DEFER:
4210
1.57M
                r = s->defer.callback(s, s->userdata);
4211
1.57M
                break;
4212
4213
0
        case SOURCE_POST:
4214
0
                r = s->post.callback(s, s->userdata);
4215
0
                break;
4216
4217
1.91k
        case SOURCE_EXIT:
4218
1.91k
                r = s->exit.callback(s, s->userdata);
4219
1.91k
                break;
4220
4221
0
        case SOURCE_INOTIFY: {
4222
0
                struct sd_event *e = s->event;
4223
0
                InotifyData *d;
4224
0
                size_t sz;
4225
4226
0
                assert(s->inotify.inode_data);
4227
0
                assert_se(d = s->inotify.inode_data->inotify_data);
4228
4229
0
                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
4230
0
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
4231
0
                assert(d->buffer_filled >= sz);
4232
4233
                /* If the inotify callback destroys the event source then this likely means we don't need to
4234
                 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
4235
                 * free it immediately, then we couldn't drop the event from the inotify event queue without
4236
                 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
4237
                 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
4238
                 * explicitly GC it after we are done dropping the inotify event from the buffer. */
4239
0
                d->n_busy++;
4240
0
                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
4241
0
                d->n_busy--;
4242
4243
                /* When no event is pending anymore on this inotify object, then let's drop the event from
4244
                 * the inotify event queue buffer. */
4245
0
                if (d->n_pending == 0)
4246
0
                        event_inotify_data_drop(e, d, sz);
4247
4248
                /* Now we don't want to access 'd' anymore, it's OK to GC now. */
4249
0
                event_gc_inotify_data(e, d);
4250
0
                break;
4251
0
        }
4252
4253
0
        case SOURCE_MEMORY_PRESSURE:
4254
0
                r = s->memory_pressure.callback(s, s->userdata);
4255
0
                break;
4256
4257
0
        case SOURCE_WATCHDOG:
4258
0
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
4259
0
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
4260
0
                assert_not_reached();
4261
3.68M
        }
4262
4263
3.68M
        s->dispatching = false;
4264
4265
3.68M
finish:
4266
3.68M
        if (r < 0) {
4267
0
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
4268
0
                                strna(s->description),
4269
0
                                event_source_type_to_string(saved_type),
4270
0
                                s->exit_on_failure ? "exiting" : "disabling");
4271
4272
0
                if (s->exit_on_failure)
4273
0
                        (void) sd_event_exit(saved_event, r);
4274
0
        }
4275
4276
3.68M
        if (s->n_ref == 0)
4277
10.7k
                source_free(s);
4278
3.67M
        else if (r < 0)
4279
0
                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
4280
4281
        /* More post sources might have been added while executing the callback; let's make sure
4282
         * those are marked pending as well. */
4283
3.68M
        r = maybe_mark_post_sources_pending(saved_type, saved_event);
4284
3.68M
        if (r < 0)
4285
0
                return r;
4286
4287
3.68M
        return 1;
4288
3.68M
}
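
/* A usage note, not part of the original file: the exit_on_failure branches
 * above correspond to the public sd_event_source_set_exit_on_failure(s, true),
 * which turns a callback error into sd_event_exit() for the whole loop instead
 * of merely disabling the offending source. */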
4289
4290
3.67M
static int event_prepare(sd_event *e) {
4291
3.67M
        int r;
4292
4293
3.67M
        assert(e);
4294
4295
8.71M
        for (;;) {
4296
8.71M
                sd_event_source *s;
4297
4298
8.71M
                s = prioq_peek(e->prepare);
4299
8.71M
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
4300
3.67M
                        break;
4301
4302
5.03M
                s->prepare_iteration = e->iteration;
4303
5.03M
                prioq_reshuffle(e->prepare, s, &s->prepare_index);
4304
4305
5.03M
                assert(s->prepare);
4306
5.03M
                s->dispatching = true;
4307
5.03M
                r = s->prepare(s, s->userdata);
4308
5.03M
                s->dispatching = false;
4309
4310
5.03M
                if (r < 0) {
4311
0
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
4312
0
                                        strna(s->description),
4313
0
                                        event_source_type_to_string(s->type),
4314
0
                                        s->exit_on_failure ? "exiting" : "disabling");
4315
4316
0
                        if (s->exit_on_failure)
4317
0
                                (void) sd_event_exit(e, r);
4318
0
                }
4319
4320
5.03M
                if (s->n_ref == 0)
4321
0
                        source_free(s);
4322
5.03M
                else if (r < 0)
4323
0
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
4324
5.03M
        }
4325
4326
3.67M
        return 0;
4327
3.67M
}
4328
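
The prepare hooks dispatched here are installed with sd_event_source_set_prepare(); note the offline check above: prepare callbacks only run while their source is enabled and not ratelimited. A sketch in the style of sd-bus's loop integration, where the connection type and deadline helper are hypothetical:

#include <systemd/sd-event.h>

struct conn {
        sd_event_source *io;     /* watches the connection fd */
        sd_event_source *timer;  /* fires at the protocol deadline */
};

uint64_t conn_next_deadline(struct conn *c);  /* hypothetical helper */

/* invoked once per loop iteration, right before polling */
static int on_prepare(sd_event_source *s, void *userdata) {
        struct conn *c = userdata;

        return sd_event_source_set_time(c->timer, conn_next_deadline(c));
}

static int attach(struct conn *c) {
        return sd_event_source_set_prepare(c->io, on_prepare);
}
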
4329
7.17k
static int dispatch_exit(sd_event *e) {
4330
7.17k
        sd_event_source *p;
4331
7.17k
        int r;
4332
4333
7.17k
        assert(e);
4334
4335
7.17k
        p = prioq_peek(e->exit);
4336
7.17k
        assert(!p || p->type == SOURCE_EXIT);
4337
4338
7.17k
        if (!p || event_source_is_offline(p)) {
4339
5.25k
                e->state = SD_EVENT_FINISHED;
4340
5.25k
                return 0;
4341
5.25k
        }
4342
4343
7.17k
        PROTECT_EVENT(e);
4344
1.91k
        e->iteration++;
4345
1.91k
        e->state = SD_EVENT_EXITING;
4346
1.91k
        r = source_dispatch(p);
4347
1.91k
        e->state = SD_EVENT_INITIAL;
4348
1.91k
        return r;
4349
7.17k
}
4350
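
The queue drained here is fed by sd_event_add_exit(): such sources run only in this SD_EVENT_EXITING phase, in priority order, which makes them a natural place for cleanup work. A minimal sketch:

#include <systemd/sd-event.h>

static int on_exit_cleanup(sd_event_source *s, void *userdata) {
        /* runs once sd_event_exit() was requested, before the loop finishes */
        return 0;
}

static int setup_cleanup(sd_event *e) {
        /* passing NULL for the source pointer makes the source "floating",
         * i.e. owned by the event loop itself */
        return sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
}
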
4351
11.0M
static sd_event_source* event_next_pending(sd_event *e) {
4352
11.0M
        sd_event_source *p;
4353
4354
11.0M
        assert(e);
4355
4356
11.0M
        p = prioq_peek(e->pending);
4357
11.0M
        if (!p)
4358
22.3k
                return NULL;
4359
4360
11.0M
        if (event_source_is_offline(p))
4361
0
                return NULL;
4362
4363
11.0M
        return p;
4364
11.0M
}
4365
4366
0
static int arm_watchdog(sd_event *e) {
4367
0
        struct itimerspec its = {};
4368
0
        usec_t t;
4369
4370
0
        assert(e);
4371
0
        assert(e->watchdog_fd >= 0);
4372
4373
0
        t = sleep_between(e,
4374
0
                          usec_add(e->watchdog_last, (e->watchdog_period / 2)),
4375
0
                          usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));
4376
4377
0
        timespec_store(&its.it_value, t);
4378
4379
        /* Make sure we never set the watchdog to 0, which tells the
4380
         * kernel to disable it. */
4381
0
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
4382
0
                its.it_value.tv_nsec = 1;
4383
4384
0
        return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
4385
0
}
4386
4387
3.67M
static int process_watchdog(sd_event *e) {
4388
3.67M
        assert(e);
4389
4390
3.67M
        if (!e->watchdog)
4391
3.67M
                return 0;
4392
4393
        /* Don't notify the watchdog too often */
4394
0
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
4395
0
                return 0;
4396
4397
0
        sd_notify(false, "WATCHDOG=1");
4398
0
        e->watchdog_last = e->timestamp.monotonic;
4399
4400
0
        return arm_watchdog(e);
4401
0
}
4402
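
To make the arming window above concrete, assume a hypothetical watchdog period of 30s with the last ping sent at time T:

/* earliest next wakeup: T + 30s / 2     = T + 15s
 * latest next wakeup:   T + 30s * 3 / 4 = T + 22.5s
 *
 * sleep_between() picks a point inside [T + 15s, T + 22.5s] that coalesces
 * well with other pending timers, while process_watchdog() suppresses any
 * ping that would come sooner than T + 30s / 4 = T + 7.5s. */
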
4403
3.67M
static void event_close_inode_data_fds(sd_event *e) {
4404
3.67M
        InodeData *d;
4405
4406
3.67M
        assert(e);
4407
4408
        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
4409
         * filesystems. But we can't close them right away, as we need them as long as the user still wants to make
4410
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
4411
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
4412
         * compromise. */
4413
4414
3.67M
        while ((d = e->inode_data_to_close_list)) {
4415
0
                assert(d->fd >= 0);
4416
0
                d->fd = safe_close(d->fd);
4417
4418
0
                LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
4419
0
        }
4420
3.67M
}
4421
4422
3.67M
static int event_memory_pressure_write_list(sd_event *e) {
4423
3.67M
        int r;
4424
4425
3.67M
        assert(e);
4426
4427
3.67M
        for (;;) {
4428
3.67M
                sd_event_source *s;
4429
4430
3.67M
                s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list);
4431
3.67M
                if (!s)
4432
3.67M
                        break;
4433
4434
3.67M
                assert(s->type == SOURCE_MEMORY_PRESSURE);
4435
0
                assert(s->memory_pressure.write_buffer_size > 0);
4436
0
                s->memory_pressure.in_write_list = false;
4437
4438
0
                r = source_memory_pressure_write(s);
4439
0
                if (r < 0)
4440
0
                        return r;
4441
0
        }
4442
4443
3.67M
        return 0;
4444
3.67M
}
4445
4446
0
static bool event_loop_idle(sd_event *e) {
4447
0
        assert(e);
4448
4449
0
        LIST_FOREACH(sources, s, e->sources) {
4450
                /* Exit sources only trigger on exit, so whether they're enabled or not doesn't matter when
4451
                 * we're deciding if the event loop is idle or not. */
4452
0
                if (s->type == SOURCE_EXIT)
4453
0
                        continue;
4454
4455
0
                if (s->enabled == SD_EVENT_OFF)
4456
0
                        continue;
4457
4458
                /* Post event sources always need another active event source to become pending. */
4459
0
                if (s->type == SOURCE_POST && !s->pending)
4460
0
                        continue;
4461
4462
0
                return false;
4463
0
        }
4464
4465
0
        return true;
4466
0
}
4467
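
This predicate backs sd_event_set_exit_on_idle() (defined further below): once no enabled non-exit source remains, the loop exits on its own. Typical use, sketched:

#include <systemd/sd-event.h>

int run_until_done(sd_event *e) {
        int r;

        r = sd_event_set_exit_on_idle(e, true);
        if (r < 0)
                return r;

        /* returns once every remaining source is disabled, freed, or an exit
         * source; a pending post source still counts as work */
        return sd_event_loop(e);
}
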
4468
3.68M
_public_ int sd_event_prepare(sd_event *e) {
4469
3.68M
        int r;
4470
4471
3.68M
        assert_return(e, -EINVAL);
4472
3.68M
        assert_return(e = event_resolve(e), -ENOPKG);
4473
3.68M
        assert_return(!event_origin_changed(e), -ECHILD);
4474
3.68M
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4475
3.68M
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4476
4477
        /* Let's check that, if we are a default event loop, we are executed in the correct thread. We only
4478
         * do this check here once, since gettid() is typically not cached, and we thus want to minimize
4479
         * syscalls. */
4480
3.68M
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
4481
4482
        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
4483
3.68M
        PROTECT_EVENT(e);
4484
4485
3.68M
        if (!e->exit_requested && e->exit_on_idle && event_loop_idle(e))
4486
0
                (void) sd_event_exit(e, 0);
4487
4488
3.68M
        if (e->exit_requested)
4489
7.17k
                goto pending;
4490
4491
3.67M
        e->iteration++;
4492
4493
3.67M
        e->state = SD_EVENT_PREPARING;
4494
3.67M
        r = event_prepare(e);
4495
3.67M
        e->state = SD_EVENT_INITIAL;
4496
3.67M
        if (r < 0)
4497
0
                return r;
4498
4499
3.67M
        r = event_memory_pressure_write_list(e);
4500
3.67M
        if (r < 0)
4501
0
                return r;
4502
4503
3.67M
        r = event_arm_timer(e, &e->realtime);
4504
3.67M
        if (r < 0)
4505
0
                return r;
4506
4507
3.67M
        r = event_arm_timer(e, &e->boottime);
4508
3.67M
        if (r < 0)
4509
0
                return r;
4510
4511
3.67M
        r = event_arm_timer(e, &e->monotonic);
4512
3.67M
        if (r < 0)
4513
0
                return r;
4514
4515
3.67M
        r = event_arm_timer(e, &e->realtime_alarm);
4516
3.67M
        if (r < 0)
4517
0
                return r;
4518
4519
3.67M
        r = event_arm_timer(e, &e->boottime_alarm);
4520
3.67M
        if (r < 0)
4521
0
                return r;
4522
4523
3.67M
        event_close_inode_data_fds(e);
4524
4525
3.67M
        if (event_next_pending(e) || e->need_process_child || e->buffered_inotify_data_list)
4526
3.65M
                goto pending;
4527
4528
22.3k
        e->state = SD_EVENT_ARMED;
4529
4530
22.3k
        return 0;
4531
4532
3.66M
pending:
4533
3.66M
        e->state = SD_EVENT_ARMED;
4534
3.66M
        r = sd_event_wait(e, 0);
4535
3.66M
        if (r == 0)
4536
0
                e->state = SD_EVENT_ARMED;
4537
4538
3.66M
        return r;
4539
3.67M
}
4540
4541
static int epoll_wait_usec(
4542
                int fd,
4543
                struct epoll_event *events,
4544
                int maxevents,
4545
3.67M
                usec_t timeout) {
4546
4547
3.67M
        int msec;
4548
        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not. */
4549
4550
3.67M
#if HAVE_EPOLL_PWAIT2
4551
3.67M
        static bool epoll_pwait2_absent = false;
4552
3.67M
        int r;
4553
4554
        /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast
4555
         * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this
4556
         * is not that obvious to implement given the libc and kernel definitions differ in the last
4557
         * argument. Moreover, the only reason to use it is the more accurate timeouts (which is not a
4558
         * biggie), let's hence rely on glibc's definitions, and fall back to epoll_wait() when that's
4559
         * missing. */
4560
4561
3.67M
        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
4562
2
                r = epoll_pwait2(fd,
4563
2
                                 events,
4564
2
                                 maxevents,
4565
2
                                 TIMESPEC_STORE(timeout),
4566
2
                                 NULL);
4567
2
                if (r >= 0)
4568
0
                        return r;
4569
2
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
4570
0
                        return -errno; /* Only fall back to the old epoll_wait() if the syscall is masked or not
4571
                                        * supported. */
4572
4573
2
                epoll_pwait2_absent = true;
4574
2
        }
4575
3.67M
#endif
4576
4577
3.67M
        if (timeout == USEC_INFINITY)
4578
21.5k
                msec = -1;
4579
3.65M
        else {
4580
3.65M
                usec_t k;
4581
4582
3.65M
                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
4583
3.65M
                if (k >= INT_MAX)
4584
0
                        msec = INT_MAX; /* Saturate */
4585
3.65M
                else
4586
3.65M
                        msec = (int) k;
4587
3.65M
        }
4588
4589
3.67M
        return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
4590
3.67M
}
4591
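
The millisecond fallback rounds the timeout up on purpose: rounding down could wake the loop just before a deadline and make it spin. The same conversion in plain C, without the systemd helper macros:

#include <limits.h>
#include <stdint.h>

/* UINT64_MAX plays the role of USEC_INFINITY here */
static int usec_to_epoll_msec(uint64_t timeout_usec) {
        if (timeout_usec == UINT64_MAX)
                return -1;                 /* block indefinitely */

        /* ceiling division without overflow: 1500us becomes 2ms, not 1ms */
        uint64_t msec = timeout_usec / 1000 + (timeout_usec % 1000 != 0);

        return msec >= INT_MAX ? INT_MAX : (int) msec;  /* saturate */
}
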
4592
3.67M
static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
4593
3.67M
        size_t n_event_queue, m, n_event_max;
4594
3.67M
        int64_t min_priority = threshold;
4595
3.67M
        bool something_new = false;
4596
3.67M
        int r;
4597
4598
3.67M
        assert(e);
4599
3.67M
        assert(ret_min_priority);
4600
4601
3.67M
        n_event_queue = MAX(e->n_sources, 1u);
4602
3.67M
        if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
4603
0
                return -ENOMEM;
4604
4605
3.67M
        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
4606
4607
        /* If we still have inotify data buffered, then query the other fds, but don't wait on them */
4608
3.67M
        if (e->buffered_inotify_data_list)
4609
0
                timeout = 0;
4610
4611
3.67M
        for (;;) {
4612
3.67M
                r = epoll_wait_usec(
4613
3.67M
                                e->epoll_fd,
4614
3.67M
                                e->event_queue,
4615
3.67M
                                n_event_max,
4616
3.67M
                                timeout);
4617
3.67M
                if (r < 0)
4618
0
                        return r;
4619
4620
3.67M
                m = (size_t) r;
4621
3.67M
                msan_unpoison(e->event_queue, m * sizeof(struct epoll_event));
4622
4623
3.67M
                if (m < n_event_max)
4624
3.67M
                        break;
4625
4626
0
                if (n_event_max >= n_event_queue * 10)
4627
0
                        break;
4628
4629
0
                if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
4630
0
                        return -ENOMEM;
4631
4632
0
                n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
4633
0
                timeout = 0;
4634
0
        }
4635
4636
        /* Set the timestamp only the first time this is called. */
4637
3.67M
        if (threshold == INT64_MAX)
4638
3.67M
                triple_timestamp_now(&e->timestamp);
4639
4640
6.92M
        FOREACH_ARRAY(i, e->event_queue, m) {
4641
4642
6.92M
                if (i->data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
4643
0
                        r = flush_timer(e, e->watchdog_fd, i->events, NULL);
4644
6.92M
                else {
4645
6.92M
                        WakeupType *t = ASSERT_PTR(i->data.ptr);
4646
4647
6.92M
                        switch (*t) {
4648
4649
6.91M
                        case WAKEUP_EVENT_SOURCE: {
4650
6.91M
                                sd_event_source *s = i->data.ptr;
4651
4652
6.91M
                                if (s->priority > threshold)
4653
0
                                        continue;
4654
4655
6.91M
                                min_priority = MIN(min_priority, s->priority);
4656
4657
6.91M
                                switch (s->type) {
4658
4659
6.91M
                                case SOURCE_IO:
4660
6.91M
                                        r = process_io(e, s, i->events);
4661
6.91M
                                        break;
4662
4663
0
                                case SOURCE_CHILD:
4664
0
                                        r = process_pidfd(e, s, i->events);
4665
0
                                        break;
4666
4667
0
                                case SOURCE_MEMORY_PRESSURE:
4668
0
                                        r = process_memory_pressure(s, i->events);
4669
0
                                        break;
4670
4671
0
                                default:
4672
0
                                        assert_not_reached();
4673
6.91M
                                }
4674
4675
6.91M
                                break;
4676
6.91M
                        }
4677
4678
6.91M
                        case WAKEUP_CLOCK_DATA: {
4679
4.17k
                                struct clock_data *d = i->data.ptr;
4680
4681
4.17k
                                r = flush_timer(e, d->fd, i->events, &d->next);
4682
4.17k
                                break;
4683
6.91M
                        }
4684
4685
0
                        case WAKEUP_SIGNAL_DATA:
4686
0
                                r = process_signal(e, i->data.ptr, i->events, &min_priority);
4687
0
                                break;
4688
4689
0
                        case WAKEUP_INOTIFY_DATA:
4690
0
                                r = event_inotify_data_read(e, i->data.ptr, i->events, threshold);
4691
0
                                break;
4692
4693
0
                        default:
4694
0
                                assert_not_reached();
4695
6.92M
                        }
4696
6.92M
                }
4697
6.92M
                if (r < 0)
4698
0
                        return r;
4699
6.92M
                if (r > 0)
4700
2.36M
                        something_new = true;
4701
6.92M
        }
4702
4703
3.67M
        *ret_min_priority = min_priority;
4704
3.67M
        return something_new;
4705
3.67M
}
4706
4707
3.68M
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
4708
3.68M
        int r;
4709
4710
3.68M
        assert_return(e, -EINVAL);
4711
3.68M
        assert_return(e = event_resolve(e), -ENOPKG);
4712
3.68M
        assert_return(!event_origin_changed(e), -ECHILD);
4713
3.68M
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4714
3.68M
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
4715
4716
3.68M
        if (e->exit_requested) {
4717
7.17k
                e->state = SD_EVENT_PENDING;
4718
7.17k
                return 1;
4719
7.17k
        }
4720
4721
3.67M
        for (int64_t threshold = INT64_MAX; ; threshold--) {
4722
3.67M
                int64_t epoll_min_priority, child_min_priority;
4723
4724
                /* There is a possibility that new epoll (especially IO) and child events are
4725
                 * triggered just after the process_epoll() call but before process_child(), and the new
4726
                 * IO events may have a higher priority than the child events. To salvage these events,
4727
                 * let's call epoll_wait() again, accepting only events with a higher priority than the
4728
                 * previous ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
4729
                 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
4730
                 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
4731
4732
3.67M
                r = process_epoll(e, timeout, threshold, &epoll_min_priority);
4733
3.67M
                if (r == -EINTR) {
4734
0
                        e->state = SD_EVENT_PENDING;
4735
0
                        return 1;
4736
0
                }
4737
3.67M
                if (r < 0)
4738
0
                        goto finish;
4739
3.67M
                if (r == 0 && threshold < INT64_MAX)
4740
                        /* No new epoll event. */
4741
0
                        break;
4742
4743
3.67M
                r = process_child(e, threshold, &child_min_priority);
4744
3.67M
                if (r < 0)
4745
0
                        goto finish;
4746
3.67M
                if (r == 0)
4747
                        /* No new child event. */
4748
3.67M
                        break;
4749
4750
0
                threshold = MIN(epoll_min_priority, child_min_priority);
4751
0
                if (threshold == INT64_MIN)
4752
0
                        break;
4753
4754
0
                timeout = 0;
4755
0
        }
4756
4757
3.67M
        r = process_watchdog(e);
4758
3.67M
        if (r < 0)
4759
0
                goto finish;
4760
4761
3.67M
        r = process_inotify(e);
4762
3.67M
        if (r < 0)
4763
0
                goto finish;
4764
4765
3.67M
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
4766
3.67M
        if (r < 0)
4767
0
                goto finish;
4768
4769
3.67M
        r = process_timer(e, e->timestamp.boottime, &e->boottime);
4770
3.67M
        if (r < 0)
4771
0
                goto finish;
4772
4773
3.67M
        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
4774
3.67M
        if (r < 0)
4775
0
                goto finish;
4776
4777
3.67M
        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
4778
3.67M
        if (r < 0)
4779
0
                goto finish;
4780
4781
3.67M
        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
4782
3.67M
        if (r < 0)
4783
0
                goto finish;
4784
3.67M
        else if (r == 1) {
4785
                /* The ratelimit expiry callback was called. Let's postpone processing pending sources and
4786
                 * put the loop back into the initial state in order to also evaluate (in the next
4787
                 * iteration) sources that were potentially re-enabled by the callback.
4788
                 *
4789
                 * Wondering why we treat only this invocation of process_timer() differently? Once an
4790
                 * event source is ratelimited we essentially transform it into a CLOCK_MONOTONIC timer,
4791
                 * hence the ratelimit expiry callback is never called for any other timer type. */
4792
0
                r = 0;
4793
0
                goto finish;
4794
0
        }
4795
4796
3.67M
        if (event_next_pending(e)) {
4797
3.67M
                e->state = SD_EVENT_PENDING;
4798
3.67M
                return 1;
4799
3.67M
        }
4800
4801
0
        r = 0;
4802
4803
0
finish:
4804
0
        e->state = SD_EVENT_INITIAL;
4805
4806
0
        return r;
4807
0
}
4808
4809
3.68M
_public_ int sd_event_dispatch(sd_event *e) {
4810
3.68M
        sd_event_source *p;
4811
3.68M
        int r;
4812
4813
3.68M
        assert_return(e, -EINVAL);
4814
3.68M
        assert_return(e = event_resolve(e), -ENOPKG);
4815
3.68M
        assert_return(!event_origin_changed(e), -ECHILD);
4816
3.68M
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4817
3.68M
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
4818
4819
3.68M
        if (e->exit_requested)
4820
7.17k
                return dispatch_exit(e);
4821
4822
3.67M
        p = event_next_pending(e);
4823
3.67M
        if (p) {
4824
3.67M
                PROTECT_EVENT(e);
4825
4826
3.67M
                e->state = SD_EVENT_RUNNING;
4827
3.67M
                r = source_dispatch(p);
4828
3.67M
                e->state = SD_EVENT_INITIAL;
4829
3.67M
                return r;
4830
3.67M
        }
4831
4832
0
        e->state = SD_EVENT_INITIAL;
4833
4834
0
        return 1;
4835
3.67M
}
4836
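
sd_event_prepare(), sd_event_wait() and sd_event_dispatch() are exposed separately so the loop can be driven by hand, e.g. when embedding it into a foreign main loop via sd_event_get_fd(). One iteration, equivalent to what sd_event_run() below does:

#include <systemd/sd-event.h>

int run_one_iteration(sd_event *e) {
        int r;

        r = sd_event_prepare(e);          /* > 0: something is already pending */
        if (r < 0)
                return r;

        if (r == 0) {
                r = sd_event_wait(e, UINT64_MAX);  /* block until a source fires */
                if (r < 0)
                        return r;
        }

        if (r > 0)
                return sd_event_dispatch(e);       /* dispatch one pending source */

        return 0;  /* nothing became pending */
}
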
4837
0
static void event_log_delays(sd_event *e) {
4838
0
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
4839
0
        size_t l;
4840
4841
0
        p = b;
4842
0
        l = sizeof(b);
4843
0
        FOREACH_ELEMENT(delay, e->delays) {
4844
0
                l = strpcpyf(&p, l, "%u ", *delay);
4845
0
                *delay = 0;
4846
0
        }
4847
0
        log_debug("Event loop iterations: %s", b);
4848
0
}
4849
4850
3.68M
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
4851
3.68M
        int r;
4852
4853
3.68M
        assert_return(e, -EINVAL);
4854
3.68M
        assert_return(e = event_resolve(e), -ENOPKG);
4855
3.68M
        assert_return(!event_origin_changed(e), -ECHILD);
4856
3.68M
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4857
3.68M
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4858
4859
3.68M
        if (e->profile_delays && e->last_run_usec != 0) {
4860
0
                usec_t this_run;
4861
0
                unsigned l;
4862
4863
0
                this_run = now(CLOCK_MONOTONIC);
4864
4865
0
                l = log2u64(this_run - e->last_run_usec);
4866
0
                assert(l < ELEMENTSOF(e->delays));
4867
0
                e->delays[l]++;
4868
4869
0
                if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
4870
0
                        event_log_delays(e);
4871
0
                        e->last_log_usec = this_run;
4872
0
                }
4873
0
        }
4874
4875
        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
4876
3.68M
        PROTECT_EVENT(e);
4877
4878
3.68M
        r = sd_event_prepare(e);
4879
3.68M
        if (r == 0)
4880
                /* There was nothing? Then wait... */
4881
22.3k
                r = sd_event_wait(e, timeout);
4882
4883
3.68M
        if (e->profile_delays)
4884
0
                e->last_run_usec = now(CLOCK_MONOTONIC);
4885
4886
3.68M
        if (r > 0) {
4887
                /* There's something now, so let's dispatch it */
4888
3.68M
                r = sd_event_dispatch(e);
4889
3.68M
                if (r < 0)
4890
0
                        return r;
4891
4892
3.68M
                return 1;
4893
3.68M
        }
4894
4895
0
        return r;
4896
3.68M
}
4897
4898
5.25k
_public_ int sd_event_loop(sd_event *e) {
4899
5.25k
        int r;
4900
4901
5.25k
        assert_return(e, -EINVAL);
4902
5.25k
        assert_return(e = event_resolve(e), -ENOPKG);
4903
5.25k
        assert_return(!event_origin_changed(e), -ECHILD);
4904
5.25k
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4905
4906
5.25k
        PROTECT_EVENT(e);
4907
4908
3.66M
        while (e->state != SD_EVENT_FINISHED) {
4909
3.66M
                r = sd_event_run(e, UINT64_MAX);
4910
3.66M
                if (r < 0)
4911
0
                        return r;
4912
3.66M
        }
4913
4914
5.25k
        return e->exit_code;
4915
5.25k
}
4916
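
The value returned here is whatever was handed to sd_event_exit(), which makes it easy to thread an exit status through the loop. A minimal sketch with a hypothetical handler:

#include <systemd/sd-event.h>

static int on_fatal(sd_event_source *s, void *userdata) {
        /* ends the loop; 1 becomes sd_event_loop()'s return value */
        return sd_event_exit(sd_event_source_get_event(s), 1);
}

int main(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        /* ... add sources, one of which may end up calling on_fatal() ... */

        r = sd_event_loop(e);
        sd_event_unref(e);
        return r < 0 ? 1 : r;
}
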
4917
0
_public_ int sd_event_get_fd(sd_event *e) {
4918
0
        assert_return(e, -EINVAL);
4919
0
        assert_return(e = event_resolve(e), -ENOPKG);
4920
0
        assert_return(!event_origin_changed(e), -ECHILD);
4921
4922
0
        return e->epoll_fd;
4923
0
}
4924
4925
2.07k
_public_ int sd_event_get_state(sd_event *e) {
4926
2.07k
        assert_return(e, -EINVAL);
4927
2.07k
        assert_return(e = event_resolve(e), -ENOPKG);
4928
2.07k
        assert_return(!event_origin_changed(e), -ECHILD);
4929
4930
2.07k
        return e->state;
4931
2.07k
}
4932
4933
0
_public_ int sd_event_get_exit_code(sd_event *e, int *ret) {
4934
0
        assert_return(e, -EINVAL);
4935
0
        assert_return(e = event_resolve(e), -ENOPKG);
4936
0
        assert_return(!event_origin_changed(e), -ECHILD);
4937
4938
0
        if (!e->exit_requested)
4939
0
                return -ENODATA;
4940
4941
0
        if (ret)
4942
0
                *ret = e->exit_code;
4943
0
        return 0;
4944
0
}
4945
4946
5.25k
_public_ int sd_event_exit(sd_event *e, int code) {
4947
5.25k
        assert_return(e, -EINVAL);
4948
5.25k
        assert_return(e = event_resolve(e), -ENOPKG);
4949
5.25k
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4950
5.25k
        assert_return(!event_origin_changed(e), -ECHILD);
4951
4952
5.25k
        e->exit_requested = true;
4953
5.25k
        e->exit_code = code;
4954
4955
5.25k
        return 0;
4956
5.25k
}
4957
4958
42.3k
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *ret) {
4959
42.3k
        assert_return(e, -EINVAL);
4960
42.3k
        assert_return(e = event_resolve(e), -ENOPKG);
4961
42.3k
        assert_return(ret, -EINVAL);
4962
42.3k
        assert_return(!event_origin_changed(e), -ECHILD);
4963
4964
42.3k
        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
4965
0
                return -EOPNOTSUPP;
4966
4967
42.3k
        if (!triple_timestamp_is_set(&e->timestamp)) {
4968
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
4969
22.7k
                *ret = now(clock);
4970
22.7k
                return 1;
4971
22.7k
        }
4972
4973
19.5k
        *ret = triple_timestamp_by_clock(&e->timestamp, clock);
4974
19.5k
        return 0;
4975
42.3k
}
4976
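
The return value distinguishes the two cases: 0 means the cached iteration timestamp was returned, 1 means the function fell back to now(). The cached time is the usual base for re-arming a periodic timer created with sd_event_add_time(); a sketch, with the 5s interval chosen arbitrarily:

#include <time.h>
#include <systemd/sd-event.h>

static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        /* re-arm 5s (5000000us) after the loop's wakeup time, then re-enable,
         * since one-shot time sources are switched off after dispatching */
        r = sd_event_source_set_time(s, now_usec + UINT64_C(5000000));
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}
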
4977
60.1k
_public_ int sd_event_default(sd_event **ret) {
4978
60.1k
        sd_event *e = NULL;
4979
60.1k
        int r;
4980
4981
60.1k
        if (!ret)
4982
0
                return !!default_event;
4983
4984
60.1k
        if (default_event) {
4985
0
                *ret = sd_event_ref(default_event);
4986
0
                return 0;
4987
0
        }
4988
4989
60.1k
        r = sd_event_new(&e);
4990
60.1k
        if (r < 0)
4991
0
                return r;
4992
4993
60.1k
        e->default_event_ptr = &default_event;
4994
60.1k
        e->tid = gettid();
4995
60.1k
        default_event = e;
4996
4997
60.1k
        *ret = e;
4998
60.1k
        return 1;
4999
60.1k
}
5000
5001
0
_public_ int sd_event_get_tid(sd_event *e, pid_t *ret) {
5002
0
        assert_return(e, -EINVAL);
5003
0
        assert_return(e = event_resolve(e), -ENOPKG);
5004
0
        assert_return(ret, -EINVAL);
5005
0
        assert_return(!event_origin_changed(e), -ECHILD);
5006
5007
0
        if (e->tid == 0)
5008
0
                return -ENXIO;
5009
5010
0
        *ret = e->tid;
5011
0
        return 0;
5012
0
}
5013
5014
0
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
5015
0
        int r;
5016
5017
0
        assert_return(e, -EINVAL);
5018
0
        assert_return(e = event_resolve(e), -ENOPKG);
5019
0
        assert_return(!event_origin_changed(e), -ECHILD);
5020
5021
0
        if (e->watchdog == !!b)
5022
0
                return e->watchdog;
5023
5024
0
        if (b) {
5025
0
                r = sd_watchdog_enabled(false, &e->watchdog_period);
5026
0
                if (r <= 0)
5027
0
                        return r;
5028
5029
                /* Issue first ping immediately */
5030
0
                sd_notify(false, "WATCHDOG=1");
5031
0
                e->watchdog_last = now(CLOCK_MONOTONIC);
5032
5033
0
                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
5034
0
                if (e->watchdog_fd < 0)
5035
0
                        return -errno;
5036
5037
0
                r = arm_watchdog(e);
5038
0
                if (r < 0)
5039
0
                        goto fail;
5040
5041
0
                struct epoll_event ev = {
5042
0
                        .events = EPOLLIN,
5043
0
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
5044
0
                };
5045
5046
0
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
5047
0
                        r = -errno;
5048
0
                        goto fail;
5049
0
                }
5050
5051
0
        } else {
5052
0
                if (e->watchdog_fd >= 0) {
5053
0
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
5054
0
                        e->watchdog_fd = safe_close(e->watchdog_fd);
5055
0
                }
5056
0
        }
5057
5058
0
        e->watchdog = b;
5059
0
        return e->watchdog;
5060
5061
0
fail:
5062
0
        e->watchdog_fd = safe_close(e->watchdog_fd);
5063
0
        return r;
5064
0
}
5065
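
From the caller's side a single call suffices; under a service unit with WatchdogSec= set, the loop then feeds the manager's watchdog automatically. The branches below mirror the return values above:

#include <systemd/sd-event.h>

int enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;   /* setting up the timerfd/epoll machinery failed */
        if (r == 0)
                return 0;   /* no watchdog requested via $WATCHDOG_USEC */

        return 1;           /* enabled: WATCHDOG=1 is now sent by the loop */
}
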
5066
0
_public_ int sd_event_get_watchdog(sd_event *e) {
5067
0
        assert_return(e, -EINVAL);
5068
0
        assert_return(e = event_resolve(e), -ENOPKG);
5069
0
        assert_return(!event_origin_changed(e), -ECHILD);
5070
5071
0
        return e->watchdog;
5072
0
}
5073
5074
0
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
5075
0
        assert_return(e, -EINVAL);
5076
0
        assert_return(e = event_resolve(e), -ENOPKG);
5077
0
        assert_return(!event_origin_changed(e), -ECHILD);
5078
5079
0
        *ret = e->iteration;
5080
0
        return 0;
5081
0
}
5082
5083
0
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
5084
0
        assert_return(s, -EINVAL);
5085
0
        assert_return(s->event, -EINVAL);
5086
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5087
5088
0
        s->destroy_callback = callback;
5089
0
        return 0;
5090
0
}
5091
5092
0
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
5093
0
        assert_return(s, -EINVAL);
5094
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5095
5096
0
        if (ret)
5097
0
                *ret = s->destroy_callback;
5098
5099
0
        return !!s->destroy_callback;
5100
0
}
5101
5102
0
_public_ int sd_event_source_get_floating(sd_event_source *s) {
5103
0
        assert_return(s, -EINVAL);
5104
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5105
5106
0
        return s->floating;
5107
0
}
5108
5109
0
_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
5110
0
        assert_return(s, -EINVAL);
5111
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5112
5113
0
        if (s->floating == !!b)
5114
0
                return 0;
5115
5116
0
        if (!s->event) /* Already disconnected */
5117
0
                return -ESTALE;
5118
5119
0
        s->floating = b;
5120
5121
0
        if (b) {
5122
0
                sd_event_source_ref(s);
5123
0
                sd_event_unref(s->event);
5124
0
        } else {
5125
0
                sd_event_ref(s->event);
5126
0
                sd_event_source_unref(s);
5127
0
        }
5128
5129
0
        return 1;
5130
0
}
5131
5132
0
_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
5133
0
        assert_return(s, -EINVAL);
5134
0
        assert_return(s->type != SOURCE_EXIT, -EDOM);
5135
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5136
5137
0
        return s->exit_on_failure;
5138
0
}
5139
5140
0
_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
5141
0
        assert_return(s, -EINVAL);
5142
0
        assert_return(s->type != SOURCE_EXIT, -EDOM);
5143
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5144
5145
0
        if (s->exit_on_failure == !!b)
5146
0
                return 0;
5147
5148
0
        s->exit_on_failure = b;
5149
0
        return 1;
5150
0
}
5151
5152
0
_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
5153
0
        int r;
5154
5155
0
        assert_return(s, -EINVAL);
5156
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5157
5158
        /* Turning on ratelimiting on event source types that don't support it is a loggable offense. Doing
5159
         * so is a programming error. */
5160
0
        assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);
5161
5162
        /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
5163
         * non-ratelimited. */
5164
0
        r = event_source_leave_ratelimit(s, /* run_callback= */ false);
5165
0
        if (r < 0)
5166
0
                return r;
5167
5168
0
        s->rate_limit = (RateLimit) { interval, burst };
5169
0
        return 0;
5170
0
}
5171
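
A typical use is defending against event storms on a busy source; the numbers below are arbitrary:

#include <systemd/sd-event.h>

int limit_source(sd_event_source *s) {
        /* at most 10 dispatches per 1s (1000000us) interval; beyond that the
         * source is taken offline until the interval rolls over, at which
         * point the optional expiry callback (see below) is invoked */
        return sd_event_source_set_ratelimit(s, UINT64_C(1000000), 10);
}
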
5172
0
_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
5173
0
        assert_return(s, -EINVAL);
5174
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5175
5176
0
        s->ratelimit_expire_callback = callback;
5177
0
        return 0;
5178
0
}
5179
5180
0
_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
5181
0
        assert_return(s, -EINVAL);
5182
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5183
5184
        /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
5185
         * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
5186
0
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
5187
0
                return -EDOM;
5188
5189
0
        if (!ratelimit_configured(&s->rate_limit))
5190
0
                return -ENOEXEC;
5191
5192
0
        if (ret_interval)
5193
0
                *ret_interval = s->rate_limit.interval;
5194
0
        if (ret_burst)
5195
0
                *ret_burst = s->rate_limit.burst;
5196
5197
0
        return 0;
5198
0
}
5199
5200
0
_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
5201
0
        assert_return(s, -EINVAL);
5202
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5203
5204
0
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
5205
0
                return false;
5206
5207
0
        if (!ratelimit_configured(&s->rate_limit))
5208
0
                return false;
5209
5210
0
        return s->ratelimited;
5211
0
}
5212
5213
0
_public_ int sd_event_source_leave_ratelimit(sd_event_source *s) {
5214
0
        int r;
5215
5216
0
        assert_return(s, -EINVAL);
5217
5218
0
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
5219
0
                return 0;
5220
5221
0
        if (!ratelimit_configured(&s->rate_limit))
5222
0
                return 0;
5223
5224
0
        if (!s->ratelimited)
5225
0
                return 0;
5226
5227
0
        r = event_source_leave_ratelimit(s, /* run_callback= */ false);
5228
0
        if (r < 0)
5229
0
                return r;
5230
5231
0
        return 1; /* tell caller that we indeed just left the ratelimit state */
5232
0
}
5233
5234
0
_public_ int sd_event_set_signal_exit(sd_event *e, int b) {
5235
0
        bool change = false;
5236
0
        int r;
5237
5238
0
        assert_return(e, -EINVAL);
5239
0
        assert_return(e = event_resolve(e), -ENOPKG);
5240
0
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
5241
0
        assert_return(!event_origin_changed(e), -ECHILD);
5242
5243
0
        if (b) {
5244
                /* We want to maintain pointers to these event sources, so that we can destroy them when
5245
                 * told to. But we also don't want them to pin the event loop itself. Hence we mark them as
5246
                 * floating after creation (and undo this before deleting them again). */
5247
5248
0
                if (!e->sigint_event_source) {
5249
0
                        r = sd_event_add_signal(e, &e->sigint_event_source, SIGINT | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
5250
0
                        if (r < 0)
5251
0
                                return r;
5252
5253
0
                        assert_se(sd_event_source_set_floating(e->sigint_event_source, true) >= 0);
5254
0
                        change = true;
5255
0
                }
5256
5257
0
                if (!e->sigterm_event_source) {
5258
0
                        r = sd_event_add_signal(e, &e->sigterm_event_source, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
5259
0
                        if (r < 0) {
5260
0
                                if (change) {
5261
0
                                        assert_se(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
5262
0
                                        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
5263
0
                                }
5264
5265
0
                                return r;
5266
0
                        }
5267
5268
0
                        assert_se(sd_event_source_set_floating(e->sigterm_event_source, true) >= 0);
5269
0
                        change = true;
5270
0
                }
5271
5272
0
        } else {
5273
0
                if (e->sigint_event_source) {
5274
0
                        assert_se(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
5275
0
                        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
5276
0
                        change = true;
5277
0
                }
5278
5279
0
                if (e->sigterm_event_source) {
5280
0
                        assert_se(sd_event_source_set_floating(e->sigterm_event_source, false) >= 0);
5281
0
                        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
5282
0
                        change = true;
5283
0
                }
5284
0
        }
5285
5286
0
        return change;
5287
0
}
5288
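
For the common daemon case this replaces a manual sigprocmask() plus two sd_event_add_signal() calls with a single line:

#include <systemd/sd-event.h>

int setup_signals(sd_event *e) {
        /* SD_EVENT_SIGNAL_PROCMASK is applied internally, so the caller need
         * not block SIGINT/SIGTERM itself; either signal then ends the event
         * loop cleanly */
        return sd_event_set_signal_exit(e, true);
}
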
5289
0
_public_ int sd_event_set_exit_on_idle(sd_event *e, int b) {
5290
0
        assert_return(e, -EINVAL);
5291
0
        assert_return(e = event_resolve(e), -ENOPKG);
5292
0
        assert_return(!event_origin_changed(e), -ECHILD);
5293
5294
0
        return e->exit_on_idle = b;
5295
0
}
5296
5297
0
_public_ int sd_event_get_exit_on_idle(sd_event *e) {
5298
0
        assert_return(e, -EINVAL);
5299
0
        assert_return(e = event_resolve(e), -ENOPKG);
5300
0
        assert_return(!event_origin_changed(e), -ECHILD);
5301
5302
0
        return e->exit_on_idle;
5303
0
}
5304
5305
0
_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) {
5306
0
        _cleanup_free_ char *b = NULL;
5307
0
        _cleanup_free_ void *w = NULL;
5308
5309
0
        assert_return(s, -EINVAL);
5310
0
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
5311
0
        assert_return(ty, -EINVAL);
5312
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5313
5314
0
        if (!STR_IN_SET(ty, "some", "full"))
5315
0
                return -EINVAL;
5316
5317
0
        if (s->memory_pressure.locked) /* Refuse adjusting parameters if the caller already told us how to watch for events */
5318
0
                return -EBUSY;
5319
5320
0
        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
5321
0
        if (!space)
5322
0
                return -EINVAL;
5323
5324
0
        size_t l = space - (char*) s->memory_pressure.write_buffer;
5325
0
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
5326
0
        if (!b)
5327
0
                return -ENOMEM;
5328
0
        if (!STR_IN_SET(b, "some", "full"))
5329
0
                return -EINVAL;
5330
5331
0
        if (streq(b, ty))
5332
0
                return 0;
5333
5334
0
        size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l);
5335
0
        w = new(char, nl);
5336
0
        if (!w)
5337
0
                return -ENOMEM;
5338
5339
0
        memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l));
5340
5341
0
        free_and_replace(s->memory_pressure.write_buffer, w);
5342
0
        s->memory_pressure.write_buffer_size = nl;
5343
0
        s->memory_pressure.locked = false;
5344
5345
0
        return 1;
5346
0
}
5347
5348
0
_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
5349
0
        _cleanup_free_ char *b = NULL;
5350
0
        _cleanup_free_ void *w = NULL;
5351
5352
0
        assert_return(s, -EINVAL);
5353
0
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
5354
0
        assert_return(!event_origin_changed(s->event), -ECHILD);
5355
5356
0
        if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX)
5357
0
                return -ERANGE;
5358
0
        if (window_usec <= 0 || window_usec >= UINT64_MAX)
5359
0
                return -ERANGE;
5360
0
        if (threshold_usec > window_usec)
5361
0
                return -EINVAL;
5362
5363
0
        if (s->memory_pressure.locked) /* Refuse adjusting parameters if the caller already told us how to watch for events */
5364
0
                return -EBUSY;
5365
5366
0
        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
5367
0
        if (!space)
5368
0
                return -EINVAL;
5369
5370
0
        size_t l = space - (char*) s->memory_pressure.write_buffer;
5371
0
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
5372
0
        if (!b)
5373
0
                return -ENOMEM;
5374
0
        if (!STR_IN_SET(b, "some", "full"))
5375
0
                return -EINVAL;
5376
5377
0
        if (asprintf((char**) &w,
5378
0
                     "%s " USEC_FMT " " USEC_FMT "",
5379
0
                     b,
5380
0
                     threshold_usec,
5381
0
                     window_usec) < 0)
5382
0
                return -EINVAL;
5383
5384
0
        l = strlen(w) + 1;
5385
0
        if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0)
5386
0
                return 0;
5387
5388
0
        free_and_replace(s->memory_pressure.write_buffer, w);
5389
0
        s->memory_pressure.write_buffer_size = l;
5390
0
        s->memory_pressure.locked = false;
5391
5392
0
        return 1;
5393
0
}
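
Both setters above rewrite the PSI trigger string ("some" or "full", followed by the stall threshold and window in microseconds) that gets written out once the source is armed. Putting them together, with arbitrary example thresholds:

#include <systemd/sd-event.h>

static int on_pressure(sd_event_source *s, void *userdata) {
        /* e.g. flush caches, compact buffers, drop non-essential state ... */
        return 0;
}

int watch_memory_pressure(sd_event *e) {
        sd_event_source *s;
        int r;

        r = sd_event_add_memory_pressure(e, &s, on_pressure, NULL);
        if (r < 0)
                return r;

        /* trigger on "full" stalls: 150ms of total stall per 1s window,
         * i.e. the trigger string becomes "full 150000 1000000" */
        r = sd_event_source_set_memory_pressure_type(s, "full");
        if (r < 0)
                return r;

        return sd_event_source_set_memory_pressure_period(s, UINT64_C(150000), UINT64_C(1000000));
}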