Coverage Report

Created: 2025-06-13 06:27

/src/systemd/src/journal/journald-kmsg.c
Line
Count
Source (jump to first uncovered line)
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3
#include <fcntl.h>
4
#include <sys/mman.h>
5
#include <sys/socket.h>
6
#include <unistd.h>
7
8
#include "sd-device.h"
9
#include "sd-event.h"
10
#include "sd-messages.h"
11
12
#include "alloc-util.h"
13
#include "device-util.h"
14
#include "errno-util.h"
15
#include "escape.h"
16
#include "fd-util.h"
17
#include "format-util.h"
18
#include "iovec-util.h"
19
#include "journal-internal.h"
20
#include "journald-kmsg.h"
21
#include "journald-manager.h"
22
#include "journald-sync.h"
23
#include "journald-syslog.h"
24
#include "log.h"
25
#include "log-ratelimit.h"
26
#include "parse-util.h"
27
#include "process-util.h"
28
#include "stdio-util.h"
29
#include "string-util.h"
30
31
void manager_forward_kmsg(
32
                Manager *m,
33
                int priority,
34
                const char *identifier,
35
                const char *message,
36
1.11M
                const struct ucred *ucred) {
37
38
1.11M
        _cleanup_free_ char *ident_buf = NULL;
39
1.11M
        struct iovec iovec[5];
40
1.11M
        char header_priority[DECIMAL_STR_MAX(priority) + 3],
41
1.11M
             header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
42
1.11M
        size_t n = 0;
43
44
1.11M
        assert(m);
45
1.11M
        assert(priority >= 0);
46
1.11M
        assert(priority <= 999);
47
1.11M
        assert(message);
48
49
1.11M
        if (_unlikely_(LOG_PRI(priority) > m->max_level_kmsg))
50
28.5k
                return;
51
52
1.08M
        if (_unlikely_(m->dev_kmsg_fd < 0))
53
1.08M
                return;
54
55
        /* Never allow messages with kernel facility to be written to
56
         * kmsg, regardless where the data comes from. */
57
0
        priority = syslog_fixup_facility(priority);
58
59
        /* First: priority field */
60
0
        xsprintf(header_priority, "<%i>", priority);
61
0
        iovec[n++] = IOVEC_MAKE_STRING(header_priority);
62
63
        /* Second: identifier and PID */
64
0
        if (ucred) {
65
0
                if (!identifier) {
66
0
                        (void) pid_get_comm(ucred->pid, &ident_buf);
67
0
                        identifier = ident_buf;
68
0
                }
69
70
0
                if (identifier)
71
0
                        iovec[n++] = IOVEC_MAKE_STRING(identifier);
72
73
0
                xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
74
0
                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
75
0
        } else if (identifier) {
76
0
                iovec[n++] = IOVEC_MAKE_STRING(identifier);
77
0
                iovec[n++] = IOVEC_MAKE_STRING(": ");
78
0
        }
79
80
        /* Fourth: message */
81
0
        iovec[n++] = IOVEC_MAKE_STRING(message);
82
0
        iovec[n++] = IOVEC_MAKE_STRING("\n");
83
84
0
        if (writev(m->dev_kmsg_fd, iovec, n) < 0)
85
0
                log_debug_errno(errno, "Failed to write to /dev/kmsg for logging, ignoring: %m");
86
0
}
87
88
869
static bool is_us(const char *identifier, const char *pid) {
89
869
        pid_t pid_num;
90
91
869
        if (!identifier || !pid)
92
662
                return false;
93
94
207
        if (parse_pid(pid, &pid_num) < 0)
95
117
                return false;
96
97
90
        return pid_num == getpid_cached() &&
98
90
               streq(identifier, program_invocation_short_name);
99
207
}
100
101
2.63k
void dev_kmsg_record(Manager *m, char *p, size_t l) {
102
103
2.63k
        _cleanup_free_ char *message = NULL, *syslog_pid = NULL, *syslog_identifier = NULL, *identifier = NULL, *pid = NULL;
104
2.63k
        struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
105
2.63k
        char *kernel_device = NULL;
106
2.63k
        unsigned long long usec;
107
2.63k
        size_t n = 0, z = 0, j;
108
2.63k
        int priority, r;
109
2.63k
        char *e, *k;
110
2.63k
        uint64_t serial;
111
2.63k
        size_t pl;
112
2.63k
        int saved_log_max_level = INT_MAX;
113
2.63k
        ClientContext *c = NULL;
114
115
2.63k
        assert(m);
116
2.63k
        assert(p);
117
118
2.63k
        if (l <= 0)
119
0
                return;
120
121
        /* syslog prefix including priority and facility */
122
2.63k
        e = memchr(p, ',', l);
123
2.63k
        if (!e)
124
12
                return;
125
2.62k
        *e = 0;
126
127
2.62k
        r = safe_atoi(p, &priority);
128
2.62k
        if (r < 0 || priority < 0 || priority > 999)
129
175
                return;
130
131
2.45k
        if (m->forward_to_kmsg && LOG_FAC(priority) != LOG_KERN)
132
0
                return;
133
134
        /* seqnum */
135
2.45k
        l -= (e - p) + 1;
136
2.45k
        p = e + 1;
137
2.45k
        e = memchr(p, ',', l);
138
2.45k
        if (!e)
139
24
                return;
140
2.42k
        *e = 0;
141
142
2.42k
        r = safe_atou64(p, &serial);
143
2.42k
        if (r < 0)
144
81
                return;
145
146
2.34k
        if (m->kernel_seqnum) {
147
                /* We already read this one? */
148
0
                if (serial < *m->kernel_seqnum)
149
0
                        return;
150
151
                /* Did we lose any? */
152
0
                if (serial > *m->kernel_seqnum)
153
0
                        manager_driver_message(m, 0,
154
0
                                               LOG_MESSAGE_ID(SD_MESSAGE_JOURNAL_MISSED_STR),
155
0
                                               LOG_MESSAGE("Missed %"PRIu64" kernel messages",
156
0
                                                           serial - *m->kernel_seqnum));
157
158
                /* Make sure we never read this one again. Note that
159
                 * we always store the next message serial we expect
160
                 * here, simply because this makes handling the first
161
                 * message with serial 0 easy. */
162
0
                *m->kernel_seqnum = serial + 1;
163
0
        }
164
165
        /* CLOCK_BOOTTIME timestamp */
166
2.34k
        l -= (e - p) + 1;
167
2.34k
        p = e + 1;
168
2.34k
        e = memchr(p, ',', l);
169
2.34k
        if (!e)
170
45
                return;
171
2.30k
        *e = 0;
172
173
2.30k
        r = safe_atollu(p, &usec);
174
2.30k
        if (r < 0)
175
6
                return;
176
177
        /* ignore flags and any other fields, and find the beginning of the message */
178
2.29k
        l -= (e - p) + 1;
179
2.29k
        p = e + 1;
180
2.29k
        e = memchr(p, ';', l);
181
2.29k
        if (!e)
182
2
                return;
183
184
        /* find the end of the message */
185
2.29k
        l -= (e - p) + 1;
186
2.29k
        p = e + 1;
187
2.29k
        e = memchr(p, '\n', l);
188
2.29k
        if (!e)
189
1
                return;
190
2.29k
        *e = 0;
191
192
2.29k
        pl = e - p;
193
2.29k
        l -= (e - p) + 1;
194
2.29k
        k = e + 1;
195
196
4.62k
        for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
197
2.40k
                char *mm;
198
                /* Metadata fields attached */
199
200
2.40k
                if (*k != ' ')
201
64
                        break;
202
203
2.34k
                k++, l--;
204
205
2.34k
                e = memchr(k, '\n', l);
206
2.34k
                if (!e)
207
9
                        goto finish;
208
209
2.33k
                *e = 0;
210
211
2.33k
                if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &mm) < 0)
212
0
                        break;
213
214
2.33k
                if (startswith(mm, "_KERNEL_DEVICE="))
215
1.20k
                        kernel_device = mm + 15;
216
217
2.33k
                iovec[n++] = IOVEC_MAKE_STRING(mm);
218
2.33k
                z++;
219
220
2.33k
                l -= (e - k) + 1;
221
2.33k
                k = e + 1;
222
2.33k
        }
223
224
2.28k
        if (kernel_device) {
225
1.10k
                _cleanup_(sd_device_unrefp) sd_device *d = NULL;
226
227
1.10k
                if (sd_device_new_from_device_id(&d, kernel_device) >= 0) {
228
67
                        const char *g;
229
67
                        char *b;
230
231
67
                        if (sd_device_get_devname(d, &g) >= 0) {
232
38
                                b = strjoin("_UDEV_DEVNODE=", g);
233
38
                                if (b) {
234
38
                                        iovec[n++] = IOVEC_MAKE_STRING(b);
235
38
                                        z++;
236
38
                                }
237
38
                        }
238
239
67
                        if (sd_device_get_sysname(d, &g) >= 0) {
240
67
                                b = strjoin("_UDEV_SYSNAME=", g);
241
67
                                if (b) {
242
67
                                        iovec[n++] = IOVEC_MAKE_STRING(b);
243
67
                                        z++;
244
67
                                }
245
67
                        }
246
247
67
                        j = 0;
248
67
                        FOREACH_DEVICE_DEVLINK(d, link) {
249
250
0
                                if (j >= N_IOVEC_UDEV_FIELDS)
251
0
                                        break;
252
253
0
                                b = strjoin("_UDEV_DEVLINK=", link);
254
0
                                if (b) {
255
0
                                        iovec[n++] = IOVEC_MAKE_STRING(b);
256
0
                                        z++;
257
0
                                }
258
259
0
                                j++;
260
0
                        }
261
67
                }
262
1.10k
        }
263
264
2.28k
        char source_boot_time[STRLEN("_SOURCE_BOOTTIME_TIMESTAMP=") + DECIMAL_STR_MAX(unsigned long long)];
265
2.28k
        xsprintf(source_boot_time, "_SOURCE_BOOTTIME_TIMESTAMP=%llu", usec);
266
2.28k
        iovec[n++] = IOVEC_MAKE_STRING(source_boot_time);
267
268
        /* Historically, we stored the timestamp 'usec' as _SOURCE_MONOTONIC_TIMESTAMP, so we cannot remove
269
         * the field as it is already used in other projects. This is for backward compatibility. */
270
2.28k
        char source_monotonic_time[STRLEN("_SOURCE_MONOTONIC_TIMESTAMP=") + DECIMAL_STR_MAX(unsigned long long)];
271
2.28k
        xsprintf(source_monotonic_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec);
272
2.28k
        iovec[n++] = IOVEC_MAKE_STRING(source_monotonic_time);
273
274
2.28k
        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
275
276
2.28k
        char syslog_priority[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int)];
277
2.28k
        xsprintf(syslog_priority, "PRIORITY=%i", LOG_PRI(priority));
278
2.28k
        iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
279
280
2.28k
        char syslog_facility[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)];
281
2.28k
        xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
282
2.28k
        iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
283
284
2.28k
        if (LOG_FAC(priority) == LOG_KERN)
285
1.41k
                iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
286
869
        else {
287
869
                pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
288
289
                /* Avoid logging any new messages when we're processing messages generated by ourselves via
290
                 * log_info() and friends to avoid infinite loops. */
291
869
                if (is_us(identifier, pid)) {
292
0
                        if (!ratelimit_below(&m->kmsg_own_ratelimit))
293
0
                                return;
294
295
0
                        saved_log_max_level = log_get_max_level();
296
0
                        c = m->my_context;
297
0
                        log_set_max_level(LOG_NULL);
298
0
                }
299
300
869
                if (identifier) {
301
286
                        syslog_identifier = strjoin("SYSLOG_IDENTIFIER=", identifier);
302
286
                        if (syslog_identifier)
303
286
                                iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
304
286
                }
305
306
869
                if (pid) {
307
207
                        syslog_pid = strjoin("SYSLOG_PID=", pid);
308
207
                        if (syslog_pid)
309
207
                                iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
310
207
                }
311
869
        }
312
313
2.28k
        if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
314
2.28k
                iovec[n++] = IOVEC_MAKE_STRING(message);
315
316
2.28k
        manager_dispatch_message(m, iovec, n, ELEMENTSOF(iovec), c, NULL, priority, 0);
317
318
2.28k
        if (saved_log_max_level != INT_MAX)
319
0
                log_set_max_level(saved_log_max_level);
320
321
2.28k
        m->dev_kmsg_timestamp = usec;
322
2.28k
        sync_req_revalidate_by_timestamp(m);
323
324
2.29k
finish:
325
4.73k
        for (j = 0; j < z; j++)
326
2.43k
                free(iovec[j].iov_base);
327
2.29k
}
328
329
0
static int manager_read_dev_kmsg(Manager *m) {
330
0
        char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
331
0
        ssize_t l;
332
333
0
        assert(m);
334
0
        assert(m->dev_kmsg_fd >= 0);
335
0
        assert(m->read_kmsg);
336
337
0
        l = read(m->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
338
0
        if (l == 0)
339
0
                return 0;
340
0
        if (l < 0) {
341
0
                if (ERRNO_IS_TRANSIENT(errno) || errno == EPIPE)
342
0
                        return 0;
343
344
0
                return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to read from /dev/kmsg: %m");
345
0
        }
346
347
0
        dev_kmsg_record(m, buffer, l);
348
0
        return 1;
349
0
}
350
351
0
int manager_flush_dev_kmsg(Manager *m) {
352
0
        int r;
353
354
0
        assert(m);
355
356
0
        if (m->dev_kmsg_fd < 0)
357
0
                return 0;
358
359
0
        if (!m->read_kmsg)
360
0
                return 0;
361
362
0
        log_debug("Flushing /dev/kmsg...");
363
364
0
        for (;;) {
365
0
                r = manager_read_dev_kmsg(m);
366
0
                if (r < 0)
367
0
                        return r;
368
369
0
                if (r == 0)
370
0
                        break;
371
0
        }
372
373
0
        return 0;
374
0
}
375
376
0
static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
377
0
        Manager *m = ASSERT_PTR(userdata);
378
379
0
        assert(es);
380
0
        assert(fd == m->dev_kmsg_fd);
381
382
0
        if (revents & EPOLLERR)
383
0
                log_ratelimit_warning(JOURNAL_LOG_RATELIMIT,
384
0
                                      "/dev/kmsg buffer overrun, some messages lost.");
385
386
0
        if (!(revents & EPOLLIN))
387
0
                log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents);
388
389
0
        return manager_read_dev_kmsg(m);
390
0
}
391
392
0
int manager_open_dev_kmsg(Manager *m) {
393
0
        int r;
394
395
0
        assert(m);
396
0
        assert(m->dev_kmsg_fd < 0);
397
0
        assert(!m->dev_kmsg_event_source);
398
399
0
        mode_t mode = O_CLOEXEC|O_NONBLOCK|O_NOCTTY|(m->read_kmsg ? O_RDWR : O_WRONLY);
400
401
0
        _cleanup_close_ int fd = open("/dev/kmsg", mode);
402
0
        if (fd < 0) {
403
0
                log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
404
0
                               errno, "Failed to open /dev/kmsg for %s access, ignoring: %m", accmode_to_string(mode));
405
0
                return 0;
406
0
        }
407
408
0
        if (!m->read_kmsg) {
409
0
                m->dev_kmsg_fd = TAKE_FD(fd);
410
0
                return 0;
411
0
        }
412
413
0
        _cleanup_(sd_event_source_unrefp) sd_event_source *es = NULL;
414
0
        r = sd_event_add_io(m->event, &es, fd, EPOLLIN, dispatch_dev_kmsg, m);
415
0
        if (r < 0)
416
0
                return log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m");
417
418
0
        r = sd_event_source_set_priority(es, SD_EVENT_PRIORITY_NORMAL+5);
419
0
        if (r < 0)
420
0
                return log_error_errno(r, "Failed to adjust priority of kmsg event source: %m");
421
422
0
        m->dev_kmsg_fd = TAKE_FD(fd);
423
0
        m->dev_kmsg_event_source = TAKE_PTR(es);
424
0
        return 0;
425
0
}
426
427
0
int manager_open_kernel_seqnum(Manager *m) {
428
0
        int r;
429
430
0
        assert(m);
431
432
        /* We store the seqnum we last read in an mmapped file. That way we can just use it like a variable,
433
         * but it is persistent and automatically flushed at reboot. */
434
435
0
        if (!m->read_kmsg)
436
0
                return 0;
437
438
0
        r = manager_map_seqnum_file(m, "kernel-seqnum", sizeof(uint64_t), (void**) &m->kernel_seqnum);
439
0
        if (r < 0)
440
0
                return log_error_errno(r, "Failed to map kernel seqnum file: %m");
441
442
0
        return 0;
443
0
}