/src/systemd/src/journal/journald-kmsg.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | | |
3 | | #include <fcntl.h> |
4 | | #include <sys/mman.h> |
5 | | #include <sys/socket.h> |
6 | | #include <unistd.h> |
7 | | |
8 | | #include "sd-device.h" |
9 | | #include "sd-event.h" |
10 | | #include "sd-messages.h" |
11 | | |
12 | | #include "alloc-util.h" |
13 | | #include "device-util.h" |
14 | | #include "errno-util.h" |
15 | | #include "escape.h" |
16 | | #include "fd-util.h" |
17 | | #include "format-util.h" |
18 | | #include "iovec-util.h" |
19 | | #include "journal-internal.h" |
20 | | #include "journald-kmsg.h" |
21 | | #include "journald-manager.h" |
22 | | #include "journald-sync.h" |
23 | | #include "journald-syslog.h" |
24 | | #include "log.h" |
25 | | #include "log-ratelimit.h" |
26 | | #include "parse-util.h" |
27 | | #include "process-util.h" |
28 | | #include "stdio-util.h" |
29 | | #include "string-util.h" |
30 | | |
31 | | void manager_forward_kmsg( |
32 | | Manager *m, |
33 | | int priority, |
34 | | const char *identifier, |
35 | | const char *message, |
36 | 1.11M | const struct ucred *ucred) { |
37 | | |
38 | 1.11M | _cleanup_free_ char *ident_buf = NULL; |
39 | 1.11M | struct iovec iovec[5]; |
40 | 1.11M | char header_priority[DECIMAL_STR_MAX(priority) + 3], |
41 | 1.11M | header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1]; |
42 | 1.11M | size_t n = 0; |
43 | | |
44 | 1.11M | assert(m); |
45 | 1.11M | assert(priority >= 0); |
46 | 1.11M | assert(priority <= 999); |
47 | 1.11M | assert(message); |
48 | | |
49 | 1.11M | if (_unlikely_(LOG_PRI(priority) > m->max_level_kmsg)) |
50 | 28.5k | return; |
51 | | |
52 | 1.08M | if (_unlikely_(m->dev_kmsg_fd < 0)) |
53 | 1.08M | return; |
54 | | |
55 | | /* Never allow messages with kernel facility to be written to |
56 | | * kmsg, regardless where the data comes from. */ |
57 | 0 | priority = syslog_fixup_facility(priority); |
58 | | |
59 | | /* First: priority field */ |
60 | 0 | xsprintf(header_priority, "<%i>", priority); |
61 | 0 | iovec[n++] = IOVEC_MAKE_STRING(header_priority); |
62 | | |
63 | | /* Second: identifier and PID */ |
64 | 0 | if (ucred) { |
65 | 0 | if (!identifier) { |
66 | 0 | (void) pid_get_comm(ucred->pid, &ident_buf); |
67 | 0 | identifier = ident_buf; |
68 | 0 | } |
69 | |
|
70 | 0 | if (identifier) |
71 | 0 | iovec[n++] = IOVEC_MAKE_STRING(identifier); |
72 | |
|
73 | 0 | xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid); |
74 | 0 | iovec[n++] = IOVEC_MAKE_STRING(header_pid); |
75 | 0 | } else if (identifier) { |
76 | 0 | iovec[n++] = IOVEC_MAKE_STRING(identifier); |
77 | 0 | iovec[n++] = IOVEC_MAKE_STRING(": "); |
78 | 0 | } |
79 | | |
80 | | /* Fourth: message */ |
81 | 0 | iovec[n++] = IOVEC_MAKE_STRING(message); |
82 | 0 | iovec[n++] = IOVEC_MAKE_STRING("\n"); |
83 | |
|
84 | 0 | if (writev(m->dev_kmsg_fd, iovec, n) < 0) |
85 | 0 | log_debug_errno(errno, "Failed to write to /dev/kmsg for logging, ignoring: %m"); |
86 | 0 | } |
87 | | |
88 | 869 | static bool is_us(const char *identifier, const char *pid) { |
89 | 869 | pid_t pid_num; |
90 | | |
91 | 869 | if (!identifier || !pid) |
92 | 662 | return false; |
93 | | |
94 | 207 | if (parse_pid(pid, &pid_num) < 0) |
95 | 117 | return false; |
96 | | |
97 | 90 | return pid_num == getpid_cached() && |
98 | 90 | streq(identifier, program_invocation_short_name); |
99 | 207 | } |
100 | | |
101 | 2.63k | void dev_kmsg_record(Manager *m, char *p, size_t l) { |
102 | | |
103 | 2.63k | _cleanup_free_ char *message = NULL, *syslog_pid = NULL, *syslog_identifier = NULL, *identifier = NULL, *pid = NULL; |
104 | 2.63k | struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS]; |
105 | 2.63k | char *kernel_device = NULL; |
106 | 2.63k | unsigned long long usec; |
107 | 2.63k | size_t n = 0, z = 0, j; |
108 | 2.63k | int priority, r; |
109 | 2.63k | char *e, *k; |
110 | 2.63k | uint64_t serial; |
111 | 2.63k | size_t pl; |
112 | 2.63k | int saved_log_max_level = INT_MAX; |
113 | 2.63k | ClientContext *c = NULL; |
114 | | |
115 | 2.63k | assert(m); |
116 | 2.63k | assert(p); |
117 | | |
118 | 2.63k | if (l <= 0) |
119 | 0 | return; |
120 | | |
121 | | /* syslog prefix including priority and facility */ |
122 | 2.63k | e = memchr(p, ',', l); |
123 | 2.63k | if (!e) |
124 | 12 | return; |
125 | 2.62k | *e = 0; |
126 | | |
127 | 2.62k | r = safe_atoi(p, &priority); |
128 | 2.62k | if (r < 0 || priority < 0 || priority > 999) |
129 | 175 | return; |
130 | | |
131 | 2.45k | if (m->forward_to_kmsg && LOG_FAC(priority) != LOG_KERN) |
132 | 0 | return; |
133 | | |
134 | | /* seqnum */ |
135 | 2.45k | l -= (e - p) + 1; |
136 | 2.45k | p = e + 1; |
137 | 2.45k | e = memchr(p, ',', l); |
138 | 2.45k | if (!e) |
139 | 24 | return; |
140 | 2.42k | *e = 0; |
141 | | |
142 | 2.42k | r = safe_atou64(p, &serial); |
143 | 2.42k | if (r < 0) |
144 | 81 | return; |
145 | | |
146 | 2.34k | if (m->kernel_seqnum) { |
147 | | /* We already read this one? */ |
148 | 0 | if (serial < *m->kernel_seqnum) |
149 | 0 | return; |
150 | | |
151 | | /* Did we lose any? */ |
152 | 0 | if (serial > *m->kernel_seqnum) |
153 | 0 | manager_driver_message(m, 0, |
154 | 0 | LOG_MESSAGE_ID(SD_MESSAGE_JOURNAL_MISSED_STR), |
155 | 0 | LOG_MESSAGE("Missed %"PRIu64" kernel messages", |
156 | 0 | serial - *m->kernel_seqnum)); |
157 | | |
158 | | /* Make sure we never read this one again. Note that |
159 | | * we always store the next message serial we expect |
160 | | * here, simply because this makes handling the first |
161 | | * message with serial 0 easy. */ |
162 | 0 | *m->kernel_seqnum = serial + 1; |
163 | 0 | } |
164 | | |
165 | | /* CLOCK_BOOTTIME timestamp */ |
166 | 2.34k | l -= (e - p) + 1; |
167 | 2.34k | p = e + 1; |
168 | 2.34k | e = memchr(p, ',', l); |
169 | 2.34k | if (!e) |
170 | 45 | return; |
171 | 2.30k | *e = 0; |
172 | | |
173 | 2.30k | r = safe_atollu(p, &usec); |
174 | 2.30k | if (r < 0) |
175 | 6 | return; |
176 | | |
177 | | /* ignore flags and any other fields, and find the beginning of the message */ |
178 | 2.29k | l -= (e - p) + 1; |
179 | 2.29k | p = e + 1; |
180 | 2.29k | e = memchr(p, ';', l); |
181 | 2.29k | if (!e) |
182 | 2 | return; |
183 | | |
184 | | /* find the end of the message */ |
185 | 2.29k | l -= (e - p) + 1; |
186 | 2.29k | p = e + 1; |
187 | 2.29k | e = memchr(p, '\n', l); |
188 | 2.29k | if (!e) |
189 | 1 | return; |
190 | 2.29k | *e = 0; |
191 | | |
192 | 2.29k | pl = e - p; |
193 | 2.29k | l -= (e - p) + 1; |
194 | 2.29k | k = e + 1; |
195 | | |
196 | 4.62k | for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) { |
197 | 2.40k | char *mm; |
198 | | /* Metadata fields attached */ |
199 | | |
200 | 2.40k | if (*k != ' ') |
201 | 64 | break; |
202 | | |
203 | 2.34k | k++, l--; |
204 | | |
205 | 2.34k | e = memchr(k, '\n', l); |
206 | 2.34k | if (!e) |
207 | 9 | goto finish; |
208 | | |
209 | 2.33k | *e = 0; |
210 | | |
211 | 2.33k | if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &mm) < 0) |
212 | 0 | break; |
213 | | |
214 | 2.33k | if (startswith(mm, "_KERNEL_DEVICE=")) |
215 | 1.20k | kernel_device = mm + 15; |
216 | | |
217 | 2.33k | iovec[n++] = IOVEC_MAKE_STRING(mm); |
218 | 2.33k | z++; |
219 | | |
220 | 2.33k | l -= (e - k) + 1; |
221 | 2.33k | k = e + 1; |
222 | 2.33k | } |
223 | | |
224 | 2.28k | if (kernel_device) { |
225 | 1.10k | _cleanup_(sd_device_unrefp) sd_device *d = NULL; |
226 | | |
227 | 1.10k | if (sd_device_new_from_device_id(&d, kernel_device) >= 0) { |
228 | 67 | const char *g; |
229 | 67 | char *b; |
230 | | |
231 | 67 | if (sd_device_get_devname(d, &g) >= 0) { |
232 | 38 | b = strjoin("_UDEV_DEVNODE=", g); |
233 | 38 | if (b) { |
234 | 38 | iovec[n++] = IOVEC_MAKE_STRING(b); |
235 | 38 | z++; |
236 | 38 | } |
237 | 38 | } |
238 | | |
239 | 67 | if (sd_device_get_sysname(d, &g) >= 0) { |
240 | 67 | b = strjoin("_UDEV_SYSNAME=", g); |
241 | 67 | if (b) { |
242 | 67 | iovec[n++] = IOVEC_MAKE_STRING(b); |
243 | 67 | z++; |
244 | 67 | } |
245 | 67 | } |
246 | | |
247 | 67 | j = 0; |
248 | 67 | FOREACH_DEVICE_DEVLINK(d, link) { |
249 | |
|
250 | 0 | if (j >= N_IOVEC_UDEV_FIELDS) |
251 | 0 | break; |
252 | | |
253 | 0 | b = strjoin("_UDEV_DEVLINK=", link); |
254 | 0 | if (b) { |
255 | 0 | iovec[n++] = IOVEC_MAKE_STRING(b); |
256 | 0 | z++; |
257 | 0 | } |
258 | |
|
259 | 0 | j++; |
260 | 0 | } |
261 | 67 | } |
262 | 1.10k | } |
263 | | |
264 | 2.28k | char source_boot_time[STRLEN("_SOURCE_BOOTTIME_TIMESTAMP=") + DECIMAL_STR_MAX(unsigned long long)]; |
265 | 2.28k | xsprintf(source_boot_time, "_SOURCE_BOOTTIME_TIMESTAMP=%llu", usec); |
266 | 2.28k | iovec[n++] = IOVEC_MAKE_STRING(source_boot_time); |
267 | | |
268 | | /* Historically, we stored the timestamp 'usec' as _SOURCE_MONOTONIC_TIMESTAMP, so we cannot remove |
269 | | * the field as it is already used in other projects. This is for backward compatibility. */ |
270 | 2.28k | char source_monotonic_time[STRLEN("_SOURCE_MONOTONIC_TIMESTAMP=") + DECIMAL_STR_MAX(unsigned long long)]; |
271 | 2.28k | xsprintf(source_monotonic_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec); |
272 | 2.28k | iovec[n++] = IOVEC_MAKE_STRING(source_monotonic_time); |
273 | | |
274 | 2.28k | iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel"); |
275 | | |
276 | 2.28k | char syslog_priority[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int)]; |
277 | 2.28k | xsprintf(syslog_priority, "PRIORITY=%i", LOG_PRI(priority)); |
278 | 2.28k | iovec[n++] = IOVEC_MAKE_STRING(syslog_priority); |
279 | | |
280 | 2.28k | char syslog_facility[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)]; |
281 | 2.28k | xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)); |
282 | 2.28k | iovec[n++] = IOVEC_MAKE_STRING(syslog_facility); |
283 | | |
284 | 2.28k | if (LOG_FAC(priority) == LOG_KERN) |
285 | 1.41k | iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel"); |
286 | 869 | else { |
287 | 869 | pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid); |
288 | | |
289 | | /* Avoid logging any new messages when we're processing messages generated by ourselves via |
290 | | * log_info() and friends to avoid infinite loops. */ |
291 | 869 | if (is_us(identifier, pid)) { |
292 | 0 | if (!ratelimit_below(&m->kmsg_own_ratelimit)) |
293 | 0 | return; |
294 | | |
295 | 0 | saved_log_max_level = log_get_max_level(); |
296 | 0 | c = m->my_context; |
297 | 0 | log_set_max_level(LOG_NULL); |
298 | 0 | } |
299 | | |
300 | 869 | if (identifier) { |
301 | 286 | syslog_identifier = strjoin("SYSLOG_IDENTIFIER=", identifier); |
302 | 286 | if (syslog_identifier) |
303 | 286 | iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier); |
304 | 286 | } |
305 | | |
306 | 869 | if (pid) { |
307 | 207 | syslog_pid = strjoin("SYSLOG_PID=", pid); |
308 | 207 | if (syslog_pid) |
309 | 207 | iovec[n++] = IOVEC_MAKE_STRING(syslog_pid); |
310 | 207 | } |
311 | 869 | } |
312 | | |
313 | 2.28k | if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0) |
314 | 2.28k | iovec[n++] = IOVEC_MAKE_STRING(message); |
315 | | |
316 | 2.28k | manager_dispatch_message(m, iovec, n, ELEMENTSOF(iovec), c, NULL, priority, 0); |
317 | | |
318 | 2.28k | if (saved_log_max_level != INT_MAX) |
319 | 0 | log_set_max_level(saved_log_max_level); |
320 | | |
321 | 2.28k | m->dev_kmsg_timestamp = usec; |
322 | 2.28k | sync_req_revalidate_by_timestamp(m); |
323 | | |
324 | 2.29k | finish: |
325 | 4.73k | for (j = 0; j < z; j++) |
326 | 2.43k | free(iovec[j].iov_base); |
327 | 2.29k | } |
328 | | |
329 | 0 | static int manager_read_dev_kmsg(Manager *m) { |
330 | 0 | char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */ |
331 | 0 | ssize_t l; |
332 | |
|
333 | 0 | assert(m); |
334 | 0 | assert(m->dev_kmsg_fd >= 0); |
335 | 0 | assert(m->read_kmsg); |
336 | |
|
337 | 0 | l = read(m->dev_kmsg_fd, buffer, sizeof(buffer) - 1); |
338 | 0 | if (l == 0) |
339 | 0 | return 0; |
340 | 0 | if (l < 0) { |
341 | 0 | if (ERRNO_IS_TRANSIENT(errno) || errno == EPIPE) |
342 | 0 | return 0; |
343 | | |
344 | 0 | return log_ratelimit_error_errno(errno, JOURNAL_LOG_RATELIMIT, "Failed to read from /dev/kmsg: %m"); |
345 | 0 | } |
346 | | |
347 | 0 | dev_kmsg_record(m, buffer, l); |
348 | 0 | return 1; |
349 | 0 | } |
350 | | |
351 | 0 | int manager_flush_dev_kmsg(Manager *m) { |
352 | 0 | int r; |
353 | |
|
354 | 0 | assert(m); |
355 | |
|
356 | 0 | if (m->dev_kmsg_fd < 0) |
357 | 0 | return 0; |
358 | | |
359 | 0 | if (!m->read_kmsg) |
360 | 0 | return 0; |
361 | | |
362 | 0 | log_debug("Flushing /dev/kmsg..."); |
363 | |
|
364 | 0 | for (;;) { |
365 | 0 | r = manager_read_dev_kmsg(m); |
366 | 0 | if (r < 0) |
367 | 0 | return r; |
368 | | |
369 | 0 | if (r == 0) |
370 | 0 | break; |
371 | 0 | } |
372 | | |
373 | 0 | return 0; |
374 | 0 | } |
375 | | |
376 | 0 | static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) { |
377 | 0 | Manager *m = ASSERT_PTR(userdata); |
378 | |
|
379 | 0 | assert(es); |
380 | 0 | assert(fd == m->dev_kmsg_fd); |
381 | |
|
382 | 0 | if (revents & EPOLLERR) |
383 | 0 | log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, |
384 | 0 | "/dev/kmsg buffer overrun, some messages lost."); |
385 | |
|
386 | 0 | if (!(revents & EPOLLIN)) |
387 | 0 | log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents); |
388 | |
|
389 | 0 | return manager_read_dev_kmsg(m); |
390 | 0 | } |
391 | | |
392 | 0 | int manager_open_dev_kmsg(Manager *m) { |
393 | 0 | int r; |
394 | |
|
395 | 0 | assert(m); |
396 | 0 | assert(m->dev_kmsg_fd < 0); |
397 | 0 | assert(!m->dev_kmsg_event_source); |
398 | |
|
399 | 0 | mode_t mode = O_CLOEXEC|O_NONBLOCK|O_NOCTTY|(m->read_kmsg ? O_RDWR : O_WRONLY); |
400 | |
|
401 | 0 | _cleanup_close_ int fd = open("/dev/kmsg", mode); |
402 | 0 | if (fd < 0) { |
403 | 0 | log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, |
404 | 0 | errno, "Failed to open /dev/kmsg for %s access, ignoring: %m", accmode_to_string(mode)); |
405 | 0 | return 0; |
406 | 0 | } |
407 | | |
408 | 0 | if (!m->read_kmsg) { |
409 | 0 | m->dev_kmsg_fd = TAKE_FD(fd); |
410 | 0 | return 0; |
411 | 0 | } |
412 | | |
413 | 0 | _cleanup_(sd_event_source_unrefp) sd_event_source *es = NULL; |
414 | 0 | r = sd_event_add_io(m->event, &es, fd, EPOLLIN, dispatch_dev_kmsg, m); |
415 | 0 | if (r < 0) |
416 | 0 | return log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m"); |
417 | | |
418 | 0 | r = sd_event_source_set_priority(es, SD_EVENT_PRIORITY_NORMAL+5); |
419 | 0 | if (r < 0) |
420 | 0 | return log_error_errno(r, "Failed to adjust priority of kmsg event source: %m"); |
421 | | |
422 | 0 | m->dev_kmsg_fd = TAKE_FD(fd); |
423 | 0 | m->dev_kmsg_event_source = TAKE_PTR(es); |
424 | 0 | return 0; |
425 | 0 | } |
426 | | |
427 | 0 | int manager_open_kernel_seqnum(Manager *m) { |
428 | 0 | int r; |
429 | |
|
430 | 0 | assert(m); |
431 | | |
432 | | /* We store the seqnum we last read in an mmapped file. That way we can just use it like a variable, |
433 | | * but it is persistent and automatically flushed at reboot. */ |
434 | |
|
435 | 0 | if (!m->read_kmsg) |
436 | 0 | return 0; |
437 | | |
438 | 0 | r = manager_map_seqnum_file(m, "kernel-seqnum", sizeof(uint64_t), (void**) &m->kernel_seqnum); |
439 | 0 | if (r < 0) |
440 | 0 | return log_error_errno(r, "Failed to map kernel seqnum file: %m"); |
441 | | |
442 | 0 | return 0; |
443 | 0 | } |