Coverage Report

Created: 2026-04-29 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/systemd/src/basic/cgroup-util.c
Line
Count
Source
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3
#include <linux/fs.h>
4
#include <linux/magic.h>
5
#include <signal.h>
6
#include <stdlib.h>
7
#include <sys/xattr.h>
8
#include <threads.h>
9
#include <unistd.h>
10
11
#include "alloc-util.h"
12
#include "capsule-util.h"
13
#include "cgroup-util.h"
14
#include "dirent-util.h"
15
#include "errno-util.h"
16
#include "extract-word.h"
17
#include "fd-util.h"
18
#include "fileio.h"
19
#include "format-util.h"
20
#include "fs-util.h"
21
#include "log.h"
22
#include "login-util.h"
23
#include "parse-util.h"
24
#include "path-util.h"
25
#include "pidref.h"
26
#include "process-util.h"
27
#include "set.h"
28
#include "special.h"
29
#include "stat-util.h"
30
#include "string-table.h"
31
#include "string-util.h"
32
#include "strv.h"
33
#include "unaligned.h"
34
#include "unit-name.h"
35
#include "user-util.h"
36
#include "xattr-util.h"
37
38
0
int cg_is_available(void) {
39
0
        struct statfs fs;
40
41
0
        if (statfs("/sys/fs/cgroup/", &fs) < 0) {
42
0
                if (errno == ENOENT) /* sysfs not mounted? */
43
0
                        return false;
44
45
0
                return log_debug_errno(errno, "Failed to statfs /sys/fs/cgroup/: %m");
46
0
        }
47
48
0
        return is_fs_type(&fs, CGROUP2_SUPER_MAGIC);
49
0
}
50
51
0
int cg_path_open(const char *path) {
52
0
        _cleanup_free_ char *fs = NULL;
53
0
        int r;
54
55
0
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
56
0
        if (r < 0)
57
0
                return r;
58
59
0
        return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
60
0
}
61
62
0
int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
63
0
        _cleanup_close_ int fsfd = -EBADF;
64
65
0
        if (cgroupfs_fd < 0) {
66
0
                fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
67
0
                if (fsfd < 0)
68
0
                        return -errno;
69
70
0
                cgroupfs_fd = fsfd;
71
0
        }
72
73
0
        union {
74
0
                struct file_handle file_handle;
75
0
                uint8_t space[offsetof(struct file_handle, f_handle) + sizeof(uint64_t)];
76
0
        } fh = {
77
0
                .file_handle.handle_bytes = sizeof(uint64_t),
78
0
                .file_handle.handle_type = FILEID_KERNFS,
79
0
        };
80
81
0
        unaligned_write_ne64(fh.file_handle.f_handle, id);
82
83
0
        return RET_NERRNO(open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC));
84
0
}
85
86
0
int cg_path_from_cgroupid(int cgroupfs_fd, uint64_t id, char **ret) {
87
0
        _cleanup_close_ int cgfd = -EBADF;
88
0
        int r;
89
90
0
        cgfd = cg_cgroupid_open(cgroupfs_fd, id);
91
0
        if (cgfd < 0)
92
0
                return cgfd;
93
94
0
        _cleanup_free_ char *path = NULL;
95
0
        r = fd_get_path(cgfd, &path);
96
0
        if (r < 0)
97
0
                return r;
98
99
0
        if (!path_startswith(path, "/sys/fs/cgroup/"))
100
0
                return -EXDEV; /* recognizable error */
101
102
0
        if (ret)
103
0
                *ret = TAKE_PTR(path);
104
0
        return 0;
105
0
}
106
107
0
int cg_enumerate_processes(const char *path, FILE **ret) {
108
0
        _cleanup_free_ char *fs = NULL;
109
0
        FILE *f;
110
0
        int r;
111
112
0
        assert(ret);
113
114
0
        r = cg_get_path(path, "cgroup.procs", &fs);
115
0
        if (r < 0)
116
0
                return r;
117
118
0
        f = fopen(fs, "re");
119
0
        if (!f)
120
0
                return -errno;
121
122
0
        *ret = f;
123
0
        return 0;
124
0
}
125
126
0
/* Reads the next PID from the stream 'f' (as opened by cg_enumerate_processes()).
 *
 * Returns 1 and stores the PID in *ret if one was read, returns 0 and stores 0 in *ret at end of file,
 * and returns a negative error otherwise (-EIO for values beyond PID_T_MAX).
 *
 * Note that cgroup.procs might contain duplicates! See cgroups.txt for details.
 *
 * NB: The kernel returns ENODEV if we tried to read from cgroup.procs of a cgroup that has been removed
 * already. Callers should handle that! */
int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        for (;;) {
                unsigned long value;

                errno = 0;
                if (fscanf(f, "%lu", &value) != 1) {
                        if (!feof(f))
                                return errno_or_else(EIO);

                        /* Clean end of file: nothing more to read */
                        *ret = 0;
                        return 0;
                }

                if (value > PID_T_MAX)
                        return -EIO;

                /* In some circumstances (e.g. WSL), cgroups might contain unmappable PIDs from other
                 * contexts. These show up as zeros, and depending on the caller, can either be plain
                 * skipped over, or returned as-is. */
                if (value != 0 || FLAGS_SET(flags, CGROUP_DONT_SKIP_UNMAPPED)) {
                        *ret = (pid_t) value;
                        return 1;
                }
        }
}
162
163
0
/* Like cg_read_pid(), but initializes a PidRef for the PID that was read. Returns 1 if *ret was set up,
 * 0 (with *ret set to PIDREF_NULL) at end of file, -EREMOTE if a zero PID was read, and other negative
 * errors on failure. PIDs for which pidref_set_pid() reports -ESRCH are silently skipped. */
int cg_read_pidref(FILE *f, PidRef *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        for (;;) {
                pid_t pid;
                int r;

                r = cg_read_pid(f, &pid, flags);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read pid from cgroup item: %m");
                if (r == 0) {
                        *ret = PIDREF_NULL;
                        return 0;
                }

                if (pid == 0)
                        return -EREMOTE;

                r = pidref_set_pid(ret, pid);
                if (r == -ESRCH)
                        continue; /* gone by now? just skip over it, read the next */
                if (r < 0)
                        return r;

                return 1;
        }
}
192
193
0
/* Reports whether the "cgroup.kill" attribute can be used, judged by the existence of
 * /sys/fs/cgroup/init.scope/cgroup.kill. The verdict is determined once and then cached in a
 * thread-local variable. */
bool cg_kill_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (cg_is_available() <= 0)
                        cached = false;
                else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) >= 0)
                        cached = true;
                else {
                        /* Anything but "does not exist" is unexpected, but still treated as unsupported */
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether cgroup.kill is available, assuming not: %m");

                        cached = false;
                }
        }

        return cached;
}
208
209
0
int cg_enumerate_subgroups(const char *path, DIR **ret) {
210
0
        _cleanup_free_ char *fs = NULL;
211
0
        DIR *d;
212
0
        int r;
213
214
0
        assert(ret);
215
216
        /* This is not recursive! */
217
218
0
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
219
0
        if (r < 0)
220
0
                return r;
221
222
0
        d = opendir(fs);
223
0
        if (!d)
224
0
                return -errno;
225
226
0
        *ret = d;
227
0
        return 0;
228
0
}
229
230
0
int cg_read_subgroup(DIR *d, char **ret) {
231
0
        assert(d);
232
0
        assert(ret);
233
234
0
        FOREACH_DIRENT_ALL(de, d, return -errno) {
235
0
                if (de->d_type != DT_DIR)
236
0
                        continue;
237
238
0
                if (dot_or_dot_dot(de->d_name))
239
0
                        continue;
240
241
0
                return strdup_to_full(ret, de->d_name);
242
0
        }
243
244
0
        *ret = NULL;
245
0
        return 0;
246
0
}
247
248
int cg_kill(
249
                const char *path,
250
                int sig,
251
                CGroupFlags flags,
252
                Set *killed_pids,
253
                cg_kill_log_func_t log_kill,
254
0
                void *userdata) {
255
256
0
        _cleanup_set_free_ Set *allocated_set = NULL;
257
0
        int r, ret = 0;
258
259
0
        assert(path);
260
0
        assert(sig >= 0);
261
262
         /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence
263
          * don't send SIGCONT on SIGKILL. */
264
0
        if (IN_SET(sig, SIGCONT, SIGKILL))
265
0
                flags &= ~CGROUP_SIGCONT;
266
267
        /* This goes through the tasks list and kills them all. This is repeated until no further processes
268
         * are added to the tasks list, to properly handle forking processes.
269
         *
270
         * When sending SIGKILL, prefer cg_kill_kernel_sigkill(), which is fully atomic. */
271
272
0
        if (!killed_pids) {
273
0
                killed_pids = allocated_set = set_new(NULL);
274
0
                if (!killed_pids)
275
0
                        return -ENOMEM;
276
0
        }
277
278
0
        bool done;
279
0
        do {
280
0
                _cleanup_fclose_ FILE *f = NULL;
281
0
                int ret_log_kill;
282
283
0
                done = true;
284
285
0
                r = cg_enumerate_processes(path, &f);
286
0
                if (r == -ENOENT)
287
0
                        break;
288
0
                if (r < 0)
289
0
                        return RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup items: %m"));
290
291
0
                for (;;) {
292
0
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
293
294
0
                        r = cg_read_pidref(f, &pidref, flags);
295
0
                        if (r == -ENODEV) {
296
                                /* reading from cgroup.pids will result in ENODEV if the cgroup is
297
                                 * concurrently removed. Just leave in that case, because a removed cgroup
298
                                 * contains no processes anymore. */
299
0
                                done = true;
300
0
                                break;
301
0
                        }
302
0
                        if (r < 0)
303
0
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
304
0
                        if (r == 0)
305
0
                                break;
306
307
0
                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
308
0
                                continue;
309
310
0
                        if (set_contains(killed_pids, PID_TO_PTR(pidref.pid)))
311
0
                                continue;
312
313
                        /* Ignore kernel threads to mimic the behavior of cgroup.kill. */
314
0
                        if (pidref_is_kernel_thread(&pidref) > 0) {
315
0
                                log_debug("Ignoring kernel thread with pid " PID_FMT " in cgroup '%s'", pidref.pid, path);
316
0
                                continue;
317
0
                        }
318
319
0
                        if (log_kill)
320
0
                                ret_log_kill = log_kill(&pidref, sig, userdata);
321
322
                        /* If we haven't killed this process yet, kill it */
323
0
                        r = pidref_kill(&pidref, sig);
324
0
                        if (r < 0 && r != -ESRCH)
325
0
                                RET_GATHER(ret, log_debug_errno(r, "Failed to kill process with pid " PID_FMT " from cgroup '%s': %m", pidref.pid, path));
326
0
                        if (r >= 0) {
327
0
                                if (flags & CGROUP_SIGCONT)
328
0
                                        (void) pidref_kill(&pidref, SIGCONT);
329
330
0
                                if (ret == 0) {
331
0
                                        if (log_kill)
332
0
                                                ret = ret_log_kill;
333
0
                                        else
334
0
                                                ret = 1;
335
0
                                }
336
0
                        }
337
338
0
                        done = false;
339
340
0
                        r = set_put(killed_pids, PID_TO_PTR(pidref.pid));
341
0
                        if (r < 0)
342
0
                                return RET_GATHER(ret, r);
343
0
                }
344
345
                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
346
                 * until no new pids need to be killed. */
347
348
0
        } while (!done);
349
350
0
        return ret;
351
0
}
352
353
int cg_kill_recursive(
354
                const char *path,
355
                int sig,
356
                CGroupFlags flags,
357
                Set *killed_pids,
358
                cg_kill_log_func_t log_kill,
359
0
                void *userdata) {
360
361
0
        _cleanup_set_free_ Set *allocated_set = NULL;
362
0
        _cleanup_closedir_ DIR *d = NULL;
363
0
        int r, ret;
364
365
0
        assert(path);
366
0
        assert(sig >= 0);
367
368
0
        if (!killed_pids) {
369
0
                killed_pids = allocated_set = set_new(NULL);
370
0
                if (!killed_pids)
371
0
                        return -ENOMEM;
372
0
        }
373
374
0
        ret = cg_kill(path, sig, flags, killed_pids, log_kill, userdata);
375
376
0
        r = cg_enumerate_subgroups(path, &d);
377
0
        if (r < 0) {
378
0
                if (r != -ENOENT)
379
0
                        RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup '%s' subgroups: %m", path));
380
381
0
                return ret;
382
0
        }
383
384
0
        for (;;) {
385
0
                _cleanup_free_ char *fn = NULL, *p = NULL;
386
387
0
                r = cg_read_subgroup(d, &fn);
388
0
                if (r < 0) {
389
0
                        RET_GATHER(ret, log_debug_errno(r, "Failed to read subgroup from cgroup '%s': %m", path));
390
0
                        break;
391
0
                }
392
0
                if (r == 0)
393
0
                        break;
394
395
0
                p = path_join(empty_to_root(path), fn);
396
0
                if (!p)
397
0
                        return -ENOMEM;
398
399
0
                r = cg_kill_recursive(p, sig, flags, killed_pids, log_kill, userdata);
400
0
                if (r < 0)
401
0
                        log_debug_errno(r, "Failed to recursively kill processes in cgroup '%s': %m", p);
402
0
                if (r != 0 && ret >= 0)
403
0
                        ret = r;
404
0
        }
405
406
0
        return ret;
407
0
}
408
409
0
int cg_kill_kernel_sigkill(const char *path) {
410
0
        _cleanup_free_ char *killfile = NULL;
411
0
        int r;
412
413
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file.  This sends SIGKILL to all
414
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
415
416
0
        assert(path);
417
418
0
        if (!cg_kill_supported())
419
0
                return -EOPNOTSUPP;
420
421
0
        r = cg_get_path(path, "cgroup.kill", &killfile);
422
0
        if (r < 0)
423
0
                return r;
424
425
0
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
426
0
        if (r < 0)
427
0
                return log_debug_errno(r, "Failed to write to cgroup.kill for cgroup '%s': %m", path);
428
429
0
        return 0;
430
0
}
431
432
1.62k
/* Builds the file system path for a cgroup by joining "/sys/fs/cgroup", 'path' and 'suffix', and passing
 * the result through path_simplify(). If 'path' is empty the suffix takes its place. Returns 0 and
 * stores the newly allocated string in *ret, or -ENOMEM. */
int cg_get_path(const char *path, const char *suffix, char **ret) {
        assert(ret);

        if (isempty(path)) {
                /* No cgroup path given: shift the suffix into the path slot */
                path = suffix;
                suffix = NULL;
        }

        char *joined = path_join("/sys/fs/cgroup", path, suffix);
        if (!joined)
                return -ENOMEM;

        *ret = path_simplify(joined);
        return 0;
}
447
448
0
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
449
0
        _cleanup_free_ char *fs = NULL;
450
0
        int r;
451
452
0
        assert(path);
453
0
        assert(name);
454
0
        assert(value || size <= 0);
455
456
0
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
457
0
        if (r < 0)
458
0
                return r;
459
460
0
        return RET_NERRNO(setxattr(fs, name, value, size, flags));
461
0
}
462
463
0
int cg_get_xattr(const char *path, const char *name, char **ret, size_t *ret_size) {
464
0
        _cleanup_free_ char *fs = NULL;
465
0
        int r;
466
467
0
        assert(path);
468
0
        assert(name);
469
470
0
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
471
0
        if (r < 0)
472
0
                return r;
473
474
0
        return lgetxattr_malloc(fs, name, ret, ret_size);
475
0
}
476
477
0
int cg_get_xattr_bool(const char *path, const char *name) {
478
0
        _cleanup_free_ char *fs = NULL;
479
0
        int r;
480
481
0
        assert(path);
482
0
        assert(name);
483
484
0
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
485
0
        if (r < 0)
486
0
                return r;
487
488
0
        return getxattr_at_bool(AT_FDCWD, fs, name, /* at_flags= */ 0);
489
0
}
490
491
0
int cg_remove_xattr(const char *path, const char *name) {
492
0
        _cleanup_free_ char *fs = NULL;
493
0
        int r;
494
495
0
        assert(path);
496
0
        assert(name);
497
498
0
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
499
0
        if (r < 0)
500
0
                return r;
501
502
0
        return RET_NERRNO(removexattr(fs, name));
503
0
}
504
505
4.05k
int cg_pid_get_path(pid_t pid, char **ret_path) {
506
4.05k
        _cleanup_fclose_ FILE *f = NULL;
507
4.05k
        const char *fs;
508
4.05k
        int r;
509
510
4.05k
        assert(pid >= 0);
511
4.05k
        assert(ret_path);
512
513
4.05k
        fs = procfs_file_alloca(pid, "cgroup");
514
4.05k
        r = fopen_unlocked(fs, "re", &f);
515
4.05k
        if (r == -ENOENT)
516
0
                return -ESRCH;
517
4.05k
        if (r < 0)
518
0
                return r;
519
520
52.6k
        for (;;) {
521
52.6k
                _cleanup_free_ char *line = NULL;
522
52.6k
                char *e;
523
524
52.6k
                r = read_line(f, LONG_LINE_MAX, &line);
525
52.6k
                if (r < 0)
526
0
                        return r;
527
52.6k
                if (r == 0)
528
0
                        return -ENODATA;
529
530
52.6k
                e = startswith(line, "0:");
531
52.6k
                if (!e)
532
48.6k
                        continue;
533
534
4.05k
                e = strchr(e, ':');
535
4.05k
                if (!e)
536
0
                        continue;
537
538
4.05k
                _cleanup_free_ char *path = strdup(e + 1);
539
4.05k
                if (!path)
540
0
                        return -ENOMEM;
541
542
                /* Refuse cgroup paths from outside our cgroup namespace */
543
4.05k
                if (startswith(path, "/../"))
544
0
                        return -EUNATCH;
545
546
                /* Truncate suffix indicating the process is a zombie */
547
4.05k
                e = endswith(path, " (deleted)");
548
4.05k
                if (e)
549
0
                        *e = 0;
550
551
4.05k
                *ret_path = TAKE_PTR(path);
552
4.05k
                return 0;
553
4.05k
        }
554
4.05k
}
555
556
0
int cg_pidref_get_path(const PidRef *pidref, char **ret_path) {
557
0
        _cleanup_free_ char *path = NULL;
558
0
        int r;
559
560
0
        assert(ret_path);
561
562
0
        if (!pidref_is_set(pidref))
563
0
                return -ESRCH;
564
0
        if (pidref_is_remote(pidref))
565
0
                return -EREMOTE;
566
567
        // XXX: Ideally we'd use pidfd_get_cgroupid() + cg_path_from_cgroupid() here, to extract this
568
        // bit of information from pidfd directly. However, the latter requires privilege and it's
569
        // not entirely clear how to handle cgroups from outer namespace.
570
571
0
        r = cg_pid_get_path(pidref->pid, &path);
572
0
        if (r < 0)
573
0
                return r;
574
575
        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
576
0
        r = pidref_verify(pidref);
577
0
        if (r < 0)
578
0
                return r;
579
580
0
        *ret_path = TAKE_PTR(path);
581
0
        return 0;
582
0
}
583
584
0
int cg_is_empty(const char *path) {
585
0
        _cleanup_free_ char *t = NULL;
586
0
        int r;
587
588
        /* Check if the cgroup hierarchy under 'path' is empty. On cgroup v2 it's exposed via the "populated"
589
         * attribute of "cgroup.events". */
590
591
0
        assert(path);
592
593
        /* The root cgroup is always populated */
594
0
        if (empty_or_root(path))
595
0
                return false;
596
597
0
        r = cg_get_keyed_attribute(path, "cgroup.events", STRV_MAKE("populated"), &t);
598
0
        if (r == -ENOENT)
599
0
                return true;
600
0
        if (r < 0)
601
0
                return r;
602
603
0
        return streq(t, "0");
604
0
}
605
606
0
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
607
0
        _cleanup_free_ char *controller = NULL;
608
0
        const char *path;
609
0
        int r;
610
611
0
        assert(spec);
612
613
        /* This extracts the path part from the deprecated controller:path spec. The path must be absolute or
614
         * an empty string. No validation is done for the controller part. */
615
616
0
        if (isempty(spec) || path_is_absolute(spec)) {
617
                /* Assume this does not contain controller. */
618
0
                path = spec;
619
0
                goto finalize;
620
0
        }
621
622
0
        const char *e = strchr(spec, ':');
623
0
        if (!e) {
624
                /* Controller only. */
625
0
                if (ret_controller) {
626
0
                        controller = strdup(spec);
627
0
                        if (!controller)
628
0
                                return -ENOMEM;
629
0
                }
630
631
0
                path = NULL;
632
0
        } else {
633
                /* Both controller and path. */
634
0
                if (ret_controller) {
635
0
                        controller = strndup(spec, e - spec);
636
0
                        if (!controller)
637
0
                                return -ENOMEM;
638
0
                }
639
640
0
                path = e + 1;
641
0
        }
642
643
0
finalize:
644
0
        path = empty_to_null(path);
645
646
0
        if (path) {
647
                /* Non-empty path must be absolute. */
648
0
                if (!path_is_absolute(path))
649
0
                        return -EINVAL;
650
651
                /* Path must not contain dot-dot. */
652
0
                if (!path_is_safe(path))
653
0
                        return -EINVAL;
654
0
        }
655
656
0
        if (ret_path) {
657
0
                r = path_simplify_alloc(path, ret_path);
658
0
                if (r < 0)
659
0
                        return r;
660
0
        }
661
662
0
        if (ret_controller)
663
0
                *ret_controller = TAKE_PTR(controller);
664
665
0
        return 0;
666
0
}
667
668
2.46k
int cg_get_root_path(char **ret_path) {
669
2.46k
        char *p, *e;
670
2.46k
        int r;
671
672
2.46k
        assert(ret_path);
673
674
2.46k
        r = cg_pid_get_path(1, &p);
675
2.46k
        if (r < 0)
676
0
                return r;
677
678
2.46k
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
679
2.46k
        if (e)
680
0
                *e = 0;
681
682
2.46k
        *ret_path = p;
683
2.46k
        return 0;
684
2.46k
}
685
686
1.58k
int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
687
1.58k
        int r;
688
689
1.58k
        assert(cgroup);
690
1.58k
        assert(ret_shifted);
691
692
1.58k
        _cleanup_free_ char *rt = NULL;
693
1.58k
        if (!root) {
694
                /* If the root was specified let's use that, otherwise
695
                 * let's determine it from PID 1 */
696
697
1.58k
                r = cg_get_root_path(&rt);
698
1.58k
                if (r < 0)
699
0
                        return r;
700
701
1.58k
                root = rt;
702
1.58k
        }
703
704
1.58k
        *ret_shifted = path_startswith_full(cgroup, root, PATH_STARTSWITH_RETURN_LEADING_SLASH|PATH_STARTSWITH_REFUSE_DOT_DOT) ?: cgroup;
705
1.58k
        return 0;
706
1.58k
}
707
708
1.58k
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
709
1.58k
        _cleanup_free_ char *raw = NULL;
710
1.58k
        const char *c;
711
1.58k
        int r;
712
713
1.58k
        assert(pid >= 0);
714
1.58k
        assert(ret_cgroup);
715
716
1.58k
        r = cg_pid_get_path(pid, &raw);
717
1.58k
        if (r < 0)
718
0
                return r;
719
720
1.58k
        r = cg_shift_path(raw, root, &c);
721
1.58k
        if (r < 0)
722
0
                return r;
723
724
1.58k
        if (c == raw) {
725
1.58k
                *ret_cgroup = TAKE_PTR(raw);
726
1.58k
                return 0;
727
1.58k
        }
728
729
0
        return strdup_to(ret_cgroup, c);
730
1.58k
}
731
732
3.59k
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
733
3.59k
        assert(cgroup);
734
735
3.59k
        size_t n = strcspn(cgroup, "/");
736
3.59k
        if (n < 3)
737
0
                return -ENXIO;
738
739
3.59k
        char *c = strndupa_safe(cgroup, n);
740
3.59k
        c = cg_unescape(c);
741
742
3.59k
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
743
3.17k
                return -ENXIO;
744
745
424
        if (ret_unit)
746
424
                return strdup_to(ret_unit, c);
747
748
0
        return 0;
749
424
}
750
751
12.6k
static bool valid_slice_name(const char *p, size_t n) {
752
12.6k
        assert(p || n == 0);
753
754
12.6k
        if (n < STRLEN("x.slice"))
755
11.5k
                return false;
756
757
1.12k
        char *c = strndupa_safe(p, n);
758
1.12k
        if (!endswith(c, ".slice"))
759
44
                return false;
760
761
1.08k
        return unit_name_is_valid(cg_unescape(c), UNIT_NAME_PLAIN);
762
1.12k
}
763
764
7.92k
/* Skips over all slice assignments: advances past leading slashes and path components for as long as
 * valid_slice_name() accepts the component, and returns a pointer to the first component that it does
 * not accept (with the slashes in front of it already skipped). */
static const char* skip_slices(const char *p) {
        assert(p);

        const char *cur = p + strspn(p, "/");
        size_t len = strcspn(cur, "/");

        while (valid_slice_name(cur, len)) {
                cur += len;
                cur += strspn(cur, "/");
                len = strcspn(cur, "/");
        }

        return cur;
}
781
782
3.17k
int cg_path_get_unit_full(const char *path, char **ret_unit, char **ret_subgroup) {
783
3.17k
        int r;
784
785
3.17k
        assert(path);
786
787
3.17k
        const char *e = skip_slices(path);
788
789
3.17k
        _cleanup_free_ char *unit = NULL;
790
3.17k
        r = cg_path_decode_unit(e, &unit);
791
3.17k
        if (r < 0)
792
3.17k
                return r;
793
794
        /* We skipped over the slices, don't accept any now */
795
0
        if (endswith(unit, ".slice"))
796
0
                return -ENXIO;
797
798
0
        if (ret_subgroup) {
799
0
                _cleanup_free_ char *subgroup = NULL;
800
0
                e += strcspn(e, "/");
801
0
                e += strspn(e, "/");
802
803
0
                if (isempty(e))
804
0
                        subgroup = NULL;
805
0
                else {
806
0
                        subgroup = strdup(e);
807
0
                        if (!subgroup)
808
0
                                return -ENOMEM;
809
0
                }
810
811
0
                path_simplify(subgroup);
812
813
0
                *ret_subgroup = TAKE_PTR(subgroup);
814
0
        }
815
816
0
        if (ret_unit)
817
0
                *ret_unit = TAKE_PTR(unit);
818
819
0
        return 0;
820
0
}
821
822
1.58k
int cg_path_get_unit_path(const char *path, char **ret) {
823
1.58k
        _cleanup_free_ char *path_copy = NULL;
824
1.58k
        char *unit_name;
825
826
1.58k
        assert(path);
827
1.58k
        assert(ret);
828
829
1.58k
        path_copy = strdup(path);
830
1.58k
        if (!path_copy)
831
0
                return -ENOMEM;
832
833
1.58k
        unit_name = (char*) skip_slices(path_copy);
834
1.58k
        unit_name[strcspn(unit_name, "/")] = 0;
835
836
1.58k
        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
837
1.58k
                return -ENXIO;
838
839
0
        *ret = TAKE_PTR(path_copy);
840
841
0
        return 0;
842
1.58k
}
843
844
0
int cg_pid_get_unit_full(pid_t pid, char **ret_unit, char **ret_subgroup) {
845
0
        int r;
846
847
0
        _cleanup_free_ char *cgroup = NULL;
848
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
849
0
        if (r < 0)
850
0
                return r;
851
852
0
        return cg_path_get_unit_full(cgroup, ret_unit, ret_subgroup);
853
0
}
854
855
0
int cg_pidref_get_unit_full(const PidRef *pidref, char **ret_unit, char **ret_subgroup) {
856
0
        int r;
857
858
0
        if (!pidref_is_set(pidref))
859
0
                return -ESRCH;
860
0
        if (pidref_is_remote(pidref))
861
0
                return -EREMOTE;
862
863
0
        _cleanup_free_ char *unit = NULL, *subgroup = NULL;
864
0
        r = cg_pid_get_unit_full(pidref->pid, &unit, &subgroup);
865
0
        if (r < 0)
866
0
                return r;
867
868
0
        r = pidref_verify(pidref);
869
0
        if (r < 0)
870
0
                return r;
871
872
0
        if (ret_unit)
873
0
                *ret_unit = TAKE_PTR(unit);
874
0
        if (ret_subgroup)
875
0
                *ret_subgroup = TAKE_PTR(subgroup);
876
0
        return 0;
877
0
}
878
879
3.17k
/* Skips session-*.scope, but requires it to be there: if the first component of 'p' has the form
 * "session-<id>.scope" with an <id> accepted by session_id_valid(), the position after that component is
 * returned, passed through skip_leading_slash(). In all other cases NULL is returned. */
static const char* skip_session(const char *p) {
        if (isempty(p))
                return NULL;

        const char *component = p + strspn(p, "/");
        size_t len = strcspn(component, "/");
        if (len < STRLEN("session-x.scope"))
                return NULL;

        const char *id_start = startswith(component, "session-");
        if (!id_start)
                return NULL;

        /* Note that session scopes never need unescaping, since they cannot conflict with the kernel's
         * own names, hence we don't need to call cg_unescape() here. */
        char *id = strndupa_safe(id_start, component + len - id_start);
        char *suffix = endswith(id, ".scope");
        if (!suffix)
                return NULL;

        /* Chop off the ".scope" suffix, so that only the session ID remains */
        *suffix = '\0';

        if (!session_id_valid(id))
                return NULL;

        return skip_leading_slash(component + len);
}
910
911
3.17k
static const char* skip_user_manager(const char *p) {
912
3.17k
        size_t n;
913
914
        /* Skip user@*.service or capsule@*.service, but require either of them to be there. */
915
916
3.17k
        if (isempty(p))
917
0
                return NULL;
918
919
3.17k
        p += strspn(p, "/");
920
921
3.17k
        n = strcspn(p, "/");
922
3.17k
        if (n < CONST_MIN(STRLEN("user@x.service"), STRLEN("capsule@x.service")))
923
3.17k
                return NULL;
924
925
        /* Any possible errors from functions called below are converted to NULL return, so our callers won't
926
         * resolve user/capsule name. */
927
0
        _cleanup_free_ char *unit_name = strndup(p, n);
928
0
        if (!unit_name)
929
0
                return NULL;
930
931
0
        _cleanup_free_ char *i = NULL;
932
0
        UnitNameFlags type = unit_name_to_instance(unit_name, &i);
933
934
0
        if (type != UNIT_NAME_INSTANCE)
935
0
                return NULL;
936
937
        /* Note that user manager services never need unescaping, since they cannot conflict with the
938
         * kernel's own names, hence we don't need to call cg_unescape() here.  Prudently check validity of
939
         * instance names, they should be always valid as we validate them upon unit start. */
940
0
        if (!(startswith(unit_name, "user@") && parse_uid(i, NULL) >= 0) &&
941
0
            !(startswith(unit_name, "capsule@") && capsule_name_is_valid(i) > 0))
942
0
                return NULL;
943
944
0
        return skip_leading_slash(p + n);
945
0
}
946
947
3.17k
static const char* skip_user_prefix(const char *path) {
        assert(path);

        /* Step over the leading slices, if there are any. */
        const char *rest = skip_slices(path);

        /* Prefer the user manager component if it comes next; otherwise try to skip a user session
         * instead. The result is NULL if neither is found. */
        const char *after_manager = skip_user_manager(rest);
        if (!after_manager)
                return skip_session(rest);

        return after_manager;
}
963
964
1.58k
int cg_path_get_user_unit_full(const char *path, char **ret_unit, char **ret_subgroup) {
        /* Extracts the user unit (and subgroup) from a cgroup path. Returns -ENXIO if the path carries
         * no user manager/session prefix. */

        assert(path);

        const char *rest = skip_user_prefix(path);
        if (rest)
                /* Past the prefix the path looks just like that of a system unit, so the same parser
                 * is reused. */
                return cg_path_get_unit_full(rest, ret_unit, ret_subgroup);

        return -ENXIO;
}
977
978
0
int cg_pid_get_user_unit_full(pid_t pid, char **ret_unit, char **ret_subgroup) {
979
0
        int r;
980
981
0
        _cleanup_free_ char *cgroup = NULL;
982
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
983
0
        if (r < 0)
984
0
                return r;
985
986
0
        return cg_path_get_user_unit_full(cgroup, ret_unit, ret_subgroup);
987
0
}
988
989
0
int cg_pidref_get_user_unit_full(const PidRef *pidref, char **ret_unit, char **ret_subgroup) {
990
0
        int r;
991
992
0
        if (!pidref_is_set(pidref))
993
0
                return -ESRCH;
994
0
        if (pidref_is_remote(pidref))
995
0
                return -EREMOTE;
996
997
0
        _cleanup_free_ char *unit = NULL, *subgroup = NULL;
998
0
        r = cg_pid_get_user_unit_full(pidref->pid, &unit, &subgroup);
999
0
        if (r < 0)
1000
0
                return r;
1001
1002
0
        r = pidref_verify(pidref);
1003
0
        if (r < 0)
1004
0
                return r;
1005
1006
0
        if (ret_unit)
1007
0
                *ret_unit = TAKE_PTR(unit);
1008
0
        if (ret_subgroup)
1009
0
                *ret_subgroup = TAKE_PTR(subgroup);
1010
0
        return 0;
1011
0
}
1012
1013
0
int cg_path_get_machine_name(const char *path, char **ret_machine) {
1014
0
        _cleanup_free_ char *u = NULL;
1015
0
        const char *sl;
1016
0
        int r;
1017
1018
0
        r = cg_path_get_unit(path, &u);
1019
0
        if (r < 0)
1020
0
                return r;
1021
1022
0
        sl = strjoina("/run/systemd/machines/unit:", u);
1023
0
        return readlink_malloc(sl, ret_machine);
1024
0
}
1025
1026
0
int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
1027
0
        _cleanup_free_ char *cgroup = NULL;
1028
0
        int r;
1029
1030
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1031
0
        if (r < 0)
1032
0
                return r;
1033
1034
0
        return cg_path_get_machine_name(cgroup, ret_machine);
1035
0
}
1036
1037
1.58k
int cg_path_get_session(const char *path, char **ret_session) {
1038
1.58k
        _cleanup_free_ char *unit = NULL;
1039
1.58k
        char *start, *end;
1040
1.58k
        int r;
1041
1042
1.58k
        assert(path);
1043
1044
1.58k
        r = cg_path_get_unit(path, &unit);
1045
1.58k
        if (r < 0)
1046
1.58k
                return r;
1047
1048
0
        start = startswith(unit, "session-");
1049
0
        if (!start)
1050
0
                return -ENXIO;
1051
0
        end = endswith(start, ".scope");
1052
0
        if (!end)
1053
0
                return -ENXIO;
1054
1055
0
        *end = 0;
1056
0
        if (!session_id_valid(start))
1057
0
                return -ENXIO;
1058
1059
0
        if (!ret_session)
1060
0
                return 0;
1061
1062
0
        return strdup_to(ret_session, start);
1063
0
}
1064
1065
0
int cg_pid_get_session(pid_t pid, char **ret_session) {
1066
0
        _cleanup_free_ char *cgroup = NULL;
1067
0
        int r;
1068
1069
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1070
0
        if (r < 0)
1071
0
                return r;
1072
1073
0
        return cg_path_get_session(cgroup, ret_session);
1074
0
}
1075
1076
0
int cg_pidref_get_session(const PidRef *pidref, char **ret) {
1077
0
        int r;
1078
1079
0
        if (!pidref_is_set(pidref))
1080
0
                return -ESRCH;
1081
0
        if (pidref_is_remote(pidref))
1082
0
                return -EREMOTE;
1083
1084
0
        _cleanup_free_ char *session = NULL;
1085
0
        r = cg_pid_get_session(pidref->pid, &session);
1086
0
        if (r < 0)
1087
0
                return r;
1088
1089
0
        r = pidref_verify(pidref);
1090
0
        if (r < 0)
1091
0
                return r;
1092
1093
0
        if (ret)
1094
0
                *ret = TAKE_PTR(session);
1095
0
        return 0;
1096
0
}
1097
1098
1.58k
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
1099
1.58k
        _cleanup_free_ char *slice = NULL;
1100
1.58k
        char *start, *end;
1101
1.58k
        int r;
1102
1103
1.58k
        assert(path);
1104
1105
1.58k
        r = cg_path_get_slice(path, &slice);
1106
1.58k
        if (r < 0)
1107
0
                return r;
1108
1109
1.58k
        start = startswith(slice, "user-");
1110
1.58k
        if (!start)
1111
1.58k
                return -ENXIO;
1112
1113
0
        end = endswith(start, ".slice");
1114
0
        if (!end)
1115
0
                return -ENXIO;
1116
1117
0
        *end = 0;
1118
0
        if (parse_uid(start, ret_uid) < 0)
1119
0
                return -ENXIO;
1120
1121
0
        return 0;
1122
0
}
1123
1124
0
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
1125
0
        _cleanup_free_ char *cgroup = NULL;
1126
0
        int r;
1127
1128
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1129
0
        if (r < 0)
1130
0
                return r;
1131
1132
0
        return cg_path_get_owner_uid(cgroup, ret_uid);
1133
0
}
1134
1135
0
int cg_pidref_get_owner_uid(const PidRef *pidref, uid_t *ret) {
1136
0
        int r;
1137
1138
0
        if (!pidref_is_set(pidref))
1139
0
                return -ESRCH;
1140
0
        if (pidref_is_remote(pidref))
1141
0
                return -EREMOTE;
1142
1143
0
        uid_t uid;
1144
0
        r = cg_pid_get_owner_uid(pidref->pid, &uid);
1145
0
        if (r < 0)
1146
0
                return r;
1147
1148
0
        r = pidref_verify(pidref);
1149
0
        if (r < 0)
1150
0
                return r;
1151
1152
0
        if (ret)
1153
0
                *ret = uid;
1154
1155
0
        return 0;
1156
0
}
1157
1158
3.70k
int cg_path_get_slice(const char *p, char **ret_slice) {
1159
3.70k
        const char *e = NULL;
1160
1161
3.70k
        assert(p);
1162
1163
        /* Finds the right-most slice unit from the beginning, but stops before we come to
1164
         * the first non-slice unit. */
1165
1166
4.75k
        for (;;) {
1167
4.75k
                const char *s;
1168
4.75k
                int n;
1169
1170
4.75k
                n = path_find_first_component(&p, /* accept_dot_dot= */ false, &s);
1171
4.75k
                if (n < 0)
1172
7
                        return n;
1173
4.74k
                if (!valid_slice_name(s, n))
1174
3.69k
                        break;
1175
1176
1.05k
                e = s;
1177
1.05k
        }
1178
1179
3.69k
        if (e)
1180
424
                return cg_path_decode_unit(e, ret_slice);
1181
1182
3.27k
        if (ret_slice)
1183
3.27k
                return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
1184
1185
0
        return 0;
1186
3.27k
}
1187
1188
0
int cg_pid_get_slice(pid_t pid, char **ret_slice) {
1189
0
        _cleanup_free_ char *cgroup = NULL;
1190
0
        int r;
1191
1192
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1193
0
        if (r < 0)
1194
0
                return r;
1195
1196
0
        return cg_path_get_slice(cgroup, ret_slice);
1197
0
}
1198
1199
1.58k
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        /* Extracts the slice below the user manager/session prefix of a cgroup path. Returns -ENXIO if
         * the path carries no such prefix. */

        assert(p);

        const char *rest = skip_user_prefix(p);
        if (rest)
                /* Past the prefix the path looks just like that of a system slice, so the same parser
                 * is reused. */
                return cg_path_get_slice(rest, ret_slice);

        return -ENXIO;
}
1211
1212
0
int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
1213
0
        _cleanup_free_ char *cgroup = NULL;
1214
0
        int r;
1215
1216
0
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1217
0
        if (r < 0)
1218
0
                return r;
1219
1220
0
        return cg_path_get_user_slice(cgroup, ret_slice);
1221
0
}
1222
1223
212k
bool cg_needs_escape(const char *p) {
1224
1225
        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1226
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1227
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
1228
         * return true here (because too long, or contain the forbidden character "/"). */
1229
1230
212k
        if (!filename_is_valid(p))
1231
195
                return true;
1232
1233
212k
        if (IN_SET(p[0], '_', '.'))
1234
6.24k
                return true;
1235
1236
206k
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
1237
279
                return true;
1238
1239
206k
        if (startswith(p, "cgroup."))
1240
93
                return true;
1241
1242
3.08M
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1243
2.87M
                const char *q;
1244
1245
2.87M
                q = startswith(p, cgroup_controller_to_string(c));
1246
2.87M
                if (!q)
1247
2.87M
                        continue;
1248
1249
793
                if (q[0] == '.')
1250
526
                        return true;
1251
793
        }
1252
1253
205k
        return false;
1254
205k
}
1255
1256
211k
int cg_escape(const char *p, char **ret) {
1257
211k
        _cleanup_free_ char *n = NULL;
1258
1259
211k
        assert(ret);
1260
1261
        /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1262
         * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1263
         * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1264
         * is one. */
1265
1266
        /* The return value of this function (unlike cg_unescape()) needs free()! */
1267
1268
211k
        if (cg_needs_escape(p)) {
1269
6.86k
                n = strjoin("_", p);
1270
6.86k
                if (!n)
1271
0
                        return -ENOMEM;
1272
1273
6.86k
                if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
1274
2
                        return -EINVAL;
1275
205k
        } else {
1276
205k
                n = strdup(p);
1277
205k
                if (!n)
1278
0
                        return -ENOMEM;
1279
205k
        }
1280
1281
211k
        *ret = TAKE_PTR(n);
1282
211k
        return 0;
1283
211k
}
1284
1285
6.26k
char* cg_unescape(const char *p) {
        assert(p);

        /* Undoes cg_escape() by skipping over a single leading underscore, if there is one. The
         * returned pointer points into the input string, hence (unlike for cg_escape()) it must not be
         * freed. */

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1296
1297
43.2k
int cg_slice_to_path(const char *unit, char **ret) {
1298
43.2k
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1299
43.2k
        const char *dash;
1300
43.2k
        int r;
1301
1302
43.2k
        assert(unit);
1303
43.2k
        assert(ret);
1304
1305
43.2k
        if (streq(unit, SPECIAL_ROOT_SLICE))
1306
104
                return strdup_to(ret, "");
1307
1308
43.1k
        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1309
0
                return -EINVAL;
1310
1311
43.1k
        if (!endswith(unit, ".slice"))
1312
0
                return -EINVAL;
1313
1314
43.1k
        r = unit_name_to_prefix(unit, &p);
1315
43.1k
        if (r < 0)
1316
0
                return r;
1317
1318
43.1k
        dash = strchr(p, '-');
1319
1320
        /* Don't allow initial dashes */
1321
43.1k
        if (dash == p)
1322
239
                return -EINVAL;
1323
1324
166k
        while (dash) {
1325
123k
                _cleanup_free_ char *escaped = NULL;
1326
123k
                char n[dash - p + sizeof(".slice")];
1327
1328
#if HAS_FEATURE_MEMORY_SANITIZER
1329
                /* msan doesn't instrument stpncpy, so it thinks
1330
                 * n is later used uninitialized:
1331
                 * https://github.com/google/sanitizers/issues/926
1332
                 */
1333
                zero(n);
1334
#endif
1335
1336
                /* Don't allow trailing or double dashes */
1337
123k
                if (IN_SET(dash[1], 0, '-'))
1338
499
                        return -EINVAL;
1339
1340
123k
                strcpy(stpncpy(n, p, dash - p), ".slice");
1341
123k
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1342
0
                        return -EINVAL;
1343
1344
123k
                r = cg_escape(n, &escaped);
1345
123k
                if (r < 0)
1346
0
                        return r;
1347
1348
123k
                if (!strextend(&s, escaped, "/"))
1349
0
                        return -ENOMEM;
1350
1351
123k
                dash = strchr(dash+1, '-');
1352
123k
        }
1353
1354
42.4k
        r = cg_escape(unit, &e);
1355
42.4k
        if (r < 0)
1356
2
                return r;
1357
1358
42.4k
        if (!strextend(&s, e))
1359
0
                return -ENOMEM;
1360
1361
42.4k
        *ret = TAKE_PTR(s);
1362
42.4k
        return 0;
1363
42.4k
}
1364
1365
0
int cg_is_threaded(const char *path) {
1366
0
        _cleanup_free_ char *fs = NULL, *contents = NULL;
1367
0
        _cleanup_strv_free_ char **v = NULL;
1368
0
        int r;
1369
1370
0
        r = cg_get_path(path, "cgroup.type", &fs);
1371
0
        if (r < 0)
1372
0
                return r;
1373
1374
0
        r = read_full_virtual_file(fs, &contents, NULL);
1375
0
        if (r == -ENOENT)
1376
0
                return false; /* Assume no. */
1377
0
        if (r < 0)
1378
0
                return r;
1379
1380
0
        v = strv_split(contents, NULL);
1381
0
        if (!v)
1382
0
                return -ENOMEM;
1383
1384
        /* If the cgroup is in the threaded mode, it contains "threaded".
1385
         * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1386
0
        return strv_contains(v, "threaded") || strv_contains(v, "invalid");
1387
0
}
1388
1389
0
int cg_set_attribute(const char *path, const char *attribute, const char *value) {
1390
0
        _cleanup_free_ char *p = NULL;
1391
0
        int r;
1392
1393
0
        assert(attribute);
1394
1395
0
        r = cg_get_path(path, attribute, &p);
1396
0
        if (r < 0)
1397
0
                return r;
1398
1399
        /* https://lore.kernel.org/all/20250419183545.1982187-1-shakeel.butt@linux.dev/ adds O_NONBLOCK
1400
         * semantics to memory.max and memory.high to skip synchronous memory reclaim when O_NONBLOCK is
1401
         * enabled. Let's always open cgroupv2 attribute files in nonblocking mode to immediately take
1402
         * advantage of this and any other asynchronous resource reclaim that's added to the cgroupv2 API in
1403
         * the future. */
1404
0
        return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER|WRITE_STRING_FILE_OPEN_NONBLOCKING);
1405
0
}
1406
1407
1.62k
int cg_get_attribute(const char *path, const char *attribute, char **ret) {
1408
1.62k
        _cleanup_free_ char *p = NULL;
1409
1.62k
        int r;
1410
1411
1.62k
        assert(attribute);
1412
1413
1.62k
        r = cg_get_path(path, attribute, &p);
1414
1.62k
        if (r < 0)
1415
0
                return r;
1416
1417
1.62k
        return read_one_line_file(p, ret);
1418
1.62k
}
1419
1420
751
int cg_get_attribute_as_uint64(const char *path, const char *attribute, uint64_t *ret) {
1421
751
        _cleanup_free_ char *value = NULL;
1422
751
        uint64_t v;
1423
751
        int r;
1424
1425
751
        assert(ret);
1426
1427
751
        r = cg_get_attribute(path, attribute, &value);
1428
751
        if (r == -ENOENT)
1429
533
                return -ENODATA;
1430
218
        if (r < 0)
1431
218
                return r;
1432
1433
0
        if (streq(value, "max")) {
1434
0
                *ret = CGROUP_LIMIT_MAX;
1435
0
                return 0;
1436
0
        }
1437
1438
0
        r = safe_atou64(value, &v);
1439
0
        if (r < 0)
1440
0
                return r;
1441
1442
0
        *ret = v;
1443
0
        return 0;
1444
0
}
1445
1446
0
int cg_get_attribute_as_bool(const char *path, const char *attribute) {
1447
0
        _cleanup_free_ char *value = NULL;
1448
0
        int r;
1449
1450
0
        r = cg_get_attribute(path, attribute, &value);
1451
0
        if (r == -ENOENT)
1452
0
                return -ENODATA;
1453
0
        if (r < 0)
1454
0
                return r;
1455
1456
0
        return parse_boolean(value);
1457
0
}
1458
1459
0
int cg_get_owner(const char *path, uid_t *ret_uid) {
1460
0
        _cleanup_free_ char *f = NULL;
1461
0
        struct stat stats;
1462
0
        int r;
1463
1464
0
        assert(ret_uid);
1465
1466
0
        r = cg_get_path(path, /* suffix= */ NULL, &f);
1467
0
        if (r < 0)
1468
0
                return r;
1469
1470
0
        if (stat(f, &stats) < 0)
1471
0
                return -errno;
1472
1473
0
        r = stat_verify_directory(&stats);
1474
0
        if (r < 0)
1475
0
                return r;
1476
1477
0
        *ret_uid = stats.st_uid;
1478
0
        return 0;
1479
0
}
1480
1481
int cg_get_keyed_attribute(
1482
                const char *path,
1483
                const char *attribute,
1484
                char * const *keys,
1485
0
                char **values) {
1486
1487
0
        _cleanup_free_ char *filename = NULL, *contents = NULL;
1488
0
        size_t n;
1489
0
        int r;
1490
1491
0
        assert(path);
1492
0
        assert(attribute);
1493
1494
        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
1495
         * all keys to retrieve. The 'values' parameter should be passed as string size with the same number of
1496
         * entries as 'keys'. On success each entry will be set to the value of the matching key.
1497
         *
1498
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
1499
1500
0
        r = cg_get_path(path, attribute, &filename);
1501
0
        if (r < 0)
1502
0
                return r;
1503
1504
0
        r = read_full_file(filename, &contents, /* ret_size= */ NULL);
1505
0
        if (r < 0)
1506
0
                return r;
1507
1508
0
        n = strv_length(keys);
1509
0
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
1510
0
                return 0;
1511
0
        assert(strv_is_uniq(keys));
1512
1513
        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1514
0
        char **v = newa0(char*, n);
1515
0
        size_t n_done = 0;
1516
1517
0
        for (const char *p = contents; *p;) {
1518
0
                const char *w;
1519
0
                size_t i;
1520
1521
0
                for (i = 0; i < n; i++) {
1522
0
                        w = first_word(p, keys[i]);
1523
0
                        if (w)
1524
0
                                break;
1525
0
                }
1526
1527
0
                if (w) {
1528
0
                        if (v[i]) { /* duplicate entry? */
1529
0
                                r = -EBADMSG;
1530
0
                                goto fail;
1531
0
                        }
1532
1533
0
                        size_t l = strcspn(w, NEWLINE);
1534
1535
0
                        v[i] = strndup(w, l);
1536
0
                        if (!v[i]) {
1537
0
                                r = -ENOMEM;
1538
0
                                goto fail;
1539
0
                        }
1540
1541
0
                        n_done++;
1542
0
                        if (n_done >= n)
1543
0
                                break;
1544
1545
0
                        p = w + l;
1546
0
                } else
1547
0
                        p += strcspn(p, NEWLINE);
1548
1549
0
                p += strspn(p, NEWLINE);
1550
0
        }
1551
1552
0
        if (n_done < n) {
1553
0
                r = -ENXIO;
1554
0
                goto fail;
1555
0
        }
1556
1557
0
        memcpy(values, v, sizeof(char*) * n);
1558
0
        return 0;
1559
1560
0
fail:
1561
0
        free_many_charp(v, n);
1562
0
        return r;
1563
0
}
1564
1565
0
int cg_get_keyed_attribute_uint64(const char *path, const char *attribute, const char *key, uint64_t *ret) {
1566
0
        _cleanup_free_ char *val = NULL;
1567
0
        int r;
1568
1569
0
        assert(key);
1570
0
        assert(ret);
1571
1572
0
        r = cg_get_keyed_attribute(path, attribute, STRV_MAKE(key), &val);
1573
0
        if (r < 0)
1574
0
                return r;
1575
1576
0
        r = safe_atou64(val, ret);
1577
0
        if (r < 0)
1578
0
                return log_debug_errno(r, "Failed to parse value '%s' of key '%s' in cgroup attribute '%s': %m", val, key, attribute);
1579
1580
0
        return 0;
1581
0
}
1582
1583
60.2k
int cg_mask_to_string(CGroupMask mask, char **ret) {
        /* Formats a controller mask as a space-separated list of controller names, in the order of
         * the CGroupController enum. For an empty mask *ret is set to NULL. */

        _cleanup_free_ char *buf = NULL;
        bool need_separator = false;
        size_t len = 0;

        assert(ret);

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                const char *name = cgroup_controller_to_string(c);
                size_t name_len = strlen(name);

                /* Make room for the optional separator, the name and the trailing NUL */
                if (!GREEDY_REALLOC(buf, len + need_separator + name_len + 1))
                        return -ENOMEM;

                if (need_separator)
                        buf[len] = ' ';
                memcpy(buf + len + need_separator, name, name_len);
                len += need_separator + name_len;

                need_separator = true;
        }

        assert(buf);

        buf[len] = 0;
        *ret = TAKE_PTR(buf);

        return 0;
}
1624
1625
3.18k
int cg_mask_from_string(const char *s, CGroupMask *ret) {
        /* Parses a list of controller names into a mask. Words that are not recognized as controller
         * names are silently skipped. *ret is only written on success. */

        CGroupMask result = 0;

        assert(ret);
        assert(s);

        for (;;) {
                _cleanup_free_ char *word = NULL;
                int r;

                r = extract_first_word(&s, &word, NULL, 0);
                if (r < 0)
                        return r;
                if (r == 0)
                        break;

                CGroupController c = cgroup_controller_from_string(word);
                if (c >= 0)
                        result |= CGROUP_CONTROLLER_TO_MASK(c);
        }

        *ret = result;
        return 0;
}
1652
1653
0
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        _cleanup_free_ char *attr_path = NULL, *line = NULL;
        CGroupMask supported;
        int r;

        assert(ret);

        /* Determines the mask of supported cgroup controllers, limited to those we can make sense of and
         * that are actually accessible. Only real controllers are covered, i.e. the
         * CGROUP_CONTROLLER_BPF_xyz pseudo-controllers are not. */

        /* The supported and accessible controllers are listed in an attribute of the top-level cgroup */
        r = cg_get_path(root, "cgroup.controllers", &attr_path);
        if (r < 0)
                return r;

        r = read_one_line_file(attr_path, &line);
        if (r < 0)
                return r;

        r = cg_mask_from_string(line, &supported);
        if (r < 0)
                return r;

        /* Filter out controllers that are not supported in cgroup v2. */
        *ret = supported & CGROUP_MASK_V2;
        return 0;
}
1683
1684
0
int cg_mask_supported(CGroupMask *ret) {
        /* Like cg_mask_supported_subtree(), but operates on the path returned by cg_get_root_path(). */

        _cleanup_free_ char *root_path = NULL;
        int r = cg_get_root_path(&root_path);

        return r < 0 ? r : cg_mask_supported_subtree(root_path, ret);
}
1694
1695
0
int cg_is_delegated(const char *path) {
        /* Checks the delegation xattrs of the specified cgroup. Returns the boolean value of the
         * xattr that was found, false if neither xattr is present, or a negative errno on failure. */

        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* The (preferred) trusted xattr isn't set, hence check the untrusted one. This should
                 * be safe under the assumption that whoever is trusted enough to own the cgroup is
                 * also trusted enough to decide whether it is delegated or not. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
1710
1711
0
int cg_is_delegated_fd(int fd) {
1712
0
        int r;
1713
1714
0
        assert(fd >= 0);
1715
1716
0
        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* at_flags= */ 0);
1717
0
        if (!ERRNO_IS_NEG_XATTR_ABSENT(r))
1718
0
                return r;
1719
1720
0
        r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* at_flags= */ 0);
1721
0
        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
1722
0
}
1723
1724
0
int cg_has_coredump_receive(const char *path) {
        /* Returns the boolean value of the "user.coredump_receive" xattr of the specified cgroup, false
         * if the xattr is absent, or a negative errno on failure. */

        assert(path);

        int r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
1735
1736
/* Default value for each IO limit type: every limit starts out as CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
};
1742
1743
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
1744
        [CGROUP_IO_RBPS_MAX]  = "IOReadBandwidthMax",
1745
        [CGROUP_IO_WBPS_MAX]  = "IOWriteBandwidthMax",
1746
        [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
1747
        [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
1748
};
1749
1750
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
1751
1752
0
void cgroup_io_limits_list(void) {
1753
        DUMP_STRING_TABLE(cgroup_io_limit_type, CGroupIOLimitType, _CGROUP_IO_LIMIT_TYPE_MAX);
1754
0
}
1755
1756
/* Names of the cgroup controllers, indexed by CGroupController. This covers both the real controllers
 * and the "bpf-…" pseudo-controllers. */
static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU] = "cpu",
        [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET] = "cpuset",
        [CGROUP_CONTROLLER_IO] = "io",
        [CGROUP_CONTROLLER_BLKIO] = "blkio",
        [CGROUP_CONTROLLER_MEMORY] = "memory",
        [CGROUP_CONTROLLER_DEVICES] = "devices",
        [CGROUP_CONTROLLER_PIDS] = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND] = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
        [CGROUP_CONTROLLER_BPF_BIND_NETWORK_INTERFACE] = "bpf-bind-network-interface",
};

/* Provides cgroup_controller_to_string() and cgroup_controller_from_string(), which are used above. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
1774
1775
/* Names of the managed OOM modes, indexed by ManagedOOMMode. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

/* Presumably generates the string conversion helpers for the table above — see the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
1781
1782
/* Names of the managed OOM preferences, indexed by ManagedOOMPreference. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE]  = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT]  = "omit",
};

/* Presumably generates the string conversion helpers for the table above — see the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);