/src/systemd/src/shared/cgroup-setup.c

Source
/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <linux/magic.h>
#include <unistd.h>

#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "format-util.h"
#include "fs-util.h"
#include "log.h"
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "recurse-dir.h"
#include "set.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"

/* Parses a cgroup weight from the string 's' and stores it in '*ret'.
 *
 * An empty string is accepted and stored as CGROUP_WEIGHT_INVALID. Otherwise the string is parsed with
 * safe_atou64() and must lie within CGROUP_WEIGHT_MIN…CGROUP_WEIGHT_MAX (inclusive).
 *
 * Returns 0 on success, the negative error from safe_atou64() on parse failure, or -ERANGE if the value
 * is outside the permitted range. '*ret' is only written on success. */
int cg_weight_parse(const char *s, uint64_t *ret) {
        uint64_t weight;
        int k;

        assert(s);
        assert(ret);

        if (isempty(s)) {
                /* Nothing specified: hand back the "invalid" marker instead of failing. */
                *ret = CGROUP_WEIGHT_INVALID;
                return 0;
        }

        k = safe_atou64(s, &weight);
        if (k < 0)
                return k;

        if (weight >= CGROUP_WEIGHT_MIN && weight <= CGROUP_WEIGHT_MAX) {
                *ret = weight;
                return 0;
        }

        return -ERANGE;
}

/* Parses a CPU weight from the string 's' and stores it in '*ret'.
 *
 * Behaves like cg_weight_parse(), except that the literal string "idle" is additionally accepted and
 * stored as CGROUP_WEIGHT_IDLE.
 *
 * Returns 0 on success, or whatever negative error cg_weight_parse() reports. */
int cg_cpu_weight_parse(const char *s, uint64_t *ret) {
        assert(s);
        assert(ret);

        if (streq(s, "idle")) {
                /* Store the marker and report success explicitly. Returning the value of the assignment
                 * expression instead would implicitly narrow a uint64_t to int, and would only mean
                 * "success" for as long as the marker constant happens to be zero. */
                *ret = CGROUP_WEIGHT_IDLE;
                return 0;
        }

        return cg_weight_parse(s, ret);
}

/* Directory recursion callback used by cg_trim(): when the recursion leaves a directory entry, try to
 * remove that directory. Always asks the recursion to continue, i.e. removal failures are never fatal. */
static int trim_cb(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        /* We only care about the moment a directory is left; 'de' is looked at for that event only. */
        if (event != RECURSE_DIR_LEAVE || de->d_type != DT_DIR)
                return RECURSE_DIR_CONTINUE;

        if (unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) >= 0)
                return RECURSE_DIR_CONTINUE;

        /* Removal of an inner cgroup failed. That is ignored, but debug logged unless the error code is
         * one of the expected ones. */
        if (!IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);

        return RECURSE_DIR_CONTINUE;
}

/* Removes the sub-cgroups below 'path' (best effort), and, if 'delete_root' is set and 'path' is not
 * empty or the root, the cgroup 'path' itself.
 *
 * Returns a negative error if the path cannot be resolved. Failures of the recursion (other than
 * -ENOENT) and of the final rmdir() (other than ENOENT) are collected into the return value. */
int cg_trim(const char *path, bool delete_root) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_get_path(path, /* suffix= */ NULL, &fs);
        if (r < 0)
                return r;

        r = recurse_dir_at(
                        AT_FDCWD,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_ENSURE_TYPE,
                        trim_cb,
                        /* userdata= */ NULL);
        if (r < 0) {
                if (r == -ENOENT)
                        /* Nothing there at all: that is as trimmed as it gets, hence not an error. */
                        r = 0;
                else
                        log_debug_errno(r, "Failed to trim subcgroups of '%s': %m", path);
        }

        /* The top-level cgroup is only touched on explicit request, and never if it is the root. */
        if (!delete_root || empty_or_root(path))
                return r;

        /* A top-level cgroup that is already gone counts as success. */
        if (rmdir(fs) >= 0 || errno == ENOENT)
                return r;

        /* Failing to remove the top-level cgroup is propagated to the caller. It is also debug logged,
         * unless the error code is an expected one. */
        if (!IN_SET(errno, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim cgroup '%s': %m", path);
        RET_GATHER(r, -errno);

        return r;
}

/* Create a cgroup in the hierarchy of controller.
 * Returns 0 if the group already existed, 1 on success, negative otherwise.
 */
int cg_create(const char *path) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_get_path(path, /* suffix= */ NULL, &fs);
        if (r < 0)
                return r;

        r = mkdir_parents(fs, 0755);
        if (r < 0)
                return r;

        r = RET_NERRNO(mkdir(fs, 0755));
        if (r == -EEXIST)
                return 0;
        if (r < 0)
                return r;

        return 1;
}

int cg_attach(const char *path, pid_t pid) {
        _cleanup_free_ char *fs = NULL;
        char c[DECIMAL_STR_MAX(pid_t) + 2];
        int r;

        assert(path);
        assert(pid >= 0);

        r = cg_get_path(path, "cgroup.procs", &fs);
        if (r < 0)
                return r;

        if (pid == 0)
                pid = getpid_cached();

        xsprintf(c, PID_FMT "\n", pid);

        r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r == -EOPNOTSUPP && cg_is_threaded(path) > 0)
                /* When the threaded mode is used, we cannot read/write the file. Let's return recognizable error. */
                return -EUCLEAN;
        if (r < 0)
                return r;

        return 0;
}

/* Like cg_attach(), but takes a file descriptor instead of a cgroup path: the PID is written to the
 * "cgroup.procs" file looked up relative to 'fd'. A 'pid' of 0 refers to the calling process.
 *
 * Returns the result of write_string_file_at(). */
int cg_fd_attach(int fd, pid_t pid) {
        char line[DECIMAL_STR_MAX(pid_t) + 2];

        assert(fd >= 0);
        assert(pid >= 0);

        /* PID 0 is shorthand for "ourselves" */
        xsprintf(line, PID_FMT "\n", pid == 0 ? getpid_cached() : pid);

        return write_string_file_at(fd, "cgroup.procs", line, WRITE_STRING_FILE_DISABLE_BUFFER);
}

/* Creates the cgroup 'path' via cg_create() and then attaches 'pid' to it via cg_attach().
 *
 * On success returns what cg_create() returned (1 if newly created, 0 if it already existed). On
 * failure returns the negative error of whichever step failed. */
int cg_create_and_attach(const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(path);
        if (created < 0)
                return created;

        /* Note that the cgroup is left in place if attaching fails. */
        attached = cg_attach(path, pid);

        return attached < 0 ? attached : created;
}

int cg_set_access(
                const char *path,
                uid_t uid,
                gid_t gid) {

        static const struct {
                const char *name;
                bool fatal;
        } attributes[] = {
                { "cgroup.procs",           true  },
                { "cgroup.subtree_control", true  },
                { "cgroup.threads",         false },
                { "memory.oom.group",       false },
                { "memory.reclaim",         false },
        };

        _cleanup_free_ char *fs = NULL;
        int r;

        assert(path);

        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        /* Configure access to the cgroup itself */
        r = cg_get_path(path, /* suffix= */ NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        FOREACH_ELEMENT(i, attributes) {
                _cleanup_free_ char *a = path_join(fs, i->name);
                if (!a)
                        return -ENOMEM;

                r = chmod_and_chown(a, 0644, uid, gid);
                if (r < 0) {
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", a);
                }
        }

        return 0;
}

/* State handed to access_callback() through the recursion's 'userdata' pointer. */
struct access_callback_data {
        uid_t uid;  /* owner passed to fchownat() for every visited inode */
        gid_t gid;  /* group passed to fchownat() for every visited inode */
        int error;  /* fchownat() failures are gathered here via RET_GATHER(); cg_set_access_recursive()
                     * asserts it is <= 0 and returns it */
};

/* Directory recursion callback used by cg_set_access_recursive(): changes ownership of each inode
 * reported with a RECURSE_DIR_ENTER or RECURSE_DIR_ENTRY event to the uid/gid found in 'userdata'.
 * Failures are debug logged and gathered in the userdata's 'error' field, and the recursion always
 * continues. */
static int access_callback(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        struct access_callback_data *state;

        /* All other events are of no interest to us */
        if (event != RECURSE_DIR_ENTER && event != RECURSE_DIR_ENTRY)
                return RECURSE_DIR_CONTINUE;

        state = ASSERT_PTR(userdata);

        assert(path);
        assert(inode_fd >= 0);

        /* Operate on the inode fd itself (empty path + AT_EMPTY_PATH), rather than on a name. */
        if (fchownat(inode_fd, "", state->uid, state->gid, AT_EMPTY_PATH) < 0)
                RET_GATHER(state->error, log_debug_errno(errno, "Failed to change ownership of '%s', ignoring: %m", path));

        return RECURSE_DIR_CONTINUE;
}

int cg_set_access_recursive(
                const char *path,
                uid_t uid,
                gid_t gid) {

        _cleanup_close_ int fd = -EBADF;
        _cleanup_free_ char *fs = NULL;
        int r;

        assert(path);

        /* A recursive version of cg_set_access(). But note that this one changes ownership of *all* files,
         * not just the allowlist that cg_set_access() uses. Use cg_set_access() on the cgroup you want to
         * delegate, and cg_set_access_recursive() for any subcgroups you might want to create below it. */

        if (!uid_is_valid(uid) && !gid_is_valid(gid))
                return 0;

        r = cg_get_path(path, /* suffix= */ NULL, &fs);
        if (r < 0)
                return r;

        fd = open(fs, O_DIRECTORY|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        struct access_callback_data d = {
                .uid = uid,
                .gid = gid,
        };

        r = recurse_dir(fd,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL,
                        access_callback,
                        &d);
        if (r < 0)
                return r;

        assert(d.error <= 0);
        return d.error;
}

/* Migrates the processes of cgroup 'from' into cgroup 'to'.
 *
 * The process list of 'from' is enumerated repeatedly, and every PID not seen before is attached to 'to'
 * with cg_attach(). This repeats until a pass encounters no new PID.
 *
 * 'flags' is passed on to cg_read_pid(); if CGROUP_IGNORE_SELF is set, the calling process is skipped.
 *
 * Returns 1 if at least one process was attached and no error was gathered, 0 if nothing was attached,
 * and a negative error otherwise. Errors are accumulated in 'ret' via RET_GATHER() — presumably keeping
 * the first one, confirm against its definition. */
int cg_migrate(
                const char *from,
                const char *to,
                CGroupFlags flags) {

        /* PIDs we already tried to attach, successfully or not, so that no PID is attempted twice. */
        _cleanup_set_free_ Set *s = NULL;
        bool done;
        int r, ret = 0;

        assert(from);
        assert(to);

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid;

                /* Assume this is the last pass; reset below as soon as a new PID is processed. */
                done = true;

                r = cg_enumerate_processes(from, &f);
                if (r < 0)
                        return RET_GATHER(ret, r);

                while ((r = cg_read_pid(f, &pid, flags)) > 0) {
                        /* Throw an error if unmappable PIDs are in output, we can't migrate those.
                         * Note that this is returned directly, without going through 'ret'. */
                        if (pid == 0)
                                return -EREMOTE;

                        /* This might do weird stuff if we aren't a single-threaded program. However, we
                         * luckily know we are. */
                        if (FLAGS_SET(flags, CGROUP_IGNORE_SELF) && pid == getpid_cached())
                                continue;

                        /* Already attempted in an earlier pass (or earlier in this one) */
                        if (set_contains(s, PID_TO_PTR(pid)))
                                continue;

                        if (pid_is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(to, pid);
                        if (r < 0) {
                                /* -ESRCH is deliberately not recorded as a failure. */
                                if (r != -ESRCH)
                                        RET_GATHER(ret, r);
                        } else if (ret == 0)
                                /* Remember that we moved something, unless an error is already pending. */
                                ret = 1;

                        /* We processed a new PID, hence another pass over the process list is needed. */
                        done = false;

                        r = set_ensure_put(&s, /* hash_ops= */ NULL, PID_TO_PTR(pid));
                        if (r < 0)
                                return RET_GATHER(ret, r);
                }
                /* On -ENODEV skip the error return below; 'continue' in a do/while still evaluates the
                 * loop condition, so another pass only happens if 'done' was cleared above. */
                if (r == -ENODEV)
                        continue;
                if (r < 0)
                        return RET_GATHER(ret, r);
        } while (!done);

        return ret;
}

/* Enables or disables controllers for the children of cgroup 'p' by writing "+name" or "-name" strings
 * to its "cgroup.subtree_control" attribute.
 *
 * Only controllers that are part of CGROUP_MASK_V2 and contained in 'supported' are considered. Those
 * also set in 'mask' are turned on, the others are turned off. Individual write failures are debug
 * logged but not returned.
 *
 * If 'ret_result_mask' is non-NULL it receives the set of controllers considered enabled afterwards.
 *
 * Returns 0 on success (including when 'supported' is empty), negative error if the attribute path
 * cannot be determined or the file cannot be opened. */
int cg_enable(
                CGroupMask supported,
                CGroupMask mask,
                const char *p,
                CGroupMask *ret_result_mask) {

        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *fs = NULL;
        CGroupMask enabled = 0;
        int r;

        assert(p);

        if (supported == 0) {
                if (ret_result_mask)
                        *ret_result_mask = 0;
                return 0;
        }

        r = cg_get_path(p, "cgroup.subtree_control", &fs);
        if (r < 0)
                return r;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(CGROUP_MASK_V2, bit) || !FLAGS_SET(supported, bit))
                        continue;

                bool enable = FLAGS_SET(mask, bit);
                const char *n = cgroup_controller_to_string(c);
                {
                        /* One byte for the sign, then the controller name, then the trailing NUL */
                        char s[1 + strlen(n) + 1];

                        s[0] = enable ? '+' : '-';
                        strcpy(s + 1, n);

                        /* The file is opened lazily, when the first controller is about to be written. */
                        if (!f) {
                                f = fopen(fs, "we");
                                if (!f)
                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
                        }

                        r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
                        if (r >= 0) {
                                /* We managed to turn a controller on, hence set the bit reflecting that. */
                                if (enable)
                                        enabled |= bit;
                        } else {
                                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                                enable ? "enable" : "disable", n, p, fs);
                                clearerr(f);

                                /* If we can't turn off a controller, leave it on in the reported resulting
                                 * mask. This happens for example when we attempt to turn off a controller
                                 * up in the tree that is used down in the tree.
                                 *
                                 * Why only EBUSY, and not other errors such as EINVAL or EOPNOTSUPP?
                                 * Because EBUSY indicates that the controller is currently enabled and
                                 * cannot be disabled since something down the hierarchy still uses it,
                                 * hence it is safe to assume it is still on after the failed operation.
                                 * Any other error most likely means something like "I never heard of
                                 * this controller", where it is safer to assume the controller is
                                 * unknown and hence certainly not enabled. */
                                if (r == -EBUSY && !enable)
                                        enabled |= bit;
                        }
                }
        }

        /* Report the precise set of controllers now enabled for the cgroup. */
        if (ret_result_mask)
                *ret_result_mask = enabled;

        return 0;
}

/* Checks whether any legacy cgroup controller/hierarchy is mounted, judging by the file system type
 * found on /sys/fs/cgroup/.
 *
 * Returns false if the directory does not exist, or if a cgroup2 file system or plain sysfs is found
 * there. Returns true if a tmpfs is found there. Returns a negative error if statfs() fails for another
 * reason, or -ENOMEDIUM if the file system type is not recognized. */
int cg_has_legacy(void) {
        struct statfs sfs;

        if (statfs("/sys/fs/cgroup/", &sfs) < 0) {
                if (errno != ENOENT)
                        return log_error_errno(errno, "Failed to statfs /sys/fs/cgroup/: %m");

                return false; /* sysfs not mounted? */
        }

        /* Either the unified hierarchy, or nothing mounted there yet (we still see sysfs itself) */
        if (is_fs_type(&sfs, CGROUP2_SUPER_MAGIC) || is_fs_type(&sfs, SYSFS_MAGIC))
                return false;

        if (!is_fs_type(&sfs, TMPFS_MAGIC))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup/.",
                                       (unsigned long long) sfs.f_type);

        log_info("Found tmpfs on /sys/fs/cgroup/, assuming legacy hierarchy.");
        return true;
}

Coverage Report

Created: 2026-05-04 06:47

Line	Count	Source
1		/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3		#include <linux/magic.h>
4		#include <unistd.h>
5
6		#include "cgroup-setup.h"
7		#include "cgroup-util.h"
8		#include "errno-util.h"
9		#include "fd-util.h"
10		#include "fileio.h"
11		#include "format-util.h"
12		#include "fs-util.h"
13		#include "log.h"
14		#include "mkdir.h"
15		#include "parse-util.h"
16		#include "path-util.h"
17		#include "process-util.h"
18		#include "recurse-dir.h"
19		#include "set.h"
20		#include "stat-util.h"
21		#include "stdio-util.h"
22		#include "string-util.h"
23		#include "user-util.h"
24
25	4.34k	int cg_weight_parse(const char *s, uint64_t *ret) {
26	4.34k	uint64_t u;
27	4.34k	int r;
28
29	4.34k	assert(s);
30	4.34k	assert(ret);
31
32	4.34k	if (isempty(s)) {
33	395	*ret = CGROUP_WEIGHT_INVALID;
34	395	return 0;
35	395	}
36
37	3.94k	r = safe_atou64(s, &u);
38	3.94k	if (r < 0)
39	1.22k	return r;
40
41	2.72k	if (u < CGROUP_WEIGHT_MIN \|\| u > CGROUP_WEIGHT_MAX)
42	1.15k	return -ERANGE;
43
44	1.56k	*ret = u;
45	1.56k	return 0;
46	2.72k	}
47
48	1.87k	int cg_cpu_weight_parse(const char *s, uint64_t *ret) {
49	1.87k	assert(s);
50	1.87k	assert(ret);
51
52	1.87k	if (streq(s, "idle"))
53	254	return *ret = CGROUP_WEIGHT_IDLE;
54
55	1.61k	return cg_weight_parse(s, ret);
56	1.87k	}
57
58		static int trim_cb(
59		RecurseDirEvent event,
60		const char *path,
61		int dir_fd,
62		int inode_fd,
63		const struct dirent *de,
64		const struct statx *sx,
65	0	void *userdata) {
66
67		/* Failures to delete inner cgroup we ignore (but debug log in case error code is unexpected) */
68	0	if (event == RECURSE_DIR_LEAVE &&
69	0	de->d_type == DT_DIR &&
70	0	unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
71	0	!IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
72	0	log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);
73
74	0	return RECURSE_DIR_CONTINUE;
75	0	}
76
77	0	int cg_trim(const char *path, bool delete_root) {
78	0	_cleanup_free_ char *fs = NULL;
79	0	int r;
80
81	0	r = cg_get_path(path, /* suffix= */ NULL, &fs);
82	0	if (r < 0)
83	0	return r;
84
85	0	r = recurse_dir_at(
86	0	AT_FDCWD,
87	0	fs,
88	0	/* statx_mask= */ 0,
89	0	/* n_depth_max= */ UINT_MAX,
90	0	RECURSE_DIR_ENSURE_TYPE,
91	0	trim_cb,
92	0	/* userdata= */ NULL);
93	0	if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
94	0	r = 0;
95	0	else if (r < 0)
96	0	log_debug_errno(r, "Failed to trim subcgroups of '%s': %m", path);
97
98		/* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
99		* already gone anyway). Also, let's debug log about this failure, except if the error code is an
100		* expected one. */
101	0	if (delete_root && !empty_or_root(path) &&
102	0	rmdir(fs) < 0 && errno != ENOENT) {
103	0	if (!IN_SET(errno, ENOTEMPTY, EBUSY))
104	0	log_debug_errno(errno, "Failed to trim cgroup '%s': %m", path);
105	0	RET_GATHER(r, -errno);
106	0	}
107
108	0	return r;
109	0	}
110
111		/* Create a cgroup in the hierarchy of controller.
112		* Returns 0 if the group already existed, 1 on success, negative otherwise.
113		*/
114	0	int cg_create(const char *path) {
115	0	_cleanup_free_ char *fs = NULL;
116	0	int r;
117
118	0	r = cg_get_path(path, /* suffix= */ NULL, &fs);
119	0	if (r < 0)
120	0	return r;
121
122	0	r = mkdir_parents(fs, 0755);
123	0	if (r < 0)
124	0	return r;
125
126	0	r = RET_NERRNO(mkdir(fs, 0755));
127	0	if (r == -EEXIST)
128	0	return 0;
129	0	if (r < 0)
130	0	return r;
131
132	0	return 1;
133	0	}
134
135	0	int cg_attach(const char *path, pid_t pid) {
136	0	_cleanup_free_ char *fs = NULL;
137	0	char c[DECIMAL_STR_MAX(pid_t) + 2];
138	0	int r;
139
140	0	assert(path);
141	0	assert(pid >= 0);
142
143	0	r = cg_get_path(path, "cgroup.procs", &fs);
144	0	if (r < 0)
145	0	return r;
146
147	0	if (pid == 0)
148	0	pid = getpid_cached();
149
150	0	xsprintf(c, PID_FMT "\n", pid);
151
152	0	r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
153	0	if (r == -EOPNOTSUPP && cg_is_threaded(path) > 0)
154		/* When the threaded mode is used, we cannot read/write the file. Let's return recognizable error. */
155	0	return -EUCLEAN;
156	0	if (r < 0)
157	0	return r;
158
159	0	return 0;
160	0	}
161
162	0	int cg_fd_attach(int fd, pid_t pid) {
163	0	char c[DECIMAL_STR_MAX(pid_t) + 2];
164
165	0	assert(fd >= 0);
166	0	assert(pid >= 0);
167
168	0	if (pid == 0)
169	0	pid = getpid_cached();
170
171	0	xsprintf(c, PID_FMT "\n", pid);
172
173	0	return write_string_file_at(fd, "cgroup.procs", c, WRITE_STRING_FILE_DISABLE_BUFFER);
174	0	}
175
176	0	int cg_create_and_attach(const char *path, pid_t pid) {
177	0	int r, q;
178
179		/* This does not remove the cgroup on failure */
180
181	0	assert(pid >= 0);
182
183	0	r = cg_create(path);
184	0	if (r < 0)
185	0	return r;
186
187	0	q = cg_attach(path, pid);
188	0	if (q < 0)
189	0	return q;
190
191	0	return r;
192	0	}
193
194		int cg_set_access(
195		const char *path,
196		uid_t uid,
197	0	gid_t gid) {
198
199	0	static const struct {
200	0	const char *name;
201	0	bool fatal;
202	0	} attributes[] = {
203	0	{ "cgroup.procs", true },
204	0	{ "cgroup.subtree_control", true },
205	0	{ "cgroup.threads", false },
206	0	{ "memory.oom.group", false },
207	0	{ "memory.reclaim", false },
208	0	};
209
210	0	_cleanup_free_ char *fs = NULL;
211	0	int r;
212
213	0	assert(path);
214
215	0	if (uid == UID_INVALID && gid == GID_INVALID)
216	0	return 0;
217
218		/* Configure access to the cgroup itself */
219	0	r = cg_get_path(path, /* suffix= */ NULL, &fs);
220	0	if (r < 0)
221	0	return r;
222
223	0	r = chmod_and_chown(fs, 0755, uid, gid);
224	0	if (r < 0)
225	0	return r;
226
227		/* Configure access to the cgroup's attributes */
228	0	FOREACH_ELEMENT(i, attributes) {
229	0	_cleanup_free_ char *a = path_join(fs, i->name);
230	0	if (!a)
231	0	return -ENOMEM;
232
233	0	r = chmod_and_chown(a, 0644, uid, gid);
234	0	if (r < 0) {
235	0	if (i->fatal)
236	0	return r;
237
238	0	log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", a);
239	0	}
240	0	}
241
242	0	return 0;
243	0	}
244
245		struct access_callback_data {
246		uid_t uid;
247		gid_t gid;
248		int error;
249		};
250
251		static int access_callback(
252		RecurseDirEvent event,
253		const char *path,
254		int dir_fd,
255		int inode_fd,
256		const struct dirent *de,
257		const struct statx *sx,
258	0	void *userdata) {
259
260	0	if (!IN_SET(event, RECURSE_DIR_ENTER, RECURSE_DIR_ENTRY))
261	0	return RECURSE_DIR_CONTINUE;
262
263	0	struct access_callback_data *d = ASSERT_PTR(userdata);
264
265	0	assert(path);
266	0	assert(inode_fd >= 0);
267
268	0	if (fchownat(inode_fd, "", d->uid, d->gid, AT_EMPTY_PATH) < 0)
269	0	RET_GATHER(d->error, log_debug_errno(errno, "Failed to change ownership of '%s', ignoring: %m", path));
270
271	0	return RECURSE_DIR_CONTINUE;
272	0	}
273
274		int cg_set_access_recursive(
275		const char *path,
276		uid_t uid,
277	0	gid_t gid) {
278
279	0	_cleanup_close_ int fd = -EBADF;
280	0	_cleanup_free_ char *fs = NULL;
281	0	int r;
282
283	0	assert(path);
284
285		/* A recursive version of cg_set_access(). But note that this one changes ownership of *all* files,
286		* not just the allowlist that cg_set_access() uses. Use cg_set_access() on the cgroup you want to
287		* delegate, and cg_set_access_recursive() for any subcgroups you might want to create below it. */
288
289	0	if (!uid_is_valid(uid) && !gid_is_valid(gid))
290	0	return 0;
291
292	0	r = cg_get_path(path, /* suffix= */ NULL, &fs);
293	0	if (r < 0)
294	0	return r;
295
296	0	fd = open(fs, O_DIRECTORY\|O_CLOEXEC);
297	0	if (fd < 0)
298	0	return -errno;
299
300	0	struct access_callback_data d = {
301	0	.uid = uid,
302	0	.gid = gid,
303	0	};
304
305	0	r = recurse_dir(fd,
306	0	fs,
307	0	/* statx_mask= */ 0,
308	0	/* n_depth_max= */ UINT_MAX,
309	0	RECURSE_DIR_SAME_MOUNT\|RECURSE_DIR_INODE_FD\|RECURSE_DIR_TOPLEVEL,
310	0	access_callback,
311	0	&d);
312	0	if (r < 0)
313	0	return r;
314
315	0	assert(d.error <= 0);
316	0	return d.error;
317	0	}
318
319		int cg_migrate(
320		const char *from,
321		const char *to,
322	0	CGroupFlags flags) {
323
324	0	_cleanup_set_free_ Set *s = NULL;
325	0	bool done;
326	0	int r, ret = 0;
327
328	0	assert(from);
329	0	assert(to);
330
331	0	do {
332	0	_cleanup_fclose_ FILE *f = NULL;
333	0	pid_t pid;
334
335	0	done = true;
336
337	0	r = cg_enumerate_processes(from, &f);
338	0	if (r < 0)
339	0	return RET_GATHER(ret, r);
340
341	0	while ((r = cg_read_pid(f, &pid, flags)) > 0) {
342		/* Throw an error if unmappable PIDs are in output, we can't migrate those. */
343	0	if (pid == 0)
344	0	return -EREMOTE;
345
346		/* This might do weird stuff if we aren't a single-threaded program. However, we
347		* luckily know we are. */
348	0	if (FLAGS_SET(flags, CGROUP_IGNORE_SELF) && pid == getpid_cached())
349	0	continue;
350
351	0	if (set_contains(s, PID_TO_PTR(pid)))
352	0	continue;
353
354	0	if (pid_is_kernel_thread(pid) > 0)
355	0	continue;
356
357	0	r = cg_attach(to, pid);
358	0	if (r < 0) {
359	0	if (r != -ESRCH)
360	0	RET_GATHER(ret, r);
361	0	} else if (ret == 0)
362	0	ret = 1;
363
364	0	done = false;
365
366	0	r = set_ensure_put(&s, /* hash_ops= */ NULL, PID_TO_PTR(pid));
367	0	if (r < 0)
368	0	return RET_GATHER(ret, r);
369	0	}
370	0	if (r == -ENODEV)
371	0	continue;
372	0	if (r < 0)
373	0	return RET_GATHER(ret, r);
374	0	} while (!done);
375
376	0	return ret;
377	0	}
378
379		int cg_enable(
380		CGroupMask supported,
381		CGroupMask mask,
382		const char *p,
383	0	CGroupMask *ret_result_mask) {
384
385	0	_cleanup_fclose_ FILE *f = NULL;
386	0	_cleanup_free_ char *fs = NULL;
387	0	CGroupController c;
388	0	CGroupMask ret = 0;
389	0	int r;
390
391	0	assert(p);
392
393	0	if (supported == 0) {
394	0	if (ret_result_mask)
395	0	*ret_result_mask = 0;
396	0	return 0;
397	0	}
398
399	0	r = cg_get_path(p, "cgroup.subtree_control", &fs);
400	0	if (r < 0)
401	0	return r;
402
403	0	for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
404	0	CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
405	0	const char *n;
406
407	0	if (!FLAGS_SET(CGROUP_MASK_V2, bit))
408	0	continue;
409
410	0	if (!FLAGS_SET(supported, bit))
411	0	continue;
412
413	0	n = cgroup_controller_to_string(c);
414	0	{
415	0	char s[1 + strlen(n) + 1];
416
417	0	s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
418	0	strcpy(s + 1, n);
419
420	0	if (!f) {
421	0	f = fopen(fs, "we");
422	0	if (!f)
423	0	return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
424	0	}
425
426	0	r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
427	0	if (r < 0) {
428	0	log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
429	0	FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
430	0	clearerr(f);
431
432		/* If we can't turn off a controller, leave it on in the reported resulting mask. This
433		* happens for example when we attempt to turn off a controller up in the tree that is
434		* used down in the tree. */
435	0	if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
436		* only here, and not follow the same logic
437		* for other errors such as EINVAL or
438		* EOPNOTSUPP or anything else. That's
439		* because EBUSY indicates that the
440		* controllers is currently enabled and
441		* cannot be disabled because something down
442		* the hierarchy is still using it. Any other
443		* error most likely means something like "I
444		* never heard of this controller" or
445		* similar. In the former case it's hence
446		* safe to assume the controller is still on
447		* after the failed operation, while in the
448		* latter case it's safer to assume the
449		* controller is unknown and hence certainly
450		* not enabled. */
451	0	ret \|= bit;
452	0	} else {
453		/* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
454	0	if (FLAGS_SET(mask, bit))
455	0	ret \|= bit;
456	0	}
457	0	}
458	0	}
459
460		/* Let's return the precise set of controllers now enabled for the cgroup. */
461	0	if (ret_result_mask)
462	0	*ret_result_mask = ret;
463
464	0	return 0;
465	0	}
466
467	0	int cg_has_legacy(void) {
468	0	struct statfs fs;
469
470		/* Checks if any legacy controller/hierarchy is mounted. */
471
472	0	if (statfs("/sys/fs/cgroup/", &fs) < 0) {
473	0	if (errno == ENOENT) /* sysfs not mounted? */
474	0	return false;
475
476	0	return log_error_errno(errno, "Failed to statfs /sys/fs/cgroup/: %m");
477	0	}
478
479	0	if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC) \|\|
480	0	is_fs_type(&fs, SYSFS_MAGIC)) /* not mounted yet */
481	0	return false;
482
483	0	if (is_fs_type(&fs, TMPFS_MAGIC)) {
484	0	log_info("Found tmpfs on /sys/fs/cgroup/, assuming legacy hierarchy.");
485	0	return true;
486	0	}
487
488	0	return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
489	0	"Unknown filesystem type %llx mounted on /sys/fs/cgroup/.",
490	0	(unsigned long long) fs.f_type);
491	0	}