/src/systemd/src/basic/pidfd-util.c
Line | Count | Source |
1 | | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | | |
3 | | #include <linux/fs.h> |
4 | | #include <linux/magic.h> |
5 | | #include <sys/ioctl.h> |
6 | | #include <threads.h> |
7 | | #include <unistd.h> |
8 | | |
9 | | #include "errno-util.h" |
10 | | #include "fd-util.h" |
11 | | #include "fileio.h" |
12 | | #include "mountpoint-util.h" |
13 | | #include "parse-util.h" |
14 | | #include "pidfd-util.h" |
15 | | #include "process-util.h" |
16 | | #include "stat-util.h" |
17 | | #include "stdio-util.h" |
18 | | #include "string-util.h" |
19 | | #include "unaligned.h" |
20 | | |
21 | | static thread_local int have_pidfs = -1; |
22 | | |
23 | 0 | int pidfd_check_pidfs(int pid_fd) { |
24 | | |
25 | | /* NB: the passed fd *must* be acquired via pidfd_open(), i.e. must be a true pidfd! */ |
26 | |
|
27 | 0 | if (have_pidfs >= 0) |
28 | 0 | return have_pidfs; |
29 | | |
30 | 0 | _cleanup_close_ int our_fd = -EBADF; |
31 | 0 | if (pid_fd < 0) { |
32 | 0 | our_fd = pidfd_open(getpid_cached(), /* flags= */ 0); |
33 | 0 | if (our_fd < 0) |
34 | 0 | return -errno; |
35 | | |
36 | 0 | pid_fd = our_fd; |
37 | 0 | } |
38 | | |
39 | 0 | return (have_pidfs = fd_is_fs_type(pid_fd, PID_FS_MAGIC)); |
40 | 0 | } |
41 | | |
42 | 0 | int pidfd_get_namespace(int fd, unsigned long ns_type_cmd) { |
43 | 0 | static bool cached_supported = true; |
44 | | |
45 | | /* Obtain the namespace fd from pidfd directly through ioctl(PIDFD_GET_*_NAMESPACE). |
46 | | * |
47 | | * Returns -EOPNOTSUPP if ioctl on pidfds are not supported, -ENOPKG if the requested namespace |
48 | | * is disabled in kernel. (The errno used are different from what kernel returns via ioctl(), |
49 | | * see below) */ |
50 | |
|
51 | 0 | assert(fd >= 0); |
52 | | |
53 | | /* If we know ahead of time that pidfs is unavailable, shortcut things. But otherwise we don't |
54 | | * call pidfd_check_pidfs() here, which is kinda extraneous and our own cache is required |
55 | | * anyways (pidfs is introduced in kernel 6.9 while ioctl support there is added in 6.11). */ |
56 | 0 | if (have_pidfs == 0 || !cached_supported) |
57 | 0 | return -EOPNOTSUPP; |
58 | | |
59 | 0 | int nsfd = ioctl(fd, ns_type_cmd, 0); |
60 | 0 | if (nsfd < 0) { |
61 | | /* Kernel returns EOPNOTSUPP if the ns type in question is disabled. Hence we need to look |
62 | | * at precise errno instead of generic ERRNO_IS_(IOCTL_)NOT_SUPPORTED. */ |
63 | 0 | if (IN_SET(errno, ENOTTY, EINVAL)) { |
64 | 0 | cached_supported = false; |
65 | 0 | return -EOPNOTSUPP; |
66 | 0 | } |
67 | 0 | if (errno == EOPNOTSUPP) /* Translate to something more recognizable */ |
68 | 0 | return -ENOPKG; |
69 | | |
70 | 0 | return -errno; |
71 | 0 | } |
72 | | |
73 | 0 | return nsfd; |
74 | 0 | } |
75 | | |
76 | 405 | int pidfd_get_info(int fd, struct pidfd_info *info) { |
77 | 405 | static bool cached_supported = true; |
78 | | |
79 | 405 | assert(fd >= 0); |
80 | 405 | assert(info); |
81 | | |
82 | 405 | if (have_pidfs == 0 || !cached_supported) |
83 | 404 | return -EOPNOTSUPP; |
84 | | |
85 | 1 | if (ioctl(fd, PIDFD_GET_INFO, info) < 0) { |
86 | 1 | if (ERRNO_IS_IOCTL_NOT_SUPPORTED(errno)) { |
87 | 1 | cached_supported = false; |
88 | 1 | return -EOPNOTSUPP; |
89 | 1 | } |
90 | | |
91 | 0 | return -errno; |
92 | 1 | } |
93 | | |
94 | 0 | return 0; |
95 | 1 | } |
96 | | |
97 | 270 | static int pidfd_get_pid_fdinfo(int fd, pid_t *ret) { |
98 | 270 | char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; |
99 | 270 | _cleanup_free_ char *p = NULL; |
100 | 270 | int r; |
101 | | |
102 | 270 | assert(fd >= 0); |
103 | | |
104 | 270 | xsprintf(path, "/proc/self/fdinfo/%i", fd); |
105 | | |
106 | 270 | r = get_proc_field(path, "Pid", &p); |
107 | 270 | if (r == -ENOENT) |
108 | 0 | return -EBADF; |
109 | 270 | if (r == -ENODATA) /* not a pidfd? */ |
110 | 0 | return -ENOTTY; |
111 | 270 | if (r < 0) |
112 | 0 | return r; |
113 | | |
114 | 270 | if (streq(p, "0")) |
115 | 0 | return -EREMOTE; /* PID is in foreign PID namespace? */ |
116 | 270 | if (streq(p, "-1")) |
117 | 0 | return -ESRCH; /* refers to reaped process? */ |
118 | | |
119 | 270 | return parse_pid(p, ret); |
120 | 270 | } |
121 | | |
122 | 270 | static int pidfd_get_pid_ioctl(int fd, pid_t *ret) { |
123 | 270 | struct pidfd_info info = { .mask = PIDFD_INFO_PID }; |
124 | 270 | int r; |
125 | | |
126 | 270 | assert(fd >= 0); |
127 | | |
128 | 270 | r = pidfd_get_info(fd, &info); |
129 | 270 | if (r < 0) |
130 | 270 | return r; |
131 | | |
132 | 0 | assert(FLAGS_SET(info.mask, PIDFD_INFO_PID)); |
133 | |
|
134 | 0 | if (ret) |
135 | 0 | *ret = info.pid; |
136 | 0 | return 0; |
137 | 270 | } |
138 | | |
int pidfd_get_pid(int fd, pid_t *ret) {
        int r;

        /* Converts a pidfd into a pid. We try ioctl(PIDFD_GET_INFO) (kernel 6.13+) first,
         * /proc/self/fdinfo/ as fallback. Well known errors:
         *
         *    -EBADF   → fd invalid
         *    -ESRCH   → fd valid, but process is already reaped
         *
         * pidfd_get_pid_fdinfo() might additionally fail for other reasons:
         *
         *    -ENOSYS  → /proc/ not mounted
         *    -ENOTTY  → fd valid, but not a pidfd
         *    -EREMOTE → fd valid, but pid is in another namespace we cannot translate to the local one
         *               (when using PIDFD_GET_INFO this is indistinguishable from -ESRCH)
         */

        assert(fd >= 0);

        r = pidfd_get_pid_ioctl(fd, ret);
        if (r == -EOPNOTSUPP)
                /* Only the "ioctl not available" case takes the fdinfo fallback. */
                return pidfd_get_pid_fdinfo(fd, ret);

        return r;
}
164 | | |
int pidfd_verify_pid(int pidfd, pid_t pid) {
        pid_t current_pid;
        int r;

        /* Verifies that the given pidfd refers to the given PID.
         *
         * Returns 0 if the PID resolved from the pidfd equals pid, -ESRCH if it differs, and
         * otherwise whatever error pidfd_get_pid() reports. */

        assert(pidfd >= 0);
        assert(pid > 0);

        r = pidfd_get_pid(pidfd, &current_pid);
        if (r < 0)
                return r;

        return current_pid != pid ? -ESRCH : 0;
}
178 | | |
179 | 135 | int pidfd_get_ppid(int fd, pid_t *ret) { |
180 | 135 | struct pidfd_info info = { .mask = PIDFD_INFO_PID }; |
181 | 135 | int r; |
182 | | |
183 | 135 | assert(fd >= 0); |
184 | | |
185 | 135 | r = pidfd_get_info(fd, &info); |
186 | 135 | if (r < 0) |
187 | 135 | return r; |
188 | | |
189 | 0 | assert(FLAGS_SET(info.mask, PIDFD_INFO_PID)); |
190 | |
|
191 | 0 | if (info.ppid == 0) /* See comments in pid_get_ppid() */ |
192 | 0 | return -EADDRNOTAVAIL; |
193 | | |
194 | 0 | if (ret) |
195 | 0 | *ret = info.ppid; |
196 | 0 | return 0; |
197 | 0 | } |
198 | | |
199 | 0 | int pidfd_get_uid(int fd, uid_t *ret) { |
200 | 0 | struct pidfd_info info = { .mask = PIDFD_INFO_CREDS }; |
201 | 0 | int r; |
202 | |
|
203 | 0 | assert(fd >= 0); |
204 | |
|
205 | 0 | r = pidfd_get_info(fd, &info); |
206 | 0 | if (r < 0) |
207 | 0 | return r; |
208 | | |
209 | 0 | assert(FLAGS_SET(info.mask, PIDFD_INFO_CREDS)); |
210 | |
|
211 | 0 | if (ret) |
212 | 0 | *ret = info.ruid; |
213 | 0 | return 0; |
214 | 0 | } |
215 | | |
216 | 0 | int pidfd_get_cgroupid(int fd, uint64_t *ret) { |
217 | 0 | struct pidfd_info info = { .mask = PIDFD_INFO_CGROUPID }; |
218 | 0 | int r; |
219 | |
|
220 | 0 | assert(fd >= 0); |
221 | |
|
222 | 0 | r = pidfd_get_info(fd, &info); |
223 | 0 | if (r < 0) |
224 | 0 | return r; |
225 | | |
226 | 0 | assert(FLAGS_SET(info.mask, PIDFD_INFO_CGROUPID)); |
227 | |
|
228 | 0 | if (ret) |
229 | 0 | *ret = info.cgroupid; |
230 | 0 | return 0; |
231 | 0 | } |
232 | | |
233 | 0 | int pidfd_get_inode_id_impl(int fd, uint64_t *ret) { |
234 | 0 | static thread_local bool file_handle_supported = true; |
235 | 0 | int r; |
236 | |
|
237 | 0 | assert(fd >= 0); |
238 | |
|
239 | 0 | if (file_handle_supported) { |
240 | 0 | union { |
241 | 0 | struct file_handle file_handle; |
242 | 0 | uint8_t space[offsetof(struct file_handle, f_handle) + sizeof(uint64_t)]; |
243 | 0 | } fh = { |
244 | 0 | .file_handle.handle_bytes = sizeof(uint64_t), |
245 | 0 | .file_handle.handle_type = FILEID_KERNFS, |
246 | 0 | }; |
247 | 0 | int mnt_id; |
248 | |
|
249 | 0 | r = RET_NERRNO(name_to_handle_at(fd, "", &fh.file_handle, &mnt_id, AT_EMPTY_PATH)); |
250 | 0 | if (r >= 0) { |
251 | 0 | if (ret) |
252 | | /* Note, "struct file_handle" is 32bit aligned usually, but we need to read a 64bit value from it */ |
253 | 0 | *ret = unaligned_read_ne64(fh.file_handle.f_handle); |
254 | 0 | return 0; |
255 | 0 | } |
256 | 0 | assert(r != -EOVERFLOW); |
257 | 0 | if (is_name_to_handle_at_fatal_error(r)) |
258 | 0 | return r; |
259 | | |
260 | 0 | file_handle_supported = false; |
261 | 0 | } |
262 | | |
263 | 0 | #if SIZEOF_INO_T == 8 |
264 | 0 | struct stat st; |
265 | 0 | if (fstat(fd, &st) < 0) |
266 | 0 | return -errno; |
267 | | |
268 | 0 | if (ret) |
269 | 0 | *ret = (uint64_t) st.st_ino; |
270 | 0 | return 0; |
271 | |
|
272 | | #elif SIZEOF_INO_T == 4 |
273 | | /* On 32-bit systems (where sizeof(ino_t) == 4), the inode id returned by fstat() cannot be used to |
274 | | * reliably identify the process, nor can we communicate the origin of the id with the clients. |
275 | | * Hence let's just refuse to acquire pidfdid through fstat() here. All clients shall also insist on |
276 | | * the 64-bit id from name_to_handle_at(). */ |
277 | | return -EOPNOTSUPP; |
278 | | #else |
279 | | # error Unsupported ino_t size |
280 | | #endif |
281 | 0 | } |
282 | | |
int pidfd_get_inode_id(int fd, uint64_t *ret) {
        int pidfs;

        /* Like pidfd_get_inode_id_impl(), but first makes sure via pidfd_check_pidfs() that the fd
         * is on pidfs. Returns -EOPNOTSUPP if it is not, and passes through errors from the check. */

        assert(fd >= 0);

        pidfs = pidfd_check_pidfs(fd);
        if (pidfs < 0)
                return pidfs;

        return pidfs == 0 ? -EOPNOTSUPP : pidfd_get_inode_id_impl(fd, ret);
}
296 | | |
297 | 0 | int pidfd_get_inode_id_self_cached(uint64_t *ret) { |
298 | 0 | static thread_local uint64_t cached = 0; |
299 | 0 | static thread_local pid_t initialized = 0; /* < 0: cached error; == 0: invalid; > 0: valid and pid that was current */ |
300 | 0 | int r; |
301 | |
|
302 | 0 | assert(ret); |
303 | |
|
304 | 0 | if (initialized == getpid_cached()) { |
305 | 0 | *ret = cached; |
306 | 0 | return 0; |
307 | 0 | } |
308 | 0 | if (initialized < 0) |
309 | 0 | return initialized; |
310 | | |
311 | 0 | _cleanup_close_ int fd = pidfd_open(getpid_cached(), 0); |
312 | 0 | if (fd < 0) |
313 | 0 | return -errno; |
314 | | |
315 | 0 | r = pidfd_get_inode_id(fd, &cached); |
316 | 0 | if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) |
317 | 0 | return (initialized = -EOPNOTSUPP); |
318 | 0 | if (r < 0) |
319 | 0 | return r; |
320 | | |
321 | 0 | *ret = cached; |
322 | 0 | initialized = getpid_cached(); |
323 | 0 | return 0; |
324 | 0 | } |