/src/openvswitch/lib/socket-util-unix.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014, 2016 Nicira, Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at: |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <config.h> |
18 | | #include "socket-util.h" |
19 | | #include <errno.h> |
20 | | #include <fcntl.h> |
21 | | #include <net/if.h> |
22 | | #include <sys/ioctl.h> |
23 | | #include <sys/types.h> |
24 | | #include <sys/stat.h> |
25 | | #include <sys/un.h> |
26 | | #include <sys/wait.h> |
27 | | #include <unistd.h> |
28 | | #include "fatal-signal.h" |
29 | | #include "random.h" |
30 | | #include "util.h" |
31 | | #include "openvswitch/vlog.h" |
32 | | |
33 | | VLOG_DEFINE_THIS_MODULE(socket_util_unix); |
34 | | |
/* Code guarded by #ifdef only gets compiled on the matching platform, which
 * makes it easy to break the other build without noticing.  To avoid that,
 * this file reduces the platform test to a 0/1 constant and writes
 * "if (LINUX)" in ordinary code, so that every branch is compiled
 * everywhere. */
#if defined(__linux__)
#define LINUX 1
#else
#define LINUX 0
#endif

/* Where <fcntl.h> provides no O_DIRECTORY, define it as 0 so that OR-ing it
 * into open() flags adds nothing. */
#if !defined(O_DIRECTORY)
#define O_DIRECTORY 0
#endif

/* Longest path, in bytes, that fits in the sun_path member of a struct
 * sockaddr_un while still leaving one byte for the null terminator. */
#define MAX_UN_LEN (sizeof(((struct sockaddr_un *) 0)->sun_path) - 1)
51 | | |
/* Creates a pipe with pipe() and stores its two file descriptors in 'fds'.
 * If pipe() fails, reports the errno string through VLOG_FATAL(). */
void
xpipe(int fds[2])
{
    int retval = pipe(fds);

    if (retval != 0) {
        VLOG_FATAL("failed to create pipe (%s)", ovs_strerror(errno));
    }
}
59 | | |
/* Like xpipe(), but additionally passes each end of the new pipe, fds[0] then
 * fds[1], to xset_nonblocking(). */
void
xpipe_nonblocking(int fds[2])
{
    int end;

    xpipe(fds);
    for (end = 0; end < 2; end++) {
        xset_nonblocking(fds[end]);
    }
}
67 | | |
68 | | /* Drain all the data currently in the receive queue of a datagram socket (and |
69 | | * possibly additional data). There is no way to know how many packets are in |
70 | | * the receive queue, but we do know that the total number of bytes queued does |
71 | | * not exceed the receive buffer size, so we pull packets until none are left |
72 | | * or we've read that many bytes. */ |
73 | | int |
74 | | drain_rcvbuf(int fd) |
75 | 0 | { |
76 | 0 | int rcvbuf; |
77 | |
|
78 | 0 | rcvbuf = get_socket_rcvbuf(fd); |
79 | 0 | if (rcvbuf < 0) { |
80 | 0 | return -rcvbuf; |
81 | 0 | } |
82 | | |
83 | 0 | while (rcvbuf > 0) { |
84 | | /* In Linux, specifying MSG_TRUNC in the flags argument causes the |
85 | | * datagram length to be returned, even if that is longer than the |
86 | | * buffer provided. Thus, we can use a 1-byte buffer to discard the |
87 | | * incoming datagram and still be able to account how many bytes were |
88 | | * removed from the receive buffer. |
89 | | * |
90 | | * On other Unix-like OSes, MSG_TRUNC has no effect in the flags |
91 | | * argument. */ |
92 | 0 | char buffer[LINUX ? 1 : 2048]; |
93 | 0 | ssize_t n_bytes = recv(fd, buffer, sizeof buffer, |
94 | 0 | MSG_TRUNC | MSG_DONTWAIT); |
95 | 0 | if (n_bytes <= 0 || n_bytes >= rcvbuf) { |
96 | 0 | break; |
97 | 0 | } |
98 | 0 | rcvbuf -= n_bytes; |
99 | 0 | } |
100 | 0 | return 0; |
101 | 0 | } |
102 | | |
103 | | /* Attempts to shorten 'name' by opening a file descriptor for the directory |
104 | | * part of the name and indirecting through /proc/self/fd/<dirfd>/<basename>. |
105 | | * On systems with Linux-like /proc, this works as long as <basename> isn't too |
106 | | * long. |
107 | | * |
108 | | * On success, returns 0 and stores the short name in 'short_name' and a |
109 | | * directory file descriptor to eventually be closed in '*dirfpd'. */ |
110 | | static int |
111 | | shorten_name_via_proc(const char *name, char short_name[MAX_UN_LEN + 1], |
112 | | int *dirfdp) |
113 | 0 | { |
114 | 0 | char *dir, *base; |
115 | 0 | int dirfd; |
116 | 0 | int len; |
117 | |
|
118 | 0 | if (!LINUX) { |
119 | 0 | return ENAMETOOLONG; |
120 | 0 | } |
121 | | |
122 | 0 | dir = dir_name(name); |
123 | 0 | dirfd = open(dir, O_DIRECTORY | O_RDONLY); |
124 | 0 | if (dirfd < 0) { |
125 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
126 | 0 | int error = errno; |
127 | |
|
128 | 0 | VLOG_WARN_RL(&rl, "%s: open failed (%s)", dir, ovs_strerror(error)); |
129 | 0 | free(dir); |
130 | |
|
131 | 0 | return error; |
132 | 0 | } |
133 | 0 | free(dir); |
134 | |
|
135 | 0 | base = base_name(name); |
136 | 0 | len = snprintf(short_name, MAX_UN_LEN + 1, |
137 | 0 | "/proc/self/fd/%d/%s", dirfd, base); |
138 | 0 | free(base); |
139 | |
|
140 | 0 | if (len >= 0 && len <= MAX_UN_LEN) { |
141 | 0 | *dirfdp = dirfd; |
142 | 0 | return 0; |
143 | 0 | } else { |
144 | 0 | close(dirfd); |
145 | 0 | return ENAMETOOLONG; |
146 | 0 | } |
147 | 0 | } |
148 | | |
149 | | /* Attempts to shorten 'name' by creating a symlink for the directory part of |
150 | | * the name and indirecting through <symlink>/<basename>. This works on |
151 | | * systems that support symlinks, as long as <basename> isn't too long. |
152 | | * |
153 | | * On success, returns 0 and stores the short name in 'short_name' and the |
154 | | * symbolic link to eventually delete in 'linkname'. */ |
155 | | static int |
156 | | shorten_name_via_symlink(const char *name, char short_name[MAX_UN_LEN + 1], |
157 | | char linkname[MAX_UN_LEN + 1]) |
158 | 0 | { |
159 | 0 | char *abs, *dir, *base; |
160 | 0 | const char *tmpdir; |
161 | 0 | int error; |
162 | 0 | int i; |
163 | |
|
164 | 0 | abs = abs_file_name(NULL, name); |
165 | 0 | dir = dir_name(abs); |
166 | 0 | base = base_name(abs); |
167 | 0 | free(abs); |
168 | |
|
169 | 0 | tmpdir = getenv("TMPDIR"); |
170 | 0 | if (tmpdir == NULL) { |
171 | 0 | tmpdir = "/tmp"; |
172 | 0 | } |
173 | |
|
174 | 0 | for (i = 0; i < 1000; i++) { |
175 | 0 | int len; |
176 | |
|
177 | 0 | len = snprintf(linkname, MAX_UN_LEN + 1, |
178 | 0 | "%s/ovs-un-c-%"PRIu32, tmpdir, random_uint32()); |
179 | 0 | error = (len < 0 || len > MAX_UN_LEN ? ENAMETOOLONG |
180 | 0 | : symlink(dir, linkname) ? errno |
181 | 0 | : 0); |
182 | 0 | if (error != EEXIST) { |
183 | 0 | break; |
184 | 0 | } |
185 | 0 | } |
186 | |
|
187 | 0 | if (!error) { |
188 | 0 | int len; |
189 | |
|
190 | 0 | fatal_signal_add_file_to_unlink(linkname); |
191 | |
|
192 | 0 | len = snprintf(short_name, MAX_UN_LEN + 1, "%s/%s", linkname, base); |
193 | 0 | if (len < 0 || len > MAX_UN_LEN) { |
194 | 0 | fatal_signal_unlink_file_now(linkname); |
195 | 0 | error = ENAMETOOLONG; |
196 | 0 | } |
197 | 0 | } |
198 | |
|
199 | 0 | if (error) { |
200 | 0 | linkname[0] = '\0'; |
201 | 0 | } |
202 | 0 | free(dir); |
203 | 0 | free(base); |
204 | |
|
205 | 0 | return error; |
206 | 0 | } |
207 | | |
208 | | /* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in |
209 | | * '*un_len' the size of the sockaddr_un. |
210 | | * |
211 | | * Returns 0 on success, otherwise a positive errno value. |
212 | | * |
213 | | * Uses '*dirfdp' and 'linkname' to store references to data when the caller no |
214 | | * longer needs to use 'un'. On success, freeing these references with |
215 | | * free_sockaddr_un() is mandatory to avoid a leak; on failure, freeing them is |
216 | | * unnecessary but harmless. */ |
217 | | static int |
218 | | make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len, |
219 | | int *dirfdp, char linkname[MAX_UN_LEN + 1]) |
220 | 0 | { |
221 | 0 | char short_name[MAX_UN_LEN + 1]; |
222 | |
|
223 | 0 | *dirfdp = -1; |
224 | 0 | linkname[0] = '\0'; |
225 | 0 | if (strlen(name) > MAX_UN_LEN) { |
226 | | /* 'name' is too long to fit in a sockaddr_un. Try a workaround. */ |
227 | 0 | int error = shorten_name_via_proc(name, short_name, dirfdp); |
228 | 0 | if (error == ENAMETOOLONG) { |
229 | 0 | error = shorten_name_via_symlink(name, short_name, linkname); |
230 | 0 | } |
231 | 0 | if (error) { |
232 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
233 | |
|
234 | 0 | VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum " |
235 | 0 | "%"PRIuSIZE" bytes", name, MAX_UN_LEN); |
236 | 0 | return error; |
237 | 0 | } |
238 | | |
239 | 0 | name = short_name; |
240 | 0 | } |
241 | | |
242 | 0 | un->sun_family = AF_UNIX; |
243 | 0 | ovs_strzcpy(un->sun_path, name, sizeof un->sun_path); |
244 | 0 | *un_len = (offsetof(struct sockaddr_un, sun_path) |
245 | 0 | + strlen (un->sun_path) + 1); |
246 | 0 | return 0; |
247 | 0 | } |
248 | | |
/* Releases what make_sockaddr_un() handed back: closes 'dirfd' unless it is
 * negative, and removes the symbolic link 'linkname' unless it is the empty
 * string. */
static void
free_sockaddr_un(int dirfd, const char *linkname)
{
    if (!(dirfd < 0)) {
        close(dirfd);
    }
    if (*linkname != '\0') {
        fatal_signal_unlink_file_now(linkname);
    }
}
260 | | |
261 | | /* Binds Unix domain socket 'fd' to a file with permissions 0700. */ |
262 | | static int bind_unix_socket(int fd, struct sockaddr *sun, socklen_t sun_len) |
263 | 0 | { |
264 | 0 | const mode_t mode = 0770; /* Allow both user and group access. */ |
265 | |
|
266 | 0 | if (LINUX) { |
267 | | /* On Linux, the fd's permissions become the file's permissions. |
268 | | * fchmod() does not affect other files, like umask() does. */ |
269 | 0 | if (fchmod(fd, mode)) { |
270 | 0 | return errno; |
271 | 0 | } |
272 | | |
273 | | /* Must be after fchmod(). */ |
274 | 0 | if (bind(fd, sun, sun_len)) { |
275 | 0 | return errno; |
276 | 0 | } |
277 | 0 | return 0; |
278 | 0 | } else { |
279 | | /* On FreeBSD and NetBSD, only the umask affects permissions. The |
280 | | * umask is process-wide rather than thread-specific, so we have to use |
281 | | * a subprocess for safety. */ |
282 | 0 | pid_t pid = fork(); |
283 | |
|
284 | 0 | if (!pid) { |
285 | 0 | umask(mode ^ 0777); |
286 | 0 | _exit(bind(fd, sun, sun_len) ? errno : 0); |
287 | 0 | } else if (pid > 0) { |
288 | 0 | int status; |
289 | 0 | int error; |
290 | |
|
291 | 0 | do { |
292 | 0 | error = waitpid(pid, &status, 0) < 0 ? errno : 0; |
293 | 0 | } while (error == EINTR); |
294 | |
|
295 | 0 | return (error ? error |
296 | 0 | : WIFEXITED(status) ? WEXITSTATUS(status) |
297 | 0 | : WIFSIGNALED(status) ? EINTR |
298 | 0 | : ECHILD /* WTF? */); |
299 | 0 | } else { |
300 | 0 | return errno; |
301 | 0 | } |
302 | 0 | } |
303 | 0 | } |
304 | | |
305 | | /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or |
306 | | * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and |
307 | | * connected to '*connect_path' (if 'connect_path' is non-null). If 'nonblock' |
308 | | * is true, the socket is made non-blocking. |
309 | | * |
310 | | * Returns the socket's fd if successful, otherwise a negative errno value. */ |
311 | | int |
312 | | make_unix_socket(int style, bool nonblock, |
313 | | const char *bind_path, const char *connect_path) |
314 | 0 | { |
315 | 0 | int error; |
316 | 0 | int fd; |
317 | |
|
318 | 0 | fd = socket(PF_UNIX, style, 0); |
319 | 0 | if (fd < 0) { |
320 | 0 | return -errno; |
321 | 0 | } |
322 | | |
323 | | /* Set nonblocking mode right away, if we want it. This prevents blocking |
324 | | * in connect(), if connect_path != NULL. (In turn, that's a corner case: |
325 | | * it will only happen if style is SOCK_STREAM or SOCK_SEQPACKET, and only |
326 | | * if a backlog of un-accepted connections has built up in the kernel.) */ |
327 | 0 | if (nonblock) { |
328 | 0 | error = set_nonblocking(fd); |
329 | 0 | if (error) { |
330 | 0 | goto error; |
331 | 0 | } |
332 | 0 | } |
333 | | |
334 | 0 | if (bind_path) { |
335 | 0 | char linkname[MAX_UN_LEN + 1]; |
336 | 0 | struct sockaddr_un un; |
337 | 0 | socklen_t un_len; |
338 | 0 | int dirfd; |
339 | |
|
340 | 0 | if (unlink(bind_path) && errno != ENOENT) { |
341 | 0 | VLOG_WARN("unlinking \"%s\": %s\n", |
342 | 0 | bind_path, ovs_strerror(errno)); |
343 | 0 | } |
344 | 0 | fatal_signal_add_file_to_unlink(bind_path); |
345 | |
|
346 | 0 | error = make_sockaddr_un(bind_path, &un, &un_len, &dirfd, linkname); |
347 | 0 | if (!error) { |
348 | 0 | error = bind_unix_socket(fd, (struct sockaddr *) &un, un_len); |
349 | 0 | } |
350 | 0 | free_sockaddr_un(dirfd, linkname); |
351 | |
|
352 | 0 | if (error) { |
353 | 0 | goto error; |
354 | 0 | } |
355 | 0 | } |
356 | | |
357 | 0 | if (connect_path) { |
358 | 0 | char linkname[MAX_UN_LEN + 1]; |
359 | 0 | struct sockaddr_un un; |
360 | 0 | socklen_t un_len; |
361 | 0 | int dirfd; |
362 | |
|
363 | 0 | error = make_sockaddr_un(connect_path, &un, &un_len, &dirfd, linkname); |
364 | 0 | if (!error |
365 | 0 | && connect(fd, (struct sockaddr*) &un, un_len) |
366 | 0 | && errno != EINPROGRESS) { |
367 | 0 | error = errno; |
368 | 0 | } |
369 | 0 | free_sockaddr_un(dirfd, linkname); |
370 | |
|
371 | 0 | if (error) { |
372 | 0 | goto error; |
373 | 0 | } |
374 | 0 | } |
375 | | |
376 | 0 | return fd; |
377 | | |
378 | 0 | error: |
379 | 0 | if (error == EAGAIN) { |
380 | 0 | error = EPROTO; |
381 | 0 | } |
382 | 0 | if (bind_path) { |
383 | 0 | fatal_signal_unlink_file_now(bind_path); |
384 | 0 | } |
385 | 0 | close(fd); |
386 | 0 | return -error; |
387 | 0 | } |
388 | | |
/* Returns the number of bytes of 'sun' that lie beyond the start of its
 * sun_path member, given that the whole address is 'sun_len' bytes long.
 *
 * Returns 0 if 'sun_len' does not extend past the start of sun_path, or if
 * the first byte of sun_path is a null byte. */
int
get_unix_name_len(const struct sockaddr_un *sun, socklen_t sun_len)
{
    const size_t path_ofs = offsetof(struct sockaddr_un, sun_path);

    if (sun_len <= path_ofs || sun->sun_path[0] == 0) {
        return 0;
    }
    return sun_len - path_ofs;
}
397 | | |
398 | | /* Calls ioctl() on an AF_INET sock, passing the specified 'command' and |
399 | | * 'arg'. Returns 0 if successful, otherwise a positive errno value. */ |
400 | | int |
401 | | af_inet_ioctl(unsigned long int command, const void *arg) |
402 | 0 | { |
403 | 0 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
404 | 0 | static int sock; |
405 | |
|
406 | 0 | if (ovsthread_once_start(&once)) { |
407 | 0 | sock = socket(AF_INET, SOCK_DGRAM, 0); |
408 | 0 | if (sock < 0) { |
409 | 0 | int error = sock_errno(); |
410 | 0 | VLOG_ERR("failed to create inet socket: %s", sock_strerror(error)); |
411 | 0 | sock = -error; |
412 | 0 | } |
413 | 0 | ovsthread_once_done(&once); |
414 | 0 | } |
415 | |
|
416 | 0 | return (sock < 0 ? -sock |
417 | 0 | : ioctl(sock, command, arg) == -1 ? errno |
418 | 0 | : 0); |
419 | 0 | } |
420 | | |
421 | | int |
422 | | af_inet_ifreq_ioctl(const char *name, struct ifreq *ifr, unsigned long int cmd, |
423 | | const char *cmd_name) |
424 | 0 | { |
425 | 0 | int error; |
426 | |
|
427 | 0 | ovs_strzcpy(ifr->ifr_name, name, sizeof ifr->ifr_name); |
428 | 0 | error = af_inet_ioctl(cmd, ifr); |
429 | 0 | if (error) { |
430 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); |
431 | 0 | VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name, |
432 | 0 | ovs_strerror(error)); |
433 | 0 | } |
434 | 0 | return error; |
435 | 0 | } |