Coverage Report

Created: 2026-06-22 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openvswitch/lib/socket-util.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include "socket-util.h"
19
#include <sys/types.h>
20
#include <netinet/in.h>
21
#include <arpa/inet.h>
22
#include <ctype.h>
23
#include <errno.h>
24
#include <fcntl.h>
25
#include <net/if.h>
26
#include <netdb.h>
27
#include <netinet/tcp.h>
28
#include <poll.h>
29
#include <stddef.h>
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include <sys/socket.h>
34
#include <sys/stat.h>
35
#include <sys/uio.h>
36
#include <sys/un.h>
37
#include <unistd.h>
38
#include "openvswitch/dynamic-string.h"
39
#include "ovs-thread.h"
40
#include "packets.h"
41
#include "openvswitch/poll-loop.h"
42
#include "util.h"
43
#include "openvswitch/vlog.h"
44
#ifdef __linux__
45
#include <linux/if_packet.h>
46
#endif
47
#ifdef HAVE_NETLINK
48
#include "netlink-protocol.h"
49
#include "netlink-socket.h"
50
#endif
51
#include "dns-resolve.h"
52
53
VLOG_DEFINE_THIS_MODULE(socket_util);
54
55
static int getsockopt_int(int fd, int level, int option, const char *optname,
56
                          int *valuep);
57
static struct sockaddr_in *sin_cast(const struct sockaddr *);
58
static struct sockaddr_in6 *sin6_cast(const struct sockaddr *);
59
static const struct sockaddr *sa_cast(const struct sockaddr_storage *);
60
static bool parse_sockaddr_components(struct sockaddr_storage *ss,
61
                                      char *host_s,
62
                                      const char *port_s,
63
                                      uint16_t default_port,
64
                                      const char *s,
65
                                      bool resolve_host,
66
                                      bool *dns_failure);
67
68
/* Sets 'fd' to non-blocking mode.  Returns 0 if successful, otherwise a
 * positive errno value.
 *
 * Every failure is logged.  The errno value is captured right after the
 * failing fcntl() call, so the value returned to the caller is exactly the
 * one that was logged and cannot be changed by the logging call itself. */
int
set_nonblocking(int fd)
{
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        int error = errno;

        VLOG_ERR("fcntl(F_GETFL) failed: %s", ovs_strerror(error));
        return error;
    }

    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        int error = errno;

        VLOG_ERR("fcntl(F_SETFL) failed: %s", ovs_strerror(error));
        return error;
    }

    return 0;
}
86
87
/* Like set_nonblocking(), except that a failure terminates the process with
 * exit status EXIT_FAILURE instead of being reported to the caller. */
void
xset_nonblocking(int fd)
{
    int error = set_nonblocking(fd);

    if (error != 0) {
        exit(EXIT_FAILURE);
    }
}
94
95
/* Turns on the TCP_NODELAY option for socket 'fd'.  A failure is logged but
 * is not reported to the caller. */
void
setsockopt_tcp_nodelay(int fd)
{
    int enable = 1;

    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof enable)) {
        int error = sock_errno();

        VLOG_ERR("setsockopt(TCP_NODELAY): %s", sock_strerror(error));
    }
}
107
108
/* Sets the DSCP value of socket 'fd' to 'dscp', which must be 63 or less.
 * 'family' must indicate the socket's address family (AF_INET or AF_INET6, to
 * do anything useful).
 *
 * Returns 0 if successful.  Returns EINVAL if 'dscp' is greater than 63,
 * ENOPROTOOPT if 'family' is neither AF_INET nor AF_INET6, and otherwise the
 * socket error reported for the failed setsockopt() call. */
int
set_dscp(int fd, int family, uint8_t dscp)
{
    int level;
    int option;

    if (dscp > 63) {
        return EINVAL;
    }

    if (family == AF_INET) {
        level = IPPROTO_IP;
        option = IP_TOS;
    } else if (family == AF_INET6) {
        level = IPPROTO_IPV6;
        option = IPV6_TCLASS;
    } else {
        return ENOPROTOOPT;
    }

    /* The value handed to the socket option is 'dscp' shifted left by two
     * bits. */
    int tos = dscp << 2;
    if (setsockopt(fd, level, option, &tos, sizeof tos)) {
        return sock_errno();
    }
    return 0;
}
137
138
/* Classifies 'host_name', which is assumed to be a valid IPv4 or IPv6 address
 * string, by looking for a ':' character.  Returns true if one is present
 * (IPv6), false otherwise (IPv4).  No further validation is performed. */
bool
addr_is_ipv6(const char *host_name)
{
    for (const char *p = host_name; *p != '\0'; p++) {
        if (*p == ':') {
            return true;
        }
    }
    return false;
}
146
147
/* Translates 'host_name', which must be a string representation of an IP
148
 * address, into a numeric IP address in '*addr'.  Returns 0 if successful,
149
 * otherwise a positive errno value. */
150
int
151
lookup_ip(const char *host_name, struct in_addr *addr)
152
3.27k
{
153
3.27k
    if (!ip_parse(host_name, &addr->s_addr)) {
154
9
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
155
9
        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IP address", host_name);
156
9
        return ENOENT;
157
9
    }
158
3.26k
    return 0;
159
3.27k
}
160
161
/* Translates 'host_name', which must be a string representation of an IPv6
162
 * address, into a numeric IPv6 address in '*addr'.  Returns 0 if successful,
163
 * otherwise a positive errno value. */
164
int
165
lookup_ipv6(const char *host_name, struct in6_addr *addr)
166
0
{
167
0
    if (!ipv6_parse(host_name, addr)) {
168
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
169
0
        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IPv6 address", host_name);
170
0
        return ENOENT;
171
0
    }
172
0
    return 0;
173
0
}
174
175
/* Translates 'host_name', which must be a host name or a string representation
 * of an IP address, into a numeric IP address in '*addr'.  Returns 0 if
 * successful, otherwise a positive errno value chosen from the getaddrinfo()
 * error code (see the switch below).
 *
 * Most Open vSwitch code should not use this because it causes deadlocks:
 * getaddrinfo() sends out a DNS request but that starts a new flow for which
 * OVS must set up a flow, but it can't because it's waiting for a DNS reply.
 * The synchronous lookup also delays other activity.  (Of course we can solve
 * this but it doesn't seem worthwhile quite yet.)  */
int
lookup_hostname(const char *host_name, struct in_addr *addr)
{
    struct addrinfo *result;
    struct addrinfo hints;
    int gai_error;

    /* A string that already parses as an address needs no resolver call. */
    if (ip_parse(host_name, &addr->s_addr)) {
        return 0;
    }

    /* Only AF_INET results are requested. */
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;

    gai_error = getaddrinfo(host_name, NULL, &hints, &result);
    if (!gai_error) {
        /* Use the first entry of the result list. */
        *addr = ALIGNED_CAST(struct sockaddr_in *,
                             result->ai_addr)->sin_addr;
        freeaddrinfo(result);
        return 0;
    }

    /* Map the getaddrinfo() failure onto an errno value. */
    switch (gai_error) {
#ifdef EAI_ADDRFAMILY
    case EAI_ADDRFAMILY:
#endif
    case EAI_NONAME:
    case EAI_SERVICE:
        return ENOENT;

    case EAI_AGAIN:
        return EAGAIN;

    case EAI_BADFLAGS:
    case EAI_FAMILY:
    case EAI_SOCKTYPE:
        return EINVAL;

    case EAI_FAIL:
        return EIO;

    case EAI_MEMORY:
        return ENOMEM;

#if defined (EAI_NODATA) && EAI_NODATA != EAI_NONAME
    case EAI_NODATA:
        return ENXIO;
#endif

#ifdef EAI_SYSTEM
    case EAI_SYSTEM:
        return sock_errno();
#endif

    default:
        return EPROTO;
    }
}
239
240
int
241
check_connection_completion(int fd)
242
0
{
243
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
244
0
    struct pollfd pfd;
245
0
    int retval;
246
247
0
    pfd.fd = fd;
248
0
    pfd.events = POLLOUT;
249
250
0
    do {
251
0
        retval = poll(&pfd, 1, 0);
252
0
    } while (retval < 0 && errno == EINTR);
253
254
0
    if (retval == 1) {
255
0
        if (pfd.revents & (POLLERR | POLLHUP)) {
256
0
            ssize_t n = send(fd, "", 1, 0);
257
0
            if (n < 0) {
258
0
                return sock_errno();
259
0
            } else {
260
0
                VLOG_ERR_RL(&rl, "poll return POLLERR but send succeeded");
261
0
                return EPROTO;
262
0
            }
263
0
        }
264
0
        return 0;
265
0
    } else if (retval < 0) {
266
0
        VLOG_ERR_RL(&rl, "poll: %s", sock_strerror(sock_errno()));
267
0
        return errno;
268
0
    } else {
269
0
        return EAGAIN;
270
0
    }
271
0
}
272
273
/* Queries SO_RCVBUF for socket 'sock'.  Returns the receive buffer size on
 * success, or a negative errno value if the query fails. */
int
get_socket_rcvbuf(int sock)
{
    int rcvbuf;
    int error = getsockopt_int(sock, SOL_SOCKET, SO_RCVBUF, "SO_RCVBUF",
                               &rcvbuf);

    if (error) {
        return -error;
    }
    return rcvbuf;
}
284
285
/* Reads and discards up to 'n_packets' datagrams from 'fd', stopping as soon
 * as a read() returns zero or fails.  ('fd' should therefore be in
 * non-blocking mode, so that running out of data ends the loop instead of
 * blocking.) */
void
drain_fd(int fd, size_t n_packets)
{
    while (n_packets > 0) {
        /* 'buffer' only needs to be 1 byte long in most circumstances.  This
         * size is defensive against the possibility that we someday want to
         * use a Linux tap device without TUN_NO_PI, in which case a buffer
         * smaller than sizeof(struct tun_pi) will give EINVAL on read. */
        char buffer[128];

        if (read(fd, buffer, sizeof buffer) <= 0) {
            return;
        }
        n_packets--;
    }
}
302
303
ovs_be32
304
guess_netmask(ovs_be32 ip_)
305
0
{
306
0
    uint32_t ip = ntohl(ip_);
307
0
    return ((ip >> 31) == 0 ? htonl(0xff000000)   /* Class A */
308
0
            : (ip >> 30) == 2 ? htonl(0xffff0000) /* Class B */
309
0
            : (ip >> 29) == 6 ? htonl(0xffffff00) /* Class C */
310
0
            : htonl(0));                          /* ??? */
311
0
}
312
313
/* Strips one pair of square brackets from 's', in place.
 *
 * If 's' does not begin with '[', returns 's' unchanged.  Otherwise returns a
 * pointer just past the '[', and, if the last character of the string is ']',
 * overwrites that ']' with a null terminator. */
static char *
unbracket(char *s)
{
    if (*s != '[') {
        return s;
    }

    char *inner = s + 1;

    /* When 'inner' is empty, 'last' points at the leading '[', which is never
     * ']', so nothing is modified. */
    char *last = inner + strlen(inner) - 1;
    if (*last == ']') {
        *last = '\0';
    }
    return inner;
}
326
327
/* Splits 's' in place into host and port tokens, storing pointers into 's'
 * in '*hostp' and '*portp' (each is null if the token is absent).
 *
 * 'host_index' is 0 if the host precedes the port within 's', 1 otherwise.
 *
 * Only colons outside square brackets count as separators.  If more than one
 * such colon is present, all of 's' is taken as the host, verbatim, and no
 * port is reported.  Otherwise 's' is split at the colon, if any, and each
 * token has one pair of surrounding square brackets removed. */
static void
inet_parse_tokens__(char *s, int host_index, char **hostp, char **portp)
{
    char *last_colon = NULL;
    int colon_count = 0;
    bool bracketed = false;

    for (char *p = s; *p != '\0'; p++) {
        switch (*p) {
        case '[':
            bracketed = true;
            break;

        case ']':
            bracketed = false;
            break;

        case ':':
            if (!bracketed) {
                colon_count++;
                last_colon = p;
            }
            break;

        default:
            break;
        }
    }

    *hostp = NULL;
    *portp = NULL;

    if (colon_count > 1) {
        *hostp = s;
        return;
    }

    /* 'first' receives the token before the colon, 'second' the one after. */
    char **first = host_index ? portp : hostp;
    char **second = host_index ? hostp : portp;

    if (last_colon) {
        *last_colon = '\0';
        *second = unbracket(last_colon + 1);
    }
    *first = unbracket(s);
}
360
361
/* Parses 's', a string in the form "<host>[:<port>]", into its (required) host
 * and (optional) port components, and stores pointers to them in '*hostp' and
 * '*portp' respectively.  Always sets '*hostp' nonnull, although possibly to
 * an empty string.  Sets '*portp' to null if 's' contains no port.
 *
 * Supports both IPv4 and IPv6.  IPv6 addresses may be quoted with square
 * brackets.  Resolves ambiguous cases that might represent an IPv6 address or
 * an IPv6 address and a port as representing just a host, e.g. "::1:2:3:4:80"
 * is a host but "[::1:2:3:4]:80" is a host and a port.
 *
 * Modifies 's' and points '*hostp' and '*portp' (if nonnull) into it.
 */
void
inet_parse_host_port_tokens(char *s, char **hostp, char **portp)
{
    enum { HOST_FIRST = 0 };

    inet_parse_tokens__(s, HOST_FIRST, hostp, portp);
}
378
379
/* Parses 's', a string in the form "<port>[:<host>]", into its port and host
 * components, and stores pointers to them in '*portp' and '*hostp'
 * respectively.  Either '*portp' or '*hostp' (but not both) can end up null.
 *
 * Supports both IPv4 and IPv6.  IPv6 addresses may be quoted with square
 * brackets.  Resolves ambiguous cases that might represent an IPv6 address or
 * an IPv6 address and a port as representing just a host, e.g. "::1:2:3:4:80"
 * is a host but "[::1:2:3:4]:80" is a host and a port.
 *
 * Modifies 's' and points '*hostp' and '*portp' (if nonnull) into it.
 */
void
inet_parse_port_host_tokens(char *s, char **portp, char **hostp)
{
    enum { PORT_FIRST = 1 };

    inet_parse_tokens__(s, PORT_FIRST, hostp, portp);
}
395
396
static bool
397
parse_sockaddr_components_dns(struct sockaddr_storage *ss OVS_UNUSED,
398
                              char *host_s,
399
                              const char *port_s OVS_UNUSED,
400
                              uint16_t default_port OVS_UNUSED,
401
                              const char *s OVS_UNUSED)
402
0
{
403
0
    char *tmp_host_s;
404
405
0
    dns_resolve(host_s, &tmp_host_s);
406
0
    if (tmp_host_s != NULL) {
407
0
        parse_sockaddr_components(ss, tmp_host_s, port_s,
408
0
                                  default_port, s, false, NULL);
409
0
        free(tmp_host_s);
410
0
        return true;
411
0
    }
412
0
    return false;
413
0
}
414
415
/* Fills '*ss' from a host string 'host_s' and a port string 'port_s'.
 *
 *      - If 'port_s' is null or empty, 'default_port' is used.  Otherwise
 *        'port_s' must be a decimal number in the range 0...65535.
 *
 *      - If 'host_s' contains a ':', it is parsed as an IPv6 address,
 *        optionally followed by "%<scope>", where <scope> is either all
 *        digits or an interface name (where scope IDs are supported).
 *        'host_s' is modified in this case.
 *
 *      - Otherwise the result is an IPv4 address.  A null 'host_s' leaves the
 *        address zeroed.  If 'host_s' does not parse as an IPv4 address and
 *        'resolve_host' is true, it is handed to
 *        parse_sockaddr_components_dns().
 *
 * 's' is the complete string being parsed and is used only in log messages.
 *
 * Returns true on success.  On failure zeros '*ss' and returns false.  If
 * 'dns_failure' is nonnull, '*dns_failure' is set to true only when the
 * failure came from host resolution, and to false in all other cases. */
static bool
parse_sockaddr_components(struct sockaddr_storage *ss,
                          char *host_s,
                          const char *port_s, uint16_t default_port,
                          const char *s,
                          bool resolve_host, bool *dns_failure)
{
    struct sockaddr_in *sin = sin_cast(sa_cast(ss));
    int port;

    if (dns_failure) {
        *dns_failure = false;
    }

    if (!port_s || !port_s[0]) {
        port = default_port;
    } else if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) {
        VLOG_ERR("%s: bad port number \"%s\"", s, port_s);
        goto exit;
    }

    memset(ss, 0, sizeof *ss);
    if (host_s && strchr(host_s, ':')) {
        struct sockaddr_in6 *sin6 = sin6_cast(sa_cast(ss));

        /* Everything up to the first '%' is the address proper. */
        char *addr = strsep(&host_s, "%");

        sin6->sin6_family = AF_INET6;
        sin6->sin6_port = htons(port);
        if (!addr || !*addr || !ipv6_parse(addr, &sin6->sin6_addr)) {
            goto exit;
        }

#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
        char *scope = strsep(&host_s, "%");
        if (scope && *scope) {
            bool numeric = scope[strspn(scope, "0123456789")] == '\0';

            if (numeric) {
                sin6->sin6_scope_id = atoi(scope);
            } else {
                sin6->sin6_scope_id = if_nametoindex(scope);
                if (!sin6->sin6_scope_id) {
                    VLOG_ERR("%s: bad IPv6 scope \"%s\" (%s)",
                             s, scope, ovs_strerror(errno));
                    goto exit;
                }
            }
        }
#endif
    } else {
        sin->sin_family = AF_INET;
        sin->sin_port = htons(port);
        if (host_s && !ip_parse(host_s, &sin->sin_addr.s_addr)) {
            goto resolve;
        }
    }

    return true;

resolve:
    if (!resolve_host) {
        VLOG_ERR("%s: bad IP address \"%s\"", s, host_s);
    } else {
        if (parse_sockaddr_components_dns(ss, host_s, port_s,
                                          default_port, s)) {
            return true;
        }
        if (dns_failure) {
            *dns_failure = true;
        }
    }
exit:
    memset(ss, 0, sizeof *ss);
    return false;
}
490
491
/* Parses 'target_', which should be a string in the format "<host>[:<port>]".
 * <host>, which is required, may be an IPv4 address or an IPv6 address
 * enclosed in square brackets.  If 'default_port' is nonnegative then <port>
 * is optional and defaults to 'default_port' (use 0 to make the kernel choose
 * an available port, although this isn't usually appropriate for active
 * connections).  If 'default_port' is negative, then <port> is required; an
 * empty <port> (as in "<host>:") counts as missing.
 * It resolves the host if 'resolve_host' is true.
 *
 * On success, returns true and stores the parsed remote address into '*ss'.
 * On failure, logs an error, stores zeros into '*ss', and returns false.
 * If 'dns_failure' is nonnull, '*dns_failure' indicates whether the failure
 * was a host resolution failure. */
bool
inet_parse_active(const char *target_, int default_port,
                  struct sockaddr_storage *ss,
                  bool resolve_host, bool *dns_failure)
{
    /* The tokenizer modifies its input, so work on a private copy. */
    char *target = xstrdup(target_);
    char *port, *host;
    bool ok;

    inet_parse_host_port_tokens(target, &host, &port);
    if (!host) {
        VLOG_ERR("%s: host must be specified", target_);
        ok = false;
        if (dns_failure) {
            *dns_failure = false;
        }
    } else if ((!port || !port[0]) && default_port < 0) {
        /* An empty port token must be rejected here too: otherwise the
         * negative 'default_port' would be converted to an unsigned 16-bit
         * port number further down. */
        VLOG_ERR("%s: port must be specified", target_);
        ok = false;
        if (dns_failure) {
            *dns_failure = false;
        }
    } else {
        ok = parse_sockaddr_components(ss, host, port, default_port,
                                       target_, resolve_host, dns_failure);
    }
    if (!ok) {
        memset(ss, 0, sizeof *ss);
    }
    free(target);
    return ok;
}
534
535
536
/* Opens a non-blocking IPv4 or IPv6 socket of the specified 'style' and
 * connects to 'target', which should be a string in the format
 * "<host>[:<port>]".  <host>, which is required, may be an IPv4 address or an
 * IPv6 address enclosed in square brackets.  If 'default_port' is nonnegative
 * then <port> is optional and defaults to 'default_port'.
 *
 * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
 *
 * On success, returns 0 (indicating connection complete) or EAGAIN (indicating
 * connection in progress), in which case the new file descriptor is stored
 * into '*fdp'.  On failure, returns a positive errno value other than EAGAIN
 * and stores -1 into '*fdp'.
 *
 * If 'ssp' is non-null, then on success stores the target address into
 * '*ssp', and on failure stores zeros into it.
 *
 * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
 * should be in the range [0, 63] and will automatically be shifted to the
 * appropriate place in the IP tos field. */
int
inet_open_active(int style, const char *target, int default_port,
                 struct sockaddr_storage *ssp, int *fdp, uint8_t dscp)
{
    struct sockaddr_storage remote;
    bool failed;
    int sock = -1;
    int error;

    /* Parse. */
    if (!inet_parse_active(target, default_port, &remote, true, NULL)) {
        error = EAFNOSUPPORT;
        goto exit;
    }

    /* Create non-blocking socket. */
    sock = socket(remote.ss_family, style, 0);
    if (sock < 0) {
        error = sock_errno();
        VLOG_ERR("%s: socket: %s", target, sock_strerror(error));
        goto exit;
    }
    error = set_nonblocking(sock);
    if (error) {
        goto exit;
    }

    /* The dscp bits must be configured before connect() to ensure that the
     * TOS field is set during the connection establishment.  If set after
     * connect(), the handshake SYN frames will be sent with a TOS of 0. */
    error = set_dscp(sock, remote.ss_family, dscp);
    if (error) {
        VLOG_ERR("%s: set_dscp: %s", target, sock_strerror(error));
        goto exit;
    }

    /* Connect.  A connection still in progress is reported as EAGAIN. */
    if (connect(sock, (struct sockaddr *) &remote, ss_length(&remote)) == 0) {
        error = 0;
    } else {
        error = sock_errno();
        if (error == EINPROGRESS) {
            error = EAGAIN;
        }
    }

exit:
    failed = error && error != EAGAIN;
    if (failed && sock >= 0) {
        closesocket(sock);
        sock = -1;
    }
    if (ssp) {
        if (failed) {
            memset(ssp, 0, sizeof *ssp);
        } else {
            *ssp = remote;
        }
    }
    *fdp = sock;
    return error;
}
614
615
/* Parses 'target_', which should be a string in the format
 * "[<port>][:<host>]":
 *
 *      - If 'default_port' is negative, then <port> is required; an empty
 *        <port> counts as missing.  Otherwise, if <port> is omitted, then
 *        'default_port' is used instead.
 *
 *      - If <port> (or 'default_port', if used) is 0, then no port is bound
 *        and the TCP/IP stack will select a port.
 *
 *      - <host> is optional.  If supplied, it may be an IPv4 address or an
 *        IPv6 address enclosed in square brackets.  If omitted, the IP address
 *        is wildcarded.
 *
 * The host is resolved if 'resolve_host' is true.
 *
 * If successful, stores the address into '*ss' and returns true; otherwise
 * zeros '*ss' and returns false.  If 'dns_failure' is nonnull, '*dns_failure'
 * indicates whether the failure was a host resolution failure. */
bool
inet_parse_passive(const char *target_, int default_port,
                   struct sockaddr_storage *ss,
                   bool resolve_host, bool *dns_failure)
{
    /* The tokenizer modifies its input, so work on a private copy. */
    char *target = xstrdup(target_);
    char *port, *host;
    bool ok;

    inet_parse_port_host_tokens(target, &port, &host);
    if ((!port || !port[0]) && default_port < 0) {
        /* An empty port token must be rejected here too: otherwise the
         * negative 'default_port' would be converted to an unsigned 16-bit
         * port number further down. */
        VLOG_ERR("%s: port must be specified", target_);
        ok = false;
        if (dns_failure) {
            *dns_failure = false;
        }
    } else {
        ok = parse_sockaddr_components(ss, host, port, default_port,
                                       target_, resolve_host, dns_failure);
    }
    if (!ok) {
        memset(ss, 0, sizeof *ss);
    }
    free(target);
    return ok;
}
655
656
657
/* Opens a non-blocking IPv4 or IPv6 socket of the specified 'style', binds to
658
 * 'target', and listens for incoming connections.  Parses 'target' in the same
659
 * way was inet_parse_passive().
660
 *
661
 * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
662
 *
663
 * For TCP, the socket will have SO_REUSEADDR turned on.
664
 *
665
 * On success, returns a non-negative file descriptor.  On failure, returns a
666
 * negative errno value.
667
 *
668
 * If 'ss' is non-null, then on success stores the bound address into '*ss'.
669
 *
670
 * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
671
 * should be in the range [0, 63] and will automatically be shifted to the
672
 * appropriately place in the IP tos field.
673
 *
674
 * If 'kernel_print_port' is true and the port is dynamically assigned by
675
 * the kernel, print the chosen port. */
676
int
677
inet_open_passive(int style, const char *target, int default_port,
678
                  struct sockaddr_storage *ssp, uint8_t dscp,
679
                  bool kernel_print_port)
680
0
{
681
0
    bool kernel_chooses_port;
682
0
    struct sockaddr_storage ss;
683
0
    int fd = 0, error;
684
0
    unsigned int yes = 1;
685
0
    bool dns_failure;
686
687
0
    if (!inet_parse_passive(target, default_port, &ss, true, &dns_failure)) {
688
0
        if (dns_failure) {
689
            /* DNS failure means asynchronous DNS resolution is in progress,
690
             * or that the name does currently not resolve. */
691
0
            return -EAGAIN;
692
0
        }
693
0
        return -EAFNOSUPPORT;
694
0
    }
695
0
    kernel_chooses_port = ss_get_port(&ss) == 0;
696
697
    /* Create non-blocking socket, set SO_REUSEADDR. */
698
0
    fd = socket(ss.ss_family, style, 0);
699
0
    if (fd < 0) {
700
0
        error = sock_errno();
701
0
        VLOG_ERR("%s: socket: %s", target, sock_strerror(error));
702
0
        return -error;
703
0
    }
704
0
    error = set_nonblocking(fd);
705
0
    if (error) {
706
0
        goto error;
707
0
    }
708
0
    if (style == SOCK_STREAM
709
0
        && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) < 0) {
710
0
        error = sock_errno();
711
0
        VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s",
712
0
                 target, sock_strerror(error));
713
0
        goto error;
714
0
    }
715
716
    /* Bind. */
717
0
    if (bind(fd, (struct sockaddr *) &ss, ss_length(&ss)) < 0) {
718
0
        error = sock_errno();
719
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
720
0
        VLOG_ERR_RL(&rl, "%s: bind: %s", target, sock_strerror(error));
721
0
        goto error;
722
0
    }
723
724
    /* The dscp bits must be configured before connect() to ensure that the TOS
725
     * field is set during the connection establishment.  If set after
726
     * connect(), the handshake SYN frames will be sent with a TOS of 0. */
727
0
    error = set_dscp(fd, ss.ss_family, dscp);
728
0
    if (error) {
729
0
        VLOG_ERR("%s: set_dscp: %s", target, sock_strerror(error));
730
0
        goto error;
731
0
    }
732
733
    /* Listen. */
734
0
    if (style == SOCK_STREAM && listen(fd, 64) < 0) {
735
0
        error = sock_errno();
736
0
        VLOG_ERR("%s: listen: %s", target, sock_strerror(error));
737
0
        goto error;
738
0
    }
739
740
0
    if (ssp || kernel_chooses_port) {
741
0
        socklen_t ss_len = sizeof ss;
742
0
        if (getsockname(fd, (struct sockaddr *) &ss, &ss_len) < 0) {
743
0
            error = sock_errno();
744
0
            VLOG_ERR("%s: getsockname: %s", target, sock_strerror(error));
745
0
            goto error;
746
0
        }
747
0
        if (kernel_chooses_port && kernel_print_port) {
748
0
            VLOG_INFO("%s: listening on port %"PRIu16,
749
0
                      target, ss_get_port(&ss));
750
0
        }
751
0
        if (ssp) {
752
0
            *ssp = ss;
753
0
        }
754
0
    }
755
756
0
    return fd;
757
758
0
error:
759
0
    if (ssp) {
760
0
        memset(ssp, 0, sizeof *ssp);
761
0
    }
762
0
    closesocket(fd);
763
0
    return -error;
764
0
}
765
766
/* Parses 'target_', which may be an IPv4 address or an IPv6 address
 * enclosed in square brackets.  No port is parsed (the port in '*ss' is 0)
 * and no host name resolution is attempted.
 *
 * On success, returns true and stores the parsed remote address into '*ss'.
 * On failure, stores zeros into '*ss' and returns false. */
bool
inet_parse_address(const char *target_, struct sockaddr_storage *ss)
{
    /* Bracket removal and parsing modify the string, so work on a copy. */
    char *copy = xstrdup(target_);
    bool ok;

    ok = parse_sockaddr_components(ss, unbracket(copy), NULL, 0,
                                   target_, false, NULL);
    if (!ok) {
        memset(ss, 0, sizeof *ss);
    }
    free(copy);
    return ok;
}
784
785
/* Reads exactly 'size' bytes from 'fd' into 'p_', retrying after short reads
 * and after reads interrupted by a signal (EINTR).
 *
 * Returns 0 if all 'size' bytes were read, EOF if end of file was reached
 * first, and otherwise a positive errno value.  In every case '*bytes_read'
 * is set to the number of bytes actually stored into 'p_'. */
int
read_fully(int fd, void *p_, size_t size, size_t *bytes_read)
{
    uint8_t *dst = p_;
    size_t remaining = size;

    *bytes_read = 0;
    while (remaining > 0) {
        ssize_t n = read(fd, dst, remaining);

        if (n == 0) {
            return EOF;
        }
        if (n < 0) {
            if (errno != EINTR) {
                return errno;
            }
            continue;
        }

        *bytes_read += n;
        remaining -= n;
        dst += n;
    }
    return 0;
}
805
806
int
807
write_fully(int fd, const void *p_, size_t size, size_t *bytes_written)
808
0
{
809
0
    const uint8_t *p = p_;
810
811
0
    *bytes_written = 0;
812
0
    while (size > 0) {
813
0
        ssize_t retval = write(fd, p, size);
814
0
        if (retval > 0) {
815
0
            *bytes_written += retval;
816
0
            size -= retval;
817
0
            p += retval;
818
0
        } else if (retval == 0) {
819
0
            VLOG_WARN("write returned 0");
820
0
            return EPROTO;
821
0
        } else if (errno != EINTR) {
822
0
            return errno;
823
0
        }
824
0
    }
825
0
    return 0;
826
0
}
827
828
/* Given file name 'file_name', fsyncs the directory in which it is contained.
 * Returns 0 if successful, otherwise a positive errno value.
 *
 * An fsync() failure with EINVAL or EROFS is not treated as an error.  Any
 * other failure, whether from open() or from fsync(), is logged. */
int
fsync_parent_dir(const char *file_name)
{
    char *dir = dir_name(file_name);
    int error = 0;
    int fd = open(dir, O_RDONLY);

    if (fd < 0) {
        error = errno;
        VLOG_ERR("%s: open failed (%s)", dir, ovs_strerror(error));
    } else {
        /* EINVAL and EROFS mean that this directory does not support
         * synchronization.  Not really an error. */
        if (fsync(fd) && errno != EINVAL && errno != EROFS) {
            error = errno;
            VLOG_ERR("%s: fsync failed (%s)", dir, ovs_strerror(error));
        }
        close(fd);
    }
    free(dir);

    return error;
}
858
859
/* Obtains the modification time of the file named 'file_name' to the greatest
 * supported precision.  If successful, stores the mtime in '*mtime' and
 * returns 0.  On error, returns a positive errno value and stores zeros in
 * '*mtime'.
 *
 * The nanosecond part comes from whichever sub-second field the build
 * configuration reports for 'struct stat', and is 0 if there is none. */
int
get_mtime(const char *file_name, struct timespec *mtime)
{
    struct stat s;

    if (stat(file_name, &s)) {
        mtime->tv_sec = 0;
        mtime->tv_nsec = 0;
        return errno;
    }

    mtime->tv_sec = s.st_mtime;
#if HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
    mtime->tv_nsec = s.st_mtim.tv_nsec;
#elif HAVE_STRUCT_STAT_ST_MTIMENSEC
    mtime->tv_nsec = s.st_mtimensec;
#else
    mtime->tv_nsec = 0;
#endif
    return 0;
}
885
886
static int
887
getsockopt_int(int fd, int level, int option, const char *optname, int *valuep)
888
0
{
889
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
890
0
    socklen_t len;
891
0
    int value;
892
0
    int error;
893
894
0
    len = sizeof value;
895
0
    if (getsockopt(fd, level, option, &value, &len)) {
896
0
        error = sock_errno();
897
0
        VLOG_ERR_RL(&rl, "getsockopt(%s): %s", optname, sock_strerror(error));
898
0
    } else if (len != sizeof value) {
899
0
        error = EINVAL;
900
0
        VLOG_ERR_RL(&rl, "getsockopt(%s): value is %u bytes (expected %"PRIuSIZE")",
901
0
                    optname, (unsigned int) len, sizeof value);
902
0
    } else {
903
0
        error = 0;
904
0
    }
905
906
0
    *valuep = error ? 0 : value;
907
0
    return error;
908
0
}
909
910
static void
911
describe_sockaddr(struct ds *string, int fd,
912
                  int (*getaddr)(int, struct sockaddr *, socklen_t *))
913
0
{
914
0
    struct sockaddr_storage ss;
915
0
    socklen_t len = sizeof ss;
916
917
0
    if (!getaddr(fd, (struct sockaddr *) &ss, &len)) {
918
0
        if (ss.ss_family == AF_INET || ss.ss_family == AF_INET6) {
919
0
            ss_format_address(&ss, string);
920
0
            ds_put_format(string, ":%"PRIu16, ss_get_port(&ss));
921
0
        } else if (ss.ss_family == AF_UNIX) {
922
0
            struct sockaddr_un sun;
923
0
            const char *null;
924
0
            size_t maxlen;
925
926
0
            memcpy(&sun, &ss, sizeof sun);
927
0
            maxlen = len - offsetof(struct sockaddr_un, sun_path);
928
0
            null = memchr(sun.sun_path, '\0', maxlen);
929
0
            ds_put_buffer(string, sun.sun_path,
930
0
                          null ? null - sun.sun_path : maxlen);
931
0
        }
932
0
#ifdef HAVE_NETLINK
933
0
        else if (ss.ss_family == AF_NETLINK) {
934
0
            int protocol;
935
936
/* SO_PROTOCOL was introduced in 2.6.32.  Support it regardless of the version
937
 * of the Linux kernel headers in use at build time. */
938
#ifndef SO_PROTOCOL
939
#define SO_PROTOCOL 38
940
#endif
941
942
0
            if (!getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, "SO_PROTOCOL",
943
0
                                &protocol)) {
944
0
                switch (protocol) {
945
0
                case NETLINK_ROUTE:
946
0
                    ds_put_cstr(string, "NETLINK_ROUTE");
947
0
                    break;
948
949
0
                case NETLINK_GENERIC:
950
0
                    ds_put_cstr(string, "NETLINK_GENERIC");
951
0
                    break;
952
953
0
                default:
954
0
                    ds_put_format(string, "AF_NETLINK family %d", protocol);
955
0
                    break;
956
0
                }
957
0
            } else {
958
0
                ds_put_cstr(string, "AF_NETLINK");
959
0
            }
960
0
        }
961
0
#endif
962
0
#if __linux__
963
0
        else if (ss.ss_family == AF_PACKET) {
964
0
            struct sockaddr_ll sll;
965
966
0
            memcpy(&sll, &ss, sizeof sll);
967
0
            ds_put_cstr(string, "AF_PACKET");
968
0
            if (sll.sll_ifindex) {
969
0
                char name[IFNAMSIZ];
970
971
0
                if (if_indextoname(sll.sll_ifindex, name)) {
972
0
                    ds_put_format(string, "(%s)", name);
973
0
                } else {
974
0
                    ds_put_format(string, "(ifindex=%d)", sll.sll_ifindex);
975
0
                }
976
0
            }
977
0
            if (sll.sll_protocol) {
978
0
                ds_put_format(string, "(protocol=0x%"PRIu16")",
979
0
                              ntohs(sll.sll_protocol));
980
0
            }
981
0
        }
982
0
#endif
983
0
        else if (ss.ss_family == AF_UNSPEC) {
984
0
            ds_put_cstr(string, "AF_UNSPEC");
985
0
        } else {
986
0
            ds_put_format(string, "AF_%d", (int) ss.ss_family);
987
0
        }
988
0
    }
989
0
}
990
991
992
#ifdef __linux__
993
/* Appends to 'string' a space followed by the target of the symbolic link
 * /proc/self/fd/<fd>.  If the target filled the whole local buffer, and so
 * may have been truncated, also appends "...".  Appends nothing if the link
 * cannot be read or is empty. */
static void
put_fd_filename(struct ds *string, int fd)
{
    char buf[1024];
    char *linkname;
    ssize_t n;

    linkname = xasprintf("/proc/self/fd/%d", fd);
    n = readlink(linkname, buf, sizeof buf);
    if (n > 0) {
        ds_put_char(string, ' ');
        ds_put_buffer(string, buf, n);

        /* readlink() truncates silently and never returns more than the
         * buffer size, so a completely full buffer is the only available
         * sign that the name might have been cut short. */
        if ((size_t) n >= sizeof buf) {
            ds_put_cstr(string, "...");
        }
    }
    free(linkname);
}
1011
#endif
1012
1013
/* Returns a malloc()'d string describing 'fd', for use in logging. */
1014
char *
1015
describe_fd(int fd)
1016
0
{
1017
0
    struct ds string;
1018
0
    struct stat s;
1019
1020
0
    ds_init(&string);
1021
0
    if (fstat(fd, &s)) {
1022
0
        ds_put_format(&string, "fstat failed (%s)", ovs_strerror(errno));
1023
0
    } else if (S_ISSOCK(s.st_mode)) {
1024
0
        describe_sockaddr(&string, fd, getsockname);
1025
0
        ds_put_cstr(&string, "<->");
1026
0
        describe_sockaddr(&string, fd, getpeername);
1027
0
    } else {
1028
0
        ds_put_cstr(&string, (isatty(fd) ? "tty"
1029
0
                              : S_ISDIR(s.st_mode) ? "directory"
1030
0
                              : S_ISCHR(s.st_mode) ? "character device"
1031
0
                              : S_ISBLK(s.st_mode) ? "block device"
1032
0
                              : S_ISREG(s.st_mode) ? "file"
1033
0
                              : S_ISFIFO(s.st_mode) ? "FIFO"
1034
0
                              : S_ISLNK(s.st_mode) ? "symbolic link"
1035
0
                              : "unknown"));
1036
0
#ifdef __linux__
1037
0
        put_fd_filename(&string, fd);
1038
0
#endif
1039
0
    }
1040
0
    return ds_steal_cstr(&string);
1041
0
}
1042

1043
/* sockaddr helpers. */
1044
1045
static struct sockaddr_in *
1046
sin_cast(const struct sockaddr *sa)
1047
0
{
1048
0
    return ALIGNED_CAST(struct sockaddr_in *, sa);
1049
0
}
1050
1051
static struct sockaddr_in6 *
1052
sin6_cast(const struct sockaddr *sa)
1053
0
{
1054
0
    return ALIGNED_CAST(struct sockaddr_in6 *, sa);
1055
0
}
1056
1057
/* Reports whether the address family of 'sa' is AF_INET or AF_INET6.
 * Returns false for every other family. */
bool
sa_is_ip(const struct sockaddr *sa)
{
    switch (sa->sa_family) {
    case AF_INET:
    case AF_INET6:
        return true;

    default:
        return false;
    }
}
1063
1064
/* Returns the IPv4 or IPv6 address in 'sa'.  Returns IPv4 addresses as
1065
 * v6-mapped. */
1066
struct in6_addr
1067
sa_get_address(const struct sockaddr *sa)
1068
0
{
1069
0
    ovs_assert(sa_is_ip(sa));
1070
0
    return (sa->sa_family == AF_INET
1071
0
            ? in6_addr_mapped_ipv4(sin_cast(sa)->sin_addr.s_addr)
1072
0
            : sin6_cast(sa)->sin6_addr);
1073
0
}
1074
1075
/* Returns the IPv4 or IPv6 port in 'sa'. */
1076
uint16_t
1077
sa_get_port(const struct sockaddr *sa)
1078
0
{
1079
0
    ovs_assert(sa_is_ip(sa));
1080
0
    return ntohs(sa->sa_family == AF_INET
1081
0
                 ? sin_cast(sa)->sin_port
1082
0
                 : sin6_cast(sa)->sin6_port);
1083
0
}
1084
1085
/* Returns true if 'name' is safe to include inside a network address field.
1086
 * We want to avoid names that include confusing punctuation, etc. */
1087
static bool OVS_UNUSED
1088
is_safe_name(const char *name)
1089
0
{
1090
0
    if (!name[0] || isdigit((unsigned char) name[0])) {
1091
0
        return false;
1092
0
    }
1093
0
    for (const char *p = name; *p; p++) {
1094
0
        if (!isalnum((unsigned char) *p) && *p != '-' && *p != '_') {
1095
0
            return false;
1096
0
        }
1097
0
    }
1098
0
    return true;
1099
0
}
1100
1101
static void
1102
sa_format_address__(const struct sockaddr *sa,
1103
                    const char *lbrack, const char *rbrack,
1104
                    struct ds *s)
1105
0
{
1106
0
    ovs_assert(sa_is_ip(sa));
1107
0
    if (sa->sa_family == AF_INET) {
1108
0
        ds_put_format(s, IP_FMT, IP_ARGS(sin_cast(sa)->sin_addr.s_addr));
1109
0
    } else {
1110
0
        const struct sockaddr_in6 *sin6 = sin6_cast(sa);
1111
1112
0
        ds_put_cstr(s, lbrack);
1113
0
        ds_reserve(s, s->length + INET6_ADDRSTRLEN);
1114
0
        char *tail = &s->string[s->length];
1115
0
        inet_ntop(AF_INET6, sin6->sin6_addr.s6_addr, tail, INET6_ADDRSTRLEN);
1116
0
        s->length += strlen(tail);
1117
1118
0
#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
1119
0
        uint32_t scope = sin6->sin6_scope_id;
1120
0
        if (scope) {
1121
0
            char namebuf[IF_NAMESIZE];
1122
0
            char *name = if_indextoname(scope, namebuf);
1123
0
            ds_put_char(s, '%');
1124
0
            if (name && is_safe_name(name)) {
1125
0
                ds_put_cstr(s, name);
1126
0
            } else {
1127
0
                ds_put_format(s, "%"PRIu32, scope);
1128
0
            }
1129
0
        }
1130
0
#endif
1131
1132
0
        ds_put_cstr(s, rbrack);
1133
0
    }
1134
0
}
1135
1136
/* Appends the IPv4 or IPv6 address in 'sa' to 's', passing "[" and "]" as
 * the brackets to put around an IPv6 address. */
void
sa_format_address(const struct sockaddr *sa, struct ds *s)
{
    const char *open_bracket = "[";
    const char *close_bracket = "]";

    sa_format_address__(sa, open_bracket, close_bracket, s);
}
1143
1144
/* Appends the IPv4 or IPv6 address in 'sa' to 's', passing empty strings as
 * the brackets so that an IPv6 address is not enclosed in anything. */
void
sa_format_address_nobracks(const struct sockaddr *sa, struct ds *s)
{
    const char *no_bracket = "";

    sa_format_address__(sa, no_bracket, no_bracket, s);
}
1151
1152
/* Returns the size in bytes of the concrete socket address structure that
 * matches the family of 'sa': 'struct sockaddr_in' for AF_INET and 'struct
 * sockaddr_in6' for AF_INET6.  Any other family hits OVS_NOT_REACHED(). */
size_t
sa_length(const struct sockaddr *sa)
{
    if (sa->sa_family == AF_INET) {
        return sizeof(struct sockaddr_in);
    }
    if (sa->sa_family == AF_INET6) {
        return sizeof(struct sockaddr_in6);
    }
    OVS_NOT_REACHED();
}
1166

1167
/* sockaddr_storage helpers.  */
1168
1169
static const struct sockaddr *
1170
sa_cast(const struct sockaddr_storage *ss)
1171
0
{
1172
0
    return ALIGNED_CAST(const struct sockaddr *, ss);
1173
0
}
1174
1175
/* 'struct sockaddr_storage' counterpart of sa_is_ip(): returns that
 * function's result for 'ss'. */
bool
ss_is_ip(const struct sockaddr_storage *ss)
{
    const struct sockaddr *sa = sa_cast(ss);

    return sa_is_ip(sa);
}
1180
1181
/* 'struct sockaddr_storage' counterpart of sa_get_port(): returns that
 * function's result for 'ss'. */
uint16_t
ss_get_port(const struct sockaddr_storage *ss)
{
    const struct sockaddr *sa = sa_cast(ss);

    return sa_get_port(sa);
}
1186
1187
struct in6_addr
1188
ss_get_address(const struct sockaddr_storage *ss)
1189
0
{
1190
0
    return sa_get_address(sa_cast(ss));
1191
0
}
1192
1193
/* 'struct sockaddr_storage' counterpart of sa_format_address(): formats the
 * address in 'ss' into 's' through that function. */
void
ss_format_address(const struct sockaddr_storage *ss, struct ds *s)
{
    const struct sockaddr *sa = sa_cast(ss);

    sa_format_address(sa, s);
}
1198
1199
/* 'struct sockaddr_storage' counterpart of sa_format_address_nobracks():
 * formats the address in 'ss' into 's' through that function. */
void
ss_format_address_nobracks(const struct sockaddr_storage *ss, struct ds *s)
{
    const struct sockaddr *sa = sa_cast(ss);

    sa_format_address_nobracks(sa, s);
}
1204
1205
/* 'struct sockaddr_storage' counterpart of sa_length(): returns that
 * function's result for 'ss'. */
size_t
ss_length(const struct sockaddr_storage *ss)
{
    const struct sockaddr *sa = sa_cast(ss);

    return sa_length(sa);
}
1210

1211
/* Returns the message that ovs_strerror() gives for the socket error code
 * 'error'. */
const char *
sock_strerror(int error)
{
    const char *message = ovs_strerror(error);

    return message;
}
1216

1217
#ifdef __linux__
1218
static int
1219
emulate_sendmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
1220
                 unsigned int flags)
1221
0
{
1222
0
    for (unsigned int i = 0; i < n; i++) {
1223
0
        ssize_t retval = sendmsg(fd, &msgs[i].msg_hdr, flags);
1224
0
        if (retval < 0) {
1225
0
            return i ? i : retval;
1226
0
        }
1227
0
        msgs[i].msg_len = retval;
1228
0
    }
1229
0
    return n;
1230
0
}
1231
1232
#ifndef HAVE_SENDMMSG
1233
/* Definition of sendmmsg() that forwards its arguments unchanged to
 * emulate_sendmmsg() and returns that function's result. */
int
sendmmsg(int fd, struct mmsghdr *msgs, unsigned int n, unsigned int flags)
{
    int n_sent = emulate_sendmmsg(fd, msgs, n, flags);

    return n_sent;
}
1238
#else
1239
/* sendmmsg was redefined in lib/socket-util.c, should undef sendmmsg here
1240
 * to avoid recursion */
1241
#undef sendmmsg
1242
/* Calls sendmmsg() and falls back to emulate_sendmmsg() when it is not
 * usable.
 *
 * If sendmmsg() fails with ENOSYS, records that in a static flag, restores
 * errno to the value it had on entry, and uses the emulation for this call
 * and for all later ones.  Any other outcome of sendmmsg() is returned to
 * the caller unchanged. */
int
wrap_sendmmsg(int fd, struct mmsghdr *msgs, unsigned int n, unsigned int flags)
{
    static bool sendmmsg_broken = false;

    if (sendmmsg_broken) {
        return emulate_sendmmsg(fd, msgs, n, flags);
    }

    int save_errno = errno;
    int retval = sendmmsg(fd, msgs, n, flags);

    if (retval < 0 && errno == ENOSYS) {
        /* Do not try sendmmsg() again, and hide the ENOSYS from the
         * caller. */
        sendmmsg_broken = true;
        errno = save_errno;
        return emulate_sendmmsg(fd, msgs, n, flags);
    }
    return retval;
}
1257
#endif
1258
1259
static int
1260
emulate_recvmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
1261
                 int flags, struct timespec *timeout OVS_UNUSED)
1262
0
{
1263
0
    ovs_assert(!timeout);       /* XXX not emulated */
1264
1265
0
    bool waitforone = flags & MSG_WAITFORONE;
1266
0
    flags &= ~MSG_WAITFORONE;
1267
1268
0
    for (unsigned int i = 0; i < n; i++) {
1269
0
        ssize_t retval = recvmsg(fd, &msgs[i].msg_hdr, flags);
1270
0
        if (retval < 0) {
1271
0
            return i ? i : retval;
1272
0
        }
1273
0
        msgs[i].msg_len = retval;
1274
1275
0
        if (waitforone) {
1276
0
            flags |= MSG_DONTWAIT;
1277
0
        }
1278
0
    }
1279
0
    return n;
1280
0
}
1281
1282
#ifndef HAVE_SENDMMSG
1283
/* Definition of recvmmsg() that forwards its arguments unchanged to
 * emulate_recvmmsg() and returns that function's result. */
int
recvmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
         int flags, struct timespec *timeout)
{
    int n_rcvd = emulate_recvmmsg(fd, msgs, n, flags, timeout);

    return n_rcvd;
}
1289
#else
1290
/* recvmmsg was redefined in lib/socket-util.c, should undef recvmmsg here
1291
 * to avoid recursion */
1292
#undef recvmmsg
1293
/* Calls recvmmsg() and falls back to emulate_recvmmsg() when it is not
 * usable.
 *
 * 'timeout' must be NULL (this is asserted before anything else is done).
 *
 * If recvmmsg() fails with ENOSYS, records that in a static flag, restores
 * errno to the value it had on entry, and uses the emulation for this call
 * and for all later ones.  Any other outcome of recvmmsg() is returned to
 * the caller unchanged. */
int
wrap_recvmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
              int flags, struct timespec *timeout)
{
    ovs_assert(!timeout);       /* XXX not emulated */

    static bool recvmmsg_broken = false;

    if (recvmmsg_broken) {
        return emulate_recvmmsg(fd, msgs, n, flags, timeout);
    }

    int save_errno = errno;
    int retval = recvmmsg(fd, msgs, n, flags, timeout);

    if (retval < 0 && errno == ENOSYS) {
        /* Do not try recvmmsg() again, and hide the ENOSYS from the
         * caller. */
        recvmmsg_broken = true;
        errno = save_errno;
        return emulate_recvmmsg(fd, msgs, n, flags, timeout);
    }
    return retval;
}
1311
#endif
1312
#endif /* __linux__ */