Coverage Report

Created: 2023-03-26 07:41

/src/openvswitch/lib/socket-util.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include "socket-util.h"
19
#include <sys/types.h>
20
#include <netinet/in.h>
21
#include <arpa/inet.h>
22
#include <ctype.h>
23
#include <errno.h>
24
#include <fcntl.h>
25
#include <net/if.h>
26
#include <netdb.h>
27
#include <netinet/tcp.h>
28
#include <poll.h>
29
#include <stddef.h>
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include <sys/socket.h>
34
#include <sys/stat.h>
35
#include <sys/uio.h>
36
#include <sys/un.h>
37
#include <unistd.h>
38
#include "openvswitch/dynamic-string.h"
39
#include "ovs-thread.h"
40
#include "packets.h"
41
#include "openvswitch/poll-loop.h"
42
#include "util.h"
43
#include "openvswitch/vlog.h"
44
#ifdef __linux__
45
#include <linux/if_packet.h>
46
#endif
47
#ifdef HAVE_NETLINK
48
#include "netlink-protocol.h"
49
#include "netlink-socket.h"
50
#endif
51
#include "dns-resolve.h"
52
53
VLOG_DEFINE_THIS_MODULE(socket_util);
54
55
static int getsockopt_int(int fd, int level, int option, const char *optname,
56
                          int *valuep);
57
static struct sockaddr_in *sin_cast(const struct sockaddr *);
58
static struct sockaddr_in6 *sin6_cast(const struct sockaddr *);
59
static const struct sockaddr *sa_cast(const struct sockaddr_storage *);
60
static bool parse_sockaddr_components(struct sockaddr_storage *ss,
61
                                      char *host_s,
62
                                      const char *port_s,
63
                                      uint16_t default_port,
64
                                      const char *s,
65
                                      bool resolve_host,
66
                                      bool *dns_failure);
67
68
/* Sets 'fd' to non-blocking mode.  Returns 0 if successful, otherwise a
 * positive errno value.
 *
 * On failure, an error message is logged before returning. */
int
set_nonblocking(int fd)
{
#ifndef _WIN32
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        /* Capture errno before logging: the logging call is free to change
         * errno, and the caller must get the fcntl() failure code. */
        int error = errno;
        VLOG_ERR("fcntl(F_GETFL) failed: %s", ovs_strerror(error));
        return error;
    }

    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        int error = errno;
        VLOG_ERR("fcntl(F_SETFL) failed: %s", ovs_strerror(error));
        return error;
    }
    return 0;
#else
    unsigned long arg = 1;
    if (ioctlsocket(fd, FIONBIO, &arg)) {
        int error = sock_errno();
        VLOG_ERR("set_nonblocking failed: %s", sock_strerror(error));
        return error;
    }
    return 0;
#endif
}
96
97
/* Like set_nonblocking(), except that the process exits with EXIT_FAILURE
 * if 'fd' cannot be switched to non-blocking mode. */
void
xset_nonblocking(int fd)
{
    int error = set_nonblocking(fd);

    if (error != 0) {
        exit(EXIT_FAILURE);
    }
}
104
105
/* Turns on the TCP_NODELAY option for socket 'fd'.  A failure is logged but
 * is not reported to the caller. */
void
setsockopt_tcp_nodelay(int fd)
{
    int enable = 1;

    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof enable)) {
        int error = sock_errno();
        VLOG_ERR("setsockopt(TCP_NODELAY): %s", sock_strerror(error));
    }
}
117
118
/* Sets the DSCP value of socket 'fd' to 'dscp', which must be 63 or less
 * (otherwise EINVAL is returned).  'family' must be the socket's address
 * family: AF_INET and AF_INET6 are supported, anything else yields
 * ENOPROTOOPT.
 *
 * Returns 0 if successful, otherwise a positive errno value.  On Windows this
 * is currently a no-op that returns 0. */
int
set_dscp(int fd, int family, uint8_t dscp)
{
    int level;
    int option;
    int tos;

#ifdef _WIN32
    /* XXX: Consider using QoS2 APIs for Windows to set dscp. */
    return 0;
#endif

    if (dscp > 63) {
        return EINVAL;
    }

    /* DSCP occupies the upper six bits of the TOS / traffic class byte. */
    tos = dscp << 2;

    if (family == AF_INET) {
        level = IPPROTO_IP;
        option = IP_TOS;
    } else if (family == AF_INET6) {
        level = IPPROTO_IPV6;
        option = IPV6_TCLASS;
    } else {
        return ENOPROTOOPT;
    }

    if (setsockopt(fd, level, option, &tos, sizeof tos)) {
        return sock_errno();
    }
    return 0;
}
152
153
/* Distinguishes an IPv6 address string from an IPv4 one.  'host_name' is
 * assumed to be a valid address of one of the two kinds; the only thing
 * examined is whether it contains a ':'.  Returns true for IPv6, false for
 * IPv4. */
bool
addr_is_ipv6(const char *host_name)
{
    for (const char *p = host_name; *p != '\0'; p++) {
        if (*p == ':') {
            return true;
        }
    }
    return false;
}
161
162
/* Translates 'host_name', which must be a string representation of an IP
163
 * address, into a numeric IP address in '*addr'.  Returns 0 if successful,
164
 * otherwise a positive errno value. */
165
int
166
lookup_ip(const char *host_name, struct in_addr *addr)
167
0
{
168
0
    if (!ip_parse(host_name, &addr->s_addr)) {
169
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
170
0
        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IP address", host_name);
171
0
        return ENOENT;
172
0
    }
173
0
    return 0;
174
0
}
175
176
/* Translates 'host_name', which must be a string representation of an IPv6
177
 * address, into a numeric IPv6 address in '*addr'.  Returns 0 if successful,
178
 * otherwise a positive errno value. */
179
int
180
lookup_ipv6(const char *host_name, struct in6_addr *addr)
181
0
{
182
0
    if (!ipv6_parse(host_name, addr)) {
183
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
184
0
        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IPv6 address", host_name);
185
0
        return ENOENT;
186
0
    }
187
0
    return 0;
188
0
}
189
190
/* Resolves 'host_name', either a host name or the textual form of an IPv4
 * address, into a numeric IPv4 address in '*addr'.  Returns 0 if successful,
 * otherwise a positive errno value derived from the getaddrinfo() error.
 *
 * Most Open vSwitch code should not use this because it causes deadlocks:
 * getaddrinfo() sends out a DNS request but that starts a new flow for which
 * OVS must set up a flow, but it can't because it's waiting for a DNS reply.
 * The synchronous lookup also delays other activity.  (Of course we can solve
 * this but it doesn't seem worthwhile quite yet.)  */
int
lookup_hostname(const char *host_name, struct in_addr *addr)
{
    struct addrinfo hints;
    struct addrinfo *result;
    int gai_error;

    /* A literal address needs no resolver round trip. */
    if (ip_parse(host_name, &addr->s_addr)) {
        return 0;
    }

    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;

    gai_error = getaddrinfo(host_name, NULL, &hints, &result);
    if (gai_error == 0) {
        *addr = ALIGNED_CAST(struct sockaddr_in *,
                             result->ai_addr)->sin_addr;
        freeaddrinfo(result);
        return 0;
    }

    /* Map the resolver's error space onto errno values. */
    switch (gai_error) {
#ifdef EAI_ADDRFAMILY
    case EAI_ADDRFAMILY:
#endif
    case EAI_NONAME:
    case EAI_SERVICE:
        return ENOENT;

    case EAI_AGAIN:
        return EAGAIN;

    case EAI_BADFLAGS:
    case EAI_FAMILY:
    case EAI_SOCKTYPE:
        return EINVAL;

    case EAI_FAIL:
        return EIO;

    case EAI_MEMORY:
        return ENOMEM;

#if defined (EAI_NODATA) && EAI_NODATA != EAI_NONAME
    case EAI_NODATA:
        return ENXIO;
#endif

#ifdef EAI_SYSTEM
    case EAI_SYSTEM:
        return sock_errno();
#endif

    default:
        return EPROTO;
    }
}
254
255
int
256
check_connection_completion(int fd)
257
0
{
258
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
259
0
    struct pollfd pfd;
260
0
    int retval;
261
262
0
    pfd.fd = fd;
263
0
    pfd.events = POLLOUT;
264
265
0
#ifndef _WIN32
266
0
    do {
267
0
        retval = poll(&pfd, 1, 0);
268
0
    } while (retval < 0 && errno == EINTR);
269
#else
270
    fd_set wrset, exset;
271
    FD_ZERO(&wrset);
272
    FD_ZERO(&exset);
273
    FD_SET(fd, &exset);
274
    FD_SET(fd, &wrset);
275
    pfd.revents = 0;
276
    struct timeval tv = { 0, 0 };
277
    /* WSAPoll is broken on Windows, instead do a select */
278
    retval = select(0, NULL, &wrset, &exset, &tv);
279
    if (retval == 1) {
280
        if (FD_ISSET(fd, &wrset)) {
281
            pfd.revents |= pfd.events;
282
        }
283
        if (FD_ISSET(fd, &exset)) {
284
            pfd.revents |= POLLERR;
285
        }
286
    }
287
#endif
288
0
    if (retval == 1) {
289
0
        if (pfd.revents & (POLLERR | POLLHUP)) {
290
0
            ssize_t n = send(fd, "", 1, 0);
291
0
            if (n < 0) {
292
0
                return sock_errno();
293
0
            } else {
294
0
                VLOG_ERR_RL(&rl, "poll return POLLERR but send succeeded");
295
0
                return EPROTO;
296
0
            }
297
0
        }
298
0
        return 0;
299
0
    } else if (retval < 0) {
300
0
        VLOG_ERR_RL(&rl, "poll: %s", sock_strerror(sock_errno()));
301
0
        return errno;
302
0
    } else {
303
0
        return EAGAIN;
304
0
    }
305
0
}
306
307
/* Queries the SO_RCVBUF option of socket 'sock'.  Returns the receive buffer
 * size on success, or a negative errno value if the query fails. */
int
get_socket_rcvbuf(int sock)
{
    int size;
    int error = getsockopt_int(sock, SOL_SOCKET, SO_RCVBUF, "SO_RCVBUF",
                               &size);

    if (error) {
        return -error;
    }
    return size;
}
318
319
/* Performs up to 'n_packets' reads on 'fd', discarding whatever is read, and
 * stops early as soon as a read returns no data or fails.  ('fd' should
 * therefore be in non-blocking mode.) */
void
drain_fd(int fd, size_t n_packets)
{
    size_t remaining = n_packets;

    while (remaining > 0) {
        /* 'scratch' only needs to be 1 byte long in most circumstances.  This
         * size is defensive against the possibility that we someday want to
         * use a Linux tap device without TUN_NO_PI, in which case a buffer
         * smaller than sizeof(struct tun_pi) will give EINVAL on read. */
        char scratch[128];
        ssize_t n_read = read(fd, scratch, sizeof scratch);

        if (n_read <= 0) {
            break;
        }
        remaining--;
    }
}
336
337
ovs_be32
338
guess_netmask(ovs_be32 ip_)
339
0
{
340
0
    uint32_t ip = ntohl(ip_);
341
    return ((ip >> 31) == 0 ? htonl(0xff000000)   /* Class A */
342
            : (ip >> 30) == 2 ? htonl(0xffff0000) /* Class B */
343
            : (ip >> 29) == 6 ? htonl(0xffffff00) /* Class C */
344
0
            : htonl(0));                          /* ??? */
345
0
}
346
347
/* If 's' begins with '[', returns a pointer just past it and, when the string
 * also ends with ']', overwrites that ']' with a null terminator.  Otherwise
 * returns 's' unchanged.  Modifies 's' in place. */
static char *
unbracket(char *s)
{
    if (*s != '[') {
        return s;
    }
    s++;

    /* 'last' is the final character of the remainder; for an empty remainder
     * it is the '[' that was just skipped, which never matches. */
    char *last = s + strlen(s) - 1;
    if (*last == ']') {
        *last = '\0';
    }
    return s;
}
360
361
/* Splits 's' in place at its last ':' that lies outside square brackets and
 * stores the two pieces, with any enclosing brackets stripped, in '*hostp'
 * and '*portp'.  'host_index' is 0 if the host precedes the port within 's',
 * 1 otherwise.
 *
 * If 's' has more than one unbracketed colon, the whole string is taken to be
 * a host: '*hostp' is set to 's' and '*portp' to NULL.  If there is no colon,
 * only the first token is set and the other pointer is left NULL. */
static void
inet_parse_tokens__(char *s, int host_index, char **hostp, char **portp)
{
    char *last_colon = NULL;
    int colon_count = 0;
    bool bracketed = false;

    for (char *p = s; *p != '\0'; p++) {
        switch (*p) {
        case '[':
            bracketed = true;
            break;

        case ']':
            bracketed = false;
            break;

        case ':':
            if (!bracketed) {
                colon_count++;
                last_colon = p;
            }
            break;

        default:
            break;
        }
    }

    *hostp = NULL;
    *portp = NULL;
    if (colon_count > 1) {
        *hostp = s;
        return;
    }

    /* 'first' receives the text before the colon, 'second' the text after. */
    char **first = host_index ? portp : hostp;
    char **second = host_index ? hostp : portp;

    if (last_colon) {
        *last_colon = '\0';
        *second = unbracket(last_colon + 1);
    }
    *first = unbracket(s);
}
394
395
/* Splits 's', a string of the form "<host>[:<port>]", into its host and port
 * parts, storing pointers to them in '*hostp' and '*portp'.  '*hostp' is
 * always set nonnull (possibly to an empty string); '*portp' is NULL when no
 * port is present.
 *
 * Works for IPv4 and IPv6.  An IPv6 address may be wrapped in square
 * brackets.  A string with several unbracketed colons is ambiguous and is
 * treated as a bare host, e.g. "::1:2:3:4:80" is only a host while
 * "[::1:2:3:4]:80" is a host plus a port.
 *
 * 's' is modified, and the returned pointers point into it. */
void
inet_parse_host_port_tokens(char *s, char **hostp, char **portp)
{
    const int host_first = 0;

    inet_parse_tokens__(s, host_first, hostp, portp);
}
412
413
/* Splits 's', a string of the form "<port>[:<host>]", into its port and host
 * parts, storing pointers to them in '*portp' and '*hostp'.  One of the two
 * (never both) may end up NULL.
 *
 * Works for IPv4 and IPv6.  An IPv6 address may be wrapped in square
 * brackets.  A string with several unbracketed colons is ambiguous and is
 * treated as a bare host, e.g. "::1:2:3:4:80" is only a host while
 * "[::1:2:3:4]:80" is a host plus a port.
 *
 * 's' is modified, and the returned pointers point into it. */
void
inet_parse_port_host_tokens(char *s, char **portp, char **hostp)
{
    const int port_first = 1;

    inet_parse_tokens__(s, port_first, hostp, portp);
}
429
430
/* Resolves host name 'host_s' through dns_resolve() and, if an address string
 * comes back, parses that address together with 'port_s' / 'default_port'
 * into '*ss'.  's' is the original user-supplied string, used only in log
 * messages.
 *
 * Returns true only if resolution produced an address and that address was
 * successfully parsed into '*ss'.  Returns false if no address is available
 * or if the resolved address could not be parsed (in which case the nested
 * parse has zeroed '*ss'). */
static bool
parse_sockaddr_components_dns(struct sockaddr_storage *ss,
                              char *host_s,
                              const char *port_s,
                              uint16_t default_port,
                              const char *s)
{
    char *resolved = NULL;
    bool ok;

    dns_resolve(host_s, &resolved);
    if (!resolved) {
        return false;
    }

    /* Host resolution is disabled for the nested call so that a resolver
     * answer can never trigger another lookup.  Its result is propagated so
     * that the caller never sees "success" with a zeroed '*ss'. */
    ok = parse_sockaddr_components(ss, resolved, port_s,
                                   default_port, s, false, NULL);
    free(resolved);
    return ok;
}
448
449
/* Builds a socket address in '*ss' from host string 'host_s' and port string
 * 'port_s'.
 *
 *      - 'port_s', if nonnull and nonempty, must be a decimal number in the
 *        range 0...65535; otherwise 'default_port' is used.
 *
 *      - 'host_s' containing a ':' is parsed as an IPv6 address, optionally
 *        followed by "%<scope>", where <scope> is a numeric interface index
 *        or an interface name.  Any other 'host_s' is parsed as an IPv4
 *        address; a null 'host_s' leaves the address zeroed (wildcard).
 *
 *      - If 'host_s' is not a valid IPv4 address and 'resolve_host' is true,
 *        the name is handed to the DNS resolver.
 *
 * 's' is the original user-supplied string and is used only in log messages.
 * 'host_s' may be modified.
 *
 * Returns true on success.  On failure, zeros '*ss' and returns false.  If
 * 'dns_failure' is nonnull, '*dns_failure' is set to true only when the
 * failure came from host name resolution, false in every other case. */
static bool
parse_sockaddr_components(struct sockaddr_storage *ss,
                          char *host_s,
                          const char *port_s, uint16_t default_port,
                          const char *s,
                          bool resolve_host, bool *dns_failure)
{
    struct sockaddr_in *sin = sin_cast(sa_cast(ss));
    int port;

    if (dns_failure) {
        *dns_failure = false;
    }

    if (port_s && port_s[0]) {
        if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) {
            VLOG_ERR("%s: bad port number \"%s\"", s, port_s);
            goto exit;
        }
    } else {
        port = default_port;
    }

    memset(ss, 0, sizeof *ss);
    if (host_s && strchr(host_s, ':')) {
        struct sockaddr_in6 *sin6 = sin6_cast(sa_cast(ss));
        char *addr = strsep(&host_s, "%");

        sin6->sin6_family = AF_INET6;
        sin6->sin6_port = htons(port);
        if (!addr || !*addr || !ipv6_parse(addr, &sin6->sin6_addr)) {
            goto exit;
        }

#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
        char *scope = strsep(&host_s, "%");
        if (scope && *scope) {
            if (!scope[strspn(scope, "0123456789")]) {
                /* All digits: a numeric interface index.  Convert with an
                 * explicit range check, since atoi() has undefined behavior
                 * when the value does not fit. */
                unsigned long long scope_id;

                errno = 0;
                scope_id = strtoull(scope, NULL, 10);
                if (errno || scope_id > UINT32_MAX) {
                    VLOG_ERR("%s: IPv6 scope \"%s\" out of range", s, scope);
                    goto exit;
                }
                sin6->sin6_scope_id = scope_id;
            } else {
                sin6->sin6_scope_id = if_nametoindex(scope);
                if (!sin6->sin6_scope_id) {
                    VLOG_ERR("%s: bad IPv6 scope \"%s\" (%s)",
                             s, scope, ovs_strerror(errno));
                    goto exit;
                }
            }
        }
#endif
    } else {
        sin->sin_family = AF_INET;
        sin->sin_port = htons(port);
        if (host_s && !ip_parse(host_s, &sin->sin_addr.s_addr)) {
            goto resolve;
        }
    }

    return true;

resolve:
    /* Only reached from the IPv4 branch, so 'host_s' is still the complete,
     * unmodified host string here. */
    if (resolve_host) {
        if (parse_sockaddr_components_dns(ss, host_s, port_s,
                                          default_port, s)) {
            return true;
        }
        if (dns_failure) {
            *dns_failure = true;
        }
    } else {
        VLOG_ERR("%s: bad IP address \"%s\"", s, host_s);
    }
exit:
    memset(ss, 0, sizeof *ss);
    return false;
}
524
525
/* Parses 'target_', which should be a string in the format "<host>[:<port>]".
 * <host>, which is required, may be an IPv4 address or an IPv6 address
 * enclosed in square brackets.  If 'default_port' is nonnegative then <port>
 * is optional and defaults to 'default_port' (use 0 to make the kernel choose
 * an available port, although this isn't usually appropriate for active
 * connections).  If 'default_port' is negative, then a nonempty <port> is
 * required.  The host is resolved if 'resolve_host' is true.
 *
 * On success, returns true and stores the parsed remote address into '*ss'.
 * On failure, logs an error, stores zeros into '*ss', and returns false.
 *
 * If 'dns_failure' is nonnull, '*dns_failure' is always written: true if the
 * failure was caused by host resolution, false otherwise. */
bool
inet_parse_active(const char *target_, int default_port,
                  struct sockaddr_storage *ss,
                  bool resolve_host, bool *dns_failure)
{
    char *target = xstrdup(target_);
    char *port, *host;
    bool ok;

    /* Give '*dns_failure' a defined value even on the early failure paths
     * below, which never reach parse_sockaddr_components(). */
    if (dns_failure) {
        *dns_failure = false;
    }

    inet_parse_host_port_tokens(target, &host, &port);
    if (!host) {
        VLOG_ERR("%s: host must be specified", target_);
        ok = false;
    } else if ((!port || !port[0]) && default_port < 0) {
        /* An empty port ("host:") counts as missing.  Otherwise the negative
         * 'default_port' would be converted to uint16_t and silently become
         * port 65535. */
        VLOG_ERR("%s: port must be specified", target_);
        ok = false;
    } else {
        ok = parse_sockaddr_components(ss, host, port, default_port,
                                       target_, resolve_host, dns_failure);
    }
    if (!ok) {
        memset(ss, 0, sizeof *ss);
    }
    free(target);
    return ok;
}
562
563
564
/* Opens a non-blocking IPv4 or IPv6 socket of the specified 'style' and
565
 * connects to 'target', which should be a string in the format
566
 * "<host>[:<port>]".  <host>, which is required, may be an IPv4 address or an
567
 * IPv6 address enclosed in square brackets.  If 'default_port' is nonnegative
568
 * then <port> is optional and defaults to 'default_port'.
569
 *
570
 * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
571
 *
572
 * On success, returns 0 (indicating connection complete) or EAGAIN (indicating
573
 * connection in progress), in which case the new file descriptor is stored
574
 * into '*fdp'.  On failure, returns a positive errno value other than EAGAIN
575
 * and stores -1 into '*fdp'.
576
 *
577
 * If 'ss' is non-null, then on success stores the target address into '*ss'.
578
 *
579
 * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
580
 * should be in the range [0, 63] and will automatically be shifted to the
581
 * appropriately place in the IP tos field. */
582
int
583
inet_open_active(int style, const char *target, int default_port,
584
                 struct sockaddr_storage *ssp, int *fdp, uint8_t dscp)
585
0
{
586
0
    struct sockaddr_storage ss;
587
0
    int fd = -1;
588
0
    int error;
589
590
    /* Parse. */
591
0
    if (!inet_parse_active(target, default_port, &ss, true, NULL)) {
592
0
        error = EAFNOSUPPORT;
593
0
        goto exit;
594
0
    }
595
596
    /* Create non-blocking socket. */
597
0
    fd = socket(ss.ss_family, style, 0);
598
0
    if (fd < 0) {
599
0
        error = sock_errno();
600
0
        VLOG_ERR("%s: socket: %s", target, sock_strerror(error));
601
0
        goto exit;
602
0
    }
603
0
    error = set_nonblocking(fd);
604
0
    if (error) {
605
0
        goto exit;
606
0
    }
607
608
    /* The dscp bits must be configured before connect() to ensure that the
609
     * TOS field is set during the connection establishment.  If set after
610
     * connect(), the handshake SYN frames will be sent with a TOS of 0. */
611
0
    error = set_dscp(fd, ss.ss_family, dscp);
612
0
    if (error) {
613
0
        VLOG_ERR("%s: set_dscp: %s", target, sock_strerror(error));
614
0
        goto exit;
615
0
    }
616
617
    /* Connect. */
618
0
    error = connect(fd, (struct sockaddr *) &ss, ss_length(&ss)) == 0
619
0
                    ? 0
620
0
                    : sock_errno();
621
0
    if (error == EINPROGRESS
622
#ifdef _WIN32
623
        || error == WSAEALREADY || error == WSAEWOULDBLOCK
624
#endif
625
0
        ) {
626
0
        error = EAGAIN;
627
0
    }
628
629
0
exit:
630
0
    if (error && error != EAGAIN) {
631
0
        if (ssp) {
632
0
            memset(ssp, 0, sizeof *ssp);
633
0
        }
634
0
        if (fd >= 0) {
635
0
            closesocket(fd);
636
0
            fd = -1;
637
0
        }
638
0
    } else {
639
0
        if (ssp) {
640
0
            *ssp = ss;
641
0
        }
642
0
    }
643
0
    *fdp = fd;
644
0
    return error;
645
0
}
646
647
/* Parses 'target_', which should be a string in the format
 * "[<port>][:<host>]":
 *
 *      - If 'default_port' is negative, then a nonempty <port> is required.
 *        Otherwise, if <port> is omitted, then 'default_port' is used
 *        instead.
 *
 *      - If <port> (or 'default_port', if used) is 0, then no port is bound
 *        and the TCP/IP stack will select a port.
 *
 *      - <host> is optional.  If supplied, it may be an IPv4 address or an
 *        IPv6 address enclosed in square brackets.  If omitted, the IP address
 *        is wildcarded.
 *
 * The host is resolved if 'resolve_host' is true.
 *
 * If successful, stores the address into '*ss' and returns true; otherwise
 * zeros '*ss' and returns false.
 *
 * If 'dns_failure' is nonnull, '*dns_failure' is always written: true if the
 * failure was caused by host resolution, false otherwise. */
bool
inet_parse_passive(const char *target_, int default_port,
                   struct sockaddr_storage *ss,
                   bool resolve_host, bool *dns_failure)
{
    char *target = xstrdup(target_);
    char *port, *host;
    bool ok;

    /* Give '*dns_failure' a defined value even when the port check below
     * fails before parse_sockaddr_components() is reached. */
    if (dns_failure) {
        *dns_failure = false;
    }

    inet_parse_port_host_tokens(target, &port, &host);
    if ((!port || !port[0]) && default_port < 0) {
        /* An empty port (":host") counts as missing.  Otherwise the negative
         * 'default_port' would be converted to uint16_t and silently become
         * port 65535. */
        VLOG_ERR("%s: port must be specified", target_);
        ok = false;
    } else {
        ok = parse_sockaddr_components(ss, host, port, default_port,
                                       target_, resolve_host, dns_failure);
    }
    if (!ok) {
        memset(ss, 0, sizeof *ss);
    }
    free(target);
    return ok;
}
684
685
686
/* Opens a non-blocking IPv4 or IPv6 socket of the specified 'style', binds to
687
 * 'target', and listens for incoming connections.  Parses 'target' in the same
688
 * way was inet_parse_passive().
689
 *
690
 * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
691
 *
692
 * For TCP, the socket will have SO_REUSEADDR turned on.
693
 *
694
 * On success, returns a non-negative file descriptor.  On failure, returns a
695
 * negative errno value.
696
 *
697
 * If 'ss' is non-null, then on success stores the bound address into '*ss'.
698
 *
699
 * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
700
 * should be in the range [0, 63] and will automatically be shifted to the
701
 * appropriately place in the IP tos field.
702
 *
703
 * If 'kernel_print_port' is true and the port is dynamically assigned by
704
 * the kernel, print the chosen port. */
705
int
706
inet_open_passive(int style, const char *target, int default_port,
707
                  struct sockaddr_storage *ssp, uint8_t dscp,
708
                  bool kernel_print_port)
709
0
{
710
0
    bool kernel_chooses_port;
711
0
    struct sockaddr_storage ss;
712
0
    int fd = 0, error;
713
0
    unsigned int yes = 1;
714
0
    bool dns_failure;
715
716
0
    if (!inet_parse_passive(target, default_port, &ss, true, &dns_failure)) {
717
0
        if (dns_failure) {
718
            /* DNS failure means asynchronous DNS resolution is in progress,
719
             * or that the name does currently not resolve. */
720
0
            return -EAGAIN;
721
0
        }
722
0
        return -EAFNOSUPPORT;
723
0
    }
724
0
    kernel_chooses_port = ss_get_port(&ss) == 0;
725
726
    /* Create non-blocking socket, set SO_REUSEADDR. */
727
0
    fd = socket(ss.ss_family, style, 0);
728
0
    if (fd < 0) {
729
0
        error = sock_errno();
730
0
        VLOG_ERR("%s: socket: %s", target, sock_strerror(error));
731
0
        return -error;
732
0
    }
733
0
    error = set_nonblocking(fd);
734
0
    if (error) {
735
0
        goto error;
736
0
    }
737
0
    if (style == SOCK_STREAM
738
0
        && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) < 0) {
739
0
        error = sock_errno();
740
0
        VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s",
741
0
                 target, sock_strerror(error));
742
0
        goto error;
743
0
    }
744
745
    /* Bind. */
746
0
    if (bind(fd, (struct sockaddr *) &ss, ss_length(&ss)) < 0) {
747
0
        error = sock_errno();
748
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
749
0
        VLOG_ERR_RL(&rl, "%s: bind: %s", target, sock_strerror(error));
750
0
        goto error;
751
0
    }
752
753
    /* The dscp bits must be configured before connect() to ensure that the TOS
754
     * field is set during the connection establishment.  If set after
755
     * connect(), the handshake SYN frames will be sent with a TOS of 0. */
756
0
    error = set_dscp(fd, ss.ss_family, dscp);
757
0
    if (error) {
758
0
        VLOG_ERR("%s: set_dscp: %s", target, sock_strerror(error));
759
0
        goto error;
760
0
    }
761
762
    /* Listen. */
763
0
    if (style == SOCK_STREAM && listen(fd, 10) < 0) {
764
0
        error = sock_errno();
765
0
        VLOG_ERR("%s: listen: %s", target, sock_strerror(error));
766
0
        goto error;
767
0
    }
768
769
0
    if (ssp || kernel_chooses_port) {
770
0
        socklen_t ss_len = sizeof ss;
771
0
        if (getsockname(fd, (struct sockaddr *) &ss, &ss_len) < 0) {
772
0
            error = sock_errno();
773
0
            VLOG_ERR("%s: getsockname: %s", target, sock_strerror(error));
774
0
            goto error;
775
0
        }
776
0
        if (kernel_chooses_port && kernel_print_port) {
777
0
            VLOG_INFO("%s: listening on port %"PRIu16,
778
0
                      target, ss_get_port(&ss));
779
0
        }
780
0
        if (ssp) {
781
0
            *ssp = ss;
782
0
        }
783
0
    }
784
785
0
    return fd;
786
787
0
error:
788
0
    if (ssp) {
789
0
        memset(ssp, 0, sizeof *ssp);
790
0
    }
791
0
    closesocket(fd);
792
0
    return -error;
793
0
}
794
795
/* Parses 'target_', which may be an IPv4 address or an IPv6 address, the
 * latter optionally enclosed in square brackets.  No port is accepted and no
 * host name resolution is performed.
 *
 * On success, returns true and stores the parsed address into '*ss'.
 * On failure, logs an error, stores zeros into '*ss', and returns false. */
bool
inet_parse_address(const char *target_, struct sockaddr_storage *ss)
{
    /* Work on a private copy because bracket stripping edits the string. */
    char *copy = xstrdup(target_);
    bool ok;

    ok = parse_sockaddr_components(ss, unbracket(copy), NULL, 0,
                                   target_, false, NULL);
    if (!ok) {
        memset(ss, 0, sizeof *ss);
    }

    free(copy);
    return ok;
}
813
814
/* Reads exactly 'size' bytes from 'fd' into 'p_', retrying reads that are
 * interrupted by a signal.  '*bytes_read' always receives the number of bytes
 * actually read.
 *
 * Returns 0 if all 'size' bytes were read, EOF if end of file was reached
 * first, or a positive errno value if a read failed. */
int
read_fully(int fd, void *p_, size_t size, size_t *bytes_read)
{
    uint8_t *cursor = p_;
    size_t remaining = size;

    *bytes_read = 0;
    while (remaining > 0) {
        ssize_t n = read(fd, cursor, remaining);

        if (n == 0) {
            return EOF;
        }
        if (n < 0) {
            if (errno != EINTR) {
                return errno;
            }
            continue;
        }

        *bytes_read += n;
        cursor += n;
        remaining -= n;
    }
    return 0;
}
834
835
/* Writes exactly 'size' bytes from 'p_' to 'fd', retrying writes that are
 * interrupted by a signal.  '*bytes_written' always receives the number of
 * bytes actually written.
 *
 * Returns 0 if all 'size' bytes were written, EPROTO (after logging a
 * warning) if write() returned 0, or a positive errno value if a write
 * failed. */
int
write_fully(int fd, const void *p_, size_t size, size_t *bytes_written)
{
    const uint8_t *cursor = p_;
    size_t remaining = size;

    *bytes_written = 0;
    while (remaining > 0) {
        ssize_t n = write(fd, cursor, remaining);

        if (n < 0) {
            if (errno != EINTR) {
                return errno;
            }
            continue;
        }
        if (n == 0) {
            VLOG_WARN("write returned 0");
            return EPROTO;
        }

        *bytes_written += n;
        cursor += n;
        remaining -= n;
    }
    return 0;
}
856
857
/* Calls fsync() on the directory that contains 'file_name'.  Returns 0 if
 * successful, otherwise a positive errno value.  A directory that rejects
 * fsync() with EINVAL or EROFS is treated as success.  On Windows this does
 * nothing and returns 0. */
int
fsync_parent_dir(const char *file_name)
{
    int error = 0;
#ifndef _WIN32
    char *dir = dir_name(file_name);
    int fd = open(dir, O_RDONLY);

    if (fd < 0) {
        error = errno;
        VLOG_ERR("%s: open failed (%s)", dir, ovs_strerror(error));
    } else {
        /* EINVAL and EROFS mean that this directory does not support
         * synchronization, which is not really an error. */
        if (fsync(fd) && errno != EINVAL && errno != EROFS) {
            error = errno;
            VLOG_ERR("%s: fsync failed (%s)", dir, ovs_strerror(error));
        }
        close(fd);
    }
    free(dir);
#endif

    return error;
}
889
890
/* Looks up the modification time of the file named 'file_name' with the
 * finest precision the platform's 'struct stat' offers.  On success, stores
 * the time in '*mtime' and returns 0.  On error, stores zeros in '*mtime' and
 * returns a positive errno value. */
int
get_mtime(const char *file_name, struct timespec *mtime)
{
    struct stat st;

    if (stat(file_name, &st) != 0) {
        mtime->tv_nsec = 0;
        mtime->tv_sec = 0;
        return errno;
    }

    mtime->tv_sec = st.st_mtime;

    /* The nanosecond part lives in a differently named field depending on
     * the platform, or does not exist at all. */
#if HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
    mtime->tv_nsec = st.st_mtim.tv_nsec;
#elif HAVE_STRUCT_STAT_ST_MTIMENSEC
    mtime->tv_nsec = st.st_mtimensec;
#else
    mtime->tv_nsec = 0;
#endif

    return 0;
}
916
917
static int
918
getsockopt_int(int fd, int level, int option, const char *optname, int *valuep)
919
0
{
920
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
921
0
    socklen_t len;
922
0
    int value;
923
0
    int error;
924
925
0
    len = sizeof value;
926
0
    if (getsockopt(fd, level, option, &value, &len)) {
927
0
        error = sock_errno();
928
0
        VLOG_ERR_RL(&rl, "getsockopt(%s): %s", optname, sock_strerror(error));
929
0
    } else if (len != sizeof value) {
930
0
        error = EINVAL;
931
0
        VLOG_ERR_RL(&rl, "getsockopt(%s): value is %u bytes (expected %"PRIuSIZE")",
932
0
                    optname, (unsigned int) len, sizeof value);
933
0
    } else {
934
0
        error = 0;
935
0
    }
936
937
0
    *valuep = error ? 0 : value;
938
0
    return error;
939
0
}
940
941
static void
942
describe_sockaddr(struct ds *string, int fd,
943
                  int (*getaddr)(int, struct sockaddr *, socklen_t *))
944
0
{
945
0
    struct sockaddr_storage ss;
946
0
    socklen_t len = sizeof ss;
947
948
0
    if (!getaddr(fd, (struct sockaddr *) &ss, &len)) {
949
0
        if (ss.ss_family == AF_INET || ss.ss_family == AF_INET6) {
950
0
            ss_format_address(&ss, string);
951
0
            ds_put_format(string, ":%"PRIu16, ss_get_port(&ss));
952
0
#ifndef _WIN32
953
0
        } else if (ss.ss_family == AF_UNIX) {
954
0
            struct sockaddr_un sun;
955
0
            const char *null;
956
0
            size_t maxlen;
957
958
0
            memcpy(&sun, &ss, sizeof sun);
959
0
            maxlen = len - offsetof(struct sockaddr_un, sun_path);
960
0
            null = memchr(sun.sun_path, '\0', maxlen);
961
0
            ds_put_buffer(string, sun.sun_path,
962
0
                          null ? null - sun.sun_path : maxlen);
963
0
#endif
964
0
        }
965
0
#ifdef HAVE_NETLINK
966
0
        else if (ss.ss_family == AF_NETLINK) {
967
0
            int protocol;
968
969
/* SO_PROTOCOL was introduced in 2.6.32.  Support it regardless of the version
970
 * of the Linux kernel headers in use at build time. */
971
#ifndef SO_PROTOCOL
972
#define SO_PROTOCOL 38
973
#endif
974
975
0
            if (!getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, "SO_PROTOCOL",
976
0
                                &protocol)) {
977
0
                switch (protocol) {
978
0
                case NETLINK_ROUTE:
979
0
                    ds_put_cstr(string, "NETLINK_ROUTE");
980
0
                    break;
981
982
0
                case NETLINK_GENERIC:
983
0
                    ds_put_cstr(string, "NETLINK_GENERIC");
984
0
                    break;
985
986
0
                default:
987
0
                    ds_put_format(string, "AF_NETLINK family %d", protocol);
988
0
                    break;
989
0
                }
990
0
            } else {
991
0
                ds_put_cstr(string, "AF_NETLINK");
992
0
            }
993
0
        }
994
0
#endif
995
0
#if __linux__
996
0
        else if (ss.ss_family == AF_PACKET) {
997
0
            struct sockaddr_ll sll;
998
999
0
            memcpy(&sll, &ss, sizeof sll);
1000
0
            ds_put_cstr(string, "AF_PACKET");
1001
0
            if (sll.sll_ifindex) {
1002
0
                char name[IFNAMSIZ];
1003
1004
0
                if (if_indextoname(sll.sll_ifindex, name)) {
1005
0
                    ds_put_format(string, "(%s)", name);
1006
0
                } else {
1007
0
                    ds_put_format(string, "(ifindex=%d)", sll.sll_ifindex);
1008
0
                }
1009
0
            }
1010
0
            if (sll.sll_protocol) {
1011
0
                ds_put_format(string, "(protocol=0x%"PRIu16")",
1012
0
                              ntohs(sll.sll_protocol));
1013
0
            }
1014
0
        }
1015
0
#endif
1016
0
        else if (ss.ss_family == AF_UNSPEC) {
1017
0
            ds_put_cstr(string, "AF_UNSPEC");
1018
0
        } else {
1019
0
            ds_put_format(string, "AF_%d", (int) ss.ss_family);
1020
0
        }
1021
0
    }
1022
0
}
1023
1024
1025
#ifdef __linux__
1026
/* Appends to 'string' a space followed by the target of the
 * "/proc/self/fd/<fd>" symbolic link, i.e. the name that the kernel reports
 * for file descriptor 'fd'.  If the name fills the whole internal buffer, it
 * may have been truncated, so "..." is appended to say so.
 *
 * Appends nothing if the link cannot be read or is empty. */
static void
put_fd_filename(struct ds *string, int fd)
{
    char buf[1024];
    char *linkname;
    ssize_t n;

    linkname = xasprintf("/proc/self/fd/%d", fd);
    n = readlink(linkname, buf, sizeof buf);
    if (n > 0) {
        ds_put_char(string, ' ');
        ds_put_buffer(string, buf, n);

        /* readlink() silently truncates and never returns more than the
         * buffer size, so a completely full buffer is the only available
         * sign of truncation. */
        if ((size_t) n >= sizeof buf) {
            ds_put_cstr(string, "...");
        }
    }
    free(linkname);
}
1044
#endif
1045
1046
/* Returns a malloc()'d string describing 'fd', for use in logging. */
1047
char *
1048
describe_fd(int fd)
1049
0
{
1050
0
    struct ds string;
1051
0
    struct stat s;
1052
1053
0
    ds_init(&string);
1054
0
#ifndef _WIN32
1055
0
    if (fstat(fd, &s)) {
1056
0
        ds_put_format(&string, "fstat failed (%s)", ovs_strerror(errno));
1057
0
    } else if (S_ISSOCK(s.st_mode)) {
1058
0
        describe_sockaddr(&string, fd, getsockname);
1059
0
        ds_put_cstr(&string, "<->");
1060
0
        describe_sockaddr(&string, fd, getpeername);
1061
0
    } else {
1062
0
        ds_put_cstr(&string, (isatty(fd) ? "tty"
1063
0
                              : S_ISDIR(s.st_mode) ? "directory"
1064
0
                              : S_ISCHR(s.st_mode) ? "character device"
1065
0
                              : S_ISBLK(s.st_mode) ? "block device"
1066
0
                              : S_ISREG(s.st_mode) ? "file"
1067
0
                              : S_ISFIFO(s.st_mode) ? "FIFO"
1068
0
                              : S_ISLNK(s.st_mode) ? "symbolic link"
1069
0
                              : "unknown"));
1070
0
#ifdef __linux__
1071
0
        put_fd_filename(&string, fd);
1072
0
#endif
1073
0
    }
1074
#else
1075
    ds_put_format(&string,"file descriptor");
1076
#endif /* _WIN32 */
1077
0
    return ds_steal_cstr(&string);
1078
0
}
1079

1080
/* sockaddr helpers. */
1081
1082
static struct sockaddr_in *
1083
sin_cast(const struct sockaddr *sa)
1084
0
{
1085
0
    return ALIGNED_CAST(struct sockaddr_in *, sa);
1086
0
}
1087
1088
static struct sockaddr_in6 *
1089
sin6_cast(const struct sockaddr *sa)
1090
0
{
1091
0
    return ALIGNED_CAST(struct sockaddr_in6 *, sa);
1092
0
}
1093
1094
/* Returns true if the address family of 'sa' is AF_INET or AF_INET6, false
 * for any other family. */
bool
sa_is_ip(const struct sockaddr *sa)
{
    switch (sa->sa_family) {
    case AF_INET:
    case AF_INET6:
        return true;

    default:
        return false;
    }
}
1100
1101
/* Returns the IPv4 or IPv6 address in 'sa'.  Returns IPv4 addresses as
1102
 * v6-mapped. */
1103
struct in6_addr
1104
sa_get_address(const struct sockaddr *sa)
1105
0
{
1106
0
    ovs_assert(sa_is_ip(sa));
1107
0
    return (sa->sa_family == AF_INET
1108
0
            ? in6_addr_mapped_ipv4(sin_cast(sa)->sin_addr.s_addr)
1109
0
            : sin6_cast(sa)->sin6_addr);
1110
0
}
1111
1112
/* Returns the IPv4 or IPv6 port in 'sa'. */
1113
uint16_t
1114
sa_get_port(const struct sockaddr *sa)
1115
0
{
1116
0
    ovs_assert(sa_is_ip(sa));
1117
0
    return ntohs(sa->sa_family == AF_INET
1118
0
                 ? sin_cast(sa)->sin_port
1119
0
                 : sin6_cast(sa)->sin6_port);
1120
0
}
1121
1122
/* Returns true if 'name' is safe to include inside a network address field.
1123
 * We want to avoid names that include confusing punctuation, etc. */
1124
static bool OVS_UNUSED
1125
is_safe_name(const char *name)
1126
0
{
1127
0
    if (!name[0] || isdigit((unsigned char) name[0])) {
1128
0
        return false;
1129
0
    }
1130
0
    for (const char *p = name; *p; p++) {
1131
0
        if (!isalnum((unsigned char) *p) && *p != '-' && *p != '_') {
1132
0
            return false;
1133
0
        }
1134
0
    }
1135
0
    return true;
1136
0
}
1137
1138
static void
1139
sa_format_address__(const struct sockaddr *sa,
1140
                    const char *lbrack, const char *rbrack,
1141
                    struct ds *s)
1142
0
{
1143
0
    ovs_assert(sa_is_ip(sa));
1144
0
    if (sa->sa_family == AF_INET) {
1145
0
        ds_put_format(s, IP_FMT, IP_ARGS(sin_cast(sa)->sin_addr.s_addr));
1146
0
    } else {
1147
0
        const struct sockaddr_in6 *sin6 = sin6_cast(sa);
1148
1149
0
        ds_put_cstr(s, lbrack);
1150
0
        ds_reserve(s, s->length + INET6_ADDRSTRLEN);
1151
0
        char *tail = &s->string[s->length];
1152
0
        inet_ntop(AF_INET6, sin6->sin6_addr.s6_addr, tail, INET6_ADDRSTRLEN);
1153
0
        s->length += strlen(tail);
1154
1155
0
#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
1156
0
        uint32_t scope = sin6->sin6_scope_id;
1157
0
        if (scope) {
1158
0
            char namebuf[IF_NAMESIZE];
1159
0
            char *name = if_indextoname(scope, namebuf);
1160
0
            ds_put_char(s, '%');
1161
0
            if (name && is_safe_name(name)) {
1162
0
                ds_put_cstr(s, name);
1163
0
            } else {
1164
0
                ds_put_format(s, "%"PRIu32, scope);
1165
0
            }
1166
0
        }
1167
0
#endif
1168
1169
0
        ds_put_cstr(s, rbrack);
1170
0
    }
1171
0
}
1172
1173
/* Appends the IPv4 or IPv6 address in 'sa' to 's'.  An IPv6 address is
 * enclosed in square brackets; an IPv4 address is not. */
void
sa_format_address(const struct sockaddr *sa, struct ds *s)
{
    const char *left = "[";
    const char *right = "]";

    sa_format_address__(sa, left, right, s);
}
1180
1181
/* Appends the IPv4 or IPv6 address in 'sa' to 's'.  Unlike
 * sa_format_address(), never encloses the address in square brackets. */
void
sa_format_address_nobracks(const struct sockaddr *sa, struct ds *s)
{
    const char *no_bracket = "";

    sa_format_address__(sa, no_bracket, no_bracket, s);
}
1188
1189
/* Returns the size in bytes of the socket address structure that corresponds
 * to the address family of 'sa'.  Only AF_INET and AF_INET6 are supported;
 * any other family reaches OVS_NOT_REACHED(). */
size_t
sa_length(const struct sockaddr *sa)
{
    if (sa->sa_family == AF_INET) {
        return sizeof(struct sockaddr_in);
    } else if (sa->sa_family == AF_INET6) {
        return sizeof(struct sockaddr_in6);
    }

    OVS_NOT_REACHED();
}
1203

1204
/* sockaddr_storage helpers.  */
1205
1206
static const struct sockaddr *
1207
sa_cast(const struct sockaddr_storage *ss)
1208
0
{
1209
0
    return ALIGNED_CAST(const struct sockaddr *, ss);
1210
0
}
1211
1212
/* Same as sa_is_ip(), but takes a 'struct sockaddr_storage'. */
bool
ss_is_ip(const struct sockaddr_storage *ss)
{
    const struct sockaddr *sa = sa_cast(ss);

    return sa_is_ip(sa);
}
1217
1218
/* Same as sa_get_port(), but takes a 'struct sockaddr_storage'. */
uint16_t
ss_get_port(const struct sockaddr_storage *ss)
{
    const struct sockaddr *sa = sa_cast(ss);

    return sa_get_port(sa);
}
1223
1224
struct in6_addr
1225
ss_get_address(const struct sockaddr_storage *ss)
1226
0
{
1227
0
    return sa_get_address(sa_cast(ss));
1228
0
}
1229
1230
/* Same as sa_format_address(), but takes a 'struct sockaddr_storage'. */
void
ss_format_address(const struct sockaddr_storage *ss, struct ds *s)
{
    const struct sockaddr *sa = sa_cast(ss);

    sa_format_address(sa, s);
}
1235
1236
/* Same as sa_format_address_nobracks(), but takes a
 * 'struct sockaddr_storage'. */
void
ss_format_address_nobracks(const struct sockaddr_storage *ss, struct ds *s)
{
    const struct sockaddr *sa = sa_cast(ss);

    sa_format_address_nobracks(sa, s);
}
1241
1242
/* Same as sa_length(), but takes a 'struct sockaddr_storage'. */
size_t
ss_length(const struct sockaddr_storage *ss)
{
    const struct sockaddr *sa = sa_cast(ss);

    return sa_length(sa);
}
1247

1248
/* Returns a string that describes socket error number 'error'.
 *
 * Windows socket calls do not set 'errno'; the caller must obtain the error
 * number from WSAGetLastError() and pass it to this function to get the
 * correct string.
 *
 * ovs_strerror() calls strerror_r(), which is right for POSIX but would not
 * yield the correct string for Windows socket errors. */
const char *
sock_strerror(int error)
{
#ifndef _WIN32
    return ovs_strerror(error);
#else
    return ovs_format_message(error);
#endif
}
1263

1264
#ifdef __linux__
1265
static int
1266
emulate_sendmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
1267
                 unsigned int flags)
1268
0
{
1269
0
    for (unsigned int i = 0; i < n; i++) {
1270
0
        ssize_t retval = sendmsg(fd, &msgs[i].msg_hdr, flags);
1271
0
        if (retval < 0) {
1272
0
            return i ? i : retval;
1273
0
        }
1274
0
        msgs[i].msg_len = retval;
1275
0
    }
1276
0
    return n;
1277
0
}
1278
1279
#ifndef HAVE_SENDMMSG
1280
/* Replacement for sendmmsg(), built when HAVE_SENDMMSG is not defined.
 * Implemented entirely by emulate_sendmmsg(). */
int
sendmmsg(int fd, struct mmsghdr *msgs, unsigned int n, unsigned int flags)
{
    int n_sent = emulate_sendmmsg(fd, msgs, n, flags);

    return n_sent;
}
1285
#else
1286
/* sendmmsg was redefined in lib/socket-util.c, should undef sendmmsg here
1287
 * to avoid recursion */
1288
#undef sendmmsg
1289
/* Wrapper around sendmmsg() that falls back to emulate_sendmmsg() when the
 * real call fails with ENOSYS.  Once that has happened, all later calls go
 * straight to the emulation.
 *
 * On the call that detects ENOSYS, 'errno' is restored to its previous value
 * before the emulation runs.  Any other result of sendmmsg() is returned
 * unchanged. */
int
wrap_sendmmsg(int fd, struct mmsghdr *msgs, unsigned int n, unsigned int flags)
{
    static bool sendmmsg_broken = false;

    if (sendmmsg_broken) {
        return emulate_sendmmsg(fd, msgs, n, flags);
    }

    int save_errno = errno;
    int retval = sendmmsg(fd, msgs, n, flags);
    if (retval < 0 && errno == ENOSYS) {
        sendmmsg_broken = true;
        errno = save_errno;
        return emulate_sendmmsg(fd, msgs, n, flags);
    }
    return retval;
}
1304
#endif
1305
1306
static int
1307
emulate_recvmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
1308
                 int flags, struct timespec *timeout OVS_UNUSED)
1309
0
{
1310
0
    ovs_assert(!timeout);       /* XXX not emulated */
1311
1312
0
    bool waitforone = flags & MSG_WAITFORONE;
1313
0
    flags &= ~MSG_WAITFORONE;
1314
1315
0
    for (unsigned int i = 0; i < n; i++) {
1316
0
        ssize_t retval = recvmsg(fd, &msgs[i].msg_hdr, flags);
1317
0
        if (retval < 0) {
1318
0
            return i ? i : retval;
1319
0
        }
1320
0
        msgs[i].msg_len = retval;
1321
1322
0
        if (waitforone) {
1323
0
            flags |= MSG_DONTWAIT;
1324
0
        }
1325
0
    }
1326
0
    return n;
1327
0
}
1328
1329
#ifndef HAVE_SENDMMSG
1330
/* Replacement for recvmmsg(), built when HAVE_SENDMMSG is not defined.
 * Implemented entirely by emulate_recvmmsg(), so 'timeout' must be NULL. */
int
recvmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
         int flags, struct timespec *timeout)
{
    int n_rcvd = emulate_recvmmsg(fd, msgs, n, flags, timeout);

    return n_rcvd;
}
1336
#else
1337
/* recvmmsg was redefined in lib/socket-util.c, should undef recvmmsg here
1338
 * to avoid recursion */
1339
#undef recvmmsg
1340
/* Wrapper around recvmmsg() that falls back to emulate_recvmmsg() when the
 * real call fails with ENOSYS.  Once that has happened, all later calls go
 * straight to the emulation.
 *
 * Because the emulation does not support timeouts, 'timeout' must be NULL
 * (this is asserted).  On the call that detects ENOSYS, 'errno' is restored
 * to its previous value before the emulation runs.  Any other result of
 * recvmmsg() is returned unchanged. */
int
wrap_recvmmsg(int fd, struct mmsghdr *msgs, unsigned int n,
              int flags, struct timespec *timeout)
{
    ovs_assert(!timeout);       /* XXX not emulated */

    static bool recvmmsg_broken = false;

    if (recvmmsg_broken) {
        return emulate_recvmmsg(fd, msgs, n, flags, timeout);
    }

    int save_errno = errno;
    int retval = recvmmsg(fd, msgs, n, flags, timeout);
    if (retval < 0 && errno == ENOSYS) {
        recvmmsg_broken = true;
        errno = save_errno;
        return emulate_recvmmsg(fd, msgs, n, flags, timeout);
    }
    return retval;
}
1358
#endif
1359
#endif /* __linux__ */