Coverage Report

Created: 2026-03-02 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openvswitch/lib/dpif-netlink.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2008-2018 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
19
#include "dpif-netlink.h"
20
21
#include <ctype.h>
22
#include <errno.h>
23
#include <fcntl.h>
24
#include <inttypes.h>
25
#include <net/if.h>
26
#include <linux/types.h>
27
#include <linux/pkt_sched.h>
28
#include <poll.h>
29
#include <stdlib.h>
30
#include <strings.h>
31
#include <sys/epoll.h>
32
#include <sys/stat.h>
33
#include <unistd.h>
34
35
#include "bitmap.h"
36
#include "dpif-netlink-rtnl.h"
37
#include "dpif-offload.h"
38
#include "dpif-provider.h"
39
#include "fat-rwlock.h"
40
#include "flow.h"
41
#include "netdev-linux.h"
42
#include "netdev-provider.h"
43
#include "netdev-vport.h"
44
#include "netdev.h"
45
#include "netlink-conntrack.h"
46
#include "netlink-notifier.h"
47
#include "netlink-socket.h"
48
#include "netlink.h"
49
#include "netnsid.h"
50
#include "odp-util.h"
51
#include "openvswitch/dynamic-string.h"
52
#include "openvswitch/flow.h"
53
#include "openvswitch/hmap.h"
54
#include "openvswitch/match.h"
55
#include "openvswitch/ofpbuf.h"
56
#include "openvswitch/poll-loop.h"
57
#include "openvswitch/shash.h"
58
#include "openvswitch/thread.h"
59
#include "openvswitch/usdt-probes.h"
60
#include "openvswitch/vlog.h"
61
#include "packets.h"
62
#include "random.h"
63
#include "sset.h"
64
#include "timeval.h"
65
#include "unaligned.h"
66
#include "util.h"
67
68
VLOG_DEFINE_THIS_MODULE(dpif_netlink);
69
/* 'WINDOWS' is a compile-time constant (1 on Windows builds, 0 otherwise) so
 * that platform checks can be written as ordinary C expressions. */
#ifdef _WIN32
#include "wmi.h"
enum { WINDOWS = 1 };
#else
enum { WINDOWS = 0 };
#endif

/* Upper bound on the number of per-port channel slots. */
enum { MAX_PORTS = USHRT_MAX };

/* This ethtool flag was introduced in Linux 2.6.24, so it might be
 * missing if we have old headers. */
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */

#define OPERATE_MAX_OPS 50

#ifndef EPOLLEXCLUSIVE
#define EPOLLEXCLUSIVE (1u << 28)
#endif

/* A feature bit that is requested only to probe whether the kernel rejects
 * unknown features (see dpif_netlink_open()).
 *
 * The expansion deliberately carries no trailing semicolon, so the macro can
 * be used inside larger expressions such as '~(A | OVS_DP_F_UNSUPPORTED)'. */
#define OVS_DP_F_UNSUPPORTED (1u << 31)

/* This PID is not used by the kernel datapath when using dispatch per CPU,
 * but it is required to be set (not zero). */
#define DPIF_NETLINK_PER_CPU_PID UINT32_MAX
92
/* In-memory form of a datapath (OVS_DP_*) Generic Netlink message, used both
 * for requests that are sent and for replies that are parsed. */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;                        /* Command code of the message. */

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes. */
    const char *name;                   /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;         /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;             /* OVS_DP_ATTR_USER_FEATURES. */
    uint32_t cache_size;                /* OVS_DP_ATTR_MASKS_CACHE_SIZE. */
    const struct ovs_dp_stats *stats;   /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                        /* OVS_DP_ATTR_MEGAFLOW_STATS. */
    const uint32_t *upcall_pids;        /* OVS_DP_ATTR_PER_CPU_PIDS. */
    uint32_t n_upcall_pids;             /* Number of elements that
                                         * 'upcall_pids' points to. */
};
110
111
/* Forward declarations: helpers that operate on 'struct dpif_netlink_dp'. */
static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
                                       const struct ofpbuf *);
static void dpif_netlink_dp_dump_start(struct nl_dump *);
static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
                                    struct dpif_netlink_dp *reply,
                                    struct ofpbuf **bufp);
static int dpif_netlink_dp_get(const struct dpif *,
                               struct dpif_netlink_dp *reply,
                               struct ofpbuf **bufp);

/* Forward declarations: datapath user-feature accessors. */
static int dpif_netlink_set_features(struct dpif *dpif_,
                                     uint32_t new_features);
static uint32_t dpif_netlink_get_features(struct dpif *dpif_);

/* Forward declaration: unixctl command handler. */
static void dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn,
                                               int argc, const char *argv[],
                                               void *aux);
129
130
struct dpif_netlink_flow {
131
    /* Generic Netlink header. */
132
    uint8_t cmd;
133
134
    /* struct ovs_header. */
135
    unsigned int nlmsg_flags;
136
    int dp_ifindex;
137
138
    /* Attributes.
139
     *
140
     * The 'stats' member points to 64-bit data that might only be aligned on
141
     * 32-bit boundaries, so get_unaligned_u64() should be used to access its
142
     * values.
143
     *
144
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
145
     * the Netlink version of the command, even if actions_len is zero. */
146
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
147
    size_t key_len;
148
    const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
149
    size_t mask_len;
150
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
151
    size_t actions_len;
152
    ovs_u128 ufid;                      /* OVS_FLOW_ATTR_FLOW_ID. */
153
    bool ufid_present;                  /* Is there a UFID? */
154
    bool ufid_terse;                    /* Skip serializing key/mask/acts? */
155
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
156
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
157
    const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
158
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
159
    bool probe;                         /* OVS_FLOW_ATTR_PROBE. */
160
};
161
162
/* Forward declarations: helpers that operate on 'struct dpif_netlink_flow'. */
static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
                                         const struct ofpbuf *);
static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
                                        struct ofpbuf *);
static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
                                      struct dpif_netlink_flow *reply,
                                      struct ofpbuf **bufp);
static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
                                        struct dpif_flow_stats *);
static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
                                           const struct dpif_netlink_flow *);
174
175
/* A single upcall channel between the kernel datapath and userspace.  In
 * per-vport dispatch mode 'struct dpif_netlink' keeps one of these per port
 * index. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket; NULL if the slot is unused. */
    long long int last_poll;    /* Last time this channel was polled. */
};
180
181
#ifdef _WIN32
/* Number of netlink sockets in each handler's socket pool. */
#define VPORT_SOCK_POOL_SIZE 1

/* On Windows, there is no native support for epoll.  There are equivalent
 * interfaces though, that are not used currently.  For simplicity, a pool of
 * netlink sockets is used instead, and each socket in the pool is
 * represented by a 'struct dpif_windows_vport_sock'.  Because it is a pool,
 * multiple OVS ports may be sharing the same socket.  In the future, we can
 * add a reference count and such fields. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* netlink socket. */
};
#endif
193
194
/* State of one upcall handler.  Which members are in use depends on the
 * dispatch mode of the datapath. */
struct dpif_handler {
    /* per-vport dispatch mode. */
    struct epoll_event *epoll_events; /* Event buffer for this handler. */
    int epoll_fd;                 /* epoll fd that includes channel socks. */
    int n_events;                 /* Num events returned by epoll_wait(). */
    int event_offset;             /* Offset into 'epoll_events'. */

    /* per-cpu dispatch mode. */
    struct nl_sock *sock;         /* Each handler thread holds one netlink
                                     socket. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx;    /* Index to aid in allocating a
                                     socket in the pool to a port. */
#endif
};
212
213
/* Datapath interface for the openvswitch Linux kernel module. */
214
struct dpif_netlink {
215
    struct dpif dpif;
216
    int dp_ifindex;
217
    uint32_t user_features;
218
219
    /* Upcall messages. */
220
    struct fat_rwlock upcall_lock;
221
    struct dpif_handler *handlers;
222
    uint32_t n_handlers;           /* Num of upcall handlers. */
223
224
    /* Per-vport dispatch mode. */
225
    struct dpif_channel *channels; /* Array of channels for each port. */
226
    int uc_array_size;             /* Size of 'handler->channels' and */
227
                                   /* 'handler->epoll_events'. */
228
229
    /* Change notification. */
230
    struct nl_sock *port_notifier; /* vport multicast group subscriber. */
231
    bool refresh_channels;
232
};
233
234
static void report_loss(struct dpif_netlink *, struct dpif_channel *,
235
                        uint32_t ch_idx, uint32_t handler_id);
236
237
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
238
239
/* Generic Netlink family numbers for OVS.
240
 *
241
 * Initialized by dpif_netlink_init(). */
242
static int ovs_datapath_family;
243
static int ovs_vport_family;
244
static int ovs_flow_family;
245
static int ovs_packet_family;
246
static int ovs_meter_family;
247
static int ovs_ct_limit_family;
248
249
/* Generic Netlink multicast groups for OVS.
250
 *
251
 * Initialized by dpif_netlink_init(). */
252
static unsigned int ovs_vport_mcgroup;
253
254
/* If true, tunnel devices are created using OVS compat/genetlink.
255
 * If false, tunnel devices are created with rtnetlink and using light weight
256
 * tunnels. If we fail to create the tunnel the rtnetlink+LWT, then we fallback
257
 * to using the compat interface. */
258
static bool ovs_tunnels_out_of_tree = true;
259
260
static int dpif_netlink_init(void);
261
static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
262
static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
263
                                          odp_port_t port_no);
264
static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
265
static int dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *,
266
                                                        uint32_t n_handlers);
267
static void destroy_all_channels(struct dpif_netlink *);
268
static int dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *);
269
static void destroy_all_handlers(struct dpif_netlink *);
270
271
static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
272
                                         struct ofpbuf *);
273
static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
274
                                          const struct ofpbuf *);
275
static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
276
                                     odp_port_t port_no, const char *port_name,
277
                                     struct dpif_port *dpif_port);
278
static void vport_del_channels(struct dpif_netlink *, odp_port_t);
279
280
static int
281
create_nl_sock(struct dpif_netlink *dpif OVS_UNUSED, struct nl_sock **sockp)
282
    OVS_REQ_WRLOCK(dpif->upcall_lock)
283
0
{
284
0
#ifndef _WIN32
285
0
    return nl_sock_create(NETLINK_GENERIC, sockp);
286
#else
287
    /* Pick netlink sockets to use in a round-robin fashion from each
288
     * handler's pool of sockets. */
289
    struct dpif_handler *handler = &dpif->handlers[0];
290
    struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
291
    size_t index = handler->last_used_pool_idx;
292
293
    /* A pool of sockets is allocated when the handler is initialized. */
294
    if (sock_pool == NULL) {
295
        *sockp = NULL;
296
        return EINVAL;
297
    }
298
299
    ovs_assert(index < VPORT_SOCK_POOL_SIZE);
300
    *sockp = sock_pool[index].nl_sock;
301
    ovs_assert(*sockp);
302
    index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
303
    handler->last_used_pool_idx = index;
304
    return 0;
305
#endif
306
0
}
307
308
/* Releases a socket obtained from create_nl_sock().
 *
 * On Windows this is a no-op; elsewhere the socket is destroyed. */
static void
close_nl_sock(struct nl_sock *sock)
{
#ifdef _WIN32
    /* Nothing to do. */
#else
    nl_sock_destroy(sock);
#endif
}
315
316
static struct dpif_netlink *
317
dpif_netlink_cast(const struct dpif *dpif)
318
0
{
319
0
    dpif_assert_class(dpif, &dpif_netlink_class);
320
0
    return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
321
0
}
322
323
static inline bool
324
0
dpif_netlink_upcall_per_cpu(const struct dpif_netlink *dpif) {
325
0
    return !!((dpif)->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU);
326
0
}
327
328
static int
329
dpif_netlink_enumerate(struct sset *all_dps,
330
                       const struct dpif_class *dpif_class OVS_UNUSED)
331
0
{
332
0
    struct nl_dump dump;
333
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
334
0
    struct ofpbuf msg, buf;
335
0
    int error;
336
337
0
    error = dpif_netlink_init();
338
0
    if (error) {
339
0
        return error;
340
0
    }
341
342
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
343
0
    dpif_netlink_dp_dump_start(&dump);
344
0
    while (nl_dump_next(&dump, &msg, &buf)) {
345
0
        struct dpif_netlink_dp dp;
346
347
0
        if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
348
0
            sset_add(all_dps, dp.name);
349
0
        }
350
0
    }
351
0
    ofpbuf_uninit(&buf);
352
0
    return nl_dump_done(&dump);
353
0
}
354
355
static int
356
dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
357
                  bool create, struct dpif **dpifp)
358
0
{
359
0
    struct dpif_netlink_dp dp_request, dp;
360
0
    struct ofpbuf *buf;
361
0
    uint32_t upcall_pid;
362
0
    int error;
363
364
0
    error = dpif_netlink_init();
365
0
    if (error) {
366
0
        return error;
367
0
    }
368
369
    /* Create or look up datapath. */
370
0
    dpif_netlink_dp_init(&dp_request);
371
0
    upcall_pid = 0;
372
0
    dp_request.upcall_pid = &upcall_pid;
373
0
    dp_request.name = name;
374
375
0
    if (create) {
376
0
        dp_request.cmd = OVS_DP_CMD_NEW;
377
0
    } else {
378
0
        dp_request.cmd = OVS_DP_CMD_GET;
379
380
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
381
0
        if (error) {
382
0
            return error;
383
0
        }
384
0
        dp_request.user_features = dp.user_features;
385
0
        ofpbuf_delete(buf);
386
387
        /* Use OVS_DP_CMD_SET to report user features */
388
0
        dp_request.cmd = OVS_DP_CMD_SET;
389
0
    }
390
391
    /* Some older kernels will not reject unknown features. This will cause
392
     * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to
393
     * test for that, we attempt to set a feature that we know is not supported
394
     * by any kernel. If this feature is not rejected, we can assume we are
395
     * running on one of these older kernels.
396
     */
397
0
    dp_request.user_features |= OVS_DP_F_UNALIGNED;
398
0
    dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
399
0
    dp_request.user_features |= OVS_DP_F_UNSUPPORTED;
400
0
    error = dpif_netlink_dp_transact(&dp_request, NULL, NULL);
401
0
    if (error) {
402
        /* The Open vSwitch kernel module has two modes for dispatching
403
         * upcalls: per-vport and per-cpu.
404
         *
405
         * When dispatching upcalls per-vport, the kernel will
406
         * send the upcall via a Netlink socket that has been selected based on
407
         * the vport that received the packet that is causing the upcall.
408
         *
409
         * When dispatching upcall per-cpu, the kernel will send the upcall via
410
         * a Netlink socket that has been selected based on the cpu that
411
         * received the packet that is causing the upcall.
412
         *
413
         * First we test to see if the kernel module supports per-cpu
414
         * dispatching (the preferred method). If it does not support per-cpu
415
         * dispatching, we fall back to the per-vport dispatch mode.
416
         */
417
0
        dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED;
418
0
        dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
419
0
        dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
420
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
421
0
        if (error == EOPNOTSUPP) {
422
0
            dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
423
0
            dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
424
0
            error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
425
0
        }
426
0
        if (error) {
427
0
            return error;
428
0
        }
429
430
0
        error = open_dpif(&dp, dpifp);
431
0
        dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
432
0
    } else {
433
0
        VLOG_INFO("Kernel does not correctly support feature negotiation. "
434
0
                  "Using standard features.");
435
0
        dp_request.cmd = OVS_DP_CMD_SET;
436
0
        dp_request.user_features = 0;
437
0
        dp_request.user_features |= OVS_DP_F_UNALIGNED;
438
0
        dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
439
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
440
0
        if (error) {
441
0
            return error;
442
0
        }
443
0
        error = open_dpif(&dp, dpifp);
444
0
    }
445
446
0
    ofpbuf_delete(buf);
447
448
0
    if (create) {
449
0
        VLOG_INFO("Datapath dispatch mode: %s",
450
0
                  dpif_netlink_upcall_per_cpu(dpif_netlink_cast(*dpifp)) ?
451
0
                  "per-cpu" : "per-vport");
452
0
    }
453
454
0
    return error;
455
0
}
456
457
static int
458
open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
459
0
{
460
0
    struct dpif_netlink *dpif;
461
462
0
    dpif = xzalloc(sizeof *dpif);
463
0
    dpif->port_notifier = NULL;
464
0
    fat_rwlock_init(&dpif->upcall_lock);
465
466
0
    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
467
0
              dp->dp_ifindex, dp->dp_ifindex);
468
469
0
    dpif->dp_ifindex = dp->dp_ifindex;
470
0
    dpif->user_features = dp->user_features;
471
0
    *dpifp = &dpif->dpif;
472
473
0
    return 0;
474
0
}
475
476
#ifdef _WIN32
477
static void
478
vport_delete_sock_pool(struct dpif_handler *handler)
479
    OVS_REQ_WRLOCK(dpif->upcall_lock)
480
{
481
    if (handler->vport_sock_pool) {
482
        uint32_t i;
483
        struct dpif_windows_vport_sock *sock_pool =
484
            handler->vport_sock_pool;
485
486
        for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
487
            if (sock_pool[i].nl_sock) {
488
                nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
489
                nl_sock_destroy(sock_pool[i].nl_sock);
490
                sock_pool[i].nl_sock = NULL;
491
            }
492
        }
493
494
        free(handler->vport_sock_pool);
495
        handler->vport_sock_pool = NULL;
496
    }
497
}
498
499
static int
500
vport_create_sock_pool(struct dpif_handler *handler)
501
    OVS_REQ_WRLOCK(dpif->upcall_lock)
502
{
503
    struct dpif_windows_vport_sock *sock_pool;
504
    size_t i;
505
    int error = 0;
506
507
    sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
508
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
509
        error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
510
        if (error) {
511
            goto error;
512
        }
513
514
        /* Enable the netlink socket to receive packets.  This is equivalent to
515
         * calling nl_sock_join_mcgroup() to receive events. */
516
        error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
517
        if (error) {
518
           goto error;
519
        }
520
    }
521
522
    handler->vport_sock_pool = sock_pool;
523
    handler->last_used_pool_idx = 0;
524
    return 0;
525
526
error:
527
    vport_delete_sock_pool(handler);
528
    return error;
529
}
530
#endif /* _WIN32 */
531
532
/* Given the port number 'port_idx', extracts the pid of netlink socket
533
 * associated to the port and assigns it to 'upcall_pid'. */
534
static bool
535
vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
536
              uint32_t *upcall_pid)
537
0
{
538
    /* Since the nl_sock can only be assigned in either all
539
     * or none "dpif" channels, the following check
540
     * would suffice. */
541
0
    if (!dpif->channels[port_idx].sock) {
542
0
        return false;
543
0
    }
544
0
    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
545
546
0
    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
547
548
0
    return true;
549
0
}
550
551
static int
552
vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
553
                  struct nl_sock *sock)
554
0
{
555
0
    struct epoll_event event;
556
0
    uint32_t port_idx = odp_to_u32(port_no);
557
0
    size_t i;
558
0
    int error;
559
560
0
    if (dpif->handlers == NULL) {
561
0
        close_nl_sock(sock);
562
0
        return 0;
563
0
    }
564
565
    /* We assume that the datapath densely chooses port numbers, which can
566
     * therefore be used as an index into 'channels' and 'epoll_events' of
567
     * 'dpif'. */
568
0
    if (port_idx >= dpif->uc_array_size) {
569
0
        uint32_t new_size = port_idx + 1;
570
571
0
        if (new_size > MAX_PORTS) {
572
0
            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
573
0
                         dpif_name(&dpif->dpif), port_no);
574
0
            return EFBIG;
575
0
        }
576
577
0
        dpif->channels = xrealloc(dpif->channels,
578
0
                                  new_size * sizeof *dpif->channels);
579
580
0
        for (i = dpif->uc_array_size; i < new_size; i++) {
581
0
            dpif->channels[i].sock = NULL;
582
0
        }
583
584
0
        for (i = 0; i < dpif->n_handlers; i++) {
585
0
            struct dpif_handler *handler = &dpif->handlers[i];
586
587
0
            handler->epoll_events = xrealloc(handler->epoll_events,
588
0
                new_size * sizeof *handler->epoll_events);
589
590
0
        }
591
0
        dpif->uc_array_size = new_size;
592
0
    }
593
594
0
    vport_del_channels(dpif, port_no);
595
596
0
    memset(&event, 0, sizeof event);
597
0
    event.events = EPOLLIN | EPOLLEXCLUSIVE;
598
0
    event.data.u32 = port_idx;
599
600
0
    for (i = 0; i < dpif->n_handlers; i++) {
601
0
        struct dpif_handler *handler = &dpif->handlers[i];
602
603
0
#ifndef _WIN32
604
0
        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
605
0
                      &event) < 0) {
606
0
            error = errno;
607
0
            goto error;
608
0
        }
609
0
#endif
610
0
    }
611
0
    dpif->channels[port_idx].sock = sock;
612
0
    dpif->channels[port_idx].last_poll = LLONG_MIN;
613
614
0
    return 0;
615
616
0
error:
617
0
#ifndef _WIN32
618
0
    while (i--) {
619
0
        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
620
0
                  nl_sock_fd(sock), NULL);
621
0
    }
622
0
#endif
623
0
    dpif->channels[port_idx].sock = NULL;
624
625
0
    return error;
626
0
}
627
628
static void
629
vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
630
0
{
631
0
    uint32_t port_idx = odp_to_u32(port_no);
632
0
    size_t i;
633
634
0
    if (!dpif->handlers || port_idx >= dpif->uc_array_size
635
0
        || !dpif->channels[port_idx].sock) {
636
0
        return;
637
0
    }
638
639
0
    for (i = 0; i < dpif->n_handlers; i++) {
640
0
        struct dpif_handler *handler = &dpif->handlers[i];
641
0
#ifndef _WIN32
642
0
        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
643
0
                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
644
0
#endif
645
0
        handler->event_offset = handler->n_events = 0;
646
0
    }
647
0
#ifndef _WIN32
648
0
    nl_sock_destroy(dpif->channels[port_idx].sock);
649
0
#endif
650
0
    dpif->channels[port_idx].sock = NULL;
651
0
}
652
653
static void
654
destroy_all_channels(struct dpif_netlink *dpif)
655
    OVS_REQ_WRLOCK(dpif->upcall_lock)
656
0
{
657
0
    unsigned int i;
658
659
0
    if (!dpif->handlers) {
660
0
        return;
661
0
    }
662
663
0
    for (i = 0; i < dpif->uc_array_size; i++ ) {
664
0
        struct dpif_netlink_vport vport_request;
665
0
        uint32_t upcall_pids = 0;
666
667
0
        if (!dpif->channels[i].sock) {
668
0
            continue;
669
0
        }
670
671
        /* Turn off upcalls. */
672
0
        dpif_netlink_vport_init(&vport_request);
673
0
        vport_request.cmd = OVS_VPORT_CMD_SET;
674
0
        vport_request.dp_ifindex = dpif->dp_ifindex;
675
0
        vport_request.port_no = u32_to_odp(i);
676
0
        vport_request.n_upcall_pids = 1;
677
0
        vport_request.upcall_pids = &upcall_pids;
678
0
        dpif_netlink_vport_transact(&vport_request, NULL, NULL);
679
680
0
        vport_del_channels(dpif, u32_to_odp(i));
681
0
    }
682
683
0
    for (i = 0; i < dpif->n_handlers; i++) {
684
0
        struct dpif_handler *handler = &dpif->handlers[i];
685
686
0
        dpif_netlink_handler_uninit(handler);
687
0
        free(handler->epoll_events);
688
0
    }
689
0
    free(dpif->channels);
690
0
    free(dpif->handlers);
691
0
    dpif->handlers = NULL;
692
0
    dpif->channels = NULL;
693
0
    dpif->n_handlers = 0;
694
0
    dpif->uc_array_size = 0;
695
0
}
696
697
static void
698
destroy_all_handlers(struct dpif_netlink *dpif)
699
    OVS_REQ_WRLOCK(dpif->upcall_lock)
700
0
{
701
0
    int i = 0;
702
703
0
    if (!dpif->handlers) {
704
0
        return;
705
0
    }
706
0
    for (i = 0; i < dpif->n_handlers; i++) {
707
0
        struct dpif_handler *handler = &dpif->handlers[i];
708
0
        close_nl_sock(handler->sock);
709
0
    }
710
0
    free(dpif->handlers);
711
0
    dpif->handlers = NULL;
712
0
    dpif->n_handlers = 0;
713
0
}
714
715
static void
716
dpif_netlink_close(struct dpif *dpif_)
717
0
{
718
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
719
720
0
    nl_sock_destroy(dpif->port_notifier);
721
722
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
723
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
724
0
        destroy_all_handlers(dpif);
725
0
    } else {
726
0
        destroy_all_channels(dpif);
727
0
    }
728
0
    fat_rwlock_unlock(&dpif->upcall_lock);
729
730
0
    fat_rwlock_destroy(&dpif->upcall_lock);
731
0
    free(dpif);
732
0
}
733
734
static int
735
dpif_netlink_destroy(struct dpif *dpif_)
736
0
{
737
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
738
0
    struct dpif_netlink_dp dp;
739
740
0
    dpif_netlink_dp_init(&dp);
741
0
    dp.cmd = OVS_DP_CMD_DEL;
742
0
    dp.dp_ifindex = dpif->dp_ifindex;
743
0
    return dpif_netlink_dp_transact(&dp, NULL, NULL);
744
0
}
745
746
static bool
747
dpif_netlink_run(struct dpif *dpif_)
748
0
{
749
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
750
751
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
752
0
        if (dpif->refresh_channels) {
753
0
            dpif->refresh_channels = false;
754
0
            fat_rwlock_wrlock(&dpif->upcall_lock);
755
0
            dpif_netlink_refresh_handlers_vport_dispatch(dpif,
756
0
                                                         dpif->n_handlers);
757
0
            fat_rwlock_unlock(&dpif->upcall_lock);
758
0
        }
759
0
    }
760
0
    return false;
761
0
}
762
763
static int
764
dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
765
0
{
766
0
    struct dpif_netlink_dp dp;
767
0
    struct ofpbuf *buf;
768
0
    int error;
769
770
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
771
0
    if (!error) {
772
0
        memset(stats, 0, sizeof *stats);
773
774
0
        if (dp.stats) {
775
0
            stats->n_hit    = get_32aligned_u64(&dp.stats->n_hit);
776
0
            stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
777
0
            stats->n_lost   = get_32aligned_u64(&dp.stats->n_lost);
778
0
            stats->n_flows  = get_32aligned_u64(&dp.stats->n_flows);
779
0
        }
780
781
0
        if (dp.megaflow_stats) {
782
0
            stats->n_masks = dp.megaflow_stats->n_masks;
783
0
            stats->n_mask_hit = get_32aligned_u64(
784
0
                &dp.megaflow_stats->n_mask_hit);
785
0
            stats->n_cache_hit = get_32aligned_u64(
786
0
                &dp.megaflow_stats->n_cache_hit);
787
788
0
            if (!stats->n_cache_hit) {
789
                /* Old kernels don't use this field and always
790
                 * report zero instead.  Disable this stat. */
791
0
                stats->n_cache_hit = UINT64_MAX;
792
0
            }
793
0
        } else {
794
0
            stats->n_masks = UINT32_MAX;
795
0
            stats->n_mask_hit = UINT64_MAX;
796
0
            stats->n_cache_hit = UINT64_MAX;
797
0
        }
798
0
        ofpbuf_delete(buf);
799
0
    }
800
0
    return error;
801
0
}
802
803
static int
804
dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
805
                              uint32_t n_upcall_pids)
806
0
{
807
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
808
0
    int largest_cpu_id = ovs_numa_get_largest_core_id();
809
0
    struct dpif_netlink_dp request, reply;
810
0
    struct ofpbuf *bufp;
811
812
0
    uint32_t *corrected;
813
0
    int error, i, n_cores;
814
815
0
    if (largest_cpu_id == OVS_NUMA_UNSPEC) {
816
0
        largest_cpu_id = -1;
817
0
    }
818
819
    /* Some systems have non-continuous cpu core ids.  count_total_cores()
820
     * would return an accurate number, however, this number cannot be used.
821
     * e.g. If the largest core_id of a system is cpu9, but the system only
822
     * has 4 cpus then the OVS kernel module would throw a "CPU mismatch"
823
     * warning.  With the MAX() in place in this example we send an array of
824
     * size 10 and prevent the warning.  This has no bearing on the number of
825
     * threads created.
826
     */
827
0
    n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
828
0
    VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
829
0
             n_cores);
830
831
0
    dpif_netlink_dp_init(&request);
832
0
    request.cmd = OVS_DP_CMD_SET;
833
0
    request.name = dpif_->base_name;
834
0
    request.dp_ifindex = dpif->dp_ifindex;
835
0
    request.user_features = dpif->user_features |
836
0
                            OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
837
838
0
    corrected = xcalloc(n_cores, sizeof *corrected);
839
840
0
    for (i = 0; i < n_cores; i++) {
841
0
        corrected[i] = upcall_pids[i % n_upcall_pids];
842
0
    }
843
0
    request.upcall_pids = corrected;
844
0
    request.n_upcall_pids = n_cores;
845
846
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
847
0
    if (!error) {
848
0
        dpif->user_features = reply.user_features;
849
0
        ofpbuf_delete(bufp);
850
0
        if (!dpif_netlink_upcall_per_cpu(dpif)) {
851
0
            error = -EOPNOTSUPP;
852
0
        }
853
0
    }
854
0
    free(corrected);
855
0
    return error;
856
0
}
857
858
static int
859
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features)
860
0
{
861
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
862
0
    struct dpif_netlink_dp request, reply;
863
0
    struct ofpbuf *bufp;
864
0
    int error;
865
866
0
    dpif_netlink_dp_init(&request);
867
0
    request.cmd = OVS_DP_CMD_SET;
868
0
    request.name = dpif_->base_name;
869
0
    request.dp_ifindex = dpif->dp_ifindex;
870
0
    request.user_features = dpif->user_features | new_features;
871
872
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
873
0
    if (!error) {
874
0
        dpif->user_features = reply.user_features;
875
0
        ofpbuf_delete(bufp);
876
0
        if (!(dpif->user_features & new_features)) {
877
0
            return -EOPNOTSUPP;
878
0
        }
879
0
    }
880
881
0
    return error;
882
0
}
883
884
static uint32_t
885
dpif_netlink_get_features(struct dpif *dpif_)
886
0
{
887
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
888
889
0
    return dpif->user_features;
890
0
}
891
892
static const char *
893
get_vport_type(const struct dpif_netlink_vport *vport)
894
0
{
895
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
896
897
0
    switch (vport->type) {
898
0
    case OVS_VPORT_TYPE_NETDEV: {
899
0
        const char *type = netdev_get_type_from_name(vport->name);
900
901
0
        return type ? type : "system";
902
0
    }
903
904
0
    case OVS_VPORT_TYPE_INTERNAL:
905
0
        return "internal";
906
907
0
    case OVS_VPORT_TYPE_GENEVE:
908
0
        return "geneve";
909
910
0
    case OVS_VPORT_TYPE_GRE:
911
0
        return "gre";
912
913
0
    case OVS_VPORT_TYPE_VXLAN:
914
0
        return "vxlan";
915
916
0
    case OVS_VPORT_TYPE_ERSPAN:
917
0
        return "erspan";
918
919
0
    case OVS_VPORT_TYPE_IP6ERSPAN:
920
0
        return "ip6erspan";
921
922
0
    case OVS_VPORT_TYPE_IP6GRE:
923
0
        return "ip6gre";
924
925
0
    case OVS_VPORT_TYPE_GTPU:
926
0
        return "gtpu";
927
928
0
    case OVS_VPORT_TYPE_SRV6:
929
0
        return "srv6";
930
931
0
    case OVS_VPORT_TYPE_BAREUDP:
932
0
        return "bareudp";
933
934
0
    case OVS_VPORT_TYPE_UNSPEC:
935
0
    case __OVS_VPORT_TYPE_MAX:
936
0
        break;
937
0
    }
938
939
0
    VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
940
0
                 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
941
0
    return "unknown";
942
0
}
943
944
enum ovs_vport_type
945
netdev_to_ovs_vport_type(const char *type)
946
0
{
947
0
    if (!strcmp(type, "tap") || !strcmp(type, "system")) {
948
0
        return OVS_VPORT_TYPE_NETDEV;
949
0
    } else if (!strcmp(type, "internal")) {
950
0
        return OVS_VPORT_TYPE_INTERNAL;
951
0
    } else if (!strcmp(type, "geneve")) {
952
0
        return OVS_VPORT_TYPE_GENEVE;
953
0
    } else if (!strcmp(type, "vxlan")) {
954
0
        return OVS_VPORT_TYPE_VXLAN;
955
0
    } else if (!strcmp(type, "erspan")) {
956
0
        return OVS_VPORT_TYPE_ERSPAN;
957
0
    } else if (!strcmp(type, "ip6erspan")) {
958
0
        return OVS_VPORT_TYPE_IP6ERSPAN;
959
0
    } else if (!strcmp(type, "ip6gre")) {
960
0
        return OVS_VPORT_TYPE_IP6GRE;
961
0
    } else if (!strcmp(type, "gre")) {
962
0
        return OVS_VPORT_TYPE_GRE;
963
0
    } else if (!strcmp(type, "gtpu")) {
964
0
        return OVS_VPORT_TYPE_GTPU;
965
0
    } else if (!strcmp(type, "srv6")) {
966
0
        return OVS_VPORT_TYPE_SRV6;
967
0
    } else if (!strcmp(type, "bareudp")) {
968
0
        return OVS_VPORT_TYPE_BAREUDP;
969
0
    } else {
970
0
        return OVS_VPORT_TYPE_UNSPEC;
971
0
    }
972
0
}
973
974
static int
975
dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
976
                        enum ovs_vport_type type,
977
                        struct ofpbuf *options,
978
                        odp_port_t *port_nop)
979
    OVS_REQ_WRLOCK(dpif->upcall_lock)
980
0
{
981
0
    struct dpif_netlink_vport request, reply;
982
0
    struct ofpbuf *buf;
983
0
    struct nl_sock *sock = NULL;
984
0
    uint32_t upcall_pids = 0;
985
0
    int error = 0;
986
987
    /* per-cpu dispatch mode does not require a socket per vport. */
988
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
989
0
        if (dpif->handlers) {
990
0
            error = create_nl_sock(dpif, &sock);
991
0
            if (error) {
992
0
                return error;
993
0
            }
994
0
        }
995
0
        if (sock) {
996
0
            upcall_pids = nl_sock_pid(sock);
997
0
        }
998
0
    }
999
1000
0
    dpif_netlink_vport_init(&request);
1001
0
    request.cmd = OVS_VPORT_CMD_NEW;
1002
0
    request.dp_ifindex = dpif->dp_ifindex;
1003
0
    request.type = type;
1004
0
    request.name = name;
1005
1006
0
    request.port_no = *port_nop;
1007
0
    request.n_upcall_pids = 1;
1008
0
    request.upcall_pids = &upcall_pids;
1009
1010
0
    if (options) {
1011
0
        request.options = options->data;
1012
0
        request.options_len = options->size;
1013
0
    }
1014
1015
0
    error = dpif_netlink_vport_transact(&request, &reply, &buf);
1016
0
    if (!error) {
1017
0
        *port_nop = reply.port_no;
1018
0
    } else {
1019
0
        if (error == EBUSY && *port_nop != ODPP_NONE) {
1020
0
            VLOG_INFO("%s: requested port %"PRIu32" is in use",
1021
0
                      dpif_name(&dpif->dpif), *port_nop);
1022
0
        }
1023
1024
0
        close_nl_sock(sock);
1025
0
        goto exit;
1026
0
    }
1027
1028
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
1029
0
        error = vport_add_channel(dpif, *port_nop, sock);
1030
0
        if (error) {
1031
0
            VLOG_INFO("%s: could not add channel for port %s",
1032
0
                        dpif_name(&dpif->dpif), name);
1033
1034
            /* Delete the port. */
1035
0
            dpif_netlink_vport_init(&request);
1036
0
            request.cmd = OVS_VPORT_CMD_DEL;
1037
0
            request.dp_ifindex = dpif->dp_ifindex;
1038
0
            request.port_no = *port_nop;
1039
0
            dpif_netlink_vport_transact(&request, NULL, NULL);
1040
0
            close_nl_sock(sock);
1041
0
            goto exit;
1042
0
        }
1043
0
    }
1044
1045
0
exit:
1046
0
    ofpbuf_delete(buf);
1047
1048
0
    return error;
1049
0
}
1050
1051
static int
1052
dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
1053
                             odp_port_t *port_nop)
1054
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1055
0
{
1056
0
    const struct netdev_tunnel_config *tnl_cfg;
1057
0
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1058
0
    const char *type = netdev_get_type(netdev);
1059
0
    uint64_t options_stub[64 / 8];
1060
0
    enum ovs_vport_type ovs_type;
1061
0
    struct ofpbuf options;
1062
0
    const char *name;
1063
1064
0
    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1065
1066
0
    ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
1067
0
    if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
1068
0
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
1069
0
                     "unsupported type `%s'",
1070
0
                     dpif_name(&dpif->dpif), name, type);
1071
0
        return EINVAL;
1072
0
    }
1073
1074
0
    if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
1075
#ifdef _WIN32
1076
        /* XXX : Map appropiate Windows handle */
1077
#else
1078
0
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
1079
0
#endif
1080
0
    }
1081
1082
#ifdef _WIN32
1083
    if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
1084
        if (!create_wmi_port(name)){
1085
            VLOG_ERR("Could not create wmi internal port with name:%s", name);
1086
            return EINVAL;
1087
        };
1088
    }
1089
#endif
1090
1091
0
    tnl_cfg = netdev_get_tunnel_config(netdev);
1092
0
    if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
1093
0
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
1094
0
        if (tnl_cfg->dst_port) {
1095
0
            nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
1096
0
                           ntohs(tnl_cfg->dst_port));
1097
0
        }
1098
0
        if (tnl_cfg->exts) {
1099
0
            size_t ext_ofs;
1100
0
            int i;
1101
1102
0
            ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
1103
0
            for (i = 0; i < 32; i++) {
1104
0
                if (tnl_cfg->exts & (UINT32_C(1) << i)) {
1105
0
                    nl_msg_put_flag(&options, i);
1106
0
                }
1107
0
            }
1108
0
            nl_msg_end_nested(&options, ext_ofs);
1109
0
        }
1110
0
        return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
1111
0
                                       port_nop);
1112
0
    } else {
1113
0
        return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
1114
0
    }
1115
1116
0
}
1117
1118
static int
1119
dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
1120
                                      struct netdev *netdev,
1121
                                      odp_port_t *port_nop)
1122
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1123
0
{
1124
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
1125
0
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1126
0
    const char *name;
1127
0
    int error;
1128
1129
0
    error = dpif_netlink_rtnl_port_create(netdev);
1130
0
    if (error) {
1131
0
        if (error != EOPNOTSUPP) {
1132
0
            VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
1133
0
                         netdev_get_name(netdev), ovs_strerror(error));
1134
0
        }
1135
0
        return error;
1136
0
    }
1137
1138
0
    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1139
0
    error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
1140
0
                                    port_nop);
1141
0
    if (error) {
1142
0
        dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
1143
0
    }
1144
0
    return error;
1145
0
}
1146
1147
static int
1148
dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
1149
                      odp_port_t *port_nop)
1150
0
{
1151
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1152
0
    int error = EOPNOTSUPP;
1153
1154
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
1155
0
    if (!ovs_tunnels_out_of_tree) {
1156
0
        error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
1157
0
    }
1158
0
    if (error) {
1159
0
        error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
1160
0
    }
1161
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1162
1163
0
    return error;
1164
0
}
1165
1166
static int
1167
dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
1168
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1169
0
{
1170
0
    struct dpif_netlink_vport vport;
1171
0
    struct dpif_port dpif_port;
1172
0
    int error;
1173
1174
0
    error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
1175
0
    if (error) {
1176
0
        return error;
1177
0
    }
1178
1179
0
    dpif_netlink_vport_init(&vport);
1180
0
    vport.cmd = OVS_VPORT_CMD_DEL;
1181
0
    vport.dp_ifindex = dpif->dp_ifindex;
1182
0
    vport.port_no = port_no;
1183
#ifdef _WIN32
1184
    if (!strcmp(dpif_port.type, "internal")) {
1185
        if (!delete_wmi_port(dpif_port.name)) {
1186
            VLOG_ERR("Could not delete wmi port with name: %s",
1187
                     dpif_port.name);
1188
        };
1189
    }
1190
#endif
1191
0
    error = dpif_netlink_vport_transact(&vport, NULL, NULL);
1192
1193
0
    vport_del_channels(dpif, port_no);
1194
1195
0
    if (!error && !ovs_tunnels_out_of_tree) {
1196
0
        error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1197
0
        if (error == EOPNOTSUPP) {
1198
0
            error = 0;
1199
0
        }
1200
0
    }
1201
1202
0
    dpif_port_destroy(&dpif_port);
1203
1204
0
    return error;
1205
0
}
1206
1207
static int
1208
dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
1209
0
{
1210
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1211
0
    int error;
1212
1213
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
1214
0
    error = dpif_netlink_port_del__(dpif, port_no);
1215
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1216
1217
0
    return error;
1218
0
}
1219
1220
static int
1221
dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1222
                          const char *port_name, struct dpif_port *dpif_port)
1223
0
{
1224
0
    struct dpif_netlink_vport request;
1225
0
    struct dpif_netlink_vport reply;
1226
0
    struct ofpbuf *buf;
1227
0
    int error;
1228
1229
0
    dpif_netlink_vport_init(&request);
1230
0
    request.cmd = OVS_VPORT_CMD_GET;
1231
0
    request.dp_ifindex = dpif->dp_ifindex;
1232
0
    request.port_no = port_no;
1233
0
    request.name = port_name;
1234
1235
0
    error = dpif_netlink_vport_transact(&request, &reply, &buf);
1236
0
    if (!error) {
1237
0
        if (reply.dp_ifindex != request.dp_ifindex) {
1238
            /* A query by name reported that 'port_name' is in some datapath
1239
             * other than 'dpif', but the caller wants to know about 'dpif'. */
1240
0
            error = ENODEV;
1241
0
        } else if (dpif_port) {
1242
0
            dpif_port->name = xstrdup(reply.name);
1243
0
            dpif_port->type = xstrdup(get_vport_type(&reply));
1244
0
            dpif_port->port_no = reply.port_no;
1245
0
        }
1246
0
        ofpbuf_delete(buf);
1247
0
    }
1248
0
    return error;
1249
0
}
1250
1251
static int
1252
dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1253
                                  struct dpif_port *dpif_port)
1254
0
{
1255
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1256
1257
0
    return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
1258
0
}
1259
1260
/* Looks up the port named 'devname' in 'dpif_' by calling
 * dpif_netlink_port_query__() with port number 0, and returns its result. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    return dpif_netlink_port_query__(dpif_netlink_cast(dpif_), 0, devname,
                                     dpif_port);
}
1268
1269
static uint32_t
1270
dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1271
                            odp_port_t port_no)
1272
    OVS_REQ_RDLOCK(dpif->upcall_lock)
1273
0
{
1274
0
    uint32_t port_idx = odp_to_u32(port_no);
1275
0
    uint32_t pid = 0;
1276
1277
0
    if (dpif->handlers && dpif->uc_array_size > 0) {
1278
        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1279
         * channel, since it is not heavily loaded. */
1280
0
        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1281
1282
        /* Needs to check in case the socket pointer is changed in between
1283
         * the holding of upcall_lock.  A known case happens when the main
1284
         * thread deletes the vport while the handler thread is handling
1285
         * the upcall from that port. */
1286
0
        if (dpif->channels[idx].sock) {
1287
0
            pid = nl_sock_pid(dpif->channels[idx].sock);
1288
0
        }
1289
0
    }
1290
1291
0
    return pid;
1292
0
}
1293
1294
static uint32_t
1295
dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
1296
0
{
1297
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1298
0
    uint32_t ret;
1299
1300
    /* In per-cpu dispatch mode, vports do not have an associated PID */
1301
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
1302
        /* In per-cpu dispatch mode, this will be ignored as kernel space will
1303
         * select the PID before sending to user space. We set to
1304
         * DPIF_NETLINK_PER_CPU_PID as 0 is rejected by kernel space as an
1305
         * invalid PID.
1306
         */
1307
0
        return DPIF_NETLINK_PER_CPU_PID;
1308
0
    }
1309
1310
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
1311
0
    ret = dpif_netlink_port_get_pid__(dpif, port_no);
1312
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1313
1314
0
    return ret;
1315
0
}
1316
1317
static int
1318
dpif_netlink_flow_flush(struct dpif *dpif_)
1319
0
{
1320
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1321
0
    struct dpif_netlink_flow flow;
1322
1323
0
    dpif_netlink_flow_init(&flow);
1324
0
    flow.cmd = OVS_FLOW_CMD_DEL;
1325
0
    flow.dp_ifindex = dpif->dp_ifindex;
1326
1327
0
    return dpif_netlink_flow_transact(&flow, NULL, NULL);
1328
0
}
1329
1330
/* State of one port dump, allocated by dpif_netlink_port_dump_start() and
 * freed by dpif_netlink_port_dump_done(). */
struct dpif_netlink_port_state {
1331
    struct nl_dump dump;        /* Netlink dump in progress. */
1332
    struct ofpbuf buf;          /* Buffer handed to nl_dump_next(). */
1333
};
1334
1335
static void
1336
dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1337
                               struct nl_dump *dump)
1338
0
{
1339
0
    struct dpif_netlink_vport request;
1340
0
    struct ofpbuf *buf;
1341
1342
0
    dpif_netlink_vport_init(&request);
1343
0
    request.cmd = OVS_VPORT_CMD_GET;
1344
0
    request.dp_ifindex = dpif->dp_ifindex;
1345
1346
0
    buf = ofpbuf_new(1024);
1347
0
    dpif_netlink_vport_to_ofpbuf(&request, buf);
1348
0
    nl_dump_start(dump, NETLINK_GENERIC, buf);
1349
0
    ofpbuf_delete(buf);
1350
0
}
1351
1352
static int
1353
dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
1354
0
{
1355
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1356
0
    struct dpif_netlink_port_state *state;
1357
1358
0
    *statep = state = xmalloc(sizeof *state);
1359
0
    dpif_netlink_port_dump_start__(dpif, &state->dump);
1360
1361
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1362
0
    return 0;
1363
0
}
1364
1365
static int
1366
dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1367
                              struct nl_dump *dump,
1368
                              struct dpif_netlink_vport *vport,
1369
                              struct ofpbuf *buffer)
1370
0
{
1371
0
    struct ofpbuf buf;
1372
0
    int error;
1373
1374
0
    if (!nl_dump_next(dump, &buf, buffer)) {
1375
0
        return EOF;
1376
0
    }
1377
1378
0
    error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
1379
0
    if (error) {
1380
0
        VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1381
0
                     dpif_name(&dpif->dpif), ovs_strerror(error));
1382
0
    }
1383
0
    return error;
1384
0
}
1385
1386
static int
1387
dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1388
                            struct dpif_port *dpif_port)
1389
0
{
1390
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1391
0
    struct dpif_netlink_port_state *state = state_;
1392
0
    struct dpif_netlink_vport vport;
1393
0
    int error;
1394
1395
0
    error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1396
0
                                          &state->buf);
1397
0
    if (error) {
1398
0
        return error;
1399
0
    }
1400
0
    dpif_port->name = CONST_CAST(char *, vport.name);
1401
0
    dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
1402
0
    dpif_port->port_no = vport.port_no;
1403
0
    return 0;
1404
0
}
1405
1406
static int
1407
dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
1408
0
{
1409
0
    struct dpif_netlink_port_state *state = state_;
1410
0
    int error = nl_dump_done(&state->dump);
1411
1412
0
    ofpbuf_uninit(&state->buf);
1413
0
    free(state);
1414
0
    return error;
1415
0
}
1416
1417
static int
1418
dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
1419
0
{
1420
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1421
1422
    /* Lazily create the Netlink socket to listen for notifications. */
1423
0
    if (!dpif->port_notifier) {
1424
0
        struct nl_sock *sock;
1425
0
        int error;
1426
1427
0
        error = nl_sock_create(NETLINK_GENERIC, &sock);
1428
0
        if (error) {
1429
0
            return error;
1430
0
        }
1431
1432
0
        error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1433
0
        if (error) {
1434
0
            nl_sock_destroy(sock);
1435
0
            return error;
1436
0
        }
1437
0
        dpif->port_notifier = sock;
1438
1439
        /* We have no idea of the current state so report that everything
1440
         * changed. */
1441
0
        return ENOBUFS;
1442
0
    }
1443
1444
0
    for (;;) {
1445
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1446
0
        uint64_t buf_stub[4096 / 8];
1447
0
        struct ofpbuf buf;
1448
0
        int error;
1449
1450
0
        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
1451
0
        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
1452
0
        if (!error) {
1453
0
            struct dpif_netlink_vport vport;
1454
1455
0
            error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
1456
0
            if (!error) {
1457
0
                if (vport.dp_ifindex == dpif->dp_ifindex
1458
0
                    && (vport.cmd == OVS_VPORT_CMD_NEW
1459
0
                        || vport.cmd == OVS_VPORT_CMD_DEL
1460
0
                        || vport.cmd == OVS_VPORT_CMD_SET)) {
1461
0
                    VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1462
0
                             dpif->dpif.full_name, vport.name, vport.cmd);
1463
0
                    if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
1464
0
                        dpif->refresh_channels = true;
1465
0
                    }
1466
0
                    *devnamep = xstrdup(vport.name);
1467
0
                    ofpbuf_uninit(&buf);
1468
0
                    return 0;
1469
0
                }
1470
0
            }
1471
0
        } else if (error != EAGAIN) {
1472
0
            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1473
0
                         ovs_strerror(error));
1474
0
            nl_sock_drain(dpif->port_notifier);
1475
0
            error = ENOBUFS;
1476
0
        }
1477
1478
0
        ofpbuf_uninit(&buf);
1479
0
        if (error) {
1480
0
            return error;
1481
0
        }
1482
0
    }
1483
0
}
1484
1485
static void
1486
dpif_netlink_port_poll_wait(const struct dpif *dpif_)
1487
0
{
1488
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1489
1490
0
    if (dpif->port_notifier) {
1491
0
        nl_sock_wait(dpif->port_notifier, POLLIN);
1492
0
    } else {
1493
0
        poll_immediate_wake();
1494
0
    }
1495
0
}
1496
1497
static void
1498
dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1499
                            const ovs_u128 *ufid, bool terse)
1500
0
{
1501
0
    if (ufid) {
1502
0
        request->ufid = *ufid;
1503
0
        request->ufid_present = true;
1504
0
    } else {
1505
0
        request->ufid_present = false;
1506
0
    }
1507
0
    request->ufid_terse = terse;
1508
0
}
1509
1510
static void
1511
dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1512
                             const struct nlattr *key, size_t key_len,
1513
                             const ovs_u128 *ufid, bool terse,
1514
                             struct dpif_netlink_flow *request)
1515
0
{
1516
0
    dpif_netlink_flow_init(request);
1517
0
    request->cmd = OVS_FLOW_CMD_GET;
1518
0
    request->dp_ifindex = dpif->dp_ifindex;
1519
0
    request->key = key;
1520
0
    request->key_len = key_len;
1521
0
    dpif_netlink_flow_init_ufid(request, ufid, terse);
1522
0
}
1523
1524
static void
1525
dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1526
                           const struct dpif_flow_get *get,
1527
                           struct dpif_netlink_flow *request)
1528
0
{
1529
0
    dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1530
0
                                 false, request);
1531
0
}
1532
1533
static int
1534
dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1535
                        const struct nlattr *key, size_t key_len,
1536
                        const ovs_u128 *ufid, bool terse,
1537
                        struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1538
0
{
1539
0
    struct dpif_netlink_flow request;
1540
1541
0
    dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
1542
0
    return dpif_netlink_flow_transact(&request, reply, bufp);
1543
0
}
1544
1545
static int
1546
dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1547
                      const struct dpif_netlink_flow *flow,
1548
                      struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1549
0
{
1550
0
    return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1551
0
                                   flow->ufid_present ? &flow->ufid : NULL,
1552
0
                                   false, reply, bufp);
1553
0
}
1554
1555
static void
1556
dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1557
                           const struct dpif_flow_put *put,
1558
                           struct dpif_netlink_flow *request)
1559
0
{
1560
0
    static const struct nlattr dummy_action;
1561
1562
0
    dpif_netlink_flow_init(request);
1563
0
    request->cmd = (put->flags & DPIF_FP_CREATE
1564
0
                    ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1565
0
    request->dp_ifindex = dpif->dp_ifindex;
1566
0
    request->key = put->key;
1567
0
    request->key_len = put->key_len;
1568
0
    request->mask = put->mask;
1569
0
    request->mask_len = put->mask_len;
1570
0
    dpif_netlink_flow_init_ufid(request, put->ufid, false);
1571
1572
    /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1573
0
    request->actions = (put->actions
1574
0
                        ? put->actions
1575
0
                        : CONST_CAST(struct nlattr *, &dummy_action));
1576
0
    request->actions_len = put->actions_len;
1577
0
    if (put->flags & DPIF_FP_ZERO_STATS) {
1578
0
        request->clear = true;
1579
0
    }
1580
0
    if (put->flags & DPIF_FP_PROBE) {
1581
0
        request->probe = true;
1582
0
    }
1583
0
    request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
1584
0
}
1585
1586
static void
1587
dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1588
                             const struct nlattr *key, size_t key_len,
1589
                             const ovs_u128 *ufid, bool terse,
1590
                             struct dpif_netlink_flow *request)
1591
0
{
1592
0
    dpif_netlink_flow_init(request);
1593
0
    request->cmd = OVS_FLOW_CMD_DEL;
1594
0
    request->dp_ifindex = dpif->dp_ifindex;
1595
0
    request->key = key;
1596
0
    request->key_len = key_len;
1597
0
    dpif_netlink_flow_init_ufid(request, ufid, terse);
1598
0
}
1599
1600
static void
1601
dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1602
                           const struct dpif_flow_del *del,
1603
                           struct dpif_netlink_flow *request)
1604
0
{
1605
0
    dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1606
0
                                 del->ufid, del->terse, request);
1607
0
}
1608
1609
/* A flow dump on a netlink datapath.  Created by
 * dpif_netlink_flow_dump_create() and freed by
 * dpif_netlink_flow_dump_destroy(). */
struct dpif_netlink_flow_dump {
1610
    struct dpif_flow_dump up;   /* Embedded generic dump; see the cast. */
1611
    struct nl_dump nl_dump;     /* Underlying Netlink dump. */
1612
    atomic_int status;          /* Initially 0; read back at destroy time. */
1613
};
1614
1615
static struct dpif_netlink_flow_dump *
1616
dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
1617
0
{
1618
0
    return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
1619
0
}
1620
1621
static struct dpif_flow_dump *
1622
dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
1623
                              struct dpif_flow_dump_types *types)
1624
0
{
1625
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1626
0
    struct dpif_netlink_flow_dump *dump;
1627
0
    struct dpif_netlink_flow request;
1628
0
    struct ofpbuf *buf;
1629
1630
0
    dump = xmalloc(sizeof *dump);
1631
0
    dpif_flow_dump_init(&dump->up, dpif_, terse, types);
1632
1633
0
    dpif_netlink_flow_init(&request);
1634
0
    request.cmd = OVS_FLOW_CMD_GET;
1635
0
    request.dp_ifindex = dpif->dp_ifindex;
1636
0
    request.ufid_present = false;
1637
0
    request.ufid_terse = terse;
1638
1639
0
    buf = ofpbuf_new(1024);
1640
0
    dpif_netlink_flow_to_ofpbuf(&request, buf);
1641
0
    nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1642
0
    ofpbuf_delete(buf);
1643
0
    atomic_init(&dump->status, 0);
1644
1645
0
    return &dump->up;
1646
0
}
1647
1648
static int
1649
dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
1650
0
{
1651
0
    struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1652
0
    unsigned int nl_status = nl_dump_done(&dump->nl_dump);
1653
0
    int dump_status;
1654
1655
    /* No other thread has access to 'dump' at this point. */
1656
0
    atomic_read_relaxed(&dump->status, &dump_status);
1657
0
    free(dump);
1658
0
    return dump_status ? dump_status : nl_status;
1659
0
}
1660
1661
/* Per-thread state of a flow dump on a netlink datapath.  Created by
 * dpif_netlink_flow_dump_thread_create() and freed by
 * dpif_netlink_flow_dump_thread_destroy(). */
struct dpif_netlink_flow_dump_thread {
1662
    struct dpif_flow_dump_thread up;    /* Embedded generic thread state. */
1663
    struct dpif_netlink_flow_dump *dump;    /* Dump this thread works on. */
1664
    struct dpif_netlink_flow flow;
1665
    struct dpif_flow_stats stats;
1666
    struct ofpbuf nl_flows;     /* Always used to store flows. */
1667
    struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
1668
};
1669
1670
static struct dpif_netlink_flow_dump_thread *
1671
dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
1672
0
{
1673
0
    return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
1674
0
}
1675
1676
static struct dpif_flow_dump_thread *
1677
dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1678
0
{
1679
0
    struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1680
0
    struct dpif_netlink_flow_dump_thread *thread;
1681
1682
0
    thread = xmalloc(sizeof *thread);
1683
0
    dpif_flow_dump_thread_init(&thread->up, &dump->up);
1684
0
    thread->dump = dump;
1685
0
    ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1686
0
    thread->nl_actions = NULL;
1687
1688
0
    return &thread->up;
1689
0
}
1690
1691
static void
1692
dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
1693
0
{
1694
0
    struct dpif_netlink_flow_dump_thread *thread
1695
0
        = dpif_netlink_flow_dump_thread_cast(thread_);
1696
1697
0
    ofpbuf_uninit(&thread->nl_flows);
1698
0
    ofpbuf_delete(thread->nl_actions);
1699
0
    free(thread);
1700
0
}
1701
1702
static void
1703
dpif_netlink_flow_to_dpif_flow(struct dpif_flow *dpif_flow,
1704
                               const struct dpif_netlink_flow *datapath_flow)
1705
0
{
1706
0
    dpif_flow->key = datapath_flow->key;
1707
0
    dpif_flow->key_len = datapath_flow->key_len;
1708
0
    dpif_flow->mask = datapath_flow->mask;
1709
0
    dpif_flow->mask_len = datapath_flow->mask_len;
1710
0
    dpif_flow->actions = datapath_flow->actions;
1711
0
    dpif_flow->actions_len = datapath_flow->actions_len;
1712
0
    dpif_flow->ufid_present = datapath_flow->ufid_present;
1713
0
    dpif_flow->pmd_id = PMD_ID_NULL;
1714
0
    if (datapath_flow->ufid_present) {
1715
0
        dpif_flow->ufid = datapath_flow->ufid;
1716
0
    } else {
1717
0
        ovs_assert(datapath_flow->key && datapath_flow->key_len);
1718
0
        odp_flow_key_hash(datapath_flow->key, datapath_flow->key_len,
1719
0
                          &dpif_flow->ufid);
1720
0
    }
1721
0
    dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
1722
0
    dpif_flow->attrs.offloaded = false;
1723
0
    dpif_flow->attrs.dp_layer = "ovs";
1724
0
    dpif_flow->attrs.dp_extra_info = NULL;
1725
0
}
1726
1727
static int
1728
dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1729
                            struct dpif_flow *flows, int max_flows)
1730
0
{
1731
0
    struct dpif_netlink_flow_dump_thread *thread
1732
0
        = dpif_netlink_flow_dump_thread_cast(thread_);
1733
0
    struct dpif_netlink_flow_dump *dump = thread->dump;
1734
0
    struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dump->dpif);
1735
0
    int n_flows = 0;
1736
1737
0
    ofpbuf_delete(thread->nl_actions);
1738
0
    thread->nl_actions = NULL;
1739
1740
0
    while (!n_flows
1741
0
           || (n_flows < max_flows && thread->nl_flows.size)) {
1742
0
        struct dpif_netlink_flow datapath_flow;
1743
0
        struct ofpbuf nl_flow;
1744
0
        int error;
1745
1746
        /* Try to grab another flow. */
1747
0
        if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1748
0
            break;
1749
0
        }
1750
1751
        /* Convert the flow to our output format. */
1752
0
        error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
1753
0
        if (error) {
1754
0
            atomic_store_relaxed(&dump->status, error);
1755
0
            break;
1756
0
        }
1757
1758
0
        if (dump->up.terse || datapath_flow.actions) {
1759
            /* Common case: we don't want actions, or the flow includes
1760
             * actions. */
1761
0
            dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
1762
0
        } else {
1763
            /* Rare case: the flow does not include actions.  Retrieve this
1764
             * individual flow again to get the actions. */
1765
0
            error = dpif_netlink_flow_get(dpif, &datapath_flow,
1766
0
                                          &datapath_flow, &thread->nl_actions);
1767
0
            if (error == ENOENT) {
1768
0
                VLOG_DBG("dumped flow disappeared on get");
1769
0
                continue;
1770
0
            } else if (error) {
1771
0
                VLOG_WARN("error fetching dumped flow: %s",
1772
0
                          ovs_strerror(error));
1773
0
                atomic_store_relaxed(&dump->status, error);
1774
0
                break;
1775
0
            }
1776
1777
            /* Save this flow.  Then exit, because we only have one buffer to
1778
             * handle this case. */
1779
0
            dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
1780
0
            break;
1781
0
        }
1782
0
    }
1783
0
    return n_flows;
1784
0
}
1785
1786
static void
1787
dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1788
                            struct ofpbuf *buf)
1789
0
{
1790
0
    struct ovs_header *k_exec;
1791
0
    size_t key_ofs;
1792
1793
0
    ofpbuf_prealloc_tailroom(buf, (64
1794
0
                                   + dp_packet_size(d_exec->packet)
1795
0
                                   + ODP_KEY_METADATA_SIZE
1796
0
                                   + d_exec->actions_len));
1797
1798
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
1799
0
                          OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
1800
1801
0
    k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1802
0
    k_exec->dp_ifindex = dp_ifindex;
1803
1804
0
    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
1805
0
                      dp_packet_data(d_exec->packet),
1806
0
                      dp_packet_size(d_exec->packet));
1807
1808
0
    key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
1809
0
    odp_key_from_dp_packet(buf, d_exec->packet);
1810
0
    nl_msg_end_nested(buf, key_ofs);
1811
1812
0
    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1813
0
                      d_exec->actions, d_exec->actions_len);
1814
0
    if (d_exec->probe) {
1815
0
        nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
1816
0
    }
1817
0
    if (d_exec->mtu) {
1818
0
        nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1819
0
    }
1820
1821
0
    if (d_exec->hash) {
1822
0
        nl_msg_put_u64(buf, OVS_PACKET_ATTR_HASH, d_exec->hash);
1823
0
    }
1824
1825
0
    if (d_exec->upcall_pid) {
1826
0
        nl_msg_put_u32(buf, OVS_PACKET_ATTR_UPCALL_PID, d_exec->upcall_pid);
1827
0
    }
1828
0
}
1829
1830
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1831
 * Returns the number actually executed (at least 1, if 'n_ops' is
1832
 * positive). */
1833
static size_t
1834
dpif_netlink_operate__(struct dpif_netlink *dpif,
1835
                       struct dpif_op **ops, size_t n_ops)
1836
0
{
1837
0
    struct op_auxdata {
1838
0
        struct nl_transaction txn;
1839
1840
0
        struct ofpbuf request;
1841
0
        uint64_t request_stub[1024 / 8];
1842
1843
0
        struct ofpbuf reply;
1844
0
        uint64_t reply_stub[1024 / 8];
1845
0
    } auxes[OPERATE_MAX_OPS];
1846
1847
0
    struct nl_transaction *txnsp[OPERATE_MAX_OPS];
1848
0
    size_t i;
1849
1850
0
    n_ops = MIN(n_ops, OPERATE_MAX_OPS);
1851
0
    for (i = 0; i < n_ops; i++) {
1852
0
        struct op_auxdata *aux = &auxes[i];
1853
0
        struct dpif_op *op = ops[i];
1854
0
        struct dpif_flow_put *put;
1855
0
        struct dpif_flow_del *del;
1856
0
        struct dpif_flow_get *get;
1857
0
        struct dpif_netlink_flow flow;
1858
1859
0
        ofpbuf_use_stub(&aux->request,
1860
0
                        aux->request_stub, sizeof aux->request_stub);
1861
0
        aux->txn.request = &aux->request;
1862
1863
0
        ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1864
0
        aux->txn.reply = NULL;
1865
1866
0
        switch (op->type) {
1867
0
        case DPIF_OP_FLOW_PUT:
1868
0
            put = &op->flow_put;
1869
0
            dpif_netlink_init_flow_put(dpif, put, &flow);
1870
0
            if (put->stats) {
1871
0
                flow.nlmsg_flags |= NLM_F_ECHO;
1872
0
                aux->txn.reply = &aux->reply;
1873
0
            }
1874
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1875
1876
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_put,
1877
0
                           dpif, put, &flow, &aux->request);
1878
0
            break;
1879
1880
0
        case DPIF_OP_FLOW_DEL:
1881
0
            del = &op->flow_del;
1882
0
            dpif_netlink_init_flow_del(dpif, del, &flow);
1883
0
            if (del->stats) {
1884
0
                flow.nlmsg_flags |= NLM_F_ECHO;
1885
0
                aux->txn.reply = &aux->reply;
1886
0
            }
1887
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1888
1889
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_del,
1890
0
                           dpif, del, &flow, &aux->request);
1891
0
            break;
1892
1893
0
        case DPIF_OP_EXECUTE:
1894
            /* Can't execute a packet that won't fit in a Netlink attribute. */
1895
0
            if (OVS_UNLIKELY(nl_attr_oversized(
1896
0
                                 dp_packet_size(op->execute.packet)))) {
1897
                /* Report an error immediately if this is the first operation.
1898
                 * Otherwise the easiest thing to do is to postpone to the next
1899
                 * call (when this will be the first operation). */
1900
0
                if (i == 0) {
1901
0
                    VLOG_ERR_RL(&error_rl,
1902
0
                                "dropping oversized %"PRIu32"-byte packet",
1903
0
                                dp_packet_size(op->execute.packet));
1904
0
                    op->error = ENOBUFS;
1905
0
                    return 1;
1906
0
                }
1907
0
                n_ops = i;
1908
0
            } else {
1909
0
                dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
1910
0
                                            &aux->request);
1911
1912
0
                OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_execute,
1913
0
                               dpif, &op->execute,
1914
0
                               dp_packet_data(op->execute.packet),
1915
0
                               dp_packet_size(op->execute.packet),
1916
0
                               &aux->request);
1917
0
            }
1918
0
            break;
1919
1920
0
        case DPIF_OP_FLOW_GET:
1921
0
            get = &op->flow_get;
1922
0
            dpif_netlink_init_flow_get(dpif, get, &flow);
1923
0
            aux->txn.reply = get->buffer;
1924
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1925
1926
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_get,
1927
0
                           dpif, get, &flow, &aux->request);
1928
0
            break;
1929
1930
0
        default:
1931
0
            OVS_NOT_REACHED();
1932
0
        }
1933
0
    }
1934
1935
0
    for (i = 0; i < n_ops; i++) {
1936
0
        txnsp[i] = &auxes[i].txn;
1937
0
    }
1938
0
    nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
1939
1940
0
    for (i = 0; i < n_ops; i++) {
1941
0
        struct op_auxdata *aux = &auxes[i];
1942
0
        struct nl_transaction *txn = &auxes[i].txn;
1943
0
        struct dpif_op *op = ops[i];
1944
0
        struct dpif_flow_put *put;
1945
0
        struct dpif_flow_del *del;
1946
0
        struct dpif_flow_get *get;
1947
1948
0
        op->error = txn->error;
1949
1950
0
        switch (op->type) {
1951
0
        case DPIF_OP_FLOW_PUT:
1952
0
            put = &op->flow_put;
1953
0
            if (put->stats) {
1954
0
                if (!op->error) {
1955
0
                    struct dpif_netlink_flow reply;
1956
1957
0
                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1958
0
                                                              txn->reply);
1959
0
                    if (!op->error) {
1960
0
                        dpif_netlink_flow_get_stats(&reply, put->stats);
1961
0
                    }
1962
0
                }
1963
0
            }
1964
0
            break;
1965
1966
0
        case DPIF_OP_FLOW_DEL:
1967
0
            del = &op->flow_del;
1968
0
            if (del->stats) {
1969
0
                if (!op->error) {
1970
0
                    struct dpif_netlink_flow reply;
1971
1972
0
                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1973
0
                                                              txn->reply);
1974
0
                    if (!op->error) {
1975
0
                        dpif_netlink_flow_get_stats(&reply, del->stats);
1976
0
                    }
1977
0
                }
1978
0
            }
1979
0
            break;
1980
1981
0
        case DPIF_OP_EXECUTE:
1982
0
            break;
1983
1984
0
        case DPIF_OP_FLOW_GET:
1985
0
            get = &op->flow_get;
1986
0
            if (!op->error) {
1987
0
                struct dpif_netlink_flow reply;
1988
1989
0
                op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
1990
0
                if (!op->error) {
1991
0
                    dpif_netlink_flow_to_dpif_flow(get->flow, &reply);
1992
0
                }
1993
0
            }
1994
0
            break;
1995
1996
0
        default:
1997
0
            OVS_NOT_REACHED();
1998
0
        }
1999
2000
0
        ofpbuf_uninit(&aux->request);
2001
0
        ofpbuf_uninit(&aux->reply);
2002
0
    }
2003
2004
0
    return n_ops;
2005
0
}
2006
2007
/* Executes all 'n_ops' operations in 'ops' against 'dpif_', feeding them to
 * dpif_netlink_operate__() in as many chunks as it takes. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
2019
2020
#if _WIN32
2021
static void
2022
dpif_netlink_handler_uninit(struct dpif_handler *handler)
2023
{
2024
    vport_delete_sock_pool(handler);
2025
}
2026
2027
static int
2028
dpif_netlink_handler_init(struct dpif_handler *handler)
2029
{
2030
    return vport_create_sock_pool(handler);
2031
}
2032
#else
2033
2034
static int
2035
dpif_netlink_handler_init(struct dpif_handler *handler)
2036
0
{
2037
0
    handler->epoll_fd = epoll_create(10);
2038
0
    return handler->epoll_fd < 0 ? errno : 0;
2039
0
}
2040
2041
static void
2042
dpif_netlink_handler_uninit(struct dpif_handler *handler)
2043
0
{
2044
0
    close(handler->epoll_fd);
2045
0
}
2046
#endif
2047
2048
/* Primality test by trial division.
 *
 * Returns true if 'num' is a prime number, false otherwise (including for
 * 0 and 1). */
static bool
is_prime(uint32_t num)
{
    /* 64-bit so that 'divisor * divisor' cannot wrap for large 'num'. */
    uint64_t divisor = 3;

    if (num < 4) {
        /* 0 and 1 are not prime; 2 and 3 are. */
        return num >= 2;
    }

    if (!(num & 1)) {
        return false;
    }

    /* Only odd divisors up to the square root need to be tried. */
    while (divisor * divisor <= num) {
        if (!(num % divisor)) {
            return false;
        }
        divisor += 2;
    }

    return true;
}
2074
2075
/* Finds the smallest prime that is greater than or equal to 'start'.
 *
 * Any 'start' of 2 or less yields 2.  Candidates are examined up to, but not
 * including, UINT32_MAX; returns 0 if none of them is prime. */
static uint32_t
next_prime(uint32_t start)
{
    uint32_t candidate = start;

    if (candidate <= 2) {
        return 2;
    }

    while (candidate < UINT32_MAX) {
        if (is_prime(candidate)) {
            return candidate;
        }
        candidate++;
    }

    return 0;
}
2095
2096
/* Calculates and returns the number of handler threads needed.
 *
 * Starts from count_cpu_cores().  When that is below count_total_cores() and
 * there are more than two cores in total, the result becomes
 *
 *     min(next_prime(count_cpu_cores() + 1), count_total_cores())
 *
 * The result is never less than 1. */
static uint32_t
dpif_netlink_calculate_n_handlers(void)
{
    uint32_t total = count_total_cores();
    uint32_t active = count_cpu_cores();
    uint32_t result = active;

    /* If not all cores are available to OVS, create additional handler
     * threads to ensure more fair distribution of load between them. */
    if (total > 2 && active < total) {
        uint32_t prime = next_prime(active + 1);

        result = MIN(prime, total);
    }

    return MAX(result, 1);
}
2118
2119
static int
2120
dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *dpif)
2121
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2122
0
{
2123
0
    int handler_id;
2124
0
    int error = 0;
2125
0
    uint32_t n_handlers;
2126
0
    uint32_t *upcall_pids;
2127
2128
0
    n_handlers = dpif_netlink_calculate_n_handlers();
2129
0
    if (dpif->n_handlers != n_handlers) {
2130
0
        VLOG_DBG("Dispatch mode(per-cpu): initializing %d handlers",
2131
0
                   n_handlers);
2132
0
        destroy_all_handlers(dpif);
2133
0
        upcall_pids = xzalloc(n_handlers * sizeof *upcall_pids);
2134
0
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2135
0
        for (handler_id = 0; handler_id < n_handlers; handler_id++) {
2136
0
            struct dpif_handler *handler = &dpif->handlers[handler_id];
2137
0
            error = create_nl_sock(dpif, &handler->sock);
2138
0
            if (error) {
2139
0
                VLOG_ERR("Dispatch mode(per-cpu): Cannot create socket for"
2140
0
                         "handler %d", handler_id);
2141
0
                continue;
2142
0
            }
2143
0
            upcall_pids[handler_id] = nl_sock_pid(handler->sock);
2144
0
            VLOG_DBG("Dispatch mode(per-cpu): "
2145
0
                      "handler %d has Netlink PID of %u",
2146
0
                      handler_id, upcall_pids[handler_id]);
2147
0
        }
2148
2149
0
        dpif->n_handlers = n_handlers;
2150
0
        error = dpif_netlink_set_handler_pids(&dpif->dpif, upcall_pids,
2151
0
                                              n_handlers);
2152
0
        free(upcall_pids);
2153
0
    }
2154
0
    return error;
2155
0
}
2156
2157
/* Synchronizes 'channels' in 'dpif->handlers'  with the set of vports
2158
 * currently in 'dpif' in the kernel, by adding a new set of channels for
2159
 * any kernel vport that lacks one and deleting any channels that have no
2160
 * backing kernel vports. */
2161
static int
2162
dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *dpif,
2163
                                             uint32_t n_handlers)
2164
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2165
0
{
2166
0
    unsigned long int *keep_channels;
2167
0
    struct dpif_netlink_vport vport;
2168
0
    size_t keep_channels_nbits;
2169
0
    struct nl_dump dump;
2170
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2171
0
    struct ofpbuf buf;
2172
0
    int retval = 0;
2173
0
    size_t i;
2174
2175
0
    ovs_assert(!WINDOWS || n_handlers <= 1);
2176
0
    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
2177
2178
0
    if (dpif->n_handlers != n_handlers) {
2179
0
        destroy_all_channels(dpif);
2180
0
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2181
0
        for (i = 0; i < n_handlers; i++) {
2182
0
            int error;
2183
0
            struct dpif_handler *handler = &dpif->handlers[i];
2184
2185
0
            error = dpif_netlink_handler_init(handler);
2186
0
            if (error) {
2187
0
                size_t j;
2188
2189
0
                for (j = 0; j < i; j++) {
2190
0
                    struct dpif_handler *tmp = &dpif->handlers[j];
2191
0
                    dpif_netlink_handler_uninit(tmp);
2192
0
                }
2193
0
                free(dpif->handlers);
2194
0
                dpif->handlers = NULL;
2195
2196
0
                return error;
2197
0
            }
2198
0
        }
2199
0
        dpif->n_handlers = n_handlers;
2200
0
    }
2201
2202
0
    for (i = 0; i < n_handlers; i++) {
2203
0
        struct dpif_handler *handler = &dpif->handlers[i];
2204
2205
0
        handler->event_offset = handler->n_events = 0;
2206
0
    }
2207
2208
0
    keep_channels_nbits = dpif->uc_array_size;
2209
0
    keep_channels = bitmap_allocate(keep_channels_nbits);
2210
2211
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
2212
0
    dpif_netlink_port_dump_start__(dpif, &dump);
2213
0
    while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
2214
0
        uint32_t port_no = odp_to_u32(vport.port_no);
2215
0
        uint32_t upcall_pid;
2216
0
        int error;
2217
2218
0
        if (port_no >= dpif->uc_array_size
2219
0
            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
2220
0
            struct nl_sock *sock;
2221
0
            error = create_nl_sock(dpif, &sock);
2222
2223
0
            if (error) {
2224
0
                goto error;
2225
0
            }
2226
2227
0
            error = vport_add_channel(dpif, vport.port_no, sock);
2228
0
            if (error) {
2229
0
                VLOG_INFO("%s: could not add channels for port %s",
2230
0
                          dpif_name(&dpif->dpif), vport.name);
2231
0
                nl_sock_destroy(sock);
2232
0
                retval = error;
2233
0
                goto error;
2234
0
            }
2235
0
            upcall_pid = nl_sock_pid(sock);
2236
0
        }
2237
2238
        /* Configure the vport to deliver misses to 'sock'. */
2239
0
        if (vport.upcall_pids[0] == 0
2240
0
            || vport.n_upcall_pids != 1
2241
0
            || upcall_pid != vport.upcall_pids[0]) {
2242
0
            struct dpif_netlink_vport vport_request;
2243
2244
0
            dpif_netlink_vport_init(&vport_request);
2245
0
            vport_request.cmd = OVS_VPORT_CMD_SET;
2246
0
            vport_request.dp_ifindex = dpif->dp_ifindex;
2247
0
            vport_request.port_no = vport.port_no;
2248
0
            vport_request.n_upcall_pids = 1;
2249
0
            vport_request.upcall_pids = &upcall_pid;
2250
0
            error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
2251
0
            if (error) {
2252
0
                VLOG_WARN_RL(&error_rl,
2253
0
                             "%s: failed to set upcall pid on port: %s",
2254
0
                             dpif_name(&dpif->dpif), ovs_strerror(error));
2255
2256
0
                if (error != ENODEV && error != ENOENT) {
2257
0
                    retval = error;
2258
0
                } else {
2259
                    /* The vport isn't really there, even though the dump says
2260
                     * it is.  Probably we just hit a race after a port
2261
                     * disappeared. */
2262
0
                }
2263
0
                goto error;
2264
0
            }
2265
0
        }
2266
2267
0
        if (port_no < keep_channels_nbits) {
2268
0
            bitmap_set1(keep_channels, port_no);
2269
0
        }
2270
0
        continue;
2271
2272
0
    error:
2273
0
        vport_del_channels(dpif, vport.port_no);
2274
0
    }
2275
0
    nl_dump_done(&dump);
2276
0
    ofpbuf_uninit(&buf);
2277
2278
    /* Discard any saved channels that we didn't reuse. */
2279
0
    for (i = 0; i < keep_channels_nbits; i++) {
2280
0
        if (!bitmap_is_set(keep_channels, i)) {
2281
0
            vport_del_channels(dpif, u32_to_odp(i));
2282
0
        }
2283
0
    }
2284
0
    free(keep_channels);
2285
2286
0
    return retval;
2287
0
}
2288
2289
static int
2290
dpif_netlink_recv_set_vport_dispatch(struct dpif_netlink *dpif, bool enable)
2291
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2292
0
{
2293
0
    if ((dpif->handlers != NULL) == enable) {
2294
0
        return 0;
2295
0
    } else if (!enable) {
2296
0
        destroy_all_channels(dpif);
2297
0
        return 0;
2298
0
    } else {
2299
0
        return dpif_netlink_refresh_handlers_vport_dispatch(dpif, 1);
2300
0
    }
2301
0
}
2302
2303
static int
2304
dpif_netlink_recv_set_cpu_dispatch(struct dpif_netlink *dpif, bool enable)
2305
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2306
0
{
2307
0
    if ((dpif->handlers != NULL) == enable) {
2308
0
        return 0;
2309
0
    } else if (!enable) {
2310
0
        destroy_all_handlers(dpif);
2311
0
        return 0;
2312
0
    } else {
2313
0
        return dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
2314
0
    }
2315
0
}
2316
2317
static int
2318
dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
2319
0
{
2320
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2321
0
    int error;
2322
2323
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2324
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2325
0
        error = dpif_netlink_recv_set_cpu_dispatch(dpif, enable);
2326
0
    } else {
2327
0
        error = dpif_netlink_recv_set_vport_dispatch(dpif, enable);
2328
0
    }
2329
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2330
2331
0
    return error;
2332
0
}
2333
2334
static int
2335
dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
2336
0
{
2337
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2338
0
    int error = 0;
2339
2340
#ifdef _WIN32
2341
    /* Multiple upcall handlers will be supported once kernel datapath supports
2342
     * it. */
2343
    if (n_handlers > 1) {
2344
        return error;
2345
    }
2346
#endif
2347
2348
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2349
0
    if (dpif->handlers) {
2350
0
        if (dpif_netlink_upcall_per_cpu(dpif)) {
2351
0
            error = dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
2352
0
        } else {
2353
0
            error = dpif_netlink_refresh_handlers_vport_dispatch(dpif,
2354
0
                                                                 n_handlers);
2355
0
        }
2356
0
    }
2357
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2358
2359
0
    return error;
2360
0
}
2361
2362
/* In per-CPU dispatch mode, stores the calculated handler count in
 * '*n_handlers' and returns true.  Otherwise leaves '*n_handlers' untouched
 * and returns false. */
static bool
dpif_netlink_number_handlers_required(struct dpif *dpif_, uint32_t *n_handlers)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    if (!dpif_netlink_upcall_per_cpu(dpif)) {
        return false;
    }

    *n_handlers = dpif_netlink_calculate_n_handlers();
    return true;
}
2374
2375
static int
2376
dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2377
                             uint32_t queue_id, uint32_t *priority)
2378
0
{
2379
0
    if (queue_id < 0xf000) {
2380
0
        *priority = TC_H_MAKE(1 << 16, queue_id + 1);
2381
0
        return 0;
2382
0
    } else {
2383
0
        return EINVAL;
2384
0
    }
2385
0
}
2386
2387
static int
2388
parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
2389
                 int *dp_ifindex)
2390
0
{
2391
0
    static const struct nl_policy ovs_packet_policy[] = {
2392
        /* Always present. */
2393
0
        [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
2394
0
                                     .min_len = ETH_HEADER_LEN },
2395
0
        [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
2396
2397
        /* OVS_PACKET_CMD_ACTION only. */
2398
0
        [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
2399
0
        [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
2400
0
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
2401
0
        [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true },
2402
0
        [OVS_PACKET_ATTR_HASH] = { .type = NL_A_U64, .optional = true }
2403
0
    };
2404
2405
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2406
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2407
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2408
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2409
2410
0
    struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
2411
0
    if (!nlmsg || !genl || !ovs_header
2412
0
        || nlmsg->nlmsg_type != ovs_packet_family
2413
0
        || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2414
0
                            ARRAY_SIZE(ovs_packet_policy))) {
2415
0
        return EINVAL;
2416
0
    }
2417
2418
0
    int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2419
0
                : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2420
0
                : -1);
2421
0
    if (type < 0) {
2422
0
        return EINVAL;
2423
0
    }
2424
2425
    /* (Re)set ALL fields of '*upcall' on successful return. */
2426
0
    upcall->type = type;
2427
0
    upcall->key = CONST_CAST(struct nlattr *,
2428
0
                             nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
2429
0
    upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
2430
0
    odp_flow_key_hash(upcall->key, upcall->key_len, &upcall->ufid);
2431
0
    upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
2432
0
    upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
2433
0
    upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
2434
0
    upcall->mru = a[OVS_PACKET_ATTR_MRU];
2435
0
    upcall->hash = a[OVS_PACKET_ATTR_HASH];
2436
2437
    /* Allow overwriting the netlink attribute header without reallocating. */
2438
0
    dp_packet_use_stub(&upcall->packet,
2439
0
                    CONST_CAST(struct nlattr *,
2440
0
                               nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2441
0
                    nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2442
0
                    sizeof(struct nlattr));
2443
0
    dp_packet_set_data(&upcall->packet,
2444
0
                    (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2445
0
    dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
2446
2447
0
    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2448
        /* Ethernet frame */
2449
0
        upcall->packet.packet_type = htonl(PT_ETH);
2450
0
    } else {
2451
        /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2452
0
        ovs_be16 ethertype = 0;
2453
0
        const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2454
0
                                                     upcall->key_len,
2455
0
                                                     OVS_KEY_ATTR_ETHERTYPE);
2456
0
        if (et_nla) {
2457
0
            ethertype = nl_attr_get_be16(et_nla);
2458
0
        }
2459
0
        upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2460
0
                                                    ntohs(ethertype));
2461
0
        dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2462
0
    }
2463
2464
0
    *dp_ifindex = ovs_header->dp_ifindex;
2465
2466
0
    return 0;
2467
0
}
2468
2469
#ifdef _WIN32
2470
#define PACKET_RECV_BATCH_SIZE 50
2471
static int
2472
dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2473
                          struct dpif_upcall *upcall, struct ofpbuf *buf)
2474
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2475
{
2476
    struct dpif_handler *handler;
2477
    int read_tries = 0;
2478
    struct dpif_windows_vport_sock *sock_pool;
2479
    uint32_t i;
2480
2481
    if (!dpif->handlers) {
2482
        return EAGAIN;
2483
    }
2484
2485
    /* Only one handler is supported currently. */
2486
    if (handler_id >= 1) {
2487
        return EAGAIN;
2488
    }
2489
2490
    if (handler_id >= dpif->n_handlers) {
2491
        return EAGAIN;
2492
    }
2493
2494
    handler = &dpif->handlers[handler_id];
2495
    sock_pool = handler->vport_sock_pool;
2496
2497
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2498
        for (;;) {
2499
            int dp_ifindex;
2500
            int error;
2501
2502
            if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2503
                return EAGAIN;
2504
            }
2505
2506
            error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
2507
            if (error == ENOBUFS) {
2508
                /* ENOBUFS typically means that we've received so many
2509
                 * packets that the buffer overflowed.  Try again
2510
                 * immediately because there's almost certainly a packet
2511
                 * waiting for us. */
2512
                /* XXX: report_loss(dpif, ch, idx, handler_id); */
2513
                continue;
2514
            }
2515
2516
            /* XXX: ch->last_poll = time_msec(); */
2517
            if (error) {
2518
                if (error == EAGAIN) {
2519
                    break;
2520
                }
2521
                return error;
2522
            }
2523
2524
            error = parse_odp_packet(buf, upcall, &dp_ifindex);
2525
            if (!error && dp_ifindex == dpif->dp_ifindex) {
2526
                upcall->pid = 0;
2527
                return 0;
2528
            } else if (error) {
2529
                return error;
2530
            }
2531
        }
2532
    }
2533
2534
    return EAGAIN;
2535
}
2536
#else
2537
static int
2538
dpif_netlink_recv_cpu_dispatch(struct dpif_netlink *dpif, uint32_t handler_id,
2539
                               struct dpif_upcall *upcall, struct ofpbuf *buf)
2540
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2541
0
{
2542
0
    struct dpif_handler *handler;
2543
0
    int read_tries = 0;
2544
2545
0
    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2546
0
        return EAGAIN;
2547
0
    }
2548
2549
0
    handler = &dpif->handlers[handler_id];
2550
2551
0
    for (;;) {
2552
0
        int dp_ifindex;
2553
0
        int error;
2554
2555
0
        if (++read_tries > 50) {
2556
0
            return EAGAIN;
2557
0
        }
2558
0
        error = nl_sock_recv(handler->sock, buf, NULL, false);
2559
0
        if (error == ENOBUFS) {
2560
            /* ENOBUFS typically means that we've received so many
2561
             * packets that the buffer overflowed.  Try again
2562
             * immediately because there's almost certainly a packet
2563
             * waiting for us. */
2564
0
            report_loss(dpif, NULL, 0, handler_id);
2565
0
            continue;
2566
0
        }
2567
2568
0
        if (error) {
2569
0
            if (error == EAGAIN) {
2570
0
                break;
2571
0
            }
2572
0
            return error;
2573
0
        }
2574
2575
0
        error = parse_odp_packet(buf, upcall, &dp_ifindex);
2576
0
        if (!error && dp_ifindex == dpif->dp_ifindex) {
2577
0
            upcall->pid = nl_sock_pid(handler->sock);
2578
0
            return 0;
2579
0
        } else if (error) {
2580
0
            return error;
2581
0
        }
2582
0
    }
2583
2584
0
    return EAGAIN;
2585
0
}
2586
2587
static int
2588
dpif_netlink_recv_vport_dispatch(struct dpif_netlink *dpif,
2589
                                 uint32_t handler_id,
2590
                                 struct dpif_upcall *upcall,
2591
                                 struct ofpbuf *buf)
2592
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2593
0
{
2594
0
    struct dpif_handler *handler;
2595
0
    int read_tries = 0;
2596
2597
0
    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2598
0
        return EAGAIN;
2599
0
    }
2600
2601
0
    handler = &dpif->handlers[handler_id];
2602
0
    if (handler->event_offset >= handler->n_events) {
2603
0
        int retval;
2604
2605
0
        handler->event_offset = handler->n_events = 0;
2606
2607
0
        do {
2608
0
            retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
2609
0
                                dpif->uc_array_size, 0);
2610
0
        } while (retval < 0 && errno == EINTR);
2611
2612
0
        if (retval < 0) {
2613
0
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2614
0
            VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
2615
0
        } else if (retval > 0) {
2616
0
            handler->n_events = retval;
2617
0
        }
2618
0
    }
2619
2620
0
    while (handler->event_offset < handler->n_events) {
2621
0
        int idx = handler->epoll_events[handler->event_offset].data.u32;
2622
0
        struct dpif_channel *ch = &dpif->channels[idx];
2623
2624
0
        handler->event_offset++;
2625
2626
0
        for (;;) {
2627
0
            int dp_ifindex;
2628
0
            int error;
2629
2630
0
            if (++read_tries > 50) {
2631
0
                return EAGAIN;
2632
0
            }
2633
2634
0
            error = nl_sock_recv(ch->sock, buf, NULL, false);
2635
0
            if (error == ENOBUFS) {
2636
                /* ENOBUFS typically means that we've received so many
2637
                 * packets that the buffer overflowed.  Try again
2638
                 * immediately because there's almost certainly a packet
2639
                 * waiting for us. */
2640
0
                report_loss(dpif, ch, idx, handler_id);
2641
0
                continue;
2642
0
            }
2643
2644
0
            ch->last_poll = time_msec();
2645
0
            if (error) {
2646
0
                if (error == EAGAIN) {
2647
0
                    break;
2648
0
                }
2649
0
                return error;
2650
0
            }
2651
2652
0
            error = parse_odp_packet(buf, upcall, &dp_ifindex);
2653
0
            if (!error && dp_ifindex == dpif->dp_ifindex) {
2654
0
                upcall->pid = nl_sock_pid(ch->sock);
2655
0
                return 0;
2656
0
            } else if (error) {
2657
0
                return error;
2658
0
            }
2659
0
        }
2660
0
    }
2661
2662
0
    return EAGAIN;
2663
0
}
2664
#endif
2665
2666
static int
2667
dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2668
                  struct dpif_upcall *upcall, struct ofpbuf *buf)
2669
0
{
2670
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2671
0
    int error;
2672
2673
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
2674
#ifdef _WIN32
2675
    error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2676
#else
2677
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2678
0
        error = dpif_netlink_recv_cpu_dispatch(dpif, handler_id, upcall, buf);
2679
0
    } else {
2680
0
        error = dpif_netlink_recv_vport_dispatch(dpif,
2681
0
                                                 handler_id, upcall, buf);
2682
0
    }
2683
0
#endif
2684
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2685
2686
0
    return error;
2687
0
}
2688
2689
#ifdef _WIN32
2690
static void
2691
dpif_netlink_recv_wait_windows(struct dpif_netlink *dpif, uint32_t handler_id)
2692
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2693
{
2694
    uint32_t i;
2695
    struct dpif_windows_vport_sock *sock_pool =
2696
        dpif->handlers[handler_id].vport_sock_pool;
2697
2698
    /* Only one handler is supported currently. */
2699
    if (handler_id >= 1) {
2700
        return;
2701
    }
2702
2703
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2704
        nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2705
    }
2706
}
2707
#else
2708
2709
static void
2710
dpif_netlink_recv_wait_vport_dispatch(struct dpif_netlink *dpif,
2711
                                      uint32_t handler_id)
2712
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2713
0
{
2714
0
    if (dpif->handlers && handler_id < dpif->n_handlers) {
2715
0
        struct dpif_handler *handler = &dpif->handlers[handler_id];
2716
2717
0
        poll_fd_wait(handler->epoll_fd, POLLIN);
2718
0
    }
2719
0
}
2720
2721
static void
2722
dpif_netlink_recv_wait_cpu_dispatch(struct dpif_netlink *dpif,
2723
                                    uint32_t handler_id)
2724
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2725
0
{
2726
0
    if (dpif->handlers && handler_id < dpif->n_handlers) {
2727
0
        struct dpif_handler *handler = &dpif->handlers[handler_id];
2728
2729
0
        poll_fd_wait(nl_sock_fd(handler->sock), POLLIN);
2730
0
    }
2731
0
}
2732
#endif
2733
2734
static void
2735
dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
2736
0
{
2737
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2738
2739
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
2740
#ifdef _WIN32
2741
    dpif_netlink_recv_wait_windows(dpif, handler_id);
2742
#else
2743
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2744
0
        dpif_netlink_recv_wait_cpu_dispatch(dpif, handler_id);
2745
0
    } else {
2746
0
        dpif_netlink_recv_wait_vport_dispatch(dpif, handler_id);
2747
0
    }
2748
0
#endif
2749
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2750
0
}
2751
2752
static void
2753
dpif_netlink_recv_purge_vport_dispatch(struct dpif_netlink *dpif)
2754
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2755
0
{
2756
0
    if (dpif->handlers) {
2757
0
        size_t i;
2758
2759
0
        if (!dpif->channels[0].sock) {
2760
0
            return;
2761
0
        }
2762
0
        for (i = 0; i < dpif->uc_array_size; i++ ) {
2763
2764
0
            nl_sock_drain(dpif->channels[i].sock);
2765
0
        }
2766
0
    }
2767
0
}
2768
2769
static void
2770
dpif_netlink_recv_purge_cpu_dispatch(struct dpif_netlink *dpif)
2771
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2772
0
{
2773
0
    int handler_id;
2774
2775
0
    if (dpif->handlers) {
2776
0
        for (handler_id = 0; handler_id < dpif->n_handlers; handler_id++) {
2777
0
            struct dpif_handler *handler = &dpif->handlers[handler_id];
2778
0
            nl_sock_drain(handler->sock);
2779
0
        }
2780
0
    }
2781
0
}
2782
2783
static void
2784
dpif_netlink_recv_purge(struct dpif *dpif_)
2785
0
{
2786
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2787
2788
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2789
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2790
0
        dpif_netlink_recv_purge_cpu_dispatch(dpif);
2791
0
    } else {
2792
0
        dpif_netlink_recv_purge_vport_dispatch(dpif);
2793
0
    }
2794
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2795
0
}
2796
2797
/* Returns the version of the kernel datapath module as a malloc()'d string
 * that the caller must free, or NULL if it cannot be determined.
 *
 * On Linux the version is the first line of
 * /sys/module/openvswitch/version with its trailing newline removed.  On
 * other platforms this always returns NULL. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            /* Cut the line at its first newline, if there is one. */
            line[strcspn(line, "\n")] = '\0';
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2826
2827
struct dpif_netlink_ct_dump_state {
2828
    struct ct_dpif_dump_state up;
2829
    struct nl_ct_dump_state *nl_ct_dump;
2830
};
2831
2832
static int
2833
dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2834
                           struct ct_dpif_dump_state **dump_,
2835
                           const uint16_t *zone, int *ptot_bkts)
2836
0
{
2837
0
    struct dpif_netlink_ct_dump_state *dump;
2838
0
    int err;
2839
2840
0
    dump = xzalloc(sizeof *dump);
2841
0
    err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
2842
0
    if (err) {
2843
0
        free(dump);
2844
0
        return err;
2845
0
    }
2846
2847
0
    *dump_ = &dump->up;
2848
2849
0
    return 0;
2850
0
}
2851
2852
static int
2853
dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2854
                          struct ct_dpif_dump_state *dump_,
2855
                          struct ct_dpif_entry *entry)
2856
0
{
2857
0
    struct dpif_netlink_ct_dump_state *dump;
2858
2859
0
    INIT_CONTAINER(dump, dump_, up);
2860
2861
0
    return nl_ct_dump_next(dump->nl_ct_dump, entry);
2862
0
}
2863
2864
static int
2865
dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2866
                          struct ct_dpif_dump_state *dump_)
2867
0
{
2868
0
    struct dpif_netlink_ct_dump_state *dump;
2869
2870
0
    INIT_CONTAINER(dump, dump_, up);
2871
2872
0
    int err = nl_ct_dump_done(dump->nl_ct_dump);
2873
0
    free(dump);
2874
0
    return err;
2875
0
}
2876
2877
static int
2878
dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2879
                      const struct ct_dpif_tuple *tuple)
2880
0
{
2881
0
    if (tuple) {
2882
0
        return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2883
0
    } else if (zone) {
2884
0
        return nl_ct_flush_zone(*zone);
2885
0
    } else {
2886
0
        return nl_ct_flush();
2887
0
    }
2888
0
}
2889
2890
static int
2891
dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
2892
                           const struct ovs_list *zone_limits)
2893
0
{
2894
0
    if (ovs_ct_limit_family < 0) {
2895
0
        return EOPNOTSUPP;
2896
0
    }
2897
2898
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2899
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2900
0
                          NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
2901
0
                          OVS_CT_LIMIT_VERSION);
2902
2903
0
    struct ovs_header *ovs_header;
2904
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2905
0
    ovs_header->dp_ifindex = 0;
2906
2907
0
    size_t opt_offset;
2908
0
    opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2909
2910
0
    if (!ovs_list_is_empty(zone_limits)) {
2911
0
        struct ct_dpif_zone_limit *zone_limit;
2912
2913
0
        LIST_FOR_EACH (zone_limit, node, zone_limits) {
2914
0
            struct ovs_zone_limit req_zone_limit = {
2915
0
                .zone_id = zone_limit->zone,
2916
0
                .limit   = zone_limit->limit,
2917
0
            };
2918
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2919
0
        }
2920
0
    }
2921
0
    nl_msg_end_nested(request, opt_offset);
2922
2923
0
    int err = nl_transact(NETLINK_GENERIC, request, NULL);
2924
0
    ofpbuf_delete(request);
2925
0
    return err;
2926
0
}
2927
2928
static int
2929
dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
2930
                                     struct ovs_list *zone_limits)
2931
0
{
2932
0
    static const struct nl_policy ovs_ct_limit_policy[] = {
2933
0
        [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
2934
0
                                           .optional = true },
2935
0
    };
2936
2937
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2938
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2939
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2940
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2941
2942
0
    struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];
2943
2944
0
    if (!nlmsg || !genl || !ovs_header
2945
0
        || nlmsg->nlmsg_type != ovs_ct_limit_family
2946
0
        || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
2947
0
                            ARRAY_SIZE(ovs_ct_limit_policy))) {
2948
0
        return EINVAL;
2949
0
    }
2950
2951
2952
0
    if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
2953
0
        return EINVAL;
2954
0
    }
2955
2956
0
    int rem = NLA_ALIGN(
2957
0
                nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
2958
0
    const struct ovs_zone_limit *zone_limit =
2959
0
                nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
2960
2961
0
    while (rem >= sizeof *zone_limit) {
2962
0
        if (zone_limit->zone_id >= OVS_ZONE_LIMIT_DEFAULT_ZONE &&
2963
0
            zone_limit->zone_id <= UINT16_MAX) {
2964
0
            ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
2965
0
                                    zone_limit->limit, zone_limit->count);
2966
0
        }
2967
0
        rem -= NLA_ALIGN(sizeof *zone_limit);
2968
0
        zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
2969
0
            (unsigned char *) zone_limit  + NLA_ALIGN(sizeof *zone_limit));
2970
0
    }
2971
0
    return 0;
2972
0
}
2973
2974
static int
2975
dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
2976
                           const struct ovs_list *zone_limits_request,
2977
                           struct ovs_list *zone_limits_reply)
2978
0
{
2979
0
    if (ovs_ct_limit_family < 0) {
2980
0
        return EOPNOTSUPP;
2981
0
    }
2982
2983
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2984
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2985
0
            NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
2986
0
            OVS_CT_LIMIT_VERSION);
2987
2988
0
    struct ovs_header *ovs_header;
2989
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2990
0
    ovs_header->dp_ifindex = 0;
2991
2992
0
    if (!ovs_list_is_empty(zone_limits_request)) {
2993
0
        size_t opt_offset = nl_msg_start_nested(request,
2994
0
                                                OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2995
2996
0
        struct ct_dpif_zone_limit *zone_limit;
2997
0
        LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
2998
0
            struct ovs_zone_limit req_zone_limit = {
2999
0
                .zone_id = zone_limit->zone,
3000
0
            };
3001
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3002
0
        }
3003
3004
0
        nl_msg_end_nested(request, opt_offset);
3005
0
    }
3006
3007
0
    struct ofpbuf *reply;
3008
0
    int err = nl_transact(NETLINK_GENERIC, request, &reply);
3009
0
    if (err) {
3010
0
        goto out;
3011
0
    }
3012
3013
0
    err = dpif_netlink_zone_limits_from_ofpbuf(reply, zone_limits_reply);
3014
3015
0
out:
3016
0
    ofpbuf_delete(request);
3017
0
    ofpbuf_delete(reply);
3018
0
    return err;
3019
0
}
3020
3021
static int
3022
dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
3023
                           const struct ovs_list *zone_limits)
3024
0
{
3025
0
    if (ovs_ct_limit_family < 0) {
3026
0
        return EOPNOTSUPP;
3027
0
    }
3028
3029
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3030
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3031
0
            NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
3032
0
            OVS_CT_LIMIT_VERSION);
3033
3034
0
    struct ovs_header *ovs_header;
3035
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3036
0
    ovs_header->dp_ifindex = 0;
3037
3038
0
    if (!ovs_list_is_empty(zone_limits)) {
3039
0
        size_t opt_offset =
3040
0
            nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3041
3042
0
        struct ct_dpif_zone_limit *zone_limit;
3043
0
        LIST_FOR_EACH (zone_limit, node, zone_limits) {
3044
0
            struct ovs_zone_limit req_zone_limit = {
3045
0
                .zone_id = zone_limit->zone,
3046
0
            };
3047
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3048
0
        }
3049
0
        nl_msg_end_nested(request, opt_offset);
3050
0
    }
3051
3052
0
    int err = nl_transact(NETLINK_GENERIC, request, NULL);
3053
3054
0
    ofpbuf_delete(request);
3055
0
    return err;
3056
0
}
3057
3058
0
/* Prefix of the name of every timeout policy that this file installs; the
 * numeric policy id follows it directly. */
#define NL_TP_NAME_PREFIX "ovs_tp_"
3059
3060
/* An (L3, L4) protocol combination that gets its own sub timeout policy. */
struct dpif_netlink_timeout_policy_protocol {
    uint16_t    l3num;      /* Address family, e.g. AF_INET. */
    uint8_t     l4num;      /* IP protocol, e.g. IPPROTO_TCP. */
};
3064
3065
/* Indexes into tp_protos[], one per supported (L3, L4) combination.
 * DPIF_NL_TP_MAX is the number of combinations, not a combination itself. */
enum OVS_PACKED_ENUM dpif_netlink_support_timeout_policy_protocol {
    DPIF_NL_TP_AF_INET_TCP,
    DPIF_NL_TP_AF_INET_UDP,
    DPIF_NL_TP_AF_INET_ICMP,
    DPIF_NL_TP_AF_INET6_TCP,
    DPIF_NL_TP_AF_INET6_UDP,
    DPIF_NL_TP_AF_INET6_ICMPV6,
    DPIF_NL_TP_MAX
};

/* Bitmap with one bit set for each of the combinations above. */
#define DPIF_NL_ALL_TP ((1UL << DPIF_NL_TP_MAX) - 1)
3076
3077
3078
static struct dpif_netlink_timeout_policy_protocol tp_protos[] = {
3079
    [DPIF_NL_TP_AF_INET_TCP] = { .l3num = AF_INET, .l4num = IPPROTO_TCP },
3080
    [DPIF_NL_TP_AF_INET_UDP] = { .l3num = AF_INET, .l4num = IPPROTO_UDP },
3081
    [DPIF_NL_TP_AF_INET_ICMP] = { .l3num = AF_INET, .l4num = IPPROTO_ICMP },
3082
    [DPIF_NL_TP_AF_INET6_TCP] = { .l3num = AF_INET6, .l4num = IPPROTO_TCP },
3083
    [DPIF_NL_TP_AF_INET6_UDP] = { .l3num = AF_INET6, .l4num = IPPROTO_UDP },
3084
    [DPIF_NL_TP_AF_INET6_ICMPV6] = { .l3num = AF_INET6,
3085
                                     .l4num = IPPROTO_ICMPV6 },
3086
};
3087
3088
static void
3089
dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
3090
                            char **tp_name)
3091
0
{
3092
0
    struct ds ds = DS_EMPTY_INITIALIZER;
3093
0
    ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
3094
0
    ct_dpif_format_ipproto(&ds, l4num);
3095
3096
0
    if (l3num == AF_INET) {
3097
0
        ds_put_cstr(&ds, "4");
3098
0
    } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) {
3099
0
        ds_put_cstr(&ds, "6");
3100
0
    }
3101
3102
0
    ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX);
3103
3104
0
    *tp_name = ds_steal_cstr(&ds);
3105
0
}
3106
3107
static int
3108
dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
3109
                                        uint32_t tp_id, uint16_t dl_type,
3110
                                        uint8_t nw_proto, char **tp_name,
3111
                                        bool *is_generic)
3112
0
{
3113
0
    dpif_netlink_format_tp_name(tp_id,
3114
0
                                dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6,
3115
0
                                nw_proto, tp_name);
3116
0
    *is_generic = false;
3117
0
    return 0;
3118
0
}
3119
3120
static int
3121
dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED,
3122
                             enum ct_features *features)
3123
0
{
3124
0
    if (features != NULL) {
3125
0
#ifndef _WIN32
3126
0
        *features = CONNTRACK_F_ZERO_SNAT;
3127
#else
3128
        *features = 0;
3129
#endif
3130
0
    }
3131
0
    return 0;
3132
0
}
3133
3134
/* X-macro tables pairing timeout attributes.  Each entry has the form
 *
 *     CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)
 *
 * and pairs CT_DPIF_TP_ATTR_<PROTO1>_<ATTR1> with
 * CTA_TIMEOUT_<PROTO2>_<ATTR2>.  A table produces code only where it is
 * expanded while CT_DPIF_NL_TP_MAPPING is defined. */

/* TCP attributes. */
#define CT_DPIF_NL_TP_TCP_MAPPINGS                              \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, ESTABLISHED, ESTABLISHED)   \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, FIN_WAIT, FIN_WAIT)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE_WAIT, CLOSE_WAIT)     \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, LAST_ACK, LAST_ACK)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, TIME_WAIT, TIME_WAIT)       \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE, CLOSE)               \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT2, SYN_SENT2)       \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, RETRANSMIT, RETRANS)        \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, UNACK, UNACK)

/* UDP attributes. */
#define CT_DPIF_NL_TP_UDP_MAPPINGS                              \
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, SINGLE, UNREPLIED)          \
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, MULTIPLE, REPLIED)

/* ICMP attributes. */
#define CT_DPIF_NL_TP_ICMP_MAPPINGS                             \
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMP, FIRST, TIMEOUT)

/* ICMPv6 attributes; these share the ICMP attribute on the CT_DPIF side. */
#define CT_DPIF_NL_TP_ICMPV6_MAPPINGS                           \
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMPV6, FIRST, TIMEOUT)
3156
3157
3158
0
#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)     \
3159
0
if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) {  \
3160
0
    nl_tp->present |= 1 << CTA_TIMEOUT_##PROTO2##_##ATTR2;      \
3161
0
    nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2] =              \
3162
0
        tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1];          \
3163
0
}
3164
3165
static void
3166
dpif_netlink_get_nl_tp_tcp_attrs(const struct ct_dpif_timeout_policy *tp,
3167
                                 struct nl_ct_timeout_policy *nl_tp)
3168
0
{
3169
0
    CT_DPIF_NL_TP_TCP_MAPPINGS
3170
0
}
3171
3172
static void
3173
dpif_netlink_get_nl_tp_udp_attrs(const struct ct_dpif_timeout_policy *tp,
3174
                                 struct nl_ct_timeout_policy *nl_tp)
3175
0
{
3176
0
    CT_DPIF_NL_TP_UDP_MAPPINGS
3177
0
}
3178
3179
static void
3180
dpif_netlink_get_nl_tp_icmp_attrs(const struct ct_dpif_timeout_policy *tp,
3181
                                  struct nl_ct_timeout_policy *nl_tp)
3182
0
{
3183
0
    CT_DPIF_NL_TP_ICMP_MAPPINGS
3184
0
}
3185
3186
static void
3187
dpif_netlink_get_nl_tp_icmpv6_attrs(const struct ct_dpif_timeout_policy *tp,
3188
                                    struct nl_ct_timeout_policy *nl_tp)
3189
0
{
3190
0
    CT_DPIF_NL_TP_ICMPV6_MAPPINGS
3191
0
}
3192
3193
#undef CT_DPIF_NL_TP_MAPPING
3194
3195
static void
3196
dpif_netlink_get_nl_tp_attrs(const struct ct_dpif_timeout_policy *tp,
3197
                             uint8_t l4num, struct nl_ct_timeout_policy *nl_tp)
3198
0
{
3199
0
    nl_tp->present = 0;
3200
3201
0
    if (l4num == IPPROTO_TCP) {
3202
0
        dpif_netlink_get_nl_tp_tcp_attrs(tp, nl_tp);
3203
0
    } else if (l4num == IPPROTO_UDP) {
3204
0
        dpif_netlink_get_nl_tp_udp_attrs(tp, nl_tp);
3205
0
    } else if (l4num == IPPROTO_ICMP) {
3206
0
        dpif_netlink_get_nl_tp_icmp_attrs(tp, nl_tp);
3207
0
    } else if (l4num == IPPROTO_ICMPV6) {
3208
0
        dpif_netlink_get_nl_tp_icmpv6_attrs(tp, nl_tp);
3209
0
    }
3210
0
}
3211
3212
0
#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)                 \
3213
0
if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) {               \
3214
0
    if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) {          \
3215
0
        if (tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] !=                \
3216
0
            nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]) {                 \
3217
0
            VLOG_WARN_RL(&error_rl, "Inconsistent timeout policy %s "       \
3218
0
                         "attribute %s=%"PRIu32" while %s=%"PRIu32,         \
3219
0
                         nl_tp->name, "CTA_TIMEOUT_"#PROTO2"_"#ATTR2,       \
3220
0
                         nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2],      \
3221
0
                         "CT_DPIF_TP_ATTR_"#PROTO1"_"#ATTR1,                \
3222
0
                         tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]);    \
3223
0
        }                                                                   \
3224
0
    } else {                                                                \
3225
0
        tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1;             \
3226
0
        tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] =                     \
3227
0
            nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2];                   \
3228
0
    }                                                                       \
3229
0
}
3230
3231
static void
3232
dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3233
                                      struct ct_dpif_timeout_policy *tp)
3234
0
{
3235
0
    CT_DPIF_NL_TP_TCP_MAPPINGS
3236
0
}
3237
3238
static void
3239
dpif_netlink_set_ct_dpif_tp_udp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3240
                                      struct ct_dpif_timeout_policy *tp)
3241
0
{
3242
0
    CT_DPIF_NL_TP_UDP_MAPPINGS
3243
0
}
3244
3245
static void
3246
dpif_netlink_set_ct_dpif_tp_icmp_attrs(
3247
    const struct nl_ct_timeout_policy *nl_tp,
3248
    struct ct_dpif_timeout_policy *tp)
3249
0
{
3250
0
    CT_DPIF_NL_TP_ICMP_MAPPINGS
3251
0
}
3252
3253
static void
3254
dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(
3255
    const struct nl_ct_timeout_policy *nl_tp,
3256
    struct ct_dpif_timeout_policy *tp)
3257
0
{
3258
0
    CT_DPIF_NL_TP_ICMPV6_MAPPINGS
3259
0
}
3260
3261
#undef CT_DPIF_NL_TP_MAPPING
3262
3263
static void
3264
dpif_netlink_set_ct_dpif_tp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3265
                                  struct ct_dpif_timeout_policy *tp)
3266
0
{
3267
0
    if (nl_tp->l4num == IPPROTO_TCP) {
3268
0
        dpif_netlink_set_ct_dpif_tp_tcp_attrs(nl_tp, tp);
3269
0
    } else if (nl_tp->l4num == IPPROTO_UDP) {
3270
0
        dpif_netlink_set_ct_dpif_tp_udp_attrs(nl_tp, tp);
3271
0
    } else if (nl_tp->l4num == IPPROTO_ICMP) {
3272
0
        dpif_netlink_set_ct_dpif_tp_icmp_attrs(nl_tp, tp);
3273
0
    } else if (nl_tp->l4num == IPPROTO_ICMPV6) {
3274
0
        dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(nl_tp, tp);
3275
0
    }
3276
0
}
3277
3278
#ifdef _WIN32
3279
static int
3280
dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3281
                                   const struct ct_dpif_timeout_policy *tp)
3282
{
3283
    return EOPNOTSUPP;
3284
}
3285
3286
static int
3287
dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3288
                                   uint32_t tp_id,
3289
                                   struct ct_dpif_timeout_policy *tp)
3290
{
3291
    return EOPNOTSUPP;
3292
}
3293
3294
static int
3295
dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3296
                                   uint32_t tp_id)
3297
{
3298
    return EOPNOTSUPP;
3299
}
3300
3301
static int
3302
dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3303
                                          void **statep)
3304
{
3305
    return EOPNOTSUPP;
3306
}
3307
3308
static int
3309
dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3310
                                         void *state,
3311
                                         struct ct_dpif_timeout_policy **tp)
3312
{
3313
    return EOPNOTSUPP;
3314
}
3315
3316
static int
3317
dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
3318
                                         void *state)
3319
{
3320
    return EOPNOTSUPP;
3321
}
3322
#else
3323
static int
3324
dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3325
                                   const struct ct_dpif_timeout_policy *tp)
3326
0
{
3327
0
    int err = 0;
3328
3329
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3330
0
        struct nl_ct_timeout_policy nl_tp;
3331
0
        char *nl_tp_name;
3332
3333
0
        dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num,
3334
0
                                    tp_protos[i].l4num, &nl_tp_name);
3335
0
        ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name);
3336
0
        free(nl_tp_name);
3337
3338
0
        nl_tp.l3num = tp_protos[i].l3num;
3339
0
        nl_tp.l4num = tp_protos[i].l4num;
3340
0
        dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp);
3341
0
        err = nl_ct_set_timeout_policy(&nl_tp);
3342
0
        if (err) {
3343
0
            VLOG_WARN_RL(&error_rl, "failed to add timeout policy %s (%s)",
3344
0
                         nl_tp.name, ovs_strerror(err));
3345
0
            goto out;
3346
0
        }
3347
0
    }
3348
3349
0
out:
3350
0
    return err;
3351
0
}
3352
3353
static int
3354
dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3355
                                   uint32_t tp_id,
3356
                                   struct ct_dpif_timeout_policy *tp)
3357
0
{
3358
0
    int err = 0;
3359
3360
0
    tp->id = tp_id;
3361
0
    tp->present = 0;
3362
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3363
0
        struct nl_ct_timeout_policy nl_tp;
3364
0
        char *nl_tp_name;
3365
3366
0
        dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3367
0
                                    tp_protos[i].l4num, &nl_tp_name);
3368
0
        err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp);
3369
3370
0
        if (err) {
3371
0
            VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)",
3372
0
                         nl_tp_name, ovs_strerror(err));
3373
0
            free(nl_tp_name);
3374
0
            goto out;
3375
0
        }
3376
0
        free(nl_tp_name);
3377
0
        dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp);
3378
0
    }
3379
3380
0
out:
3381
0
    return err;
3382
0
}
3383
3384
/* Returns 0 if all the sub timeout policies are deleted or not exist in the
3385
 * kernel.  Returns 1 if any sub timeout policy deletion failed. */
3386
static int
3387
dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3388
                                   uint32_t tp_id)
3389
0
{
3390
0
    int ret = 0;
3391
3392
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3393
0
        char *nl_tp_name;
3394
0
        dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3395
0
                                    tp_protos[i].l4num, &nl_tp_name);
3396
0
        int err = nl_ct_del_timeout_policy(nl_tp_name);
3397
0
        if (err == ENOENT) {
3398
0
            err = 0;
3399
0
        }
3400
0
        if (err) {
3401
0
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
3402
0
            VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)",
3403
0
                         nl_tp_name, ovs_strerror(err));
3404
0
            ret = 1;
3405
0
        }
3406
0
        free(nl_tp_name);
3407
0
    }
3408
3409
0
    return ret;
3410
0
}
3411
3412
struct dpif_netlink_ct_timeout_policy_dump_state {
3413
    struct nl_ct_timeout_policy_dump_state *nl_dump_state;
3414
    struct hmap tp_dump_map;
3415
};
3416
3417
struct dpif_netlink_tp_dump_node {
3418
    struct      hmap_node hmap_node;      /* node in tp_dump_map. */
3419
    struct      ct_dpif_timeout_policy *tp;
3420
    uint32_t    l3_l4_present;
3421
};
3422
3423
static struct dpif_netlink_tp_dump_node *
3424
get_dpif_netlink_tp_dump_node_by_tp_id(uint32_t tp_id,
3425
                                       struct hmap *tp_dump_map)
3426
0
{
3427
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3428
3429
0
    HMAP_FOR_EACH_WITH_HASH (tp_dump_node, hmap_node, hash_int(tp_id, 0),
3430
0
                             tp_dump_map) {
3431
0
        if (tp_dump_node->tp->id == tp_id) {
3432
0
            return tp_dump_node;
3433
0
        }
3434
0
    }
3435
0
    return NULL;
3436
0
}
3437
3438
static void
3439
update_dpif_netlink_tp_dump_node(
3440
    const struct nl_ct_timeout_policy *nl_tp,
3441
    struct dpif_netlink_tp_dump_node *tp_dump_node)
3442
0
{
3443
0
    dpif_netlink_set_ct_dpif_tp_attrs(nl_tp, tp_dump_node->tp);
3444
0
    for (int i = 0; i < DPIF_NL_TP_MAX; ++i) {
3445
0
        if (nl_tp->l3num == tp_protos[i].l3num &&
3446
0
            nl_tp->l4num == tp_protos[i].l4num) {
3447
0
            tp_dump_node->l3_l4_present |= 1 << i;
3448
0
            break;
3449
0
        }
3450
0
    }
3451
0
}
3452
3453
static int
3454
dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3455
                                          void **statep)
3456
0
{
3457
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state;
3458
3459
0
    *statep = dump_state = xzalloc(sizeof *dump_state);
3460
0
    int err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
3461
0
    if (err) {
3462
0
        free(dump_state);
3463
0
        return err;
3464
0
    }
3465
0
    hmap_init(&dump_state->tp_dump_map);
3466
0
    return 0;
3467
0
}
3468
3469
static void
3470
get_and_cleanup_tp_dump_node(struct hmap *hmap,
3471
                             struct dpif_netlink_tp_dump_node *tp_dump_node,
3472
                             struct ct_dpif_timeout_policy *tp)
3473
0
{
3474
0
    hmap_remove(hmap, &tp_dump_node->hmap_node);
3475
0
    *tp = *tp_dump_node->tp;
3476
0
    free(tp_dump_node->tp);
3477
0
    free(tp_dump_node);
3478
0
}
3479
3480
static int
3481
dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3482
                                         void *state,
3483
                                         struct ct_dpif_timeout_policy *tp)
3484
0
{
3485
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3486
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3487
0
    int err;
3488
3489
    /* Dumps all the timeout policies in the kernel. */
3490
0
    do {
3491
0
        struct nl_ct_timeout_policy nl_tp;
3492
0
        uint32_t tp_id;
3493
3494
0
        err =  nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state,
3495
0
                                              &nl_tp);
3496
0
        if (err) {
3497
0
            break;
3498
0
        }
3499
3500
        /* We only interest in OVS installed timeout policies. */
3501
0
        if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) {
3502
0
            continue;
3503
0
        }
3504
3505
0
        tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id(
3506
0
                            tp_id, &dump_state->tp_dump_map);
3507
0
        if (!tp_dump_node) {
3508
0
            tp_dump_node = xzalloc(sizeof *tp_dump_node);
3509
0
            tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp);
3510
0
            tp_dump_node->tp->id = tp_id;
3511
0
            hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node,
3512
0
                        hash_int(tp_id, 0));
3513
0
        }
3514
3515
0
        update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node);
3516
3517
        /* Returns one ct_dpif_timeout_policy if we gather all the L3/L4
3518
         * sub-pieces. */
3519
0
        if (tp_dump_node->l3_l4_present == DPIF_NL_ALL_TP) {
3520
0
            get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3521
0
                                         tp_dump_node, tp);
3522
0
            break;
3523
0
        }
3524
0
    } while (true);
3525
3526
    /* Dump the incomplete timeout policies. */
3527
0
    if (err == EOF) {
3528
0
        if (!hmap_is_empty(&dump_state->tp_dump_map)) {
3529
0
            struct hmap_node *hmap_node = hmap_first(&dump_state->tp_dump_map);
3530
0
            tp_dump_node = CONTAINER_OF(hmap_node,
3531
0
                                        struct dpif_netlink_tp_dump_node,
3532
0
                                        hmap_node);
3533
0
            get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3534
0
                                         tp_dump_node, tp);
3535
0
            return 0;
3536
0
        }
3537
0
    }
3538
3539
0
    return err;
3540
0
}
3541
3542
static int
3543
dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
3544
                                         void *state)
3545
0
{
3546
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3547
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3548
3549
0
    int err = nl_ct_timeout_policy_dump_done(dump_state->nl_dump_state);
3550
0
    HMAP_FOR_EACH_POP (tp_dump_node, hmap_node, &dump_state->tp_dump_map) {
3551
0
        free(tp_dump_node->tp);
3552
0
        free(tp_dump_node);
3553
0
    }
3554
0
    hmap_destroy(&dump_state->tp_dump_map);
3555
0
    free(dump_state);
3556
0
    return err;
3557
0
}
3558
#endif
3559
3560

3561
/* Meters */
3562
3563
/* Set of supported meter flags.  dpif_netlink_meter_set__() rejects any
 * configuration that carries a flag outside of this mask, and
 * dpif_netlink_meter_get_features() reports the mask as the datapath's meter
 * capabilities. */
3564
#define DP_SUPPORTED_METER_FLAGS_MASK \
3565
0
    (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)
3566
3567
/* Meter support was introduced in Linux 4.15.  In some versions of
3568
 * Linux 4.15, 4.16, and 4.17, there was a bug that never set the id
3569
 * when the meter was created, so all meters essentially had an id of
3570
 * zero.  Check for that condition and disable meters on those kernels. */
3571
static bool probe_broken_meters(struct dpif *);
3572
3573
static void
3574
dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
3575
                        void *stub, size_t size, uint32_t command)
3576
0
{
3577
0
    ofpbuf_use_stub(buf, stub, size);
3578
3579
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
3580
0
                          command, OVS_METER_VERSION);
3581
3582
0
    struct ovs_header *ovs_header;
3583
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3584
0
    ovs_header->dp_ifindex = dpif->dp_ifindex;
3585
0
}
3586
3587
/* Execute meter 'request' in the kernel datapath.  If the command
3588
 * fails, returns a positive errno value.  Otherwise, stores the reply
3589
 * in '*replyp', parses the policy according to 'reply_policy' into the
3590
 * array of Netlink attribute in 'a', and returns 0.  On success, the
3591
 * caller is responsible for calling ofpbuf_delete() on '*replyp'
3592
 * ('replyp' will contain pointers into 'a'). */
3593
static int
3594
dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
3595
                            const struct nl_policy *reply_policy,
3596
                            struct nlattr **a, size_t size_a)
3597
0
{
3598
0
    int error = nl_transact(NETLINK_GENERIC, request, replyp);
3599
0
    ofpbuf_uninit(request);
3600
3601
0
    if (error) {
3602
0
        return error;
3603
0
    }
3604
3605
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
3606
0
    struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
3607
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
3608
0
                                                    sizeof *ovs_header);
3609
0
    if (!nlmsg || !genl || !ovs_header
3610
0
        || nlmsg->nlmsg_type != ovs_meter_family
3611
0
        || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
3612
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3613
0
        VLOG_DBG_RL(&rl,
3614
0
                    "Kernel module response to meter tranaction is invalid");
3615
0
        ofpbuf_delete(*replyp);
3616
0
        return EINVAL;
3617
0
    }
3618
0
    return 0;
3619
0
}
3620
3621
static void
3622
dpif_netlink_meter_get_features(const struct dpif *dpif_,
3623
                                struct ofputil_meter_features *features)
3624
0
{
3625
0
    if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
3626
0
        return;
3627
0
    }
3628
3629
0
    struct ofpbuf buf, *msg;
3630
0
    uint64_t stub[1024 / 8];
3631
3632
0
    static const struct nl_policy ovs_meter_features_policy[] = {
3633
0
        [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
3634
0
        [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
3635
0
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3636
0
    };
3637
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];
3638
3639
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3640
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
3641
0
                            OVS_METER_CMD_FEATURES);
3642
0
    if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
3643
0
                                    ARRAY_SIZE(ovs_meter_features_policy))) {
3644
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3645
0
        VLOG_INFO_RL(&rl,
3646
0
                  "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
3647
0
        return;
3648
0
    }
3649
3650
0
    features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
3651
0
    features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);
3652
3653
    /* Bands is a nested attribute of zero or more nested
3654
     * band attributes.  */
3655
0
    if (a[OVS_METER_ATTR_BANDS]) {
3656
0
        const struct nlattr *nla;
3657
0
        size_t left;
3658
3659
0
        NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3660
0
            const struct nlattr *band_nla;
3661
0
            size_t band_left;
3662
3663
0
            NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
3664
0
                if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
3665
0
                    if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
3666
0
                        switch (nl_attr_get_u32(band_nla)) {
3667
0
                        case OVS_METER_BAND_TYPE_DROP:
3668
0
                            features->band_types |= 1 << OFPMBT13_DROP;
3669
0
                            break;
3670
0
                        }
3671
0
                    }
3672
0
                }
3673
0
            }
3674
0
        }
3675
0
    }
3676
0
    features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
3677
3678
0
    ofpbuf_delete(msg);
3679
0
}
3680
3681
static int
3682
dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
3683
                         struct ofputil_meter_config *config)
3684
0
{
3685
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3686
0
    struct ofpbuf buf, *msg;
3687
0
    uint64_t stub[1024 / 8];
3688
3689
0
    static const struct nl_policy ovs_meter_set_response_policy[] = {
3690
0
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
3691
0
    };
3692
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];
3693
3694
0
    if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
3695
0
        return EBADF; /* Unsupported flags set */
3696
0
    }
3697
3698
0
    for (size_t i = 0; i < config->n_bands; i++) {
3699
0
        switch (config->bands[i].type) {
3700
0
        case OFPMBT13_DROP:
3701
0
            break;
3702
0
        default:
3703
0
            return ENODEV; /* Unsupported band type */
3704
0
        }
3705
0
    }
3706
3707
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
3708
3709
0
    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3710
3711
0
    if (config->flags & OFPMF13_KBPS) {
3712
0
        nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
3713
0
    }
3714
3715
0
    size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
3716
    /* Bands */
3717
0
    for (size_t i = 0; i < config->n_bands; ++i) {
3718
0
        struct ofputil_meter_band * band = &config->bands[i];
3719
0
        uint32_t band_type;
3720
3721
0
        size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);
3722
3723
0
        switch (band->type) {
3724
0
        case OFPMBT13_DROP:
3725
0
            band_type = OVS_METER_BAND_TYPE_DROP;
3726
0
            break;
3727
0
        default:
3728
0
            band_type = OVS_METER_BAND_TYPE_UNSPEC;
3729
0
        }
3730
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
3731
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
3732
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
3733
0
                       config->flags & OFPMF13_BURST ?
3734
0
                       band->burst_size : band->rate);
3735
0
        nl_msg_end_nested(&buf, band_offset);
3736
0
    }
3737
0
    nl_msg_end_nested(&buf, bands_offset);
3738
3739
0
    int error = dpif_netlink_meter_transact(&buf, &msg,
3740
0
                                    ovs_meter_set_response_policy, a,
3741
0
                                    ARRAY_SIZE(ovs_meter_set_response_policy));
3742
0
    if (error) {
3743
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3744
0
        VLOG_INFO_RL(&rl,
3745
0
                     "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
3746
0
        return error;
3747
0
    }
3748
3749
0
    if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3750
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3751
0
        VLOG_INFO_RL(&rl,
3752
0
                     "Kernel returned a different meter id than requested");
3753
0
    }
3754
0
    ofpbuf_delete(msg);
3755
0
    return 0;
3756
0
}
3757
3758
static int
3759
dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
3760
                       struct ofputil_meter_config *config)
3761
0
{
3762
0
    if (probe_broken_meters(dpif_)) {
3763
0
        return ENOMEM;
3764
0
    }
3765
3766
0
    return dpif_netlink_meter_set__(dpif_, meter_id, config);
3767
0
}
3768
3769
/* Retrieve statistics and/or delete meter 'meter_id'.  Statistics are
3770
 * stored in 'stats', if it is not null.  If 'command' is
3771
 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
3772
 * retrieved.  If 'command' is OVS_METER_CMD_GET, then statistics are
3773
 * simply retrieved. */
3774
static int
3775
dpif_netlink_meter_get_stats(const struct dpif *dpif_,
3776
                             ofproto_meter_id meter_id,
3777
                             struct ofputil_meter_stats *stats,
3778
                             uint16_t max_bands,
3779
                             enum ovs_meter_cmd command)
3780
0
{
3781
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3782
0
    struct ofpbuf buf, *msg;
3783
0
    uint64_t stub[1024 / 8];
3784
3785
0
    static const struct nl_policy ovs_meter_stats_policy[] = {
3786
0
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
3787
0
        [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
3788
0
                                   .optional = true},
3789
0
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3790
0
    };
3791
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
3792
3793
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
3794
3795
0
    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3796
3797
0
    int error = dpif_netlink_meter_transact(&buf, &msg,
3798
0
                                            ovs_meter_stats_policy, a,
3799
0
                                            ARRAY_SIZE(ovs_meter_stats_policy));
3800
0
    if (error) {
3801
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3802
0
        VLOG_RL(&rl, error == ENOENT ? VLL_DBG : VLL_WARN,
3803
0
                "dpif_netlink_meter_transact %s failed: %s",
3804
0
                command == OVS_METER_CMD_GET ? "get" : "del",
3805
0
                ovs_strerror(error));
3806
0
        return error;
3807
0
    }
3808
3809
0
    if (a[OVS_METER_ATTR_ID]
3810
0
        && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3811
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3812
0
        VLOG_INFO_RL(&rl,
3813
0
                     "Kernel returned a different meter id than requested");
3814
0
        ofpbuf_delete(msg);
3815
0
        return EINVAL;
3816
0
    }
3817
3818
0
    if (stats && a[OVS_METER_ATTR_STATS]) {
3819
        /* return stats */
3820
0
        const struct ovs_flow_stats *stat;
3821
0
        const struct nlattr *nla;
3822
0
        size_t left;
3823
3824
0
        stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
3825
0
        stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
3826
0
        stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
3827
3828
0
        if (a[OVS_METER_ATTR_BANDS]) {
3829
0
            size_t n_bands = 0;
3830
0
            NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3831
0
                const struct nlattr *band_nla;
3832
0
                band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
3833
0
                if (band_nla && nl_attr_get_size(band_nla) \
3834
0
                                == sizeof(struct ovs_flow_stats)) {
3835
0
                    stat = nl_attr_get(band_nla);
3836
3837
0
                    if (n_bands < max_bands) {
3838
0
                        stats->bands[n_bands].packet_count
3839
0
                            = get_32aligned_u64(&stat->n_packets);
3840
0
                        stats->bands[n_bands].byte_count
3841
0
                            = get_32aligned_u64(&stat->n_bytes);
3842
0
                        ++n_bands;
3843
0
                    }
3844
0
                } else {
3845
0
                    stats->bands[n_bands].packet_count = 0;
3846
0
                    stats->bands[n_bands].byte_count = 0;
3847
0
                    ++n_bands;
3848
0
                }
3849
0
            }
3850
0
            stats->n_bands = n_bands;
3851
0
        } else {
3852
            /* For a non-existent meter, return 0 stats. */
3853
0
            stats->n_bands = 0;
3854
0
        }
3855
0
    }
3856
3857
0
    ofpbuf_delete(msg);
3858
0
    return error;
3859
0
}
3860
3861
static int
3862
dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
3863
                       struct ofputil_meter_stats *stats, uint16_t max_bands)
3864
0
{
3865
0
    return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3866
0
                                        OVS_METER_CMD_GET);
3867
0
}
3868
3869
static int
3870
dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
3871
                       struct ofputil_meter_stats *stats, uint16_t max_bands)
3872
0
{
3873
0
    return dpif_netlink_meter_get_stats(dpif, meter_id, stats,
3874
0
                                        max_bands, OVS_METER_CMD_DEL);
3875
0
}
3876
3877
static bool
3878
probe_broken_meters__(struct dpif *dpif)
3879
0
{
3880
    /* This test is destructive if a probe occurs while ovs-vswitchd is
3881
     * running (e.g., an ovs-dpctl meter command is called), so choose a
3882
     * high meter id to make this less likely to occur.
3883
     *
3884
     * In Linux kernel v5.10+ meters are stored in a table that is not
3885
     * a real hash table.  It's just an array with 'meter_id % size' used
3886
     * as an index.  The numbers are chosen to fit into the minimal table
3887
     * size (1024) without wrapping, so these IDs are guaranteed to be
3888
     * found under normal conditions in the meter table, if such meters
3889
     * exist.  It's possible to break this check by creating some meters
3890
     * in the kernel manually with different IDs that map onto the same
3891
     * indexes, but that should not be a big problem since ovs-vswitchd
3892
     * always allocates densely packed meter IDs with an id-pool.
3893
     *
3894
     * These IDs will also work in cases where the table in the kernel is
3895
     * a proper hash table. */
3896
0
    ofproto_meter_id id1 = { 1021 };
3897
0
    ofproto_meter_id id2 = { 1022 };
3898
0
    struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
3899
0
    struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
3900
0
    struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};
3901
3902
    /* First check if these meters are already in the kernel.  If we get
3903
     * a proper response from the kernel with all the good meter IDs, then
3904
     * meters are likley supported correctly. */
3905
0
    if (!dpif_netlink_meter_get(dpif, id1, NULL, 0)
3906
0
        || !dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3907
0
        return false;
3908
0
    }
3909
3910
    /* Try adding two meters and make sure that they both come back with
3911
     * the proper meter id.  Use the "__" version so that we don't cause
3912
     * a recurve deadlock. */
3913
0
    dpif_netlink_meter_set__(dpif, id1, &config1);
3914
0
    dpif_netlink_meter_set__(dpif, id2, &config2);
3915
3916
0
    if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
3917
0
        || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3918
0
        VLOG_INFO("The kernel module has a broken meter implementation.");
3919
0
        return true;
3920
0
    }
3921
3922
0
    dpif_netlink_meter_del(dpif, id1, NULL, 0);
3923
0
    dpif_netlink_meter_del(dpif, id2, NULL, 0);
3924
3925
0
    return false;
3926
0
}
3927
3928
static bool
3929
probe_broken_meters(struct dpif *dpif)
3930
0
{
3931
    /* This is a once-only test because currently OVS only has at most a single
3932
     * Netlink capable datapath on any given platform. */
3933
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3934
3935
0
    static bool broken_meters = false;
3936
0
    if (ovsthread_once_start(&once)) {
3937
0
        broken_meters = probe_broken_meters__(dpif);
3938
0
        ovsthread_once_done(&once);
3939
0
    }
3940
0
    return broken_meters;
3941
0
}
3942
3943
3944
static int
3945
dpif_netlink_cache_get_supported_levels(struct dpif *dpif_, uint32_t *levels)
3946
0
{
3947
0
    struct dpif_netlink_dp dp;
3948
0
    struct ofpbuf *buf;
3949
0
    int error;
3950
3951
    /* If available, in the kernel we support one level of cache.
3952
     * Unfortunately, there is no way to detect if the older kernel module has
3953
     * the cache feature.  For now, we only report the cache information if the
3954
     * kernel module reports the OVS_DP_ATTR_MASKS_CACHE_SIZE attribute. */
3955
3956
0
    *levels = 0;
3957
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
3958
0
    if (!error) {
3959
3960
0
        if (dp.cache_size != UINT32_MAX) {
3961
0
            *levels = 1;
3962
0
        }
3963
0
        ofpbuf_delete(buf);
3964
0
    }
3965
3966
0
    return error;
3967
0
}
3968
3969
static int
3970
dpif_netlink_cache_get_name(struct dpif *dpif_ OVS_UNUSED, uint32_t level,
3971
                            const char **name)
3972
0
{
3973
0
    if (level != 0) {
3974
0
        return EINVAL;
3975
0
    }
3976
3977
0
    *name = "masks-cache";
3978
0
    return 0;
3979
0
}
3980
3981
static int
3982
dpif_netlink_cache_get_size(struct dpif *dpif_, uint32_t level, uint32_t *size)
3983
0
{
3984
0
    struct dpif_netlink_dp dp;
3985
0
    struct ofpbuf *buf;
3986
0
    int error;
3987
3988
0
    if (level != 0) {
3989
0
        return EINVAL;
3990
0
    }
3991
3992
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
3993
0
    if (!error) {
3994
3995
0
        ofpbuf_delete(buf);
3996
3997
0
        if (dp.cache_size == UINT32_MAX) {
3998
0
            return EOPNOTSUPP;
3999
0
        }
4000
0
        *size = dp.cache_size;
4001
0
    }
4002
0
    return error;
4003
0
}
4004
4005
static int
4006
dpif_netlink_cache_set_size(struct dpif *dpif_, uint32_t level, uint32_t size)
4007
0
{
4008
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4009
0
    struct dpif_netlink_dp request, reply;
4010
0
    struct ofpbuf *bufp;
4011
0
    int error;
4012
4013
0
    size = ROUND_UP_POW2(size);
4014
4015
0
    if (level != 0) {
4016
0
        return EINVAL;
4017
0
    }
4018
4019
0
    dpif_netlink_dp_init(&request);
4020
0
    request.cmd = OVS_DP_CMD_SET;
4021
0
    request.name = dpif_->base_name;
4022
0
    request.dp_ifindex = dpif->dp_ifindex;
4023
0
    request.cache_size = size;
4024
    /* We need to set the dpif user_features, as the kernel module assumes the
4025
     * OVS_DP_ATTR_USER_FEATURES attribute is always present. If not, it will
4026
     * reset all the features. */
4027
0
    request.user_features = dpif->user_features;
4028
4029
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
4030
0
    if (!error) {
4031
0
        ofpbuf_delete(bufp);
4032
0
        if (reply.cache_size != size) {
4033
0
            return EINVAL;
4034
0
        }
4035
0
    }
4036
4037
0
    return error;
4038
0
}
4039
4040

4041
/* The dpif class implemented by this file, on top of the Open vSwitch kernel
 * module reached over Netlink.
 *
 * The initializers are positional, so every entry must stay in the order in
 * which the members are declared in "struct dpif_class".  The NULL entries
 * are labeled with the name of the member that is left unset.
 * NOTE(review): the leading "system" string is presumably the datapath type
 * name -- confirm against struct dpif_class. */
const struct dpif_class dpif_netlink_class = {
4042
    "system",
4043
    false,                      /* cleanup_required */
4044
    NULL,                       /* init */
4045
    dpif_netlink_enumerate,
4046
    NULL,                       /* NOTE(review): unlabeled member; look up its
                                 * name in struct dpif_class. */
4047
    dpif_netlink_open,
4048
    dpif_netlink_close,
4049
    dpif_netlink_destroy,
4050
    dpif_netlink_run,
4051
    NULL,                       /* wait */
4052
    dpif_netlink_get_stats,
4053
    dpif_netlink_set_features,
4054
    dpif_netlink_get_features,
4055
    dpif_netlink_port_add,
4056
    dpif_netlink_port_del,
4057
    NULL,                       /* port_set_config */
4058
    dpif_netlink_port_query_by_number,
4059
    dpif_netlink_port_query_by_name,
4060
    dpif_netlink_port_get_pid,
4061
    dpif_netlink_port_dump_start,
4062
    dpif_netlink_port_dump_next,
4063
    dpif_netlink_port_dump_done,
4064
    dpif_netlink_port_poll,
4065
    dpif_netlink_port_poll_wait,
4066
    dpif_netlink_flow_flush,
4067
    dpif_netlink_flow_dump_create,
4068
    dpif_netlink_flow_dump_destroy,
4069
    dpif_netlink_flow_dump_thread_create,
4070
    dpif_netlink_flow_dump_thread_destroy,
4071
    dpif_netlink_flow_dump_next,
4072
    dpif_netlink_operate,
4073
    dpif_netlink_recv_set,
4074
    dpif_netlink_handlers_set,
4075
    dpif_netlink_number_handlers_required,
4076
    NULL,                       /* set_config */
4077
    dpif_netlink_queue_to_priority,
4078
    dpif_netlink_recv,
4079
    dpif_netlink_recv_wait,
4080
    dpif_netlink_recv_purge,
4081
    NULL,                       /* register_dp_purge_cb */
4082
    NULL,                       /* register_upcall_cb */
4083
    NULL,                       /* enable_upcall */
4084
    NULL,                       /* disable_upcall */
4085
    dpif_netlink_get_datapath_version, /* get_datapath_version */
4086
    dpif_netlink_ct_dump_start,
4087
    dpif_netlink_ct_dump_next,
4088
    dpif_netlink_ct_dump_done,
4089
    NULL,                       /* ct_exp_dump_start */
4090
    NULL,                       /* ct_exp_dump_next */
4091
    NULL,                       /* ct_exp_dump_done */
4092
    dpif_netlink_ct_flush,
4093
    NULL,                       /* ct_set_maxconns */
4094
    NULL,                       /* ct_get_maxconns */
4095
    NULL,                       /* ct_get_nconns */
4096
    NULL,                       /* ct_set_tcp_seq_chk */
4097
    NULL,                       /* ct_get_tcp_seq_chk */
4098
    NULL,                       /* ct_set_sweep_interval */
4099
    NULL,                       /* ct_get_sweep_interval */
4100
    dpif_netlink_ct_set_limits,
4101
    dpif_netlink_ct_get_limits,
4102
    dpif_netlink_ct_del_limits,
4103
    dpif_netlink_ct_set_timeout_policy,
4104
    dpif_netlink_ct_get_timeout_policy,
4105
    dpif_netlink_ct_del_timeout_policy,
4106
    dpif_netlink_ct_timeout_policy_dump_start,
4107
    dpif_netlink_ct_timeout_policy_dump_next,
4108
    dpif_netlink_ct_timeout_policy_dump_done,
4109
    dpif_netlink_ct_get_timeout_policy_name,
4110
    dpif_netlink_ct_get_features,
4111
    NULL,                       /* ipf_set_enabled */
4112
    NULL,                       /* ipf_set_min_frag */
4113
    NULL,                       /* ipf_set_max_nfrags */
4114
    NULL,                       /* ipf_get_status */
4115
    NULL,                       /* ipf_dump_start */
4116
    NULL,                       /* ipf_dump_next */
4117
    NULL,                       /* ipf_dump_done */
4118
    dpif_netlink_meter_get_features,
4119
    dpif_netlink_meter_set,
4120
    dpif_netlink_meter_get,
4121
    dpif_netlink_meter_del,
4122
    NULL,                       /* bond_add */
4123
    NULL,                       /* bond_del */
4124
    NULL,                       /* bond_stats_get */
4125
    dpif_netlink_cache_get_supported_levels,
4126
    dpif_netlink_cache_get_name,
4127
    dpif_netlink_cache_get_size,
4128
    dpif_netlink_cache_set_size,
4129
};
4130
4131
static int
4132
dpif_netlink_init(void)
4133
0
{
4134
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4135
0
    static int error;
4136
4137
0
    if (ovsthread_once_start(&once)) {
4138
0
        error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
4139
0
                                      &ovs_datapath_family);
4140
0
        if (error) {
4141
0
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
4142
0
                      "The Open vSwitch kernel module is probably not loaded.",
4143
0
                      OVS_DATAPATH_FAMILY);
4144
0
        }
4145
0
        if (!error) {
4146
0
            error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
4147
0
        }
4148
0
        if (!error) {
4149
0
            error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
4150
0
        }
4151
0
        if (!error) {
4152
0
            error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
4153
0
                                          &ovs_packet_family);
4154
0
        }
4155
0
        if (!error) {
4156
0
            error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
4157
0
                                           &ovs_vport_mcgroup);
4158
0
        }
4159
0
        if (!error) {
4160
0
            if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
4161
0
                VLOG_INFO("The kernel module does not support meters.");
4162
0
            }
4163
0
        }
4164
0
        if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
4165
0
                                  &ovs_ct_limit_family) < 0) {
4166
0
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
4167
0
                      "Please update the Open vSwitch kernel module to enable "
4168
0
                      "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
4169
0
        }
4170
4171
0
        ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
4172
4173
0
        unixctl_command_register("dpif-netlink/dispatch-mode", "", 0, 0,
4174
0
                                 dpif_netlink_unixctl_dispatch_mode, NULL);
4175
4176
0
        ovsthread_once_done(&once);
4177
0
    }
4178
4179
0
    return error;
4180
0
}
4181
4182
bool
4183
dpif_netlink_is_internal_device(const char *name)
4184
0
{
4185
0
    struct dpif_netlink_vport reply;
4186
0
    struct ofpbuf *buf;
4187
0
    int error;
4188
4189
0
    error = dpif_netlink_vport_get(name, &reply, &buf);
4190
0
    if (!error) {
4191
0
        ofpbuf_delete(buf);
4192
0
    } else if (error != ENODEV && error != ENOENT) {
4193
0
        VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
4194
0
                     name, ovs_strerror(error));
4195
0
    }
4196
4197
0
    return reply.type == OVS_VPORT_TYPE_INTERNAL;
4198
0
}
4199
4200
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4201
 * by Netlink attributes, into 'vport'.  Returns 0 if successful, otherwise a
4202
 * positive errno value.
4203
 *
4204
 * 'vport' will contain pointers into 'buf', so the caller should not free
4205
 * 'buf' while 'vport' is still in use. */
4206
static int
4207
dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
4208
                             const struct ofpbuf *buf)
4209
0
{
4210
0
    static const struct nl_policy ovs_vport_policy[] = {
4211
0
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
4212
0
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
4213
0
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
4214
0
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
4215
0
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
4216
0
                                   .optional = true },
4217
0
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
4218
0
        [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
4219
0
        [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NL_A_NESTED,
4220
0
                                          .optional = true },
4221
0
    };
4222
4223
0
    dpif_netlink_vport_init(vport);
4224
4225
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4226
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4227
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4228
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4229
4230
0
    struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
4231
0
    if (!nlmsg || !genl || !ovs_header
4232
0
        || nlmsg->nlmsg_type != ovs_vport_family
4233
0
        || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
4234
0
                            ARRAY_SIZE(ovs_vport_policy))) {
4235
0
        return EINVAL;
4236
0
    }
4237
4238
0
    vport->cmd = genl->cmd;
4239
0
    vport->dp_ifindex = ovs_header->dp_ifindex;
4240
0
    vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
4241
0
    vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
4242
0
    vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
4243
0
    if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
4244
0
        vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
4245
0
                               / (sizeof *vport->upcall_pids);
4246
0
        vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
4247
4248
0
    }
4249
0
    if (a[OVS_VPORT_ATTR_STATS]) {
4250
0
        vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
4251
0
    }
4252
0
    if (a[OVS_VPORT_ATTR_UPCALL_STATS]) {
4253
0
        const struct nlattr *nla;
4254
0
        size_t left;
4255
4256
0
        NL_NESTED_FOR_EACH (nla, left, a[OVS_VPORT_ATTR_UPCALL_STATS]) {
4257
0
            if (nl_attr_type(nla) == OVS_VPORT_UPCALL_ATTR_SUCCESS) {
4258
0
                vport->upcall_success = nl_attr_get_u64(nla);
4259
0
            } else if (nl_attr_type(nla) == OVS_VPORT_UPCALL_ATTR_FAIL) {
4260
0
                vport->upcall_fail = nl_attr_get_u64(nla);
4261
0
            }
4262
0
        }
4263
0
    } else {
4264
0
        vport->upcall_success = UINT64_MAX;
4265
0
        vport->upcall_fail = UINT64_MAX;
4266
0
    }
4267
0
    if (a[OVS_VPORT_ATTR_OPTIONS]) {
4268
0
        vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
4269
0
        vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
4270
0
    }
4271
0
    if (a[OVS_VPORT_ATTR_NETNSID]) {
4272
0
        netnsid_set(&vport->netnsid,
4273
0
                    nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
4274
0
    } else {
4275
0
        netnsid_set_local(&vport->netnsid);
4276
0
    }
4277
0
    return 0;
4278
0
}
4279
4280
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
4281
 * followed by Netlink attributes corresponding to 'vport'. */
4282
static void
4283
dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
4284
                             struct ofpbuf *buf)
4285
0
{
4286
0
    struct ovs_header *ovs_header;
4287
4288
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
4289
0
                          vport->cmd, OVS_VPORT_VERSION);
4290
4291
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4292
0
    ovs_header->dp_ifindex = vport->dp_ifindex;
4293
4294
0
    if (vport->port_no != ODPP_NONE) {
4295
0
        nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
4296
0
    }
4297
4298
0
    if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
4299
0
        nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
4300
0
    }
4301
4302
0
    if (vport->name) {
4303
0
        nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
4304
0
    }
4305
4306
0
    if (vport->upcall_pids) {
4307
0
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
4308
0
                          vport->upcall_pids,
4309
0
                          vport->n_upcall_pids * sizeof *vport->upcall_pids);
4310
0
    }
4311
4312
0
    if (vport->stats) {
4313
0
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
4314
0
                          vport->stats, sizeof *vport->stats);
4315
0
    }
4316
4317
0
    if (vport->options) {
4318
0
        nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
4319
0
                          vport->options, vport->options_len);
4320
0
    }
4321
0
}
4322
4323
/* Clears 'vport' to "empty" values. */
4324
void
4325
dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
4326
0
{
4327
0
    memset(vport, 0, sizeof *vport);
4328
0
    vport->port_no = ODPP_NONE;
4329
0
}
4330
4331
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4332
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4333
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4334
 * result of the command is expected to be an ovs_vport also, which is decoded
4335
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
4336
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4337
int
4338
dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
4339
                            struct dpif_netlink_vport *reply,
4340
                            struct ofpbuf **bufp)
4341
0
{
4342
0
    struct ofpbuf *request_buf;
4343
0
    int error;
4344
4345
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4346
4347
0
    error = dpif_netlink_init();
4348
0
    if (error) {
4349
0
        if (reply) {
4350
0
            *bufp = NULL;
4351
0
            dpif_netlink_vport_init(reply);
4352
0
        }
4353
0
        return error;
4354
0
    }
4355
4356
0
    request_buf = ofpbuf_new(1024);
4357
0
    dpif_netlink_vport_to_ofpbuf(request, request_buf);
4358
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4359
0
    ofpbuf_delete(request_buf);
4360
4361
0
    if (reply) {
4362
0
        if (!error) {
4363
0
            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
4364
0
        }
4365
0
        if (error) {
4366
0
            dpif_netlink_vport_init(reply);
4367
0
            ofpbuf_delete(*bufp);
4368
0
            *bufp = NULL;
4369
0
        }
4370
0
    }
4371
0
    return error;
4372
0
}
4373
4374
/* Obtains information about the kernel vport named 'name' and stores it into
4375
 * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
4376
 * longer needed ('reply' will contain pointers into '*bufp').  */
4377
int
4378
dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
4379
                       struct ofpbuf **bufp)
4380
0
{
4381
0
    struct dpif_netlink_vport request;
4382
4383
0
    dpif_netlink_vport_init(&request);
4384
0
    request.cmd = OVS_VPORT_CMD_GET;
4385
0
    request.name = name;
4386
4387
0
    return dpif_netlink_vport_transact(&request, reply, bufp);
4388
0
}
4389
4390
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4391
 * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
4392
 * positive errno value.
4393
 *
4394
 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
4395
 * while 'dp' is still in use. */
4396
static int
4397
dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
4398
0
{
4399
0
    static const struct nl_policy ovs_datapath_policy[] = {
4400
0
        [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
4401
0
        [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
4402
0
                                .optional = true },
4403
0
        [OVS_DP_ATTR_MEGAFLOW_STATS] = {
4404
0
                        NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
4405
0
                        .optional = true },
4406
0
        [OVS_DP_ATTR_USER_FEATURES] = {
4407
0
                        .type = NL_A_U32,
4408
0
                        .optional = true },
4409
0
        [OVS_DP_ATTR_MASKS_CACHE_SIZE] = {
4410
0
                        .type = NL_A_U32,
4411
0
                        .optional = true },
4412
0
    };
4413
4414
0
    dpif_netlink_dp_init(dp);
4415
4416
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4417
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4418
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4419
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4420
4421
0
    struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
4422
0
    if (!nlmsg || !genl || !ovs_header
4423
0
        || nlmsg->nlmsg_type != ovs_datapath_family
4424
0
        || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
4425
0
                            ARRAY_SIZE(ovs_datapath_policy))) {
4426
0
        return EINVAL;
4427
0
    }
4428
4429
0
    dp->cmd = genl->cmd;
4430
0
    dp->dp_ifindex = ovs_header->dp_ifindex;
4431
0
    dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
4432
0
    if (a[OVS_DP_ATTR_STATS]) {
4433
0
        dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
4434
0
    }
4435
4436
0
    if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
4437
0
        dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
4438
0
    }
4439
4440
0
    if (a[OVS_DP_ATTR_USER_FEATURES]) {
4441
0
        dp->user_features = nl_attr_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
4442
0
    }
4443
4444
0
    if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
4445
0
        dp->cache_size = nl_attr_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
4446
0
    } else {
4447
0
        dp->cache_size = UINT32_MAX;
4448
0
    }
4449
4450
0
    return 0;
4451
0
}
4452
4453
/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
4454
static void
4455
dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
4456
0
{
4457
0
    struct ovs_header *ovs_header;
4458
4459
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
4460
0
                          NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
4461
0
                          OVS_DATAPATH_VERSION);
4462
4463
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4464
0
    ovs_header->dp_ifindex = dp->dp_ifindex;
4465
4466
0
    if (dp->name) {
4467
0
        nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
4468
0
    }
4469
4470
0
    if (dp->upcall_pid) {
4471
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
4472
0
    }
4473
4474
0
    if (dp->user_features) {
4475
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
4476
0
    }
4477
4478
0
    if (dp->upcall_pids) {
4479
0
        nl_msg_put_unspec(buf, OVS_DP_ATTR_PER_CPU_PIDS, dp->upcall_pids,
4480
0
                          sizeof *dp->upcall_pids * dp->n_upcall_pids);
4481
0
    }
4482
4483
0
    if (dp->cache_size != UINT32_MAX) {
4484
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_MASKS_CACHE_SIZE, dp->cache_size);
4485
0
    }
4486
4487
    /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
4488
0
}
4489
4490
/* Clears 'dp' to "empty" values. */
4491
static void
4492
dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
4493
0
{
4494
0
    memset(dp, 0, sizeof *dp);
4495
0
    dp->cache_size = UINT32_MAX;
4496
0
}
4497
4498
static void
4499
dpif_netlink_dp_dump_start(struct nl_dump *dump)
4500
0
{
4501
0
    struct dpif_netlink_dp request;
4502
0
    struct ofpbuf *buf;
4503
4504
0
    dpif_netlink_dp_init(&request);
4505
0
    request.cmd = OVS_DP_CMD_GET;
4506
4507
0
    buf = ofpbuf_new(1024);
4508
0
    dpif_netlink_dp_to_ofpbuf(&request, buf);
4509
0
    nl_dump_start(dump, NETLINK_GENERIC, buf);
4510
0
    ofpbuf_delete(buf);
4511
0
}
4512
4513
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4514
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4515
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4516
 * result of the command is expected to be of the same form, which is decoded
4517
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
4518
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4519
static int
4520
dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
4521
                         struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
4522
0
{
4523
0
    struct ofpbuf *request_buf;
4524
0
    int error;
4525
4526
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4527
4528
0
    request_buf = ofpbuf_new(1024);
4529
0
    dpif_netlink_dp_to_ofpbuf(request, request_buf);
4530
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4531
0
    ofpbuf_delete(request_buf);
4532
4533
0
    if (reply) {
4534
0
        dpif_netlink_dp_init(reply);
4535
0
        if (!error) {
4536
0
            error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
4537
0
        }
4538
0
        if (error) {
4539
0
            ofpbuf_delete(*bufp);
4540
0
            *bufp = NULL;
4541
0
        }
4542
0
    }
4543
0
    return error;
4544
0
}
4545
4546
/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
4547
 * The caller must free '*bufp' when the reply is no longer needed ('reply'
4548
 * will contain pointers into '*bufp').  */
4549
static int
4550
dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
4551
                    struct ofpbuf **bufp)
4552
0
{
4553
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4554
0
    struct dpif_netlink_dp request;
4555
4556
0
    dpif_netlink_dp_init(&request);
4557
0
    request.cmd = OVS_DP_CMD_GET;
4558
0
    request.dp_ifindex = dpif->dp_ifindex;
4559
4560
0
    return dpif_netlink_dp_transact(&request, reply, bufp);
4561
0
}
4562
4563
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4564
 * by Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
4565
 * positive errno value.
4566
 *
4567
 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
4568
 * while 'flow' is still in use. */
4569
static int
4570
dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
4571
                              const struct ofpbuf *buf)
4572
0
{
4573
0
    static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
4574
0
        [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
4575
0
        [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
4576
0
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
4577
0
        [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
4578
0
                                  .optional = true },
4579
0
        [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
4580
0
        [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
4581
0
        [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
4582
        /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
4583
        /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
4584
        /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
4585
0
    };
4586
4587
0
    dpif_netlink_flow_init(flow);
4588
4589
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4590
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4591
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4592
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4593
4594
0
    struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
4595
0
    if (!nlmsg || !genl || !ovs_header
4596
0
        || nlmsg->nlmsg_type != ovs_flow_family
4597
0
        || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
4598
0
                            ARRAY_SIZE(ovs_flow_policy))) {
4599
0
        return EINVAL;
4600
0
    }
4601
0
    if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
4602
0
        return EINVAL;
4603
0
    }
4604
4605
0
    flow->nlmsg_flags = nlmsg->nlmsg_flags;
4606
0
    flow->dp_ifindex = ovs_header->dp_ifindex;
4607
0
    if (a[OVS_FLOW_ATTR_KEY]) {
4608
0
        flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
4609
0
        flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
4610
0
    }
4611
4612
0
    if (a[OVS_FLOW_ATTR_UFID]) {
4613
0
        flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
4614
0
        flow->ufid_present = true;
4615
0
    }
4616
0
    if (a[OVS_FLOW_ATTR_MASK]) {
4617
0
        flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
4618
0
        flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
4619
0
    }
4620
0
    if (a[OVS_FLOW_ATTR_ACTIONS]) {
4621
0
        flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
4622
0
        flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
4623
0
    }
4624
0
    if (a[OVS_FLOW_ATTR_STATS]) {
4625
0
        flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
4626
0
    }
4627
0
    if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
4628
0
        flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
4629
0
    }
4630
0
    if (a[OVS_FLOW_ATTR_USED]) {
4631
0
        flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
4632
0
    }
4633
0
    return 0;
4634
0
}
4635
4636
4637
/*
4638
 * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out.
4639
 * If the flow is not Ethernet, the OVS_KEY_ATTR_PACKET_TYPE is converted to
4640
 * OVS_KEY_ATTR_ETHERTYPE. Puts 'data' to 'buf'.
4641
 */
4642
static void
4643
put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
4644
                        const struct nlattr *data, uint16_t data_len)
4645
0
{
4646
0
    const struct nlattr *packet_type;
4647
4648
0
    packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
4649
4650
0
    if (packet_type) {
4651
        /* exclude PACKET_TYPE Netlink attribute. */
4652
0
        ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
4653
0
        size_t packet_type_len = NL_A_U32_SIZE;
4654
0
        size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
4655
0
        size_t second_chunk_size = data_len - first_chunk_size
4656
0
                                   - packet_type_len;
4657
0
        struct nlattr *next_attr = nl_attr_next(packet_type);
4658
0
        size_t ofs;
4659
4660
0
        ofs = nl_msg_start_nested(buf, type);
4661
0
        nl_msg_put(buf, data, first_chunk_size);
4662
0
        nl_msg_put(buf, next_attr, second_chunk_size);
4663
0
        if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
4664
0
            ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
4665
0
            const struct nlattr *nla;
4666
4667
0
            nla = nl_attr_find(buf, ofs + NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
4668
0
            if (nla) {
4669
0
                ovs_be16 *ethertype;
4670
4671
0
                ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
4672
0
                *ethertype = pt;
4673
0
            } else {
4674
0
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
4675
0
            }
4676
0
        }
4677
0
        nl_msg_end_nested(buf, ofs);
4678
0
    } else {
4679
0
        nl_msg_put_unspec(buf, type, data, data_len);
4680
0
    }
4681
0
}
4682
4683
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
4684
 * followed by Netlink attributes corresponding to 'flow'. */
4685
static void
4686
dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
4687
                            struct ofpbuf *buf)
4688
0
{
4689
0
    struct ovs_header *ovs_header;
4690
4691
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
4692
0
                          NLM_F_REQUEST | flow->nlmsg_flags,
4693
0
                          flow->cmd, OVS_FLOW_VERSION);
4694
4695
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4696
0
    ovs_header->dp_ifindex = flow->dp_ifindex;
4697
4698
0
    if (flow->ufid_present) {
4699
0
        nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
4700
0
    }
4701
0
    if (flow->ufid_terse) {
4702
0
        nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
4703
0
                       OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
4704
0
                       | OVS_UFID_F_OMIT_ACTIONS);
4705
0
    }
4706
0
    if (!flow->ufid_terse || !flow->ufid_present) {
4707
0
        if (flow->key_len) {
4708
0
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
4709
0
                                           flow->key_len);
4710
0
        }
4711
0
        if (flow->mask_len) {
4712
0
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
4713
0
                                           flow->mask_len);
4714
0
        }
4715
0
        if (flow->actions || flow->actions_len) {
4716
0
            nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
4717
0
                              flow->actions, flow->actions_len);
4718
0
        }
4719
0
    }
4720
4721
    /* We never need to send these to the kernel. */
4722
0
    ovs_assert(!flow->stats);
4723
0
    ovs_assert(!flow->tcp_flags);
4724
0
    ovs_assert(!flow->used);
4725
4726
0
    if (flow->clear) {
4727
0
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
4728
0
    }
4729
0
    if (flow->probe) {
4730
0
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
4731
0
    }
4732
0
}
4733
4734
/* Clears 'flow' to "empty" values. */
4735
static void
4736
dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
4737
0
{
4738
0
    memset(flow, 0, sizeof *flow);
4739
0
}
4740
4741
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4742
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4743
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4744
 * result of the command is expected to be a flow also, which is decoded and
4745
 * stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the reply
4746
 * is no longer needed ('reply' will contain pointers into '*bufp'). */
4747
static int
4748
dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
4749
                           struct dpif_netlink_flow *reply,
4750
                           struct ofpbuf **bufp)
4751
0
{
4752
0
    struct ofpbuf *request_buf;
4753
0
    int error;
4754
4755
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4756
4757
0
    if (reply) {
4758
0
        request->nlmsg_flags |= NLM_F_ECHO;
4759
0
    }
4760
4761
0
    request_buf = ofpbuf_new(1024);
4762
0
    dpif_netlink_flow_to_ofpbuf(request, request_buf);
4763
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4764
0
    ofpbuf_delete(request_buf);
4765
4766
0
    if (reply) {
4767
0
        if (!error) {
4768
0
            error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
4769
0
        }
4770
0
        if (error) {
4771
0
            dpif_netlink_flow_init(reply);
4772
0
            ofpbuf_delete(*bufp);
4773
0
            *bufp = NULL;
4774
0
        }
4775
0
    }
4776
0
    return error;
4777
0
}
4778
4779
static void
4780
dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
4781
                            struct dpif_flow_stats *stats)
4782
0
{
4783
0
    if (flow->stats) {
4784
0
        stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
4785
0
        stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
4786
0
    } else {
4787
0
        stats->n_packets = 0;
4788
0
        stats->n_bytes = 0;
4789
0
    }
4790
0
    stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
4791
0
    stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
4792
0
}
4793
4794
/* Logs information about a packet that was recently lost in 'ch' (in
4795
 * 'dpif_'). */
4796
static void
4797
report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
4798
            uint32_t handler_id)
4799
0
{
4800
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
4801
0
    struct ds s;
4802
4803
0
    if (VLOG_DROP_WARN(&rl)) {
4804
0
        return;
4805
0
    }
4806
4807
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
4808
0
        VLOG_WARN("%s: lost packet on handler %u",
4809
0
                  dpif_name(&dpif->dpif), handler_id);
4810
0
    } else {
4811
0
        ds_init(&s);
4812
0
        if (ch->last_poll != LLONG_MIN) {
4813
0
            ds_put_format(&s, " (last polled %lld ms ago)",
4814
0
                        time_msec() - ch->last_poll);
4815
0
        }
4816
4817
0
        VLOG_WARN("%s: lost packet on port channel %u of handler %u%s",
4818
0
                  dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s));
4819
0
        ds_destroy(&s);
4820
0
    }
4821
0
}
4822
4823
static void
4824
dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn,
4825
                                   int argc OVS_UNUSED,
4826
                                   const char *argv[] OVS_UNUSED,
4827
                                   void *aux OVS_UNUSED)
4828
0
{
4829
0
    struct ds reply = DS_EMPTY_INITIALIZER;
4830
0
    struct nl_dump dump;
4831
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
4832
0
    struct ofpbuf msg, buf;
4833
0
    int error;
4834
4835
0
    error = dpif_netlink_init();
4836
0
    if (error) {
4837
0
        return;
4838
0
    }
4839
4840
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
4841
0
    dpif_netlink_dp_dump_start(&dump);
4842
0
    while (nl_dump_next(&dump, &msg, &buf)) {
4843
0
        struct dpif_netlink_dp dp;
4844
0
        if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
4845
0
            ds_put_format(&reply, "%s: ", dp.name);
4846
0
            if (dp.user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) {
4847
0
                ds_put_format(&reply, "per-cpu dispatch mode");
4848
0
            } else {
4849
0
                ds_put_format(&reply, "per-vport dispatch mode");
4850
0
            }
4851
0
            ds_put_format(&reply, "\n");
4852
0
        }
4853
0
    }
4854
0
    ofpbuf_uninit(&buf);
4855
0
    error = nl_dump_done(&dump);
4856
0
    if (!error) {
4857
0
        unixctl_command_reply(conn, ds_cstr(&reply));
4858
0
    }
4859
4860
0
    ds_destroy(&reply);
4861
0
}