Coverage Report

Created: 2025-07-18 06:07

/src/openvswitch/lib/dpif-netlink.c
Line | Count | Source
1
/*
2
 * Copyright (c) 2008-2018 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
19
#include "dpif-netlink.h"
20
21
#include <ctype.h>
22
#include <errno.h>
23
#include <fcntl.h>
24
#include <inttypes.h>
25
#include <net/if.h>
26
#include <linux/types.h>
27
#include <linux/pkt_sched.h>
28
#include <poll.h>
29
#include <stdlib.h>
30
#include <strings.h>
31
#include <sys/epoll.h>
32
#include <sys/stat.h>
33
#include <unistd.h>
34
35
#include "bitmap.h"
36
#include "dpif-netlink-rtnl.h"
37
#include "dpif-provider.h"
38
#include "fat-rwlock.h"
39
#include "flow.h"
40
#include "netdev-linux.h"
41
#include "netdev-offload.h"
42
#include "netdev-provider.h"
43
#include "netdev-vport.h"
44
#include "netdev.h"
45
#include "netlink-conntrack.h"
46
#include "netlink-notifier.h"
47
#include "netlink-socket.h"
48
#include "netlink.h"
49
#include "netnsid.h"
50
#include "odp-util.h"
51
#include "openvswitch/dynamic-string.h"
52
#include "openvswitch/flow.h"
53
#include "openvswitch/hmap.h"
54
#include "openvswitch/match.h"
55
#include "openvswitch/ofpbuf.h"
56
#include "openvswitch/poll-loop.h"
57
#include "openvswitch/shash.h"
58
#include "openvswitch/thread.h"
59
#include "openvswitch/usdt-probes.h"
60
#include "openvswitch/vlog.h"
61
#include "packets.h"
62
#include "random.h"
63
#include "sset.h"
64
#include "timeval.h"
65
#include "unaligned.h"
66
#include "util.h"
67
68
VLOG_DEFINE_THIS_MODULE(dpif_netlink);
69
#ifdef _WIN32
70
#include "wmi.h"
71
enum { WINDOWS = 1 };
72
#else
73
enum { WINDOWS = 0 };
74
#endif
75
enum { MAX_PORTS = USHRT_MAX };
76
77
/* This ethtool flag was introduced in Linux 2.6.24, so it might be
78
 * missing if we have old headers. */
79
0
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
80
81
#define FLOW_DUMP_MAX_BATCH 50
82
0
#define OPERATE_MAX_OPS 50
83
84
#ifndef EPOLLEXCLUSIVE
85
#define EPOLLEXCLUSIVE (1u << 28)
86
#endif
87
88
0
#define OVS_DP_F_UNSUPPORTED (1u << 31)
89
90
/* This PID is not used by the kernel datapath when using dispatch per CPU,
91
 * but it is required to be set (not zero). */
92
0
#define DPIF_NETLINK_PER_CPU_PID UINT32_MAX
93
struct dpif_netlink_dp {
94
    /* Generic Netlink header. */
95
    uint8_t cmd;
96
97
    /* struct ovs_header. */
98
    int dp_ifindex;
99
100
    /* Attributes. */
101
    const char *name;                  /* OVS_DP_ATTR_NAME. */
102
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
103
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
104
    uint32_t cache_size;               /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
105
    const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
106
    const struct ovs_dp_megaflow_stats *megaflow_stats;
107
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
108
    const uint32_t *upcall_pids;       /* OVS_DP_ATTR_PER_CPU_PIDS */
109
    uint32_t n_upcall_pids;
110
};
111
112
static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
113
static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
114
                                       const struct ofpbuf *);
115
static void dpif_netlink_dp_dump_start(struct nl_dump *);
116
static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
117
                                    struct dpif_netlink_dp *reply,
118
                                    struct ofpbuf **bufp);
119
static int dpif_netlink_dp_get(const struct dpif *,
120
                               struct dpif_netlink_dp *reply,
121
                               struct ofpbuf **bufp);
122
static int
123
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features);
124
125
static void
126
dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn, int argc,
127
                                   const char *argv[], void *aux);
128
129
struct dpif_netlink_flow {
130
    /* Generic Netlink header. */
131
    uint8_t cmd;
132
133
    /* struct ovs_header. */
134
    unsigned int nlmsg_flags;
135
    int dp_ifindex;
136
137
    /* Attributes.
138
     *
139
     * The 'stats' member points to 64-bit data that might only be aligned on
140
     * 32-bit boundaries, so get_unaligned_u64() should be used to access its
141
     * values.
142
     *
143
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
144
     * the Netlink version of the command, even if actions_len is zero. */
145
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
146
    size_t key_len;
147
    const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
148
    size_t mask_len;
149
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
150
    size_t actions_len;
151
    ovs_u128 ufid;                      /* OVS_FLOW_ATTR_FLOW_ID. */
152
    bool ufid_present;                  /* Is there a UFID? */
153
    bool ufid_terse;                    /* Skip serializing key/mask/acts? */
154
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
155
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
156
    const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
157
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
158
    bool probe;                         /* OVS_FLOW_ATTR_PROBE. */
159
};
160
161
static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
162
static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
163
                                         const struct ofpbuf *);
164
static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
165
                                        struct ofpbuf *);
166
static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
167
                                      struct dpif_netlink_flow *reply,
168
                                      struct ofpbuf **bufp);
169
static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
170
                                        struct dpif_flow_stats *);
171
static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
172
                                           const struct dpif_netlink_flow *);
173
174
/* One of the dpif channels between the kernel and userspace. */
175
struct dpif_channel {
176
    struct nl_sock *sock;       /* Netlink socket. */
177
    long long int last_poll;    /* Last time this channel was polled. */
178
};
179
180
#ifdef _WIN32
181
#define VPORT_SOCK_POOL_SIZE 1
182
/* On Windows, there is no native support for epoll.  There are equivalent
183
 * interfaces, though they are not currently used.  For simplicity, a pool of
184
 * netlink sockets is used.  Each socket is represented by 'struct
185
 * dpif_windows_vport_sock'.  Since it is a pool, multiple OVS ports may be
186
 * sharing the same socket.  In the future, we can add a reference count and
187
 * such fields. */
188
struct dpif_windows_vport_sock {
189
    struct nl_sock *nl_sock;    /* netlink socket. */
190
};
191
#endif
192
193
struct dpif_handler {
194
    /* per-vport dispatch mode. */
195
    struct epoll_event *epoll_events;
196
    int epoll_fd;                 /* epoll fd that includes channel socks. */
197
    int n_events;                 /* Num events returned by epoll_wait(). */
198
    int event_offset;             /* Offset into 'epoll_events'. */
199
200
    /* per-cpu dispatch mode. */
201
    struct nl_sock *sock;         /* Each handler thread holds one netlink
202
                                     socket. */
203
204
#ifdef _WIN32
205
    /* Pool of sockets. */
206
    struct dpif_windows_vport_sock *vport_sock_pool;
207
    size_t last_used_pool_idx; /* Index to aid in allocating a
208
                                  socket in the pool to a port. */
209
#endif
210
};
211
212
/* Datapath interface for the openvswitch Linux kernel module. */
213
struct dpif_netlink {
214
    struct dpif dpif;
215
    int dp_ifindex;
216
    uint32_t user_features;
217
218
    /* Upcall messages. */
219
    struct fat_rwlock upcall_lock;
220
    struct dpif_handler *handlers;
221
    uint32_t n_handlers;           /* Num of upcall handlers. */
222
223
    /* Per-vport dispatch mode. */
224
    struct dpif_channel *channels; /* Array of channels for each port. */
225
    int uc_array_size;             /* Size of 'handler->channels' and */
226
                                   /* 'handler->epoll_events'. */
227
228
    /* Change notification. */
229
    struct nl_sock *port_notifier; /* vport multicast group subscriber. */
230
    bool refresh_channels;
231
};
232
233
static void report_loss(struct dpif_netlink *, struct dpif_channel *,
234
                        uint32_t ch_idx, uint32_t handler_id);
235
236
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
237
238
/* Generic Netlink family numbers for OVS.
239
 *
240
 * Initialized by dpif_netlink_init(). */
241
static int ovs_datapath_family;
242
static int ovs_vport_family;
243
static int ovs_flow_family;
244
static int ovs_packet_family;
245
static int ovs_meter_family;
246
static int ovs_ct_limit_family;
247
248
/* Generic Netlink multicast groups for OVS.
249
 *
250
 * Initialized by dpif_netlink_init(). */
251
static unsigned int ovs_vport_mcgroup;
252
253
/* If true, tunnel devices are created using OVS compat/genetlink.
254
 * If false, tunnel devices are created with rtnetlink using lightweight
255
 * tunnels. If we fail to create the tunnel with rtnetlink+LWT, we fall back
256
 * to using the compat interface. */
257
static bool ovs_tunnels_out_of_tree = true;
258
259
static int dpif_netlink_init(void);
260
static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
261
static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
262
                                          odp_port_t port_no);
263
static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
264
static int dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *,
265
                                                        uint32_t n_handlers);
266
static void destroy_all_channels(struct dpif_netlink *);
267
static int dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *);
268
static void destroy_all_handlers(struct dpif_netlink *);
269
270
static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
271
                                         struct ofpbuf *);
272
static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
273
                                          const struct ofpbuf *);
274
static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
275
                                     odp_port_t port_no, const char *port_name,
276
                                     struct dpif_port *dpif_port);
277
278
static int
279
create_nl_sock(struct dpif_netlink *dpif OVS_UNUSED, struct nl_sock **sockp)
280
    OVS_REQ_WRLOCK(dpif->upcall_lock)
281
0
{
282
0
#ifndef _WIN32
283
0
    return nl_sock_create(NETLINK_GENERIC, sockp);
284
#else
285
    /* Pick netlink sockets to use in a round-robin fashion from each
286
     * handler's pool of sockets. */
287
    struct dpif_handler *handler = &dpif->handlers[0];
288
    struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
289
    size_t index = handler->last_used_pool_idx;
290
291
    /* A pool of sockets is allocated when the handler is initialized. */
292
    if (sock_pool == NULL) {
293
        *sockp = NULL;
294
        return EINVAL;
295
    }
296
297
    ovs_assert(index < VPORT_SOCK_POOL_SIZE);
298
    *sockp = sock_pool[index].nl_sock;
299
    ovs_assert(*sockp);
300
    index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
301
    handler->last_used_pool_idx = index;
302
    return 0;
303
#endif
304
0
}
305
306
static void
307
close_nl_sock(struct nl_sock *sock)
308
0
{
309
0
#ifndef _WIN32
310
0
    nl_sock_destroy(sock);
311
0
#endif
312
0
}
313
314
static struct dpif_netlink *
315
dpif_netlink_cast(const struct dpif *dpif)
316
0
{
317
0
    dpif_assert_class(dpif, &dpif_netlink_class);
318
0
    return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
319
0
}
320
321
static inline bool
322
0
dpif_netlink_upcall_per_cpu(const struct dpif_netlink *dpif) {
323
0
    return !!((dpif)->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU);
324
0
}
325
326
static int
327
dpif_netlink_enumerate(struct sset *all_dps,
328
                       const struct dpif_class *dpif_class OVS_UNUSED)
329
0
{
330
0
    struct nl_dump dump;
331
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
332
0
    struct ofpbuf msg, buf;
333
0
    int error;
334
335
0
    error = dpif_netlink_init();
336
0
    if (error) {
337
0
        return error;
338
0
    }
339
340
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
341
0
    dpif_netlink_dp_dump_start(&dump);
342
0
    while (nl_dump_next(&dump, &msg, &buf)) {
343
0
        struct dpif_netlink_dp dp;
344
345
0
        if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
346
0
            sset_add(all_dps, dp.name);
347
0
        }
348
0
    }
349
0
    ofpbuf_uninit(&buf);
350
0
    return nl_dump_done(&dump);
351
0
}
352
353
static int
354
dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
355
                  bool create, struct dpif **dpifp)
356
0
{
357
0
    struct dpif_netlink_dp dp_request, dp;
358
0
    struct ofpbuf *buf;
359
0
    uint32_t upcall_pid;
360
0
    int error;
361
362
0
    error = dpif_netlink_init();
363
0
    if (error) {
364
0
        return error;
365
0
    }
366
367
    /* Create or look up datapath. */
368
0
    dpif_netlink_dp_init(&dp_request);
369
0
    upcall_pid = 0;
370
0
    dp_request.upcall_pid = &upcall_pid;
371
0
    dp_request.name = name;
372
373
0
    if (create) {
374
0
        dp_request.cmd = OVS_DP_CMD_NEW;
375
0
    } else {
376
0
        dp_request.cmd = OVS_DP_CMD_GET;
377
378
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
379
0
        if (error) {
380
0
            return error;
381
0
        }
382
0
        dp_request.user_features = dp.user_features;
383
0
        ofpbuf_delete(buf);
384
385
        /* Use OVS_DP_CMD_SET to report user features */
386
0
        dp_request.cmd = OVS_DP_CMD_SET;
387
0
    }
388
389
    /* Some older kernels will not reject unknown features. This will cause
390
     * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to
391
     * test for that, we attempt to set a feature that we know is not supported
392
     * by any kernel. If this feature is not rejected, we can assume we are
393
     * running on one of these older kernels.
394
     */
395
0
    dp_request.user_features |= OVS_DP_F_UNALIGNED;
396
0
    dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
397
0
    dp_request.user_features |= OVS_DP_F_UNSUPPORTED;
398
0
    error = dpif_netlink_dp_transact(&dp_request, NULL, NULL);
399
0
    if (error) {
400
        /* The Open vSwitch kernel module has two modes for dispatching
401
         * upcalls: per-vport and per-cpu.
402
         *
403
         * When dispatching upcalls per-vport, the kernel will
404
         * send the upcall via a Netlink socket that has been selected based on
405
         * the vport that received the packet that is causing the upcall.
406
         *
407
         * When dispatching upcall per-cpu, the kernel will send the upcall via
408
         * a Netlink socket that has been selected based on the cpu that
409
         * received the packet that is causing the upcall.
410
         *
411
         * First we test to see if the kernel module supports per-cpu
412
         * dispatching (the preferred method). If it does not support per-cpu
413
         * dispatching, we fall back to the per-vport dispatch mode.
414
         */
415
0
        dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED;
416
0
        dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
417
0
        dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
418
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
419
0
        if (error == EOPNOTSUPP) {
420
0
            dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
421
0
            dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
422
0
            error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
423
0
        }
424
0
        if (error) {
425
0
            return error;
426
0
        }
427
428
0
        error = open_dpif(&dp, dpifp);
429
0
        dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
430
0
    } else {
431
0
        VLOG_INFO("Kernel does not correctly support feature negotiation. "
432
0
                  "Using standard features.");
433
0
        dp_request.cmd = OVS_DP_CMD_SET;
434
0
        dp_request.user_features = 0;
435
0
        dp_request.user_features |= OVS_DP_F_UNALIGNED;
436
0
        dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
437
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
438
0
        if (error) {
439
0
            return error;
440
0
        }
441
0
        error = open_dpif(&dp, dpifp);
442
0
    }
443
444
0
    ofpbuf_delete(buf);
445
446
0
    if (create) {
447
0
        VLOG_INFO("Datapath dispatch mode: %s",
448
0
                  dpif_netlink_upcall_per_cpu(dpif_netlink_cast(*dpifp)) ?
449
0
                  "per-cpu" : "per-vport");
450
0
    }
451
452
0
    return error;
453
0
}
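
For illustration, a minimal sketch of how the negotiation above is normally reached, assuming the generic dpif_open()/dpif_close() entry points from lib/dpif.h; "ovs-system" is only an example datapath name.

    struct dpif *dpif;
    int err;

    /* The "system" type selects dpif_netlink_class, so dpif_open() lands in
     * dpif_netlink_open(), which probes feature negotiation and chooses
     * per-cpu or per-vport upcall dispatch as described above. */
    err = dpif_open("ovs-system", "system", &dpif);
    if (!err) {
        /* ... use the datapath ... */
        dpif_close(dpif);
    }
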
454
455
static int
456
open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
457
0
{
458
0
    struct dpif_netlink *dpif;
459
460
0
    dpif = xzalloc(sizeof *dpif);
461
0
    dpif->port_notifier = NULL;
462
0
    fat_rwlock_init(&dpif->upcall_lock);
463
464
0
    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
465
0
              dp->dp_ifindex, dp->dp_ifindex);
466
467
0
    dpif->dp_ifindex = dp->dp_ifindex;
468
0
    dpif->user_features = dp->user_features;
469
0
    *dpifp = &dpif->dpif;
470
471
0
    return 0;
472
0
}
473
474
#ifdef _WIN32
475
static void
476
vport_delete_sock_pool(struct dpif_handler *handler)
477
    OVS_REQ_WRLOCK(dpif->upcall_lock)
478
{
479
    if (handler->vport_sock_pool) {
480
        uint32_t i;
481
        struct dpif_windows_vport_sock *sock_pool =
482
            handler->vport_sock_pool;
483
484
        for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
485
            if (sock_pool[i].nl_sock) {
486
                nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
487
                nl_sock_destroy(sock_pool[i].nl_sock);
488
                sock_pool[i].nl_sock = NULL;
489
            }
490
        }
491
492
        free(handler->vport_sock_pool);
493
        handler->vport_sock_pool = NULL;
494
    }
495
}
496
497
static int
498
vport_create_sock_pool(struct dpif_handler *handler)
499
    OVS_REQ_WRLOCK(dpif->upcall_lock)
500
{
501
    struct dpif_windows_vport_sock *sock_pool;
502
    size_t i;
503
    int error = 0;
504
505
    sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
506
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
507
        error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
508
        if (error) {
509
            goto error;
510
        }
511
512
        /* Enable the netlink socket to receive packets.  This is equivalent to
513
         * calling nl_sock_join_mcgroup() to receive events. */
514
        error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
515
        if (error) {
516
           goto error;
517
        }
518
    }
519
520
    handler->vport_sock_pool = sock_pool;
521
    handler->last_used_pool_idx = 0;
522
    return 0;
523
524
error:
525
    vport_delete_sock_pool(handler);
526
    return error;
527
}
528
#endif /* _WIN32 */
529
530
/* Given the port number 'port_idx', extracts the pid of the netlink socket
531
 * associated with the port and assigns it to 'upcall_pid'. */
532
static bool
533
vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
534
              uint32_t *upcall_pid)
535
0
{
536
    /* Since the nl_sock can only be assigned to either all
537
     * or none "dpif" channels, the following check
538
     * would suffice. */
539
0
    if (!dpif->channels[port_idx].sock) {
540
0
        return false;
541
0
    }
542
0
    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
543
544
0
    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
545
546
0
    return true;
547
0
}
548
549
static int
550
vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
551
                  struct nl_sock *sock)
552
0
{
553
0
    struct epoll_event event;
554
0
    uint32_t port_idx = odp_to_u32(port_no);
555
0
    size_t i;
556
0
    int error;
557
558
0
    if (dpif->handlers == NULL) {
559
0
        close_nl_sock(sock);
560
0
        return 0;
561
0
    }
562
563
    /* We assume that the datapath densely chooses port numbers, which can
564
     * therefore be used as an index into 'channels' and 'epoll_events' of
565
     * 'dpif'. */
566
0
    if (port_idx >= dpif->uc_array_size) {
567
0
        uint32_t new_size = port_idx + 1;
568
569
0
        if (new_size > MAX_PORTS) {
570
0
            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
571
0
                         dpif_name(&dpif->dpif), port_no);
572
0
            return EFBIG;
573
0
        }
574
575
0
        dpif->channels = xrealloc(dpif->channels,
576
0
                                  new_size * sizeof *dpif->channels);
577
578
0
        for (i = dpif->uc_array_size; i < new_size; i++) {
579
0
            dpif->channels[i].sock = NULL;
580
0
        }
581
582
0
        for (i = 0; i < dpif->n_handlers; i++) {
583
0
            struct dpif_handler *handler = &dpif->handlers[i];
584
585
0
            handler->epoll_events = xrealloc(handler->epoll_events,
586
0
                new_size * sizeof *handler->epoll_events);
587
588
0
        }
589
0
        dpif->uc_array_size = new_size;
590
0
    }
591
592
0
    memset(&event, 0, sizeof event);
593
0
    event.events = EPOLLIN | EPOLLEXCLUSIVE;
594
0
    event.data.u32 = port_idx;
595
596
0
    for (i = 0; i < dpif->n_handlers; i++) {
597
0
        struct dpif_handler *handler = &dpif->handlers[i];
598
599
0
#ifndef _WIN32
600
0
        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
601
0
                      &event) < 0) {
602
0
            error = errno;
603
0
            goto error;
604
0
        }
605
0
#endif
606
0
    }
607
0
    dpif->channels[port_idx].sock = sock;
608
0
    dpif->channels[port_idx].last_poll = LLONG_MIN;
609
610
0
    return 0;
611
612
0
error:
613
0
#ifndef _WIN32
614
0
    while (i--) {
615
0
        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
616
0
                  nl_sock_fd(sock), NULL);
617
0
    }
618
0
#endif
619
0
    dpif->channels[port_idx].sock = NULL;
620
621
0
    return error;
622
0
}
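
A worked example of the dense port-number assumption above, with hypothetical values ('dpif', 'sock', and 'error' as in the function):

    /* Suppose uc_array_size == 3 and port 5 is added.  The call grows
     * 'dpif->channels' and every handler's 'epoll_events' to 6 entries,
     * leaves channels[3] and channels[4] with a NULL sock, and stores the
     * new socket at channels[5]. */
    error = vport_add_channel(dpif, u32_to_odp(5), sock);
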
623
624
static void
625
vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
626
0
{
627
0
    uint32_t port_idx = odp_to_u32(port_no);
628
0
    size_t i;
629
630
0
    if (!dpif->handlers || port_idx >= dpif->uc_array_size
631
0
        || !dpif->channels[port_idx].sock) {
632
0
        return;
633
0
    }
634
635
0
    for (i = 0; i < dpif->n_handlers; i++) {
636
0
        struct dpif_handler *handler = &dpif->handlers[i];
637
0
#ifndef _WIN32
638
0
        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
639
0
                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
640
0
#endif
641
0
        handler->event_offset = handler->n_events = 0;
642
0
    }
643
0
#ifndef _WIN32
644
0
    nl_sock_destroy(dpif->channels[port_idx].sock);
645
0
#endif
646
0
    dpif->channels[port_idx].sock = NULL;
647
0
}
648
649
static void
650
destroy_all_channels(struct dpif_netlink *dpif)
651
    OVS_REQ_WRLOCK(dpif->upcall_lock)
652
0
{
653
0
    unsigned int i;
654
655
0
    if (!dpif->handlers) {
656
0
        return;
657
0
    }
658
659
0
    for (i = 0; i < dpif->uc_array_size; i++ ) {
660
0
        struct dpif_netlink_vport vport_request;
661
0
        uint32_t upcall_pids = 0;
662
663
0
        if (!dpif->channels[i].sock) {
664
0
            continue;
665
0
        }
666
667
        /* Turn off upcalls. */
668
0
        dpif_netlink_vport_init(&vport_request);
669
0
        vport_request.cmd = OVS_VPORT_CMD_SET;
670
0
        vport_request.dp_ifindex = dpif->dp_ifindex;
671
0
        vport_request.port_no = u32_to_odp(i);
672
0
        vport_request.n_upcall_pids = 1;
673
0
        vport_request.upcall_pids = &upcall_pids;
674
0
        dpif_netlink_vport_transact(&vport_request, NULL, NULL);
675
676
0
        vport_del_channels(dpif, u32_to_odp(i));
677
0
    }
678
679
0
    for (i = 0; i < dpif->n_handlers; i++) {
680
0
        struct dpif_handler *handler = &dpif->handlers[i];
681
682
0
        dpif_netlink_handler_uninit(handler);
683
0
        free(handler->epoll_events);
684
0
    }
685
0
    free(dpif->channels);
686
0
    free(dpif->handlers);
687
0
    dpif->handlers = NULL;
688
0
    dpif->channels = NULL;
689
0
    dpif->n_handlers = 0;
690
0
    dpif->uc_array_size = 0;
691
0
}
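
The "turn off upcalls" step above, isolated as a sketch; every name comes from this file and 'port_no' stands for an arbitrary existing port:

    struct dpif_netlink_vport vport_request;
    uint32_t zero_pid = 0;

    /* A vport whose only upcall PID is 0 tells the kernel to stop sending
     * upcalls for that port. */
    dpif_netlink_vport_init(&vport_request);
    vport_request.cmd = OVS_VPORT_CMD_SET;
    vport_request.dp_ifindex = dpif->dp_ifindex;
    vport_request.port_no = port_no;
    vport_request.n_upcall_pids = 1;
    vport_request.upcall_pids = &zero_pid;
    dpif_netlink_vport_transact(&vport_request, NULL, NULL);
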
692
693
static void
694
destroy_all_handlers(struct dpif_netlink *dpif)
695
    OVS_REQ_WRLOCK(dpif->upcall_lock)
696
0
{
697
0
    int i = 0;
698
699
0
    if (!dpif->handlers) {
700
0
        return;
701
0
    }
702
0
    for (i = 0; i < dpif->n_handlers; i++) {
703
0
        struct dpif_handler *handler = &dpif->handlers[i];
704
0
        close_nl_sock(handler->sock);
705
0
    }
706
0
    free(dpif->handlers);
707
0
    dpif->handlers = NULL;
708
0
    dpif->n_handlers = 0;
709
0
}
710
711
static void
712
dpif_netlink_close(struct dpif *dpif_)
713
0
{
714
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
715
716
0
    nl_sock_destroy(dpif->port_notifier);
717
718
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
719
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
720
0
        destroy_all_handlers(dpif);
721
0
    } else {
722
0
        destroy_all_channels(dpif);
723
0
    }
724
0
    fat_rwlock_unlock(&dpif->upcall_lock);
725
726
0
    fat_rwlock_destroy(&dpif->upcall_lock);
727
0
    free(dpif);
728
0
}
729
730
static int
731
dpif_netlink_destroy(struct dpif *dpif_)
732
0
{
733
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
734
0
    struct dpif_netlink_dp dp;
735
736
0
    dpif_netlink_dp_init(&dp);
737
0
    dp.cmd = OVS_DP_CMD_DEL;
738
0
    dp.dp_ifindex = dpif->dp_ifindex;
739
0
    return dpif_netlink_dp_transact(&dp, NULL, NULL);
740
0
}
741
742
static bool
743
dpif_netlink_run(struct dpif *dpif_)
744
0
{
745
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
746
747
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
748
0
        if (dpif->refresh_channels) {
749
0
            dpif->refresh_channels = false;
750
0
            fat_rwlock_wrlock(&dpif->upcall_lock);
751
0
            dpif_netlink_refresh_handlers_vport_dispatch(dpif,
752
0
                                                         dpif->n_handlers);
753
0
            fat_rwlock_unlock(&dpif->upcall_lock);
754
0
        }
755
0
    }
756
0
    return false;
757
0
}
758
759
static int
760
dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
761
0
{
762
0
    struct dpif_netlink_dp dp;
763
0
    struct ofpbuf *buf;
764
0
    int error;
765
766
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
767
0
    if (!error) {
768
0
        memset(stats, 0, sizeof *stats);
769
770
0
        if (dp.stats) {
771
0
            stats->n_hit    = get_32aligned_u64(&dp.stats->n_hit);
772
0
            stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
773
0
            stats->n_lost   = get_32aligned_u64(&dp.stats->n_lost);
774
0
            stats->n_flows  = get_32aligned_u64(&dp.stats->n_flows);
775
0
        }
776
777
0
        if (dp.megaflow_stats) {
778
0
            stats->n_masks = dp.megaflow_stats->n_masks;
779
0
            stats->n_mask_hit = get_32aligned_u64(
780
0
                &dp.megaflow_stats->n_mask_hit);
781
0
            stats->n_cache_hit = get_32aligned_u64(
782
0
                &dp.megaflow_stats->n_cache_hit);
783
784
0
            if (!stats->n_cache_hit) {
785
                /* Old kernels don't use this field and always
786
                 * report zero instead.  Disable this stat. */
787
0
                stats->n_cache_hit = UINT64_MAX;
788
0
            }
789
0
        } else {
790
0
            stats->n_masks = UINT32_MAX;
791
0
            stats->n_mask_hit = UINT64_MAX;
792
0
            stats->n_cache_hit = UINT64_MAX;
793
0
        }
794
0
        ofpbuf_delete(buf);
795
0
    }
796
0
    return error;
797
0
}
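
Since missing statistics are reported as UINT64_MAX (UINT32_MAX for n_masks) rather than 0, callers can tell "unsupported" apart from a genuine zero. A sketch, assuming the generic dpif_get_dp_stats() wrapper from lib/dpif.h and an open 'dpif_' handle:

    struct dpif_dp_stats stats;

    if (!dpif_get_dp_stats(dpif_, &stats)) {
        if (stats.n_cache_hit == UINT64_MAX) {
            /* Kernel too old to report mask-cache hits. */
        } else {
            /* A real counter, possibly zero. */
        }
    }
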
798
799
static int
800
dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
801
                              uint32_t n_upcall_pids)
802
0
{
803
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
804
0
    int largest_cpu_id = ovs_numa_get_largest_core_id();
805
0
    struct dpif_netlink_dp request, reply;
806
0
    struct ofpbuf *bufp;
807
808
0
    uint32_t *corrected;
809
0
    int error, i, n_cores;
810
811
0
    if (largest_cpu_id == OVS_NUMA_UNSPEC) {
812
0
        largest_cpu_id = -1;
813
0
    }
814
815
    /* Some systems have non-contiguous CPU core ids.  count_total_cores()
816
     * would return an accurate number; however, this number cannot be used.
817
     * e.g., if the largest core_id of a system is cpu9 but the system only
818
     * has 4 CPUs, then the OVS kernel module would throw a "CPU mismatch"
819
     * warning.  With the MAX() in place in this example we send an array of
820
     * size 10 and prevent the warning.  This has no bearing on the number of
821
     * threads created.
822
     */
823
0
    n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
824
0
    VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
825
0
             n_cores);
826
827
0
    dpif_netlink_dp_init(&request);
828
0
    request.cmd = OVS_DP_CMD_SET;
829
0
    request.name = dpif_->base_name;
830
0
    request.dp_ifindex = dpif->dp_ifindex;
831
0
    request.user_features = dpif->user_features |
832
0
                            OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
833
834
0
    corrected = xcalloc(n_cores, sizeof *corrected);
835
836
0
    for (i = 0; i < n_cores; i++) {
837
0
        corrected[i] = upcall_pids[i % n_upcall_pids];
838
0
    }
839
0
    request.upcall_pids = corrected;
840
0
    request.n_upcall_pids = n_cores;
841
842
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
843
0
    if (!error) {
844
0
        dpif->user_features = reply.user_features;
845
0
        ofpbuf_delete(bufp);
846
0
        if (!dpif_netlink_upcall_per_cpu(dpif)) {
847
0
            error = -EOPNOTSUPP;
848
0
        }
849
0
    }
850
0
    free(corrected);
851
0
    return error;
852
0
}
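
A worked example of the sizing logic above, for a hypothetical machine whose online core ids are {0, 1, 8, 9} and which runs four handler threads:

    /* count_total_cores() == 4 and the largest core id is 9, so
     *     n_cores = MAX(4, 9 + 1) = 10,
     * and the PID array sent to the kernel is filled round-robin,
     *     corrected[i] = upcall_pids[i % 4]   for i = 0..9,
     * so core 8 reuses handler 0's PID and core 9 reuses handler 1's. */
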
853
854
static int
855
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features)
856
0
{
857
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
858
0
    struct dpif_netlink_dp request, reply;
859
0
    struct ofpbuf *bufp;
860
0
    int error;
861
862
0
    dpif_netlink_dp_init(&request);
863
0
    request.cmd = OVS_DP_CMD_SET;
864
0
    request.name = dpif_->base_name;
865
0
    request.dp_ifindex = dpif->dp_ifindex;
866
0
    request.user_features = dpif->user_features | new_features;
867
868
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
869
0
    if (!error) {
870
0
        dpif->user_features = reply.user_features;
871
0
        ofpbuf_delete(bufp);
872
0
        if (!(dpif->user_features & new_features)) {
873
0
            return -EOPNOTSUPP;
874
0
        }
875
0
    }
876
877
0
    return error;
878
0
}
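
The request-then-verify pattern above from the caller's side, mirroring the OVS_DP_F_TC_RECIRC_SHARING call made in dpif_netlink_open() ('dpif_' stands for an open datapath):

    /* A successful transaction that does not echo the requested bit back in
     * 'user_features' yields -EOPNOTSUPP, so the caller can distinguish a
     * refused feature from a plain netlink error. */
    if (dpif_netlink_set_features(dpif_, OVS_DP_F_TC_RECIRC_SHARING)) {
        /* Feature unavailable, or the transaction itself failed. */
    }
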
879
880
static const char *
881
get_vport_type(const struct dpif_netlink_vport *vport)
882
0
{
883
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
884
885
0
    switch (vport->type) {
886
0
    case OVS_VPORT_TYPE_NETDEV: {
887
0
        const char *type = netdev_get_type_from_name(vport->name);
888
889
0
        return type ? type : "system";
890
0
    }
891
892
0
    case OVS_VPORT_TYPE_INTERNAL:
893
0
        return "internal";
894
895
0
    case OVS_VPORT_TYPE_GENEVE:
896
0
        return "geneve";
897
898
0
    case OVS_VPORT_TYPE_GRE:
899
0
        return "gre";
900
901
0
    case OVS_VPORT_TYPE_VXLAN:
902
0
        return "vxlan";
903
904
0
    case OVS_VPORT_TYPE_ERSPAN:
905
0
        return "erspan";
906
907
0
    case OVS_VPORT_TYPE_IP6ERSPAN:
908
0
        return "ip6erspan";
909
910
0
    case OVS_VPORT_TYPE_IP6GRE:
911
0
        return "ip6gre";
912
913
0
    case OVS_VPORT_TYPE_GTPU:
914
0
        return "gtpu";
915
916
0
    case OVS_VPORT_TYPE_SRV6:
917
0
        return "srv6";
918
919
0
    case OVS_VPORT_TYPE_BAREUDP:
920
0
        return "bareudp";
921
922
0
    case OVS_VPORT_TYPE_UNSPEC:
923
0
    case __OVS_VPORT_TYPE_MAX:
924
0
        break;
925
0
    }
926
927
0
    VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
928
0
                 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
929
0
    return "unknown";
930
0
}
931
932
enum ovs_vport_type
933
netdev_to_ovs_vport_type(const char *type)
934
0
{
935
0
    if (!strcmp(type, "tap") || !strcmp(type, "system")) {
936
0
        return OVS_VPORT_TYPE_NETDEV;
937
0
    } else if (!strcmp(type, "internal")) {
938
0
        return OVS_VPORT_TYPE_INTERNAL;
939
0
    } else if (!strcmp(type, "geneve")) {
940
0
        return OVS_VPORT_TYPE_GENEVE;
941
0
    } else if (!strcmp(type, "vxlan")) {
942
0
        return OVS_VPORT_TYPE_VXLAN;
943
0
    } else if (!strcmp(type, "erspan")) {
944
0
        return OVS_VPORT_TYPE_ERSPAN;
945
0
    } else if (!strcmp(type, "ip6erspan")) {
946
0
        return OVS_VPORT_TYPE_IP6ERSPAN;
947
0
    } else if (!strcmp(type, "ip6gre")) {
948
0
        return OVS_VPORT_TYPE_IP6GRE;
949
0
    } else if (!strcmp(type, "gre")) {
950
0
        return OVS_VPORT_TYPE_GRE;
951
0
    } else if (!strcmp(type, "gtpu")) {
952
0
        return OVS_VPORT_TYPE_GTPU;
953
0
    } else if (!strcmp(type, "srv6")) {
954
0
        return OVS_VPORT_TYPE_SRV6;
955
0
    } else if (!strcmp(type, "bareudp")) {
956
0
        return OVS_VPORT_TYPE_BAREUDP;
957
0
    } else {
958
0
        return OVS_VPORT_TYPE_UNSPEC;
959
0
    }
960
0
}
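
A few concrete mappings implied by the function above:

    /* netdev_to_ovs_vport_type("vxlan")    -> OVS_VPORT_TYPE_VXLAN
     * netdev_to_ovs_vport_type("tap")      -> OVS_VPORT_TYPE_NETDEV
     * netdev_to_ovs_vport_type("internal") -> OVS_VPORT_TYPE_INTERNAL
     * netdev_to_ovs_vport_type("foo")      -> OVS_VPORT_TYPE_UNSPEC,
     * which dpif_netlink_port_add_compat() rejects with EINVAL. */
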
961
962
static int
963
dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
964
                        enum ovs_vport_type type,
965
                        struct ofpbuf *options,
966
                        odp_port_t *port_nop)
967
    OVS_REQ_WRLOCK(dpif->upcall_lock)
968
0
{
969
0
    struct dpif_netlink_vport request, reply;
970
0
    struct ofpbuf *buf;
971
0
    struct nl_sock *sock = NULL;
972
0
    uint32_t upcall_pids = 0;
973
0
    int error = 0;
974
975
    /* per-cpu dispatch mode does not require a socket per vport. */
976
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
977
0
        if (dpif->handlers) {
978
0
            error = create_nl_sock(dpif, &sock);
979
0
            if (error) {
980
0
                return error;
981
0
            }
982
0
        }
983
0
        if (sock) {
984
0
            upcall_pids = nl_sock_pid(sock);
985
0
        }
986
0
    }
987
988
0
    dpif_netlink_vport_init(&request);
989
0
    request.cmd = OVS_VPORT_CMD_NEW;
990
0
    request.dp_ifindex = dpif->dp_ifindex;
991
0
    request.type = type;
992
0
    request.name = name;
993
994
0
    request.port_no = *port_nop;
995
0
    request.n_upcall_pids = 1;
996
0
    request.upcall_pids = &upcall_pids;
997
998
0
    if (options) {
999
0
        request.options = options->data;
1000
0
        request.options_len = options->size;
1001
0
    }
1002
1003
0
    error = dpif_netlink_vport_transact(&request, &reply, &buf);
1004
0
    if (!error) {
1005
0
        *port_nop = reply.port_no;
1006
0
    } else {
1007
0
        if (error == EBUSY && *port_nop != ODPP_NONE) {
1008
0
            VLOG_INFO("%s: requested port %"PRIu32" is in use",
1009
0
                      dpif_name(&dpif->dpif), *port_nop);
1010
0
        }
1011
1012
0
        close_nl_sock(sock);
1013
0
        goto exit;
1014
0
    }
1015
1016
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
1017
0
        error = vport_add_channel(dpif, *port_nop, sock);
1018
0
        if (error) {
1019
0
            VLOG_INFO("%s: could not add channel for port %s",
1020
0
                        dpif_name(&dpif->dpif), name);
1021
1022
            /* Delete the port. */
1023
0
            dpif_netlink_vport_init(&request);
1024
0
            request.cmd = OVS_VPORT_CMD_DEL;
1025
0
            request.dp_ifindex = dpif->dp_ifindex;
1026
0
            request.port_no = *port_nop;
1027
0
            dpif_netlink_vport_transact(&request, NULL, NULL);
1028
0
            close_nl_sock(sock);
1029
0
            goto exit;
1030
0
        }
1031
0
    }
1032
1033
0
exit:
1034
0
    ofpbuf_delete(buf);
1035
1036
0
    return error;
1037
0
}
1038
1039
static int
1040
dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
1041
                             odp_port_t *port_nop)
1042
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1043
0
{
1044
0
    const struct netdev_tunnel_config *tnl_cfg;
1045
0
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1046
0
    const char *type = netdev_get_type(netdev);
1047
0
    uint64_t options_stub[64 / 8];
1048
0
    enum ovs_vport_type ovs_type;
1049
0
    struct ofpbuf options;
1050
0
    const char *name;
1051
1052
0
    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1053
1054
0
    ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
1055
0
    if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
1056
0
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
1057
0
                     "unsupported type `%s'",
1058
0
                     dpif_name(&dpif->dpif), name, type);
1059
0
        return EINVAL;
1060
0
    }
1061
1062
0
    if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
1063
#ifdef _WIN32
1064
        /* XXX : Map appropriate Windows handle */
1065
#else
1066
0
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
1067
0
#endif
1068
0
    }
1069
1070
#ifdef _WIN32
1071
    if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
1072
        if (!create_wmi_port(name)){
1073
            VLOG_ERR("Could not create wmi internal port with name:%s", name);
1074
            return EINVAL;
1075
        };
1076
    }
1077
#endif
1078
1079
0
    tnl_cfg = netdev_get_tunnel_config(netdev);
1080
0
    if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
1081
0
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
1082
0
        if (tnl_cfg->dst_port) {
1083
0
            nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
1084
0
                           ntohs(tnl_cfg->dst_port));
1085
0
        }
1086
0
        if (tnl_cfg->exts) {
1087
0
            size_t ext_ofs;
1088
0
            int i;
1089
1090
0
            ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
1091
0
            for (i = 0; i < 32; i++) {
1092
0
                if (tnl_cfg->exts & (UINT32_C(1) << i)) {
1093
0
                    nl_msg_put_flag(&options, i);
1094
0
                }
1095
0
            }
1096
0
            nl_msg_end_nested(&options, ext_ofs);
1097
0
        }
1098
0
        return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
1099
0
                                       port_nop);
1100
0
    } else {
1101
0
        return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
1102
0
    }
1103
1104
0
}
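
As an illustration of the option encoding above, a VXLAN port with destination port 4789 and one extension bit set in 'tnl_cfg->exts' (OVS_VXLAN_EXT_GBP is assumed here as the example bit) serializes roughly as:

    /* OVS_TUNNEL_ATTR_DST_PORT  = 4789
     * OVS_TUNNEL_ATTR_EXTENSION = { flag OVS_VXLAN_EXT_GBP }
     * The nested attribute is opened with nl_msg_start_nested(), one
     * nl_msg_put_flag() is emitted per set bit, and the resulting buffer is
     * passed to dpif_netlink_port_add__() as 'options'. */
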
1105
1106
static int
1107
dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
1108
                                      struct netdev *netdev,
1109
                                      odp_port_t *port_nop)
1110
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1111
0
{
1112
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
1113
0
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1114
0
    const char *name;
1115
0
    int error;
1116
1117
0
    error = dpif_netlink_rtnl_port_create(netdev);
1118
0
    if (error) {
1119
0
        if (error != EOPNOTSUPP) {
1120
0
            VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
1121
0
                         netdev_get_name(netdev), ovs_strerror(error));
1122
0
        }
1123
0
        return error;
1124
0
    }
1125
1126
0
    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1127
0
    error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
1128
0
                                    port_nop);
1129
0
    if (error) {
1130
0
        dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
1131
0
    }
1132
0
    return error;
1133
0
}
1134
1135
static int
1136
dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
1137
                      odp_port_t *port_nop)
1138
0
{
1139
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1140
0
    int error = EOPNOTSUPP;
1141
1142
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
1143
0
    if (!ovs_tunnels_out_of_tree) {
1144
0
        error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
1145
0
    }
1146
0
    if (error) {
1147
0
        error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
1148
0
    }
1149
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1150
1151
0
    return error;
1152
0
}
1153
1154
static int
1155
dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
1156
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1157
0
{
1158
0
    struct dpif_netlink_vport vport;
1159
0
    struct dpif_port dpif_port;
1160
0
    int error;
1161
1162
0
    error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
1163
0
    if (error) {
1164
0
        return error;
1165
0
    }
1166
1167
0
    dpif_netlink_vport_init(&vport);
1168
0
    vport.cmd = OVS_VPORT_CMD_DEL;
1169
0
    vport.dp_ifindex = dpif->dp_ifindex;
1170
0
    vport.port_no = port_no;
1171
#ifdef _WIN32
1172
    if (!strcmp(dpif_port.type, "internal")) {
1173
        if (!delete_wmi_port(dpif_port.name)) {
1174
            VLOG_ERR("Could not delete wmi port with name: %s",
1175
                     dpif_port.name);
1176
        };
1177
    }
1178
#endif
1179
0
    error = dpif_netlink_vport_transact(&vport, NULL, NULL);
1180
1181
0
    vport_del_channels(dpif, port_no);
1182
1183
0
    if (!error && !ovs_tunnels_out_of_tree) {
1184
0
        error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1185
0
        if (error == EOPNOTSUPP) {
1186
0
            error = 0;
1187
0
        }
1188
0
    }
1189
1190
0
    dpif_port_destroy(&dpif_port);
1191
1192
0
    return error;
1193
0
}
1194
1195
static int
1196
dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
1197
0
{
1198
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1199
0
    int error;
1200
1201
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
1202
0
    error = dpif_netlink_port_del__(dpif, port_no);
1203
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1204
1205
0
    return error;
1206
0
}
1207
1208
static int
1209
dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1210
                          const char *port_name, struct dpif_port *dpif_port)
1211
0
{
1212
0
    struct dpif_netlink_vport request;
1213
0
    struct dpif_netlink_vport reply;
1214
0
    struct ofpbuf *buf;
1215
0
    int error;
1216
1217
0
    dpif_netlink_vport_init(&request);
1218
0
    request.cmd = OVS_VPORT_CMD_GET;
1219
0
    request.dp_ifindex = dpif->dp_ifindex;
1220
0
    request.port_no = port_no;
1221
0
    request.name = port_name;
1222
1223
0
    error = dpif_netlink_vport_transact(&request, &reply, &buf);
1224
0
    if (!error) {
1225
0
        if (reply.dp_ifindex != request.dp_ifindex) {
1226
            /* A query by name reported that 'port_name' is in some datapath
1227
             * other than 'dpif', but the caller wants to know about 'dpif'. */
1228
0
            error = ENODEV;
1229
0
        } else if (dpif_port) {
1230
0
            dpif_port->name = xstrdup(reply.name);
1231
0
            dpif_port->type = xstrdup(get_vport_type(&reply));
1232
0
            dpif_port->port_no = reply.port_no;
1233
0
        }
1234
0
        ofpbuf_delete(buf);
1235
0
    }
1236
0
    return error;
1237
0
}
1238
1239
static int
1240
dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1241
                                  struct dpif_port *dpif_port)
1242
0
{
1243
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1244
1245
0
    return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
1246
0
}
1247
1248
static int
1249
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
1250
                              struct dpif_port *dpif_port)
1251
0
{
1252
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1253
1254
0
    return dpif_netlink_port_query__(dpif, 0, devname, dpif_port);
1255
0
}
1256
1257
static uint32_t
1258
dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1259
                            odp_port_t port_no)
1260
    OVS_REQ_RDLOCK(dpif->upcall_lock)
1261
0
{
1262
0
    uint32_t port_idx = odp_to_u32(port_no);
1263
0
    uint32_t pid = 0;
1264
1265
0
    if (dpif->handlers && dpif->uc_array_size > 0) {
1266
        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1267
         * channel, since it is not heavily loaded. */
1268
0
        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1269
1270
        /* We need to check because the socket pointer may change in between
1271
         * acquisitions of upcall_lock.  A known case happens when the main
1272
         * thread deletes the vport while the handler thread is handling
1273
         * the upcall from that port. */
1274
0
        if (dpif->channels[idx].sock) {
1275
0
            pid = nl_sock_pid(dpif->channels[idx].sock);
1276
0
        }
1277
0
    }
1278
1279
0
    return pid;
1280
0
}
1281
1282
static uint32_t
1283
dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
1284
0
{
1285
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1286
0
    uint32_t ret;
1287
1288
    /* In per-cpu dispatch mode, vports do not have an associated PID */
1289
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
1290
        /* In per-cpu dispatch mode, this will be ignored as kernel space will
1291
         * select the PID before sending to user space. We set it to
1292
         * DPIF_NETLINK_PER_CPU_PID as 0 is rejected by kernel space as an
1293
         * invalid PID.
1294
         */
1295
0
        return DPIF_NETLINK_PER_CPU_PID;
1296
0
    }
1297
1298
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
1299
0
    ret = dpif_netlink_port_get_pid__(dpif, port_no);
1300
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1301
1302
0
    return ret;
1303
0
}
1304
1305
static int
1306
dpif_netlink_flow_flush(struct dpif *dpif_)
1307
0
{
1308
0
    const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif_));
1309
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1310
0
    struct dpif_netlink_flow flow;
1311
1312
0
    dpif_netlink_flow_init(&flow);
1313
0
    flow.cmd = OVS_FLOW_CMD_DEL;
1314
0
    flow.dp_ifindex = dpif->dp_ifindex;
1315
1316
0
    if (netdev_is_flow_api_enabled()) {
1317
0
        netdev_ports_flow_flush(dpif_type_str);
1318
0
    }
1319
1320
0
    return dpif_netlink_flow_transact(&flow, NULL, NULL);
1321
0
}
1322
1323
struct dpif_netlink_port_state {
1324
    struct nl_dump dump;
1325
    struct ofpbuf buf;
1326
};
1327
1328
static void
1329
dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1330
                               struct nl_dump *dump)
1331
0
{
1332
0
    struct dpif_netlink_vport request;
1333
0
    struct ofpbuf *buf;
1334
1335
0
    dpif_netlink_vport_init(&request);
1336
0
    request.cmd = OVS_VPORT_CMD_GET;
1337
0
    request.dp_ifindex = dpif->dp_ifindex;
1338
1339
0
    buf = ofpbuf_new(1024);
1340
0
    dpif_netlink_vport_to_ofpbuf(&request, buf);
1341
0
    nl_dump_start(dump, NETLINK_GENERIC, buf);
1342
0
    ofpbuf_delete(buf);
1343
0
}
1344
1345
static int
1346
dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
1347
0
{
1348
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1349
0
    struct dpif_netlink_port_state *state;
1350
1351
0
    *statep = state = xmalloc(sizeof *state);
1352
0
    dpif_netlink_port_dump_start__(dpif, &state->dump);
1353
1354
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1355
0
    return 0;
1356
0
}
1357
1358
static int
1359
dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1360
                              struct nl_dump *dump,
1361
                              struct dpif_netlink_vport *vport,
1362
                              struct ofpbuf *buffer)
1363
0
{
1364
0
    struct ofpbuf buf;
1365
0
    int error;
1366
1367
0
    if (!nl_dump_next(dump, &buf, buffer)) {
1368
0
        return EOF;
1369
0
    }
1370
1371
0
    error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
1372
0
    if (error) {
1373
0
        VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1374
0
                     dpif_name(&dpif->dpif), ovs_strerror(error));
1375
0
    }
1376
0
    return error;
1377
0
}
1378
1379
static int
1380
dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1381
                            struct dpif_port *dpif_port)
1382
0
{
1383
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1384
0
    struct dpif_netlink_port_state *state = state_;
1385
0
    struct dpif_netlink_vport vport;
1386
0
    int error;
1387
1388
0
    error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1389
0
                                          &state->buf);
1390
0
    if (error) {
1391
0
        return error;
1392
0
    }
1393
0
    dpif_port->name = CONST_CAST(char *, vport.name);
1394
0
    dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
1395
0
    dpif_port->port_no = vport.port_no;
1396
0
    return 0;
1397
0
}
1398
1399
static int
1400
dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
1401
0
{
1402
0
    struct dpif_netlink_port_state *state = state_;
1403
0
    int error = nl_dump_done(&state->dump);
1404
1405
0
    ofpbuf_uninit(&state->buf);
1406
0
    free(state);
1407
0
    return error;
1408
0
}
1409
1410
static int
1411
dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
1412
0
{
1413
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1414
1415
    /* Lazily create the Netlink socket to listen for notifications. */
1416
0
    if (!dpif->port_notifier) {
1417
0
        struct nl_sock *sock;
1418
0
        int error;
1419
1420
0
        error = nl_sock_create(NETLINK_GENERIC, &sock);
1421
0
        if (error) {
1422
0
            return error;
1423
0
        }
1424
1425
0
        error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1426
0
        if (error) {
1427
0
            nl_sock_destroy(sock);
1428
0
            return error;
1429
0
        }
1430
0
        dpif->port_notifier = sock;
1431
1432
        /* We have no idea of the current state so report that everything
1433
         * changed. */
1434
0
        return ENOBUFS;
1435
0
    }
1436
1437
0
    for (;;) {
1438
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1439
0
        uint64_t buf_stub[4096 / 8];
1440
0
        struct ofpbuf buf;
1441
0
        int error;
1442
1443
0
        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
1444
0
        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
1445
0
        if (!error) {
1446
0
            struct dpif_netlink_vport vport;
1447
1448
0
            error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
1449
0
            if (!error) {
1450
0
                if (vport.dp_ifindex == dpif->dp_ifindex
1451
0
                    && (vport.cmd == OVS_VPORT_CMD_NEW
1452
0
                        || vport.cmd == OVS_VPORT_CMD_DEL
1453
0
                        || vport.cmd == OVS_VPORT_CMD_SET)) {
1454
0
                    VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1455
0
                             dpif->dpif.full_name, vport.name, vport.cmd);
1456
0
                    if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
1457
0
                        dpif->refresh_channels = true;
1458
0
                    }
1459
0
                    *devnamep = xstrdup(vport.name);
1460
0
                    ofpbuf_uninit(&buf);
1461
0
                    return 0;
1462
0
                }
1463
0
            }
1464
0
        } else if (error != EAGAIN) {
1465
0
            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1466
0
                         ovs_strerror(error));
1467
0
            nl_sock_drain(dpif->port_notifier);
1468
0
            error = ENOBUFS;
1469
0
        }
1470
1471
0
        ofpbuf_uninit(&buf);
1472
0
        if (error) {
1473
0
            return error;
1474
0
        }
1475
0
    }
1476
0
}
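
The ENOBUFS convention above ("we have no idea of the current state") from the caller's side; a sketch assuming the generic dpif_port_poll() wrapper from lib/dpif.h and an open 'dpif_' handle:

    char *devname;
    int err = dpif_port_poll(dpif_, &devname);

    if (err == ENOBUFS) {
        /* Notifier just created or overflowed: re-scan every port. */
    } else if (!err) {
        /* Exactly one port changed. */
        free(devname);
    } else if (err != EAGAIN) {
        /* Hard error from the notification socket. */
    }
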
1477
1478
static void
1479
dpif_netlink_port_poll_wait(const struct dpif *dpif_)
1480
0
{
1481
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1482
1483
0
    if (dpif->port_notifier) {
1484
0
        nl_sock_wait(dpif->port_notifier, POLLIN);
1485
0
    } else {
1486
0
        poll_immediate_wake();
1487
0
    }
1488
0
}
1489
1490
static void
1491
dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1492
                            const ovs_u128 *ufid, bool terse)
1493
0
{
1494
0
    if (ufid) {
1495
0
        request->ufid = *ufid;
1496
0
        request->ufid_present = true;
1497
0
    } else {
1498
0
        request->ufid_present = false;
1499
0
    }
1500
0
    request->ufid_terse = terse;
1501
0
}
1502
1503
static void
1504
dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1505
                             const struct nlattr *key, size_t key_len,
1506
                             const ovs_u128 *ufid, bool terse,
1507
                             struct dpif_netlink_flow *request)
1508
0
{
1509
0
    dpif_netlink_flow_init(request);
1510
0
    request->cmd = OVS_FLOW_CMD_GET;
1511
0
    request->dp_ifindex = dpif->dp_ifindex;
1512
0
    request->key = key;
1513
0
    request->key_len = key_len;
1514
0
    dpif_netlink_flow_init_ufid(request, ufid, terse);
1515
0
}
1516
1517
static void
1518
dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1519
                           const struct dpif_flow_get *get,
1520
                           struct dpif_netlink_flow *request)
1521
0
{
1522
0
    dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1523
0
                                 false, request);
1524
0
}
1525
1526
static int
1527
dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1528
                        const struct nlattr *key, size_t key_len,
1529
                        const ovs_u128 *ufid, bool terse,
1530
                        struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1531
0
{
1532
0
    struct dpif_netlink_flow request;
1533
1534
0
    dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
1535
0
    return dpif_netlink_flow_transact(&request, reply, bufp);
1536
0
}
1537
1538
static int
1539
dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1540
                      const struct dpif_netlink_flow *flow,
1541
                      struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1542
0
{
1543
0
    return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1544
0
                                   flow->ufid_present ? &flow->ufid : NULL,
1545
0
                                   false, reply, bufp);
1546
0
}
1547
1548
static void
1549
dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1550
                           const struct dpif_flow_put *put,
1551
                           struct dpif_netlink_flow *request)
1552
0
{
1553
0
    static const struct nlattr dummy_action;
1554
1555
0
    dpif_netlink_flow_init(request);
1556
0
    request->cmd = (put->flags & DPIF_FP_CREATE
1557
0
                    ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1558
0
    request->dp_ifindex = dpif->dp_ifindex;
1559
0
    request->key = put->key;
1560
0
    request->key_len = put->key_len;
1561
0
    request->mask = put->mask;
1562
0
    request->mask_len = put->mask_len;
1563
0
    dpif_netlink_flow_init_ufid(request, put->ufid, false);
1564
1565
    /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1566
0
    request->actions = (put->actions
1567
0
                        ? put->actions
1568
0
                        : CONST_CAST(struct nlattr *, &dummy_action));
1569
0
    request->actions_len = put->actions_len;
1570
0
    if (put->flags & DPIF_FP_ZERO_STATS) {
1571
0
        request->clear = true;
1572
0
    }
1573
0
    if (put->flags & DPIF_FP_PROBE) {
1574
0
        request->probe = true;
1575
0
    }
1576
0
    request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
1577
0
}
1578
1579
static void
1580
dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1581
                             const struct nlattr *key, size_t key_len,
1582
                             const ovs_u128 *ufid, bool terse,
1583
                             struct dpif_netlink_flow *request)
1584
0
{
1585
0
    dpif_netlink_flow_init(request);
1586
0
    request->cmd = OVS_FLOW_CMD_DEL;
1587
0
    request->dp_ifindex = dpif->dp_ifindex;
1588
0
    request->key = key;
1589
0
    request->key_len = key_len;
1590
0
    dpif_netlink_flow_init_ufid(request, ufid, terse);
1591
0
}
1592
1593
static void
1594
dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1595
                           const struct dpif_flow_del *del,
1596
                           struct dpif_netlink_flow *request)
1597
0
{
1598
0
    dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1599
0
                                 del->ufid, del->terse, request);
1600
0
}
1601
1602
struct dpif_netlink_flow_dump {
1603
    struct dpif_flow_dump up;
1604
    struct nl_dump nl_dump;
1605
    atomic_int status;
1606
    struct netdev_flow_dump **netdev_dumps;
1607
    int netdev_dumps_num;                    /* Number of netdev_flow_dumps */
1608
    struct ovs_mutex netdev_lock;            /* Guards the following. */
1609
    int netdev_current_dump OVS_GUARDED;     /* Shared current dump */
1610
    struct dpif_flow_dump_types types;       /* Type of dump */
1611
};
1612
1613
static struct dpif_netlink_flow_dump *
1614
dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
1615
0
{
1616
0
    return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
1617
0
}
1618
1619
static void
1620
start_netdev_dump(const struct dpif *dpif_,
1621
                  struct dpif_netlink_flow_dump *dump)
1622
0
{
1623
0
    ovs_mutex_init(&dump->netdev_lock);
1624
1625
0
    if (!(dump->types.netdev_flows)) {
1626
0
        dump->netdev_dumps_num = 0;
1627
0
        dump->netdev_dumps = NULL;
1628
0
        return;
1629
0
    }
1630
1631
0
    ovs_mutex_lock(&dump->netdev_lock);
1632
0
    dump->netdev_current_dump = 0;
1633
0
    dump->netdev_dumps
1634
0
        = netdev_ports_flow_dump_create(dpif_normalize_type(dpif_type(dpif_)),
1635
0
                                        &dump->netdev_dumps_num,
1636
0
                                        dump->up.terse);
1637
0
    ovs_mutex_unlock(&dump->netdev_lock);
1638
0
}
1639
1640
static void
1641
dpif_netlink_populate_flow_dump_types(struct dpif_netlink_flow_dump *dump,
1642
                                      struct dpif_flow_dump_types *types)
1643
0
{
1644
0
    if (!types) {
1645
0
        dump->types.ovs_flows = true;
1646
0
        dump->types.netdev_flows = true;
1647
0
    } else {
1648
0
        memcpy(&dump->types, types, sizeof *types);
1649
0
    }
1650
0
}
1651
1652
static struct dpif_flow_dump *
1653
dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
1654
                              struct dpif_flow_dump_types *types)
1655
0
{
1656
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1657
0
    struct dpif_netlink_flow_dump *dump;
1658
0
    struct dpif_netlink_flow request;
1659
0
    struct ofpbuf *buf;
1660
1661
0
    dump = xmalloc(sizeof *dump);
1662
0
    dpif_flow_dump_init(&dump->up, dpif_);
1663
1664
0
    dpif_netlink_populate_flow_dump_types(dump, types);
1665
1666
0
    if (dump->types.ovs_flows) {
1667
0
        dpif_netlink_flow_init(&request);
1668
0
        request.cmd = OVS_FLOW_CMD_GET;
1669
0
        request.dp_ifindex = dpif->dp_ifindex;
1670
0
        request.ufid_present = false;
1671
0
        request.ufid_terse = terse;
1672
1673
0
        buf = ofpbuf_new(1024);
1674
0
        dpif_netlink_flow_to_ofpbuf(&request, buf);
1675
0
        nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1676
0
        ofpbuf_delete(buf);
1677
0
    }
1678
0
    atomic_init(&dump->status, 0);
1679
0
    dump->up.terse = terse;
1680
1681
0
    start_netdev_dump(dpif_, dump);
1682
1683
0
    return &dump->up;
1684
0
}
1685
1686
static int
1687
dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
1688
0
{
1689
0
    struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1690
0
    unsigned int nl_status = 0;
1691
0
    int dump_status;
1692
1693
0
    if (dump->types.ovs_flows) {
1694
0
        nl_status = nl_dump_done(&dump->nl_dump);
1695
0
    }
1696
1697
0
    for (int i = 0; i < dump->netdev_dumps_num; i++) {
1698
0
        int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1699
1700
0
        if (err != 0 && err != EOPNOTSUPP) {
1701
0
            VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1702
0
        }
1703
0
    }
1704
1705
0
    free(dump->netdev_dumps);
1706
0
    ovs_mutex_destroy(&dump->netdev_lock);
1707
1708
    /* No other thread has access to 'dump' at this point. */
1709
0
    atomic_read_relaxed(&dump->status, &dump_status);
1710
0
    free(dump);
1711
0
    return dump_status ? dump_status : nl_status;
1712
0
}
1713
1714
struct dpif_netlink_flow_dump_thread {
1715
    struct dpif_flow_dump_thread up;
1716
    struct dpif_netlink_flow_dump *dump;
1717
    struct dpif_netlink_flow flow;
1718
    struct dpif_flow_stats stats;
1719
    struct ofpbuf nl_flows;     /* Always used to store flows. */
1720
    struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
1721
    int netdev_dump_idx;        /* This thread's current netdev dump index. */
1722
    bool netdev_done;           /* True if we are finished dumping netdevs. */
1723
1724
    /* (Key/Mask/Actions) Buffers for netdev dumping */
1725
    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
1726
    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
1727
    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
1728
};
1729
1730
static struct dpif_netlink_flow_dump_thread *
1731
dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
1732
0
{
1733
0
    return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
1734
0
}
1735
1736
static struct dpif_flow_dump_thread *
1737
dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1738
0
{
1739
0
    struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1740
0
    struct dpif_netlink_flow_dump_thread *thread;
1741
1742
0
    thread = xmalloc(sizeof *thread);
1743
0
    dpif_flow_dump_thread_init(&thread->up, &dump->up);
1744
0
    thread->dump = dump;
1745
0
    ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1746
0
    thread->nl_actions = NULL;
1747
0
    thread->netdev_dump_idx = 0;
1748
0
    thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
1749
1750
0
    return &thread->up;
1751
0
}
1752
1753
static void
1754
dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
1755
0
{
1756
0
    struct dpif_netlink_flow_dump_thread *thread
1757
0
        = dpif_netlink_flow_dump_thread_cast(thread_);
1758
1759
0
    ofpbuf_uninit(&thread->nl_flows);
1760
0
    ofpbuf_delete(thread->nl_actions);
1761
0
    free(thread);
1762
0
}
1763
1764
static void
1765
dpif_netlink_flow_to_dpif_flow(struct dpif_flow *dpif_flow,
1766
                               const struct dpif_netlink_flow *datapath_flow)
1767
0
{
1768
0
    dpif_flow->key = datapath_flow->key;
1769
0
    dpif_flow->key_len = datapath_flow->key_len;
1770
0
    dpif_flow->mask = datapath_flow->mask;
1771
0
    dpif_flow->mask_len = datapath_flow->mask_len;
1772
0
    dpif_flow->actions = datapath_flow->actions;
1773
0
    dpif_flow->actions_len = datapath_flow->actions_len;
1774
0
    dpif_flow->ufid_present = datapath_flow->ufid_present;
1775
0
    dpif_flow->pmd_id = PMD_ID_NULL;
1776
0
    if (datapath_flow->ufid_present) {
1777
0
        dpif_flow->ufid = datapath_flow->ufid;
1778
0
    } else {
1779
0
        ovs_assert(datapath_flow->key && datapath_flow->key_len);
1780
0
        odp_flow_key_hash(datapath_flow->key, datapath_flow->key_len,
1781
0
                          &dpif_flow->ufid);
1782
0
    }
1783
0
    dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
1784
0
    dpif_flow->attrs.offloaded = false;
1785
0
    dpif_flow->attrs.dp_layer = "ovs";
1786
0
    dpif_flow->attrs.dp_extra_info = NULL;
1787
0
}
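
/* Editor's illustrative example (not part of the instrumented source):
 * dpif_netlink_flow_to_dpif_flow() above falls back to hashing the flow key
 * when the kernel did not report a UFID.  The standalone sketch below shows
 * the same idea of deriving a stable 128-bit identifier from the key bytes;
 * the FNV-1a hash and the 'example_*' names are stand-ins, not OVS's actual
 * odp_flow_key_hash() algorithm. */
#include <stdint.h>
#include <stddef.h>

struct example_u128 {               /* Hypothetical stand-in for ovs_u128. */
    uint64_t lo, hi;
};

static uint64_t
example_fnv1a64(const void *data, size_t len, uint64_t basis)
{
    const uint8_t *p = data;
    uint64_t hash = basis;

    for (size_t i = 0; i < len; i++) {
        hash ^= p[i];
        hash *= UINT64_C(0x100000001b3);        /* 64-bit FNV prime. */
    }
    return hash;
}

static void
example_key_to_ufid(const void *key, size_t key_len,
                    struct example_u128 *ufid)
{
    /* Two passes with different (arbitrary) bases give the two halves. */
    ufid->lo = example_fnv1a64(key, key_len, UINT64_C(0xcbf29ce484222325));
    ufid->hi = example_fnv1a64(key, key_len, UINT64_C(0x84222325cbf29ce4));
}
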
1788
1789
/* The design is such that all threads work together from the first dump
1790
 * to the last, in order (at first they all start on dump 0).
1791
 * When the first thread finds that the given dump is finished,
1792
 * they all move to the next. If two or more threads find the same dump
1793
 * is finished at the same time, the first one will advance the shared
1794
 * netdev_current_dump and the others will catch up. */
1795
static void
1796
dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
1797
0
{
1798
0
    struct dpif_netlink_flow_dump *dump = thread->dump;
1799
1800
0
    ovs_mutex_lock(&dump->netdev_lock);
1801
    /* if we haven't finished (dumped everything) */
1802
0
    if (dump->netdev_current_dump < dump->netdev_dumps_num) {
1803
        /* if we are the first to find that the current dump is finished,
1804
         * advance it. */
1805
0
        if (thread->netdev_dump_idx == dump->netdev_current_dump) {
1806
0
            thread->netdev_dump_idx = ++dump->netdev_current_dump;
1807
            /* did we just finish the last dump? done. */
1808
0
            if (dump->netdev_current_dump == dump->netdev_dumps_num) {
1809
0
                thread->netdev_done = true;
1810
0
            }
1811
0
        } else {
1812
            /* otherwise, we are behind, catch up */
1813
0
            thread->netdev_dump_idx = dump->netdev_current_dump;
1814
0
        }
1815
0
    } else {
1816
        /* some other thread finished */
1817
0
        thread->netdev_done = true;
1818
0
    }
1819
0
    ovs_mutex_unlock(&dump->netdev_lock);
1820
0
}
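
/* Editor's illustrative example (not part of the instrumented source): a
 * minimal standalone sketch of the catch-up protocol described above,
 * assuming a hypothetical 'example_dump' that holds the shared state.  The
 * first thread to find its current dump exhausted advances the shared index;
 * a thread that is behind simply adopts the shared value. */
#include <pthread.h>
#include <stdbool.h>

struct example_dump {
    pthread_mutex_t lock;
    int current;                    /* Shared current dump index. */
    int total;                      /* Total number of per-netdev dumps. */
};

struct example_thread {
    int idx;                        /* This thread's current dump index. */
    bool done;
};

static void
example_advance(struct example_dump *dump, struct example_thread *thread)
{
    pthread_mutex_lock(&dump->lock);
    if (dump->current < dump->total) {
        if (thread->idx == dump->current) {
            /* First to notice exhaustion: advance the shared index. */
            thread->idx = ++dump->current;
            if (dump->current == dump->total) {
                thread->done = true;        /* Just finished the last dump. */
            }
        } else {
            /* We are behind: catch up to the shared index. */
            thread->idx = dump->current;
        }
    } else {
        /* Some other thread already finished the last dump. */
        thread->done = true;
    }
    pthread_mutex_unlock(&dump->lock);
}
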
1821
1822
static int
1823
dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
1824
                                       struct ofpbuf *key_buf,
1825
                                       struct ofpbuf *mask_buf,
1826
                                       struct nlattr *actions,
1827
                                       struct dpif_flow_stats *stats,
1828
                                       struct dpif_flow_attrs *attrs,
1829
                                       ovs_u128 *ufid,
1830
                                       struct dpif_flow *flow,
1831
                                       bool terse)
1832
0
{
1833
0
    memset(flow, 0, sizeof *flow);
1834
1835
0
    if (!terse) {
1836
0
        struct odp_flow_key_parms odp_parms = {
1837
0
            .flow = &match->flow,
1838
0
            .mask = &match->wc.masks,
1839
0
            .support = {
1840
0
                .max_vlan_headers = 2,
1841
0
                .recirc = true,
1842
0
                .ct_state = true,
1843
0
                .ct_zone = true,
1844
0
                .ct_mark = true,
1845
0
                .ct_label = true,
1846
0
            },
1847
0
        };
1848
0
        size_t offset;
1849
1850
        /* Key */
1851
0
        offset = key_buf->size;
1852
0
        flow->key = ofpbuf_tail(key_buf);
1853
0
        odp_flow_key_from_flow(&odp_parms, key_buf);
1854
0
        flow->key_len = key_buf->size - offset;
1855
1856
        /* Mask */
1857
0
        offset = mask_buf->size;
1858
0
        flow->mask = ofpbuf_tail(mask_buf);
1859
0
        odp_parms.key_buf = key_buf;
1860
0
        odp_flow_key_from_mask(&odp_parms, mask_buf);
1861
0
        flow->mask_len = mask_buf->size - offset;
1862
1863
        /* Actions */
1864
0
        flow->actions = nl_attr_get(actions);
1865
0
        flow->actions_len = nl_attr_get_size(actions);
1866
0
    }
1867
1868
    /* Stats */
1869
0
    memcpy(&flow->stats, stats, sizeof *stats);
1870
1871
    /* UFID */
1872
0
    flow->ufid_present = true;
1873
0
    flow->ufid = *ufid;
1874
1875
0
    flow->pmd_id = PMD_ID_NULL;
1876
1877
0
    memcpy(&flow->attrs, attrs, sizeof *attrs);
1878
1879
0
    return 0;
1880
0
}
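
/* Editor's illustrative example (not part of the instrumented source): the
 * key and mask above are appended to caller-provided buffers, and the flow
 * simply records the tail pointer plus the size delta.  This standalone
 * sketch shows that offset/length bookkeeping with a hypothetical
 * 'example_buf' standing in for struct ofpbuf. */
#include <assert.h>
#include <stddef.h>
#include <string.h>

struct example_buf {
    char data[512];
    size_t size;                    /* Bytes currently in use. */
};

static const void *
example_append(struct example_buf *buf, const void *src, size_t len,
               size_t *out_len)
{
    size_t offset = buf->size;      /* Size before the append... */

    assert(len <= sizeof buf->data - buf->size);
    memcpy(buf->data + offset, src, len);
    buf->size += len;

    *out_len = buf->size - offset;  /* ...gives the serialized length. */
    return buf->data + offset;      /* Data starts at the recorded offset. */
}
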
1881
1882
static int
1883
dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1884
                            struct dpif_flow *flows, int max_flows)
1885
0
{
1886
0
    struct dpif_netlink_flow_dump_thread *thread
1887
0
        = dpif_netlink_flow_dump_thread_cast(thread_);
1888
0
    struct dpif_netlink_flow_dump *dump = thread->dump;
1889
0
    struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
1890
0
    int n_flows;
1891
1892
0
    ofpbuf_delete(thread->nl_actions);
1893
0
    thread->nl_actions = NULL;
1894
1895
0
    n_flows = 0;
1896
0
    max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
1897
1898
0
    while (!thread->netdev_done && n_flows < max_flows) {
1899
0
        struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
1900
0
        struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
1901
0
        struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
1902
0
        struct ofpbuf key, mask, act;
1903
0
        struct dpif_flow *f = &flows[n_flows];
1904
0
        int cur = thread->netdev_dump_idx;
1905
0
        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
1906
0
        struct match match;
1907
0
        struct nlattr *actions;
1908
0
        struct dpif_flow_stats stats;
1909
0
        struct dpif_flow_attrs attrs;
1910
0
        ovs_u128 ufid;
1911
0
        bool has_next;
1912
1913
0
        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
1914
0
        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
1915
0
        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
1916
0
        has_next = netdev_flow_dump_next(netdev_dump, &match,
1917
0
                                        &actions, &stats, &attrs,
1918
0
                                        &ufid,
1919
0
                                        &thread->nl_flows,
1920
0
                                        &act);
1921
0
        if (has_next) {
1922
0
            dpif_netlink_netdev_match_to_dpif_flow(&match,
1923
0
                                                   &key, &mask,
1924
0
                                                   actions,
1925
0
                                                   &stats,
1926
0
                                                   &attrs,
1927
0
                                                   &ufid,
1928
0
                                                   f,
1929
0
                                                   dump->up.terse);
1930
0
            n_flows++;
1931
0
        } else {
1932
0
            dpif_netlink_advance_netdev_dump(thread);
1933
0
        }
1934
0
    }
1935
1936
0
    if (!(dump->types.ovs_flows)) {
1937
0
        return n_flows;
1938
0
    }
1939
1940
0
    while (!n_flows
1941
0
           || (n_flows < max_flows && thread->nl_flows.size)) {
1942
0
        struct dpif_netlink_flow datapath_flow;
1943
0
        struct ofpbuf nl_flow;
1944
0
        int error;
1945
1946
        /* Try to grab another flow. */
1947
0
        if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1948
0
            break;
1949
0
        }
1950
1951
        /* Convert the flow to our output format. */
1952
0
        error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
1953
0
        if (error) {
1954
0
            atomic_store_relaxed(&dump->status, error);
1955
0
            break;
1956
0
        }
1957
1958
0
        if (dump->up.terse || datapath_flow.actions) {
1959
            /* Common case: we don't want actions, or the flow includes
1960
             * actions. */
1961
0
            dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
1962
0
        } else {
1963
            /* Rare case: the flow does not include actions.  Retrieve this
1964
             * individual flow again to get the actions. */
1965
0
            error = dpif_netlink_flow_get(dpif, &datapath_flow,
1966
0
                                          &datapath_flow, &thread->nl_actions);
1967
0
            if (error == ENOENT) {
1968
0
                VLOG_DBG("dumped flow disappeared on get");
1969
0
                continue;
1970
0
            } else if (error) {
1971
0
                VLOG_WARN("error fetching dumped flow: %s",
1972
0
                          ovs_strerror(error));
1973
0
                atomic_store_relaxed(&dump->status, error);
1974
0
                break;
1975
0
            }
1976
1977
            /* Save this flow.  Then exit, because we only have one buffer to
1978
             * handle this case. */
1979
0
            dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
1980
0
            break;
1981
0
        }
1982
0
    }
1983
0
    return n_flows;
1984
0
}
1985
1986
static void
1987
dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1988
                            struct ofpbuf *buf)
1989
0
{
1990
0
    struct ovs_header *k_exec;
1991
0
    size_t key_ofs;
1992
1993
0
    ofpbuf_prealloc_tailroom(buf, (64
1994
0
                                   + dp_packet_size(d_exec->packet)
1995
0
                                   + ODP_KEY_METADATA_SIZE
1996
0
                                   + d_exec->actions_len));
1997
1998
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
1999
0
                          OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
2000
2001
0
    k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
2002
0
    k_exec->dp_ifindex = dp_ifindex;
2003
2004
0
    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
2005
0
                      dp_packet_data(d_exec->packet),
2006
0
                      dp_packet_size(d_exec->packet));
2007
2008
0
    key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
2009
0
    odp_key_from_dp_packet(buf, d_exec->packet);
2010
0
    nl_msg_end_nested(buf, key_ofs);
2011
2012
0
    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
2013
0
                      d_exec->actions, d_exec->actions_len);
2014
0
    if (d_exec->probe) {
2015
0
        nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
2016
0
    }
2017
0
    if (d_exec->mtu) {
2018
0
        nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
2019
0
    }
2020
2021
0
    if (d_exec->hash) {
2022
0
        nl_msg_put_u64(buf, OVS_PACKET_ATTR_HASH, d_exec->hash);
2023
0
    }
2024
2025
0
    if (d_exec->upcall_pid) {
2026
0
        nl_msg_put_u32(buf, OVS_PACKET_ATTR_UPCALL_PID, d_exec->upcall_pid);
2027
0
    }
2028
0
}
2029
2030
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
2031
 * Returns the number actually executed (at least 1, if 'n_ops' is
2032
 * positive). */
2033
static size_t
2034
dpif_netlink_operate__(struct dpif_netlink *dpif,
2035
                       struct dpif_op **ops, size_t n_ops)
2036
0
{
2037
0
    struct op_auxdata {
2038
0
        struct nl_transaction txn;
2039
2040
0
        struct ofpbuf request;
2041
0
        uint64_t request_stub[1024 / 8];
2042
2043
0
        struct ofpbuf reply;
2044
0
        uint64_t reply_stub[1024 / 8];
2045
0
    } auxes[OPERATE_MAX_OPS];
2046
2047
0
    struct nl_transaction *txnsp[OPERATE_MAX_OPS];
2048
0
    size_t i;
2049
2050
0
    n_ops = MIN(n_ops, OPERATE_MAX_OPS);
2051
0
    for (i = 0; i < n_ops; i++) {
2052
0
        struct op_auxdata *aux = &auxes[i];
2053
0
        struct dpif_op *op = ops[i];
2054
0
        struct dpif_flow_put *put;
2055
0
        struct dpif_flow_del *del;
2056
0
        struct dpif_flow_get *get;
2057
0
        struct dpif_netlink_flow flow;
2058
2059
0
        ofpbuf_use_stub(&aux->request,
2060
0
                        aux->request_stub, sizeof aux->request_stub);
2061
0
        aux->txn.request = &aux->request;
2062
2063
0
        ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
2064
0
        aux->txn.reply = NULL;
2065
2066
0
        switch (op->type) {
2067
0
        case DPIF_OP_FLOW_PUT:
2068
0
            put = &op->flow_put;
2069
0
            dpif_netlink_init_flow_put(dpif, put, &flow);
2070
0
            if (put->stats) {
2071
0
                flow.nlmsg_flags |= NLM_F_ECHO;
2072
0
                aux->txn.reply = &aux->reply;
2073
0
            }
2074
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
2075
2076
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_put,
2077
0
                           dpif, put, &flow, &aux->request);
2078
0
            break;
2079
2080
0
        case DPIF_OP_FLOW_DEL:
2081
0
            del = &op->flow_del;
2082
0
            dpif_netlink_init_flow_del(dpif, del, &flow);
2083
0
            if (del->stats) {
2084
0
                flow.nlmsg_flags |= NLM_F_ECHO;
2085
0
                aux->txn.reply = &aux->reply;
2086
0
            }
2087
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
2088
2089
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_del,
2090
0
                           dpif, del, &flow, &aux->request);
2091
0
            break;
2092
2093
0
        case DPIF_OP_EXECUTE:
2094
            /* Can't execute a packet that won't fit in a Netlink attribute. */
2095
0
            if (OVS_UNLIKELY(nl_attr_oversized(
2096
0
                                 dp_packet_size(op->execute.packet)))) {
2097
                /* Report an error immediately if this is the first operation.
2098
                 * Otherwise the easiest thing to do is to postpone to the next
2099
                 * call (when this will be the first operation). */
2100
0
                if (i == 0) {
2101
0
                    VLOG_ERR_RL(&error_rl,
2102
0
                                "dropping oversized %"PRIu32"-byte packet",
2103
0
                                dp_packet_size(op->execute.packet));
2104
0
                    op->error = ENOBUFS;
2105
0
                    return 1;
2106
0
                }
2107
0
                n_ops = i;
2108
0
            } else {
2109
0
                dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
2110
0
                                            &aux->request);
2111
2112
0
                OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_execute,
2113
0
                               dpif, &op->execute,
2114
0
                               dp_packet_data(op->execute.packet),
2115
0
                               dp_packet_size(op->execute.packet),
2116
0
                               &aux->request);
2117
0
            }
2118
0
            break;
2119
2120
0
        case DPIF_OP_FLOW_GET:
2121
0
            get = &op->flow_get;
2122
0
            dpif_netlink_init_flow_get(dpif, get, &flow);
2123
0
            aux->txn.reply = get->buffer;
2124
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
2125
2126
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_get,
2127
0
                           dpif, get, &flow, &aux->request);
2128
0
            break;
2129
2130
0
        default:
2131
0
            OVS_NOT_REACHED();
2132
0
        }
2133
0
    }
2134
2135
0
    for (i = 0; i < n_ops; i++) {
2136
0
        txnsp[i] = &auxes[i].txn;
2137
0
    }
2138
0
    nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
2139
2140
0
    for (i = 0; i < n_ops; i++) {
2141
0
        struct op_auxdata *aux = &auxes[i];
2142
0
        struct nl_transaction *txn = &auxes[i].txn;
2143
0
        struct dpif_op *op = ops[i];
2144
0
        struct dpif_flow_put *put;
2145
0
        struct dpif_flow_del *del;
2146
0
        struct dpif_flow_get *get;
2147
2148
0
        op->error = txn->error;
2149
2150
0
        switch (op->type) {
2151
0
        case DPIF_OP_FLOW_PUT:
2152
0
            put = &op->flow_put;
2153
0
            if (put->stats) {
2154
0
                if (!op->error) {
2155
0
                    struct dpif_netlink_flow reply;
2156
2157
0
                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
2158
0
                                                              txn->reply);
2159
0
                    if (!op->error) {
2160
0
                        dpif_netlink_flow_get_stats(&reply, put->stats);
2161
0
                    }
2162
0
                }
2163
0
            }
2164
0
            break;
2165
2166
0
        case DPIF_OP_FLOW_DEL:
2167
0
            del = &op->flow_del;
2168
0
            if (del->stats) {
2169
0
                if (!op->error) {
2170
0
                    struct dpif_netlink_flow reply;
2171
2172
0
                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
2173
0
                                                              txn->reply);
2174
0
                    if (!op->error) {
2175
0
                        dpif_netlink_flow_get_stats(&reply, del->stats);
2176
0
                    }
2177
0
                }
2178
0
            }
2179
0
            break;
2180
2181
0
        case DPIF_OP_EXECUTE:
2182
0
            break;
2183
2184
0
        case DPIF_OP_FLOW_GET:
2185
0
            get = &op->flow_get;
2186
0
            if (!op->error) {
2187
0
                struct dpif_netlink_flow reply;
2188
2189
0
                op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
2190
0
                if (!op->error) {
2191
0
                    dpif_netlink_flow_to_dpif_flow(get->flow, &reply);
2192
0
                }
2193
0
            }
2194
0
            break;
2195
2196
0
        default:
2197
0
            OVS_NOT_REACHED();
2198
0
        }
2199
2200
0
        ofpbuf_uninit(&aux->request);
2201
0
        ofpbuf_uninit(&aux->reply);
2202
0
    }
2203
2204
0
    return n_ops;
2205
0
}
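
/* Editor's background sketch (not part of the instrumented source): the
 * DPIF_OP_EXECUTE case above refuses packets that cannot fit into a single
 * Netlink attribute.  A generic Netlink attribute stores its total length
 * (header plus payload) in the 16-bit nla_len field, so the payload is
 * bounded as shown below; nl_attr_oversized() presumably encodes a check
 * along these lines, but this helper only illustrates the generic limit. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <linux/netlink.h>          /* NLA_HDRLEN */

static bool
example_attr_payload_fits(size_t payload_len)
{
    return payload_len <= (size_t) (UINT16_MAX - NLA_HDRLEN);
}
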
2206
2207
static int
2208
parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
2209
0
{
2210
0
    const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif));
2211
0
    struct dpif_flow *dpif_flow = get->flow;
2212
0
    struct match match;
2213
0
    struct nlattr *actions;
2214
0
    struct dpif_flow_stats stats;
2215
0
    struct dpif_flow_attrs attrs;
2216
0
    struct ofpbuf buf;
2217
0
    uint64_t act_buf[1024 / 8];
2218
0
    struct odputil_keybuf maskbuf;
2219
0
    struct odputil_keybuf keybuf;
2220
0
    struct odputil_keybuf actbuf;
2221
0
    struct ofpbuf key, mask, act;
2222
0
    int err;
2223
2224
0
    ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
2225
0
    err = netdev_ports_flow_get(dpif_type_str, &match, &actions, get->ufid,
2226
0
                                &stats, &attrs, &buf);
2227
0
    if (err) {
2228
0
        return err;
2229
0
    }
2230
2231
0
    VLOG_DBG("found flow from netdev, translating to dpif flow");
2232
2233
0
    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
2234
0
    ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
2235
0
    ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
2236
0
    dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
2237
0
                                           &stats, &attrs,
2238
0
                                           (ovs_u128 *) get->ufid,
2239
0
                                           dpif_flow,
2240
0
                                           false);
2241
0
    ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
2242
0
    dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
2243
0
    dpif_flow->actions_len = nl_attr_get_size(actions);
2244
2245
0
    return 0;
2246
0
}
2247
2248
static int
2249
parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
2250
0
{
2251
0
    const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif));
2252
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
2253
0
    struct match match;
2254
0
    odp_port_t in_port;
2255
0
    const struct nlattr *nla;
2256
0
    size_t left;
2257
0
    struct netdev *dev;
2258
0
    struct offload_info info;
2259
0
    int err;
2260
2261
0
    info.tc_modify_flow_deleted = false;
2262
0
    if (put->flags & DPIF_FP_PROBE) {
2263
0
        return EOPNOTSUPP;
2264
0
    }
2265
2266
0
    err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
2267
0
                                      put->mask_len, &match);
2268
0
    if (err) {
2269
0
        return err;
2270
0
    }
2271
2272
0
    in_port = match.flow.in_port.odp_port;
2273
0
    dev = netdev_ports_get(in_port, dpif_type_str);
2274
0
    if (!dev) {
2275
0
        return EOPNOTSUPP;
2276
0
    }
2277
2278
    /* Check the output port for a tunnel. */
2279
0
    NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
2280
0
        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
2281
0
            struct netdev *outdev;
2282
0
            odp_port_t out_port;
2283
2284
0
            out_port = nl_attr_get_odp_port(nla);
2285
0
            outdev = netdev_ports_get(out_port, dpif_type_str);
2286
0
            if (!outdev) {
2287
0
                err = EOPNOTSUPP;
2288
0
                goto out;
2289
0
            }
2290
0
            netdev_close(outdev);
2291
0
        }
2292
0
    }
2293
2294
0
    info.recirc_id_shared_with_tc = (dpif->user_features
2295
0
                                     & OVS_DP_F_TC_RECIRC_SHARING);
2296
0
    err = netdev_flow_put(dev, &match,
2297
0
                          CONST_CAST(struct nlattr *, put->actions),
2298
0
                          put->actions_len,
2299
0
                          CONST_CAST(ovs_u128 *, put->ufid),
2300
0
                          &info, put->stats);
2301
2302
0
    if (!err) {
2303
0
        if (put->flags & DPIF_FP_MODIFY) {
2304
0
            struct dpif_op *opp;
2305
0
            struct dpif_op op;
2306
2307
0
            op.type = DPIF_OP_FLOW_DEL;
2308
0
            op.flow_del.key = put->key;
2309
0
            op.flow_del.key_len = put->key_len;
2310
0
            op.flow_del.ufid = put->ufid;
2311
0
            op.flow_del.pmd_id = put->pmd_id;
2312
0
            op.flow_del.stats = NULL;
2313
0
            op.flow_del.terse = false;
2314
2315
0
            opp = &op;
2316
0
            dpif_netlink_operate__(dpif, &opp, 1);
2317
0
        }
2318
2319
0
        VLOG_DBG("added flow");
2320
0
    } else if (err != EEXIST) {
2321
0
        struct netdev *oor_netdev = NULL;
2322
0
        enum vlog_level level;
2323
0
        if (err == ENOSPC && netdev_is_offload_rebalance_policy_enabled()) {
2324
            /*
2325
             * We need to set OOR on the input netdev (i.e., 'dev') for the
2326
             * flow. But if the flow has a tunnel attribute (i.e., decap action,
2327
             * with a virtual device like a VxLAN interface as its in-port),
2328
             * then lookup and set OOR on the underlying tunnel (real) netdev.
2329
             */
2330
0
            oor_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
2331
0
            if (!oor_netdev) {
2332
                /* Not a 'tunnel' flow */
2333
0
                oor_netdev = dev;
2334
0
            }
2335
0
            netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true);
2336
0
        }
2337
0
        level = (err == ENOSPC || err == EOPNOTSUPP) ? VLL_DBG : VLL_ERR;
2338
0
        VLOG_RL(&rl, level, "failed to offload flow: %s: %s",
2339
0
                ovs_strerror(err),
2340
0
                (oor_netdev ? oor_netdev->name : dev->name));
2341
0
    }
2342
2343
0
out:
2344
0
    if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2345
        /* Modified rule can't be offloaded; try to delete it from HW. */
2346
0
        int del_err = 0;
2347
2348
0
        if (!info.tc_modify_flow_deleted) {
2349
0
            del_err = netdev_flow_del(dev, put->ufid, put->stats);
2350
0
        }
2351
2352
0
        if (!del_err) {
2353
            /* Deleting from HW succeeded, so the old flow was offloaded.
2354
             * Change flags to create the flow in the kernel. */
2355
0
            put->flags &= ~DPIF_FP_MODIFY;
2356
0
            put->flags |= DPIF_FP_CREATE;
2357
0
        } else if (del_err != ENOENT) {
2358
0
            VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2359
0
                        ovs_strerror(del_err));
2360
            /* Stop processing the flow in the kernel. */
2361
0
            err = 0;
2362
0
        }
2363
0
    }
2364
2365
0
    netdev_close(dev);
2366
2367
0
    return err;
2368
0
}
2369
2370
static int
2371
try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
2372
0
{
2373
0
    int err = EOPNOTSUPP;
2374
2375
0
    switch (op->type) {
2376
0
    case DPIF_OP_FLOW_PUT: {
2377
0
        struct dpif_flow_put *put = &op->flow_put;
2378
2379
0
        if (!put->ufid) {
2380
0
            break;
2381
0
        }
2382
2383
0
        err = parse_flow_put(dpif, put);
2384
0
        log_flow_put_message(&dpif->dpif, &this_module, put, 0);
2385
0
        break;
2386
0
    }
2387
0
    case DPIF_OP_FLOW_DEL: {
2388
0
        struct dpif_flow_del *del = &op->flow_del;
2389
2390
0
        if (!del->ufid) {
2391
0
            break;
2392
0
        }
2393
2394
0
        err = netdev_ports_flow_del(
2395
0
                                dpif_normalize_type(dpif_type(&dpif->dpif)),
2396
0
                                del->ufid,
2397
0
                                del->stats);
2398
0
        log_flow_del_message(&dpif->dpif, &this_module, del, 0);
2399
0
        break;
2400
0
    }
2401
0
    case DPIF_OP_FLOW_GET: {
2402
0
        struct dpif_flow_get *get = &op->flow_get;
2403
2404
0
        if (!op->flow_get.ufid) {
2405
0
            break;
2406
0
        }
2407
2408
0
        err = parse_flow_get(dpif, get);
2409
0
        log_flow_get_message(&dpif->dpif, &this_module, get, 0);
2410
0
        break;
2411
0
    }
2412
0
    case DPIF_OP_EXECUTE:
2413
0
    default:
2414
0
        break;
2415
0
    }
2416
2417
0
    return err;
2418
0
}
2419
2420
static void
2421
dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
2422
                            size_t n_ops)
2423
0
{
2424
0
    while (n_ops > 0) {
2425
0
        size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
2426
2427
0
        ops += chunk;
2428
0
        n_ops -= chunk;
2429
0
    }
2430
0
}
2431
2432
static void
2433
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
2434
                     enum dpif_offload_type offload_type)
2435
0
{
2436
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2437
0
    struct dpif_op *new_ops[OPERATE_MAX_OPS];
2438
0
    int count = 0;
2439
0
    int i = 0;
2440
0
    int err = 0;
2441
2442
0
    if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) {
2443
0
        VLOG_DBG("Invalid offload_type: %d", offload_type);
2444
0
        return;
2445
0
    }
2446
2447
0
    if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) {
2448
0
        while (n_ops > 0) {
2449
0
            count = 0;
2450
2451
0
            while (n_ops > 0 && count < OPERATE_MAX_OPS) {
2452
0
                struct dpif_op *op = ops[i++];
2453
2454
0
                err = try_send_to_netdev(dpif, op);
2455
0
                if (err && err != EEXIST) {
2456
0
                    if (offload_type == DPIF_OFFLOAD_ALWAYS) {
2457
                        /* We got an error while offloading an op. Since
2458
                         * OFFLOAD_ALWAYS is specified, we stop further
2459
                         * processing and return to the caller without
2460
                         * invoking the kernel datapath as a fallback. But the
2461
                         * interface requires us to process all n_ops; so
2462
                         * return the same error in the remaining ops too.
2463
                         */
2464
0
                        op->error = err;
2465
0
                        n_ops--;
2466
0
                        while (n_ops > 0) {
2467
0
                            op = ops[i++];
2468
0
                            op->error = err;
2469
0
                            n_ops--;
2470
0
                        }
2471
0
                        return;
2472
0
                    }
2473
0
                    new_ops[count++] = op;
2474
0
                } else {
2475
0
                    op->error = err;
2476
0
                }
2477
2478
0
                n_ops--;
2479
0
            }
2480
2481
0
            dpif_netlink_operate_chunks(dpif, new_ops, count);
2482
0
        }
2483
0
    } else if (offload_type != DPIF_OFFLOAD_ALWAYS) {
2484
0
        dpif_netlink_operate_chunks(dpif, ops, n_ops);
2485
0
    }
2486
0
}
2487
2488
#if _WIN32
2489
static void
2490
dpif_netlink_handler_uninit(struct dpif_handler *handler)
2491
{
2492
    vport_delete_sock_pool(handler);
2493
}
2494
2495
static int
2496
dpif_netlink_handler_init(struct dpif_handler *handler)
2497
{
2498
    return vport_create_sock_pool(handler);
2499
}
2500
#else
2501
2502
static int
2503
dpif_netlink_handler_init(struct dpif_handler *handler)
2504
0
{
2505
0
    handler->epoll_fd = epoll_create(10);
2506
0
    return handler->epoll_fd < 0 ? errno : 0;
2507
0
}
2508
2509
static void
2510
dpif_netlink_handler_uninit(struct dpif_handler *handler)
2511
0
{
2512
0
    close(handler->epoll_fd);
2513
0
}
2514
#endif
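
/* Editor's illustrative example (not part of the instrumented source): on
 * non-Windows builds each handler owns an epoll set, and the vport-dispatch
 * receive path further below registers one socket per channel with the
 * channel index carried in the event's data.u32.  This standalone sketch
 * shows that registration/poll pattern; the 'example_*' names are local to
 * the example. */
#include <stdint.h>
#include <sys/epoll.h>

#define EXAMPLE_MAX_EVENTS 16

static int
example_epoll_poll_once(int epoll_fd, int sock_fd, uint32_t channel_idx)
{
    struct epoll_event ev = { .events = EPOLLIN, .data.u32 = channel_idx };
    struct epoll_event events[EXAMPLE_MAX_EVENTS];

    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock_fd, &ev) < 0) {
        return -1;
    }
    /* Timeout of 0: return immediately, as the receive path below does. */
    return epoll_wait(epoll_fd, events, EXAMPLE_MAX_EVENTS, 0);
}
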
2515
2516
/* Returns true if num is a prime number,
2517
 * otherwise returns false.
2518
 */
2519
static bool
2520
is_prime(uint32_t num)
2521
0
{
2522
0
    if (num == 2) {
2523
0
        return true;
2524
0
    }
2525
2526
0
    if (num < 2) {
2527
0
        return false;
2528
0
    }
2529
2530
0
    if (num % 2 == 0) {
2531
0
        return false;
2532
0
    }
2533
2534
0
    for (uint64_t i = 3; i * i <= num; i += 2) {
2535
0
        if (num % i == 0) {
2536
0
            return false;
2537
0
        }
2538
0
    }
2539
2540
0
    return true;
2541
0
}
2542
2543
/* Returns start if start is a prime number.  Otherwise returns the next
2544
 * prime greater than start.  Search is limited by UINT32_MAX.
2545
 *
2546
 * Returns 0 if no prime has been found between start and UINT32_MAX.
2547
 */
2548
static uint32_t
2549
next_prime(uint32_t start)
2550
0
{
2551
0
    if (start <= 2) {
2552
0
        return 2;
2553
0
    }
2554
2555
0
    for (uint32_t i = start; i < UINT32_MAX; i++) {
2556
0
        if (is_prime(i)) {
2557
0
            return i;
2558
0
        }
2559
0
    }
2560
2561
0
    return 0;
2562
0
}
2563
2564
/* Calculates and returns the number of handler threads needed based
2565
 * on the following formula:
2566
 *
2567
 * handlers_n = min(next_prime(active_cores + 1), total_cores)
2568
 */
2569
static uint32_t
2570
dpif_netlink_calculate_n_handlers(void)
2571
0
{
2572
0
    uint32_t total_cores = count_total_cores();
2573
0
    uint32_t n_handlers = count_cpu_cores();
2574
0
    uint32_t next_prime_num;
2575
2576
    /* If not all cores are available to OVS, create additional handler
2577
     * threads to ensure a fairer distribution of load among them.
2578
     */
2579
0
    if (n_handlers < total_cores && total_cores > 2) {
2580
0
        next_prime_num = next_prime(n_handlers + 1);
2581
0
        n_handlers = MIN(next_prime_num, total_cores);
2582
0
    }
2583
2584
0
    return MAX(n_handlers, 1);
2585
0
}
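
/* Editor's worked example (not part of the instrumented source) of the
 * formula above, using hypothetical core counts and the next_prime()
 * defined earlier:
 *
 *   active_cores = 8,  total_cores = 16  ->  next_prime(9)  = 11  ->  11
 *   active_cores = 14, total_cores = 16  ->  next_prime(15) = 17  ->  16
 *   active_cores = 4,  total_cores = 4   ->  unchanged            ->   4
 *
 * The sketch restates the calculation as a pure function of the two core
 * counts, taking the prime search as a callback. */
#include <stdint.h>

static uint32_t
example_n_handlers(uint32_t active_cores, uint32_t total_cores,
                   uint32_t (*next_prime_fn)(uint32_t))
{
    uint32_t n = active_cores;

    if (n < total_cores && total_cores > 2) {
        uint32_t p = next_prime_fn(n + 1);

        n = p < total_cores ? p : total_cores;  /* min(next_prime, total). */
    }
    return n ? n : 1;                           /* At least one handler. */
}
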
2586
2587
static int
2588
dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *dpif)
2589
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2590
0
{
2591
0
    int handler_id;
2592
0
    int error = 0;
2593
0
    uint32_t n_handlers;
2594
0
    uint32_t *upcall_pids;
2595
2596
0
    n_handlers = dpif_netlink_calculate_n_handlers();
2597
0
    if (dpif->n_handlers != n_handlers) {
2598
0
        VLOG_DBG("Dispatch mode(per-cpu): initializing %d handlers",
2599
0
                   n_handlers);
2600
0
        destroy_all_handlers(dpif);
2601
0
        upcall_pids = xzalloc(n_handlers * sizeof *upcall_pids);
2602
0
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2603
0
        for (handler_id = 0; handler_id < n_handlers; handler_id++) {
2604
0
            struct dpif_handler *handler = &dpif->handlers[handler_id];
2605
0
            error = create_nl_sock(dpif, &handler->sock);
2606
0
            if (error) {
2607
0
                VLOG_ERR("Dispatch mode(per-cpu): Cannot create socket for "
2608
0
                         "handler %d", handler_id);
2609
0
                continue;
2610
0
            }
2611
0
            upcall_pids[handler_id] = nl_sock_pid(handler->sock);
2612
0
            VLOG_DBG("Dispatch mode(per-cpu): "
2613
0
                      "handler %d has Netlink PID of %u",
2614
0
                      handler_id, upcall_pids[handler_id]);
2615
0
        }
2616
2617
0
        dpif->n_handlers = n_handlers;
2618
0
        error = dpif_netlink_set_handler_pids(&dpif->dpif, upcall_pids,
2619
0
                                              n_handlers);
2620
0
        free(upcall_pids);
2621
0
    }
2622
0
    return error;
2623
0
}
2624
2625
/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
2626
 * currently in 'dpif' in the kernel, by adding a new set of channels for
2627
 * any kernel vport that lacks one and deleting any channels that have no
2628
 * backing kernel vports. */
2629
static int
2630
dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *dpif,
2631
                                             uint32_t n_handlers)
2632
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2633
0
{
2634
0
    unsigned long int *keep_channels;
2635
0
    struct dpif_netlink_vport vport;
2636
0
    size_t keep_channels_nbits;
2637
0
    struct nl_dump dump;
2638
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2639
0
    struct ofpbuf buf;
2640
0
    int retval = 0;
2641
0
    size_t i;
2642
2643
0
    ovs_assert(!WINDOWS || n_handlers <= 1);
2644
0
    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
2645
2646
0
    if (dpif->n_handlers != n_handlers) {
2647
0
        destroy_all_channels(dpif);
2648
0
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2649
0
        for (i = 0; i < n_handlers; i++) {
2650
0
            int error;
2651
0
            struct dpif_handler *handler = &dpif->handlers[i];
2652
2653
0
            error = dpif_netlink_handler_init(handler);
2654
0
            if (error) {
2655
0
                size_t j;
2656
2657
0
                for (j = 0; j < i; j++) {
2658
0
                    struct dpif_handler *tmp = &dpif->handlers[j];
2659
0
                    dpif_netlink_handler_uninit(tmp);
2660
0
                }
2661
0
                free(dpif->handlers);
2662
0
                dpif->handlers = NULL;
2663
2664
0
                return error;
2665
0
            }
2666
0
        }
2667
0
        dpif->n_handlers = n_handlers;
2668
0
    }
2669
2670
0
    for (i = 0; i < n_handlers; i++) {
2671
0
        struct dpif_handler *handler = &dpif->handlers[i];
2672
2673
0
        handler->event_offset = handler->n_events = 0;
2674
0
    }
2675
2676
0
    keep_channels_nbits = dpif->uc_array_size;
2677
0
    keep_channels = bitmap_allocate(keep_channels_nbits);
2678
2679
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
2680
0
    dpif_netlink_port_dump_start__(dpif, &dump);
2681
0
    while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
2682
0
        uint32_t port_no = odp_to_u32(vport.port_no);
2683
0
        uint32_t upcall_pid;
2684
0
        int error;
2685
2686
0
        if (port_no >= dpif->uc_array_size
2687
0
            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
2688
0
            struct nl_sock *sock;
2689
0
            error = create_nl_sock(dpif, &sock);
2690
2691
0
            if (error) {
2692
0
                goto error;
2693
0
            }
2694
2695
0
            error = vport_add_channel(dpif, vport.port_no, sock);
2696
0
            if (error) {
2697
0
                VLOG_INFO("%s: could not add channels for port %s",
2698
0
                          dpif_name(&dpif->dpif), vport.name);
2699
0
                nl_sock_destroy(sock);
2700
0
                retval = error;
2701
0
                goto error;
2702
0
            }
2703
0
            upcall_pid = nl_sock_pid(sock);
2704
0
        }
2705
2706
        /* Configure the vport to deliver misses to 'sock'. */
2707
0
        if (vport.upcall_pids[0] == 0
2708
0
            || vport.n_upcall_pids != 1
2709
0
            || upcall_pid != vport.upcall_pids[0]) {
2710
0
            struct dpif_netlink_vport vport_request;
2711
2712
0
            dpif_netlink_vport_init(&vport_request);
2713
0
            vport_request.cmd = OVS_VPORT_CMD_SET;
2714
0
            vport_request.dp_ifindex = dpif->dp_ifindex;
2715
0
            vport_request.port_no = vport.port_no;
2716
0
            vport_request.n_upcall_pids = 1;
2717
0
            vport_request.upcall_pids = &upcall_pid;
2718
0
            error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
2719
0
            if (error) {
2720
0
                VLOG_WARN_RL(&error_rl,
2721
0
                             "%s: failed to set upcall pid on port: %s",
2722
0
                             dpif_name(&dpif->dpif), ovs_strerror(error));
2723
2724
0
                if (error != ENODEV && error != ENOENT) {
2725
0
                    retval = error;
2726
0
                } else {
2727
                    /* The vport isn't really there, even though the dump says
2728
                     * it is.  Probably we just hit a race after a port
2729
                     * disappeared. */
2730
0
                }
2731
0
                goto error;
2732
0
            }
2733
0
        }
2734
2735
0
        if (port_no < keep_channels_nbits) {
2736
0
            bitmap_set1(keep_channels, port_no);
2737
0
        }
2738
0
        continue;
2739
2740
0
    error:
2741
0
        vport_del_channels(dpif, vport.port_no);
2742
0
    }
2743
0
    nl_dump_done(&dump);
2744
0
    ofpbuf_uninit(&buf);
2745
2746
    /* Discard any saved channels that we didn't reuse. */
2747
0
    for (i = 0; i < keep_channels_nbits; i++) {
2748
0
        if (!bitmap_is_set(keep_channels, i)) {
2749
0
            vport_del_channels(dpif, u32_to_odp(i));
2750
0
        }
2751
0
    }
2752
0
    free(keep_channels);
2753
2754
0
    return retval;
2755
0
}
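
/* Editor's illustrative example (not part of the instrumented source): the
 * 'keep_channels' bitmap above implements a mark-and-sweep over existing
 * channels: every port seen in the kernel dump is marked, and whatever was
 * not marked is torn down afterwards.  Standalone sketch of the pattern;
 * 'example_drop_channel' is a hypothetical callback standing in for
 * vport_del_channels(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

static void
example_sweep_unused(const uint32_t *seen_ports, size_t n_seen,
                     size_t n_existing,
                     void (*example_drop_channel)(uint32_t port))
{
    bool *keep = calloc(n_existing, sizeof *keep);

    if (!keep) {
        return;                     /* Allocation failure: sweep nothing. */
    }

    /* Mark: every port seen during the refresh is kept. */
    for (size_t i = 0; i < n_seen; i++) {
        if (seen_ports[i] < n_existing) {
            keep[seen_ports[i]] = true;
        }
    }

    /* Sweep: discard any existing channel that was not reused. */
    for (size_t port = 0; port < n_existing; port++) {
        if (!keep[port]) {
            example_drop_channel((uint32_t) port);
        }
    }
    free(keep);
}
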
2756
2757
static int
2758
dpif_netlink_recv_set_vport_dispatch(struct dpif_netlink *dpif, bool enable)
2759
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2760
0
{
2761
0
    if ((dpif->handlers != NULL) == enable) {
2762
0
        return 0;
2763
0
    } else if (!enable) {
2764
0
        destroy_all_channels(dpif);
2765
0
        return 0;
2766
0
    } else {
2767
0
        return dpif_netlink_refresh_handlers_vport_dispatch(dpif, 1);
2768
0
    }
2769
0
}
2770
2771
static int
2772
dpif_netlink_recv_set_cpu_dispatch(struct dpif_netlink *dpif, bool enable)
2773
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2774
0
{
2775
0
    if ((dpif->handlers != NULL) == enable) {
2776
0
        return 0;
2777
0
    } else if (!enable) {
2778
0
        destroy_all_handlers(dpif);
2779
0
        return 0;
2780
0
    } else {
2781
0
        return dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
2782
0
    }
2783
0
}
2784
2785
static int
2786
dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
2787
0
{
2788
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2789
0
    int error;
2790
2791
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2792
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2793
0
        error = dpif_netlink_recv_set_cpu_dispatch(dpif, enable);
2794
0
    } else {
2795
0
        error = dpif_netlink_recv_set_vport_dispatch(dpif, enable);
2796
0
    }
2797
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2798
2799
0
    return error;
2800
0
}
2801
2802
static int
2803
dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
2804
0
{
2805
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2806
0
    int error = 0;
2807
2808
#ifdef _WIN32
2809
    /* Multiple upcall handlers will be supported once the kernel datapath supports
2810
     * it. */
2811
    if (n_handlers > 1) {
2812
        return error;
2813
    }
2814
#endif
2815
2816
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2817
0
    if (dpif->handlers) {
2818
0
        if (dpif_netlink_upcall_per_cpu(dpif)) {
2819
0
            error = dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
2820
0
        } else {
2821
0
            error = dpif_netlink_refresh_handlers_vport_dispatch(dpif,
2822
0
                                                                 n_handlers);
2823
0
        }
2824
0
    }
2825
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2826
2827
0
    return error;
2828
0
}
2829
2830
static bool
2831
dpif_netlink_number_handlers_required(struct dpif *dpif_, uint32_t *n_handlers)
2832
0
{
2833
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2834
2835
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2836
0
        *n_handlers = dpif_netlink_calculate_n_handlers();
2837
0
        return true;
2838
0
    }
2839
2840
0
    return false;
2841
0
}
2842
2843
static int
2844
dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2845
                             uint32_t queue_id, uint32_t *priority)
2846
0
{
2847
0
    if (queue_id < 0xf000) {
2848
0
        *priority = TC_H_MAKE(1 << 16, queue_id + 1);
2849
0
        return 0;
2850
0
    } else {
2851
0
        return EINVAL;
2852
0
    }
2853
0
}
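
/* Editor's worked example (not part of the instrumented source): TC_H_MAKE()
 * from <linux/pkt_sched.h> keeps the major handle in the upper 16 bits and
 * the minor in the lower 16 bits, so with a major of 1 << 16 the mapping
 * above gives, for instance:
 *
 *   queue_id = 0  ->  0x00010001
 *   queue_id = 7  ->  0x00010008
 *
 * Queue ids of 0xf000 and above are rejected with EINVAL. */
#include <inttypes.h>
#include <stdio.h>
#include <linux/pkt_sched.h>

static void
example_print_priority(uint32_t queue_id)
{
    if (queue_id < 0xf000) {
        printf("queue %" PRIu32 " -> priority 0x%08" PRIx32 "\n",
               queue_id, (uint32_t) TC_H_MAKE(1 << 16, queue_id + 1));
    }
}
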
2854
2855
static int
2856
parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
2857
                 int *dp_ifindex)
2858
0
{
2859
0
    static const struct nl_policy ovs_packet_policy[] = {
2860
        /* Always present. */
2861
0
        [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
2862
0
                                     .min_len = ETH_HEADER_LEN },
2863
0
        [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
2864
2865
        /* OVS_PACKET_CMD_ACTION only. */
2866
0
        [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
2867
0
        [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
2868
0
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
2869
0
        [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true },
2870
0
        [OVS_PACKET_ATTR_HASH] = { .type = NL_A_U64, .optional = true }
2871
0
    };
2872
2873
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2874
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2875
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2876
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2877
2878
0
    struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
2879
0
    if (!nlmsg || !genl || !ovs_header
2880
0
        || nlmsg->nlmsg_type != ovs_packet_family
2881
0
        || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2882
0
                            ARRAY_SIZE(ovs_packet_policy))) {
2883
0
        return EINVAL;
2884
0
    }
2885
2886
0
    int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2887
0
                : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2888
0
                : -1);
2889
0
    if (type < 0) {
2890
0
        return EINVAL;
2891
0
    }
2892
2893
    /* (Re)set ALL fields of '*upcall' on successful return. */
2894
0
    upcall->type = type;
2895
0
    upcall->key = CONST_CAST(struct nlattr *,
2896
0
                             nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
2897
0
    upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
2898
0
    odp_flow_key_hash(upcall->key, upcall->key_len, &upcall->ufid);
2899
0
    upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
2900
0
    upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
2901
0
    upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
2902
0
    upcall->mru = a[OVS_PACKET_ATTR_MRU];
2903
0
    upcall->hash = a[OVS_PACKET_ATTR_HASH];
2904
2905
    /* Allow overwriting the netlink attribute header without reallocating. */
2906
0
    dp_packet_use_stub(&upcall->packet,
2907
0
                    CONST_CAST(struct nlattr *,
2908
0
                               nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2909
0
                    nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2910
0
                    sizeof(struct nlattr));
2911
0
    dp_packet_set_data(&upcall->packet,
2912
0
                    (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2913
0
    dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
2914
2915
0
    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2916
        /* Ethernet frame */
2917
0
        upcall->packet.packet_type = htonl(PT_ETH);
2918
0
    } else {
2919
        /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2920
0
        ovs_be16 ethertype = 0;
2921
0
        const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2922
0
                                                     upcall->key_len,
2923
0
                                                     OVS_KEY_ATTR_ETHERTYPE);
2924
0
        if (et_nla) {
2925
0
            ethertype = nl_attr_get_be16(et_nla);
2926
0
        }
2927
0
        upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2928
0
                                                    ntohs(ethertype));
2929
0
        dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2930
0
    }
2931
2932
0
    *dp_ifindex = ovs_header->dp_ifindex;
2933
2934
0
    return 0;
2935
0
}
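
/* Editor's illustrative example (not part of the instrumented source): the
 * packet_type chosen above is a 32-bit big-endian value that packs a
 * namespace into the upper 16 bits and a namespace-specific type into the
 * lower 16 bits (an Ethertype, for the non-Ethernet case).  The helper below
 * shows the general layout only; the actual constants used by PT_ETH and
 * PACKET_TYPE_BE() live in the OVS headers and are not restated here. */
#include <stdint.h>
#include <arpa/inet.h>              /* htonl() */

static uint32_t
example_packet_type_be(uint16_t ns, uint16_t ns_type)
{
    return htonl(((uint32_t) ns << 16) | ns_type);
}
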
2936
2937
#ifdef _WIN32
2938
#define PACKET_RECV_BATCH_SIZE 50
2939
static int
2940
dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2941
                          struct dpif_upcall *upcall, struct ofpbuf *buf)
2942
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2943
{
2944
    struct dpif_handler *handler;
2945
    int read_tries = 0;
2946
    struct dpif_windows_vport_sock *sock_pool;
2947
    uint32_t i;
2948
2949
    if (!dpif->handlers) {
2950
        return EAGAIN;
2951
    }
2952
2953
    /* Only one handler is supported currently. */
2954
    if (handler_id >= 1) {
2955
        return EAGAIN;
2956
    }
2957
2958
    if (handler_id >= dpif->n_handlers) {
2959
        return EAGAIN;
2960
    }
2961
2962
    handler = &dpif->handlers[handler_id];
2963
    sock_pool = handler->vport_sock_pool;
2964
2965
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2966
        for (;;) {
2967
            int dp_ifindex;
2968
            int error;
2969
2970
            if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2971
                return EAGAIN;
2972
            }
2973
2974
            error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
2975
            if (error == ENOBUFS) {
2976
                /* ENOBUFS typically means that we've received so many
2977
                 * packets that the buffer overflowed.  Try again
2978
                 * immediately because there's almost certainly a packet
2979
                 * waiting for us. */
2980
                /* XXX: report_loss(dpif, ch, idx, handler_id); */
2981
                continue;
2982
            }
2983
2984
            /* XXX: ch->last_poll = time_msec(); */
2985
            if (error) {
2986
                if (error == EAGAIN) {
2987
                    break;
2988
                }
2989
                return error;
2990
            }
2991
2992
            error = parse_odp_packet(buf, upcall, &dp_ifindex);
2993
            if (!error && dp_ifindex == dpif->dp_ifindex) {
2994
                upcall->pid = 0;
2995
                return 0;
2996
            } else if (error) {
2997
                return error;
2998
            }
2999
        }
3000
    }
3001
3002
    return EAGAIN;
3003
}
3004
#else
3005
static int
3006
dpif_netlink_recv_cpu_dispatch(struct dpif_netlink *dpif, uint32_t handler_id,
3007
                               struct dpif_upcall *upcall, struct ofpbuf *buf)
3008
    OVS_REQ_RDLOCK(dpif->upcall_lock)
3009
0
{
3010
0
    struct dpif_handler *handler;
3011
0
    int read_tries = 0;
3012
3013
0
    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
3014
0
        return EAGAIN;
3015
0
    }
3016
3017
0
    handler = &dpif->handlers[handler_id];
3018
3019
0
    for (;;) {
3020
0
        int dp_ifindex;
3021
0
        int error;
3022
3023
0
        if (++read_tries > 50) {
3024
0
            return EAGAIN;
3025
0
        }
3026
0
        error = nl_sock_recv(handler->sock, buf, NULL, false);
3027
0
        if (error == ENOBUFS) {
3028
            /* ENOBUFS typically means that we've received so many
3029
             * packets that the buffer overflowed.  Try again
3030
             * immediately because there's almost certainly a packet
3031
             * waiting for us. */
3032
0
            report_loss(dpif, NULL, 0, handler_id);
3033
0
            continue;
3034
0
        }
3035
3036
0
        if (error) {
3037
0
            if (error == EAGAIN) {
3038
0
                break;
3039
0
            }
3040
0
            return error;
3041
0
        }
3042
3043
0
        error = parse_odp_packet(buf, upcall, &dp_ifindex);
3044
0
        if (!error && dp_ifindex == dpif->dp_ifindex) {
3045
0
            upcall->pid = nl_sock_pid(handler->sock);
3046
0
            return 0;
3047
0
        } else if (error) {
3048
0
            return error;
3049
0
        }
3050
0
    }
3051
3052
0
    return EAGAIN;
3053
0
}
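The per-CPU receive loop above caps the number of reads per call and retries on ENOBUFS.  Below is a minimal standalone sketch of just that pattern; drain_one_packet(), read_fn, and MAX_READS_PER_CALL are illustrative names, and the real code additionally reports the loss and parses the received packet.

#include <errno.h>

#define MAX_READS_PER_CALL 50

static int
drain_one_packet(int (*read_fn)(void *aux), void *aux)
{
    int tries = 0;

    for (;;) {
        int error;

        if (++tries > MAX_READS_PER_CALL) {
            return EAGAIN;      /* Yield so one busy socket cannot starve us. */
        }
        error = read_fn(aux);
        if (error == ENOBUFS) {
            continue;           /* Receive buffer overflowed; more data waits. */
        }
        return error;           /* 0 on success, EAGAIN when drained, etc. */
    }
}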
3054
3055
static int
3056
dpif_netlink_recv_vport_dispatch(struct dpif_netlink *dpif,
3057
                                 uint32_t handler_id,
3058
                                 struct dpif_upcall *upcall,
3059
                                 struct ofpbuf *buf)
3060
    OVS_REQ_RDLOCK(dpif->upcall_lock)
3061
0
{
3062
0
    struct dpif_handler *handler;
3063
0
    int read_tries = 0;
3064
3065
0
    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
3066
0
        return EAGAIN;
3067
0
    }
3068
3069
0
    handler = &dpif->handlers[handler_id];
3070
0
    if (handler->event_offset >= handler->n_events) {
3071
0
        int retval;
3072
3073
0
        handler->event_offset = handler->n_events = 0;
3074
3075
0
        do {
3076
0
            retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
3077
0
                                dpif->uc_array_size, 0);
3078
0
        } while (retval < 0 && errno == EINTR);
3079
3080
0
        if (retval < 0) {
3081
0
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3082
0
            VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
3083
0
        } else if (retval > 0) {
3084
0
            handler->n_events = retval;
3085
0
        }
3086
0
    }
3087
3088
0
    while (handler->event_offset < handler->n_events) {
3089
0
        int idx = handler->epoll_events[handler->event_offset].data.u32;
3090
0
        struct dpif_channel *ch = &dpif->channels[idx];
3091
3092
0
        handler->event_offset++;
3093
3094
0
        for (;;) {
3095
0
            int dp_ifindex;
3096
0
            int error;
3097
3098
0
            if (++read_tries > 50) {
3099
0
                return EAGAIN;
3100
0
            }
3101
3102
0
            error = nl_sock_recv(ch->sock, buf, NULL, false);
3103
0
            if (error == ENOBUFS) {
3104
                /* ENOBUFS typically means that we've received so many
3105
                 * packets that the buffer overflowed.  Try again
3106
                 * immediately because there's almost certainly a packet
3107
                 * waiting for us. */
3108
0
                report_loss(dpif, ch, idx, handler_id);
3109
0
                continue;
3110
0
            }
3111
3112
0
            ch->last_poll = time_msec();
3113
0
            if (error) {
3114
0
                if (error == EAGAIN) {
3115
0
                    break;
3116
0
                }
3117
0
                return error;
3118
0
            }
3119
3120
0
            error = parse_odp_packet(buf, upcall, &dp_ifindex);
3121
0
            if (!error && dp_ifindex == dpif->dp_ifindex) {
3122
0
                upcall->pid = nl_sock_pid(ch->sock);
3123
0
                return 0;
3124
0
            } else if (error) {
3125
0
                return error;
3126
0
            }
3127
0
        }
3128
0
    }
3129
3130
0
    return EAGAIN;
3131
0
}
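The vport-dispatch path above polls its epoll set without blocking and restarts epoll_wait() when it is interrupted by a signal.  The sketch below isolates that idiom; poll_ready_channels() is an illustrative wrapper, not an OVS function.

#include <errno.h>
#include <sys/epoll.h>

static int
poll_ready_channels(int epoll_fd, struct epoll_event *events, int max_events)
{
    int retval;

    do {
        /* Timeout of 0: never block here; the caller sleeps elsewhere. */
        retval = epoll_wait(epoll_fd, events, max_events, 0);
    } while (retval < 0 && errno == EINTR);

    return retval;              /* < 0 error, 0 nothing ready, > 0 count. */
}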
3132
#endif
3133
3134
static int
3135
dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
3136
                  struct dpif_upcall *upcall, struct ofpbuf *buf)
3137
0
{
3138
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3139
0
    int error;
3140
3141
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
3142
#ifdef _WIN32
3143
    error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
3144
#else
3145
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
3146
0
        error = dpif_netlink_recv_cpu_dispatch(dpif, handler_id, upcall, buf);
3147
0
    } else {
3148
0
        error = dpif_netlink_recv_vport_dispatch(dpif,
3149
0
                                                 handler_id, upcall, buf);
3150
0
    }
3151
0
#endif
3152
0
    fat_rwlock_unlock(&dpif->upcall_lock);
3153
3154
0
    return error;
3155
0
}
3156
3157
#ifdef _WIN32
3158
static void
3159
dpif_netlink_recv_wait_windows(struct dpif_netlink *dpif, uint32_t handler_id)
3160
    OVS_REQ_RDLOCK(dpif->upcall_lock)
3161
{
3162
    uint32_t i;
3163
    struct dpif_windows_vport_sock *sock_pool =
3164
        dpif->handlers[handler_id].vport_sock_pool;
3165
3166
    /* Only one handler is supported currently. */
3167
    if (handler_id >= 1) {
3168
        return;
3169
    }
3170
3171
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
3172
        nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
3173
    }
3174
}
3175
#else
3176
3177
static void
3178
dpif_netlink_recv_wait_vport_dispatch(struct dpif_netlink *dpif,
3179
                                      uint32_t handler_id)
3180
    OVS_REQ_RDLOCK(dpif->upcall_lock)
3181
0
{
3182
0
    if (dpif->handlers && handler_id < dpif->n_handlers) {
3183
0
        struct dpif_handler *handler = &dpif->handlers[handler_id];
3184
3185
0
        poll_fd_wait(handler->epoll_fd, POLLIN);
3186
0
    }
3187
0
}
3188
3189
static void
3190
dpif_netlink_recv_wait_cpu_dispatch(struct dpif_netlink *dpif,
3191
                                    uint32_t handler_id)
3192
    OVS_REQ_RDLOCK(dpif->upcall_lock)
3193
0
{
3194
0
    if (dpif->handlers && handler_id < dpif->n_handlers) {
3195
0
        struct dpif_handler *handler = &dpif->handlers[handler_id];
3196
3197
0
        poll_fd_wait(nl_sock_fd(handler->sock), POLLIN);
3198
0
    }
3199
0
}
3200
#endif
3201
3202
static void
3203
dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
3204
0
{
3205
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3206
3207
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
3208
#ifdef _WIN32
3209
    dpif_netlink_recv_wait_windows(dpif, handler_id);
3210
#else
3211
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
3212
0
        dpif_netlink_recv_wait_cpu_dispatch(dpif, handler_id);
3213
0
    } else {
3214
0
        dpif_netlink_recv_wait_vport_dispatch(dpif, handler_id);
3215
0
    }
3216
0
#endif
3217
0
    fat_rwlock_unlock(&dpif->upcall_lock);
3218
0
}
3219
3220
static void
3221
dpif_netlink_recv_purge_vport_dispatch(struct dpif_netlink *dpif)
3222
    OVS_REQ_WRLOCK(dpif->upcall_lock)
3223
0
{
3224
0
    if (dpif->handlers) {
3225
0
        size_t i;
3226
3227
0
        if (!dpif->channels[0].sock) {
3228
0
            return;
3229
0
        }
3230
0
        for (i = 0; i < dpif->uc_array_size; i++ ) {
3231
3232
0
            nl_sock_drain(dpif->channels[i].sock);
3233
0
        }
3234
0
    }
3235
0
}
3236
3237
static void
3238
dpif_netlink_recv_purge_cpu_dispatch(struct dpif_netlink *dpif)
3239
    OVS_REQ_WRLOCK(dpif->upcall_lock)
3240
0
{
3241
0
    int handler_id;
3242
3243
0
    if (dpif->handlers) {
3244
0
        for (handler_id = 0; handler_id < dpif->n_handlers; handler_id++) {
3245
0
            struct dpif_handler *handler = &dpif->handlers[handler_id];
3246
0
            nl_sock_drain(handler->sock);
3247
0
        }
3248
0
    }
3249
0
}
3250
3251
static void
3252
dpif_netlink_recv_purge(struct dpif *dpif_)
3253
0
{
3254
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3255
3256
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
3257
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
3258
0
        dpif_netlink_recv_purge_cpu_dispatch(dpif);
3259
0
    } else {
3260
0
        dpif_netlink_recv_purge_vport_dispatch(dpif);
3261
0
    }
3262
0
    fat_rwlock_unlock(&dpif->upcall_lock);
3263
0
}
3264
3265
static char *
3266
dpif_netlink_get_datapath_version(void)
3267
0
{
3268
0
    char *version_str = NULL;
3269
3270
0
#ifdef __linux__
3271
3272
0
#define MAX_VERSION_STR_SIZE 80
3273
0
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
3274
0
    FILE *f;
3275
3276
0
    f = fopen(LINUX_DATAPATH_VERSION_FILE, "r");
3277
0
    if (f) {
3278
0
        char *newline;
3279
0
        char version[MAX_VERSION_STR_SIZE];
3280
3281
0
        if (fgets(version, MAX_VERSION_STR_SIZE, f)) {
3282
0
            newline = strchr(version, '\n');
3283
0
            if (newline) {
3284
0
                *newline = '\0';
3285
0
            }
3286
0
            version_str = xstrdup(version);
3287
0
        }
3288
0
        fclose(f);
3289
0
    }
3290
0
#endif
3291
3292
0
    return version_str;
3293
0
}
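dpif_netlink_get_datapath_version() above boils down to reading one line from a sysfs file and trimming the trailing newline.  A self-contained sketch of that pattern using only libc follows; read_sysfs_line() is an illustrative name.

#include <stdio.h>
#include <string.h>

static char *
read_sysfs_line(const char *path)
{
    char line[80];
    char *result = NULL;
    FILE *f = fopen(path, "r");

    if (f) {
        if (fgets(line, sizeof line, f)) {
            line[strcspn(line, "\n")] = '\0';   /* Drop trailing newline. */
            result = strdup(line);              /* Caller frees. */
        }
        fclose(f);
    }
    return result;                              /* NULL on any failure. */
}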
3294
3295
struct dpif_netlink_ct_dump_state {
3296
    struct ct_dpif_dump_state up;
3297
    struct nl_ct_dump_state *nl_ct_dump;
3298
};
3299
3300
static int
3301
dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
3302
                           struct ct_dpif_dump_state **dump_,
3303
                           const uint16_t *zone, int *ptot_bkts)
3304
0
{
3305
0
    struct dpif_netlink_ct_dump_state *dump;
3306
0
    int err;
3307
3308
0
    dump = xzalloc(sizeof *dump);
3309
0
    err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
3310
0
    if (err) {
3311
0
        free(dump);
3312
0
        return err;
3313
0
    }
3314
3315
0
    *dump_ = &dump->up;
3316
3317
0
    return 0;
3318
0
}
3319
3320
static int
3321
dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
3322
                          struct ct_dpif_dump_state *dump_,
3323
                          struct ct_dpif_entry *entry)
3324
0
{
3325
0
    struct dpif_netlink_ct_dump_state *dump;
3326
3327
0
    INIT_CONTAINER(dump, dump_, up);
3328
3329
0
    return nl_ct_dump_next(dump->nl_ct_dump, entry);
3330
0
}
3331
3332
static int
3333
dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
3334
                          struct ct_dpif_dump_state *dump_)
3335
0
{
3336
0
    struct dpif_netlink_ct_dump_state *dump;
3337
3338
0
    INIT_CONTAINER(dump, dump_, up);
3339
3340
0
    int err = nl_ct_dump_done(dump->nl_ct_dump);
3341
0
    free(dump);
3342
0
    return err;
3343
0
}
3344
3345
static int
3346
dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
3347
                      const struct ct_dpif_tuple *tuple)
3348
0
{
3349
0
    if (tuple) {
3350
0
        return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
3351
0
    } else if (zone) {
3352
0
        return nl_ct_flush_zone(*zone);
3353
0
    } else {
3354
0
        return nl_ct_flush();
3355
0
    }
3356
0
}
3357
3358
static int
3359
dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
3360
                           const struct ovs_list *zone_limits)
3361
0
{
3362
0
    if (ovs_ct_limit_family < 0) {
3363
0
        return EOPNOTSUPP;
3364
0
    }
3365
3366
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3367
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3368
0
                          NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
3369
0
                          OVS_CT_LIMIT_VERSION);
3370
3371
0
    struct ovs_header *ovs_header;
3372
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3373
0
    ovs_header->dp_ifindex = 0;
3374
3375
0
    size_t opt_offset;
3376
0
    opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3377
3378
0
    if (!ovs_list_is_empty(zone_limits)) {
3379
0
        struct ct_dpif_zone_limit *zone_limit;
3380
3381
0
        LIST_FOR_EACH (zone_limit, node, zone_limits) {
3382
0
            struct ovs_zone_limit req_zone_limit = {
3383
0
                .zone_id = zone_limit->zone,
3384
0
                .limit   = zone_limit->limit,
3385
0
            };
3386
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3387
0
        }
3388
0
    }
3389
0
    nl_msg_end_nested(request, opt_offset);
3390
3391
0
    int err = nl_transact(NETLINK_GENERIC, request, NULL);
3392
0
    ofpbuf_delete(request);
3393
0
    return err;
3394
0
}
3395
3396
static int
3397
dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
3398
                                     struct ovs_list *zone_limits)
3399
0
{
3400
0
    static const struct nl_policy ovs_ct_limit_policy[] = {
3401
0
        [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
3402
0
                                           .optional = true },
3403
0
    };
3404
3405
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3406
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3407
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3408
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3409
3410
0
    struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];
3411
3412
0
    if (!nlmsg || !genl || !ovs_header
3413
0
        || nlmsg->nlmsg_type != ovs_ct_limit_family
3414
0
        || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
3415
0
                            ARRAY_SIZE(ovs_ct_limit_policy))) {
3416
0
        return EINVAL;
3417
0
    }
3418
3419
3420
0
    if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
3421
0
        return EINVAL;
3422
0
    }
3423
3424
0
    int rem = NLA_ALIGN(
3425
0
                nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
3426
0
    const struct ovs_zone_limit *zone_limit =
3427
0
                nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
3428
3429
0
    while (rem >= sizeof *zone_limit) {
3430
0
        if (zone_limit->zone_id >= OVS_ZONE_LIMIT_DEFAULT_ZONE &&
3431
0
            zone_limit->zone_id <= UINT16_MAX) {
3432
0
            ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
3433
0
                                    zone_limit->limit, zone_limit->count);
3434
0
        }
3435
0
        rem -= NLA_ALIGN(sizeof *zone_limit);
3436
0
        zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
3437
0
            (unsigned char *) zone_limit  + NLA_ALIGN(sizeof *zone_limit));
3438
0
    }
3439
0
    return 0;
3440
0
}
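The zone-limit parser above walks a packed array of fixed-size struct ovs_zone_limit records, stepping by the 4-byte netlink alignment of each record.  The generic sketch below shows the same walk with illustrative types and names (struct rec, visit(), ALIGN4() are not OVS names).

#include <stddef.h>
#include <stdint.h>

#define ALIGN4(X) (((X) + 3) & ~(size_t) 3)

struct rec {
    uint32_t zone_id;
    uint32_t limit;
    uint32_t count;
};

static void
walk_records(const void *payload, size_t len,
             void (*visit)(const struct rec *))
{
    const uint8_t *p = payload;

    while (len >= sizeof(struct rec)) {
        visit((const struct rec *) p);          /* Handle one record. */
        p += ALIGN4(sizeof(struct rec));
        len -= ALIGN4(sizeof(struct rec));
    }
}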
3441
3442
static int
3443
dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
3444
                           const struct ovs_list *zone_limits_request,
3445
                           struct ovs_list *zone_limits_reply)
3446
0
{
3447
0
    if (ovs_ct_limit_family < 0) {
3448
0
        return EOPNOTSUPP;
3449
0
    }
3450
3451
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3452
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3453
0
            NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
3454
0
            OVS_CT_LIMIT_VERSION);
3455
3456
0
    struct ovs_header *ovs_header;
3457
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3458
0
    ovs_header->dp_ifindex = 0;
3459
3460
0
    if (!ovs_list_is_empty(zone_limits_request)) {
3461
0
        size_t opt_offset = nl_msg_start_nested(request,
3462
0
                                                OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3463
3464
0
        struct ct_dpif_zone_limit *zone_limit;
3465
0
        LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
3466
0
            struct ovs_zone_limit req_zone_limit = {
3467
0
                .zone_id = zone_limit->zone,
3468
0
            };
3469
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3470
0
        }
3471
3472
0
        nl_msg_end_nested(request, opt_offset);
3473
0
    }
3474
3475
0
    struct ofpbuf *reply;
3476
0
    int err = nl_transact(NETLINK_GENERIC, request, &reply);
3477
0
    if (err) {
3478
0
        goto out;
3479
0
    }
3480
3481
0
    err = dpif_netlink_zone_limits_from_ofpbuf(reply, zone_limits_reply);
3482
3483
0
out:
3484
0
    ofpbuf_delete(request);
3485
0
    ofpbuf_delete(reply);
3486
0
    return err;
3487
0
}
3488
3489
static int
3490
dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
3491
                           const struct ovs_list *zone_limits)
3492
0
{
3493
0
    if (ovs_ct_limit_family < 0) {
3494
0
        return EOPNOTSUPP;
3495
0
    }
3496
3497
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3498
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3499
0
            NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
3500
0
            OVS_CT_LIMIT_VERSION);
3501
3502
0
    struct ovs_header *ovs_header;
3503
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3504
0
    ovs_header->dp_ifindex = 0;
3505
3506
0
    if (!ovs_list_is_empty(zone_limits)) {
3507
0
        size_t opt_offset =
3508
0
            nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3509
3510
0
        struct ct_dpif_zone_limit *zone_limit;
3511
0
        LIST_FOR_EACH (zone_limit, node, zone_limits) {
3512
0
            struct ovs_zone_limit req_zone_limit = {
3513
0
                .zone_id = zone_limit->zone,
3514
0
            };
3515
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3516
0
        }
3517
0
        nl_msg_end_nested(request, opt_offset);
3518
0
    }
3519
3520
0
    int err = nl_transact(NETLINK_GENERIC, request, NULL);
3521
3522
0
    ofpbuf_delete(request);
3523
0
    return err;
3524
0
}
3525
3526
0
#define NL_TP_NAME_PREFIX "ovs_tp_"
3527
3528
struct dpif_netlink_timeout_policy_protocol {
3529
    uint16_t    l3num;
3530
    uint8_t     l4num;
3531
};
3532
3533
enum OVS_PACKED_ENUM dpif_netlink_support_timeout_policy_protocol {
3534
    DPIF_NL_TP_AF_INET_TCP,
3535
    DPIF_NL_TP_AF_INET_UDP,
3536
    DPIF_NL_TP_AF_INET_ICMP,
3537
    DPIF_NL_TP_AF_INET6_TCP,
3538
    DPIF_NL_TP_AF_INET6_UDP,
3539
    DPIF_NL_TP_AF_INET6_ICMPV6,
3540
    DPIF_NL_TP_MAX
3541
};
3542
3543
0
#define DPIF_NL_ALL_TP ((1UL << DPIF_NL_TP_MAX) - 1)
3544
3545
3546
static struct dpif_netlink_timeout_policy_protocol tp_protos[] = {
3547
    [DPIF_NL_TP_AF_INET_TCP] = { .l3num = AF_INET, .l4num = IPPROTO_TCP },
3548
    [DPIF_NL_TP_AF_INET_UDP] = { .l3num = AF_INET, .l4num = IPPROTO_UDP },
3549
    [DPIF_NL_TP_AF_INET_ICMP] = { .l3num = AF_INET, .l4num = IPPROTO_ICMP },
3550
    [DPIF_NL_TP_AF_INET6_TCP] = { .l3num = AF_INET6, .l4num = IPPROTO_TCP },
3551
    [DPIF_NL_TP_AF_INET6_UDP] = { .l3num = AF_INET6, .l4num = IPPROTO_UDP },
3552
    [DPIF_NL_TP_AF_INET6_ICMPV6] = { .l3num = AF_INET6,
3553
                                     .l4num = IPPROTO_ICMPV6 },
3554
};
3555
3556
static void
3557
dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
3558
                            char **tp_name)
3559
0
{
3560
0
    struct ds ds = DS_EMPTY_INITIALIZER;
3561
0
    ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
3562
0
    ct_dpif_format_ipproto(&ds, l4num);
3563
3564
0
    if (l3num == AF_INET) {
3565
0
        ds_put_cstr(&ds, "4");
3566
0
    } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) {
3567
0
        ds_put_cstr(&ds, "6");
3568
0
    }
3569
3570
0
    ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX);
3571
3572
0
    *tp_name = ds_steal_cstr(&ds);
3573
0
}
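The helper above produces kernel timeout-policy names of the form "ovs_tp_<id>_<l4proto>[4|6]", e.g. "ovs_tp_1_tcp4"; no version suffix is appended for ICMPv6 since it already implies IPv6.  Below is a rough standalone equivalent, with example_tp_name() as an illustrative stand-in and the protocol strings assumed to match what ct_dpif_format_ipproto() emits.

#include <inttypes.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

static void
example_tp_name(uint32_t id, int l3num, int l4num, char *buf, size_t size)
{
    const char *proto = l4num == IPPROTO_TCP ? "tcp"
                        : l4num == IPPROTO_UDP ? "udp"
                        : l4num == IPPROTO_ICMP ? "icmp" : "icmpv6";
    const char *suffix = l3num == AF_INET ? "4"
                         : l4num != IPPROTO_ICMPV6 ? "6" : "";

    snprintf(buf, size, "ovs_tp_%"PRIu32"_%s%s", id, proto, suffix);
}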
3574
3575
static int
3576
dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
3577
                                        uint32_t tp_id, uint16_t dl_type,
3578
                                        uint8_t nw_proto, char **tp_name,
3579
                                        bool *is_generic)
3580
0
{
3581
0
    dpif_netlink_format_tp_name(tp_id,
3582
0
                                dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6,
3583
0
                                nw_proto, tp_name);
3584
0
    *is_generic = false;
3585
0
    return 0;
3586
0
}
3587
3588
static int
3589
dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED,
3590
                             enum ct_features *features)
3591
0
{
3592
0
    if (features != NULL) {
3593
0
#ifndef _WIN32
3594
0
        *features = CONNTRACK_F_ZERO_SNAT;
3595
#else
3596
        *features = 0;
3597
#endif
3598
0
    }
3599
0
    return 0;
3600
0
}
3601
3602
#define CT_DPIF_NL_TP_TCP_MAPPINGS                              \
3603
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT)         \
3604
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV)         \
3605
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, ESTABLISHED, ESTABLISHED)   \
3606
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, FIN_WAIT, FIN_WAIT)         \
3607
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE_WAIT, CLOSE_WAIT)     \
3608
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, LAST_ACK, LAST_ACK)         \
3609
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, TIME_WAIT, TIME_WAIT)       \
3610
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE, CLOSE)               \
3611
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT2, SYN_SENT2)       \
3612
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, RETRANSMIT, RETRANS)        \
3613
0
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, UNACK, UNACK)
3614
3615
#define CT_DPIF_NL_TP_UDP_MAPPINGS                              \
3616
0
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, SINGLE, UNREPLIED)          \
3617
0
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, MULTIPLE, REPLIED)
3618
3619
#define CT_DPIF_NL_TP_ICMP_MAPPINGS                             \
3620
0
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMP, FIRST, TIMEOUT)
3621
3622
#define CT_DPIF_NL_TP_ICMPV6_MAPPINGS                           \
3623
0
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMPV6, FIRST, TIMEOUT)
3624
3625
3626
0
#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)     \
3627
0
if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) {  \
3628
0
    nl_tp->present |= 1 << CTA_TIMEOUT_##PROTO2##_##ATTR2;      \
3629
0
    nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2] =              \
3630
0
        tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1];          \
3631
0
}
3632
3633
static void
3634
dpif_netlink_get_nl_tp_tcp_attrs(const struct ct_dpif_timeout_policy *tp,
3635
                                 struct nl_ct_timeout_policy *nl_tp)
3636
0
{
3637
0
    CT_DPIF_NL_TP_TCP_MAPPINGS
3638
0
}
3639
3640
static void
3641
dpif_netlink_get_nl_tp_udp_attrs(const struct ct_dpif_timeout_policy *tp,
3642
                                 struct nl_ct_timeout_policy *nl_tp)
3643
0
{
3644
0
    CT_DPIF_NL_TP_UDP_MAPPINGS
3645
0
}
3646
3647
static void
3648
dpif_netlink_get_nl_tp_icmp_attrs(const struct ct_dpif_timeout_policy *tp,
3649
                                  struct nl_ct_timeout_policy *nl_tp)
3650
0
{
3651
0
    CT_DPIF_NL_TP_ICMP_MAPPINGS
3652
0
}
3653
3654
static void
3655
dpif_netlink_get_nl_tp_icmpv6_attrs(const struct ct_dpif_timeout_policy *tp,
3656
                                    struct nl_ct_timeout_policy *nl_tp)
3657
0
{
3658
0
    CT_DPIF_NL_TP_ICMPV6_MAPPINGS
3659
0
}
3660
3661
#undef CT_DPIF_NL_TP_MAPPING
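The CT_DPIF_NL_TP_*_MAPPINGS lists above are an X-macro pattern: a single table of (OVS attribute, netlink attribute) pairs is expanded twice with different CT_DPIF_NL_TP_MAPPING bodies, once per copy direction.  A toy sketch of the technique with made-up names (EXAMPLE_MAPPINGS, struct src/dst) follows.

enum src_attr { SRC_SYN_SENT, SRC_ESTABLISHED, SRC_MAX };
enum dst_attr { DST_SYN_SENT, DST_ESTABLISHED, DST_MAX };

#define EXAMPLE_MAPPINGS                 \
    EXAMPLE_MAP(SYN_SENT, SYN_SENT)      \
    EXAMPLE_MAP(ESTABLISHED, ESTABLISHED)

struct src { unsigned int present; unsigned int attrs[SRC_MAX]; };
struct dst { unsigned int present; unsigned int attrs[DST_MAX]; };

static void
copy_present(const struct src *s, struct dst *d)
{
#define EXAMPLE_MAP(A, B)                           \
    if (s->present & (1u << SRC_##A)) {             \
        d->present |= 1u << DST_##B;                \
        d->attrs[DST_##B] = s->attrs[SRC_##A];      \
    }
    EXAMPLE_MAPPINGS
#undef EXAMPLE_MAP
}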
3662
3663
static void
3664
dpif_netlink_get_nl_tp_attrs(const struct ct_dpif_timeout_policy *tp,
3665
                             uint8_t l4num, struct nl_ct_timeout_policy *nl_tp)
3666
0
{
3667
0
    nl_tp->present = 0;
3668
3669
0
    if (l4num == IPPROTO_TCP) {
3670
0
        dpif_netlink_get_nl_tp_tcp_attrs(tp, nl_tp);
3671
0
    } else if (l4num == IPPROTO_UDP) {
3672
0
        dpif_netlink_get_nl_tp_udp_attrs(tp, nl_tp);
3673
0
    } else if (l4num == IPPROTO_ICMP) {
3674
0
        dpif_netlink_get_nl_tp_icmp_attrs(tp, nl_tp);
3675
0
    } else if (l4num == IPPROTO_ICMPV6) {
3676
0
        dpif_netlink_get_nl_tp_icmpv6_attrs(tp, nl_tp);
3677
0
    }
3678
0
}
3679
3680
0
#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)                 \
3681
0
if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) {               \
3682
0
    if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) {          \
3683
0
        if (tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] !=                \
3684
0
            nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]) {                 \
3685
0
            VLOG_WARN_RL(&error_rl, "Inconsistent timeout policy %s "       \
3686
0
                         "attribute %s=%"PRIu32" while %s=%"PRIu32,         \
3687
0
                         nl_tp->name, "CTA_TIMEOUT_"#PROTO2"_"#ATTR2,       \
3688
0
                         nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2],      \
3689
0
                         "CT_DPIF_TP_ATTR_"#PROTO1"_"#ATTR1,                \
3690
0
                         tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]);    \
3691
0
        }                                                                   \
3692
0
    } else {                                                                \
3693
0
        tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1;             \
3694
0
        tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] =                     \
3695
0
            nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2];                   \
3696
0
    }                                                                       \
3697
0
}
3698
3699
static void
3700
dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3701
                                      struct ct_dpif_timeout_policy *tp)
3702
0
{
3703
0
    CT_DPIF_NL_TP_TCP_MAPPINGS
3704
0
}
3705
3706
static void
3707
dpif_netlink_set_ct_dpif_tp_udp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3708
                                      struct ct_dpif_timeout_policy *tp)
3709
0
{
3710
0
    CT_DPIF_NL_TP_UDP_MAPPINGS
3711
0
}
3712
3713
static void
3714
dpif_netlink_set_ct_dpif_tp_icmp_attrs(
3715
    const struct nl_ct_timeout_policy *nl_tp,
3716
    struct ct_dpif_timeout_policy *tp)
3717
0
{
3718
0
    CT_DPIF_NL_TP_ICMP_MAPPINGS
3719
0
}
3720
3721
static void
3722
dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(
3723
    const struct nl_ct_timeout_policy *nl_tp,
3724
    struct ct_dpif_timeout_policy *tp)
3725
0
{
3726
0
    CT_DPIF_NL_TP_ICMPV6_MAPPINGS
3727
0
}
3728
3729
#undef CT_DPIF_NL_TP_MAPPING
3730
3731
static void
3732
dpif_netlink_set_ct_dpif_tp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3733
                                  struct ct_dpif_timeout_policy *tp)
3734
0
{
3735
0
    if (nl_tp->l4num == IPPROTO_TCP) {
3736
0
        dpif_netlink_set_ct_dpif_tp_tcp_attrs(nl_tp, tp);
3737
0
    } else if (nl_tp->l4num == IPPROTO_UDP) {
3738
0
        dpif_netlink_set_ct_dpif_tp_udp_attrs(nl_tp, tp);
3739
0
    } else if (nl_tp->l4num == IPPROTO_ICMP) {
3740
0
        dpif_netlink_set_ct_dpif_tp_icmp_attrs(nl_tp, tp);
3741
0
    } else if (nl_tp->l4num == IPPROTO_ICMPV6) {
3742
0
        dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(nl_tp, tp);
3743
0
    }
3744
0
}
3745
3746
#ifdef _WIN32
3747
static int
3748
dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3749
                                   const struct ct_dpif_timeout_policy *tp)
3750
{
3751
    return EOPNOTSUPP;
3752
}
3753
3754
static int
3755
dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3756
                                   uint32_t tp_id,
3757
                                   struct ct_dpif_timeout_policy *tp)
3758
{
3759
    return EOPNOTSUPP;
3760
}
3761
3762
static int
3763
dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3764
                                   uint32_t tp_id)
3765
{
3766
    return EOPNOTSUPP;
3767
}
3768
3769
static int
3770
dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3771
                                          void **statep)
3772
{
3773
    return EOPNOTSUPP;
3774
}
3775
3776
static int
3777
dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3778
                                         void *state,
3779
                                         struct ct_dpif_timeout_policy **tp)
3780
{
3781
    return EOPNOTSUPP;
3782
}
3783
3784
static int
3785
dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
3786
                                         void *state)
3787
{
3788
    return EOPNOTSUPP;
3789
}
3790
#else
3791
static int
3792
dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3793
                                   const struct ct_dpif_timeout_policy *tp)
3794
0
{
3795
0
    int err = 0;
3796
3797
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3798
0
        struct nl_ct_timeout_policy nl_tp;
3799
0
        char *nl_tp_name;
3800
3801
0
        dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num,
3802
0
                                    tp_protos[i].l4num, &nl_tp_name);
3803
0
        ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name);
3804
0
        free(nl_tp_name);
3805
3806
0
        nl_tp.l3num = tp_protos[i].l3num;
3807
0
        nl_tp.l4num = tp_protos[i].l4num;
3808
0
        dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp);
3809
0
        err = nl_ct_set_timeout_policy(&nl_tp);
3810
0
        if (err) {
3811
0
            VLOG_WARN_RL(&error_rl, "failed to add timeout policy %s (%s)",
3812
0
                         nl_tp.name, ovs_strerror(err));
3813
0
            goto out;
3814
0
        }
3815
0
    }
3816
3817
0
out:
3818
0
    return err;
3819
0
}
3820
3821
static int
3822
dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3823
                                   uint32_t tp_id,
3824
                                   struct ct_dpif_timeout_policy *tp)
3825
0
{
3826
0
    int err = 0;
3827
3828
0
    tp->id = tp_id;
3829
0
    tp->present = 0;
3830
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3831
0
        struct nl_ct_timeout_policy nl_tp;
3832
0
        char *nl_tp_name;
3833
3834
0
        dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3835
0
                                    tp_protos[i].l4num, &nl_tp_name);
3836
0
        err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp);
3837
3838
0
        if (err) {
3839
0
            VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)",
3840
0
                         nl_tp_name, ovs_strerror(err));
3841
0
            free(nl_tp_name);
3842
0
            goto out;
3843
0
        }
3844
0
        free(nl_tp_name);
3845
0
        dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp);
3846
0
    }
3847
3848
0
out:
3849
0
    return err;
3850
0
}
3851
3852
/* Returns 0 if all the sub timeout policies are deleted or do not exist in the
3853
 * kernel.  Returns 1 if any sub timeout policy deletion failed. */
3854
static int
3855
dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3856
                                   uint32_t tp_id)
3857
0
{
3858
0
    int ret = 0;
3859
3860
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3861
0
        char *nl_tp_name;
3862
0
        dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3863
0
                                    tp_protos[i].l4num, &nl_tp_name);
3864
0
        int err = nl_ct_del_timeout_policy(nl_tp_name);
3865
0
        if (err == ENOENT) {
3866
0
            err = 0;
3867
0
        }
3868
0
        if (err) {
3869
0
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
3870
0
            VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)",
3871
0
                         nl_tp_name, ovs_strerror(err));
3872
0
            ret = 1;
3873
0
        }
3874
0
        free(nl_tp_name);
3875
0
    }
3876
3877
0
    return ret;
3878
0
}
3879
3880
struct dpif_netlink_ct_timeout_policy_dump_state {
3881
    struct nl_ct_timeout_policy_dump_state *nl_dump_state;
3882
    struct hmap tp_dump_map;
3883
};
3884
3885
struct dpif_netlink_tp_dump_node {
3886
    struct      hmap_node hmap_node;      /* node in tp_dump_map. */
3887
    struct      ct_dpif_timeout_policy *tp;
3888
    uint32_t    l3_l4_present;
3889
};
3890
3891
static struct dpif_netlink_tp_dump_node *
3892
get_dpif_netlink_tp_dump_node_by_tp_id(uint32_t tp_id,
3893
                                       struct hmap *tp_dump_map)
3894
0
{
3895
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3896
3897
0
    HMAP_FOR_EACH_WITH_HASH (tp_dump_node, hmap_node, hash_int(tp_id, 0),
3898
0
                             tp_dump_map) {
3899
0
        if (tp_dump_node->tp->id == tp_id) {
3900
0
            return tp_dump_node;
3901
0
        }
3902
0
    }
3903
0
    return NULL;
3904
0
}
3905
3906
static void
3907
update_dpif_netlink_tp_dump_node(
3908
    const struct nl_ct_timeout_policy *nl_tp,
3909
    struct dpif_netlink_tp_dump_node *tp_dump_node)
3910
0
{
3911
0
    dpif_netlink_set_ct_dpif_tp_attrs(nl_tp, tp_dump_node->tp);
3912
0
    for (int i = 0; i < DPIF_NL_TP_MAX; ++i) {
3913
0
        if (nl_tp->l3num == tp_protos[i].l3num &&
3914
0
            nl_tp->l4num == tp_protos[i].l4num) {
3915
0
            tp_dump_node->l3_l4_present |= 1 << i;
3916
0
            break;
3917
0
        }
3918
0
    }
3919
0
}
3920
3921
static int
3922
dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3923
                                          void **statep)
3924
0
{
3925
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state;
3926
3927
0
    *statep = dump_state = xzalloc(sizeof *dump_state);
3928
0
    int err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
3929
0
    if (err) {
3930
0
        free(dump_state);
3931
0
        return err;
3932
0
    }
3933
0
    hmap_init(&dump_state->tp_dump_map);
3934
0
    return 0;
3935
0
}
3936
3937
static void
3938
get_and_cleanup_tp_dump_node(struct hmap *hmap,
3939
                             struct dpif_netlink_tp_dump_node *tp_dump_node,
3940
                             struct ct_dpif_timeout_policy *tp)
3941
0
{
3942
0
    hmap_remove(hmap, &tp_dump_node->hmap_node);
3943
0
    *tp = *tp_dump_node->tp;
3944
0
    free(tp_dump_node->tp);
3945
0
    free(tp_dump_node);
3946
0
}
3947
3948
static int
3949
dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3950
                                         void *state,
3951
                                         struct ct_dpif_timeout_policy *tp)
3952
0
{
3953
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3954
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3955
0
    int err;
3956
3957
    /* Dumps all the timeout policies in the kernel. */
3958
0
    do {
3959
0
        struct nl_ct_timeout_policy nl_tp;
3960
0
        uint32_t tp_id;
3961
3962
0
        err =  nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state,
3963
0
                                              &nl_tp);
3964
0
        if (err) {
3965
0
            break;
3966
0
        }
3967
3968
        /* We are only interested in OVS-installed timeout policies. */
3969
0
        if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) {
3970
0
            continue;
3971
0
        }
3972
3973
0
        tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id(
3974
0
                            tp_id, &dump_state->tp_dump_map);
3975
0
        if (!tp_dump_node) {
3976
0
            tp_dump_node = xzalloc(sizeof *tp_dump_node);
3977
0
            tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp);
3978
0
            tp_dump_node->tp->id = tp_id;
3979
0
            hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node,
3980
0
                        hash_int(tp_id, 0));
3981
0
        }
3982
3983
0
        update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node);
3984
3985
        /* Return one ct_dpif_timeout_policy once we have gathered all of its L3/L4
3986
         * sub-pieces. */
3987
0
        if (tp_dump_node->l3_l4_present == DPIF_NL_ALL_TP) {
3988
0
            get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3989
0
                                         tp_dump_node, tp);
3990
0
            break;
3991
0
        }
3992
0
    } while (true);
3993
3994
    /* Dump the incomplete timeout policies. */
3995
0
    if (err == EOF) {
3996
0
        if (!hmap_is_empty(&dump_state->tp_dump_map)) {
3997
0
            struct hmap_node *hmap_node = hmap_first(&dump_state->tp_dump_map);
3998
0
            tp_dump_node = CONTAINER_OF(hmap_node,
3999
0
                                        struct dpif_netlink_tp_dump_node,
4000
0
                                        hmap_node);
4001
0
            get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
4002
0
                                         tp_dump_node, tp);
4003
0
            return 0;
4004
0
        }
4005
0
    }
4006
4007
0
    return err;
4008
0
}
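The dump loop above assembles one ct_dpif_timeout_policy from several kernel entries: each (l3num, l4num) combination sets one bit in l3_l4_present, and the policy is returned once the mask equals DPIF_NL_ALL_TP.  A toy sketch of that completeness check follows; N_PIECES and add_piece_and_check() are illustrative names.

#include <stdbool.h>
#include <stdint.h>

enum { N_PIECES = 6 };                   /* Mirrors DPIF_NL_TP_MAX above. */
#define ALL_PIECES ((1u << N_PIECES) - 1)

static bool
add_piece_and_check(uint32_t *present, int piece_index)
{
    *present |= 1u << piece_index;       /* Record this L3/L4 sub-policy. */
    return *present == ALL_PIECES;       /* True once every piece arrived. */
}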
4009
4010
static int
4011
dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
4012
                                         void *state)
4013
0
{
4014
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
4015
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
4016
4017
0
    int err = nl_ct_timeout_policy_dump_done(dump_state->nl_dump_state);
4018
0
    HMAP_FOR_EACH_POP (tp_dump_node, hmap_node, &dump_state->tp_dump_map) {
4019
0
        free(tp_dump_node->tp);
4020
0
        free(tp_dump_node);
4021
0
    }
4022
0
    hmap_destroy(&dump_state->tp_dump_map);
4023
0
    free(dump_state);
4024
0
    return err;
4025
0
}
4026
#endif
4027
4028

4029
/* Meters */
4030
4031
/* Set of supported meter flags */
4032
#define DP_SUPPORTED_METER_FLAGS_MASK \
4033
0
    (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)
4034
4035
/* Meter support was introduced in Linux 4.15.  In some versions of
4036
 * Linux 4.15, 4.16, and 4.17, there was a bug that never set the id
4037
 * when the meter was created, so all meters essentially had an id of
4038
 * zero.  Check for that condition and disable meters on those kernels. */
4039
static bool probe_broken_meters(struct dpif *);
4040
4041
static void
4042
dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
4043
                        void *stub, size_t size, uint32_t command)
4044
0
{
4045
0
    ofpbuf_use_stub(buf, stub, size);
4046
4047
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
4048
0
                          command, OVS_METER_VERSION);
4049
4050
0
    struct ovs_header *ovs_header;
4051
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4052
0
    ovs_header->dp_ifindex = dpif->dp_ifindex;
4053
0
}
4054
4055
/* Execute meter 'request' in the kernel datapath.  If the command
4056
 * fails, returns a positive errno value.  Otherwise, stores the reply
4057
 * in '*replyp', parses the policy according to 'reply_policy' into the
4058
 * array of Netlink attributes in 'a', and returns 0.  On success, the
4059
 * caller is responsible for calling ofpbuf_delete() on '*replyp'
4060
 * ('a' will contain pointers into '*replyp'). */
4061
static int
4062
dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
4063
                            const struct nl_policy *reply_policy,
4064
                            struct nlattr **a, size_t size_a)
4065
0
{
4066
0
    int error = nl_transact(NETLINK_GENERIC, request, replyp);
4067
0
    ofpbuf_uninit(request);
4068
4069
0
    if (error) {
4070
0
        return error;
4071
0
    }
4072
4073
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
4074
0
    struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
4075
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
4076
0
                                                    sizeof *ovs_header);
4077
0
    if (!nlmsg || !genl || !ovs_header
4078
0
        || nlmsg->nlmsg_type != ovs_meter_family
4079
0
        || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
4080
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4081
0
        VLOG_DBG_RL(&rl,
4082
0
                    "Kernel module response to meter tranaction is invalid");
4083
0
        return EINVAL;
4084
0
    }
4085
0
    return 0;
4086
0
}
4087
4088
static void
4089
dpif_netlink_meter_get_features(const struct dpif *dpif_,
4090
                                struct ofputil_meter_features *features)
4091
0
{
4092
0
    if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
4093
0
        return;
4094
0
    }
4095
4096
0
    struct ofpbuf buf, *msg;
4097
0
    uint64_t stub[1024 / 8];
4098
4099
0
    static const struct nl_policy ovs_meter_features_policy[] = {
4100
0
        [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
4101
0
        [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
4102
0
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
4103
0
    };
4104
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];
4105
4106
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4107
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
4108
0
                            OVS_METER_CMD_FEATURES);
4109
0
    if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
4110
0
                                    ARRAY_SIZE(ovs_meter_features_policy))) {
4111
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4112
0
        VLOG_INFO_RL(&rl,
4113
0
                  "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
4114
0
        return;
4115
0
    }
4116
4117
0
    features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
4118
0
    features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);
4119
4120
    /* OVS_METER_ATTR_BANDS is a nested attribute containing zero or more nested
4121
     * band attributes.  */
4122
0
    if (a[OVS_METER_ATTR_BANDS]) {
4123
0
        const struct nlattr *nla;
4124
0
        size_t left;
4125
4126
0
        NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
4127
0
            const struct nlattr *band_nla;
4128
0
            size_t band_left;
4129
4130
0
            NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
4131
0
                if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
4132
0
                    if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
4133
0
                        switch (nl_attr_get_u32(band_nla)) {
4134
0
                        case OVS_METER_BAND_TYPE_DROP:
4135
0
                            features->band_types |= 1 << OFPMBT13_DROP;
4136
0
                            break;
4137
0
                        }
4138
0
                    }
4139
0
                }
4140
0
            }
4141
0
        }
4142
0
    }
4143
0
    features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
4144
4145
0
    ofpbuf_delete(msg);
4146
0
}
4147
4148
static int
4149
dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
4150
                         struct ofputil_meter_config *config)
4151
0
{
4152
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4153
0
    struct ofpbuf buf, *msg;
4154
0
    uint64_t stub[1024 / 8];
4155
4156
0
    static const struct nl_policy ovs_meter_set_response_policy[] = {
4157
0
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
4158
0
    };
4159
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];
4160
4161
0
    if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
4162
0
        return EBADF; /* Unsupported flags set */
4163
0
    }
4164
4165
0
    for (size_t i = 0; i < config->n_bands; i++) {
4166
0
        switch (config->bands[i].type) {
4167
0
        case OFPMBT13_DROP:
4168
0
            break;
4169
0
        default:
4170
0
            return ENODEV; /* Unsupported band type */
4171
0
        }
4172
0
    }
4173
4174
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
4175
4176
0
    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
4177
4178
0
    if (config->flags & OFPMF13_KBPS) {
4179
0
        nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
4180
0
    }
4181
4182
0
    size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
4183
    /* Bands */
4184
0
    for (size_t i = 0; i < config->n_bands; ++i) {
4185
0
        struct ofputil_meter_band * band = &config->bands[i];
4186
0
        uint32_t band_type;
4187
4188
0
        size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);
4189
4190
0
        switch (band->type) {
4191
0
        case OFPMBT13_DROP:
4192
0
            band_type = OVS_METER_BAND_TYPE_DROP;
4193
0
            break;
4194
0
        default:
4195
0
            band_type = OVS_METER_BAND_TYPE_UNSPEC;
4196
0
        }
4197
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
4198
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
4199
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
4200
0
                       config->flags & OFPMF13_BURST ?
4201
0
                       band->burst_size : band->rate);
4202
0
        nl_msg_end_nested(&buf, band_offset);
4203
0
    }
4204
0
    nl_msg_end_nested(&buf, bands_offset);
4205
4206
0
    int error = dpif_netlink_meter_transact(&buf, &msg,
4207
0
                                    ovs_meter_set_response_policy, a,
4208
0
                                    ARRAY_SIZE(ovs_meter_set_response_policy));
4209
0
    if (error) {
4210
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4211
0
        VLOG_INFO_RL(&rl,
4212
0
                     "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
4213
0
        return error;
4214
0
    }
4215
4216
0
    if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
4217
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4218
0
        VLOG_INFO_RL(&rl,
4219
0
                     "Kernel returned a different meter id than requested");
4220
0
    }
4221
0
    ofpbuf_delete(msg);
4222
0
    return 0;
4223
0
}
4224
4225
static int
4226
dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
4227
                       struct ofputil_meter_config *config)
4228
0
{
4229
0
    int err;
4230
4231
0
    if (probe_broken_meters(dpif_)) {
4232
0
        return ENOMEM;
4233
0
    }
4234
4235
0
    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
4236
0
    if (!err && netdev_is_flow_api_enabled()) {
4237
0
        meter_offload_set(meter_id, config);
4238
0
    }
4239
4240
0
    return err;
4241
0
}
4242
4243
/* Retrieve statistics and/or delete meter 'meter_id'.  Statistics are
4244
 * stored in 'stats', if it is not null.  If 'command' is
4245
 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
4246
 * retrieved.  If 'command' is OVS_METER_CMD_GET, then statistics are
4247
 * simply retrieved. */
4248
static int
4249
dpif_netlink_meter_get_stats(const struct dpif *dpif_,
4250
                             ofproto_meter_id meter_id,
4251
                             struct ofputil_meter_stats *stats,
4252
                             uint16_t max_bands,
4253
                             enum ovs_meter_cmd command)
4254
0
{
4255
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4256
0
    struct ofpbuf buf, *msg;
4257
0
    uint64_t stub[1024 / 8];
4258
4259
0
    static const struct nl_policy ovs_meter_stats_policy[] = {
4260
0
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
4261
0
        [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
4262
0
                                   .optional = true},
4263
0
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
4264
0
    };
4265
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
4266
4267
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
4268
4269
0
    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
4270
4271
0
    int error = dpif_netlink_meter_transact(&buf, &msg,
4272
0
                                            ovs_meter_stats_policy, a,
4273
0
                                            ARRAY_SIZE(ovs_meter_stats_policy));
4274
0
    if (error) {
4275
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4276
0
        VLOG_INFO_RL(&rl, "dpif_netlink_meter_transact %s failed",
4277
0
                     command == OVS_METER_CMD_GET ? "get" : "del");
4278
0
        return error;
4279
0
    }
4280
4281
0
    if (stats
4282
0
        && a[OVS_METER_ATTR_ID]
4283
0
        && a[OVS_METER_ATTR_STATS]
4284
0
        && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) == meter_id.uint32) {
4285
        /* return stats */
4286
0
        const struct ovs_flow_stats *stat;
4287
0
        const struct nlattr *nla;
4288
0
        size_t left;
4289
4290
0
        stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
4291
0
        stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
4292
0
        stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
4293
4294
0
        if (a[OVS_METER_ATTR_BANDS]) {
4295
0
            size_t n_bands = 0;
4296
0
            NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
4297
0
                const struct nlattr *band_nla;
4298
0
                band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
4299
0
                if (band_nla && nl_attr_get_size(band_nla) \
4300
0
                                == sizeof(struct ovs_flow_stats)) {
4301
0
                    stat = nl_attr_get(band_nla);
4302
4303
0
                    if (n_bands < max_bands) {
4304
0
                        stats->bands[n_bands].packet_count
4305
0
                            = get_32aligned_u64(&stat->n_packets);
4306
0
                        stats->bands[n_bands].byte_count
4307
0
                            = get_32aligned_u64(&stat->n_bytes);
4308
0
                        ++n_bands;
4309
0
                    }
4310
0
                } else {
4311
0
                    stats->bands[n_bands].packet_count = 0;
4312
0
                    stats->bands[n_bands].byte_count = 0;
4313
0
                    ++n_bands;
4314
0
                }
4315
0
            }
4316
0
            stats->n_bands = n_bands;
4317
0
        } else {
4318
            /* For a non-existent meter, return 0 stats. */
4319
0
            stats->n_bands = 0;
4320
0
        }
4321
0
    }
4322
4323
0
    ofpbuf_delete(msg);
4324
0
    return error;
4325
0
}
4326
4327
static int
4328
dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
4329
                       struct ofputil_meter_stats *stats, uint16_t max_bands)
4330
0
{
4331
0
    int err;
4332
4333
0
    err = dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
4334
0
                                       OVS_METER_CMD_GET);
4335
0
    if (!err && netdev_is_flow_api_enabled()) {
4336
0
        meter_offload_get(meter_id, stats);
4337
0
    }
4338
4339
0
    return err;
4340
0
}
4341
4342
static int
4343
dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
4344
                       struct ofputil_meter_stats *stats, uint16_t max_bands)
4345
0
{
4346
0
    int err;
4347
4348
0
    err  = dpif_netlink_meter_get_stats(dpif, meter_id, stats,
4349
0
                                        max_bands, OVS_METER_CMD_DEL);
4350
0
    if (!err && netdev_is_flow_api_enabled()) {
4351
0
        meter_offload_del(meter_id, stats);
4352
0
    }
4353
4354
0
    return err;
4355
0
}
4356
4357
static bool
4358
probe_broken_meters__(struct dpif *dpif)
4359
0
{
4360
    /* This test is destructive if a probe occurs while ovs-vswitchd is
4361
     * running (e.g., an ovs-dpctl meter command is called), so choose a
4362
     * random high meter id to make this less likely to occur. */
4363
0
    ofproto_meter_id id1 = { 54545401 };
4364
0
    ofproto_meter_id id2 = { 54545402 };
4365
0
    struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
4366
0
    struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
4367
0
    struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};
4368
4369
    /* Try adding two meters and make sure that they both come back with
4370
     * the proper meter id.  Use the "__" version so that we don't cause
4371
     * a recursive deadlock. */
4372
0
    dpif_netlink_meter_set__(dpif, id1, &config1);
4373
0
    dpif_netlink_meter_set__(dpif, id2, &config2);
4374
4375
0
    if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
4376
0
        || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
4377
0
        VLOG_INFO("The kernel module has a broken meter implementation.");
4378
0
        return true;
4379
0
    }
4380
4381
0
    dpif_netlink_meter_del(dpif, id1, NULL, 0);
4382
0
    dpif_netlink_meter_del(dpif, id2, NULL, 0);
4383
4384
0
    return false;
4385
0
}
4386
4387
static bool
4388
probe_broken_meters(struct dpif *dpif)
4389
0
{
4390
    /* This is a once-only test because currently OVS has at most a single
4391
     * Netlink capable datapath on any given platform. */
4392
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4393
4394
0
    static bool broken_meters = false;
4395
0
    if (ovsthread_once_start(&once)) {
4396
0
        broken_meters = probe_broken_meters__(dpif);
4397
0
        ovsthread_once_done(&once);
4398
0
    }
4399
0
    return broken_meters;
4400
0
}
4401
4402
4403
static int
4404
dpif_netlink_cache_get_supported_levels(struct dpif *dpif_, uint32_t *levels)
4405
0
{
4406
0
    struct dpif_netlink_dp dp;
4407
0
    struct ofpbuf *buf;
4408
0
    int error;
4409
4410
    /* The kernel datapath supports at most one level of cache.
4411
     * Unfortunately, there is no way to detect if the older kernel module has
4412
     * the cache feature.  For now, we only report the cache information if the
4413
     * kernel module reports the OVS_DP_ATTR_MASKS_CACHE_SIZE attribute. */
4414
4415
0
    *levels = 0;
4416
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
4417
0
    if (!error) {
4418
4419
0
        if (dp.cache_size != UINT32_MAX) {
4420
0
            *levels = 1;
4421
0
        }
4422
0
        ofpbuf_delete(buf);
4423
0
    }
4424
4425
0
    return error;
4426
0
}
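The level check above relies on a sentinel: cache_size is assumed to be left at UINT32_MAX by dpif_netlink_dp_get() when the kernel does not report OVS_DP_ATTR_MASKS_CACHE_SIZE, so comparing against UINT32_MAX doubles as a feature probe.  A one-line sketch of that test; masks_cache_supported() is an illustrative name.

#include <stdbool.h>
#include <stdint.h>

static bool
masks_cache_supported(uint32_t reported_cache_size)
{
    /* UINT32_MAX is the "attribute was absent" sentinel assumed above. */
    return reported_cache_size != UINT32_MAX;
}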
4427
4428
static int
4429
dpif_netlink_cache_get_name(struct dpif *dpif_ OVS_UNUSED, uint32_t level,
4430
                            const char **name)
4431
0
{
4432
0
    if (level != 0) {
4433
0
        return EINVAL;
4434
0
    }
4435
4436
0
    *name = "masks-cache";
4437
0
    return 0;
4438
0
}
4439
4440
static int
4441
dpif_netlink_cache_get_size(struct dpif *dpif_, uint32_t level, uint32_t *size)
4442
0
{
4443
0
    struct dpif_netlink_dp dp;
4444
0
    struct ofpbuf *buf;
4445
0
    int error;
4446
4447
0
    if (level != 0) {
4448
0
        return EINVAL;
4449
0
    }
4450
4451
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
4452
0
    if (!error) {
4453
4454
0
        ofpbuf_delete(buf);
4455
4456
0
        if (dp.cache_size == UINT32_MAX) {
4457
0
            return EOPNOTSUPP;
4458
0
        }
4459
0
        *size = dp.cache_size;
4460
0
    }
4461
0
    return error;
4462
0
}
4463
4464
static int
4465
dpif_netlink_cache_set_size(struct dpif *dpif_, uint32_t level, uint32_t size)
4466
0
{
4467
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4468
0
    struct dpif_netlink_dp request, reply;
4469
0
    struct ofpbuf *bufp;
4470
0
    int error;
4471
4472
0
    size = ROUND_UP_POW2(size);
4473
4474
0
    if (level != 0) {
4475
0
        return EINVAL;
4476
0
    }
4477
4478
0
    dpif_netlink_dp_init(&request);
4479
0
    request.cmd = OVS_DP_CMD_SET;
4480
0
    request.name = dpif_->base_name;
4481
0
    request.dp_ifindex = dpif->dp_ifindex;
4482
0
    request.cache_size = size;
4483
    /* We need to set the dpif user_features, as the kernel module assumes the
4484
     * OVS_DP_ATTR_USER_FEATURES attribute is always present. If not, it will
4485
     * reset all the features. */
4486
0
    request.user_features = dpif->user_features;
4487
4488
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
4489
0
    if (!error) {
4490
0
        ofpbuf_delete(bufp);
4491
0
        if (reply.cache_size != size) {
4492
0
            return EINVAL;
4493
0
        }
4494
0
    }
4495
4496
0
    return error;
4497
0
}
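dpif_netlink_cache_set_size() rounds the requested size up with ROUND_UP_POW2() before sending it to the kernel, and rejects the request as EINVAL if the kernel does not echo the rounded value back.  Below is a standalone bit-twiddling sketch of that rounding; round_up_pow2_u32() is an illustrative helper, not the OVS macro, and note that it maps 0 to 0.

#include <stdint.h>

static uint32_t
round_up_pow2_u32(uint32_t x)
{
    x -= 1;                     /* Powers of two map to themselves. */
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return x + 1;
}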
4498
4499

4500
const struct dpif_class dpif_netlink_class = {
4501
    "system",
4502
    false,                      /* cleanup_required */
4503
    false,                      /* synced_dp_layers */
4504
    NULL,                       /* init */
4505
    dpif_netlink_enumerate,
4506
    NULL,
4507
    dpif_netlink_open,
4508
    dpif_netlink_close,
4509
    dpif_netlink_destroy,
4510
    dpif_netlink_run,
4511
    NULL,                       /* wait */
4512
    dpif_netlink_get_stats,
4513
    dpif_netlink_set_features,
4514
    dpif_netlink_port_add,
4515
    dpif_netlink_port_del,
4516
    NULL,                       /* port_set_config */
4517
    dpif_netlink_port_query_by_number,
4518
    dpif_netlink_port_query_by_name,
4519
    dpif_netlink_port_get_pid,
4520
    dpif_netlink_port_dump_start,
4521
    dpif_netlink_port_dump_next,
4522
    dpif_netlink_port_dump_done,
4523
    dpif_netlink_port_poll,
4524
    dpif_netlink_port_poll_wait,
4525
    dpif_netlink_flow_flush,
4526
    dpif_netlink_flow_dump_create,
4527
    dpif_netlink_flow_dump_destroy,
4528
    dpif_netlink_flow_dump_thread_create,
4529
    dpif_netlink_flow_dump_thread_destroy,
4530
    dpif_netlink_flow_dump_next,
4531
    dpif_netlink_operate,
4532
    NULL,                       /* offload_stats_get */
4533
    dpif_netlink_recv_set,
4534
    dpif_netlink_handlers_set,
4535
    dpif_netlink_number_handlers_required,
4536
    NULL,                       /* set_config */
4537
    dpif_netlink_queue_to_priority,
4538
    dpif_netlink_recv,
4539
    dpif_netlink_recv_wait,
4540
    dpif_netlink_recv_purge,
4541
    NULL,                       /* register_dp_purge_cb */
4542
    NULL,                       /* register_upcall_cb */
4543
    NULL,                       /* enable_upcall */
4544
    NULL,                       /* disable_upcall */
4545
    dpif_netlink_get_datapath_version, /* get_datapath_version */
4546
    dpif_netlink_ct_dump_start,
4547
    dpif_netlink_ct_dump_next,
4548
    dpif_netlink_ct_dump_done,
4549
    NULL,                       /* ct_exp_dump_start */
4550
    NULL,                       /* ct_exp_dump_next */
4551
    NULL,                       /* ct_exp_dump_done */
4552
    dpif_netlink_ct_flush,
4553
    NULL,                       /* ct_set_maxconns */
4554
    NULL,                       /* ct_get_maxconns */
4555
    NULL,                       /* ct_get_nconns */
4556
    NULL,                       /* ct_set_tcp_seq_chk */
4557
    NULL,                       /* ct_get_tcp_seq_chk */
4558
    NULL,                       /* ct_set_sweep_interval */
4559
    NULL,                       /* ct_get_sweep_interval */
4560
    dpif_netlink_ct_set_limits,
4561
    dpif_netlink_ct_get_limits,
4562
    dpif_netlink_ct_del_limits,
4563
    dpif_netlink_ct_set_timeout_policy,
4564
    dpif_netlink_ct_get_timeout_policy,
4565
    dpif_netlink_ct_del_timeout_policy,
4566
    dpif_netlink_ct_timeout_policy_dump_start,
4567
    dpif_netlink_ct_timeout_policy_dump_next,
4568
    dpif_netlink_ct_timeout_policy_dump_done,
4569
    dpif_netlink_ct_get_timeout_policy_name,
4570
    dpif_netlink_ct_get_features,
4571
    NULL,                       /* ipf_set_enabled */
4572
    NULL,                       /* ipf_set_min_frag */
4573
    NULL,                       /* ipf_set_max_nfrags */
4574
    NULL,                       /* ipf_get_status */
4575
    NULL,                       /* ipf_dump_start */
4576
    NULL,                       /* ipf_dump_next */
4577
    NULL,                       /* ipf_dump_done */
4578
    dpif_netlink_meter_get_features,
4579
    dpif_netlink_meter_set,
4580
    dpif_netlink_meter_get,
4581
    dpif_netlink_meter_del,
4582
    NULL,                       /* bond_add */
4583
    NULL,                       /* bond_del */
4584
    NULL,                       /* bond_stats_get */
4585
    dpif_netlink_cache_get_supported_levels,
4586
    dpif_netlink_cache_get_name,
4587
    dpif_netlink_cache_get_size,
4588
    dpif_netlink_cache_set_size,
4589
};
4590
4591
static int
4592
dpif_netlink_init(void)
4593
0
{
4594
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4595
0
    static int error;
4596
4597
0
    if (ovsthread_once_start(&once)) {
4598
0
        error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
4599
0
                                      &ovs_datapath_family);
4600
0
        if (error) {
4601
0
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
4602
0
                      "The Open vSwitch kernel module is probably not loaded.",
4603
0
                      OVS_DATAPATH_FAMILY);
4604
0
        }
4605
0
        if (!error) {
4606
0
            error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
4607
0
        }
4608
0
        if (!error) {
4609
0
            error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
4610
0
        }
4611
0
        if (!error) {
4612
0
            error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
4613
0
                                          &ovs_packet_family);
4614
0
        }
4615
0
        if (!error) {
4616
0
            error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
4617
0
                                           &ovs_vport_mcgroup);
4618
0
        }
4619
0
        if (!error) {
4620
0
            if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
4621
0
                VLOG_INFO("The kernel module does not support meters.");
4622
0
            }
4623
0
        }
4624
0
        if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
4625
0
                                  &ovs_ct_limit_family)) {
4626
0
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
4627
0
                      "Please update the Open vSwitch kernel module to enable "
4628
0
                      "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
4629
0
        }
4630
4631
0
        ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
4632
4633
0
        unixctl_command_register("dpif-netlink/dispatch-mode", "", 0, 0,
4634
0
                                 dpif_netlink_unixctl_dispatch_mode, NULL);
4635
4636
0
        ovsthread_once_done(&once);
4637
0
    }
4638
4639
0
    return error;
4640
0
}
4641
4642
bool
4643
dpif_netlink_is_internal_device(const char *name)
4644
0
{
4645
0
    struct dpif_netlink_vport reply;
4646
0
    struct ofpbuf *buf;
4647
0
    int error;
4648
4649
0
    error = dpif_netlink_vport_get(name, &reply, &buf);
4650
0
    if (!error) {
4651
0
        ofpbuf_delete(buf);
4652
0
    } else if (error != ENODEV && error != ENOENT) {
4653
0
        VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
4654
0
                     name, ovs_strerror(error));
4655
0
    }
4656
4657
0
    return reply.type == OVS_VPORT_TYPE_INTERNAL;
4658
0
}
4659
4660
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4661
 * by Netlink attributes, into 'vport'.  Returns 0 if successful, otherwise a
4662
 * positive errno value.
4663
 *
4664
 * 'vport' will contain pointers into 'buf', so the caller should not free
4665
 * 'buf' while 'vport' is still in use. */
4666
static int
4667
dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
4668
                             const struct ofpbuf *buf)
4669
0
{
4670
0
    static const struct nl_policy ovs_vport_policy[] = {
4671
0
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
4672
0
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
4673
0
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
4674
0
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
4675
0
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
4676
0
                                   .optional = true },
4677
0
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
4678
0
        [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
4679
0
        [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NL_A_NESTED,
4680
0
                                          .optional = true },
4681
0
    };
4682
4683
0
    dpif_netlink_vport_init(vport);
4684
4685
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4686
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4687
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4688
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4689
4690
0
    struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
4691
0
    if (!nlmsg || !genl || !ovs_header
4692
0
        || nlmsg->nlmsg_type != ovs_vport_family
4693
0
        || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
4694
0
                            ARRAY_SIZE(ovs_vport_policy))) {
4695
0
        return EINVAL;
4696
0
    }
4697
4698
0
    vport->cmd = genl->cmd;
4699
0
    vport->dp_ifindex = ovs_header->dp_ifindex;
4700
0
    vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
4701
0
    vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
4702
0
    vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
4703
0
    if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
4704
0
        vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
4705
0
                               / (sizeof *vport->upcall_pids);
4706
0
        vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
4707
4708
0
    }
4709
0
    if (a[OVS_VPORT_ATTR_STATS]) {
4710
0
        vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
4711
0
    }
4712
0
    if (a[OVS_VPORT_ATTR_UPCALL_STATS]) {
4713
0
        const struct nlattr *nla;
4714
0
        size_t left;
4715
4716
0
        NL_NESTED_FOR_EACH (nla, left, a[OVS_VPORT_ATTR_UPCALL_STATS]) {
4717
0
            if (nl_attr_type(nla) == OVS_VPORT_UPCALL_ATTR_SUCCESS) {
4718
0
                vport->upcall_success = nl_attr_get_u64(nla);
4719
0
            } else if (nl_attr_type(nla) == OVS_VPORT_UPCALL_ATTR_FAIL) {
4720
0
                vport->upcall_fail = nl_attr_get_u64(nla);
4721
0
            }
4722
0
        }
4723
0
    } else {
4724
0
        vport->upcall_success = UINT64_MAX;
4725
0
        vport->upcall_fail = UINT64_MAX;
4726
0
    }
4727
0
    if (a[OVS_VPORT_ATTR_OPTIONS]) {
4728
0
        vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
4729
0
        vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
4730
0
    }
4731
0
    if (a[OVS_VPORT_ATTR_NETNSID]) {
4732
0
        netnsid_set(&vport->netnsid,
4733
0
                    nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
4734
0
    } else {
4735
0
        netnsid_set_local(&vport->netnsid);
4736
0
    }
4737
0
    return 0;
4738
0
}
4739
4740
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
4741
 * followed by Netlink attributes corresponding to 'vport'. */
4742
static void
4743
dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
4744
                             struct ofpbuf *buf)
4745
0
{
4746
0
    struct ovs_header *ovs_header;
4747
4748
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
4749
0
                          vport->cmd, OVS_VPORT_VERSION);
4750
4751
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4752
0
    ovs_header->dp_ifindex = vport->dp_ifindex;
4753
4754
0
    if (vport->port_no != ODPP_NONE) {
4755
0
        nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
4756
0
    }
4757
4758
0
    if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
4759
0
        nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
4760
0
    }
4761
4762
0
    if (vport->name) {
4763
0
        nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
4764
0
    }
4765
4766
0
    if (vport->upcall_pids) {
4767
0
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
4768
0
                          vport->upcall_pids,
4769
0
                          vport->n_upcall_pids * sizeof *vport->upcall_pids);
4770
0
    }
4771
4772
0
    if (vport->stats) {
4773
0
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
4774
0
                          vport->stats, sizeof *vport->stats);
4775
0
    }
4776
4777
0
    if (vport->options) {
4778
0
        nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
4779
0
                          vport->options, vport->options_len);
4780
0
    }
4781
0
}
4782
4783
/* Clears 'vport' to "empty" values. */
4784
void
4785
dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
4786
0
{
4787
0
    memset(vport, 0, sizeof *vport);
4788
0
    vport->port_no = ODPP_NONE;
4789
0
}
4790
4791
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4792
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4793
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4794
 * result of the command is expected to be an ovs_vport also, which is decoded
4795
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
4796
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4797
int
4798
dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
4799
                            struct dpif_netlink_vport *reply,
4800
                            struct ofpbuf **bufp)
4801
0
{
4802
0
    struct ofpbuf *request_buf;
4803
0
    int error;
4804
4805
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4806
4807
0
    error = dpif_netlink_init();
4808
0
    if (error) {
4809
0
        if (reply) {
4810
0
            *bufp = NULL;
4811
0
            dpif_netlink_vport_init(reply);
4812
0
        }
4813
0
        return error;
4814
0
    }
4815
4816
0
    request_buf = ofpbuf_new(1024);
4817
0
    dpif_netlink_vport_to_ofpbuf(request, request_buf);
4818
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4819
0
    ofpbuf_delete(request_buf);
4820
4821
0
    if (reply) {
4822
0
        if (!error) {
4823
0
            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
4824
0
        }
4825
0
        if (error) {
4826
0
            dpif_netlink_vport_init(reply);
4827
0
            ofpbuf_delete(*bufp);
4828
0
            *bufp = NULL;
4829
0
        }
4830
0
    }
4831
0
    return error;
4832
0
}
4833
4834
/* Obtains information about the kernel vport named 'name' and stores it into
4835
 * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
4836
 * longer needed ('reply' will contain pointers into '*bufp').  */
4837
int
4838
dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
4839
                       struct ofpbuf **bufp)
4840
0
{
4841
0
    struct dpif_netlink_vport request;
4842
4843
0
    dpif_netlink_vport_init(&request);
4844
0
    request.cmd = OVS_VPORT_CMD_GET;
4845
0
    request.name = name;
4846
4847
0
    return dpif_netlink_vport_transact(&request, reply, bufp);
4848
0
}
4849
4850
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4851
 * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
4852
 * positive errno value.
4853
 *
4854
 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
4855
 * while 'dp' is still in use. */
4856
static int
4857
dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
4858
0
{
4859
0
    static const struct nl_policy ovs_datapath_policy[] = {
4860
0
        [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
4861
0
        [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
4862
0
                                .optional = true },
4863
0
        [OVS_DP_ATTR_MEGAFLOW_STATS] = {
4864
0
                        NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
4865
0
                        .optional = true },
4866
0
        [OVS_DP_ATTR_USER_FEATURES] = {
4867
0
                        .type = NL_A_U32,
4868
0
                        .optional = true },
4869
0
        [OVS_DP_ATTR_MASKS_CACHE_SIZE] = {
4870
0
                        .type = NL_A_U32,
4871
0
                        .optional = true },
4872
0
    };
4873
4874
0
    dpif_netlink_dp_init(dp);
4875
4876
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4877
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4878
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4879
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4880
4881
0
    struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
4882
0
    if (!nlmsg || !genl || !ovs_header
4883
0
        || nlmsg->nlmsg_type != ovs_datapath_family
4884
0
        || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
4885
0
                            ARRAY_SIZE(ovs_datapath_policy))) {
4886
0
        return EINVAL;
4887
0
    }
4888
4889
0
    dp->cmd = genl->cmd;
4890
0
    dp->dp_ifindex = ovs_header->dp_ifindex;
4891
0
    dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
4892
0
    if (a[OVS_DP_ATTR_STATS]) {
4893
0
        dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
4894
0
    }
4895
4896
0
    if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
4897
0
        dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
4898
0
    }
4899
4900
0
    if (a[OVS_DP_ATTR_USER_FEATURES]) {
4901
0
        dp->user_features = nl_attr_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
4902
0
    }
4903
4904
0
    if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
4905
0
        dp->cache_size = nl_attr_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
4906
0
    } else {
4907
0
        dp->cache_size = UINT32_MAX;
4908
0
    }
4909
4910
0
    return 0;
4911
0
}
4912
4913
/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
4914
static void
4915
dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
4916
0
{
4917
0
    struct ovs_header *ovs_header;
4918
4919
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
4920
0
                          NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
4921
0
                          OVS_DATAPATH_VERSION);
4922
4923
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4924
0
    ovs_header->dp_ifindex = dp->dp_ifindex;
4925
4926
0
    if (dp->name) {
4927
0
        nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
4928
0
    }
4929
4930
0
    if (dp->upcall_pid) {
4931
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
4932
0
    }
4933
4934
0
    if (dp->user_features) {
4935
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
4936
0
    }
4937
4938
0
    if (dp->upcall_pids) {
4939
0
        nl_msg_put_unspec(buf, OVS_DP_ATTR_PER_CPU_PIDS, dp->upcall_pids,
4940
0
                          sizeof *dp->upcall_pids * dp->n_upcall_pids);
4941
0
    }
4942
4943
0
    if (dp->cache_size != UINT32_MAX) {
4944
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_MASKS_CACHE_SIZE, dp->cache_size);
4945
0
    }
4946
4947
    /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
4948
0
}
4949
4950
/* Clears 'dp' to "empty" values. */
4951
static void
4952
dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
4953
0
{
4954
0
    memset(dp, 0, sizeof *dp);
4955
0
    dp->cache_size = UINT32_MAX;
4956
0
}
4957
4958
static void
4959
dpif_netlink_dp_dump_start(struct nl_dump *dump)
4960
0
{
4961
0
    struct dpif_netlink_dp request;
4962
0
    struct ofpbuf *buf;
4963
4964
0
    dpif_netlink_dp_init(&request);
4965
0
    request.cmd = OVS_DP_CMD_GET;
4966
4967
0
    buf = ofpbuf_new(1024);
4968
0
    dpif_netlink_dp_to_ofpbuf(&request, buf);
4969
0
    nl_dump_start(dump, NETLINK_GENERIC, buf);
4970
0
    ofpbuf_delete(buf);
4971
0
}
4972
4973
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4974
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4975
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4976
 * result of the command is expected to be of the same form, which is decoded
4977
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
4978
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4979
static int
4980
dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
4981
                         struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
4982
0
{
4983
0
    struct ofpbuf *request_buf;
4984
0
    int error;
4985
4986
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4987
4988
0
    request_buf = ofpbuf_new(1024);
4989
0
    dpif_netlink_dp_to_ofpbuf(request, request_buf);
4990
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4991
0
    ofpbuf_delete(request_buf);
4992
4993
0
    if (reply) {
4994
0
        dpif_netlink_dp_init(reply);
4995
0
        if (!error) {
4996
0
            error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
4997
0
        }
4998
0
        if (error) {
4999
0
            ofpbuf_delete(*bufp);
5000
0
            *bufp = NULL;
5001
0
        }
5002
0
    }
5003
0
    return error;
5004
0
}
5005
5006
/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
5007
 * The caller must free '*bufp' when the reply is no longer needed ('reply'
5008
 * will contain pointers into '*bufp').  */
5009
static int
5010
dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
5011
                    struct ofpbuf **bufp)
5012
0
{
5013
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
5014
0
    struct dpif_netlink_dp request;
5015
5016
0
    dpif_netlink_dp_init(&request);
5017
0
    request.cmd = OVS_DP_CMD_GET;
5018
0
    request.dp_ifindex = dpif->dp_ifindex;
5019
5020
0
    return dpif_netlink_dp_transact(&request, reply, bufp);
5021
0
}
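/* A minimal caller sketch (not part of the original file), using the
 * hypothetical name example_log_masks_cache_size().  It illustrates the
 * ownership contract of dpif_netlink_dp_get() documented above: 'dp' borrows
 * memory from '*buf', so the buffer is released only after the reply has
 * been consumed. */
static void
example_log_masks_cache_size(const struct dpif *dpif)
{
    struct dpif_netlink_dp dp;
    struct ofpbuf *buf;

    if (!dpif_netlink_dp_get(dpif, &dp, &buf)) {
        if (dp.cache_size != UINT32_MAX) {
            VLOG_INFO("%s: masks cache holds %"PRIu32" entries",
                      dp.name, dp.cache_size);
        }
        ofpbuf_delete(buf);     /* 'dp' must not be dereferenced after this. */
    }
}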
5022
5023
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
5024
 * by Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
5025
 * positive errno value.
5026
 *
5027
 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
5028
 * while 'flow' is still in use. */
5029
static int
5030
dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
5031
                              const struct ofpbuf *buf)
5032
0
{
5033
0
    static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
5034
0
        [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
5035
0
        [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
5036
0
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
5037
0
        [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
5038
0
                                  .optional = true },
5039
0
        [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
5040
0
        [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
5041
0
        [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
5042
        /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
5043
        /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
5044
        /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
5045
0
    };
5046
5047
0
    dpif_netlink_flow_init(flow);
5048
5049
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
5050
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
5051
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
5052
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
5053
5054
0
    struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
5055
0
    if (!nlmsg || !genl || !ovs_header
5056
0
        || nlmsg->nlmsg_type != ovs_flow_family
5057
0
        || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
5058
0
                            ARRAY_SIZE(ovs_flow_policy))) {
5059
0
        return EINVAL;
5060
0
    }
5061
0
    if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
5062
0
        return EINVAL;
5063
0
    }
5064
5065
0
    flow->nlmsg_flags = nlmsg->nlmsg_flags;
5066
0
    flow->dp_ifindex = ovs_header->dp_ifindex;
5067
0
    if (a[OVS_FLOW_ATTR_KEY]) {
5068
0
        flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
5069
0
        flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
5070
0
    }
5071
5072
0
    if (a[OVS_FLOW_ATTR_UFID]) {
5073
0
        flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
5074
0
        flow->ufid_present = true;
5075
0
    }
5076
0
    if (a[OVS_FLOW_ATTR_MASK]) {
5077
0
        flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
5078
0
        flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
5079
0
    }
5080
0
    if (a[OVS_FLOW_ATTR_ACTIONS]) {
5081
0
        flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
5082
0
        flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
5083
0
    }
5084
0
    if (a[OVS_FLOW_ATTR_STATS]) {
5085
0
        flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
5086
0
    }
5087
0
    if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
5088
0
        flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
5089
0
    }
5090
0
    if (a[OVS_FLOW_ATTR_USED]) {
5091
0
        flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
5092
0
    }
5093
0
    return 0;
5094
0
}
5095
5096
5097
/*
5098
 * Puts 'data' into 'buf' as a Netlink attribute of type 'type'.  If 'data'
5099
 * contains an OVS_KEY_ATTR_PACKET_TYPE attribute, it is filtered out and, for
5100
 * non-Ethernet flows, converted into an OVS_KEY_ATTR_ETHERTYPE attribute.
5101
 */
5102
static void
5103
put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
5104
                        const struct nlattr *data, uint16_t data_len)
5105
0
{
5106
0
    const struct nlattr *packet_type;
5107
5108
0
    packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
5109
5110
0
    if (packet_type) {
5111
        /* exclude PACKET_TYPE Netlink attribute. */
5112
0
        ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
5113
0
        size_t packet_type_len = NL_A_U32_SIZE;
5114
0
        size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
5115
0
        size_t second_chunk_size = data_len - first_chunk_size
5116
0
                                   - packet_type_len;
5117
0
        struct nlattr *next_attr = nl_attr_next(packet_type);
5118
0
        size_t ofs;
5119
5120
0
        ofs = nl_msg_start_nested(buf, type);
5121
0
        nl_msg_put(buf, data, first_chunk_size);
5122
0
        nl_msg_put(buf, next_attr, second_chunk_size);
5123
0
        if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
5124
0
            ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
5125
0
            const struct nlattr *nla;
5126
5127
0
            nla = nl_attr_find(buf, ofs + NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
5128
0
            if (nla) {
5129
0
                ovs_be16 *ethertype;
5130
5131
0
                ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
5132
0
                *ethertype = pt;
5133
0
            } else {
5134
0
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
5135
0
            }
5136
0
        }
5137
0
        nl_msg_end_nested(buf, ofs);
5138
0
    } else {
5139
0
        nl_msg_put_unspec(buf, type, data, data_len);
5140
0
    }
5141
0
}
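/* Worked example (editorial addition, not part of the original file): an L3,
 * non-Ethernet flow whose key carries an OVS_KEY_ATTR_PACKET_TYPE for an IPv4
 * packet and no OVS_KEY_ATTR_ETHERNET is serialized by the function above
 * without the packet-type attribute but with OVS_KEY_ATTR_ETHERTYPE set to
 * 0x0800 instead. */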
5142
5143
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
5144
 * followed by Netlink attributes corresponding to 'flow'. */
5145
static void
5146
dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
5147
                            struct ofpbuf *buf)
5148
0
{
5149
0
    struct ovs_header *ovs_header;
5150
5151
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
5152
0
                          NLM_F_REQUEST | flow->nlmsg_flags,
5153
0
                          flow->cmd, OVS_FLOW_VERSION);
5154
5155
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
5156
0
    ovs_header->dp_ifindex = flow->dp_ifindex;
5157
5158
0
    if (flow->ufid_present) {
5159
0
        nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
5160
0
    }
5161
0
    if (flow->ufid_terse) {
5162
0
        nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
5163
0
                       OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
5164
0
                       | OVS_UFID_F_OMIT_ACTIONS);
5165
0
    }
5166
0
    if (!flow->ufid_terse || !flow->ufid_present) {
5167
0
        if (flow->key_len) {
5168
0
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
5169
0
                                           flow->key_len);
5170
0
        }
5171
0
        if (flow->mask_len) {
5172
0
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
5173
0
                                           flow->mask_len);
5174
0
        }
5175
0
        if (flow->actions || flow->actions_len) {
5176
0
            nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
5177
0
                              flow->actions, flow->actions_len);
5178
0
        }
5179
0
    }
5180
5181
    /* We never need to send these to the kernel. */
5182
0
    ovs_assert(!flow->stats);
5183
0
    ovs_assert(!flow->tcp_flags);
5184
0
    ovs_assert(!flow->used);
5185
5186
0
    if (flow->clear) {
5187
0
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
5188
0
    }
5189
0
    if (flow->probe) {
5190
0
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
5191
0
    }
5192
0
}
5193
5194
/* Clears 'flow' to "empty" values. */
5195
static void
5196
dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
5197
0
{
5198
0
    memset(flow, 0, sizeof *flow);
5199
0
}
5200
5201
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
5202
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
5203
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
5204
 * result of the command is expected to be a flow also, which is decoded and
5205
 * stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the reply
5206
 * is no longer needed ('reply' will contain pointers into '*bufp'). */
5207
static int
5208
dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
5209
                           struct dpif_netlink_flow *reply,
5210
                           struct ofpbuf **bufp)
5211
0
{
5212
0
    struct ofpbuf *request_buf;
5213
0
    int error;
5214
5215
0
    ovs_assert((reply != NULL) == (bufp != NULL));
5216
5217
0
    if (reply) {
5218
0
        request->nlmsg_flags |= NLM_F_ECHO;
5219
0
    }
5220
5221
0
    request_buf = ofpbuf_new(1024);
5222
0
    dpif_netlink_flow_to_ofpbuf(request, request_buf);
5223
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
5224
0
    ofpbuf_delete(request_buf);
5225
5226
0
    if (reply) {
5227
0
        if (!error) {
5228
0
            error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
5229
0
        }
5230
0
        if (error) {
5231
0
            dpif_netlink_flow_init(reply);
5232
0
            ofpbuf_delete(*bufp);
5233
0
            *bufp = NULL;
5234
0
        }
5235
0
    }
5236
0
    return error;
5237
0
}
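/* A minimal sketch (not part of the original file), using the hypothetical
 * name example_flow_get_terse().  It shows a terse flow lookup by UFID built
 * on dpif_netlink_flow_transact() above: with ufid_present and ufid_terse
 * set, the request asks the kernel (via the OVS_UFID_F_OMIT_* flags) to omit
 * key, mask and actions from its reply. */
static int
example_flow_get_terse(int dp_ifindex, const ovs_u128 *ufid,
                       struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
{
    struct dpif_netlink_flow request;

    dpif_netlink_flow_init(&request);
    request.cmd = OVS_FLOW_CMD_GET;
    request.dp_ifindex = dp_ifindex;
    request.ufid = *ufid;
    request.ufid_present = true;
    request.ufid_terse = true;

    return dpif_netlink_flow_transact(&request, reply, bufp);
}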
5238
5239
static void
5240
dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
5241
                            struct dpif_flow_stats *stats)
5242
0
{
5243
0
    if (flow->stats) {
5244
0
        stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
5245
0
        stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
5246
0
    } else {
5247
0
        stats->n_packets = 0;
5248
0
        stats->n_bytes = 0;
5249
0
    }
5250
0
    stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
5251
0
    stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
5252
0
}
5253
5254
/* Logs information about a packet that was recently lost in 'ch' (in
5255
 * 'dpif'). */
5256
static void
5257
report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
5258
            uint32_t handler_id)
5259
0
{
5260
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
5261
0
    struct ds s;
5262
5263
0
    if (VLOG_DROP_WARN(&rl)) {
5264
0
        return;
5265
0
    }
5266
5267
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
5268
0
        VLOG_WARN("%s: lost packet on handler %u",
5269
0
                  dpif_name(&dpif->dpif), handler_id);
5270
0
    } else {
5271
0
        ds_init(&s);
5272
0
        if (ch->last_poll != LLONG_MIN) {
5273
0
            ds_put_format(&s, " (last polled %lld ms ago)",
5274
0
                        time_msec() - ch->last_poll);
5275
0
        }
5276
5277
0
        VLOG_WARN("%s: lost packet on port channel %u of handler %u%s",
5278
0
                  dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s));
5279
0
        ds_destroy(&s);
5280
0
    }
5281
0
}
5282
5283
static void
5284
dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn,
5285
                                   int argc OVS_UNUSED,
5286
                                   const char *argv[] OVS_UNUSED,
5287
                                   void *aux OVS_UNUSED)
5288
0
{
5289
0
    struct ds reply = DS_EMPTY_INITIALIZER;
5290
0
    struct nl_dump dump;
5291
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
5292
0
    struct ofpbuf msg, buf;
5293
0
    int error;
5294
5295
0
    error = dpif_netlink_init();
5296
0
    if (error) {
5297
0
        return;
5298
0
    }
5299
5300
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
5301
0
    dpif_netlink_dp_dump_start(&dump);
5302
0
    while (nl_dump_next(&dump, &msg, &buf)) {
5303
0
        struct dpif_netlink_dp dp;
5304
0
        if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
5305
0
            ds_put_format(&reply, "%s: ", dp.name);
5306
0
            if (dp.user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) {
5307
0
                ds_put_format(&reply, "per-cpu dispatch mode");
5308
0
            } else {
5309
0
                ds_put_format(&reply, "per-vport dispatch mode");
5310
0
            }
5311
0
            ds_put_format(&reply, "\n");
5312
0
        }
5313
0
    }
5314
0
    ofpbuf_uninit(&buf);
5315
0
    error = nl_dump_done(&dump);
5316
0
    if (!error) {
5317
0
        unixctl_command_reply(conn, ds_cstr(&reply));
5318
0
    }
5319
5320
0
    ds_destroy(&reply);
5321
0
}
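/* Usage note (editorial addition, not part of the original file): the handler
 * above backs the "dpif-netlink/dispatch-mode" command registered in
 * dpif_netlink_init() and is reachable through ovs-appctl.  Running
 * "ovs-appctl dpif-netlink/dispatch-mode" prints one line per datapath, e.g.
 * "ovs-system: per-cpu dispatch mode" (the datapath name depends on the
 * deployment). */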