Coverage Report

Created: 2026-06-10 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openvswitch/lib/dpif-netlink.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2008-2018 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
19
#include "dpif-netlink.h"
20
21
#include <ctype.h>
22
#include <errno.h>
23
#include <fcntl.h>
24
#include <inttypes.h>
25
#include <net/if.h>
26
#include <linux/types.h>
27
#include <linux/pkt_sched.h>
28
#include <poll.h>
29
#include <stdlib.h>
30
#include <strings.h>
31
#include <sys/epoll.h>
32
#include <sys/stat.h>
33
#include <unistd.h>
34
35
#include "bitmap.h"
36
#include "dpif-netlink-rtnl.h"
37
#include "dpif-offload.h"
38
#include "dpif-provider.h"
39
#include "fat-rwlock.h"
40
#include "flow.h"
41
#include "netdev-linux.h"
42
#include "netdev-provider.h"
43
#include "netdev-vport.h"
44
#include "netdev.h"
45
#include "netlink-conntrack.h"
46
#include "netlink-notifier.h"
47
#include "netlink-socket.h"
48
#include "netlink.h"
49
#include "netnsid.h"
50
#include "odp-util.h"
51
#include "openvswitch/dynamic-string.h"
52
#include "openvswitch/flow.h"
53
#include "openvswitch/hmap.h"
54
#include "openvswitch/match.h"
55
#include "openvswitch/ofpbuf.h"
56
#include "openvswitch/poll-loop.h"
57
#include "openvswitch/shash.h"
58
#include "openvswitch/thread.h"
59
#include "openvswitch/usdt-probes.h"
60
#include "openvswitch/vlog.h"
61
#include "packets.h"
62
#include "random.h"
63
#include "sset.h"
64
#include "timeval.h"
65
#include "unaligned.h"
66
#include "util.h"
67
68
VLOG_DEFINE_THIS_MODULE(dpif_netlink);
69
70
/* Largest channel-array size accepted by vport_add_channel(). */
enum { MAX_PORTS = USHRT_MAX };

/* This ethtool flag was introduced in Linux 2.6.24, so it might be
 * missing if we have old headers. */
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */

#define OPERATE_MAX_OPS 50

/* Provide EPOLLEXCLUSIVE when the system headers do not define it. */
#ifndef EPOLLEXCLUSIVE
#define EPOLLEXCLUSIVE (1u << 28)
#endif

/* Feature bit that dpif_netlink_open() sets only to probe whether the kernel
 * rejects unknown user features.
 *
 * The expansion deliberately has no trailing semicolon, so the macro can be
 * used inside larger expressions, e.g. '~(A | OVS_DP_F_UNSUPPORTED)'. */
#define OVS_DP_F_UNSUPPORTED (1u << 31)

/* This PID is not used by the kernel datapath when using dispatch per CPU,
 * but it is required to be set (not zero). */
#define DPIF_NETLINK_PER_CPU_PID UINT32_MAX
87
/* Parsed form of a datapath Netlink message, used both for requests and for
 * replies (see dpif_netlink_dp_transact()). */
struct dpif_netlink_dp {
    /* Generic Netlink header: command. */
    uint8_t cmd;

    /* struct ovs_header: datapath ifindex. */
    int dp_ifindex;

    /* Netlink attributes. */
    const char *name;                  /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES. */
    uint32_t cache_size;               /* OVS_DP_ATTR_MASKS_CACHE_SIZE. */
    const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS. */
    const uint32_t *upcall_pids;       /* OVS_DP_ATTR_PER_CPU_PIDS. */
    uint32_t n_upcall_pids;            /* Elements in 'upcall_pids'. */
};
105
106
static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
107
static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
108
                                       const struct ofpbuf *);
109
static void dpif_netlink_dp_dump_start(struct nl_dump *);
110
static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
111
                                    struct dpif_netlink_dp *reply,
112
                                    struct ofpbuf **bufp);
113
static int dpif_netlink_dp_get(const struct dpif *,
114
                               struct dpif_netlink_dp *reply,
115
                               struct ofpbuf **bufp);
116
static int
117
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features);
118
static uint32_t
119
dpif_netlink_get_features(struct dpif *dpif_);
120
121
static void
122
dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn, int argc,
123
                                   const char *argv[], void *aux);
124
125
struct dpif_netlink_flow {
126
    /* Generic Netlink header. */
127
    uint8_t cmd;
128
129
    /* struct ovs_header. */
130
    unsigned int nlmsg_flags;
131
    int dp_ifindex;
132
133
    /* Attributes.
134
     *
135
     * The 'stats' member points to 64-bit data that might only be aligned on
136
     * 32-bit boundaries, so get_unaligned_u64() should be used to access its
137
     * values.
138
     *
139
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
140
     * the Netlink version of the command, even if actions_len is zero. */
141
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
142
    size_t key_len;
143
    const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
144
    size_t mask_len;
145
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
146
    size_t actions_len;
147
    ovs_u128 ufid;                      /* OVS_FLOW_ATTR_FLOW_ID. */
148
    bool ufid_present;                  /* Is there a UFID? */
149
    bool ufid_terse;                    /* Skip serializing key/mask/acts? */
150
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
151
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
152
    const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
153
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
154
    bool probe;                         /* OVS_FLOW_ATTR_PROBE. */
155
};
156
157
static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
158
static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
159
                                         const struct ofpbuf *);
160
static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
161
                                        struct ofpbuf *);
162
static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
163
                                      struct dpif_netlink_flow *reply,
164
                                      struct ofpbuf **bufp);
165
static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
166
                                        struct dpif_flow_stats *);
167
static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
168
                                           const struct dpif_netlink_flow *);
169
170
/* A single upcall channel between the kernel and userspace.  In per-vport
 * dispatch mode 'struct dpif_netlink' keeps an array of these, indexed by
 * port number. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket, or NULL if unused. */
    long long int last_poll;    /* Last time this channel was polled. */
};
175
176
/* State of one upcall handler.  Which members are used depends on the
 * upcall dispatch mode of the datapath. */
struct dpif_handler {
    /* Used in per-vport dispatch mode. */
    struct epoll_event *epoll_events; /* Buffer for epoll_wait() results. */
    int epoll_fd;                 /* epoll fd that includes channel socks. */
    int n_events;                 /* Num events returned by epoll_wait(). */
    int event_offset;             /* Offset into 'epoll_events'. */

    /* Used in per-cpu dispatch mode. */
    struct nl_sock *sock;         /* Each handler thread holds one netlink
                                     socket. */
};
187
188
/* Datapath interface for the openvswitch Linux kernel module. */
189
struct dpif_netlink {
190
    struct dpif dpif;
191
    int dp_ifindex;
192
    uint32_t user_features;
193
194
    /* Upcall messages. */
195
    struct fat_rwlock upcall_lock;
196
    struct dpif_handler *handlers;
197
    uint32_t n_handlers;           /* Num of upcall handlers. */
198
199
    /* Per-vport dispatch mode. */
200
    struct dpif_channel *channels; /* Array of channels for each port. */
201
    int uc_array_size;             /* Size of 'handler->channels' and */
202
                                   /* 'handler->epoll_events'. */
203
204
    /* Change notification. */
205
    struct nl_sock *port_notifier; /* vport multicast group subscriber. */
206
    bool refresh_channels;
207
};
208
209
static void report_loss(struct dpif_netlink *, struct dpif_channel *,
210
                        uint32_t ch_idx, uint32_t handler_id);
211
212
/* Rate limit shared by error and warning log messages in this file. */
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);

/* Generic Netlink family numbers for OVS.
 *
 * Set up by dpif_netlink_init(). */
static int ovs_datapath_family;
static int ovs_vport_family;
static int ovs_flow_family;
static int ovs_packet_family;
static int ovs_meter_family;
static int ovs_ct_limit_family;

/* Generic Netlink multicast groups for OVS.
 *
 * Set up by dpif_netlink_init(). */
static unsigned int ovs_vport_mcgroup;

/* If true, tunnel devices are created using OVS compat/genetlink.
 * If false, tunnel devices are created with rtnetlink and use lightweight
 * tunnels (LWT).  If creating the tunnel with rtnetlink+LWT fails, we fall
 * back to the compat interface. */
static bool ovs_tunnels_out_of_tree = true;
234
235
static int dpif_netlink_init(void);
236
static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
237
static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
238
                                          odp_port_t port_no);
239
static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
240
static int dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *,
241
                                                        uint32_t n_handlers);
242
static void destroy_all_channels(struct dpif_netlink *);
243
static int dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *);
244
static void destroy_all_handlers(struct dpif_netlink *);
245
246
static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
247
                                         struct ofpbuf *);
248
static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
249
                                          const struct ofpbuf *);
250
static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
251
                                     odp_port_t port_no, const char *port_name,
252
                                     struct dpif_port *dpif_port);
253
static void vport_del_channels(struct dpif_netlink *, odp_port_t);
254
255
static int
256
create_nl_sock(struct dpif_netlink *dpif OVS_UNUSED, struct nl_sock **sockp)
257
    OVS_REQ_WRLOCK(dpif->upcall_lock)
258
0
{
259
0
    return nl_sock_create(NETLINK_GENERIC, sockp);
260
0
}
261
262
/* Releases 'sock' by handing it to nl_sock_destroy().  Counterpart of
 * create_nl_sock(). */
static void
close_nl_sock(struct nl_sock *sock)
{
    nl_sock_destroy(sock);
    return;
}
267
268
static struct dpif_netlink *
269
dpif_netlink_cast(const struct dpif *dpif)
270
0
{
271
0
    dpif_assert_class(dpif, &dpif_netlink_class);
272
0
    return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
273
0
}
274
275
static inline bool
276
0
dpif_netlink_upcall_per_cpu(const struct dpif_netlink *dpif) {
277
0
    return !!((dpif)->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU);
278
0
}
279
280
static int
281
dpif_netlink_enumerate(struct sset *all_dps,
282
                       const struct dpif_class *dpif_class OVS_UNUSED)
283
0
{
284
0
    struct nl_dump dump;
285
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
286
0
    struct ofpbuf msg, buf;
287
0
    int error;
288
289
0
    error = dpif_netlink_init();
290
0
    if (error) {
291
0
        return error;
292
0
    }
293
294
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
295
0
    dpif_netlink_dp_dump_start(&dump);
296
0
    while (nl_dump_next(&dump, &msg, &buf)) {
297
0
        struct dpif_netlink_dp dp;
298
299
0
        if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
300
0
            sset_add(all_dps, dp.name);
301
0
        }
302
0
    }
303
0
    ofpbuf_uninit(&buf);
304
0
    return nl_dump_done(&dump);
305
0
}
306
307
static int
308
dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
309
                  bool create, struct dpif **dpifp)
310
0
{
311
0
    struct dpif_netlink_dp dp_request, dp;
312
0
    struct ofpbuf *buf;
313
0
    uint32_t upcall_pid;
314
0
    int error;
315
316
0
    error = dpif_netlink_init();
317
0
    if (error) {
318
0
        return error;
319
0
    }
320
321
    /* Create or look up datapath. */
322
0
    dpif_netlink_dp_init(&dp_request);
323
0
    upcall_pid = 0;
324
0
    dp_request.upcall_pid = &upcall_pid;
325
0
    dp_request.name = name;
326
327
0
    if (create) {
328
0
        dp_request.cmd = OVS_DP_CMD_NEW;
329
0
    } else {
330
0
        dp_request.cmd = OVS_DP_CMD_GET;
331
332
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
333
0
        if (error) {
334
0
            return error;
335
0
        }
336
0
        dp_request.user_features = dp.user_features;
337
0
        ofpbuf_delete(buf);
338
339
        /* Use OVS_DP_CMD_SET to report user features */
340
0
        dp_request.cmd = OVS_DP_CMD_SET;
341
0
    }
342
343
    /* Some older kernels will not reject unknown features. This will cause
344
     * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to
345
     * test for that, we attempt to set a feature that we know is not supported
346
     * by any kernel. If this feature is not rejected, we can assume we are
347
     * running on one of these older kernels.
348
     */
349
0
    dp_request.user_features |= OVS_DP_F_UNALIGNED;
350
0
    dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
351
0
    dp_request.user_features |= OVS_DP_F_UNSUPPORTED;
352
0
    error = dpif_netlink_dp_transact(&dp_request, NULL, NULL);
353
0
    if (error) {
354
        /* The Open vSwitch kernel module has two modes for dispatching
355
         * upcalls: per-vport and per-cpu.
356
         *
357
         * When dispatching upcalls per-vport, the kernel will
358
         * send the upcall via a Netlink socket that has been selected based on
359
         * the vport that received the packet that is causing the upcall.
360
         *
361
         * When dispatching upcall per-cpu, the kernel will send the upcall via
362
         * a Netlink socket that has been selected based on the cpu that
363
         * received the packet that is causing the upcall.
364
         *
365
         * First we test to see if the kernel module supports per-cpu
366
         * dispatching (the preferred method). If it does not support per-cpu
367
         * dispatching, we fall back to the per-vport dispatch mode.
368
         */
369
0
        dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED;
370
0
        dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
371
0
        dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
372
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
373
0
        if (error == EOPNOTSUPP) {
374
0
            dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
375
0
            dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
376
0
            error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
377
0
        }
378
0
        if (error) {
379
0
            return error;
380
0
        }
381
382
0
        error = open_dpif(&dp, dpifp);
383
0
        dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
384
0
    } else {
385
0
        VLOG_INFO("Kernel does not correctly support feature negotiation. "
386
0
                  "Using standard features.");
387
0
        dp_request.cmd = OVS_DP_CMD_SET;
388
0
        dp_request.user_features = 0;
389
0
        dp_request.user_features |= OVS_DP_F_UNALIGNED;
390
0
        dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
391
0
        error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
392
0
        if (error) {
393
0
            return error;
394
0
        }
395
0
        error = open_dpif(&dp, dpifp);
396
0
    }
397
398
0
    ofpbuf_delete(buf);
399
400
0
    if (create) {
401
0
        VLOG_INFO("Datapath dispatch mode: %s",
402
0
                  dpif_netlink_upcall_per_cpu(dpif_netlink_cast(*dpifp)) ?
403
0
                  "per-cpu" : "per-vport");
404
0
    }
405
406
0
    return error;
407
0
}
408
409
static int
410
open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
411
0
{
412
0
    struct dpif_netlink *dpif;
413
414
0
    dpif = xzalloc(sizeof *dpif);
415
0
    dpif->port_notifier = NULL;
416
0
    fat_rwlock_init(&dpif->upcall_lock);
417
418
0
    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
419
0
              dp->dp_ifindex, dp->dp_ifindex);
420
421
0
    dpif->dp_ifindex = dp->dp_ifindex;
422
0
    dpif->user_features = dp->user_features;
423
0
    *dpifp = &dpif->dpif;
424
425
0
    return 0;
426
0
}
427
428
/* Given the port number 'port_idx', extracts the pid of netlink socket
429
 * associated to the port and assigns it to 'upcall_pid'. */
430
static bool
431
vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
432
              uint32_t *upcall_pid)
433
0
{
434
    /* Since the nl_sock can only be assigned in either all
435
     * or none "dpif" channels, the following check
436
     * would suffice. */
437
0
    if (!dpif->channels[port_idx].sock) {
438
0
        return false;
439
0
    }
440
441
0
    *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
442
443
0
    return true;
444
0
}
445
446
static int
447
vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
448
                  struct nl_sock *sock)
449
0
{
450
0
    struct epoll_event event;
451
0
    uint32_t port_idx = odp_to_u32(port_no);
452
0
    size_t i;
453
0
    int error;
454
455
0
    if (dpif->handlers == NULL) {
456
0
        close_nl_sock(sock);
457
0
        return 0;
458
0
    }
459
460
    /* We assume that the datapath densely chooses port numbers, which can
461
     * therefore be used as an index into 'channels' and 'epoll_events' of
462
     * 'dpif'. */
463
0
    if (port_idx >= dpif->uc_array_size) {
464
0
        uint32_t new_size = port_idx + 1;
465
466
0
        if (new_size > MAX_PORTS) {
467
0
            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
468
0
                         dpif_name(&dpif->dpif), port_no);
469
0
            return EFBIG;
470
0
        }
471
472
0
        dpif->channels = xrealloc(dpif->channels,
473
0
                                  new_size * sizeof *dpif->channels);
474
475
0
        for (i = dpif->uc_array_size; i < new_size; i++) {
476
0
            dpif->channels[i].sock = NULL;
477
0
        }
478
479
0
        for (i = 0; i < dpif->n_handlers; i++) {
480
0
            struct dpif_handler *handler = &dpif->handlers[i];
481
482
0
            handler->epoll_events = xrealloc(handler->epoll_events,
483
0
                new_size * sizeof *handler->epoll_events);
484
485
0
        }
486
0
        dpif->uc_array_size = new_size;
487
0
    }
488
489
0
    vport_del_channels(dpif, port_no);
490
491
0
    memset(&event, 0, sizeof event);
492
0
    event.events = EPOLLIN | EPOLLEXCLUSIVE;
493
0
    event.data.u32 = port_idx;
494
495
0
    for (i = 0; i < dpif->n_handlers; i++) {
496
0
        struct dpif_handler *handler = &dpif->handlers[i];
497
498
0
        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
499
0
                      &event) < 0) {
500
0
            error = errno;
501
0
            goto error;
502
0
        }
503
0
    }
504
0
    dpif->channels[port_idx].sock = sock;
505
0
    dpif->channels[port_idx].last_poll = LLONG_MIN;
506
507
0
    return 0;
508
509
0
error:
510
0
    while (i--) {
511
0
        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
512
0
                  nl_sock_fd(sock), NULL);
513
0
    }
514
0
    dpif->channels[port_idx].sock = NULL;
515
516
0
    return error;
517
0
}
518
519
static void
520
vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
521
0
{
522
0
    uint32_t port_idx = odp_to_u32(port_no);
523
0
    size_t i;
524
525
0
    if (!dpif->handlers || port_idx >= dpif->uc_array_size
526
0
        || !dpif->channels[port_idx].sock) {
527
0
        return;
528
0
    }
529
530
0
    for (i = 0; i < dpif->n_handlers; i++) {
531
0
        struct dpif_handler *handler = &dpif->handlers[i];
532
0
        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
533
0
                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
534
0
        handler->event_offset = handler->n_events = 0;
535
0
    }
536
0
    nl_sock_destroy(dpif->channels[port_idx].sock);
537
0
    dpif->channels[port_idx].sock = NULL;
538
0
}
539
540
static void
541
destroy_all_channels(struct dpif_netlink *dpif)
542
    OVS_REQ_WRLOCK(dpif->upcall_lock)
543
0
{
544
0
    unsigned int i;
545
546
0
    if (!dpif->handlers) {
547
0
        return;
548
0
    }
549
550
0
    for (i = 0; i < dpif->uc_array_size; i++ ) {
551
0
        struct dpif_netlink_vport vport_request;
552
0
        uint32_t upcall_pids = 0;
553
554
0
        if (!dpif->channels[i].sock) {
555
0
            continue;
556
0
        }
557
558
        /* Turn off upcalls. */
559
0
        dpif_netlink_vport_init(&vport_request);
560
0
        vport_request.cmd = OVS_VPORT_CMD_SET;
561
0
        vport_request.dp_ifindex = dpif->dp_ifindex;
562
0
        vport_request.port_no = u32_to_odp(i);
563
0
        vport_request.n_upcall_pids = 1;
564
0
        vport_request.upcall_pids = &upcall_pids;
565
0
        dpif_netlink_vport_transact(&vport_request, NULL, NULL);
566
567
0
        vport_del_channels(dpif, u32_to_odp(i));
568
0
    }
569
570
0
    for (i = 0; i < dpif->n_handlers; i++) {
571
0
        struct dpif_handler *handler = &dpif->handlers[i];
572
573
0
        dpif_netlink_handler_uninit(handler);
574
0
        free(handler->epoll_events);
575
0
    }
576
0
    free(dpif->channels);
577
0
    free(dpif->handlers);
578
0
    dpif->handlers = NULL;
579
0
    dpif->channels = NULL;
580
0
    dpif->n_handlers = 0;
581
0
    dpif->uc_array_size = 0;
582
0
}
583
584
static void
585
destroy_all_handlers(struct dpif_netlink *dpif)
586
    OVS_REQ_WRLOCK(dpif->upcall_lock)
587
0
{
588
0
    int i = 0;
589
590
0
    if (!dpif->handlers) {
591
0
        return;
592
0
    }
593
0
    for (i = 0; i < dpif->n_handlers; i++) {
594
0
        struct dpif_handler *handler = &dpif->handlers[i];
595
0
        close_nl_sock(handler->sock);
596
0
    }
597
0
    free(dpif->handlers);
598
0
    dpif->handlers = NULL;
599
0
    dpif->n_handlers = 0;
600
0
}
601
602
static void
603
dpif_netlink_close(struct dpif *dpif_)
604
0
{
605
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
606
607
0
    nl_sock_destroy(dpif->port_notifier);
608
609
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
610
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
611
0
        destroy_all_handlers(dpif);
612
0
    } else {
613
0
        destroy_all_channels(dpif);
614
0
    }
615
0
    fat_rwlock_unlock(&dpif->upcall_lock);
616
617
0
    fat_rwlock_destroy(&dpif->upcall_lock);
618
0
    free(dpif);
619
0
}
620
621
static int
622
dpif_netlink_destroy(struct dpif *dpif_)
623
0
{
624
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
625
0
    struct dpif_netlink_dp dp;
626
627
0
    dpif_netlink_dp_init(&dp);
628
0
    dp.cmd = OVS_DP_CMD_DEL;
629
0
    dp.dp_ifindex = dpif->dp_ifindex;
630
0
    return dpif_netlink_dp_transact(&dp, NULL, NULL);
631
0
}
632
633
static bool
634
dpif_netlink_run(struct dpif *dpif_)
635
0
{
636
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
637
638
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
639
0
        if (dpif->refresh_channels) {
640
0
            dpif->refresh_channels = false;
641
0
            fat_rwlock_wrlock(&dpif->upcall_lock);
642
0
            dpif_netlink_refresh_handlers_vport_dispatch(dpif,
643
0
                                                         dpif->n_handlers);
644
0
            fat_rwlock_unlock(&dpif->upcall_lock);
645
0
        }
646
0
    }
647
0
    return false;
648
0
}
649
650
static int
651
dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
652
0
{
653
0
    struct dpif_netlink_dp dp;
654
0
    struct ofpbuf *buf;
655
0
    int error;
656
657
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
658
0
    if (!error) {
659
0
        memset(stats, 0, sizeof *stats);
660
661
0
        if (dp.stats) {
662
0
            stats->n_hit    = get_32aligned_u64(&dp.stats->n_hit);
663
0
            stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
664
0
            stats->n_lost   = get_32aligned_u64(&dp.stats->n_lost);
665
0
            stats->n_flows  = get_32aligned_u64(&dp.stats->n_flows);
666
0
        }
667
668
0
        if (dp.megaflow_stats) {
669
0
            stats->n_masks = dp.megaflow_stats->n_masks;
670
0
            stats->n_mask_hit = get_32aligned_u64(
671
0
                &dp.megaflow_stats->n_mask_hit);
672
0
            stats->n_cache_hit = get_32aligned_u64(
673
0
                &dp.megaflow_stats->n_cache_hit);
674
675
0
            if (!stats->n_cache_hit) {
676
                /* Old kernels don't use this field and always
677
                 * report zero instead.  Disable this stat. */
678
0
                stats->n_cache_hit = UINT64_MAX;
679
0
            }
680
0
        } else {
681
0
            stats->n_masks = UINT32_MAX;
682
0
            stats->n_mask_hit = UINT64_MAX;
683
0
            stats->n_cache_hit = UINT64_MAX;
684
0
        }
685
0
        ofpbuf_delete(buf);
686
0
    }
687
0
    return error;
688
0
}
689
690
static int
691
dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
692
                              uint32_t n_upcall_pids)
693
0
{
694
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
695
0
    int largest_cpu_id = ovs_numa_get_largest_core_id();
696
0
    struct dpif_netlink_dp request, reply;
697
0
    struct ofpbuf *bufp;
698
699
0
    uint32_t *corrected;
700
0
    int error, i, n_cores;
701
702
0
    if (largest_cpu_id == OVS_NUMA_UNSPEC) {
703
0
        largest_cpu_id = -1;
704
0
    }
705
706
    /* Some systems have non-continuous cpu core ids.  count_total_cores()
707
     * would return an accurate number, however, this number cannot be used.
708
     * e.g. If the largest core_id of a system is cpu9, but the system only
709
     * has 4 cpus then the OVS kernel module would throw a "CPU mismatch"
710
     * warning.  With the MAX() in place in this example we send an array of
711
     * size 10 and prevent the warning.  This has no bearing on the number of
712
     * threads created.
713
     */
714
0
    n_cores = MAX(count_total_cores(), largest_cpu_id + 1);
715
0
    VLOG_DBG("Dispatch mode(per-cpu): Setting up handler PIDs for %d cores",
716
0
             n_cores);
717
718
0
    dpif_netlink_dp_init(&request);
719
0
    request.cmd = OVS_DP_CMD_SET;
720
0
    request.name = dpif_->base_name;
721
0
    request.dp_ifindex = dpif->dp_ifindex;
722
0
    request.user_features = dpif->user_features |
723
0
                            OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
724
725
0
    corrected = xcalloc(n_cores, sizeof *corrected);
726
727
0
    for (i = 0; i < n_cores; i++) {
728
0
        corrected[i] = upcall_pids[i % n_upcall_pids];
729
0
    }
730
0
    request.upcall_pids = corrected;
731
0
    request.n_upcall_pids = n_cores;
732
733
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
734
0
    if (!error) {
735
0
        dpif->user_features = reply.user_features;
736
0
        ofpbuf_delete(bufp);
737
0
        if (!dpif_netlink_upcall_per_cpu(dpif)) {
738
0
            error = -EOPNOTSUPP;
739
0
        }
740
0
    }
741
0
    free(corrected);
742
0
    return error;
743
0
}
744
745
static int
746
dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features)
747
0
{
748
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
749
0
    struct dpif_netlink_dp request, reply;
750
0
    struct ofpbuf *bufp;
751
0
    int error;
752
753
0
    dpif_netlink_dp_init(&request);
754
0
    request.cmd = OVS_DP_CMD_SET;
755
0
    request.name = dpif_->base_name;
756
0
    request.dp_ifindex = dpif->dp_ifindex;
757
0
    request.user_features = dpif->user_features | new_features;
758
759
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
760
0
    if (!error) {
761
0
        dpif->user_features = reply.user_features;
762
0
        ofpbuf_delete(bufp);
763
0
        if (!(dpif->user_features & new_features)) {
764
0
            return -EOPNOTSUPP;
765
0
        }
766
0
    }
767
768
0
    return error;
769
0
}
770
771
static uint32_t
772
dpif_netlink_get_features(struct dpif *dpif_)
773
0
{
774
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
775
776
0
    return dpif->user_features;
777
0
}
778
779
static const char *
780
get_vport_type(const struct dpif_netlink_vport *vport)
781
0
{
782
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
783
784
0
    switch (vport->type) {
785
0
    case OVS_VPORT_TYPE_NETDEV: {
786
0
        const char *type = netdev_get_type_from_name(vport->name);
787
788
0
        return type ? type : "system";
789
0
    }
790
791
0
    case OVS_VPORT_TYPE_INTERNAL:
792
0
        return "internal";
793
794
0
    case OVS_VPORT_TYPE_GENEVE:
795
0
        return "geneve";
796
797
0
    case OVS_VPORT_TYPE_GRE:
798
0
        return "gre";
799
800
0
    case OVS_VPORT_TYPE_VXLAN:
801
0
        return "vxlan";
802
803
0
    case OVS_VPORT_TYPE_ERSPAN:
804
0
        return "erspan";
805
806
0
    case OVS_VPORT_TYPE_IP6ERSPAN:
807
0
        return "ip6erspan";
808
809
0
    case OVS_VPORT_TYPE_IP6GRE:
810
0
        return "ip6gre";
811
812
0
    case OVS_VPORT_TYPE_GTPU:
813
0
        return "gtpu";
814
815
0
    case OVS_VPORT_TYPE_SRV6:
816
0
        return "srv6";
817
818
0
    case OVS_VPORT_TYPE_BAREUDP:
819
0
        return "bareudp";
820
821
0
    case OVS_VPORT_TYPE_UNSPEC:
822
0
    case __OVS_VPORT_TYPE_MAX:
823
0
        break;
824
0
    }
825
826
0
    VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
827
0
                 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
828
0
    return "unknown";
829
0
}
830
831
enum ovs_vport_type
832
netdev_to_ovs_vport_type(const char *type)
833
0
{
834
0
    if (!strcmp(type, "tap") || !strcmp(type, "system")) {
835
0
        return OVS_VPORT_TYPE_NETDEV;
836
0
    } else if (!strcmp(type, "internal")) {
837
0
        return OVS_VPORT_TYPE_INTERNAL;
838
0
    } else if (!strcmp(type, "geneve")) {
839
0
        return OVS_VPORT_TYPE_GENEVE;
840
0
    } else if (!strcmp(type, "vxlan")) {
841
0
        return OVS_VPORT_TYPE_VXLAN;
842
0
    } else if (!strcmp(type, "erspan")) {
843
0
        return OVS_VPORT_TYPE_ERSPAN;
844
0
    } else if (!strcmp(type, "ip6erspan")) {
845
0
        return OVS_VPORT_TYPE_IP6ERSPAN;
846
0
    } else if (!strcmp(type, "ip6gre")) {
847
0
        return OVS_VPORT_TYPE_IP6GRE;
848
0
    } else if (!strcmp(type, "gre")) {
849
0
        return OVS_VPORT_TYPE_GRE;
850
0
    } else if (!strcmp(type, "gtpu")) {
851
0
        return OVS_VPORT_TYPE_GTPU;
852
0
    } else if (!strcmp(type, "srv6")) {
853
0
        return OVS_VPORT_TYPE_SRV6;
854
0
    } else if (!strcmp(type, "bareudp")) {
855
0
        return OVS_VPORT_TYPE_BAREUDP;
856
0
    } else {
857
0
        return OVS_VPORT_TYPE_UNSPEC;
858
0
    }
859
0
}
860
861
static int
862
dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
863
                        enum ovs_vport_type type,
864
                        struct ofpbuf *options,
865
                        odp_port_t *port_nop)
866
    OVS_REQ_WRLOCK(dpif->upcall_lock)
867
0
{
868
0
    struct dpif_netlink_vport request, reply;
869
0
    struct ofpbuf *buf;
870
0
    struct nl_sock *sock = NULL;
871
0
    uint32_t upcall_pids = 0;
872
0
    int error = 0;
873
874
    /* per-cpu dispatch mode does not require a socket per vport. */
875
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
876
0
        if (dpif->handlers) {
877
0
            error = create_nl_sock(dpif, &sock);
878
0
            if (error) {
879
0
                return error;
880
0
            }
881
0
        }
882
0
        if (sock) {
883
0
            upcall_pids = nl_sock_pid(sock);
884
0
        }
885
0
    }
886
887
0
    dpif_netlink_vport_init(&request);
888
0
    request.cmd = OVS_VPORT_CMD_NEW;
889
0
    request.dp_ifindex = dpif->dp_ifindex;
890
0
    request.type = type;
891
0
    request.name = name;
892
893
0
    request.port_no = *port_nop;
894
0
    request.n_upcall_pids = 1;
895
0
    request.upcall_pids = &upcall_pids;
896
897
0
    if (options) {
898
0
        request.options = options->data;
899
0
        request.options_len = options->size;
900
0
    }
901
902
0
    error = dpif_netlink_vport_transact(&request, &reply, &buf);
903
0
    if (!error) {
904
0
        *port_nop = reply.port_no;
905
0
    } else {
906
0
        if (error == EBUSY && *port_nop != ODPP_NONE) {
907
0
            VLOG_INFO("%s: requested port %"PRIu32" is in use",
908
0
                      dpif_name(&dpif->dpif), *port_nop);
909
0
        }
910
911
0
        close_nl_sock(sock);
912
0
        goto exit;
913
0
    }
914
915
0
    if (!dpif_netlink_upcall_per_cpu(dpif)) {
916
0
        error = vport_add_channel(dpif, *port_nop, sock);
917
0
        if (error) {
918
0
            VLOG_INFO("%s: could not add channel for port %s",
919
0
                        dpif_name(&dpif->dpif), name);
920
921
            /* Delete the port. */
922
0
            dpif_netlink_vport_init(&request);
923
0
            request.cmd = OVS_VPORT_CMD_DEL;
924
0
            request.dp_ifindex = dpif->dp_ifindex;
925
0
            request.port_no = *port_nop;
926
0
            dpif_netlink_vport_transact(&request, NULL, NULL);
927
0
            close_nl_sock(sock);
928
0
            goto exit;
929
0
        }
930
0
    }
931
932
0
exit:
933
0
    ofpbuf_delete(buf);
934
935
0
    return error;
936
0
}
937
938
static int
939
dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
940
                             odp_port_t *port_nop)
941
    OVS_REQ_WRLOCK(dpif->upcall_lock)
942
0
{
943
0
    const struct netdev_tunnel_config *tnl_cfg;
944
0
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
945
0
    const char *type = netdev_get_type(netdev);
946
0
    uint64_t options_stub[64 / 8];
947
0
    enum ovs_vport_type ovs_type;
948
0
    struct ofpbuf options;
949
0
    const char *name;
950
951
0
    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
952
953
0
    ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
954
0
    if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
955
0
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
956
0
                     "unsupported type `%s'",
957
0
                     dpif_name(&dpif->dpif), name, type);
958
0
        return EINVAL;
959
0
    }
960
961
0
    if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
962
0
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
963
0
    }
964
965
0
    tnl_cfg = netdev_get_tunnel_config(netdev);
966
0
    if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
967
0
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
968
0
        if (tnl_cfg->dst_port) {
969
0
            nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
970
0
                           ntohs(tnl_cfg->dst_port));
971
0
        }
972
0
        if (tnl_cfg->exts) {
973
0
            size_t ext_ofs;
974
0
            int i;
975
976
0
            ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
977
0
            for (i = 0; i < 32; i++) {
978
0
                if (tnl_cfg->exts & (UINT32_C(1) << i)) {
979
0
                    nl_msg_put_flag(&options, i);
980
0
                }
981
0
            }
982
0
            nl_msg_end_nested(&options, ext_ofs);
983
0
        }
984
0
        return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
985
0
                                       port_nop);
986
0
    } else {
987
0
        return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
988
0
    }
989
990
0
}
991
992
static int
993
dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
994
                                      struct netdev *netdev,
995
                                      odp_port_t *port_nop)
996
    OVS_REQ_WRLOCK(dpif->upcall_lock)
997
0
{
998
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
999
0
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1000
0
    const char *name;
1001
0
    int error;
1002
1003
0
    error = dpif_netlink_rtnl_port_create(netdev);
1004
0
    if (error) {
1005
0
        if (error != EOPNOTSUPP) {
1006
0
            VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
1007
0
                         netdev_get_name(netdev), ovs_strerror(error));
1008
0
        }
1009
0
        return error;
1010
0
    }
1011
1012
0
    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1013
0
    error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
1014
0
                                    port_nop);
1015
0
    if (error) {
1016
0
        dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
1017
0
    }
1018
0
    return error;
1019
0
}
1020
1021
static int
1022
dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
1023
                      odp_port_t *port_nop)
1024
0
{
1025
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1026
0
    int error = EOPNOTSUPP;
1027
1028
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
1029
0
    if (!ovs_tunnels_out_of_tree) {
1030
0
        error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
1031
0
    }
1032
0
    if (error) {
1033
0
        error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
1034
0
    }
1035
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1036
1037
0
    return error;
1038
0
}
1039
1040
static int
1041
dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
1042
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1043
0
{
1044
0
    struct dpif_netlink_vport vport;
1045
0
    struct dpif_port dpif_port;
1046
0
    int error;
1047
1048
0
    error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
1049
0
    if (error) {
1050
0
        return error;
1051
0
    }
1052
1053
0
    dpif_netlink_vport_init(&vport);
1054
0
    vport.cmd = OVS_VPORT_CMD_DEL;
1055
0
    vport.dp_ifindex = dpif->dp_ifindex;
1056
0
    vport.port_no = port_no;
1057
1058
0
    error = dpif_netlink_vport_transact(&vport, NULL, NULL);
1059
1060
0
    vport_del_channels(dpif, port_no);
1061
1062
0
    if (!error && !ovs_tunnels_out_of_tree) {
1063
0
        error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1064
0
        if (error == EOPNOTSUPP) {
1065
0
            error = 0;
1066
0
        }
1067
0
    }
1068
1069
0
    dpif_port_destroy(&dpif_port);
1070
1071
0
    return error;
1072
0
}
1073
1074
static int
1075
dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
1076
0
{
1077
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1078
0
    int error;
1079
1080
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
1081
0
    error = dpif_netlink_port_del__(dpif, port_no);
1082
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1083
1084
0
    return error;
1085
0
}
1086
1087
static int
1088
dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1089
                          const char *port_name, struct dpif_port *dpif_port)
1090
0
{
1091
0
    struct dpif_netlink_vport request;
1092
0
    struct dpif_netlink_vport reply;
1093
0
    struct ofpbuf *buf;
1094
0
    int error;
1095
1096
0
    dpif_netlink_vport_init(&request);
1097
0
    request.cmd = OVS_VPORT_CMD_GET;
1098
0
    request.dp_ifindex = dpif->dp_ifindex;
1099
0
    request.port_no = port_no;
1100
0
    request.name = port_name;
1101
1102
0
    error = dpif_netlink_vport_transact(&request, &reply, &buf);
1103
0
    if (!error) {
1104
0
        if (reply.dp_ifindex != request.dp_ifindex) {
1105
            /* A query by name reported that 'port_name' is in some datapath
1106
             * other than 'dpif', but the caller wants to know about 'dpif'. */
1107
0
            error = ENODEV;
1108
0
        } else if (dpif_port) {
1109
0
            dpif_port->name = xstrdup(reply.name);
1110
0
            dpif_port->type = xstrdup(get_vport_type(&reply));
1111
0
            dpif_port->port_no = reply.port_no;
1112
0
        }
1113
0
        ofpbuf_delete(buf);
1114
0
    }
1115
0
    return error;
1116
0
}
1117
1118
static int
1119
dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1120
                                  struct dpif_port *dpif_port)
1121
0
{
1122
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1123
1124
0
    return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
1125
0
}
1126
1127
/* Looks up the vport named 'devname' in 'dpif_' by delegating to
 * dpif_netlink_port_query__() with port number 0. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                              struct dpif_port *dpif_port)
{
    return dpif_netlink_port_query__(dpif_netlink_cast(dpif_), 0, devname,
                                     dpif_port);
}
1135
1136
static uint32_t
1137
dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1138
                            odp_port_t port_no)
1139
    OVS_REQ_RDLOCK(dpif->upcall_lock)
1140
0
{
1141
0
    uint32_t port_idx = odp_to_u32(port_no);
1142
0
    uint32_t pid = 0;
1143
1144
0
    if (dpif->handlers && dpif->uc_array_size > 0) {
1145
        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1146
         * channel, since it is not heavily loaded. */
1147
0
        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1148
1149
        /* Needs to check in case the socket pointer is changed in between
1150
         * the holding of upcall_lock.  A known case happens when the main
1151
         * thread deletes the vport while the handler thread is handling
1152
         * the upcall from that port. */
1153
0
        if (dpif->channels[idx].sock) {
1154
0
            pid = nl_sock_pid(dpif->channels[idx].sock);
1155
0
        }
1156
0
    }
1157
1158
0
    return pid;
1159
0
}
1160
1161
static uint32_t
1162
dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
1163
0
{
1164
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1165
0
    uint32_t ret;
1166
1167
    /* In per-cpu dispatch mode, vports do not have an associated PID */
1168
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
1169
        /* In per-cpu dispatch mode, this will be ignored as kernel space will
1170
         * select the PID before sending to user space. We set to
1171
         * DPIF_NETLINK_PER_CPU_PID as 0 is rejected by kernel space as an
1172
         * invalid PID.
1173
         */
1174
0
        return DPIF_NETLINK_PER_CPU_PID;
1175
0
    }
1176
1177
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
1178
0
    ret = dpif_netlink_port_get_pid__(dpif, port_no);
1179
0
    fat_rwlock_unlock(&dpif->upcall_lock);
1180
1181
0
    return ret;
1182
0
}
1183
1184
static int
1185
dpif_netlink_flow_flush(struct dpif *dpif_)
1186
0
{
1187
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1188
0
    struct dpif_netlink_flow flow;
1189
1190
0
    dpif_netlink_flow_init(&flow);
1191
0
    flow.cmd = OVS_FLOW_CMD_DEL;
1192
0
    flow.dp_ifindex = dpif->dp_ifindex;
1193
1194
0
    return dpif_netlink_flow_transact(&flow, NULL, NULL);
1195
0
}
1196
1197
struct dpif_netlink_port_state {
1198
    struct nl_dump dump;
1199
    struct ofpbuf buf;
1200
};
1201
1202
static void
1203
dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1204
                               struct nl_dump *dump)
1205
0
{
1206
0
    struct dpif_netlink_vport request;
1207
0
    struct ofpbuf *buf;
1208
1209
0
    dpif_netlink_vport_init(&request);
1210
0
    request.cmd = OVS_VPORT_CMD_GET;
1211
0
    request.dp_ifindex = dpif->dp_ifindex;
1212
1213
0
    buf = ofpbuf_new(1024);
1214
0
    dpif_netlink_vport_to_ofpbuf(&request, buf);
1215
0
    nl_dump_start(dump, NETLINK_GENERIC, buf);
1216
0
    ofpbuf_delete(buf);
1217
0
}
1218
1219
static int
1220
dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
1221
0
{
1222
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1223
0
    struct dpif_netlink_port_state *state;
1224
1225
0
    *statep = state = xmalloc(sizeof *state);
1226
0
    dpif_netlink_port_dump_start__(dpif, &state->dump);
1227
1228
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1229
0
    return 0;
1230
0
}
1231
1232
static int
1233
dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1234
                              struct nl_dump *dump,
1235
                              struct dpif_netlink_vport *vport,
1236
                              struct ofpbuf *buffer)
1237
0
{
1238
0
    struct ofpbuf buf;
1239
0
    int error;
1240
1241
0
    if (!nl_dump_next(dump, &buf, buffer)) {
1242
0
        return EOF;
1243
0
    }
1244
1245
0
    error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
1246
0
    if (error) {
1247
0
        VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1248
0
                     dpif_name(&dpif->dpif), ovs_strerror(error));
1249
0
    }
1250
0
    return error;
1251
0
}
1252
1253
static int
1254
dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1255
                            struct dpif_port *dpif_port)
1256
0
{
1257
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1258
0
    struct dpif_netlink_port_state *state = state_;
1259
0
    struct dpif_netlink_vport vport;
1260
0
    int error;
1261
1262
0
    error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1263
0
                                          &state->buf);
1264
0
    if (error) {
1265
0
        return error;
1266
0
    }
1267
0
    dpif_port->name = CONST_CAST(char *, vport.name);
1268
0
    dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
1269
0
    dpif_port->port_no = vport.port_no;
1270
0
    return 0;
1271
0
}
1272
1273
static int
1274
dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
1275
0
{
1276
0
    struct dpif_netlink_port_state *state = state_;
1277
0
    int error = nl_dump_done(&state->dump);
1278
1279
0
    ofpbuf_uninit(&state->buf);
1280
0
    free(state);
1281
0
    return error;
1282
0
}
1283
1284
static int
1285
dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
1286
0
{
1287
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1288
1289
    /* Lazily create the Netlink socket to listen for notifications. */
1290
0
    if (!dpif->port_notifier) {
1291
0
        struct nl_sock *sock;
1292
0
        int error;
1293
1294
0
        error = nl_sock_create(NETLINK_GENERIC, &sock);
1295
0
        if (error) {
1296
0
            return error;
1297
0
        }
1298
1299
0
        error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1300
0
        if (error) {
1301
0
            nl_sock_destroy(sock);
1302
0
            return error;
1303
0
        }
1304
0
        dpif->port_notifier = sock;
1305
1306
        /* We have no idea of the current state so report that everything
1307
         * changed. */
1308
0
        return ENOBUFS;
1309
0
    }
1310
1311
0
    for (;;) {
1312
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1313
0
        uint64_t buf_stub[4096 / 8];
1314
0
        struct ofpbuf buf;
1315
0
        int error;
1316
1317
0
        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
1318
0
        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
1319
0
        if (!error) {
1320
0
            struct dpif_netlink_vport vport;
1321
1322
0
            error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
1323
0
            if (!error) {
1324
0
                if (vport.dp_ifindex == dpif->dp_ifindex
1325
0
                    && (vport.cmd == OVS_VPORT_CMD_NEW
1326
0
                        || vport.cmd == OVS_VPORT_CMD_DEL
1327
0
                        || vport.cmd == OVS_VPORT_CMD_SET)) {
1328
0
                    VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1329
0
                             dpif->dpif.full_name, vport.name, vport.cmd);
1330
0
                    if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
1331
0
                        dpif->refresh_channels = true;
1332
0
                    }
1333
0
                    *devnamep = xstrdup(vport.name);
1334
0
                    ofpbuf_uninit(&buf);
1335
0
                    return 0;
1336
0
                }
1337
0
            }
1338
0
        } else if (error != EAGAIN) {
1339
0
            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1340
0
                         ovs_strerror(error));
1341
0
            nl_sock_drain(dpif->port_notifier);
1342
0
            error = ENOBUFS;
1343
0
        }
1344
1345
0
        ofpbuf_uninit(&buf);
1346
0
        if (error) {
1347
0
            return error;
1348
0
        }
1349
0
    }
1350
0
}
1351
1352
static void
1353
dpif_netlink_port_poll_wait(const struct dpif *dpif_)
1354
0
{
1355
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1356
1357
0
    if (dpif->port_notifier) {
1358
0
        nl_sock_wait(dpif->port_notifier, POLLIN);
1359
0
    } else {
1360
0
        poll_immediate_wake();
1361
0
    }
1362
0
}
1363
1364
static void
1365
dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1366
                            const ovs_u128 *ufid, bool terse)
1367
0
{
1368
0
    if (ufid) {
1369
0
        request->ufid = *ufid;
1370
0
        request->ufid_present = true;
1371
0
    } else {
1372
0
        request->ufid_present = false;
1373
0
    }
1374
0
    request->ufid_terse = terse;
1375
0
}
1376
1377
static void
1378
dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1379
                             const struct nlattr *key, size_t key_len,
1380
                             const ovs_u128 *ufid, bool terse,
1381
                             struct dpif_netlink_flow *request)
1382
0
{
1383
0
    dpif_netlink_flow_init(request);
1384
0
    request->cmd = OVS_FLOW_CMD_GET;
1385
0
    request->dp_ifindex = dpif->dp_ifindex;
1386
0
    request->key = key;
1387
0
    request->key_len = key_len;
1388
0
    dpif_netlink_flow_init_ufid(request, ufid, terse);
1389
0
}
1390
1391
static void
1392
dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1393
                           const struct dpif_flow_get *get,
1394
                           struct dpif_netlink_flow *request)
1395
0
{
1396
0
    dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1397
0
                                 false, request);
1398
0
}
1399
1400
static int
1401
dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1402
                        const struct nlattr *key, size_t key_len,
1403
                        const ovs_u128 *ufid, bool terse,
1404
                        struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1405
0
{
1406
0
    struct dpif_netlink_flow request;
1407
1408
0
    dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
1409
0
    return dpif_netlink_flow_transact(&request, reply, bufp);
1410
0
}
1411
1412
static int
1413
dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1414
                      const struct dpif_netlink_flow *flow,
1415
                      struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1416
0
{
1417
0
    return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1418
0
                                   flow->ufid_present ? &flow->ufid : NULL,
1419
0
                                   false, reply, bufp);
1420
0
}
1421
1422
static void
1423
dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1424
                           const struct dpif_flow_put *put,
1425
                           struct dpif_netlink_flow *request)
1426
0
{
1427
0
    static const struct nlattr dummy_action;
1428
1429
0
    dpif_netlink_flow_init(request);
1430
0
    request->cmd = (put->flags & DPIF_FP_CREATE
1431
0
                    ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1432
0
    request->dp_ifindex = dpif->dp_ifindex;
1433
0
    request->key = put->key;
1434
0
    request->key_len = put->key_len;
1435
0
    request->mask = put->mask;
1436
0
    request->mask_len = put->mask_len;
1437
0
    dpif_netlink_flow_init_ufid(request, put->ufid, false);
1438
1439
    /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1440
0
    request->actions = (put->actions
1441
0
                        ? put->actions
1442
0
                        : CONST_CAST(struct nlattr *, &dummy_action));
1443
0
    request->actions_len = put->actions_len;
1444
0
    if (put->flags & DPIF_FP_ZERO_STATS) {
1445
0
        request->clear = true;
1446
0
    }
1447
0
    if (put->flags & DPIF_FP_PROBE) {
1448
0
        request->probe = true;
1449
0
    }
1450
0
    request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
1451
0
}
1452
1453
static void
1454
dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1455
                             const struct nlattr *key, size_t key_len,
1456
                             const ovs_u128 *ufid, bool terse,
1457
                             struct dpif_netlink_flow *request)
1458
0
{
1459
0
    dpif_netlink_flow_init(request);
1460
0
    request->cmd = OVS_FLOW_CMD_DEL;
1461
0
    request->dp_ifindex = dpif->dp_ifindex;
1462
0
    request->key = key;
1463
0
    request->key_len = key_len;
1464
0
    dpif_netlink_flow_init_ufid(request, ufid, terse);
1465
0
}
1466
1467
static void
1468
dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1469
                           const struct dpif_flow_del *del,
1470
                           struct dpif_netlink_flow *request)
1471
0
{
1472
0
    dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1473
0
                                 del->ufid, del->terse, request);
1474
0
}
1475
1476
struct dpif_netlink_flow_dump {
1477
    struct dpif_flow_dump up;
1478
    struct nl_dump nl_dump;
1479
    atomic_int status;
1480
};
1481
1482
static struct dpif_netlink_flow_dump *
1483
dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
1484
0
{
1485
0
    return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
1486
0
}
1487
1488
static struct dpif_flow_dump *
1489
dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
1490
                              struct dpif_flow_dump_types *types)
1491
0
{
1492
0
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1493
0
    struct dpif_netlink_flow_dump *dump;
1494
0
    struct dpif_netlink_flow request;
1495
0
    struct ofpbuf *buf;
1496
1497
0
    dump = xmalloc(sizeof *dump);
1498
0
    dpif_flow_dump_init(&dump->up, dpif_, terse, types);
1499
1500
0
    dpif_netlink_flow_init(&request);
1501
0
    request.cmd = OVS_FLOW_CMD_GET;
1502
0
    request.dp_ifindex = dpif->dp_ifindex;
1503
0
    request.ufid_present = false;
1504
0
    request.ufid_terse = terse;
1505
1506
0
    buf = ofpbuf_new(1024);
1507
0
    dpif_netlink_flow_to_ofpbuf(&request, buf);
1508
0
    nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1509
0
    ofpbuf_delete(buf);
1510
0
    atomic_init(&dump->status, 0);
1511
1512
0
    return &dump->up;
1513
0
}
1514
1515
static int
1516
dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
1517
0
{
1518
0
    struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1519
0
    unsigned int nl_status = nl_dump_done(&dump->nl_dump);
1520
0
    int dump_status;
1521
1522
    /* No other thread has access to 'dump' at this point. */
1523
0
    atomic_read_relaxed(&dump->status, &dump_status);
1524
0
    free(dump);
1525
0
    return dump_status ? dump_status : nl_status;
1526
0
}
1527
1528
struct dpif_netlink_flow_dump_thread {
1529
    struct dpif_flow_dump_thread up;
1530
    struct dpif_netlink_flow_dump *dump;
1531
    struct dpif_netlink_flow flow;
1532
    struct dpif_flow_stats stats;
1533
    struct ofpbuf nl_flows;     /* Always used to store flows. */
1534
    struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
1535
};
1536
1537
static struct dpif_netlink_flow_dump_thread *
1538
dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
1539
0
{
1540
0
    return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
1541
0
}
1542
1543
static struct dpif_flow_dump_thread *
1544
dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1545
0
{
1546
0
    struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1547
0
    struct dpif_netlink_flow_dump_thread *thread;
1548
1549
0
    thread = xmalloc(sizeof *thread);
1550
0
    dpif_flow_dump_thread_init(&thread->up, &dump->up);
1551
0
    thread->dump = dump;
1552
0
    ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1553
0
    thread->nl_actions = NULL;
1554
1555
0
    return &thread->up;
1556
0
}
1557
1558
static void
1559
dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
1560
0
{
1561
0
    struct dpif_netlink_flow_dump_thread *thread
1562
0
        = dpif_netlink_flow_dump_thread_cast(thread_);
1563
1564
0
    ofpbuf_uninit(&thread->nl_flows);
1565
0
    ofpbuf_delete(thread->nl_actions);
1566
0
    free(thread);
1567
0
}
1568
1569
static void
1570
dpif_netlink_flow_to_dpif_flow(struct dpif_flow *dpif_flow,
1571
                               const struct dpif_netlink_flow *datapath_flow)
1572
0
{
1573
0
    dpif_flow->key = datapath_flow->key;
1574
0
    dpif_flow->key_len = datapath_flow->key_len;
1575
0
    dpif_flow->mask = datapath_flow->mask;
1576
0
    dpif_flow->mask_len = datapath_flow->mask_len;
1577
0
    dpif_flow->actions = datapath_flow->actions;
1578
0
    dpif_flow->actions_len = datapath_flow->actions_len;
1579
0
    dpif_flow->ufid_present = datapath_flow->ufid_present;
1580
0
    dpif_flow->pmd_id = PMD_ID_NULL;
1581
0
    if (datapath_flow->ufid_present) {
1582
0
        dpif_flow->ufid = datapath_flow->ufid;
1583
0
    } else {
1584
0
        ovs_assert(datapath_flow->key && datapath_flow->key_len);
1585
0
        odp_flow_key_hash(datapath_flow->key, datapath_flow->key_len,
1586
0
                          &dpif_flow->ufid);
1587
0
    }
1588
0
    dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
1589
0
    dpif_flow->attrs.offloaded = false;
1590
0
    dpif_flow->attrs.dp_layer = "ovs";
1591
0
    dpif_flow->attrs.dp_extra_info = NULL;
1592
0
}
1593
1594
static int
1595
dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1596
                            struct dpif_flow *flows, int max_flows)
1597
0
{
1598
0
    struct dpif_netlink_flow_dump_thread *thread
1599
0
        = dpif_netlink_flow_dump_thread_cast(thread_);
1600
0
    struct dpif_netlink_flow_dump *dump = thread->dump;
1601
0
    struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dump->dpif);
1602
0
    int n_flows = 0;
1603
1604
0
    ofpbuf_delete(thread->nl_actions);
1605
0
    thread->nl_actions = NULL;
1606
1607
0
    while (!n_flows
1608
0
           || (n_flows < max_flows && thread->nl_flows.size)) {
1609
0
        struct dpif_netlink_flow datapath_flow;
1610
0
        struct ofpbuf nl_flow;
1611
0
        int error;
1612
1613
        /* Try to grab another flow. */
1614
0
        if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1615
0
            break;
1616
0
        }
1617
1618
        /* Convert the flow to our output format. */
1619
0
        error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
1620
0
        if (error) {
1621
0
            atomic_store_relaxed(&dump->status, error);
1622
0
            break;
1623
0
        }
1624
1625
0
        if (dump->up.terse || datapath_flow.actions) {
1626
            /* Common case: we don't want actions, or the flow includes
1627
             * actions. */
1628
0
            dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
1629
0
        } else {
1630
            /* Rare case: the flow does not include actions.  Retrieve this
1631
             * individual flow again to get the actions. */
1632
0
            error = dpif_netlink_flow_get(dpif, &datapath_flow,
1633
0
                                          &datapath_flow, &thread->nl_actions);
1634
0
            if (error == ENOENT) {
1635
0
                VLOG_DBG("dumped flow disappeared on get");
1636
0
                continue;
1637
0
            } else if (error) {
1638
0
                VLOG_WARN("error fetching dumped flow: %s",
1639
0
                          ovs_strerror(error));
1640
0
                atomic_store_relaxed(&dump->status, error);
1641
0
                break;
1642
0
            }
1643
1644
            /* Save this flow.  Then exit, because we only have one buffer to
1645
             * handle this case. */
1646
0
            dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
1647
0
            break;
1648
0
        }
1649
0
    }
1650
0
    return n_flows;
1651
0
}
1652
1653
static void
1654
dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1655
                            struct ofpbuf *buf)
1656
0
{
1657
0
    struct ovs_header *k_exec;
1658
0
    size_t key_ofs;
1659
1660
0
    ofpbuf_prealloc_tailroom(buf, (64
1661
0
                                   + dp_packet_size(d_exec->packet)
1662
0
                                   + ODP_KEY_METADATA_SIZE
1663
0
                                   + d_exec->actions_len));
1664
1665
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
1666
0
                          OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
1667
1668
0
    k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1669
0
    k_exec->dp_ifindex = dp_ifindex;
1670
1671
0
    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
1672
0
                      dp_packet_data(d_exec->packet),
1673
0
                      dp_packet_size(d_exec->packet));
1674
1675
0
    key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
1676
0
    odp_key_from_dp_packet(buf, d_exec->packet);
1677
0
    nl_msg_end_nested(buf, key_ofs);
1678
1679
0
    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1680
0
                      d_exec->actions, d_exec->actions_len);
1681
0
    if (d_exec->probe) {
1682
0
        nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
1683
0
    }
1684
0
    if (d_exec->mtu) {
1685
0
        nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1686
0
    }
1687
1688
0
    if (d_exec->hash) {
1689
0
        nl_msg_put_u64(buf, OVS_PACKET_ATTR_HASH, d_exec->hash);
1690
0
    }
1691
1692
0
    if (d_exec->upcall_pid) {
1693
0
        nl_msg_put_u32(buf, OVS_PACKET_ATTR_UPCALL_PID, d_exec->upcall_pid);
1694
0
    }
1695
0
}
1696
1697
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1698
 * Returns the number actually executed (at least 1, if 'n_ops' is
1699
 * positive). */
1700
static size_t
1701
dpif_netlink_operate__(struct dpif_netlink *dpif,
1702
                       struct dpif_op **ops, size_t n_ops)
1703
0
{
1704
0
    struct op_auxdata {
1705
0
        struct nl_transaction txn;
1706
1707
0
        struct ofpbuf request;
1708
0
        uint64_t request_stub[1024 / 8];
1709
1710
0
        struct ofpbuf reply;
1711
0
        uint64_t reply_stub[1024 / 8];
1712
0
    } auxes[OPERATE_MAX_OPS];
1713
1714
0
    struct nl_transaction *txnsp[OPERATE_MAX_OPS];
1715
0
    size_t i;
1716
1717
0
    n_ops = MIN(n_ops, OPERATE_MAX_OPS);
1718
0
    for (i = 0; i < n_ops; i++) {
1719
0
        struct op_auxdata *aux = &auxes[i];
1720
0
        struct dpif_op *op = ops[i];
1721
0
        struct dpif_flow_put *put;
1722
0
        struct dpif_flow_del *del;
1723
0
        struct dpif_flow_get *get;
1724
0
        struct dpif_netlink_flow flow;
1725
1726
0
        ofpbuf_use_stub(&aux->request,
1727
0
                        aux->request_stub, sizeof aux->request_stub);
1728
0
        aux->txn.request = &aux->request;
1729
1730
0
        ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1731
0
        aux->txn.reply = NULL;
1732
1733
0
        switch (op->type) {
1734
0
        case DPIF_OP_FLOW_PUT:
1735
0
            put = &op->flow_put;
1736
0
            dpif_netlink_init_flow_put(dpif, put, &flow);
1737
0
            if (put->stats) {
1738
0
                flow.nlmsg_flags |= NLM_F_ECHO;
1739
0
                aux->txn.reply = &aux->reply;
1740
0
            }
1741
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1742
1743
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_put,
1744
0
                           dpif, put, &flow, &aux->request);
1745
0
            break;
1746
1747
0
        case DPIF_OP_FLOW_DEL:
1748
0
            del = &op->flow_del;
1749
0
            dpif_netlink_init_flow_del(dpif, del, &flow);
1750
0
            if (del->stats) {
1751
0
                flow.nlmsg_flags |= NLM_F_ECHO;
1752
0
                aux->txn.reply = &aux->reply;
1753
0
            }
1754
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1755
1756
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_del,
1757
0
                           dpif, del, &flow, &aux->request);
1758
0
            break;
1759
1760
0
        case DPIF_OP_EXECUTE:
1761
            /* Can't execute a packet that won't fit in a Netlink attribute. */
1762
0
            if (OVS_UNLIKELY(nl_attr_oversized(
1763
0
                                 dp_packet_size(op->execute.packet)))) {
1764
                /* Report an error immediately if this is the first operation.
1765
                 * Otherwise the easiest thing to do is to postpone to the next
1766
                 * call (when this will be the first operation). */
1767
0
                if (i == 0) {
1768
0
                    VLOG_ERR_RL(&error_rl,
1769
0
                                "dropping oversized %"PRIu32"-byte packet",
1770
0
                                dp_packet_size(op->execute.packet));
1771
0
                    op->error = ENOBUFS;
1772
0
                    return 1;
1773
0
                }
1774
0
                n_ops = i;
1775
0
            } else {
1776
0
                dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
1777
0
                                            &aux->request);
1778
1779
0
                OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_execute,
1780
0
                               dpif, &op->execute,
1781
0
                               dp_packet_data(op->execute.packet),
1782
0
                               dp_packet_size(op->execute.packet),
1783
0
                               &aux->request);
1784
0
            }
1785
0
            break;
1786
1787
0
        case DPIF_OP_FLOW_GET:
1788
0
            get = &op->flow_get;
1789
0
            dpif_netlink_init_flow_get(dpif, get, &flow);
1790
0
            aux->txn.reply = get->buffer;
1791
0
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1792
1793
0
            OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_get,
1794
0
                           dpif, get, &flow, &aux->request);
1795
0
            break;
1796
1797
0
        default:
1798
0
            OVS_NOT_REACHED();
1799
0
        }
1800
0
    }
1801
1802
0
    for (i = 0; i < n_ops; i++) {
1803
0
        txnsp[i] = &auxes[i].txn;
1804
0
    }
1805
0
    nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
1806
1807
0
    for (i = 0; i < n_ops; i++) {
1808
0
        struct op_auxdata *aux = &auxes[i];
1809
0
        struct nl_transaction *txn = &auxes[i].txn;
1810
0
        struct dpif_op *op = ops[i];
1811
0
        struct dpif_flow_put *put;
1812
0
        struct dpif_flow_del *del;
1813
0
        struct dpif_flow_get *get;
1814
1815
0
        op->error = txn->error;
1816
1817
0
        switch (op->type) {
1818
0
        case DPIF_OP_FLOW_PUT:
1819
0
            put = &op->flow_put;
1820
0
            if (put->stats) {
1821
0
                if (!op->error) {
1822
0
                    struct dpif_netlink_flow reply;
1823
1824
0
                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1825
0
                                                              txn->reply);
1826
0
                    if (!op->error) {
1827
0
                        dpif_netlink_flow_get_stats(&reply, put->stats);
1828
0
                    }
1829
0
                }
1830
0
            }
1831
0
            break;
1832
1833
0
        case DPIF_OP_FLOW_DEL:
1834
0
            del = &op->flow_del;
1835
0
            if (del->stats) {
1836
0
                if (!op->error) {
1837
0
                    struct dpif_netlink_flow reply;
1838
1839
0
                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1840
0
                                                              txn->reply);
1841
0
                    if (!op->error) {
1842
0
                        dpif_netlink_flow_get_stats(&reply, del->stats);
1843
0
                    }
1844
0
                }
1845
0
            }
1846
0
            break;
1847
1848
0
        case DPIF_OP_EXECUTE:
1849
0
            break;
1850
1851
0
        case DPIF_OP_FLOW_GET:
1852
0
            get = &op->flow_get;
1853
0
            if (!op->error) {
1854
0
                struct dpif_netlink_flow reply;
1855
1856
0
                op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
1857
0
                if (!op->error) {
1858
0
                    dpif_netlink_flow_to_dpif_flow(get->flow, &reply);
1859
0
                }
1860
0
            }
1861
0
            break;
1862
1863
0
        default:
1864
0
            OVS_NOT_REACHED();
1865
0
        }
1866
1867
0
        ofpbuf_uninit(&aux->request);
1868
0
        ofpbuf_uninit(&aux->reply);
1869
0
    }
1870
1871
0
    return n_ops;
1872
0
}
1873
1874
/* Executes all 'n_ops' operations in 'ops' against 'dpif_', feeding them to
 * dpif_netlink_operate__() in as many batches as it takes. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t done;

    /* Each call consumes some leading portion of what remains. */
    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
1886
1887
static int
1888
dpif_netlink_handler_init(struct dpif_handler *handler)
1889
0
{
1890
0
    handler->epoll_fd = epoll_create(10);
1891
0
    return handler->epoll_fd < 0 ? errno : 0;
1892
0
}
1893
1894
static void
1895
dpif_netlink_handler_uninit(struct dpif_handler *handler)
1896
0
{
1897
0
    close(handler->epoll_fd);
1898
0
}
1899
1900
/* Returns true if 'num' is a prime number, otherwise false.
 *
 * Uses trial division by odd numbers up to the square root of 'num'.  The
 * divisor is 64 bits wide so that squaring it cannot wrap around for any
 * 32-bit 'num'. */
static bool
is_prime(uint32_t num)
{
    uint64_t divisor;

    if (num < 2) {
        return false;
    }
    if (num == 2) {
        return true;
    }
    if (!(num & 1)) {
        /* Even and greater than 2. */
        return false;
    }

    divisor = 3;
    while (divisor * divisor <= num) {
        if (!(num % divisor)) {
            return false;
        }
        divisor += 2;
    }

    return true;
}
1926
1927
/* Returns the smallest prime that is greater than or equal to 'start'
 * (so 'start' itself when it is prime, and 2 for any 'start' up to 2).
 *
 * Only values below UINT32_MAX are examined.  Returns 0 if none of them is
 * prime. */
static uint32_t
next_prime(uint32_t start)
{
    uint32_t candidate;

    if (start <= 2) {
        return 2;
    }

    candidate = start;
    while (candidate < UINT32_MAX) {
        if (is_prime(candidate)) {
            return candidate;
        }
        candidate++;
    }

    return 0;
}
1947
1948
/* Calculates and returns the number of handler threads needed.
 *
 * When fewer cores are available to OVS than exist in total (and there are
 * more than two cores in total), the result is:
 *
 *     handlers_n = min(next_prime(active_cores + 1), total_cores)
 *
 * Otherwise it is simply the number of active cores.  The result is never
 * less than 1. */
static uint32_t
dpif_netlink_calculate_n_handlers(void)
{
    uint32_t total = count_total_cores();
    uint32_t wanted = count_cpu_cores();

    /* If not all cores are available to OVS, create additional handler
     * threads to ensure more fair distribution of load between them. */
    if (wanted < total && total > 2) {
        uint32_t prime = next_prime(wanted + 1);

        wanted = MIN(prime, total);
    }

    return MAX(wanted, 1);
}
1970
1971
static int
1972
dpif_netlink_refresh_handlers_cpu_dispatch(struct dpif_netlink *dpif)
1973
    OVS_REQ_WRLOCK(dpif->upcall_lock)
1974
0
{
1975
0
    int handler_id;
1976
0
    int error = 0;
1977
0
    uint32_t n_handlers;
1978
0
    uint32_t *upcall_pids;
1979
1980
0
    n_handlers = dpif_netlink_calculate_n_handlers();
1981
0
    if (dpif->n_handlers != n_handlers) {
1982
0
        VLOG_DBG("Dispatch mode(per-cpu): initializing %d handlers",
1983
0
                   n_handlers);
1984
0
        destroy_all_handlers(dpif);
1985
0
        upcall_pids = xzalloc(n_handlers * sizeof *upcall_pids);
1986
0
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
1987
0
        for (handler_id = 0; handler_id < n_handlers; handler_id++) {
1988
0
            struct dpif_handler *handler = &dpif->handlers[handler_id];
1989
0
            error = create_nl_sock(dpif, &handler->sock);
1990
0
            if (error) {
1991
0
                VLOG_ERR("Dispatch mode(per-cpu): Cannot create socket for"
1992
0
                         "handler %d", handler_id);
1993
0
                continue;
1994
0
            }
1995
0
            upcall_pids[handler_id] = nl_sock_pid(handler->sock);
1996
0
            VLOG_DBG("Dispatch mode(per-cpu): "
1997
0
                      "handler %d has Netlink PID of %u",
1998
0
                      handler_id, upcall_pids[handler_id]);
1999
0
        }
2000
2001
0
        dpif->n_handlers = n_handlers;
2002
0
        error = dpif_netlink_set_handler_pids(&dpif->dpif, upcall_pids,
2003
0
                                              n_handlers);
2004
0
        free(upcall_pids);
2005
0
    }
2006
0
    return error;
2007
0
}
2008
2009
/* Synchronizes 'channels' in 'dpif->handlers'  with the set of vports
2010
 * currently in 'dpif' in the kernel, by adding a new set of channels for
2011
 * any kernel vport that lacks one and deleting any channels that have no
2012
 * backing kernel vports. */
2013
static int
2014
dpif_netlink_refresh_handlers_vport_dispatch(struct dpif_netlink *dpif,
2015
                                             uint32_t n_handlers)
2016
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2017
0
{
2018
0
    unsigned long int *keep_channels;
2019
0
    struct dpif_netlink_vport vport;
2020
0
    size_t keep_channels_nbits;
2021
0
    struct nl_dump dump;
2022
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2023
0
    struct ofpbuf buf;
2024
0
    int retval = 0;
2025
0
    size_t i;
2026
2027
0
    if (dpif->n_handlers != n_handlers) {
2028
0
        destroy_all_channels(dpif);
2029
0
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2030
0
        for (i = 0; i < n_handlers; i++) {
2031
0
            int error;
2032
0
            struct dpif_handler *handler = &dpif->handlers[i];
2033
2034
0
            error = dpif_netlink_handler_init(handler);
2035
0
            if (error) {
2036
0
                size_t j;
2037
2038
0
                for (j = 0; j < i; j++) {
2039
0
                    struct dpif_handler *tmp = &dpif->handlers[j];
2040
0
                    dpif_netlink_handler_uninit(tmp);
2041
0
                }
2042
0
                free(dpif->handlers);
2043
0
                dpif->handlers = NULL;
2044
2045
0
                return error;
2046
0
            }
2047
0
        }
2048
0
        dpif->n_handlers = n_handlers;
2049
0
    }
2050
2051
0
    for (i = 0; i < n_handlers; i++) {
2052
0
        struct dpif_handler *handler = &dpif->handlers[i];
2053
2054
0
        handler->event_offset = handler->n_events = 0;
2055
0
    }
2056
2057
0
    keep_channels_nbits = dpif->uc_array_size;
2058
0
    keep_channels = bitmap_allocate(keep_channels_nbits);
2059
2060
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
2061
0
    dpif_netlink_port_dump_start__(dpif, &dump);
2062
0
    while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
2063
0
        uint32_t port_no = odp_to_u32(vport.port_no);
2064
0
        uint32_t upcall_pid;
2065
0
        int error;
2066
2067
0
        if (port_no >= dpif->uc_array_size
2068
0
            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
2069
0
            struct nl_sock *sock;
2070
0
            error = create_nl_sock(dpif, &sock);
2071
2072
0
            if (error) {
2073
0
                goto error;
2074
0
            }
2075
2076
0
            error = vport_add_channel(dpif, vport.port_no, sock);
2077
0
            if (error) {
2078
0
                VLOG_INFO("%s: could not add channels for port %s",
2079
0
                          dpif_name(&dpif->dpif), vport.name);
2080
0
                nl_sock_destroy(sock);
2081
0
                retval = error;
2082
0
                goto error;
2083
0
            }
2084
0
            upcall_pid = nl_sock_pid(sock);
2085
0
        }
2086
2087
        /* Configure the vport to deliver misses to 'sock'. */
2088
0
        if (vport.upcall_pids[0] == 0
2089
0
            || vport.n_upcall_pids != 1
2090
0
            || upcall_pid != vport.upcall_pids[0]) {
2091
0
            struct dpif_netlink_vport vport_request;
2092
2093
0
            dpif_netlink_vport_init(&vport_request);
2094
0
            vport_request.cmd = OVS_VPORT_CMD_SET;
2095
0
            vport_request.dp_ifindex = dpif->dp_ifindex;
2096
0
            vport_request.port_no = vport.port_no;
2097
0
            vport_request.n_upcall_pids = 1;
2098
0
            vport_request.upcall_pids = &upcall_pid;
2099
0
            error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
2100
0
            if (error) {
2101
0
                VLOG_WARN_RL(&error_rl,
2102
0
                             "%s: failed to set upcall pid on port: %s",
2103
0
                             dpif_name(&dpif->dpif), ovs_strerror(error));
2104
2105
0
                if (error != ENODEV && error != ENOENT) {
2106
0
                    retval = error;
2107
0
                } else {
2108
                    /* The vport isn't really there, even though the dump says
2109
                     * it is.  Probably we just hit a race after a port
2110
                     * disappeared. */
2111
0
                }
2112
0
                goto error;
2113
0
            }
2114
0
        }
2115
2116
0
        if (port_no < keep_channels_nbits) {
2117
0
            bitmap_set1(keep_channels, port_no);
2118
0
        }
2119
0
        continue;
2120
2121
0
    error:
2122
0
        vport_del_channels(dpif, vport.port_no);
2123
0
    }
2124
0
    nl_dump_done(&dump);
2125
0
    ofpbuf_uninit(&buf);
2126
2127
    /* Discard any saved channels that we didn't reuse. */
2128
0
    for (i = 0; i < keep_channels_nbits; i++) {
2129
0
        if (!bitmap_is_set(keep_channels, i)) {
2130
0
            vport_del_channels(dpif, u32_to_odp(i));
2131
0
        }
2132
0
    }
2133
0
    free(keep_channels);
2134
2135
0
    return retval;
2136
0
}
2137
2138
static int
2139
dpif_netlink_recv_set_vport_dispatch(struct dpif_netlink *dpif, bool enable)
2140
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2141
0
{
2142
0
    if ((dpif->handlers != NULL) == enable) {
2143
0
        return 0;
2144
0
    } else if (!enable) {
2145
0
        destroy_all_channels(dpif);
2146
0
        return 0;
2147
0
    } else {
2148
0
        return dpif_netlink_refresh_handlers_vport_dispatch(dpif, 1);
2149
0
    }
2150
0
}
2151
2152
static int
2153
dpif_netlink_recv_set_cpu_dispatch(struct dpif_netlink *dpif, bool enable)
2154
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2155
0
{
2156
0
    if ((dpif->handlers != NULL) == enable) {
2157
0
        return 0;
2158
0
    } else if (!enable) {
2159
0
        destroy_all_handlers(dpif);
2160
0
        return 0;
2161
0
    } else {
2162
0
        return dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
2163
0
    }
2164
0
}
2165
2166
static int
2167
dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
2168
0
{
2169
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2170
0
    int error;
2171
2172
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2173
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2174
0
        error = dpif_netlink_recv_set_cpu_dispatch(dpif, enable);
2175
0
    } else {
2176
0
        error = dpif_netlink_recv_set_vport_dispatch(dpif, enable);
2177
0
    }
2178
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2179
2180
0
    return error;
2181
0
}
2182
2183
static int
2184
dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
2185
0
{
2186
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2187
0
    int error = 0;
2188
2189
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2190
0
    if (dpif->handlers) {
2191
0
        if (dpif_netlink_upcall_per_cpu(dpif)) {
2192
0
            error = dpif_netlink_refresh_handlers_cpu_dispatch(dpif);
2193
0
        } else {
2194
0
            error = dpif_netlink_refresh_handlers_vport_dispatch(dpif,
2195
0
                                                                 n_handlers);
2196
0
        }
2197
0
    }
2198
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2199
2200
0
    return error;
2201
0
}
2202
2203
/* Reports whether 'dpif_' dictates its own number of handler threads.
 *
 * In per-CPU dispatch mode, stores the computed handler count in
 * '*n_handlers' and returns true.  Otherwise returns false and leaves
 * '*n_handlers' untouched. */
static bool
dpif_netlink_number_handlers_required(struct dpif *dpif_, uint32_t *n_handlers)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    if (!dpif_netlink_upcall_per_cpu(dpif)) {
        return false;
    }

    *n_handlers = dpif_netlink_calculate_n_handlers();
    return true;
}
2215
2216
static int
2217
dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2218
                             uint32_t queue_id, uint32_t *priority)
2219
0
{
2220
0
    if (queue_id < 0xf000) {
2221
0
        *priority = TC_H_MAKE(1 << 16, queue_id + 1);
2222
0
        return 0;
2223
0
    } else {
2224
0
        return EINVAL;
2225
0
    }
2226
0
}
2227
2228
static int
2229
parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
2230
                 int *dp_ifindex)
2231
0
{
2232
0
    static const struct nl_policy ovs_packet_policy[] = {
2233
        /* Always present. */
2234
0
        [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
2235
0
                                     .min_len = ETH_HEADER_LEN },
2236
0
        [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
2237
2238
        /* OVS_PACKET_CMD_ACTION only. */
2239
0
        [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
2240
0
        [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
2241
0
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
2242
0
        [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true },
2243
0
        [OVS_PACKET_ATTR_HASH] = { .type = NL_A_U64, .optional = true }
2244
0
    };
2245
2246
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2247
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2248
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2249
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2250
2251
0
    struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
2252
0
    if (!nlmsg || !genl || !ovs_header
2253
0
        || nlmsg->nlmsg_type != ovs_packet_family
2254
0
        || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2255
0
                            ARRAY_SIZE(ovs_packet_policy))) {
2256
0
        return EINVAL;
2257
0
    }
2258
2259
0
    int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2260
0
                : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2261
0
                : -1);
2262
0
    if (type < 0) {
2263
0
        return EINVAL;
2264
0
    }
2265
2266
    /* (Re)set ALL fields of '*upcall' on successful return. */
2267
0
    upcall->type = type;
2268
0
    upcall->key = CONST_CAST(struct nlattr *,
2269
0
                             nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
2270
0
    upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
2271
0
    odp_flow_key_hash(upcall->key, upcall->key_len, &upcall->ufid);
2272
0
    upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
2273
0
    upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
2274
0
    upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
2275
0
    upcall->mru = a[OVS_PACKET_ATTR_MRU];
2276
0
    upcall->hash = a[OVS_PACKET_ATTR_HASH];
2277
2278
    /* Allow overwriting the netlink attribute header without reallocating. */
2279
0
    dp_packet_use_stub(&upcall->packet,
2280
0
                    CONST_CAST(struct nlattr *,
2281
0
                               nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2282
0
                    nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2283
0
                    sizeof(struct nlattr));
2284
0
    dp_packet_set_data(&upcall->packet,
2285
0
                    (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2286
0
    dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
2287
2288
0
    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2289
        /* Ethernet frame */
2290
0
        upcall->packet.packet_type = htonl(PT_ETH);
2291
0
    } else {
2292
        /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2293
0
        ovs_be16 ethertype = 0;
2294
0
        const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2295
0
                                                     upcall->key_len,
2296
0
                                                     OVS_KEY_ATTR_ETHERTYPE);
2297
0
        if (et_nla) {
2298
0
            ethertype = nl_attr_get_be16(et_nla);
2299
0
        }
2300
0
        upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2301
0
                                                    ntohs(ethertype));
2302
0
        dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2303
0
    }
2304
2305
0
    *dp_ifindex = ovs_header->dp_ifindex;
2306
2307
0
    return 0;
2308
0
}
2309
2310
static int
2311
dpif_netlink_recv_cpu_dispatch(struct dpif_netlink *dpif, uint32_t handler_id,
2312
                               struct dpif_upcall *upcall, struct ofpbuf *buf)
2313
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2314
0
{
2315
0
    struct dpif_handler *handler;
2316
0
    int read_tries = 0;
2317
2318
0
    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2319
0
        return EAGAIN;
2320
0
    }
2321
2322
0
    handler = &dpif->handlers[handler_id];
2323
2324
0
    for (;;) {
2325
0
        int dp_ifindex;
2326
0
        int error;
2327
2328
0
        if (++read_tries > 50) {
2329
0
            return EAGAIN;
2330
0
        }
2331
0
        error = nl_sock_recv(handler->sock, buf, NULL, false);
2332
0
        if (error == ENOBUFS) {
2333
            /* ENOBUFS typically means that we've received so many
2334
             * packets that the buffer overflowed.  Try again
2335
             * immediately because there's almost certainly a packet
2336
             * waiting for us. */
2337
0
            report_loss(dpif, NULL, 0, handler_id);
2338
0
            continue;
2339
0
        }
2340
2341
0
        if (error) {
2342
0
            if (error == EAGAIN) {
2343
0
                break;
2344
0
            }
2345
0
            return error;
2346
0
        }
2347
2348
0
        error = parse_odp_packet(buf, upcall, &dp_ifindex);
2349
0
        if (!error && dp_ifindex == dpif->dp_ifindex) {
2350
0
            upcall->pid = nl_sock_pid(handler->sock);
2351
0
            return 0;
2352
0
        } else if (error) {
2353
0
            return error;
2354
0
        }
2355
0
    }
2356
2357
0
    return EAGAIN;
2358
0
}
2359
2360
static int
2361
dpif_netlink_recv_vport_dispatch(struct dpif_netlink *dpif,
2362
                                 uint32_t handler_id,
2363
                                 struct dpif_upcall *upcall,
2364
                                 struct ofpbuf *buf)
2365
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2366
0
{
2367
0
    struct dpif_handler *handler;
2368
0
    int read_tries = 0;
2369
2370
0
    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2371
0
        return EAGAIN;
2372
0
    }
2373
2374
0
    handler = &dpif->handlers[handler_id];
2375
0
    if (handler->event_offset >= handler->n_events) {
2376
0
        int retval;
2377
2378
0
        handler->event_offset = handler->n_events = 0;
2379
2380
0
        do {
2381
0
            retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
2382
0
                                dpif->uc_array_size, 0);
2383
0
        } while (retval < 0 && errno == EINTR);
2384
2385
0
        if (retval < 0) {
2386
0
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2387
0
            VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
2388
0
        } else if (retval > 0) {
2389
0
            handler->n_events = retval;
2390
0
        }
2391
0
    }
2392
2393
0
    while (handler->event_offset < handler->n_events) {
2394
0
        int idx = handler->epoll_events[handler->event_offset].data.u32;
2395
0
        struct dpif_channel *ch = &dpif->channels[idx];
2396
2397
0
        handler->event_offset++;
2398
2399
0
        for (;;) {
2400
0
            int dp_ifindex;
2401
0
            int error;
2402
2403
0
            if (++read_tries > 50) {
2404
0
                return EAGAIN;
2405
0
            }
2406
2407
0
            error = nl_sock_recv(ch->sock, buf, NULL, false);
2408
0
            if (error == ENOBUFS) {
2409
                /* ENOBUFS typically means that we've received so many
2410
                 * packets that the buffer overflowed.  Try again
2411
                 * immediately because there's almost certainly a packet
2412
                 * waiting for us. */
2413
0
                report_loss(dpif, ch, idx, handler_id);
2414
0
                continue;
2415
0
            }
2416
2417
0
            ch->last_poll = time_msec();
2418
0
            if (error) {
2419
0
                if (error == EAGAIN) {
2420
0
                    break;
2421
0
                }
2422
0
                return error;
2423
0
            }
2424
2425
0
            error = parse_odp_packet(buf, upcall, &dp_ifindex);
2426
0
            if (!error && dp_ifindex == dpif->dp_ifindex) {
2427
0
                upcall->pid = nl_sock_pid(ch->sock);
2428
0
                return 0;
2429
0
            } else if (error) {
2430
0
                return error;
2431
0
            }
2432
0
        }
2433
0
    }
2434
2435
0
    return EAGAIN;
2436
0
}
2437
2438
static int
2439
dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2440
                  struct dpif_upcall *upcall, struct ofpbuf *buf)
2441
0
{
2442
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2443
0
    int error;
2444
2445
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
2446
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2447
0
        error = dpif_netlink_recv_cpu_dispatch(dpif, handler_id, upcall, buf);
2448
0
    } else {
2449
0
        error = dpif_netlink_recv_vport_dispatch(dpif,
2450
0
                                                 handler_id, upcall, buf);
2451
0
    }
2452
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2453
2454
0
    return error;
2455
0
}
2456
2457
static void
2458
dpif_netlink_recv_wait_vport_dispatch(struct dpif_netlink *dpif,
2459
                                      uint32_t handler_id)
2460
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2461
0
{
2462
0
    if (dpif->handlers && handler_id < dpif->n_handlers) {
2463
0
        struct dpif_handler *handler = &dpif->handlers[handler_id];
2464
2465
0
        poll_fd_wait(handler->epoll_fd, POLLIN);
2466
0
    }
2467
0
}
2468
2469
static void
2470
dpif_netlink_recv_wait_cpu_dispatch(struct dpif_netlink *dpif,
2471
                                    uint32_t handler_id)
2472
    OVS_REQ_RDLOCK(dpif->upcall_lock)
2473
0
{
2474
0
    if (dpif->handlers && handler_id < dpif->n_handlers) {
2475
0
        struct dpif_handler *handler = &dpif->handlers[handler_id];
2476
2477
0
        poll_fd_wait(nl_sock_fd(handler->sock), POLLIN);
2478
0
    }
2479
0
}
2480
2481
static void
2482
dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
2483
0
{
2484
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2485
2486
0
    fat_rwlock_rdlock(&dpif->upcall_lock);
2487
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2488
0
        dpif_netlink_recv_wait_cpu_dispatch(dpif, handler_id);
2489
0
    } else {
2490
0
        dpif_netlink_recv_wait_vport_dispatch(dpif, handler_id);
2491
0
    }
2492
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2493
0
}
2494
2495
static void
2496
dpif_netlink_recv_purge_vport_dispatch(struct dpif_netlink *dpif)
2497
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2498
0
{
2499
0
    if (dpif->handlers) {
2500
0
        size_t i;
2501
2502
0
        if (!dpif->channels[0].sock) {
2503
0
            return;
2504
0
        }
2505
0
        for (i = 0; i < dpif->uc_array_size; i++ ) {
2506
2507
0
            nl_sock_drain(dpif->channels[i].sock);
2508
0
        }
2509
0
    }
2510
0
}
2511
2512
static void
2513
dpif_netlink_recv_purge_cpu_dispatch(struct dpif_netlink *dpif)
2514
    OVS_REQ_WRLOCK(dpif->upcall_lock)
2515
0
{
2516
0
    int handler_id;
2517
2518
0
    if (dpif->handlers) {
2519
0
        for (handler_id = 0; handler_id < dpif->n_handlers; handler_id++) {
2520
0
            struct dpif_handler *handler = &dpif->handlers[handler_id];
2521
0
            nl_sock_drain(handler->sock);
2522
0
        }
2523
0
    }
2524
0
}
2525
2526
static void
2527
dpif_netlink_recv_purge(struct dpif *dpif_)
2528
0
{
2529
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2530
2531
0
    fat_rwlock_wrlock(&dpif->upcall_lock);
2532
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
2533
0
        dpif_netlink_recv_purge_cpu_dispatch(dpif);
2534
0
    } else {
2535
0
        dpif_netlink_recv_purge_vport_dispatch(dpif);
2536
0
    }
2537
0
    fat_rwlock_unlock(&dpif->upcall_lock);
2538
0
}
2539
2540
/* Returns the version of the kernel datapath module as a newly allocated
 * string that the caller must free, or NULL if it cannot be determined.
 *
 * On Linux the version is the first line (without its newline, and limited
 * to MAX_VERSION_STR_SIZE - 1 characters) of the module's sysfs version
 * file.  On other platforms the result is always NULL. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *result = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, sizeof line, stream)) {
            char *eol = strchr(line, '\n');

            if (eol) {
                *eol = '\0';
            }
            result = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return result;
}
2569
2570
struct dpif_netlink_ct_dump_state {
2571
    struct ct_dpif_dump_state up;
2572
    struct nl_ct_dump_state *nl_ct_dump;
2573
};
2574
2575
static int
2576
dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2577
                           struct ct_dpif_dump_state **dump_,
2578
                           const uint16_t *zone, int *ptot_bkts)
2579
0
{
2580
0
    struct dpif_netlink_ct_dump_state *dump;
2581
0
    int err;
2582
2583
0
    dump = xzalloc(sizeof *dump);
2584
0
    err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
2585
0
    if (err) {
2586
0
        free(dump);
2587
0
        return err;
2588
0
    }
2589
2590
0
    *dump_ = &dump->up;
2591
2592
0
    return 0;
2593
0
}
2594
2595
static int
2596
dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2597
                          struct ct_dpif_dump_state *dump_,
2598
                          struct ct_dpif_entry *entry)
2599
0
{
2600
0
    struct dpif_netlink_ct_dump_state *dump;
2601
2602
0
    INIT_CONTAINER(dump, dump_, up);
2603
2604
0
    return nl_ct_dump_next(dump->nl_ct_dump, entry);
2605
0
}
2606
2607
static int
2608
dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2609
                          struct ct_dpif_dump_state *dump_)
2610
0
{
2611
0
    struct dpif_netlink_ct_dump_state *dump;
2612
2613
0
    INIT_CONTAINER(dump, dump_, up);
2614
2615
0
    int err = nl_ct_dump_done(dump->nl_ct_dump);
2616
0
    free(dump);
2617
0
    return err;
2618
0
}
2619
2620
static int
2621
dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2622
                      const struct ct_dpif_tuple *tuple)
2623
0
{
2624
0
    if (tuple) {
2625
0
        return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2626
0
    } else if (zone) {
2627
0
        return nl_ct_flush_zone(*zone);
2628
0
    } else {
2629
0
        return nl_ct_flush();
2630
0
    }
2631
0
}
2632
2633
static int
2634
dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
2635
                           const struct ovs_list *zone_limits)
2636
0
{
2637
0
    if (ovs_ct_limit_family < 0) {
2638
0
        return EOPNOTSUPP;
2639
0
    }
2640
2641
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2642
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2643
0
                          NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
2644
0
                          OVS_CT_LIMIT_VERSION);
2645
2646
0
    struct ovs_header *ovs_header;
2647
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2648
0
    ovs_header->dp_ifindex = 0;
2649
2650
0
    size_t opt_offset;
2651
0
    opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2652
2653
0
    if (!ovs_list_is_empty(zone_limits)) {
2654
0
        struct ct_dpif_zone_limit *zone_limit;
2655
2656
0
        LIST_FOR_EACH (zone_limit, node, zone_limits) {
2657
0
            struct ovs_zone_limit req_zone_limit = {
2658
0
                .zone_id = zone_limit->zone,
2659
0
                .limit   = zone_limit->limit,
2660
0
            };
2661
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2662
0
        }
2663
0
    }
2664
0
    nl_msg_end_nested(request, opt_offset);
2665
2666
0
    int err = nl_transact(NETLINK_GENERIC, request, NULL);
2667
0
    ofpbuf_delete(request);
2668
0
    return err;
2669
0
}
2670
2671
static int
2672
dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
2673
                                     struct ovs_list *zone_limits)
2674
0
{
2675
0
    static const struct nl_policy ovs_ct_limit_policy[] = {
2676
0
        [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
2677
0
                                           .optional = true },
2678
0
    };
2679
2680
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2681
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2682
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2683
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2684
2685
0
    struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];
2686
2687
0
    if (!nlmsg || !genl || !ovs_header
2688
0
        || nlmsg->nlmsg_type != ovs_ct_limit_family
2689
0
        || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
2690
0
                            ARRAY_SIZE(ovs_ct_limit_policy))) {
2691
0
        return EINVAL;
2692
0
    }
2693
2694
2695
0
    if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
2696
0
        return EINVAL;
2697
0
    }
2698
2699
0
    int rem = NLA_ALIGN(
2700
0
                nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
2701
0
    const struct ovs_zone_limit *zone_limit =
2702
0
                nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
2703
2704
0
    while (rem >= sizeof *zone_limit) {
2705
0
        if (zone_limit->zone_id >= OVS_ZONE_LIMIT_DEFAULT_ZONE &&
2706
0
            zone_limit->zone_id <= UINT16_MAX) {
2707
0
            ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
2708
0
                                    zone_limit->limit, zone_limit->count);
2709
0
        }
2710
0
        rem -= NLA_ALIGN(sizeof *zone_limit);
2711
0
        zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
2712
0
            (unsigned char *) zone_limit  + NLA_ALIGN(sizeof *zone_limit));
2713
0
    }
2714
0
    return 0;
2715
0
}
2716
2717
static int
2718
dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
2719
                           const struct ovs_list *zone_limits_request,
2720
                           struct ovs_list *zone_limits_reply)
2721
0
{
2722
0
    if (ovs_ct_limit_family < 0) {
2723
0
        return EOPNOTSUPP;
2724
0
    }
2725
2726
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2727
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2728
0
            NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
2729
0
            OVS_CT_LIMIT_VERSION);
2730
2731
0
    struct ovs_header *ovs_header;
2732
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2733
0
    ovs_header->dp_ifindex = 0;
2734
2735
0
    if (!ovs_list_is_empty(zone_limits_request)) {
2736
0
        size_t opt_offset = nl_msg_start_nested(request,
2737
0
                                                OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2738
2739
0
        struct ct_dpif_zone_limit *zone_limit;
2740
0
        LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
2741
0
            struct ovs_zone_limit req_zone_limit = {
2742
0
                .zone_id = zone_limit->zone,
2743
0
            };
2744
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2745
0
        }
2746
2747
0
        nl_msg_end_nested(request, opt_offset);
2748
0
    }
2749
2750
0
    struct ofpbuf *reply;
2751
0
    int err = nl_transact(NETLINK_GENERIC, request, &reply);
2752
0
    if (err) {
2753
0
        goto out;
2754
0
    }
2755
2756
0
    err = dpif_netlink_zone_limits_from_ofpbuf(reply, zone_limits_reply);
2757
2758
0
out:
2759
0
    ofpbuf_delete(request);
2760
0
    ofpbuf_delete(reply);
2761
0
    return err;
2762
0
}
2763
2764
static int
2765
dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
2766
                           const struct ovs_list *zone_limits)
2767
0
{
2768
0
    if (ovs_ct_limit_family < 0) {
2769
0
        return EOPNOTSUPP;
2770
0
    }
2771
2772
0
    struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2773
0
    nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2774
0
            NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
2775
0
            OVS_CT_LIMIT_VERSION);
2776
2777
0
    struct ovs_header *ovs_header;
2778
0
    ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2779
0
    ovs_header->dp_ifindex = 0;
2780
2781
0
    if (!ovs_list_is_empty(zone_limits)) {
2782
0
        size_t opt_offset =
2783
0
            nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2784
2785
0
        struct ct_dpif_zone_limit *zone_limit;
2786
0
        LIST_FOR_EACH (zone_limit, node, zone_limits) {
2787
0
            struct ovs_zone_limit req_zone_limit = {
2788
0
                .zone_id = zone_limit->zone,
2789
0
            };
2790
0
            nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2791
0
        }
2792
0
        nl_msg_end_nested(request, opt_offset);
2793
0
    }
2794
2795
0
    int err = nl_transact(NETLINK_GENERIC, request, NULL);
2796
2797
0
    ofpbuf_delete(request);
2798
0
    return err;
2799
0
}
2800
2801
0
#define NL_TP_NAME_PREFIX "ovs_tp_"
2802
2803
/* An (L3, L4) protocol pair.  The 'tp_protos' table holds one of these for
 * each netlink timeout policy that a ct_dpif timeout policy is split
 * into. */
struct dpif_netlink_timeout_policy_protocol {
    uint16_t    l3num;      /* Address family, e.g. AF_INET. */
    uint8_t     l4num;      /* IP protocol, e.g. IPPROTO_TCP. */
};
2807
2808
/* Indexes into 'tp_protos', one per supported (L3, L4) pair.  The values
 * are also used as bit positions, with DPIF_NL_TP_MAX giving the number of
 * pairs. */
enum OVS_PACKED_ENUM dpif_netlink_support_timeout_policy_protocol {
    DPIF_NL_TP_AF_INET_TCP,         /* IPv4 TCP. */
    DPIF_NL_TP_AF_INET_UDP,         /* IPv4 UDP. */
    DPIF_NL_TP_AF_INET_ICMP,        /* IPv4 ICMP. */
    DPIF_NL_TP_AF_INET6_TCP,        /* IPv6 TCP. */
    DPIF_NL_TP_AF_INET6_UDP,        /* IPv6 UDP. */
    DPIF_NL_TP_AF_INET6_ICMPV6,     /* IPv6 ICMPv6. */
    DPIF_NL_TP_MAX
};
2817
2818
0
#define DPIF_NL_ALL_TP ((1UL << DPIF_NL_TP_MAX) - 1)
2819
2820
2821
static struct dpif_netlink_timeout_policy_protocol tp_protos[] = {
2822
    [DPIF_NL_TP_AF_INET_TCP] = { .l3num = AF_INET, .l4num = IPPROTO_TCP },
2823
    [DPIF_NL_TP_AF_INET_UDP] = { .l3num = AF_INET, .l4num = IPPROTO_UDP },
2824
    [DPIF_NL_TP_AF_INET_ICMP] = { .l3num = AF_INET, .l4num = IPPROTO_ICMP },
2825
    [DPIF_NL_TP_AF_INET6_TCP] = { .l3num = AF_INET6, .l4num = IPPROTO_TCP },
2826
    [DPIF_NL_TP_AF_INET6_UDP] = { .l3num = AF_INET6, .l4num = IPPROTO_UDP },
2827
    [DPIF_NL_TP_AF_INET6_ICMPV6] = { .l3num = AF_INET6,
2828
                                     .l4num = IPPROTO_ICMPV6 },
2829
};
2830
2831
static void
2832
dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
2833
                            char **tp_name)
2834
0
{
2835
0
    struct ds ds = DS_EMPTY_INITIALIZER;
2836
0
    ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
2837
0
    ct_dpif_format_ipproto(&ds, l4num);
2838
2839
0
    if (l3num == AF_INET) {
2840
0
        ds_put_cstr(&ds, "4");
2841
0
    } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) {
2842
0
        ds_put_cstr(&ds, "6");
2843
0
    }
2844
2845
0
    ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX);
2846
2847
0
    *tp_name = ds_steal_cstr(&ds);
2848
0
}
2849
2850
static int
2851
dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
2852
                                        uint32_t tp_id, uint16_t dl_type,
2853
                                        uint8_t nw_proto, char **tp_name,
2854
                                        bool *is_generic)
2855
0
{
2856
0
    dpif_netlink_format_tp_name(tp_id,
2857
0
                                dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6,
2858
0
                                nw_proto, tp_name);
2859
0
    *is_generic = false;
2860
0
    return 0;
2861
0
}
2862
2863
static int
2864
dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED,
2865
                             enum ct_features *features)
2866
0
{
2867
0
    if (features != NULL) {
2868
0
        *features = CONNTRACK_F_ZERO_SNAT;
2869
0
    }
2870
0
    return 0;
2871
0
}
2872
2873
/* Tables pairing ct_dpif timeout attributes with netlink timeout
 * attributes, one table per L4 protocol.
 *
 * Each row is CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2), which
 * pairs CT_DPIF_TP_ATTR_<PROTO1>_<ATTR1> with CTA_TIMEOUT_<PROTO2>_<ATTR2>.
 * CT_DPIF_NL_TP_MAPPING must be defined at the point where a table is
 * expanded; the rows carry no separators of their own. */

/* TCP. */
#define CT_DPIF_NL_TP_TCP_MAPPINGS                              \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, ESTABLISHED, ESTABLISHED)   \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, FIN_WAIT, FIN_WAIT)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE_WAIT, CLOSE_WAIT)     \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, LAST_ACK, LAST_ACK)         \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, TIME_WAIT, TIME_WAIT)       \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE, CLOSE)               \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT2, SYN_SENT2)       \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, RETRANSMIT, RETRANS)        \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, UNACK, UNACK)

/* UDP. */
#define CT_DPIF_NL_TP_UDP_MAPPINGS                              \
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, SINGLE, UNREPLIED)          \
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, MULTIPLE, REPLIED)

/* ICMP. */
#define CT_DPIF_NL_TP_ICMP_MAPPINGS                             \
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMP, FIRST, TIMEOUT)

/* ICMPv6, which shares the ct_dpif ICMP attribute. */
#define CT_DPIF_NL_TP_ICMPV6_MAPPINGS                           \
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMPV6, FIRST, TIMEOUT)
2895
2896
2897
0
#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)     \
2898
0
if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) {  \
2899
0
    nl_tp->present |= 1 << CTA_TIMEOUT_##PROTO2##_##ATTR2;      \
2900
0
    nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2] =              \
2901
0
        tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1];          \
2902
0
}
2903
2904
static void
2905
dpif_netlink_get_nl_tp_tcp_attrs(const struct ct_dpif_timeout_policy *tp,
2906
                                 struct nl_ct_timeout_policy *nl_tp)
2907
0
{
2908
0
    CT_DPIF_NL_TP_TCP_MAPPINGS
2909
0
}
2910
2911
static void
2912
dpif_netlink_get_nl_tp_udp_attrs(const struct ct_dpif_timeout_policy *tp,
2913
                                 struct nl_ct_timeout_policy *nl_tp)
2914
0
{
2915
0
    CT_DPIF_NL_TP_UDP_MAPPINGS
2916
0
}
2917
2918
static void
2919
dpif_netlink_get_nl_tp_icmp_attrs(const struct ct_dpif_timeout_policy *tp,
2920
                                  struct nl_ct_timeout_policy *nl_tp)
2921
0
{
2922
0
    CT_DPIF_NL_TP_ICMP_MAPPINGS
2923
0
}
2924
2925
static void
2926
dpif_netlink_get_nl_tp_icmpv6_attrs(const struct ct_dpif_timeout_policy *tp,
2927
                                    struct nl_ct_timeout_policy *nl_tp)
2928
0
{
2929
0
    CT_DPIF_NL_TP_ICMPV6_MAPPINGS
2930
0
}
2931
2932
#undef CT_DPIF_NL_TP_MAPPING
2933
2934
static void
2935
dpif_netlink_get_nl_tp_attrs(const struct ct_dpif_timeout_policy *tp,
2936
                             uint8_t l4num, struct nl_ct_timeout_policy *nl_tp)
2937
0
{
2938
0
    nl_tp->present = 0;
2939
2940
0
    if (l4num == IPPROTO_TCP) {
2941
0
        dpif_netlink_get_nl_tp_tcp_attrs(tp, nl_tp);
2942
0
    } else if (l4num == IPPROTO_UDP) {
2943
0
        dpif_netlink_get_nl_tp_udp_attrs(tp, nl_tp);
2944
0
    } else if (l4num == IPPROTO_ICMP) {
2945
0
        dpif_netlink_get_nl_tp_icmp_attrs(tp, nl_tp);
2946
0
    } else if (l4num == IPPROTO_ICMPV6) {
2947
0
        dpif_netlink_get_nl_tp_icmpv6_attrs(tp, nl_tp);
2948
0
    }
2949
0
}
2950
2951
0
#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2)                 \
2952
0
if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) {               \
2953
0
    if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) {          \
2954
0
        if (tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] !=                \
2955
0
            nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]) {                 \
2956
0
            VLOG_WARN_RL(&error_rl, "Inconsistent timeout policy %s "       \
2957
0
                         "attribute %s=%"PRIu32" while %s=%"PRIu32,         \
2958
0
                         nl_tp->name, "CTA_TIMEOUT_"#PROTO2"_"#ATTR2,       \
2959
0
                         nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2],      \
2960
0
                         "CT_DPIF_TP_ATTR_"#PROTO1"_"#ATTR1,                \
2961
0
                         tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]);    \
2962
0
        }                                                                   \
2963
0
    } else {                                                                \
2964
0
        tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1;             \
2965
0
        tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] =                     \
2966
0
            nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2];                   \
2967
0
    }                                                                       \
2968
0
}
2969
2970
static void
2971
dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp,
2972
                                      struct ct_dpif_timeout_policy *tp)
2973
0
{
2974
0
    CT_DPIF_NL_TP_TCP_MAPPINGS
2975
0
}
2976
2977
static void
2978
dpif_netlink_set_ct_dpif_tp_udp_attrs(const struct nl_ct_timeout_policy *nl_tp,
2979
                                      struct ct_dpif_timeout_policy *tp)
2980
0
{
2981
0
    CT_DPIF_NL_TP_UDP_MAPPINGS
2982
0
}
2983
2984
static void
2985
dpif_netlink_set_ct_dpif_tp_icmp_attrs(
2986
    const struct nl_ct_timeout_policy *nl_tp,
2987
    struct ct_dpif_timeout_policy *tp)
2988
0
{
2989
0
    CT_DPIF_NL_TP_ICMP_MAPPINGS
2990
0
}
2991
2992
static void
2993
dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(
2994
    const struct nl_ct_timeout_policy *nl_tp,
2995
    struct ct_dpif_timeout_policy *tp)
2996
0
{
2997
0
    CT_DPIF_NL_TP_ICMPV6_MAPPINGS
2998
0
}
2999
3000
#undef CT_DPIF_NL_TP_MAPPING
3001
3002
static void
3003
dpif_netlink_set_ct_dpif_tp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3004
                                  struct ct_dpif_timeout_policy *tp)
3005
0
{
3006
0
    if (nl_tp->l4num == IPPROTO_TCP) {
3007
0
        dpif_netlink_set_ct_dpif_tp_tcp_attrs(nl_tp, tp);
3008
0
    } else if (nl_tp->l4num == IPPROTO_UDP) {
3009
0
        dpif_netlink_set_ct_dpif_tp_udp_attrs(nl_tp, tp);
3010
0
    } else if (nl_tp->l4num == IPPROTO_ICMP) {
3011
0
        dpif_netlink_set_ct_dpif_tp_icmp_attrs(nl_tp, tp);
3012
0
    } else if (nl_tp->l4num == IPPROTO_ICMPV6) {
3013
0
        dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(nl_tp, tp);
3014
0
    }
3015
0
}
3016
3017
static int
3018
dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3019
                                   const struct ct_dpif_timeout_policy *tp)
3020
0
{
3021
0
    int err = 0;
3022
3023
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3024
0
        struct nl_ct_timeout_policy nl_tp;
3025
0
        char *nl_tp_name;
3026
3027
0
        dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num,
3028
0
                                    tp_protos[i].l4num, &nl_tp_name);
3029
0
        ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name);
3030
0
        free(nl_tp_name);
3031
3032
0
        nl_tp.l3num = tp_protos[i].l3num;
3033
0
        nl_tp.l4num = tp_protos[i].l4num;
3034
0
        dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp);
3035
0
        err = nl_ct_set_timeout_policy(&nl_tp);
3036
0
        if (err) {
3037
0
            VLOG_WARN_RL(&error_rl, "failed to add timeout policy %s (%s)",
3038
0
                         nl_tp.name, ovs_strerror(err));
3039
0
            goto out;
3040
0
        }
3041
0
    }
3042
3043
0
out:
3044
0
    return err;
3045
0
}
3046
3047
static int
3048
dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3049
                                   uint32_t tp_id,
3050
                                   struct ct_dpif_timeout_policy *tp)
3051
0
{
3052
0
    int err = 0;
3053
3054
0
    tp->id = tp_id;
3055
0
    tp->present = 0;
3056
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3057
0
        struct nl_ct_timeout_policy nl_tp;
3058
0
        char *nl_tp_name;
3059
3060
0
        dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3061
0
                                    tp_protos[i].l4num, &nl_tp_name);
3062
0
        err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp);
3063
3064
0
        if (err) {
3065
0
            VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)",
3066
0
                         nl_tp_name, ovs_strerror(err));
3067
0
            free(nl_tp_name);
3068
0
            goto out;
3069
0
        }
3070
0
        free(nl_tp_name);
3071
0
        dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp);
3072
0
    }
3073
3074
0
out:
3075
0
    return err;
3076
0
}
3077
3078
/* Returns 0 if all the sub timeout policies are deleted or not exist in the
3079
 * kernel.  Returns 1 if any sub timeout policy deletion failed. */
3080
static int
3081
dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3082
                                   uint32_t tp_id)
3083
0
{
3084
0
    int ret = 0;
3085
3086
0
    for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
3087
0
        char *nl_tp_name;
3088
0
        dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3089
0
                                    tp_protos[i].l4num, &nl_tp_name);
3090
0
        int err = nl_ct_del_timeout_policy(nl_tp_name);
3091
0
        if (err == ENOENT) {
3092
0
            err = 0;
3093
0
        }
3094
0
        if (err) {
3095
0
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
3096
0
            VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)",
3097
0
                         nl_tp_name, ovs_strerror(err));
3098
0
            ret = 1;
3099
0
        }
3100
0
        free(nl_tp_name);
3101
0
    }
3102
3103
0
    return ret;
3104
0
}
3105
3106
struct dpif_netlink_ct_timeout_policy_dump_state {
3107
    struct nl_ct_timeout_policy_dump_state *nl_dump_state;
3108
    struct hmap tp_dump_map;
3109
};
3110
3111
struct dpif_netlink_tp_dump_node {
3112
    struct      hmap_node hmap_node;      /* node in tp_dump_map. */
3113
    struct      ct_dpif_timeout_policy *tp;
3114
    uint32_t    l3_l4_present;
3115
};
3116
3117
static struct dpif_netlink_tp_dump_node *
3118
get_dpif_netlink_tp_dump_node_by_tp_id(uint32_t tp_id,
3119
                                       struct hmap *tp_dump_map)
3120
0
{
3121
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3122
3123
0
    HMAP_FOR_EACH_WITH_HASH (tp_dump_node, hmap_node, hash_int(tp_id, 0),
3124
0
                             tp_dump_map) {
3125
0
        if (tp_dump_node->tp->id == tp_id) {
3126
0
            return tp_dump_node;
3127
0
        }
3128
0
    }
3129
0
    return NULL;
3130
0
}
3131
3132
static void
3133
update_dpif_netlink_tp_dump_node(
3134
    const struct nl_ct_timeout_policy *nl_tp,
3135
    struct dpif_netlink_tp_dump_node *tp_dump_node)
3136
0
{
3137
0
    dpif_netlink_set_ct_dpif_tp_attrs(nl_tp, tp_dump_node->tp);
3138
0
    for (int i = 0; i < DPIF_NL_TP_MAX; ++i) {
3139
0
        if (nl_tp->l3num == tp_protos[i].l3num &&
3140
0
            nl_tp->l4num == tp_protos[i].l4num) {
3141
0
            tp_dump_node->l3_l4_present |= 1 << i;
3142
0
            break;
3143
0
        }
3144
0
    }
3145
0
}
3146
3147
static int
3148
dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3149
                                          void **statep)
3150
0
{
3151
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state;
3152
3153
0
    *statep = dump_state = xzalloc(sizeof *dump_state);
3154
0
    int err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
3155
0
    if (err) {
3156
0
        free(dump_state);
3157
0
        return err;
3158
0
    }
3159
0
    hmap_init(&dump_state->tp_dump_map);
3160
0
    return 0;
3161
0
}
3162
3163
static void
3164
get_and_cleanup_tp_dump_node(struct hmap *hmap,
3165
                             struct dpif_netlink_tp_dump_node *tp_dump_node,
3166
                             struct ct_dpif_timeout_policy *tp)
3167
0
{
3168
0
    hmap_remove(hmap, &tp_dump_node->hmap_node);
3169
0
    *tp = *tp_dump_node->tp;
3170
0
    free(tp_dump_node->tp);
3171
0
    free(tp_dump_node);
3172
0
}
3173
3174
static int
3175
dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3176
                                         void *state,
3177
                                         struct ct_dpif_timeout_policy *tp)
3178
0
{
3179
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3180
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3181
0
    int err;
3182
3183
    /* Dumps all the timeout policies in the kernel. */
3184
0
    do {
3185
0
        struct nl_ct_timeout_policy nl_tp;
3186
0
        uint32_t tp_id;
3187
3188
0
        err =  nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state,
3189
0
                                              &nl_tp);
3190
0
        if (err) {
3191
0
            break;
3192
0
        }
3193
3194
        /* We only interest in OVS installed timeout policies. */
3195
0
        if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) {
3196
0
            continue;
3197
0
        }
3198
3199
0
        tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id(
3200
0
                            tp_id, &dump_state->tp_dump_map);
3201
0
        if (!tp_dump_node) {
3202
0
            tp_dump_node = xzalloc(sizeof *tp_dump_node);
3203
0
            tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp);
3204
0
            tp_dump_node->tp->id = tp_id;
3205
0
            hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node,
3206
0
                        hash_int(tp_id, 0));
3207
0
        }
3208
3209
0
        update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node);
3210
3211
        /* Returns one ct_dpif_timeout_policy if we gather all the L3/L4
3212
         * sub-pieces. */
3213
0
        if (tp_dump_node->l3_l4_present == DPIF_NL_ALL_TP) {
3214
0
            get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3215
0
                                         tp_dump_node, tp);
3216
0
            break;
3217
0
        }
3218
0
    } while (true);
3219
3220
    /* Dump the incomplete timeout policies. */
3221
0
    if (err == EOF) {
3222
0
        if (!hmap_is_empty(&dump_state->tp_dump_map)) {
3223
0
            struct hmap_node *hmap_node = hmap_first(&dump_state->tp_dump_map);
3224
0
            tp_dump_node = CONTAINER_OF(hmap_node,
3225
0
                                        struct dpif_netlink_tp_dump_node,
3226
0
                                        hmap_node);
3227
0
            get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3228
0
                                         tp_dump_node, tp);
3229
0
            return 0;
3230
0
        }
3231
0
    }
3232
3233
0
    return err;
3234
0
}
3235
3236
static int
3237
dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
3238
                                         void *state)
3239
0
{
3240
0
    struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3241
0
    struct dpif_netlink_tp_dump_node *tp_dump_node;
3242
3243
0
    int err = nl_ct_timeout_policy_dump_done(dump_state->nl_dump_state);
3244
0
    HMAP_FOR_EACH_POP (tp_dump_node, hmap_node, &dump_state->tp_dump_map) {
3245
0
        free(tp_dump_node->tp);
3246
0
        free(tp_dump_node);
3247
0
    }
3248
0
    hmap_destroy(&dump_state->tp_dump_map);
3249
0
    free(dump_state);
3250
0
    return err;
3251
0
}
3252
3253

3254
/* Meters */
3255
3256
/* Set of supported meter flags */
3257
#define DP_SUPPORTED_METER_FLAGS_MASK \
3258
0
    (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)
3259
3260
/* Meter support was introduced in Linux 4.15.  In some versions of
3261
 * Linux 4.15, 4.16, and 4.17, there was a bug that never set the id
3262
 * when the meter was created, so all meters essentially had an id of
3263
 * zero.  Check for that condition and disable meters on those kernels. */
3264
static bool probe_broken_meters(struct dpif *);
3265
3266
static void
3267
dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
3268
                        void *stub, size_t size, uint32_t command)
3269
0
{
3270
0
    ofpbuf_use_stub(buf, stub, size);
3271
3272
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
3273
0
                          command, OVS_METER_VERSION);
3274
3275
0
    struct ovs_header *ovs_header;
3276
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3277
0
    ovs_header->dp_ifindex = dpif->dp_ifindex;
3278
0
}
3279
3280
/* Execute meter 'request' in the kernel datapath.  If the command
3281
 * fails, returns a positive errno value.  Otherwise, stores the reply
3282
 * in '*replyp', parses the policy according to 'reply_policy' into the
3283
 * array of Netlink attribute in 'a', and returns 0.  On success, the
3284
 * caller is responsible for calling ofpbuf_delete() on '*replyp'
3285
 * ('replyp' will contain pointers into 'a'). */
3286
static int
3287
dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
3288
                            const struct nl_policy *reply_policy,
3289
                            struct nlattr **a, size_t size_a)
3290
0
{
3291
0
    int error = nl_transact(NETLINK_GENERIC, request, replyp);
3292
0
    ofpbuf_uninit(request);
3293
3294
0
    if (error) {
3295
0
        return error;
3296
0
    }
3297
3298
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
3299
0
    struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
3300
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
3301
0
                                                    sizeof *ovs_header);
3302
0
    if (!nlmsg || !genl || !ovs_header
3303
0
        || nlmsg->nlmsg_type != ovs_meter_family
3304
0
        || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
3305
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3306
0
        VLOG_DBG_RL(&rl,
3307
0
                    "Kernel module response to meter tranaction is invalid");
3308
0
        ofpbuf_delete(*replyp);
3309
0
        return EINVAL;
3310
0
    }
3311
0
    return 0;
3312
0
}
3313
3314
static void
3315
dpif_netlink_meter_get_features(const struct dpif *dpif_,
3316
                                struct ofputil_meter_features *features)
3317
0
{
3318
0
    if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
3319
0
        return;
3320
0
    }
3321
3322
0
    struct ofpbuf buf, *msg;
3323
0
    uint64_t stub[1024 / 8];
3324
3325
0
    static const struct nl_policy ovs_meter_features_policy[] = {
3326
0
        [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
3327
0
        [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
3328
0
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3329
0
    };
3330
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];
3331
3332
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3333
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
3334
0
                            OVS_METER_CMD_FEATURES);
3335
0
    if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
3336
0
                                    ARRAY_SIZE(ovs_meter_features_policy))) {
3337
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3338
0
        VLOG_INFO_RL(&rl,
3339
0
                  "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
3340
0
        return;
3341
0
    }
3342
3343
0
    features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
3344
0
    features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);
3345
3346
    /* Bands is a nested attribute of zero or more nested
3347
     * band attributes.  */
3348
0
    if (a[OVS_METER_ATTR_BANDS]) {
3349
0
        const struct nlattr *nla;
3350
0
        size_t left;
3351
3352
0
        NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3353
0
            const struct nlattr *band_nla;
3354
0
            size_t band_left;
3355
3356
0
            NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
3357
0
                if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
3358
0
                    if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
3359
0
                        switch (nl_attr_get_u32(band_nla)) {
3360
0
                        case OVS_METER_BAND_TYPE_DROP:
3361
0
                            features->band_types |= 1 << OFPMBT13_DROP;
3362
0
                            break;
3363
0
                        }
3364
0
                    }
3365
0
                }
3366
0
            }
3367
0
        }
3368
0
    }
3369
0
    features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
3370
3371
0
    ofpbuf_delete(msg);
3372
0
}
3373
3374
static int
3375
dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
3376
                         struct ofputil_meter_config *config)
3377
0
{
3378
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3379
0
    struct ofpbuf buf, *msg;
3380
0
    uint64_t stub[1024 / 8];
3381
3382
0
    static const struct nl_policy ovs_meter_set_response_policy[] = {
3383
0
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
3384
0
    };
3385
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];
3386
3387
0
    if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
3388
0
        return EBADF; /* Unsupported flags set */
3389
0
    }
3390
3391
0
    for (size_t i = 0; i < config->n_bands; i++) {
3392
0
        switch (config->bands[i].type) {
3393
0
        case OFPMBT13_DROP:
3394
0
            break;
3395
0
        default:
3396
0
            return ENODEV; /* Unsupported band type */
3397
0
        }
3398
0
    }
3399
3400
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
3401
3402
0
    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3403
3404
0
    if (config->flags & OFPMF13_KBPS) {
3405
0
        nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
3406
0
    }
3407
3408
0
    size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
3409
    /* Bands */
3410
0
    for (size_t i = 0; i < config->n_bands; ++i) {
3411
0
        struct ofputil_meter_band * band = &config->bands[i];
3412
0
        uint32_t band_type;
3413
3414
0
        size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);
3415
3416
0
        switch (band->type) {
3417
0
        case OFPMBT13_DROP:
3418
0
            band_type = OVS_METER_BAND_TYPE_DROP;
3419
0
            break;
3420
0
        default:
3421
0
            band_type = OVS_METER_BAND_TYPE_UNSPEC;
3422
0
        }
3423
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
3424
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
3425
0
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
3426
0
                       config->flags & OFPMF13_BURST ?
3427
0
                       band->burst_size : band->rate);
3428
0
        nl_msg_end_nested(&buf, band_offset);
3429
0
    }
3430
0
    nl_msg_end_nested(&buf, bands_offset);
3431
3432
0
    int error = dpif_netlink_meter_transact(&buf, &msg,
3433
0
                                    ovs_meter_set_response_policy, a,
3434
0
                                    ARRAY_SIZE(ovs_meter_set_response_policy));
3435
0
    if (error) {
3436
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3437
0
        VLOG_INFO_RL(&rl,
3438
0
                     "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
3439
0
        return error;
3440
0
    }
3441
3442
0
    if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3443
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3444
0
        VLOG_INFO_RL(&rl,
3445
0
                     "Kernel returned a different meter id than requested");
3446
0
    }
3447
0
    ofpbuf_delete(msg);
3448
0
    return 0;
3449
0
}
3450
3451
static int
3452
dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
3453
                       struct ofputil_meter_config *config)
3454
0
{
3455
0
    if (probe_broken_meters(dpif_)) {
3456
0
        return ENOMEM;
3457
0
    }
3458
3459
0
    return dpif_netlink_meter_set__(dpif_, meter_id, config);
3460
0
}
3461
3462
/* Retrieve statistics and/or delete meter 'meter_id'.  Statistics are
3463
 * stored in 'stats', if it is not null.  If 'command' is
3464
 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
3465
 * retrieved.  If 'command' is OVS_METER_CMD_GET, then statistics are
3466
 * simply retrieved. */
3467
static int
3468
dpif_netlink_meter_get_stats(const struct dpif *dpif_,
3469
                             ofproto_meter_id meter_id,
3470
                             struct ofputil_meter_stats *stats,
3471
                             uint16_t max_bands,
3472
                             enum ovs_meter_cmd command)
3473
0
{
3474
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3475
0
    struct ofpbuf buf, *msg;
3476
0
    uint64_t stub[1024 / 8];
3477
3478
0
    static const struct nl_policy ovs_meter_stats_policy[] = {
3479
0
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
3480
0
        [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
3481
0
                                   .optional = true},
3482
0
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3483
0
    };
3484
0
    struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
3485
3486
0
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
3487
3488
0
    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3489
3490
0
    int error = dpif_netlink_meter_transact(&buf, &msg,
3491
0
                                            ovs_meter_stats_policy, a,
3492
0
                                            ARRAY_SIZE(ovs_meter_stats_policy));
3493
0
    if (error) {
3494
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3495
0
        VLOG_RL(&rl, error == ENOENT ? VLL_DBG : VLL_WARN,
3496
0
                "dpif_netlink_meter_transact %s failed: %s",
3497
0
                command == OVS_METER_CMD_GET ? "get" : "del",
3498
0
                ovs_strerror(error));
3499
0
        return error;
3500
0
    }
3501
3502
0
    if (a[OVS_METER_ATTR_ID]
3503
0
        && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3504
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3505
0
        VLOG_INFO_RL(&rl,
3506
0
                     "Kernel returned a different meter id than requested");
3507
0
        ofpbuf_delete(msg);
3508
0
        return EINVAL;
3509
0
    }
3510
3511
0
    if (stats && a[OVS_METER_ATTR_STATS]) {
3512
        /* return stats */
3513
0
        const struct ovs_flow_stats *stat;
3514
0
        const struct nlattr *nla;
3515
0
        size_t left;
3516
3517
0
        stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
3518
0
        stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
3519
0
        stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
3520
3521
0
        if (a[OVS_METER_ATTR_BANDS]) {
3522
0
            size_t n_bands = 0;
3523
0
            NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3524
0
                const struct nlattr *band_nla;
3525
0
                band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
3526
0
                if (band_nla && nl_attr_get_size(band_nla) \
3527
0
                                == sizeof(struct ovs_flow_stats)) {
3528
0
                    stat = nl_attr_get(band_nla);
3529
3530
0
                    if (n_bands < max_bands) {
3531
0
                        stats->bands[n_bands].packet_count
3532
0
                            = get_32aligned_u64(&stat->n_packets);
3533
0
                        stats->bands[n_bands].byte_count
3534
0
                            = get_32aligned_u64(&stat->n_bytes);
3535
0
                        ++n_bands;
3536
0
                    }
3537
0
                } else {
3538
0
                    stats->bands[n_bands].packet_count = 0;
3539
0
                    stats->bands[n_bands].byte_count = 0;
3540
0
                    ++n_bands;
3541
0
                }
3542
0
            }
3543
0
            stats->n_bands = n_bands;
3544
0
        } else {
3545
            /* For a non-existent meter, return 0 stats. */
3546
0
            stats->n_bands = 0;
3547
0
        }
3548
0
    }
3549
3550
0
    ofpbuf_delete(msg);
3551
0
    return error;
3552
0
}
3553
3554
static int
3555
dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
3556
                       struct ofputil_meter_stats *stats, uint16_t max_bands)
3557
0
{
3558
0
    return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3559
0
                                        OVS_METER_CMD_GET);
3560
0
}
3561
3562
static int
3563
dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
3564
                       struct ofputil_meter_stats *stats, uint16_t max_bands)
3565
0
{
3566
0
    return dpif_netlink_meter_get_stats(dpif, meter_id, stats,
3567
0
                                        max_bands, OVS_METER_CMD_DEL);
3568
0
}
3569
3570
static bool
3571
probe_broken_meters__(struct dpif *dpif)
3572
0
{
3573
    /* This test is destructive if a probe occurs while ovs-vswitchd is
3574
     * running (e.g., an ovs-dpctl meter command is called), so choose a
3575
     * high meter id to make this less likely to occur.
3576
     *
3577
     * In Linux kernel v5.10+ meters are stored in a table that is not
3578
     * a real hash table.  It's just an array with 'meter_id % size' used
3579
     * as an index.  The numbers are chosen to fit into the minimal table
3580
     * size (1024) without wrapping, so these IDs are guaranteed to be
3581
     * found under normal conditions in the meter table, if such meters
3582
     * exist.  It's possible to break this check by creating some meters
3583
     * in the kernel manually with different IDs that map onto the same
3584
     * indexes, but that should not be a big problem since ovs-vswitchd
3585
     * always allocates densely packed meter IDs with an id-pool.
3586
     *
3587
     * These IDs will also work in cases where the table in the kernel is
3588
     * a proper hash table. */
3589
0
    ofproto_meter_id id1 = { 1021 };
3590
0
    ofproto_meter_id id2 = { 1022 };
3591
0
    struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
3592
0
    struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
3593
0
    struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};
3594
3595
    /* First check if these meters are already in the kernel.  If we get
3596
     * a proper response from the kernel with all the good meter IDs, then
3597
     * meters are likley supported correctly. */
3598
0
    if (!dpif_netlink_meter_get(dpif, id1, NULL, 0)
3599
0
        || !dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3600
0
        return false;
3601
0
    }
3602
3603
    /* Try adding two meters and make sure that they both come back with
3604
     * the proper meter id.  Use the "__" version so that we don't cause
3605
     * a recurve deadlock. */
3606
0
    dpif_netlink_meter_set__(dpif, id1, &config1);
3607
0
    dpif_netlink_meter_set__(dpif, id2, &config2);
3608
3609
0
    if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
3610
0
        || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3611
0
        VLOG_INFO("The kernel module has a broken meter implementation.");
3612
0
        return true;
3613
0
    }
3614
3615
0
    dpif_netlink_meter_del(dpif, id1, NULL, 0);
3616
0
    dpif_netlink_meter_del(dpif, id2, NULL, 0);
3617
3618
0
    return false;
3619
0
}
3620
3621
static bool
3622
probe_broken_meters(struct dpif *dpif)
3623
0
{
3624
    /* This is a once-only test because currently OVS only has at most a single
3625
     * Netlink capable datapath on any given platform. */
3626
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3627
3628
0
    static bool broken_meters = false;
3629
0
    if (ovsthread_once_start(&once)) {
3630
0
        broken_meters = probe_broken_meters__(dpif);
3631
0
        ovsthread_once_done(&once);
3632
0
    }
3633
0
    return broken_meters;
3634
0
}
3635
3636
3637
static int
3638
dpif_netlink_cache_get_supported_levels(struct dpif *dpif_, uint32_t *levels)
3639
0
{
3640
0
    struct dpif_netlink_dp dp;
3641
0
    struct ofpbuf *buf;
3642
0
    int error;
3643
3644
    /* If available, in the kernel we support one level of cache.
3645
     * Unfortunately, there is no way to detect if the older kernel module has
3646
     * the cache feature.  For now, we only report the cache information if the
3647
     * kernel module reports the OVS_DP_ATTR_MASKS_CACHE_SIZE attribute. */
3648
3649
0
    *levels = 0;
3650
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
3651
0
    if (!error) {
3652
3653
0
        if (dp.cache_size != UINT32_MAX) {
3654
0
            *levels = 1;
3655
0
        }
3656
0
        ofpbuf_delete(buf);
3657
0
    }
3658
3659
0
    return error;
3660
0
}
3661
3662
static int
3663
dpif_netlink_cache_get_name(struct dpif *dpif_ OVS_UNUSED, uint32_t level,
3664
                            const char **name)
3665
0
{
3666
0
    if (level != 0) {
3667
0
        return EINVAL;
3668
0
    }
3669
3670
0
    *name = "masks-cache";
3671
0
    return 0;
3672
0
}
3673
3674
static int
3675
dpif_netlink_cache_get_size(struct dpif *dpif_, uint32_t level, uint32_t *size)
3676
0
{
3677
0
    struct dpif_netlink_dp dp;
3678
0
    struct ofpbuf *buf;
3679
0
    int error;
3680
3681
0
    if (level != 0) {
3682
0
        return EINVAL;
3683
0
    }
3684
3685
0
    error = dpif_netlink_dp_get(dpif_, &dp, &buf);
3686
0
    if (!error) {
3687
3688
0
        ofpbuf_delete(buf);
3689
3690
0
        if (dp.cache_size == UINT32_MAX) {
3691
0
            return EOPNOTSUPP;
3692
0
        }
3693
0
        *size = dp.cache_size;
3694
0
    }
3695
0
    return error;
3696
0
}
3697
3698
static int
3699
dpif_netlink_cache_set_size(struct dpif *dpif_, uint32_t level, uint32_t size)
3700
0
{
3701
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3702
0
    struct dpif_netlink_dp request, reply;
3703
0
    struct ofpbuf *bufp;
3704
0
    int error;
3705
3706
0
    size = ROUND_UP_POW2(size);
3707
3708
0
    if (level != 0) {
3709
0
        return EINVAL;
3710
0
    }
3711
3712
0
    dpif_netlink_dp_init(&request);
3713
0
    request.cmd = OVS_DP_CMD_SET;
3714
0
    request.name = dpif_->base_name;
3715
0
    request.dp_ifindex = dpif->dp_ifindex;
3716
0
    request.cache_size = size;
3717
    /* We need to set the dpif user_features, as the kernel module assumes the
3718
     * OVS_DP_ATTR_USER_FEATURES attribute is always present. If not, it will
3719
     * reset all the features. */
3720
0
    request.user_features = dpif->user_features;
3721
3722
0
    error = dpif_netlink_dp_transact(&request, &reply, &bufp);
3723
0
    if (!error) {
3724
0
        ofpbuf_delete(bufp);
3725
0
        if (reply.cache_size != size) {
3726
0
            return EINVAL;
3727
0
        }
3728
0
    }
3729
3730
0
    return error;
3731
0
}
3732
3733

3734
const struct dpif_class dpif_netlink_class = {
3735
    "system",
3736
    false,                      /* cleanup_required */
3737
    NULL,                       /* init */
3738
    dpif_netlink_enumerate,
3739
    NULL,
3740
    dpif_netlink_open,
3741
    dpif_netlink_close,
3742
    dpif_netlink_destroy,
3743
    dpif_netlink_run,
3744
    NULL,                       /* wait */
3745
    dpif_netlink_get_stats,
3746
    dpif_netlink_set_features,
3747
    dpif_netlink_get_features,
3748
    dpif_netlink_port_add,
3749
    dpif_netlink_port_del,
3750
    NULL,                       /* port_set_config */
3751
    dpif_netlink_port_query_by_number,
3752
    dpif_netlink_port_query_by_name,
3753
    dpif_netlink_port_get_pid,
3754
    dpif_netlink_port_dump_start,
3755
    dpif_netlink_port_dump_next,
3756
    dpif_netlink_port_dump_done,
3757
    dpif_netlink_port_poll,
3758
    dpif_netlink_port_poll_wait,
3759
    dpif_netlink_flow_flush,
3760
    dpif_netlink_flow_dump_create,
3761
    dpif_netlink_flow_dump_destroy,
3762
    dpif_netlink_flow_dump_thread_create,
3763
    dpif_netlink_flow_dump_thread_destroy,
3764
    dpif_netlink_flow_dump_next,
3765
    dpif_netlink_operate,
3766
    dpif_netlink_recv_set,
3767
    dpif_netlink_handlers_set,
3768
    dpif_netlink_number_handlers_required,
3769
    NULL,                       /* set_config */
3770
    dpif_netlink_queue_to_priority,
3771
    dpif_netlink_recv,
3772
    dpif_netlink_recv_wait,
3773
    dpif_netlink_recv_purge,
3774
    NULL,                       /* register_dp_purge_cb */
3775
    NULL,                       /* register_upcall_cb */
3776
    NULL,                       /* enable_upcall */
3777
    NULL,                       /* disable_upcall */
3778
    dpif_netlink_get_datapath_version, /* get_datapath_version */
3779
    dpif_netlink_ct_dump_start,
3780
    dpif_netlink_ct_dump_next,
3781
    dpif_netlink_ct_dump_done,
3782
    NULL,                       /* ct_exp_dump_start */
3783
    NULL,                       /* ct_exp_dump_next */
3784
    NULL,                       /* ct_exp_dump_done */
3785
    dpif_netlink_ct_flush,
3786
    NULL,                       /* ct_set_maxconns */
3787
    NULL,                       /* ct_get_maxconns */
3788
    NULL,                       /* ct_get_nconns */
3789
    NULL,                       /* ct_set_tcp_seq_chk */
3790
    NULL,                       /* ct_get_tcp_seq_chk */
3791
    NULL,                       /* ct_set_sweep_interval */
3792
    NULL,                       /* ct_get_sweep_interval */
3793
    dpif_netlink_ct_set_limits,
3794
    dpif_netlink_ct_get_limits,
3795
    dpif_netlink_ct_del_limits,
3796
    dpif_netlink_ct_set_timeout_policy,
3797
    dpif_netlink_ct_get_timeout_policy,
3798
    dpif_netlink_ct_del_timeout_policy,
3799
    dpif_netlink_ct_timeout_policy_dump_start,
3800
    dpif_netlink_ct_timeout_policy_dump_next,
3801
    dpif_netlink_ct_timeout_policy_dump_done,
3802
    dpif_netlink_ct_get_timeout_policy_name,
3803
    dpif_netlink_ct_get_features,
3804
    NULL,                       /* ipf_set_enabled */
3805
    NULL,                       /* ipf_set_min_frag */
3806
    NULL,                       /* ipf_set_max_nfrags */
3807
    NULL,                       /* ipf_get_status */
3808
    NULL,                       /* ipf_dump_start */
3809
    NULL,                       /* ipf_dump_next */
3810
    NULL,                       /* ipf_dump_done */
3811
    dpif_netlink_meter_get_features,
3812
    dpif_netlink_meter_set,
3813
    dpif_netlink_meter_get,
3814
    dpif_netlink_meter_del,
3815
    NULL,                       /* bond_add */
3816
    NULL,                       /* bond_del */
3817
    NULL,                       /* bond_stats_get */
3818
    dpif_netlink_cache_get_supported_levels,
3819
    dpif_netlink_cache_get_name,
3820
    dpif_netlink_cache_get_size,
3821
    dpif_netlink_cache_set_size,
3822
};
3823
3824
static int
3825
dpif_netlink_init(void)
3826
0
{
3827
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3828
0
    static int error;
3829
3830
0
    if (ovsthread_once_start(&once)) {
3831
0
        error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
3832
0
                                      &ovs_datapath_family);
3833
0
        if (error) {
3834
0
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
3835
0
                      "The Open vSwitch kernel module is probably not loaded.",
3836
0
                      OVS_DATAPATH_FAMILY);
3837
0
        }
3838
0
        if (!error) {
3839
0
            error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
3840
0
        }
3841
0
        if (!error) {
3842
0
            error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
3843
0
        }
3844
0
        if (!error) {
3845
0
            error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
3846
0
                                          &ovs_packet_family);
3847
0
        }
3848
0
        if (!error) {
3849
0
            error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
3850
0
                                           &ovs_vport_mcgroup);
3851
0
        }
3852
0
        if (!error) {
3853
0
            if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
3854
0
                VLOG_INFO("The kernel module does not support meters.");
3855
0
            }
3856
0
        }
3857
0
        if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
3858
0
                                  &ovs_ct_limit_family) < 0) {
3859
0
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
3860
0
                      "Please update the Open vSwitch kernel module to enable "
3861
0
                      "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
3862
0
        }
3863
3864
0
        ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
3865
3866
0
        unixctl_command_register("dpif-netlink/dispatch-mode", "", 0, 0,
3867
0
                                 dpif_netlink_unixctl_dispatch_mode, NULL);
3868
3869
0
        ovsthread_once_done(&once);
3870
0
    }
3871
3872
0
    return error;
3873
0
}
3874
3875
bool
3876
dpif_netlink_is_internal_device(const char *name)
3877
0
{
3878
0
    struct dpif_netlink_vport reply;
3879
0
    struct ofpbuf *buf;
3880
0
    int error;
3881
3882
0
    error = dpif_netlink_vport_get(name, &reply, &buf);
3883
0
    if (!error) {
3884
0
        ofpbuf_delete(buf);
3885
0
    } else if (error != ENODEV && error != ENOENT) {
3886
0
        VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
3887
0
                     name, ovs_strerror(error));
3888
0
    }
3889
3890
0
    return reply.type == OVS_VPORT_TYPE_INTERNAL;
3891
0
}
3892
3893
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
3894
 * by Netlink attributes, into 'vport'.  Returns 0 if successful, otherwise a
3895
 * positive errno value.
3896
 *
3897
 * 'vport' will contain pointers into 'buf', so the caller should not free
3898
 * 'buf' while 'vport' is still in use. */
3899
static int
3900
dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
3901
                             const struct ofpbuf *buf)
3902
0
{
3903
0
    static const struct nl_policy ovs_vport_policy[] = {
3904
0
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
3905
0
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
3906
0
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
3907
0
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
3908
0
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
3909
0
                                   .optional = true },
3910
0
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
3911
0
        [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
3912
0
        [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NL_A_NESTED,
3913
0
                                          .optional = true },
3914
0
    };
3915
3916
0
    dpif_netlink_vport_init(vport);
3917
3918
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3919
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3920
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3921
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3922
3923
0
    struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
3924
0
    if (!nlmsg || !genl || !ovs_header
3925
0
        || nlmsg->nlmsg_type != ovs_vport_family
3926
0
        || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
3927
0
                            ARRAY_SIZE(ovs_vport_policy))) {
3928
0
        return EINVAL;
3929
0
    }
3930
3931
0
    vport->cmd = genl->cmd;
3932
0
    vport->dp_ifindex = ovs_header->dp_ifindex;
3933
0
    vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
3934
0
    vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
3935
0
    vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
3936
0
    if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
3937
0
        vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
3938
0
                               / (sizeof *vport->upcall_pids);
3939
0
        vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
3940
3941
0
    }
3942
0
    if (a[OVS_VPORT_ATTR_STATS]) {
3943
0
        vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
3944
0
    }
3945
0
    if (a[OVS_VPORT_ATTR_UPCALL_STATS]) {
3946
0
        const struct nlattr *nla;
3947
0
        size_t left;
3948
3949
0
        NL_NESTED_FOR_EACH (nla, left, a[OVS_VPORT_ATTR_UPCALL_STATS]) {
3950
0
            if (nl_attr_type(nla) == OVS_VPORT_UPCALL_ATTR_SUCCESS) {
3951
0
                vport->upcall_success = nl_attr_get_u64(nla);
3952
0
            } else if (nl_attr_type(nla) == OVS_VPORT_UPCALL_ATTR_FAIL) {
3953
0
                vport->upcall_fail = nl_attr_get_u64(nla);
3954
0
            }
3955
0
        }
3956
0
    } else {
3957
0
        vport->upcall_success = UINT64_MAX;
3958
0
        vport->upcall_fail = UINT64_MAX;
3959
0
    }
3960
0
    if (a[OVS_VPORT_ATTR_OPTIONS]) {
3961
0
        vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
3962
0
        vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
3963
0
    }
3964
0
    if (a[OVS_VPORT_ATTR_NETNSID]) {
3965
0
        netnsid_set(&vport->netnsid,
3966
0
                    nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
3967
0
    } else {
3968
0
        netnsid_set_local(&vport->netnsid);
3969
0
    }
3970
0
    return 0;
3971
0
}
3972
3973
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
3974
 * followed by Netlink attributes corresponding to 'vport'. */
3975
static void
3976
dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
3977
                             struct ofpbuf *buf)
3978
0
{
3979
0
    struct ovs_header *ovs_header;
3980
3981
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
3982
0
                          vport->cmd, OVS_VPORT_VERSION);
3983
3984
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3985
0
    ovs_header->dp_ifindex = vport->dp_ifindex;
3986
3987
0
    if (vport->port_no != ODPP_NONE) {
3988
0
        nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
3989
0
    }
3990
3991
0
    if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
3992
0
        nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
3993
0
    }
3994
3995
0
    if (vport->name) {
3996
0
        nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
3997
0
    }
3998
3999
0
    if (vport->upcall_pids) {
4000
0
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
4001
0
                          vport->upcall_pids,
4002
0
                          vport->n_upcall_pids * sizeof *vport->upcall_pids);
4003
0
    }
4004
4005
0
    if (vport->stats) {
4006
0
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
4007
0
                          vport->stats, sizeof *vport->stats);
4008
0
    }
4009
4010
0
    if (vport->options) {
4011
0
        nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
4012
0
                          vport->options, vport->options_len);
4013
0
    }
4014
0
}
4015
4016
/* Clears 'vport' to "empty" values. */
4017
void
4018
dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
4019
0
{
4020
0
    memset(vport, 0, sizeof *vport);
4021
0
    vport->port_no = ODPP_NONE;
4022
0
}
4023
4024
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4025
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4026
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4027
 * result of the command is expected to be an ovs_vport also, which is decoded
4028
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
4029
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4030
int
4031
dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
4032
                            struct dpif_netlink_vport *reply,
4033
                            struct ofpbuf **bufp)
4034
0
{
4035
0
    struct ofpbuf *request_buf;
4036
0
    int error;
4037
4038
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4039
4040
0
    error = dpif_netlink_init();
4041
0
    if (error) {
4042
0
        if (reply) {
4043
0
            *bufp = NULL;
4044
0
            dpif_netlink_vport_init(reply);
4045
0
        }
4046
0
        return error;
4047
0
    }
4048
4049
0
    request_buf = ofpbuf_new(1024);
4050
0
    dpif_netlink_vport_to_ofpbuf(request, request_buf);
4051
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4052
0
    ofpbuf_delete(request_buf);
4053
4054
0
    if (reply) {
4055
0
        if (!error) {
4056
0
            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
4057
0
        }
4058
0
        if (error) {
4059
0
            dpif_netlink_vport_init(reply);
4060
0
            ofpbuf_delete(*bufp);
4061
0
            *bufp = NULL;
4062
0
        }
4063
0
    }
4064
0
    return error;
4065
0
}
4066
4067
/* Obtains information about the kernel vport named 'name' and stores it into
4068
 * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
4069
 * longer needed ('reply' will contain pointers into '*bufp').  */
4070
int
4071
dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
4072
                       struct ofpbuf **bufp)
4073
0
{
4074
0
    struct dpif_netlink_vport request;
4075
4076
0
    dpif_netlink_vport_init(&request);
4077
0
    request.cmd = OVS_VPORT_CMD_GET;
4078
0
    request.name = name;
4079
4080
0
    return dpif_netlink_vport_transact(&request, reply, bufp);
4081
0
}
4082
4083
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4084
 * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
4085
 * positive errno value.
4086
 *
4087
 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
4088
 * while 'dp' is still in use. */
4089
static int
4090
dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
4091
0
{
4092
0
    static const struct nl_policy ovs_datapath_policy[] = {
4093
0
        [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
4094
0
        [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
4095
0
                                .optional = true },
4096
0
        [OVS_DP_ATTR_MEGAFLOW_STATS] = {
4097
0
                        NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
4098
0
                        .optional = true },
4099
0
        [OVS_DP_ATTR_USER_FEATURES] = {
4100
0
                        .type = NL_A_U32,
4101
0
                        .optional = true },
4102
0
        [OVS_DP_ATTR_MASKS_CACHE_SIZE] = {
4103
0
                        .type = NL_A_U32,
4104
0
                        .optional = true },
4105
0
    };
4106
4107
0
    dpif_netlink_dp_init(dp);
4108
4109
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4110
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4111
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4112
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4113
4114
0
    struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
4115
0
    if (!nlmsg || !genl || !ovs_header
4116
0
        || nlmsg->nlmsg_type != ovs_datapath_family
4117
0
        || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
4118
0
                            ARRAY_SIZE(ovs_datapath_policy))) {
4119
0
        return EINVAL;
4120
0
    }
4121
4122
0
    dp->cmd = genl->cmd;
4123
0
    dp->dp_ifindex = ovs_header->dp_ifindex;
4124
0
    dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
4125
0
    if (a[OVS_DP_ATTR_STATS]) {
4126
0
        dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
4127
0
    }
4128
4129
0
    if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
4130
0
        dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
4131
0
    }
4132
4133
0
    if (a[OVS_DP_ATTR_USER_FEATURES]) {
4134
0
        dp->user_features = nl_attr_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
4135
0
    }
4136
4137
0
    if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
4138
0
        dp->cache_size = nl_attr_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
4139
0
    } else {
4140
0
        dp->cache_size = UINT32_MAX;
4141
0
    }
4142
4143
0
    return 0;
4144
0
}
4145
4146
/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
4147
static void
4148
dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
4149
0
{
4150
0
    struct ovs_header *ovs_header;
4151
4152
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
4153
0
                          NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
4154
0
                          OVS_DATAPATH_VERSION);
4155
4156
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4157
0
    ovs_header->dp_ifindex = dp->dp_ifindex;
4158
4159
0
    if (dp->name) {
4160
0
        nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
4161
0
    }
4162
4163
0
    if (dp->upcall_pid) {
4164
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
4165
0
    }
4166
4167
0
    if (dp->user_features) {
4168
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
4169
0
    }
4170
4171
0
    if (dp->upcall_pids) {
4172
0
        nl_msg_put_unspec(buf, OVS_DP_ATTR_PER_CPU_PIDS, dp->upcall_pids,
4173
0
                          sizeof *dp->upcall_pids * dp->n_upcall_pids);
4174
0
    }
4175
4176
0
    if (dp->cache_size != UINT32_MAX) {
4177
0
        nl_msg_put_u32(buf, OVS_DP_ATTR_MASKS_CACHE_SIZE, dp->cache_size);
4178
0
    }
4179
4180
    /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
4181
0
}
4182
4183
/* Clears 'dp' to "empty" values. */
4184
static void
4185
dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
4186
0
{
4187
0
    memset(dp, 0, sizeof *dp);
4188
0
    dp->cache_size = UINT32_MAX;
4189
0
}
4190
4191
static void
4192
dpif_netlink_dp_dump_start(struct nl_dump *dump)
4193
0
{
4194
0
    struct dpif_netlink_dp request;
4195
0
    struct ofpbuf *buf;
4196
4197
0
    dpif_netlink_dp_init(&request);
4198
0
    request.cmd = OVS_DP_CMD_GET;
4199
4200
0
    buf = ofpbuf_new(1024);
4201
0
    dpif_netlink_dp_to_ofpbuf(&request, buf);
4202
0
    nl_dump_start(dump, NETLINK_GENERIC, buf);
4203
0
    ofpbuf_delete(buf);
4204
0
}
4205
4206
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4207
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4208
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4209
 * result of the command is expected to be of the same form, which is decoded
4210
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
4211
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4212
static int
4213
dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
4214
                         struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
4215
0
{
4216
0
    struct ofpbuf *request_buf;
4217
0
    int error;
4218
4219
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4220
4221
0
    request_buf = ofpbuf_new(1024);
4222
0
    dpif_netlink_dp_to_ofpbuf(request, request_buf);
4223
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4224
0
    ofpbuf_delete(request_buf);
4225
4226
0
    if (reply) {
4227
0
        dpif_netlink_dp_init(reply);
4228
0
        if (!error) {
4229
0
            error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
4230
0
        }
4231
0
        if (error) {
4232
0
            ofpbuf_delete(*bufp);
4233
0
            *bufp = NULL;
4234
0
        }
4235
0
    }
4236
0
    return error;
4237
0
}
4238
4239
/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
4240
 * The caller must free '*bufp' when the reply is no longer needed ('reply'
4241
 * will contain pointers into '*bufp').  */
4242
static int
4243
dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
4244
                    struct ofpbuf **bufp)
4245
0
{
4246
0
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4247
0
    struct dpif_netlink_dp request;
4248
4249
0
    dpif_netlink_dp_init(&request);
4250
0
    request.cmd = OVS_DP_CMD_GET;
4251
0
    request.dp_ifindex = dpif->dp_ifindex;
4252
4253
0
    return dpif_netlink_dp_transact(&request, reply, bufp);
4254
0
}
4255
4256
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
4257
 * by Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
4258
 * positive errno value.
4259
 *
4260
 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
4261
 * while 'flow' is still in use. */
4262
static int
4263
dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
4264
                              const struct ofpbuf *buf)
4265
0
{
4266
0
    static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
4267
0
        [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
4268
0
        [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
4269
0
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
4270
0
        [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
4271
0
                                  .optional = true },
4272
0
        [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
4273
0
        [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
4274
0
        [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
4275
        /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
4276
        /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
4277
        /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
4278
0
    };
4279
4280
0
    dpif_netlink_flow_init(flow);
4281
4282
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4283
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4284
0
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4285
0
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4286
4287
0
    struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
4288
0
    if (!nlmsg || !genl || !ovs_header
4289
0
        || nlmsg->nlmsg_type != ovs_flow_family
4290
0
        || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
4291
0
                            ARRAY_SIZE(ovs_flow_policy))) {
4292
0
        return EINVAL;
4293
0
    }
4294
0
    if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
4295
0
        return EINVAL;
4296
0
    }
4297
4298
0
    flow->nlmsg_flags = nlmsg->nlmsg_flags;
4299
0
    flow->dp_ifindex = ovs_header->dp_ifindex;
4300
0
    if (a[OVS_FLOW_ATTR_KEY]) {
4301
0
        flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
4302
0
        flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
4303
0
    }
4304
4305
0
    if (a[OVS_FLOW_ATTR_UFID]) {
4306
0
        flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
4307
0
        flow->ufid_present = true;
4308
0
    }
4309
0
    if (a[OVS_FLOW_ATTR_MASK]) {
4310
0
        flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
4311
0
        flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
4312
0
    }
4313
0
    if (a[OVS_FLOW_ATTR_ACTIONS]) {
4314
0
        flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
4315
0
        flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
4316
0
    }
4317
0
    if (a[OVS_FLOW_ATTR_STATS]) {
4318
0
        flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
4319
0
    }
4320
0
    if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
4321
0
        flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
4322
0
    }
4323
0
    if (a[OVS_FLOW_ATTR_USED]) {
4324
0
        flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
4325
0
    }
4326
0
    return 0;
4327
0
}
4328
4329
4330
/*
4331
 * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out.
4332
 * If the flow is not Ethernet, the OVS_KEY_ATTR_PACKET_TYPE is converted to
4333
 * OVS_KEY_ATTR_ETHERTYPE. Puts 'data' to 'buf'.
4334
 */
4335
static void
4336
put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
4337
                        const struct nlattr *data, uint16_t data_len)
4338
0
{
4339
0
    const struct nlattr *packet_type;
4340
4341
0
    packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
4342
4343
0
    if (packet_type) {
4344
        /* exclude PACKET_TYPE Netlink attribute. */
4345
0
        ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
4346
0
        size_t packet_type_len = NL_A_U32_SIZE;
4347
0
        size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
4348
0
        size_t second_chunk_size = data_len - first_chunk_size
4349
0
                                   - packet_type_len;
4350
0
        struct nlattr *next_attr = nl_attr_next(packet_type);
4351
0
        size_t ofs;
4352
4353
0
        ofs = nl_msg_start_nested(buf, type);
4354
0
        nl_msg_put(buf, data, first_chunk_size);
4355
0
        nl_msg_put(buf, next_attr, second_chunk_size);
4356
0
        if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
4357
0
            ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
4358
0
            const struct nlattr *nla;
4359
4360
0
            nla = nl_attr_find(buf, ofs + NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
4361
0
            if (nla) {
4362
0
                ovs_be16 *ethertype;
4363
4364
0
                ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
4365
0
                *ethertype = pt;
4366
0
            } else {
4367
0
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
4368
0
            }
4369
0
        }
4370
0
        nl_msg_end_nested(buf, ofs);
4371
0
    } else {
4372
0
        nl_msg_put_unspec(buf, type, data, data_len);
4373
0
    }
4374
0
}
4375
4376
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
4377
 * followed by Netlink attributes corresponding to 'flow'. */
4378
static void
4379
dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
4380
                            struct ofpbuf *buf)
4381
0
{
4382
0
    struct ovs_header *ovs_header;
4383
4384
0
    nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
4385
0
                          NLM_F_REQUEST | flow->nlmsg_flags,
4386
0
                          flow->cmd, OVS_FLOW_VERSION);
4387
4388
0
    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4389
0
    ovs_header->dp_ifindex = flow->dp_ifindex;
4390
4391
0
    if (flow->ufid_present) {
4392
0
        nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
4393
0
    }
4394
0
    if (flow->ufid_terse) {
4395
0
        nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
4396
0
                       OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
4397
0
                       | OVS_UFID_F_OMIT_ACTIONS);
4398
0
    }
4399
0
    if (!flow->ufid_terse || !flow->ufid_present) {
4400
0
        if (flow->key_len) {
4401
0
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
4402
0
                                           flow->key_len);
4403
0
        }
4404
0
        if (flow->mask_len) {
4405
0
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
4406
0
                                           flow->mask_len);
4407
0
        }
4408
0
        if (flow->actions) {
4409
0
            nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
4410
0
                              flow->actions, flow->actions_len);
4411
0
        }
4412
0
    }
4413
4414
    /* We never need to send these to the kernel. */
4415
0
    ovs_assert(!flow->stats);
4416
0
    ovs_assert(!flow->tcp_flags);
4417
0
    ovs_assert(!flow->used);
4418
4419
0
    if (flow->clear) {
4420
0
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
4421
0
    }
4422
0
    if (flow->probe) {
4423
0
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
4424
0
    }
4425
0
}
4426
4427
/* Clears 'flow' to "empty" values. */
4428
static void
4429
dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
4430
0
{
4431
0
    memset(flow, 0, sizeof *flow);
4432
0
}
4433
4434
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
4435
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
4436
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
4437
 * result of the command is expected to be a flow also, which is decoded and
4438
 * stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the reply
4439
 * is no longer needed ('reply' will contain pointers into '*bufp'). */
4440
static int
4441
dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
4442
                           struct dpif_netlink_flow *reply,
4443
                           struct ofpbuf **bufp)
4444
0
{
4445
0
    struct ofpbuf *request_buf;
4446
0
    int error;
4447
4448
0
    ovs_assert((reply != NULL) == (bufp != NULL));
4449
4450
0
    if (reply) {
4451
0
        request->nlmsg_flags |= NLM_F_ECHO;
4452
0
    }
4453
4454
0
    request_buf = ofpbuf_new(1024);
4455
0
    dpif_netlink_flow_to_ofpbuf(request, request_buf);
4456
0
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
4457
0
    ofpbuf_delete(request_buf);
4458
4459
0
    if (reply) {
4460
0
        if (!error) {
4461
0
            error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
4462
0
        }
4463
0
        if (error) {
4464
0
            dpif_netlink_flow_init(reply);
4465
0
            ofpbuf_delete(*bufp);
4466
0
            *bufp = NULL;
4467
0
        }
4468
0
    }
4469
0
    return error;
4470
0
}
4471
4472
static void
4473
dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
4474
                            struct dpif_flow_stats *stats)
4475
0
{
4476
0
    if (flow->stats) {
4477
0
        stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
4478
0
        stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
4479
0
    } else {
4480
0
        stats->n_packets = 0;
4481
0
        stats->n_bytes = 0;
4482
0
    }
4483
0
    stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
4484
0
    stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
4485
0
}
4486
4487
/* Logs information about a packet that was recently lost in 'ch' (in
4488
 * 'dpif_'). */
4489
static void
4490
report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
4491
            uint32_t handler_id)
4492
0
{
4493
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
4494
0
    struct ds s;
4495
4496
0
    if (VLOG_DROP_WARN(&rl)) {
4497
0
        return;
4498
0
    }
4499
4500
0
    if (dpif_netlink_upcall_per_cpu(dpif)) {
4501
0
        VLOG_WARN("%s: lost packet on handler %u",
4502
0
                  dpif_name(&dpif->dpif), handler_id);
4503
0
    } else {
4504
0
        ds_init(&s);
4505
0
        if (ch->last_poll != LLONG_MIN) {
4506
0
            ds_put_format(&s, " (last polled %lld ms ago)",
4507
0
                        time_msec() - ch->last_poll);
4508
0
        }
4509
4510
0
        VLOG_WARN("%s: lost packet on port channel %u of handler %u%s",
4511
0
                  dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s));
4512
0
        ds_destroy(&s);
4513
0
    }
4514
0
}
4515
4516
static void
4517
dpif_netlink_unixctl_dispatch_mode(struct unixctl_conn *conn,
4518
                                   int argc OVS_UNUSED,
4519
                                   const char *argv[] OVS_UNUSED,
4520
                                   void *aux OVS_UNUSED)
4521
0
{
4522
0
    struct ds reply = DS_EMPTY_INITIALIZER;
4523
0
    struct nl_dump dump;
4524
0
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
4525
0
    struct ofpbuf msg, buf;
4526
0
    int error;
4527
4528
0
    error = dpif_netlink_init();
4529
0
    if (error) {
4530
0
        return;
4531
0
    }
4532
4533
0
    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
4534
0
    dpif_netlink_dp_dump_start(&dump);
4535
0
    while (nl_dump_next(&dump, &msg, &buf)) {
4536
0
        struct dpif_netlink_dp dp;
4537
0
        if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
4538
0
            ds_put_format(&reply, "%s: ", dp.name);
4539
0
            if (dp.user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) {
4540
0
                ds_put_format(&reply, "per-cpu dispatch mode");
4541
0
            } else {
4542
0
                ds_put_format(&reply, "per-vport dispatch mode");
4543
0
            }
4544
0
            ds_put_format(&reply, "\n");
4545
0
        }
4546
0
    }
4547
0
    ofpbuf_uninit(&buf);
4548
0
    error = nl_dump_done(&dump);
4549
0
    if (!error) {
4550
0
        unixctl_command_reply(conn, ds_cstr(&reply));
4551
0
    }
4552
4553
0
    ds_destroy(&reply);
4554
0
}