Coverage Report

Created: 2025-07-01 06:50

/src/openvswitch/lib/netlink-conntrack.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2015 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
19
#include "netlink-conntrack.h"
20
21
#include <errno.h>
22
#include <linux/netfilter/nfnetlink.h>
23
#include <linux/netfilter/nfnetlink_conntrack.h>
24
#include <linux/netfilter/nf_conntrack_common.h>
25
#include <linux/netfilter/nf_conntrack_tcp.h>
26
#include <linux/netfilter/nf_conntrack_ftp.h>
27
#include <linux/netfilter/nf_conntrack_sctp.h>
28
29
#include "byte-order.h"
30
#include "compiler.h"
31
#include "openvswitch/dynamic-string.h"
32
#include "netlink.h"
33
#include "netlink-socket.h"
34
#include "openvswitch/ofpbuf.h"
35
#include "openvswitch/vlog.h"
36
#include "openvswitch/poll-loop.h"
37
#include "timeval.h"
38
#include "unixctl.h"
39
#include "util.h"
40
41
VLOG_DEFINE_THIS_MODULE(netlink_conntrack);
42
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
43
44
/* This module works only if conntrack modules and features are enabled in the
45
 * Linux kernel.  This can be done from a root shell like this:
46
 *
47
 * $ modprobe ip_conntrack
48
 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
49
 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
50
 *
51
 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
52
 * module, there must be a connlabel rule in iptables for space to be reserved
53
 * for the labels (see kernel source connlabel_mt_check()).  Such a rule can be
54
 * inserted from a root shell like this:
55
 *
56
 * $ iptables -A INPUT -m conntrack -m connlabel \
57
 *   --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
58
 */
59
60
/* Some attributes were introduced in later kernels: with these definitions
61
 * we should be able to compile userspace against Linux 2.6.32+. */
62
63
0
/* Top-level conntrack attribute types, numbered consecutively after
 * CTA_SECMARK.  CTA_ZONE, CTA_SECCTX, CTA_TIMESTAMP and CTA_LABELS are used
 * as indexes into nfnlgrp_conntrack_policy[] below. */
#define CTA_ZONE          (CTA_SECMARK + 1)
64
#define CTA_SECCTX        (CTA_SECMARK + 2)
65
0
#define CTA_TIMESTAMP     (CTA_SECMARK + 3)
66
#define CTA_MARK_MASK     (CTA_SECMARK + 4)
67
0
#define CTA_LABELS        (CTA_SECMARK + 5)
68
#define CTA_LABELS_MASK   (CTA_SECMARK + 6)
69
70
0
/* Attribute types nested inside CTA_TIMESTAMP; see nl_ct_parse_timestamp(). */
#define CTA_TIMESTAMP_START 1
71
0
#define CTA_TIMESTAMP_STOP  2
72
73
0
/* Bit position 11 and the corresponding single-bit mask.
 * NOTE(review): presumably a conntrack status flag (compare CTA_STATUS);
 * no user is visible in this part of the file -- confirm. */
#define IPS_TEMPLATE_BIT 11
74
0
#define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
75
76
0
/* Bit position 12 and the corresponding single-bit mask.
 * NOTE(review): presumably a conntrack status flag as well -- confirm. */
#define IPS_UNTRACKED_BIT 12
77
0
#define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
78
79
/* Policy for the top-level attributes of a conntrack netlink message.  Only
 * CTA_TUPLE_ORIG, CTA_TUPLE_REPLY, CTA_STATUS and CTA_ID are marked as
 * required (.optional = false); every other entry is optional.
 * ARRAY_SIZE() of this table also sizes the 'attrs' arrays that the dump and
 * flush code in this file passes to nl_ct_parse_header_policy(). */
static const struct nl_policy nfnlgrp_conntrack_policy[] = {
80
    [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false },
81
    [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false },
82
    [CTA_ZONE] = { .type = NL_A_BE16, .optional = true },
83
    [CTA_STATUS] = { .type = NL_A_BE32, .optional = false },
84
    [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true },
85
    [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true },
86
    [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true },
87
    [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true },
88
    [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true },
89
    [CTA_HELP] = { .type = NL_A_NESTED, .optional = true },
90
    [CTA_MARK] = { .type = NL_A_BE32, .optional = true },
91
    [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true },
92
    [CTA_ID] = { .type = NL_A_BE32, .optional = false },
93
    [CTA_USE] = { .type = NL_A_BE32, .optional = true },
94
    [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true },
95
    [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true },
96
    [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true },
97
    [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true },
98
    /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and
99
     * CTA_LABELS_MASK are not received from kernel. */
100
    /* NOTE(review): the list above names CTA_TIMESTAMP, yet this table does
     * contain a CTA_TIMESTAMP entry -- confirm which one is intended. */
};
101
102
/* Declarations for conntrack netlink dumping. */
103
/* Starts a netfilter netlink request in 'msg'.  Every call site in this part
 * of the file passes 0 for 'expected_payload'. */
static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
104
                                int family, uint8_t subsystem, uint8_t cmd,
105
                                uint32_t flags);
106
107
/* Callers skip the message in 'buf' when this returns false; otherwise they
 * go on to read 'attrs' and '*nfgen_family'. */
static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
108
        enum nl_ct_event_type *event_type,
109
        uint8_t *nfgen_family,
110
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]);
111
112
/* nl_ct_dump_next() treats a false return as a failed entry: it uninitializes
 * 'entry' and moves on to the next message. */
static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
113
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
114
        uint8_t nfgen_family);
115
/* Appends 'tuple' to 'buf' as a nested attribute of 'type'; returns false if
 * 'type' or the tuple's protocols are not supported. */
static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
116
        const struct ct_dpif_tuple *tuple, enum ctattr_type type);
117
118
/* State of one conntrack dump, allocated by nl_ct_dump_start() and freed by
 * nl_ct_dump_done(). */
struct nl_ct_dump_state {
119
    struct nl_dump dump;        /* Netlink dump in progress. */
120
    struct ofpbuf buf;          /* Holds the request, then passed to
                                 * nl_dump_next() for the replies. */
121
    bool filter_zone;           /* True if the caller asked for one zone. */
122
    uint16_t zone;              /* Zone to report, in host byte order; only
                                 * meaningful if 'filter_zone' is true. */
123
};
124
125
/* Conntrack netlink dumping. */
126
127
/* Initialize a conntrack netlink dump. */
128
int
129
nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone,
130
        int *ptot_bkts)
131
0
{
132
0
    struct nl_ct_dump_state *state;
133
134
0
    *statep = state = xzalloc(sizeof *state);
135
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
136
137
0
    if (zone) {
138
0
        state->filter_zone = true;
139
0
        state->zone = *zone;
140
0
    }
141
142
0
    nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
143
0
                        IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
144
0
    if (zone) {
145
0
        nl_msg_put_be16(&state->buf, CTA_ZONE, htons(*zone));
146
0
    }
147
0
    nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf);
148
0
    ofpbuf_clear(&state->buf);
149
150
    /* Buckets to store connections are not used. */
151
0
    *ptot_bkts = -1;
152
153
0
    return 0;
154
0
}
155
156
/* Receive the next 'entry' from the conntrack netlink dump with 'state'.
157
 * Returns 'EOF' when no more entries are available, 0 otherwise.  'entry' may
158
 * be uninitilized memory on entry, and must be uninitialized with
159
 * ct_dpif_entry_uninit() afterwards by the caller.  In case the same 'entry' is
160
 * passed to this function again, the entry must also be uninitialized before
161
 * the next call. */
162
int
163
nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry)
164
0
{
165
0
    struct ofpbuf buf;
166
167
0
    memset(entry, 0, sizeof *entry);
168
0
    for (;;) {
169
0
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
170
0
        enum nl_ct_event_type type;
171
0
        uint8_t nfgen_family;
172
173
0
        if (!nl_dump_next(&state->dump, &buf, &state->buf)) {
174
0
            return EOF;
175
0
        }
176
177
0
        if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) {
178
0
            continue;
179
0
        };
180
181
0
        if (state->filter_zone) {
182
0
            uint16_t entry_zone = attrs[CTA_ZONE]
183
0
                                  ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE]))
184
0
                                  : 0;
185
0
            if (entry_zone != state->zone) {
186
0
                continue;
187
0
            }
188
0
        }
189
190
0
        if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
191
0
            break;
192
0
        }
193
194
0
        ct_dpif_entry_uninit(entry);
195
0
        memset(entry, 0, sizeof *entry);
196
        /* Ignore the failed entry and get the next one. */
197
0
    }
198
199
0
    ofpbuf_uninit(&buf);
200
0
    return 0;
201
0
}
202
203
/* End a conntrack netlink dump. */
204
int
205
nl_ct_dump_done(struct nl_ct_dump_state *state)
206
0
{
207
0
    int error = nl_dump_done(&state->dump);
208
209
0
    ofpbuf_uninit(&state->buf);
210
0
    free(state);
211
0
    return error;
212
0
}
213
214
/* Format conntrack event 'entry' of 'type' to 'ds'. */
215
void
216
nl_ct_format_event_entry(const struct ct_dpif_entry *entry,
217
                         enum nl_ct_event_type type, struct ds *ds,
218
                         bool verbose, bool print_stats)
219
0
{
220
0
    ds_put_format(ds, "%s ",
221
0
                  type == NL_CT_EVENT_NEW ? "NEW"
222
0
                  : type == NL_CT_EVENT_UPDATE ? "UPDATE"
223
0
                  : type == NL_CT_EVENT_DELETE ? "DELETE"
224
0
                  : "UNKNOWN");
225
0
    ct_dpif_format_entry(entry, ds, verbose, print_stats);
226
0
}
227
228
int
229
nl_ct_flush(void)
230
0
{
231
0
    struct ofpbuf buf;
232
0
    int err;
233
234
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
235
236
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
237
0
                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
238
239
0
    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
240
0
    ofpbuf_uninit(&buf);
241
242
    /* Expectations are flushed automatically, because they do not
243
     * have a parent connection anymore */
244
245
0
    return err;
246
0
}
247
248
int
249
nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone)
250
0
{
251
0
    int err;
252
0
    struct ofpbuf buf;
253
254
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
255
0
    nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK,
256
0
                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
257
258
0
    nl_msg_put_be16(&buf, CTA_ZONE, htons(zone));
259
0
    if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) {
260
0
        err = EOPNOTSUPP;
261
0
        goto out;
262
0
    }
263
0
    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
264
0
out:
265
0
    ofpbuf_uninit(&buf);
266
0
    return err;
267
0
}
268
269
static int
270
nl_ct_flush_zone_with_cta_zone(uint16_t flush_zone)
271
0
{
272
0
    struct ofpbuf buf;
273
0
    int err;
274
275
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
276
277
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
278
0
                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
279
0
    nl_msg_put_be16(&buf, CTA_ZONE, htons(flush_zone));
280
281
0
    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
282
0
    ofpbuf_uninit(&buf);
283
284
0
    return err;
285
0
}
286
287
#ifdef _WIN32
288
/* Windows build: flushes 'flush_zone' with a single CTA_ZONE delete request.
 * Returns the result of nl_ct_flush_zone_with_cta_zone(). */
int
nl_ct_flush_zone(uint16_t flush_zone)
{
    const int error = nl_ct_flush_zone_with_cta_zone(flush_zone);

    return error;
}
293
#else
294
295
static bool
296
netlink_flush_supports_zone(void)
297
0
{
298
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
299
0
    static bool supported = false;
300
301
0
    if (ovsthread_once_start(&once)) {
302
0
        if (ovs_kernel_is_version_or_newer(6, 8)) {
303
0
            supported = true;
304
0
        } else {
305
0
            VLOG_INFO("Disabling conntrack flush by zone. "
306
0
                      "Not supported in Linux kernel.");
307
0
        }
308
0
        ovsthread_once_done(&once);
309
0
    }
310
0
    return supported;
311
0
}
312
313
int
314
nl_ct_flush_zone(uint16_t flush_zone)
315
0
{
316
    /* In older kernels, there was no netlink interface to flush a specific
317
     * conntrack zone.
318
     * This code dumps every connection, checks the zone and eventually
319
     * delete the entry.
320
     * In newer kernels there is the option to specify a zone for filtering
321
     * during dumps.  Older kernels ignore this option. We set it here in the
322
     * hope we only get relevant entries back, but fall back to filtering here
323
     * to keep compatibility.
324
     *
325
     * This is race-prone, but it is better than using shell scripts.
326
     *
327
     * Additionally newer kernels also support flushing a zone without listing
328
     * it first. */
329
330
0
    struct nl_dump dump;
331
0
    struct ofpbuf buf, reply, delete;
332
333
0
    if (netlink_flush_supports_zone()) {
334
0
        return nl_ct_flush_zone_with_cta_zone(flush_zone);
335
0
    }
336
337
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
338
0
    ofpbuf_init(&delete, NL_DUMP_BUFSIZE);
339
340
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
341
0
                        IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
342
0
    nl_msg_put_be16(&buf, CTA_ZONE, htons(flush_zone));
343
0
    nl_dump_start(&dump, NETLINK_NETFILTER, &buf);
344
0
    ofpbuf_clear(&buf);
345
346
0
    for (;;) {
347
0
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
348
0
        enum nl_ct_event_type event_type;
349
0
        uint8_t nfgen_family;
350
0
        uint16_t zone = 0;
351
352
0
        if (!nl_dump_next(&dump, &reply, &buf)) {
353
0
            break;
354
0
        }
355
356
0
        if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family,
357
0
                                       attrs)) {
358
0
            continue;
359
0
        };
360
361
0
        if (attrs[CTA_ZONE]) {
362
0
            zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
363
0
        }
364
365
0
        if (zone != flush_zone) {
366
            /* The entry is not in the zone we're flushing. */
367
0
            continue;
368
0
        }
369
0
        nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK,
370
0
                            IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
371
372
0
        nl_msg_put_be16(&delete, CTA_ZONE, htons(zone));
373
0
        nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1,
374
0
                          attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN);
375
0
        nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1,
376
0
                          attrs[CTA_ID]->nla_len - NLA_HDRLEN);
377
0
        nl_transact(NETLINK_NETFILTER, &delete, NULL);
378
0
        ofpbuf_clear(&delete);
379
0
    }
380
381
0
    nl_dump_done(&dump);
382
383
0
    ofpbuf_uninit(&delete);
384
0
    ofpbuf_uninit(&buf);
385
386
    /* Expectations are flushed automatically, because they do not
387
     * have a parent connection anymore */
388
0
    return 0;
389
0
}
390
#endif
391
392
/* Conntrack netlink parsing. */
393
394
static bool
395
nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters)
396
0
{
397
0
    static const struct nl_policy policy[] = {
398
0
        [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false },
399
0
        [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false },
400
0
    };
401
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
402
0
    bool parsed;
403
404
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
405
406
0
    if (parsed) {
407
0
        counters->packets
408
0
            = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS]));
409
0
        counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES]));
410
0
    } else {
411
0
        VLOG_ERR_RL(&rl, "Could not parse nested counters. "
412
0
                    "Possibly incompatible Linux kernel version.");
413
0
    }
414
415
0
    return parsed;
416
0
}
417
418
static bool
419
nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp)
420
0
{
421
0
    static const struct nl_policy policy[] = {
422
0
        [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false },
423
0
        [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true },
424
0
    };
425
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
426
0
    bool parsed;
427
428
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
429
430
0
    if (parsed) {
431
0
        timestamp->start
432
0
            = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START]));
433
0
        if (attrs[CTA_TIMESTAMP_STOP]) {
434
0
            timestamp->stop
435
0
                = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP]));
436
0
        }
437
0
    } else {
438
0
        VLOG_ERR_RL(&rl, "Could not parse nested timestamp. "
439
0
                    "Possibly incompatible Linux kernel version.");
440
0
    }
441
442
0
    return parsed;
443
0
}
444
445
static bool
446
nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple)
447
0
{
448
0
    static const struct nl_policy policy[] = {
449
0
        [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true },
450
0
        [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true },
451
0
        [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
452
0
        [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
453
0
    };
454
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
455
0
    bool parsed;
456
457
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
458
459
0
    if (parsed) {
460
0
        if (tuple->l3_type == AF_INET) {
461
0
            if (attrs[CTA_IP_V4_SRC]) {
462
0
                tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]);
463
0
            }
464
0
            if (attrs[CTA_IP_V4_DST]) {
465
0
                tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]);
466
0
            }
467
0
        } else if (tuple->l3_type == AF_INET6) {
468
0
            if (attrs[CTA_IP_V6_SRC]) {
469
0
                memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]),
470
0
                       sizeof tuple->src.in6);
471
0
            }
472
0
            if (attrs[CTA_IP_V6_DST]) {
473
0
                memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]),
474
0
                       sizeof tuple->dst.in6);
475
0
            }
476
0
        } else {
477
0
            VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type);
478
0
            return false;
479
0
        }
480
0
    } else {
481
0
        VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. "
482
0
                    "Possibly incompatible Linux kernel version.");
483
0
    }
484
485
0
    return parsed;
486
0
}
487
488
static bool
489
nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple)
490
0
{
491
0
    static const struct nl_policy policy[] = {
492
0
        [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false },
493
0
        [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true },
494
0
        [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true },
495
0
        [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true },
496
0
        [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true },
497
0
        [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true },
498
0
        [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true },
499
0
        [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true },
500
0
        [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true },
501
0
    };
502
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
503
0
    bool parsed;
504
505
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
506
507
0
    if (parsed) {
508
0
        tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]);
509
510
0
        if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
511
0
            if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE]
512
0
                || !attrs[CTA_PROTO_ICMP_CODE]) {
513
0
                VLOG_ERR_RL(&rl, "Tuple ICMP data missing.");
514
0
                return false;
515
0
            }
516
0
            tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]);
517
0
            tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]);
518
0
            tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]);
519
0
        } else if (tuple->l3_type == AF_INET6 &&
520
0
                   tuple->ip_proto == IPPROTO_ICMPV6) {
521
0
            if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE]
522
0
                || !attrs[CTA_PROTO_ICMPV6_CODE]) {
523
0
                VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing.");
524
0
                return false;
525
0
            }
526
0
            tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]);
527
0
            tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]);
528
0
            tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]);
529
0
        } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) {
530
0
            tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]);
531
0
            tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]);
532
0
        } else {
533
            /* Unsupported IPPROTO and no ports, leave them zeroed.
534
             * We have parsed the ip_proto, so this is not a failure. */
535
0
            VLOG_DBG_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto);
536
0
        }
537
0
    } else {
538
0
        VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. "
539
0
                    "Possibly incompatible Linux kernel version.");
540
0
    }
541
542
0
    return parsed;
543
0
}
544
545
static bool
546
nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
547
                  uint16_t l3_type)
548
0
{
549
0
    static const struct nl_policy policy[] = {
550
0
        [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false },
551
0
        [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false },
552
0
    };
553
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
554
0
    bool parsed;
555
556
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
557
558
0
    memset(tuple, 0, sizeof *tuple);
559
560
0
    if (parsed) {
561
0
        tuple->l3_type = l3_type;
562
563
0
        if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple)
564
0
            || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) {
565
0
            struct ds ds;
566
567
0
            ds_init(&ds);
568
0
            ct_dpif_format_tuple(&ds, tuple);
569
570
0
            VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
571
0
            ds_destroy(&ds);
572
573
0
            memset(tuple, 0, sizeof *tuple);
574
0
            return false;
575
0
        }
576
0
    } else {
577
0
        VLOG_ERR_RL(&rl, "Could not parse nested tuple options. "
578
0
                    "Possibly incompatible Linux kernel version.");
579
0
    }
580
581
0
    return parsed;
582
0
}
583
584
static bool
585
nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
586
0
{
587
0
    size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_IP);
588
589
0
    if (tuple->l3_type == AF_INET) {
590
0
        nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip);
591
0
        nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip);
592
0
    } else if (tuple->l3_type == AF_INET6) {
593
0
        nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6);
594
0
        nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6);
595
0
    } else {
596
0
        VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".",
597
0
                     tuple->l3_type);
598
0
        return false;
599
0
    }
600
601
0
    nl_msg_end_nested(buf, offset);
602
0
    return true;
603
0
}
604
605
static bool
606
nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
607
0
{
608
0
    size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_PROTO);
609
610
0
    nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto);
611
612
0
    if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
613
0
        nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id);
614
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type);
615
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code);
616
0
    } else if (tuple->l3_type == AF_INET6 &&
617
0
               tuple->ip_proto == IPPROTO_ICMPV6) {
618
0
        nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id);
619
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type);
620
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code);
621
0
    } else if (tuple->ip_proto == IPPROTO_TCP ||
622
0
               tuple->ip_proto == IPPROTO_UDP ||
623
0
               tuple->ip_proto == IPPROTO_SCTP) {
624
0
        nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port);
625
0
        nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port);
626
0
    } else {
627
0
        VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".",
628
0
                     tuple->ip_proto);
629
0
        return false;
630
0
    }
631
632
0
    nl_msg_end_nested(buf, offset);
633
0
    return true;
634
0
}
635
636
static bool
637
nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple,
638
                   enum ctattr_type type)
639
0
{
640
0
    if (type != CTA_TUPLE_ORIG && type != CTA_TUPLE_REPLY &&
641
0
        type != CTA_TUPLE_MASTER) {
642
0
        return false;
643
0
    }
644
645
0
    size_t offset = nl_msg_start_nested(buf, type);
646
647
0
    if (!nl_ct_put_tuple_ip(buf, tuple)) {
648
0
        return false;
649
0
    }
650
0
    if (!nl_ct_put_tuple_proto(buf, tuple)) {
651
0
        return false;
652
0
    }
653
654
0
    nl_msg_end_nested(buf, offset);
655
0
    return true;
656
0
}
657
658
/* Translate netlink TCP state to CT_DPIF_TCP state. */
659
static uint8_t
660
nl_ct_tcp_state_to_dpif(uint8_t state)
661
0
{
662
#ifdef _WIN32
663
    /* Windows currently sends up CT_DPIF_TCP state */
664
    return state;
665
#else
666
0
    switch (state) {
667
0
    case TCP_CONNTRACK_NONE:
668
0
        return CT_DPIF_TCPS_CLOSED;
669
0
    case TCP_CONNTRACK_SYN_SENT:
670
0
        return CT_DPIF_TCPS_SYN_SENT;
671
0
    case TCP_CONNTRACK_SYN_SENT2:
672
0
        return CT_DPIF_TCPS_SYN_SENT;
673
0
    case TCP_CONNTRACK_SYN_RECV:
674
0
        return CT_DPIF_TCPS_SYN_RECV;
675
0
    case TCP_CONNTRACK_ESTABLISHED:
676
0
        return CT_DPIF_TCPS_ESTABLISHED;
677
0
    case TCP_CONNTRACK_FIN_WAIT:
678
0
        return CT_DPIF_TCPS_FIN_WAIT_1;
679
0
    case TCP_CONNTRACK_CLOSE_WAIT:
680
0
        return CT_DPIF_TCPS_CLOSE_WAIT;
681
0
    case TCP_CONNTRACK_LAST_ACK:
682
0
        return CT_DPIF_TCPS_LAST_ACK;
683
0
    case TCP_CONNTRACK_TIME_WAIT:
684
0
        return CT_DPIF_TCPS_TIME_WAIT;
685
0
    case TCP_CONNTRACK_CLOSE:
686
0
        return CT_DPIF_TCPS_CLOSING;
687
0
    default:
688
0
        return CT_DPIF_TCPS_CLOSED;
689
0
    }
690
0
#endif
691
0
}
692
693
static uint8_t
694
ip_ct_tcp_flags_to_dpif(uint8_t flags)
695
0
{
696
#ifdef _WIN32
697
    /* Windows currently sends up CT_DPIF_TCP flags */
698
    return flags;
699
#else
700
0
    uint8_t ret = 0;
701
0
#define CT_DPIF_TCP_FLAG(FLAG) \
702
0
        ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
703
0
    CT_DPIF_TCP_FLAGS
704
0
#undef CT_DPIF_TCP_FLAG
705
0
    return ret;
706
0
#endif
707
0
}
708
709
static bool
710
nl_ct_parse_protoinfo_tcp(struct nlattr *nla,
711
                          struct ct_dpif_protoinfo *protoinfo)
712
0
{
713
0
    static const struct nl_policy policy[] = {
714
0
        [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false },
715
0
        [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8,
716
0
                                                .optional = true },
717
0
        [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8,
718
0
                                             .optional = true },
719
0
        [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16,
720
0
                                               .optional = true },
721
0
        [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16,
722
0
                                            .optional = true },
723
0
    };
724
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
725
0
    bool parsed;
726
727
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
728
729
0
    if (parsed) {
730
0
        const struct nf_ct_tcp_flags *flags_orig, *flags_reply;
731
0
        uint8_t state;
732
0
        protoinfo->proto = IPPROTO_TCP;
733
0
        state = nl_ct_tcp_state_to_dpif(
734
0
            nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE]));
735
        /* The connection tracker keeps only one tcp state for the
736
         * connection, but our structures store a separate state for
737
         * each endpoint.  Here we duplicate the state. */
738
0
        protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state;
739
740
0
        if (attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]) {
741
0
            protoinfo->tcp.wscale_orig =
742
0
                nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
743
0
        }
744
0
        if (attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]) {
745
0
            protoinfo->tcp.wscale_reply =
746
0
                nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
747
0
        }
748
0
        if (attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
749
0
            flags_orig =
750
0
                nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL],
751
0
                                   sizeof *flags_orig);
752
0
            protoinfo->tcp.flags_orig =
753
0
                ip_ct_tcp_flags_to_dpif(flags_orig->flags);
754
0
        }
755
0
        if (attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
756
0
            flags_reply =
757
0
                nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY],
758
0
                                   sizeof *flags_reply);
759
0
            protoinfo->tcp.flags_reply =
760
0
                ip_ct_tcp_flags_to_dpif(flags_reply->flags);
761
0
        }
762
0
    } else {
763
0
        VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. "
764
0
                    "Possibly incompatible Linux kernel version.");
765
0
    }
766
767
0
    return parsed;
768
0
}
769
770
/* Translate netlink SCTP state to CT_DPIF_SCTP state. */
771
static uint8_t
772
nl_ct_sctp_state_to_dpif(uint8_t state)
773
0
{
774
#ifdef _WIN32
775
    /* For now, return the CT_DPIF_SCTP state. Not sure what windows does. */
776
    return state;
777
#else
778
0
    switch (state) {
779
0
    case SCTP_CONNTRACK_COOKIE_WAIT:
780
0
        return CT_DPIF_SCTP_STATE_COOKIE_WAIT;
781
0
    case SCTP_CONNTRACK_COOKIE_ECHOED:
782
0
        return CT_DPIF_SCTP_STATE_COOKIE_ECHOED;
783
0
    case SCTP_CONNTRACK_ESTABLISHED:
784
0
        return CT_DPIF_SCTP_STATE_ESTABLISHED;
785
0
    case SCTP_CONNTRACK_SHUTDOWN_SENT:
786
0
        return CT_DPIF_SCTP_STATE_SHUTDOWN_SENT;
787
0
    case SCTP_CONNTRACK_SHUTDOWN_RECD:
788
0
        return CT_DPIF_SCTP_STATE_SHUTDOWN_RECD;
789
0
    case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
790
0
        return CT_DPIF_SCTP_STATE_SHUTDOWN_ACK_SENT;
791
0
    case SCTP_CONNTRACK_HEARTBEAT_SENT:
792
0
        return CT_DPIF_SCTP_STATE_HEARTBEAT_SENT;
793
0
    case SCTP_CONNTRACK_HEARTBEAT_ACKED:
794
0
        return CT_DPIF_SCTP_STATE_HEARTBEAT_ACKED;
795
0
    case SCTP_CONNTRACK_CLOSED:
796
        /* Fall Through. */
797
0
    case SCTP_CONNTRACK_NONE:
798
        /* Fall Through. */
799
0
    default:
800
0
        return CT_DPIF_SCTP_STATE_CLOSED;
801
0
    }
802
0
#endif
803
0
}
804
805
static bool
806
nl_ct_parse_protoinfo_sctp(struct nlattr *nla,
807
                           struct ct_dpif_protoinfo *protoinfo)
808
0
{
809
0
    static const struct nl_policy policy[] = {
810
0
        [CTA_PROTOINFO_SCTP_STATE] = { .type = NL_A_U8, .optional = false },
811
0
        [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NL_A_U32,
812
0
                                               .optional = false },
813
0
        [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NL_A_U32,
814
0
                                            .optional = false },
815
0
    };
816
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
817
0
    bool parsed;
818
819
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
820
0
    if (parsed) {
821
0
        protoinfo->proto = IPPROTO_SCTP;
822
823
0
        protoinfo->sctp.state = nl_ct_sctp_state_to_dpif(
824
0
            nl_attr_get_u8(attrs[CTA_PROTOINFO_SCTP_STATE]));
825
0
        protoinfo->sctp.vtag_orig = nl_attr_get_u32(
826
0
            attrs[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
827
0
        protoinfo->sctp.vtag_reply = nl_attr_get_u32(
828
0
            attrs[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
829
0
    } else {
830
0
        VLOG_ERR_RL(&rl, "Could not parse nested SCTP protoinfo options. "
831
0
                    "Possibly incompatible Linux kernel version.");
832
0
    }
833
834
0
    return parsed;
835
0
}
836
837
static bool
838
nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo)
839
0
{
840
    /* These are mutually exclusive. */
841
0
    static const struct nl_policy policy[] = {
842
0
        [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true },
843
0
        [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true },
844
0
    };
845
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
846
0
    bool parsed;
847
848
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
849
850
0
    memset(protoinfo, 0, sizeof *protoinfo);
851
852
0
    if (parsed) {
853
0
        if (attrs[CTA_PROTOINFO_TCP]) {
854
0
            parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP],
855
0
                                               protoinfo);
856
0
        } else if (attrs[CTA_PROTOINFO_SCTP]) {
857
0
            parsed = nl_ct_parse_protoinfo_sctp(attrs[CTA_PROTOINFO_SCTP],
858
0
                                                protoinfo);
859
0
        } else {
860
0
            VLOG_WARN_RL(&rl, "Empty protoinfo!");
861
0
        }
862
0
    } else {
863
0
        VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. "
864
0
                    "Possibly incompatible Linux kernel version.");
865
0
    }
866
867
0
    return parsed;
868
0
}
869
870
static bool
871
nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper)
872
0
{
873
0
    static const struct nl_policy policy[] = {
874
0
        [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false },
875
0
    };
876
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
877
0
    bool parsed;
878
879
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
880
881
0
    memset(helper, 0, sizeof *helper);
882
883
0
    if (parsed) {
884
0
        helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME]));
885
0
    } else {
886
0
        VLOG_ERR_RL(&rl, "Could not parse nested helper options. "
887
0
                    "Possibly incompatible Linux kernel version.");
888
0
    }
889
890
0
    return parsed;
891
0
}
892
893
static int nl_ct_timeout_policy_max_attr[] = {
894
    [IPPROTO_TCP] = CTA_TIMEOUT_TCP_MAX,
895
    [IPPROTO_UDP] = CTA_TIMEOUT_UDP_MAX,
896
    [IPPROTO_ICMP] = CTA_TIMEOUT_ICMP_MAX,
897
    [IPPROTO_ICMPV6] = CTA_TIMEOUT_ICMPV6_MAX
898
};
899
900
static void
901
nl_ct_set_timeout_policy_attr(struct nl_ct_timeout_policy *nl_tp,
902
                              uint32_t attr, uint32_t val)
903
0
{
904
0
    nl_tp->present |= 1 << attr;
905
0
    nl_tp->attrs[attr] = val;
906
0
}
907
908
static int
909
nl_ct_parse_tcp_timeout_policy_data(struct nlattr *nla,
910
                                    struct nl_ct_timeout_policy *nl_tp)
911
0
{
912
0
    static const struct nl_policy policy[] = {
913
0
        [CTA_TIMEOUT_TCP_SYN_SENT] =    { .type = NL_A_BE32,
914
0
                                          .optional = false },
915
0
        [CTA_TIMEOUT_TCP_SYN_RECV] =    { .type = NL_A_BE32,
916
0
                                          .optional = false },
917
0
        [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NL_A_BE32,
918
0
                                          .optional = false },
919
0
        [CTA_TIMEOUT_TCP_FIN_WAIT] =    { .type = NL_A_BE32,
920
0
                                          .optional = false },
921
0
        [CTA_TIMEOUT_TCP_CLOSE_WAIT] =  { .type = NL_A_BE32,
922
0
                                          .optional = false },
923
0
        [CTA_TIMEOUT_TCP_LAST_ACK] =    { .type = NL_A_BE32,
924
0
                                          .optional = false },
925
0
        [CTA_TIMEOUT_TCP_TIME_WAIT] =   { .type = NL_A_BE32,
926
0
                                          .optional = false },
927
0
        [CTA_TIMEOUT_TCP_CLOSE] =       { .type = NL_A_BE32,
928
0
                                          .optional = false },
929
0
        [CTA_TIMEOUT_TCP_SYN_SENT2] =   { .type = NL_A_BE32,
930
0
                                          .optional = false },
931
0
        [CTA_TIMEOUT_TCP_RETRANS] =     { .type = NL_A_BE32,
932
0
                                          .optional = false },
933
0
        [CTA_TIMEOUT_TCP_UNACK] =       { .type = NL_A_BE32,
934
0
                                          .optional = false },
935
0
    };
936
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
937
938
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
939
0
        VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. "
940
0
                    "Possibly incompatible Linux kernel version.");
941
0
        return EINVAL;
942
0
    }
943
944
0
    for (int i = CTA_TIMEOUT_TCP_SYN_SENT; i <= CTA_TIMEOUT_TCP_UNACK; i++) {
945
0
        nl_ct_set_timeout_policy_attr(nl_tp, i,
946
0
                                      ntohl(nl_attr_get_be32(attrs[i])));
947
0
    }
948
0
    return 0;
949
0
}
950
951
static int
952
nl_ct_parse_udp_timeout_policy_data(struct nlattr *nla,
953
                                    struct nl_ct_timeout_policy *nl_tp)
954
0
{
955
0
    static const struct nl_policy policy[] = {
956
0
        [CTA_TIMEOUT_UDP_UNREPLIED] =   { .type = NL_A_BE32,
957
0
                                          .optional = false },
958
0
        [CTA_TIMEOUT_UDP_REPLIED] =     { .type = NL_A_BE32,
959
0
                                          .optional = false },
960
0
    };
961
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
962
963
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
964
0
        VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. "
965
0
                    "Possibly incompatible Linux kernel version.");
966
0
        return EINVAL;
967
0
    }
968
969
0
    for (int i = CTA_TIMEOUT_UDP_UNREPLIED; i <= CTA_TIMEOUT_UDP_REPLIED;
970
0
         i++) {
971
0
        nl_ct_set_timeout_policy_attr(nl_tp, i,
972
0
                                      ntohl(nl_attr_get_be32(attrs[i])));
973
0
    }
974
0
    return 0;
975
0
}
976
977
static int
978
nl_ct_parse_icmp_timeout_policy_data(struct nlattr *nla,
979
                                     struct nl_ct_timeout_policy *nl_tp)
980
0
{
981
0
    static const struct nl_policy policy[] = {
982
0
        [CTA_TIMEOUT_ICMP_TIMEOUT] =   { .type = NL_A_BE32,
983
0
                                         .optional = false },
984
0
    };
985
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
986
987
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
988
0
        VLOG_ERR_RL(&rl, "Could not parse nested icmp timeout options. "
989
0
                    "Possibly incompatible Linux kernel version.");
990
0
        return EINVAL;
991
0
    }
992
993
0
    nl_ct_set_timeout_policy_attr(
994
0
        nl_tp, CTA_TIMEOUT_ICMP_TIMEOUT,
995
0
        ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMP_TIMEOUT])));
996
0
    return 0;
997
0
}
998
999
static int
1000
nl_ct_parse_icmpv6_timeout_policy_data(struct nlattr *nla,
1001
                                       struct nl_ct_timeout_policy *nl_tp)
1002
0
{
1003
0
    static const struct nl_policy policy[] = {
1004
0
        [CTA_TIMEOUT_ICMPV6_TIMEOUT] =   { .type = NL_A_BE32,
1005
0
                                           .optional = false },
1006
0
    };
1007
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
1008
1009
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
1010
0
        VLOG_ERR_RL(&rl, "Could not parse nested icmpv6 timeout options. "
1011
0
                    "Possibly incompatible Linux kernel version.");
1012
0
        return EINVAL;
1013
0
    }
1014
1015
0
    nl_ct_set_timeout_policy_attr(
1016
0
        nl_tp, CTA_TIMEOUT_ICMPV6_TIMEOUT,
1017
0
        ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMPV6_TIMEOUT])));
1018
0
    return 0;
1019
0
}
1020
1021
static int
1022
nl_ct_parse_timeout_policy_data(struct nlattr *nla,
1023
                                struct nl_ct_timeout_policy *nl_tp)
1024
0
{
1025
0
    switch (nl_tp->l4num) {
1026
0
        case IPPROTO_TCP:
1027
0
            return nl_ct_parse_tcp_timeout_policy_data(nla, nl_tp);
1028
0
        case IPPROTO_UDP:
1029
0
            return nl_ct_parse_udp_timeout_policy_data(nla, nl_tp);
1030
0
        case IPPROTO_ICMP:
1031
0
            return nl_ct_parse_icmp_timeout_policy_data(nla, nl_tp);
1032
0
        case IPPROTO_ICMPV6:
1033
0
            return nl_ct_parse_icmpv6_timeout_policy_data(nla, nl_tp);
1034
0
        default:
1035
0
            return EINVAL;
1036
0
    }
1037
0
}
1038
1039
static int
1040
nl_ct_timeout_policy_from_ofpbuf(struct ofpbuf *buf,
1041
                                 struct nl_ct_timeout_policy *nl_tp,
1042
                                 bool default_tp)
1043
0
{
1044
0
    static const struct nl_policy policy[] = {
1045
0
        [CTA_TIMEOUT_NAME] =    { .type = NL_A_STRING, .optional = false },
1046
0
        [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false },
1047
0
        [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false },
1048
0
        [CTA_TIMEOUT_DATA] =    { .type = NL_A_NESTED, .optional = false }
1049
0
    };
1050
0
    static const struct nl_policy policy_default_tp[] = {
1051
0
        [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false },
1052
0
        [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false },
1053
0
        [CTA_TIMEOUT_DATA] =    { .type = NL_A_NESTED, .optional = false }
1054
0
    };
1055
1056
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
1057
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
1058
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
1059
0
    struct nfgenmsg *nfmsg = ofpbuf_try_pull(&b, sizeof *nfmsg);
1060
1061
0
    if (!nlmsg || !nfmsg
1062
0
        || NFNL_SUBSYS_ID(nlmsg->nlmsg_type) != NFNL_SUBSYS_CTNETLINK_TIMEOUT
1063
0
        || nfmsg->version != NFNETLINK_V0
1064
0
        || !nl_policy_parse(&b, 0, default_tp ? policy_default_tp : policy,
1065
0
                            attrs, default_tp ? ARRAY_SIZE(policy_default_tp) :
1066
0
                                                ARRAY_SIZE(policy))) {
1067
0
        return EINVAL;
1068
0
    }
1069
1070
0
    if (!default_tp) {
1071
0
        ovs_strlcpy(nl_tp->name, nl_attr_get_string(attrs[CTA_TIMEOUT_NAME]),
1072
0
                    sizeof nl_tp->name);
1073
0
    }
1074
0
    nl_tp->l3num = ntohs(nl_attr_get_be16(attrs[CTA_TIMEOUT_L3PROTO]));
1075
0
    nl_tp->l4num = nl_attr_get_u8(attrs[CTA_TIMEOUT_L4PROTO]);
1076
0
    nl_tp->present = 0;
1077
1078
0
    return nl_ct_parse_timeout_policy_data(attrs[CTA_TIMEOUT_DATA], nl_tp);
1079
0
}
1080
1081
int
1082
nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp)
1083
0
{
1084
0
    struct ofpbuf buf;
1085
0
    size_t offset;
1086
1087
0
    ofpbuf_init(&buf, 512);
1088
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1089
0
                        IPCTNL_MSG_TIMEOUT_NEW, NLM_F_REQUEST | NLM_F_CREATE
1090
0
                        | NLM_F_ACK | NLM_F_REPLACE);
1091
1092
0
    nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, nl_tp->name);
1093
0
    nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num));
1094
0
    nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num);
1095
1096
0
    offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA);
1097
0
    for (int i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) {
1098
0
        if (nl_tp->present & 1 << i) {
1099
0
            nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i]));
1100
0
        }
1101
0
    }
1102
0
    nl_msg_end_nested(&buf, offset);
1103
1104
0
    int err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
1105
0
    ofpbuf_uninit(&buf);
1106
0
    return err;
1107
0
}
1108
1109
int
1110
nl_ct_get_timeout_policy(const char *tp_name,
1111
                         struct nl_ct_timeout_policy *nl_tp)
1112
0
{
1113
0
    struct ofpbuf request, *reply;
1114
1115
0
    ofpbuf_init(&request, 512);
1116
0
    nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1117
0
                        IPCTNL_MSG_TIMEOUT_GET, NLM_F_REQUEST | NLM_F_ACK);
1118
0
    nl_msg_put_string(&request, CTA_TIMEOUT_NAME, tp_name);
1119
0
    int err = nl_transact(NETLINK_NETFILTER, &request, &reply);
1120
0
    if (err) {
1121
0
        goto out;
1122
0
    }
1123
1124
0
    err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, false);
1125
1126
0
out:
1127
0
    ofpbuf_uninit(&request);
1128
0
    ofpbuf_delete(reply);
1129
0
    return err;
1130
0
}
1131
1132
int
1133
nl_ct_del_timeout_policy(const char *tp_name)
1134
0
{
1135
0
    struct ofpbuf buf;
1136
1137
0
    ofpbuf_init(&buf, 64);
1138
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1139
0
                        IPCTNL_MSG_TIMEOUT_DELETE, NLM_F_REQUEST | NLM_F_ACK);
1140
1141
0
    nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, tp_name);
1142
0
    int err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
1143
0
    ofpbuf_uninit(&buf);
1144
0
    return err;
1145
0
}
1146
1147
struct nl_ct_timeout_policy_dump_state {
1148
    struct nl_dump dump;
1149
    struct ofpbuf buf;
1150
};
1151
1152
int
1153
nl_ct_timeout_policy_dump_start(
1154
    struct nl_ct_timeout_policy_dump_state **statep)
1155
0
{
1156
0
    struct ofpbuf request;
1157
0
    struct nl_ct_timeout_policy_dump_state *state;
1158
1159
0
    *statep = state = xzalloc(sizeof *state);
1160
0
    ofpbuf_init(&request, 512);
1161
0
    nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1162
0
                        IPCTNL_MSG_TIMEOUT_GET,
1163
0
                        NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
1164
1165
0
    nl_dump_start(&state->dump, NETLINK_NETFILTER, &request);
1166
0
    ofpbuf_uninit(&request);
1167
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1168
0
    return 0;
1169
0
}
1170
1171
int
1172
nl_ct_timeout_policy_dump_next(struct nl_ct_timeout_policy_dump_state *state,
1173
                               struct nl_ct_timeout_policy *nl_tp)
1174
0
{
1175
0
    struct ofpbuf reply;
1176
1177
0
    if (!nl_dump_next(&state->dump, &reply, &state->buf)) {
1178
0
        return EOF;
1179
0
    }
1180
0
    int err = nl_ct_timeout_policy_from_ofpbuf(&reply, nl_tp, false);
1181
0
    ofpbuf_uninit(&reply);
1182
0
    return err;
1183
0
}
1184
1185
int
1186
nl_ct_timeout_policy_dump_done(struct nl_ct_timeout_policy_dump_state *state)
1187
0
{
1188
0
    int err  = nl_dump_done(&state->dump);
1189
0
    ofpbuf_uninit(&state->buf);
1190
0
    free(state);
1191
0
    return err;
1192
0
}
1193
1194
/* Translate netlink entry status flags to CT_DPIF_TCP status flags. */
1195
static uint32_t
1196
ips_status_to_dpif_flags(uint32_t status)
1197
0
{
1198
0
    uint32_t ret = 0;
1199
0
#define CT_DPIF_STATUS_FLAG(FLAG) \
1200
0
        ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
1201
0
    CT_DPIF_STATUS_FLAGS
1202
0
#undef CT_DPIF_STATUS_FLAG
1203
0
    return ret;
1204
0
}
1205
1206
static bool
1207
nl_ct_parse_header_policy(struct ofpbuf *buf,
1208
        enum nl_ct_event_type *event_type,
1209
        uint8_t *nfgen_family,
1210
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)])
1211
0
{
1212
0
    struct nlmsghdr *nlh;
1213
0
    struct nfgenmsg *nfm;
1214
0
    uint8_t type;
1215
1216
0
    nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
1217
0
    nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm);
1218
0
    if (!nfm) {
1219
0
        VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg).");
1220
0
        return false;
1221
0
    }
1222
0
    if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) {
1223
0
        VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).",
1224
0
                 NFNL_SUBSYS_ID(nlh->nlmsg_type));
1225
0
        return false;
1226
0
    }
1227
0
    if (nfm->version != NFNETLINK_V0) {
1228
0
        VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).",
1229
0
                 NFNL_MSG_TYPE(nfm->version));
1230
0
        return false;
1231
0
    }
1232
1233
0
    if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm,
1234
0
                         nfnlgrp_conntrack_policy, attrs,
1235
0
                         ARRAY_SIZE(nfnlgrp_conntrack_policy))) {
1236
0
        VLOG_ERR_RL(&rl, "Received bad nfnl message (policy).");
1237
0
        return false;
1238
0
    }
1239
1240
0
    type = NFNL_MSG_TYPE(nlh->nlmsg_type);
1241
0
    *nfgen_family = nfm->nfgen_family;
1242
1243
0
    switch (type) {
1244
0
    case IPCTNL_MSG_CT_NEW:
1245
0
        *event_type = nlh->nlmsg_flags & NLM_F_CREATE
1246
0
            ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE;
1247
0
        break;
1248
0
    case IPCTNL_MSG_CT_DELETE:
1249
0
        *event_type = NL_CT_EVENT_DELETE;
1250
0
        break;
1251
0
    default:
1252
0
        VLOG_ERR_RL(&rl, "Can't parse conntrack event type.");
1253
0
        return false;
1254
0
    }
1255
1256
0
    return true;
1257
0
}
1258
1259
static bool
1260
nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
1261
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
1262
        uint8_t nfgen_family)
1263
0
{
1264
0
    if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig,
1265
0
                           nfgen_family)) {
1266
0
        return false;
1267
0
    }
1268
0
    if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply,
1269
0
                           nfgen_family)) {
1270
0
        return false;
1271
0
    }
1272
0
    if (attrs[CTA_COUNTERS_ORIG] &&
1273
0
        !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG],
1274
0
                              &entry->counters_orig)) {
1275
0
        return false;
1276
0
    }
1277
0
    if (attrs[CTA_COUNTERS_REPLY] &&
1278
0
        !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY],
1279
0
                              &entry->counters_reply)) {
1280
0
        return false;
1281
0
    }
1282
0
    if (attrs[CTA_TIMESTAMP] &&
1283
0
        !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) {
1284
0
        return false;
1285
0
    }
1286
0
    if (attrs[CTA_ID]) {
1287
0
        entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID]));
1288
0
    }
1289
0
    if (attrs[CTA_ZONE]) {
1290
0
        entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
1291
0
    }
1292
0
    if (attrs[CTA_STATUS]) {
1293
0
        entry->status = ips_status_to_dpif_flags(
1294
0
            ntohl(nl_attr_get_be32(attrs[CTA_STATUS])));
1295
0
    }
1296
0
    if (attrs[CTA_TIMEOUT]) {
1297
0
        entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT]));
1298
0
    }
1299
0
    if (attrs[CTA_MARK]) {
1300
0
        entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK]));
1301
0
    }
1302
0
    if (attrs[CTA_LABELS]) {
1303
0
        entry->have_labels = true;
1304
0
        memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]),
1305
0
               MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS])));
1306
0
    }
1307
0
    if (attrs[CTA_PROTOINFO] &&
1308
0
        !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) {
1309
0
        return false;
1310
0
    }
1311
0
    if (attrs[CTA_HELP] &&
1312
0
        !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) {
1313
0
        return false;
1314
0
    }
1315
0
    if (attrs[CTA_TUPLE_MASTER] &&
1316
0
        !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_parent,
1317
0
                           nfgen_family)) {
1318
0
        return false;
1319
0
    }
1320
0
    return true;
1321
0
}
1322
1323
bool
1324
nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry,
1325
                  enum nl_ct_event_type *event_type)
1326
0
{
1327
0
    struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
1328
0
    uint8_t nfgen_family;
1329
1330
0
    memset(entry, 0, sizeof *entry);
1331
0
    if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) {
1332
0
        return false;
1333
0
    };
1334
1335
0
    if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
1336
0
        ct_dpif_entry_uninit(entry);
1337
0
        memset(entry, 0, sizeof *entry);
1338
0
        return false;
1339
0
    }
1340
1341
0
    return true;
1342
0
}
1343
1344
/* NetFilter utility functions. */
1345
1346
/* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
1347
 * initially empty.  'expected_payload' should be an estimate of the number of
1348
 * payload bytes to be supplied; if the size of the payload is unknown a value
1349
 * of 0 is acceptable.
1350
 *
1351
 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
1352
 *
1353
 * 'flags' is a bit-mask that indicates what kind of request is being made.  It
1354
 * is often NLM_F_REQUEST indicating that a request is being made, commonly
1355
 * or'd with NLM_F_ACK to request an acknowledgement.  NLM_F_DUMP flag requests
1356
 * a dump of the table.
1357
 *
1358
 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
1359
 *
1360
 * 'cmd' is an enumerated value specific to the 'subsystem'.
1361
 *
1362
 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now.  nl_sock_send() will
1363
 * fill it in just before sending the message.
1364
 *
1365
 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
1366
 * not NetFilter Netlink messages. */
1367
static void
1368
nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
1369
                    int family, uint8_t subsystem, uint8_t cmd,
1370
                    uint32_t flags)
1371
0
{
1372
0
    struct nfgenmsg *nfm;
1373
1374
0
    nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload,
1375
0
                        subsystem << 8 | cmd, flags);
1376
0
    ovs_assert(msg->size == NLMSG_HDRLEN);
1377
0
    nfm = nl_msg_put_uninit(msg, sizeof *nfm);
1378
0
    nfm->nfgen_family = family;
1379
0
    nfm->version = NFNETLINK_V0;
1380
0
    nfm->res_id = 0;
1381
#ifdef _WIN32
1382
    /* nfgenmsg contains ovsHdr padding in windows */
1383
    nfm->ovsHdr.dp_ifindex = 0;
1384
#endif
1385
0
}