Coverage Report

Created: 2026-06-22 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openvswitch/lib/netlink-conntrack.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2015 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
19
#include "netlink-conntrack.h"
20
21
#include <errno.h>
22
#include <linux/netfilter/nfnetlink.h>
23
#include <linux/netfilter/nfnetlink_conntrack.h>
24
#include <linux/netfilter/nf_conntrack_common.h>
25
#include <linux/netfilter/nf_conntrack_tcp.h>
26
#include <linux/netfilter/nf_conntrack_ftp.h>
27
#include <linux/netfilter/nf_conntrack_sctp.h>
28
29
#include "byte-order.h"
30
#include "compiler.h"
31
#include "openvswitch/dynamic-string.h"
32
#include "netlink.h"
33
#include "netlink-socket.h"
34
#include "openvswitch/ofpbuf.h"
35
#include "openvswitch/vlog.h"
36
#include "openvswitch/poll-loop.h"
37
#include "timeval.h"
38
#include "unixctl.h"
39
#include "util.h"
40
41
VLOG_DEFINE_THIS_MODULE(netlink_conntrack);
42
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
43
44
/* This module works only if conntrack modules and features are enabled in the
45
 * Linux kernel.  This can be done from a root shell like this:
46
 *
47
 * $ modprobe ip_conntrack
48
 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
49
 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
50
 *
51
 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
52
 * module, there must be a connlabel rule in iptables for space to be reserved
53
 * for the labels (see kernel source connlabel_mt_check()).  Such a rule can be
54
 * inserted from a root shell like this:
55
 *
56
 * $ iptables -A INPUT -m conntrack -m connlabel \
57
 *   --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
58
 */
59
60
/* Some attributes were introduced in later kernels: with these definitions
61
 * we should be able to compile userspace against Linux 2.6.32+. */
62
63
0
/* Top-level conntrack attribute numbers, each expressed as an offset from
 * CTA_SECMARK. */
#define CTA_ZONE          (CTA_SECMARK + 1)
64
#define CTA_SECCTX        (CTA_SECMARK + 2)
65
0
#define CTA_TIMESTAMP     (CTA_SECMARK + 3)
66
#define CTA_MARK_MASK     (CTA_SECMARK + 4)
67
0
#define CTA_LABELS        (CTA_SECMARK + 5)
68
#define CTA_LABELS_MASK   (CTA_SECMARK + 6)
69
70
0
/* Attribute numbers used as policy indices by nl_ct_parse_timestamp(). */
#define CTA_TIMESTAMP_START 1
71
0
#define CTA_TIMESTAMP_STOP  2
72
73
0
/* Bit number and the corresponding mask.
 * NOTE(review): presumably a bit of the CTA_STATUS word -- confirm. */
#define IPS_TEMPLATE_BIT 11
74
0
#define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
75
76
0
/* Bit number and the corresponding mask.
 * NOTE(review): presumably a bit of the CTA_STATUS word -- confirm. */
#define IPS_UNTRACKED_BIT 12
77
0
#define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
78
79
/* Netlink attribute policy for top-level conntrack attributes.  The array
 * length also sizes every 'attrs' array in this file that is handed to
 * nl_ct_parse_header_policy().  Entries with '.optional = false' (both
 * tuples, CTA_STATUS and CTA_ID) are the mandatory ones. */
static const struct nl_policy nfnlgrp_conntrack_policy[] = {
80
    [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false },
81
    [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false },
82
    [CTA_ZONE] = { .type = NL_A_BE16, .optional = true },
83
    [CTA_STATUS] = { .type = NL_A_BE32, .optional = false },
84
    [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true },
85
    [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true },
86
    [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true },
87
    [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true },
88
    [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true },
89
    [CTA_HELP] = { .type = NL_A_NESTED, .optional = true },
90
    [CTA_MARK] = { .type = NL_A_BE32, .optional = true },
91
    [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true },
92
    [CTA_ID] = { .type = NL_A_BE32, .optional = false },
93
    [CTA_USE] = { .type = NL_A_BE32, .optional = true },
94
    [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true },
95
    [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true },
96
    [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true },
97
    [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true },
98
    /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and
99
     * CTA_LABELS_MASK are not received from kernel. */
    /* NOTE(review): the comment above names CTA_TIMESTAMP as not received,
     * yet this table has a CTA_TIMESTAMP entry -- confirm which is right. */
100
};
101
102
/* Declarations for conntrack netlink dumping. */
103
/* Starts a netfilter netlink request in 'msg'; every request built in this
 * file begins with a call to it.  Defined elsewhere in the file. */
static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
104
                                int family, uint8_t subsystem, uint8_t cmd,
105
                                uint32_t flags);
106
107
/* Parses the message in 'buf' into '*event_type', '*nfgen_family' and
 * 'attrs'; callers skip the message when it returns false.
 * NOTE(review): presumably validates against nfnlgrp_conntrack_policy --
 * confirm at the definition. */
static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
108
        enum nl_ct_event_type *event_type,
109
        uint8_t *nfgen_family,
110
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]);
111
112
/* Converts parsed 'attrs' into 'entry'; returns false on failure. */
static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
113
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
114
        uint8_t nfgen_family);
115
/* Appends 'tuple' to 'buf' as a nested attribute of the given 'type'. */
static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
116
        const struct ct_dpif_tuple *tuple, enum ctattr_type type);
117
118
/* State of one conntrack dump, allocated by nl_ct_dump_start() and freed by
 * nl_ct_dump_done(). */
struct nl_ct_dump_state {
119
    struct nl_dump dump;    /* Netlink dump handle. */
120
    struct ofpbuf buf;      /* Holds the request, then reused by
                             * nl_dump_next() as its working buffer. */
121
    bool filter_zone;       /* True if the caller asked for a single zone. */
122
    uint16_t zone;          /* Zone to keep, host byte order; only meaningful
                             * when 'filter_zone' is true. */
123
};
124
125
/* Conntrack netlink dumping. */
126
127
/* Initialize a conntrack netlink dump. */
128
int
129
nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone,
130
        int *ptot_bkts)
131
0
{
132
0
    struct nl_ct_dump_state *state;
133
134
0
    *statep = state = xzalloc(sizeof *state);
135
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
136
137
0
    if (zone) {
138
0
        state->filter_zone = true;
139
0
        state->zone = *zone;
140
0
    }
141
142
0
    nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
143
0
                        IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
144
0
    if (zone) {
145
0
        nl_msg_put_be16(&state->buf, CTA_ZONE, htons(*zone));
146
0
    }
147
0
    nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf);
148
0
    ofpbuf_clear(&state->buf);
149
150
    /* Buckets to store connections are not used. */
151
0
    *ptot_bkts = -1;
152
153
0
    return 0;
154
0
}
155
156
/* Receive the next 'entry' from the conntrack netlink dump with 'state'.
157
 * Returns 'EOF' when no more entries are available, 0 otherwise.  'entry' may
158
 * be uninitilized memory on entry, and must be uninitialized with
159
 * ct_dpif_entry_uninit() afterwards by the caller.  In case the same 'entry' is
160
 * passed to this function again, the entry must also be uninitialized before
161
 * the next call. */
162
int
163
nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry)
164
0
{
165
0
    struct ofpbuf buf;
166
167
0
    memset(entry, 0, sizeof *entry);
168
0
    for (;;) {
169
0
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
170
0
        enum nl_ct_event_type type;
171
0
        uint8_t nfgen_family;
172
173
0
        if (!nl_dump_next(&state->dump, &buf, &state->buf)) {
174
0
            return EOF;
175
0
        }
176
177
0
        if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) {
178
0
            continue;
179
0
        };
180
181
0
        if (state->filter_zone) {
182
0
            uint16_t entry_zone = attrs[CTA_ZONE]
183
0
                                  ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE]))
184
0
                                  : 0;
185
0
            if (entry_zone != state->zone) {
186
0
                continue;
187
0
            }
188
0
        }
189
190
0
        if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
191
0
            break;
192
0
        }
193
194
0
        ct_dpif_entry_uninit(entry);
195
0
        memset(entry, 0, sizeof *entry);
196
        /* Ignore the failed entry and get the next one. */
197
0
    }
198
199
0
    ofpbuf_uninit(&buf);
200
0
    return 0;
201
0
}
202
203
/* End a conntrack netlink dump. */
204
int
205
nl_ct_dump_done(struct nl_ct_dump_state *state)
206
0
{
207
0
    int error = nl_dump_done(&state->dump);
208
209
0
    ofpbuf_uninit(&state->buf);
210
0
    free(state);
211
0
    return error;
212
0
}
213
214
/* Format conntrack event 'entry' of 'type' to 'ds'. */
215
void
216
nl_ct_format_event_entry(const struct ct_dpif_entry *entry,
217
                         enum nl_ct_event_type type, struct ds *ds,
218
                         bool verbose, bool print_stats)
219
0
{
220
0
    ds_put_format(ds, "%s ",
221
0
                  type == NL_CT_EVENT_NEW ? "NEW"
222
0
                  : type == NL_CT_EVENT_UPDATE ? "UPDATE"
223
0
                  : type == NL_CT_EVENT_DELETE ? "DELETE"
224
0
                  : "UNKNOWN");
225
0
    ct_dpif_format_entry(entry, ds, verbose, print_stats);
226
0
}
227
228
int
229
nl_ct_flush(void)
230
0
{
231
0
    struct ofpbuf buf;
232
0
    int err;
233
234
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
235
236
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
237
0
                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
238
239
0
    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
240
0
    ofpbuf_uninit(&buf);
241
242
    /* Expectations are flushed automatically, because they do not
243
     * have a parent connection anymore */
244
245
0
    return err;
246
0
}
247
248
int
249
nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone)
250
0
{
251
0
    int err;
252
0
    struct ofpbuf buf;
253
254
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
255
0
    nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK,
256
0
                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
257
258
0
    nl_msg_put_be16(&buf, CTA_ZONE, htons(zone));
259
0
    if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) {
260
0
        err = EOPNOTSUPP;
261
0
        goto out;
262
0
    }
263
0
    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
264
0
out:
265
0
    ofpbuf_uninit(&buf);
266
0
    return err;
267
0
}
268
269
static int
270
nl_ct_flush_zone_with_cta_zone(uint16_t flush_zone)
271
0
{
272
0
    struct ofpbuf buf;
273
0
    int err;
274
275
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
276
277
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
278
0
                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
279
0
    nl_msg_put_be16(&buf, CTA_ZONE, htons(flush_zone));
280
281
0
    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
282
0
    ofpbuf_uninit(&buf);
283
284
0
    return err;
285
0
}
286
287
static bool
288
netlink_flush_supports_zone(void)
289
0
{
290
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
291
0
    static bool supported = false;
292
293
0
    if (ovsthread_once_start(&once)) {
294
0
        if (ovs_kernel_is_version_or_newer(6, 8)) {
295
0
            supported = true;
296
0
        } else {
297
0
            VLOG_INFO("Disabling conntrack flush by zone. "
298
0
                      "Not supported in Linux kernel.");
299
0
        }
300
0
        ovsthread_once_done(&once);
301
0
    }
302
0
    return supported;
303
0
}
304
305
int
306
nl_ct_flush_zone(uint16_t flush_zone)
307
0
{
308
    /* In older kernels, there was no netlink interface to flush a specific
309
     * conntrack zone.
310
     * This code dumps every connection, checks the zone and eventually
311
     * delete the entry.
312
     * In newer kernels there is the option to specify a zone for filtering
313
     * during dumps.  Older kernels ignore this option. We set it here in the
314
     * hope we only get relevant entries back, but fall back to filtering here
315
     * to keep compatibility.
316
     *
317
     * This is race-prone, but it is better than using shell scripts.
318
     *
319
     * Additionally newer kernels also support flushing a zone without listing
320
     * it first. */
321
322
0
    struct nl_dump dump;
323
0
    struct ofpbuf buf, reply, delete;
324
325
0
    if (netlink_flush_supports_zone()) {
326
0
        return nl_ct_flush_zone_with_cta_zone(flush_zone);
327
0
    }
328
329
0
    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
330
0
    ofpbuf_init(&delete, NL_DUMP_BUFSIZE);
331
332
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
333
0
                        IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
334
0
    nl_msg_put_be16(&buf, CTA_ZONE, htons(flush_zone));
335
0
    nl_dump_start(&dump, NETLINK_NETFILTER, &buf);
336
0
    ofpbuf_clear(&buf);
337
338
0
    for (;;) {
339
0
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
340
0
        enum nl_ct_event_type event_type;
341
0
        uint8_t nfgen_family;
342
0
        uint16_t zone = 0;
343
344
0
        if (!nl_dump_next(&dump, &reply, &buf)) {
345
0
            break;
346
0
        }
347
348
0
        if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family,
349
0
                                       attrs)) {
350
0
            continue;
351
0
        };
352
353
0
        if (attrs[CTA_ZONE]) {
354
0
            zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
355
0
        }
356
357
0
        if (zone != flush_zone) {
358
            /* The entry is not in the zone we're flushing. */
359
0
            continue;
360
0
        }
361
0
        nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK,
362
0
                            IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
363
364
0
        nl_msg_put_be16(&delete, CTA_ZONE, htons(zone));
365
0
        nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1,
366
0
                          attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN);
367
0
        nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1,
368
0
                          attrs[CTA_ID]->nla_len - NLA_HDRLEN);
369
0
        nl_transact(NETLINK_NETFILTER, &delete, NULL);
370
0
        ofpbuf_clear(&delete);
371
0
    }
372
373
0
    nl_dump_done(&dump);
374
375
0
    ofpbuf_uninit(&delete);
376
0
    ofpbuf_uninit(&buf);
377
378
    /* Expectations are flushed automatically, because they do not
379
     * have a parent connection anymore */
380
0
    return 0;
381
0
}
382
383
/* Conntrack netlink parsing. */
384
385
static bool
386
nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters)
387
0
{
388
0
    static const struct nl_policy policy[] = {
389
0
        [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false },
390
0
        [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false },
391
0
    };
392
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
393
0
    bool parsed;
394
395
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
396
397
0
    if (parsed) {
398
0
        counters->packets
399
0
            = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS]));
400
0
        counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES]));
401
0
    } else {
402
0
        VLOG_ERR_RL(&rl, "Could not parse nested counters. "
403
0
                    "Possibly incompatible Linux kernel version.");
404
0
    }
405
406
0
    return parsed;
407
0
}
408
409
static bool
410
nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp)
411
0
{
412
0
    static const struct nl_policy policy[] = {
413
0
        [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false },
414
0
        [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true },
415
0
    };
416
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
417
0
    bool parsed;
418
419
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
420
421
0
    if (parsed) {
422
0
        timestamp->start
423
0
            = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START]));
424
0
        if (attrs[CTA_TIMESTAMP_STOP]) {
425
0
            timestamp->stop
426
0
                = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP]));
427
0
        }
428
0
    } else {
429
0
        VLOG_ERR_RL(&rl, "Could not parse nested timestamp. "
430
0
                    "Possibly incompatible Linux kernel version.");
431
0
    }
432
433
0
    return parsed;
434
0
}
435
436
static bool
437
nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple)
438
0
{
439
0
    static const struct nl_policy policy[] = {
440
0
        [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true },
441
0
        [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true },
442
0
        [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
443
0
        [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
444
0
    };
445
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
446
0
    bool parsed;
447
448
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
449
450
0
    if (parsed) {
451
0
        if (tuple->l3_type == AF_INET) {
452
0
            if (attrs[CTA_IP_V4_SRC]) {
453
0
                tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]);
454
0
            }
455
0
            if (attrs[CTA_IP_V4_DST]) {
456
0
                tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]);
457
0
            }
458
0
        } else if (tuple->l3_type == AF_INET6) {
459
0
            if (attrs[CTA_IP_V6_SRC]) {
460
0
                memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]),
461
0
                       sizeof tuple->src.in6);
462
0
            }
463
0
            if (attrs[CTA_IP_V6_DST]) {
464
0
                memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]),
465
0
                       sizeof tuple->dst.in6);
466
0
            }
467
0
        } else {
468
0
            VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type);
469
0
            return false;
470
0
        }
471
0
    } else {
472
0
        VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. "
473
0
                    "Possibly incompatible Linux kernel version.");
474
0
    }
475
476
0
    return parsed;
477
0
}
478
479
static bool
480
nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple)
481
0
{
482
0
    static const struct nl_policy policy[] = {
483
0
        [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false },
484
0
        [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true },
485
0
        [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true },
486
0
        [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true },
487
0
        [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true },
488
0
        [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true },
489
0
        [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true },
490
0
        [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true },
491
0
        [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true },
492
0
    };
493
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
494
0
    bool parsed;
495
496
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
497
498
0
    if (parsed) {
499
0
        tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]);
500
501
0
        if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
502
0
            if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE]
503
0
                || !attrs[CTA_PROTO_ICMP_CODE]) {
504
0
                VLOG_ERR_RL(&rl, "Tuple ICMP data missing.");
505
0
                return false;
506
0
            }
507
0
            tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]);
508
0
            tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]);
509
0
            tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]);
510
0
        } else if (tuple->l3_type == AF_INET6 &&
511
0
                   tuple->ip_proto == IPPROTO_ICMPV6) {
512
0
            if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE]
513
0
                || !attrs[CTA_PROTO_ICMPV6_CODE]) {
514
0
                VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing.");
515
0
                return false;
516
0
            }
517
0
            tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]);
518
0
            tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]);
519
0
            tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]);
520
0
        } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) {
521
0
            tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]);
522
0
            tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]);
523
0
        } else {
524
            /* Unsupported IPPROTO and no ports, leave them zeroed.
525
             * We have parsed the ip_proto, so this is not a failure. */
526
0
            VLOG_DBG_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto);
527
0
        }
528
0
    } else {
529
0
        VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. "
530
0
                    "Possibly incompatible Linux kernel version.");
531
0
    }
532
533
0
    return parsed;
534
0
}
535
536
static bool
537
nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
538
                  uint16_t l3_type)
539
0
{
540
0
    static const struct nl_policy policy[] = {
541
0
        [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false },
542
0
        [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false },
543
0
    };
544
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
545
0
    bool parsed;
546
547
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
548
549
0
    memset(tuple, 0, sizeof *tuple);
550
551
0
    if (parsed) {
552
0
        tuple->l3_type = l3_type;
553
554
0
        if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple)
555
0
            || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) {
556
0
            struct ds ds;
557
558
0
            ds_init(&ds);
559
0
            ct_dpif_format_tuple(&ds, tuple);
560
561
0
            VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
562
0
            ds_destroy(&ds);
563
564
0
            memset(tuple, 0, sizeof *tuple);
565
0
            return false;
566
0
        }
567
0
    } else {
568
0
        VLOG_ERR_RL(&rl, "Could not parse nested tuple options. "
569
0
                    "Possibly incompatible Linux kernel version.");
570
0
    }
571
572
0
    return parsed;
573
0
}
574
575
static bool
576
nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
577
0
{
578
0
    size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_IP);
579
580
0
    if (tuple->l3_type == AF_INET) {
581
0
        nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip);
582
0
        nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip);
583
0
    } else if (tuple->l3_type == AF_INET6) {
584
0
        nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6);
585
0
        nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6);
586
0
    } else {
587
0
        VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".",
588
0
                     tuple->l3_type);
589
0
        return false;
590
0
    }
591
592
0
    nl_msg_end_nested(buf, offset);
593
0
    return true;
594
0
}
595
596
static bool
597
nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
598
0
{
599
0
    size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_PROTO);
600
601
0
    nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto);
602
603
0
    if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
604
0
        nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id);
605
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type);
606
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code);
607
0
    } else if (tuple->l3_type == AF_INET6 &&
608
0
               tuple->ip_proto == IPPROTO_ICMPV6) {
609
0
        nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id);
610
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type);
611
0
        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code);
612
0
    } else if (tuple->ip_proto == IPPROTO_TCP ||
613
0
               tuple->ip_proto == IPPROTO_UDP ||
614
0
               tuple->ip_proto == IPPROTO_SCTP) {
615
0
        nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port);
616
0
        nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port);
617
0
    } else {
618
0
        VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".",
619
0
                     tuple->ip_proto);
620
0
        return false;
621
0
    }
622
623
0
    nl_msg_end_nested(buf, offset);
624
0
    return true;
625
0
}
626
627
static bool
628
nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple,
629
                   enum ctattr_type type)
630
0
{
631
0
    if (type != CTA_TUPLE_ORIG && type != CTA_TUPLE_REPLY &&
632
0
        type != CTA_TUPLE_MASTER) {
633
0
        return false;
634
0
    }
635
636
0
    size_t offset = nl_msg_start_nested(buf, type);
637
638
0
    if (!nl_ct_put_tuple_ip(buf, tuple)) {
639
0
        return false;
640
0
    }
641
0
    if (!nl_ct_put_tuple_proto(buf, tuple)) {
642
0
        return false;
643
0
    }
644
645
0
    nl_msg_end_nested(buf, offset);
646
0
    return true;
647
0
}
648
649
/* Translate netlink TCP state to CT_DPIF_TCP state. */
650
static uint8_t
651
nl_ct_tcp_state_to_dpif(uint8_t state)
652
0
{
653
0
    switch (state) {
654
0
    case TCP_CONNTRACK_NONE:
655
0
        return CT_DPIF_TCPS_CLOSED;
656
0
    case TCP_CONNTRACK_SYN_SENT:
657
0
        return CT_DPIF_TCPS_SYN_SENT;
658
0
    case TCP_CONNTRACK_SYN_SENT2:
659
0
        return CT_DPIF_TCPS_SYN_SENT;
660
0
    case TCP_CONNTRACK_SYN_RECV:
661
0
        return CT_DPIF_TCPS_SYN_RECV;
662
0
    case TCP_CONNTRACK_ESTABLISHED:
663
0
        return CT_DPIF_TCPS_ESTABLISHED;
664
0
    case TCP_CONNTRACK_FIN_WAIT:
665
0
        return CT_DPIF_TCPS_FIN_WAIT_1;
666
0
    case TCP_CONNTRACK_CLOSE_WAIT:
667
0
        return CT_DPIF_TCPS_CLOSE_WAIT;
668
0
    case TCP_CONNTRACK_LAST_ACK:
669
0
        return CT_DPIF_TCPS_LAST_ACK;
670
0
    case TCP_CONNTRACK_TIME_WAIT:
671
0
        return CT_DPIF_TCPS_TIME_WAIT;
672
0
    case TCP_CONNTRACK_CLOSE:
673
0
        return CT_DPIF_TCPS_CLOSING;
674
0
    default:
675
0
        return CT_DPIF_TCPS_CLOSED;
676
0
    }
677
0
}
678
679
static uint8_t
680
ip_ct_tcp_flags_to_dpif(uint8_t flags)
681
0
{
682
0
    uint8_t ret = 0;
683
0
#define CT_DPIF_TCP_FLAG(FLAG) \
684
0
        ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
685
0
    CT_DPIF_TCP_FLAGS
686
0
#undef CT_DPIF_TCP_FLAG
687
0
    return ret;
688
0
}
689
690
static bool
691
nl_ct_parse_protoinfo_tcp(struct nlattr *nla,
692
                          struct ct_dpif_protoinfo *protoinfo)
693
0
{
694
0
    static const struct nl_policy policy[] = {
695
0
        [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false },
696
0
        [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8,
697
0
                                                .optional = true },
698
0
        [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8,
699
0
                                             .optional = true },
700
0
        [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16,
701
0
                                               .optional = true },
702
0
        [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16,
703
0
                                            .optional = true },
704
0
    };
705
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
706
0
    bool parsed;
707
708
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
709
710
0
    if (parsed) {
711
0
        const struct nf_ct_tcp_flags *flags_orig, *flags_reply;
712
0
        uint8_t state;
713
0
        protoinfo->proto = IPPROTO_TCP;
714
0
        state = nl_ct_tcp_state_to_dpif(
715
0
            nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE]));
716
        /* The connection tracker keeps only one tcp state for the
717
         * connection, but our structures store a separate state for
718
         * each endpoint.  Here we duplicate the state. */
719
0
        protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state;
720
721
0
        if (attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]) {
722
0
            protoinfo->tcp.wscale_orig =
723
0
                nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
724
0
        }
725
0
        if (attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]) {
726
0
            protoinfo->tcp.wscale_reply =
727
0
                nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
728
0
        }
729
0
        if (attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
730
0
            flags_orig =
731
0
                nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL],
732
0
                                   sizeof *flags_orig);
733
0
            protoinfo->tcp.flags_orig =
734
0
                ip_ct_tcp_flags_to_dpif(flags_orig->flags);
735
0
        }
736
0
        if (attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
737
0
            flags_reply =
738
0
                nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY],
739
0
                                   sizeof *flags_reply);
740
0
            protoinfo->tcp.flags_reply =
741
0
                ip_ct_tcp_flags_to_dpif(flags_reply->flags);
742
0
        }
743
0
    } else {
744
0
        VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. "
745
0
                    "Possibly incompatible Linux kernel version.");
746
0
    }
747
748
0
    return parsed;
749
0
}
750
751
/* Translate netlink SCTP state to CT_DPIF_SCTP state. */
752
static uint8_t
753
nl_ct_sctp_state_to_dpif(uint8_t state)
754
0
{
755
0
    switch (state) {
756
0
    case SCTP_CONNTRACK_COOKIE_WAIT:
757
0
        return CT_DPIF_SCTP_STATE_COOKIE_WAIT;
758
0
    case SCTP_CONNTRACK_COOKIE_ECHOED:
759
0
        return CT_DPIF_SCTP_STATE_COOKIE_ECHOED;
760
0
    case SCTP_CONNTRACK_ESTABLISHED:
761
0
        return CT_DPIF_SCTP_STATE_ESTABLISHED;
762
0
    case SCTP_CONNTRACK_SHUTDOWN_SENT:
763
0
        return CT_DPIF_SCTP_STATE_SHUTDOWN_SENT;
764
0
    case SCTP_CONNTRACK_SHUTDOWN_RECD:
765
0
        return CT_DPIF_SCTP_STATE_SHUTDOWN_RECD;
766
0
    case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
767
0
        return CT_DPIF_SCTP_STATE_SHUTDOWN_ACK_SENT;
768
0
    case SCTP_CONNTRACK_HEARTBEAT_SENT:
769
0
        return CT_DPIF_SCTP_STATE_HEARTBEAT_SENT;
770
0
    case SCTP_CONNTRACK_HEARTBEAT_ACKED:
771
0
        return CT_DPIF_SCTP_STATE_HEARTBEAT_ACKED;
772
0
    case SCTP_CONNTRACK_CLOSED:
773
        /* Fall Through. */
774
0
    case SCTP_CONNTRACK_NONE:
775
        /* Fall Through. */
776
0
    default:
777
0
        return CT_DPIF_SCTP_STATE_CLOSED;
778
0
    }
779
0
}
780
781
static bool
782
nl_ct_parse_protoinfo_sctp(struct nlattr *nla,
783
                           struct ct_dpif_protoinfo *protoinfo)
784
0
{
785
0
    static const struct nl_policy policy[] = {
786
0
        [CTA_PROTOINFO_SCTP_STATE] = { .type = NL_A_U8, .optional = false },
787
0
        [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NL_A_U32,
788
0
                                               .optional = false },
789
0
        [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NL_A_U32,
790
0
                                            .optional = false },
791
0
    };
792
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
793
0
    bool parsed;
794
795
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
796
0
    if (parsed) {
797
0
        protoinfo->proto = IPPROTO_SCTP;
798
799
0
        protoinfo->sctp.state = nl_ct_sctp_state_to_dpif(
800
0
            nl_attr_get_u8(attrs[CTA_PROTOINFO_SCTP_STATE]));
801
0
        protoinfo->sctp.vtag_orig = nl_attr_get_u32(
802
0
            attrs[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
803
0
        protoinfo->sctp.vtag_reply = nl_attr_get_u32(
804
0
            attrs[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
805
0
    } else {
806
0
        VLOG_ERR_RL(&rl, "Could not parse nested SCTP protoinfo options. "
807
0
                    "Possibly incompatible Linux kernel version.");
808
0
    }
809
810
0
    return parsed;
811
0
}
812
813
static bool
814
nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo)
815
0
{
816
    /* These are mutually exclusive. */
817
0
    static const struct nl_policy policy[] = {
818
0
        [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true },
819
0
        [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true },
820
0
    };
821
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
822
0
    bool parsed;
823
824
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
825
826
0
    memset(protoinfo, 0, sizeof *protoinfo);
827
828
0
    if (parsed) {
829
0
        if (attrs[CTA_PROTOINFO_TCP]) {
830
0
            parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP],
831
0
                                               protoinfo);
832
0
        } else if (attrs[CTA_PROTOINFO_SCTP]) {
833
0
            parsed = nl_ct_parse_protoinfo_sctp(attrs[CTA_PROTOINFO_SCTP],
834
0
                                                protoinfo);
835
0
        } else {
836
0
            VLOG_WARN_RL(&rl, "Empty protoinfo!");
837
0
        }
838
0
    } else {
839
0
        VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. "
840
0
                    "Possibly incompatible Linux kernel version.");
841
0
    }
842
843
0
    return parsed;
844
0
}
845
846
static bool
847
nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper)
848
0
{
849
0
    static const struct nl_policy policy[] = {
850
0
        [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false },
851
0
    };
852
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
853
0
    bool parsed;
854
855
0
    parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
856
857
0
    memset(helper, 0, sizeof *helper);
858
859
0
    if (parsed) {
860
0
        helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME]));
861
0
    } else {
862
0
        VLOG_ERR_RL(&rl, "Could not parse nested helper options. "
863
0
                    "Possibly incompatible Linux kernel version.");
864
0
    }
865
866
0
    return parsed;
867
0
}
868
869
static int nl_ct_timeout_policy_max_attr[] = {
870
    [IPPROTO_TCP] = CTA_TIMEOUT_TCP_MAX,
871
    [IPPROTO_UDP] = CTA_TIMEOUT_UDP_MAX,
872
    [IPPROTO_ICMP] = CTA_TIMEOUT_ICMP_MAX,
873
    [IPPROTO_ICMPV6] = CTA_TIMEOUT_ICMPV6_MAX
874
};
875
876
static void
877
nl_ct_set_timeout_policy_attr(struct nl_ct_timeout_policy *nl_tp,
878
                              uint32_t attr, uint32_t val)
879
0
{
880
0
    nl_tp->present |= 1 << attr;
881
0
    nl_tp->attrs[attr] = val;
882
0
}
883
884
static int
885
nl_ct_parse_tcp_timeout_policy_data(struct nlattr *nla,
886
                                    struct nl_ct_timeout_policy *nl_tp)
887
0
{
888
0
    static const struct nl_policy policy[] = {
889
0
        [CTA_TIMEOUT_TCP_SYN_SENT] =    { .type = NL_A_BE32,
890
0
                                          .optional = false },
891
0
        [CTA_TIMEOUT_TCP_SYN_RECV] =    { .type = NL_A_BE32,
892
0
                                          .optional = false },
893
0
        [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NL_A_BE32,
894
0
                                          .optional = false },
895
0
        [CTA_TIMEOUT_TCP_FIN_WAIT] =    { .type = NL_A_BE32,
896
0
                                          .optional = false },
897
0
        [CTA_TIMEOUT_TCP_CLOSE_WAIT] =  { .type = NL_A_BE32,
898
0
                                          .optional = false },
899
0
        [CTA_TIMEOUT_TCP_LAST_ACK] =    { .type = NL_A_BE32,
900
0
                                          .optional = false },
901
0
        [CTA_TIMEOUT_TCP_TIME_WAIT] =   { .type = NL_A_BE32,
902
0
                                          .optional = false },
903
0
        [CTA_TIMEOUT_TCP_CLOSE] =       { .type = NL_A_BE32,
904
0
                                          .optional = false },
905
0
        [CTA_TIMEOUT_TCP_SYN_SENT2] =   { .type = NL_A_BE32,
906
0
                                          .optional = false },
907
0
        [CTA_TIMEOUT_TCP_RETRANS] =     { .type = NL_A_BE32,
908
0
                                          .optional = false },
909
0
        [CTA_TIMEOUT_TCP_UNACK] =       { .type = NL_A_BE32,
910
0
                                          .optional = false },
911
0
    };
912
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
913
914
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
915
0
        VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. "
916
0
                    "Possibly incompatible Linux kernel version.");
917
0
        return EINVAL;
918
0
    }
919
920
0
    for (int i = CTA_TIMEOUT_TCP_SYN_SENT; i <= CTA_TIMEOUT_TCP_UNACK; i++) {
921
0
        nl_ct_set_timeout_policy_attr(nl_tp, i,
922
0
                                      ntohl(nl_attr_get_be32(attrs[i])));
923
0
    }
924
0
    return 0;
925
0
}
926
927
static int
928
nl_ct_parse_udp_timeout_policy_data(struct nlattr *nla,
929
                                    struct nl_ct_timeout_policy *nl_tp)
930
0
{
931
0
    static const struct nl_policy policy[] = {
932
0
        [CTA_TIMEOUT_UDP_UNREPLIED] =   { .type = NL_A_BE32,
933
0
                                          .optional = false },
934
0
        [CTA_TIMEOUT_UDP_REPLIED] =     { .type = NL_A_BE32,
935
0
                                          .optional = false },
936
0
    };
937
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
938
939
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
940
0
        VLOG_ERR_RL(&rl, "Could not parse nested tcp timeout options. "
941
0
                    "Possibly incompatible Linux kernel version.");
942
0
        return EINVAL;
943
0
    }
944
945
0
    for (int i = CTA_TIMEOUT_UDP_UNREPLIED; i <= CTA_TIMEOUT_UDP_REPLIED;
946
0
         i++) {
947
0
        nl_ct_set_timeout_policy_attr(nl_tp, i,
948
0
                                      ntohl(nl_attr_get_be32(attrs[i])));
949
0
    }
950
0
    return 0;
951
0
}
952
953
static int
954
nl_ct_parse_icmp_timeout_policy_data(struct nlattr *nla,
955
                                     struct nl_ct_timeout_policy *nl_tp)
956
0
{
957
0
    static const struct nl_policy policy[] = {
958
0
        [CTA_TIMEOUT_ICMP_TIMEOUT] =   { .type = NL_A_BE32,
959
0
                                         .optional = false },
960
0
    };
961
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
962
963
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
964
0
        VLOG_ERR_RL(&rl, "Could not parse nested icmp timeout options. "
965
0
                    "Possibly incompatible Linux kernel version.");
966
0
        return EINVAL;
967
0
    }
968
969
0
    nl_ct_set_timeout_policy_attr(
970
0
        nl_tp, CTA_TIMEOUT_ICMP_TIMEOUT,
971
0
        ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMP_TIMEOUT])));
972
0
    return 0;
973
0
}
974
975
static int
976
nl_ct_parse_icmpv6_timeout_policy_data(struct nlattr *nla,
977
                                       struct nl_ct_timeout_policy *nl_tp)
978
0
{
979
0
    static const struct nl_policy policy[] = {
980
0
        [CTA_TIMEOUT_ICMPV6_TIMEOUT] =   { .type = NL_A_BE32,
981
0
                                           .optional = false },
982
0
    };
983
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
984
985
0
    if (!nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy))) {
986
0
        VLOG_ERR_RL(&rl, "Could not parse nested icmpv6 timeout options. "
987
0
                    "Possibly incompatible Linux kernel version.");
988
0
        return EINVAL;
989
0
    }
990
991
0
    nl_ct_set_timeout_policy_attr(
992
0
        nl_tp, CTA_TIMEOUT_ICMPV6_TIMEOUT,
993
0
        ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT_ICMPV6_TIMEOUT])));
994
0
    return 0;
995
0
}
996
997
static int
998
nl_ct_parse_timeout_policy_data(struct nlattr *nla,
999
                                struct nl_ct_timeout_policy *nl_tp)
1000
0
{
1001
0
    switch (nl_tp->l4num) {
1002
0
        case IPPROTO_TCP:
1003
0
            return nl_ct_parse_tcp_timeout_policy_data(nla, nl_tp);
1004
0
        case IPPROTO_UDP:
1005
0
            return nl_ct_parse_udp_timeout_policy_data(nla, nl_tp);
1006
0
        case IPPROTO_ICMP:
1007
0
            return nl_ct_parse_icmp_timeout_policy_data(nla, nl_tp);
1008
0
        case IPPROTO_ICMPV6:
1009
0
            return nl_ct_parse_icmpv6_timeout_policy_data(nla, nl_tp);
1010
0
        default:
1011
0
            return EINVAL;
1012
0
    }
1013
0
}
1014
1015
static int
1016
nl_ct_timeout_policy_from_ofpbuf(struct ofpbuf *buf,
1017
                                 struct nl_ct_timeout_policy *nl_tp,
1018
                                 bool default_tp)
1019
0
{
1020
0
    static const struct nl_policy policy[] = {
1021
0
        [CTA_TIMEOUT_NAME] =    { .type = NL_A_STRING, .optional = false },
1022
0
        [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false },
1023
0
        [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false },
1024
0
        [CTA_TIMEOUT_DATA] =    { .type = NL_A_NESTED, .optional = false }
1025
0
    };
1026
0
    static const struct nl_policy policy_default_tp[] = {
1027
0
        [CTA_TIMEOUT_L3PROTO] = { .type = NL_A_BE16, .optional = false },
1028
0
        [CTA_TIMEOUT_L4PROTO] = { .type = NL_A_U8, .optional = false },
1029
0
        [CTA_TIMEOUT_DATA] =    { .type = NL_A_NESTED, .optional = false }
1030
0
    };
1031
1032
0
    struct nlattr *attrs[ARRAY_SIZE(policy)];
1033
0
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
1034
0
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
1035
0
    struct nfgenmsg *nfmsg = ofpbuf_try_pull(&b, sizeof *nfmsg);
1036
1037
0
    if (!nlmsg || !nfmsg
1038
0
        || NFNL_SUBSYS_ID(nlmsg->nlmsg_type) != NFNL_SUBSYS_CTNETLINK_TIMEOUT
1039
0
        || nfmsg->version != NFNETLINK_V0
1040
0
        || !nl_policy_parse(&b, 0, default_tp ? policy_default_tp : policy,
1041
0
                            attrs, default_tp ? ARRAY_SIZE(policy_default_tp) :
1042
0
                                                ARRAY_SIZE(policy))) {
1043
0
        return EINVAL;
1044
0
    }
1045
1046
0
    if (!default_tp) {
1047
0
        ovs_strlcpy(nl_tp->name, nl_attr_get_string(attrs[CTA_TIMEOUT_NAME]),
1048
0
                    sizeof nl_tp->name);
1049
0
    }
1050
0
    nl_tp->l3num = ntohs(nl_attr_get_be16(attrs[CTA_TIMEOUT_L3PROTO]));
1051
0
    nl_tp->l4num = nl_attr_get_u8(attrs[CTA_TIMEOUT_L4PROTO]);
1052
0
    nl_tp->present = 0;
1053
1054
0
    return nl_ct_parse_timeout_policy_data(attrs[CTA_TIMEOUT_DATA], nl_tp);
1055
0
}
1056
1057
int
1058
nl_ct_set_timeout_policy(const struct nl_ct_timeout_policy *nl_tp)
1059
0
{
1060
0
    struct ofpbuf buf;
1061
0
    size_t offset;
1062
1063
0
    ofpbuf_init(&buf, 512);
1064
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1065
0
                        IPCTNL_MSG_TIMEOUT_NEW, NLM_F_REQUEST | NLM_F_CREATE
1066
0
                        | NLM_F_ACK | NLM_F_REPLACE);
1067
1068
0
    nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, nl_tp->name);
1069
0
    nl_msg_put_be16(&buf, CTA_TIMEOUT_L3PROTO, htons(nl_tp->l3num));
1070
0
    nl_msg_put_u8(&buf, CTA_TIMEOUT_L4PROTO, nl_tp->l4num);
1071
1072
0
    offset = nl_msg_start_nested(&buf, CTA_TIMEOUT_DATA);
1073
0
    for (int i = 1; i <= nl_ct_timeout_policy_max_attr[nl_tp->l4num]; ++i) {
1074
0
        if (nl_tp->present & 1 << i) {
1075
0
            nl_msg_put_be32(&buf, i, htonl(nl_tp->attrs[i]));
1076
0
        }
1077
0
    }
1078
0
    nl_msg_end_nested(&buf, offset);
1079
1080
0
    int err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
1081
0
    ofpbuf_uninit(&buf);
1082
0
    return err;
1083
0
}
1084
1085
int
1086
nl_ct_get_timeout_policy(const char *tp_name,
1087
                         struct nl_ct_timeout_policy *nl_tp)
1088
0
{
1089
0
    struct ofpbuf request, *reply;
1090
1091
0
    ofpbuf_init(&request, 512);
1092
0
    nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1093
0
                        IPCTNL_MSG_TIMEOUT_GET, NLM_F_REQUEST | NLM_F_ACK);
1094
0
    nl_msg_put_string(&request, CTA_TIMEOUT_NAME, tp_name);
1095
0
    int err = nl_transact(NETLINK_NETFILTER, &request, &reply);
1096
0
    if (err) {
1097
0
        goto out;
1098
0
    }
1099
1100
0
    err = nl_ct_timeout_policy_from_ofpbuf(reply, nl_tp, false);
1101
1102
0
out:
1103
0
    ofpbuf_uninit(&request);
1104
0
    ofpbuf_delete(reply);
1105
0
    return err;
1106
0
}
1107
1108
int
1109
nl_ct_del_timeout_policy(const char *tp_name)
1110
0
{
1111
0
    struct ofpbuf buf;
1112
1113
0
    ofpbuf_init(&buf, 64);
1114
0
    nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1115
0
                        IPCTNL_MSG_TIMEOUT_DELETE, NLM_F_REQUEST | NLM_F_ACK);
1116
1117
0
    nl_msg_put_string(&buf, CTA_TIMEOUT_NAME, tp_name);
1118
0
    int err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
1119
0
    ofpbuf_uninit(&buf);
1120
0
    return err;
1121
0
}
1122
1123
/* State of an in-progress dump of conntrack timeout policies.  Allocated by
 * nl_ct_timeout_policy_dump_start() and freed by
 * nl_ct_timeout_policy_dump_done(). */
struct nl_ct_timeout_policy_dump_state {
    struct nl_dump dump;        /* Underlying netlink dump. */
    struct ofpbuf buf;          /* Buffer handed to nl_dump_next(). */
};
1127
1128
int
1129
nl_ct_timeout_policy_dump_start(
1130
    struct nl_ct_timeout_policy_dump_state **statep)
1131
0
{
1132
0
    struct ofpbuf request;
1133
0
    struct nl_ct_timeout_policy_dump_state *state;
1134
1135
0
    *statep = state = xzalloc(sizeof *state);
1136
0
    ofpbuf_init(&request, 512);
1137
0
    nl_msg_put_nfgenmsg(&request, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK_TIMEOUT,
1138
0
                        IPCTNL_MSG_TIMEOUT_GET,
1139
0
                        NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
1140
1141
0
    nl_dump_start(&state->dump, NETLINK_NETFILTER, &request);
1142
0
    ofpbuf_uninit(&request);
1143
0
    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1144
0
    return 0;
1145
0
}
1146
1147
int
1148
nl_ct_timeout_policy_dump_next(struct nl_ct_timeout_policy_dump_state *state,
1149
                               struct nl_ct_timeout_policy *nl_tp)
1150
0
{
1151
0
    struct ofpbuf reply;
1152
1153
0
    if (!nl_dump_next(&state->dump, &reply, &state->buf)) {
1154
0
        return EOF;
1155
0
    }
1156
0
    int err = nl_ct_timeout_policy_from_ofpbuf(&reply, nl_tp, false);
1157
0
    ofpbuf_uninit(&reply);
1158
0
    return err;
1159
0
}
1160
1161
int
1162
nl_ct_timeout_policy_dump_done(struct nl_ct_timeout_policy_dump_state *state)
1163
0
{
1164
0
    int err  = nl_dump_done(&state->dump);
1165
0
    ofpbuf_uninit(&state->buf);
1166
0
    free(state);
1167
0
    return err;
1168
0
}
1169
1170
/* Translate netlink entry status flags to CT_DPIF_TCP status flags. */
1171
static uint32_t
1172
ips_status_to_dpif_flags(uint32_t status)
1173
0
{
1174
0
    uint32_t ret = 0;
1175
0
#define CT_DPIF_STATUS_FLAG(FLAG) \
1176
0
        ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
1177
0
    CT_DPIF_STATUS_FLAGS
1178
0
#undef CT_DPIF_STATUS_FLAG
1179
0
    return ret;
1180
0
}
1181
1182
static bool
1183
nl_ct_parse_header_policy(struct ofpbuf *buf,
1184
        enum nl_ct_event_type *event_type,
1185
        uint8_t *nfgen_family,
1186
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)])
1187
0
{
1188
0
    struct nlmsghdr *nlh;
1189
0
    struct nfgenmsg *nfm;
1190
0
    uint8_t type;
1191
1192
0
    nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
1193
0
    nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm);
1194
0
    if (!nfm) {
1195
0
        VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg).");
1196
0
        return false;
1197
0
    }
1198
0
    if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) {
1199
0
        VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).",
1200
0
                 NFNL_SUBSYS_ID(nlh->nlmsg_type));
1201
0
        return false;
1202
0
    }
1203
0
    if (nfm->version != NFNETLINK_V0) {
1204
0
        VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).",
1205
0
                 NFNL_MSG_TYPE(nfm->version));
1206
0
        return false;
1207
0
    }
1208
1209
0
    if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm,
1210
0
                         nfnlgrp_conntrack_policy, attrs,
1211
0
                         ARRAY_SIZE(nfnlgrp_conntrack_policy))) {
1212
0
        VLOG_ERR_RL(&rl, "Received bad nfnl message (policy).");
1213
0
        return false;
1214
0
    }
1215
1216
0
    type = NFNL_MSG_TYPE(nlh->nlmsg_type);
1217
0
    *nfgen_family = nfm->nfgen_family;
1218
1219
0
    switch (type) {
1220
0
    case IPCTNL_MSG_CT_NEW:
1221
0
        *event_type = nlh->nlmsg_flags & NLM_F_CREATE
1222
0
            ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE;
1223
0
        break;
1224
0
    case IPCTNL_MSG_CT_DELETE:
1225
0
        *event_type = NL_CT_EVENT_DELETE;
1226
0
        break;
1227
0
    default:
1228
0
        VLOG_ERR_RL(&rl, "Can't parse conntrack event type.");
1229
0
        return false;
1230
0
    }
1231
1232
0
    return true;
1233
0
}
1234
1235
static bool
1236
nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
1237
        struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
1238
        uint8_t nfgen_family)
1239
0
{
1240
0
    if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig,
1241
0
                           nfgen_family)) {
1242
0
        return false;
1243
0
    }
1244
0
    if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply,
1245
0
                           nfgen_family)) {
1246
0
        return false;
1247
0
    }
1248
0
    if (attrs[CTA_COUNTERS_ORIG] &&
1249
0
        !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG],
1250
0
                              &entry->counters_orig)) {
1251
0
        return false;
1252
0
    }
1253
0
    if (attrs[CTA_COUNTERS_REPLY] &&
1254
0
        !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY],
1255
0
                              &entry->counters_reply)) {
1256
0
        return false;
1257
0
    }
1258
0
    if (attrs[CTA_TIMESTAMP] &&
1259
0
        !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) {
1260
0
        return false;
1261
0
    }
1262
0
    if (attrs[CTA_ID]) {
1263
0
        entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID]));
1264
0
    }
1265
0
    if (attrs[CTA_ZONE]) {
1266
0
        entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
1267
0
    }
1268
0
    if (attrs[CTA_STATUS]) {
1269
0
        entry->status = ips_status_to_dpif_flags(
1270
0
            ntohl(nl_attr_get_be32(attrs[CTA_STATUS])));
1271
0
    }
1272
0
    if (attrs[CTA_TIMEOUT]) {
1273
0
        entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT]));
1274
0
    }
1275
0
    if (attrs[CTA_MARK]) {
1276
0
        entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK]));
1277
0
    }
1278
0
    if (attrs[CTA_LABELS]) {
1279
0
        entry->have_labels = true;
1280
0
        memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]),
1281
0
               MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS])));
1282
0
    }
1283
0
    if (attrs[CTA_PROTOINFO] &&
1284
0
        !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) {
1285
0
        return false;
1286
0
    }
1287
0
    if (attrs[CTA_HELP] &&
1288
0
        !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) {
1289
0
        return false;
1290
0
    }
1291
0
    if (attrs[CTA_TUPLE_MASTER] &&
1292
0
        !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_parent,
1293
0
                           nfgen_family)) {
1294
0
        return false;
1295
0
    }
1296
0
    return true;
1297
0
}
1298
1299
bool
1300
nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry,
1301
                  enum nl_ct_event_type *event_type)
1302
0
{
1303
0
    struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
1304
0
    uint8_t nfgen_family;
1305
1306
0
    memset(entry, 0, sizeof *entry);
1307
0
    if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) {
1308
0
        return false;
1309
0
    };
1310
1311
0
    if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
1312
0
        ct_dpif_entry_uninit(entry);
1313
0
        memset(entry, 0, sizeof *entry);
1314
0
        return false;
1315
0
    }
1316
1317
0
    return true;
1318
0
}
1319
1320
/* NetFilter utility functions. */
1321
1322
/* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
1323
 * initially empty.  'expected_payload' should be an estimate of the number of
1324
 * payload bytes to be supplied; if the size of the payload is unknown a value
1325
 * of 0 is acceptable.
1326
 *
1327
 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
1328
 *
1329
 * 'flags' is a bit-mask that indicates what kind of request is being made.  It
1330
 * is often NLM_F_REQUEST indicating that a request is being made, commonly
1331
 * or'd with NLM_F_ACK to request an acknowledgement.  NLM_F_DUMP flag requests
1332
 * a dump of the table.
1333
 *
1334
 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
1335
 *
1336
 * 'cmd' is an enumerated value specific to the 'subsystem'.
1337
 *
1338
 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now.  nl_sock_send() will
1339
 * fill it in just before sending the message.
1340
 *
1341
 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
1342
 * not NetFilter Netlink messages. */
1343
static void
1344
nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
1345
                    int family, uint8_t subsystem, uint8_t cmd,
1346
                    uint32_t flags)
1347
0
{
1348
0
    struct nfgenmsg *nfm;
1349
1350
0
    nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload,
1351
0
                        subsystem << 8 | cmd, flags);
1352
0
    ovs_assert(msg->size == NLMSG_HDRLEN);
1353
0
    nfm = nl_msg_put_uninit(msg, sizeof *nfm);
1354
0
    nfm->nfgen_family = family;
1355
    nfm->version = NFNETLINK_V0;
1356
0
    nfm->res_id = 0;
1357
0
}