Coverage Report

Created: 2023-03-26 07:42

/src/openvswitch/lib/flow.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017, 2019 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
#include <config.h>
17
#include <sys/types.h>
18
#include "flow.h"
19
#include <errno.h>
20
#include <inttypes.h>
21
#include <limits.h>
22
#include <net/if.h>
23
#include <netinet/in.h>
24
#include <netinet/icmp6.h>
25
#include <netinet/ip6.h>
26
#include <stdint.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include "byte-order.h"
30
#include "colors.h"
31
#include "coverage.h"
32
#include "csum.h"
33
#include "openvswitch/dynamic-string.h"
34
#include "hash.h"
35
#include "jhash.h"
36
#include "openvswitch/match.h"
37
#include "dp-packet.h"
38
#include "openflow/openflow.h"
39
#include "packets.h"
40
#include "odp-util.h"
41
#include "random.h"
42
#include "unaligned.h"
43
#include "util.h"
44
#include "openvswitch/nsh.h"
45
#include "ovs-router.h"
46
#include "lib/netdev-provider.h"
47
#include "openvswitch/vlog.h"
48
49
VLOG_DEFINE_THIS_MODULE(flow);
50
51
COVERAGE_DEFINE(flow_extract);
52
COVERAGE_DEFINE(miniflow_extract_ipv4_pkt_len_error);
53
COVERAGE_DEFINE(miniflow_extract_ipv4_pkt_too_short);
54
COVERAGE_DEFINE(miniflow_extract_ipv6_pkt_len_error);
55
COVERAGE_DEFINE(miniflow_extract_ipv6_pkt_too_short);
56
COVERAGE_DEFINE(miniflow_malloc);
57
58
/* U64 indices for segmented flow classification. */
59
const uint8_t flow_segment_u64s[4] = {
60
    FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),
61
    FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),
62
    FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),
63
    FLOW_U64S
64
};
65
66
int flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;
67
68
/* Build-time check that member 'f1' of struct flow starts exactly where
 * member 'f0' ends, i.e. that there is no padding between them. */
#define ASSERT_SEQUENTIAL(f0, f1)                               \
    BUILD_ASSERT_DECL(offsetof(struct flow, f1)                 \
                      == (offsetof(struct flow, f0)             \
                          + MEMBER_SIZEOF(struct flow, f0)))

/* Build-time check that members 'f0' and 'f1' of struct flow both start
 * within the same 32-bit aligned word. */
#define ASSERT_SAME_WORD(f0, f1)                                \
    BUILD_ASSERT_DECL(offsetof(struct flow, f1) / 4             \
                      == offsetof(struct flow, f0) / 4)

/* Build-time check combining the two above: 'f1' directly follows 'f0' and
 * both start in the same 32-bit aligned word of struct flow. */
#define ASSERT_SEQUENTIAL_SAME_WORD(f0, f1)     \
    ASSERT_SEQUENTIAL(f0, f1);                  \
    ASSERT_SAME_WORD(f0, f1)
86
87
/* miniflow_extract() assumes the following to be true to optimize the
88
 * extraction process. */
89
ASSERT_SEQUENTIAL_SAME_WORD(nw_frag, nw_tos);
90
ASSERT_SEQUENTIAL_SAME_WORD(nw_tos, nw_ttl);
91
ASSERT_SEQUENTIAL_SAME_WORD(nw_ttl, nw_proto);
92
93
/* TCP flags in the middle of a BE64, zeroes in the other half. */
94
BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) % 8 == 4);
95
96
#if WORDS_BIGENDIAN
97
#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \
98
                                 << 16)
99
#else
100
#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl))
101
#endif
102
103
ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);
104
105
/* Consumes 'size' bytes from the front of the buffer described by '*datap'
 * and '*sizep': '*datap' advances by 'size' and '*sizep' shrinks by 'size'.
 * No bounds check is made, so the caller must guarantee that at least 'size'
 * bytes are available.  Returns the position '*datap' had before the call,
 * that is, the first consumed byte. */
static inline const void *
data_pull(const void **datap, size_t *sizep, size_t size)
{
    const void *head = *datap;

    *sizep -= size;
    *datap = (const char *) head + size;

    return head;
}
116
117
/* Bounds-checked variant of data_pull().  When '*sizep' is at least 'size',
 * consumes 'size' bytes from the front of '*datap' and returns the first
 * consumed byte.  Otherwise returns NULL and leaves '*datap' and '*sizep'
 * unchanged. */
static inline const void *
data_try_pull(const void **datap, size_t *sizep, size_t size)
{
    if (OVS_LIKELY(*sizep >= size)) {
        return data_pull(datap, sizep, size);
    }
    return NULL;
}
125
126
/* Context for pushing data to a miniflow. */
127
struct mf_ctx {
128
    struct flowmap map;
129
    uint64_t *data;
130
    uint64_t * const end;
131
};
132
133
/* miniflow_push_* macros allow filling in a miniflow data values in order.
134
 * Assertions are needed only when the layout of the struct flow is modified.
135
 * 'ofs' is a compile-time constant, which allows most of the code be optimized
136
 * away.  Some GCC versions gave warnings on ALWAYS_INLINE, so these are
137
 * defined as macros. */
138
139
#if (FLOW_WC_SEQ != 42)
140
#define MINIFLOW_ASSERT(X) ovs_assert(X)
141
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
142
               "assertions enabled. Consider updating FLOW_WC_SEQ after "
143
               "testing")
144
#else
145
#define MINIFLOW_ASSERT(X)
146
#endif
147
148
/* Asserts (only when MINIFLOW_ASSERT is enabled) that bit 'IDX' and every
 * higher bit of flowmap 'FM' are clear. */
#define ASSERT_FLOWMAP_NOT_SET(FM, IDX)                                 \
{                                                                       \
    MINIFLOW_ASSERT(!((FM)->bits[(IDX) / MAP_T_BITS] &                  \
                      (MAP_MAX << ((IDX) % MAP_T_BITS))));              \
    for (size_t i = (IDX) / MAP_T_BITS + 1; i < FLOWMAP_UNITS; i++) {   \
        MINIFLOW_ASSERT(!(FM)->bits[i]);                                \
    }                                                                   \
}

/* Marks 64-bit word index 'OFS' as present in 'MF.map', after asserting that
 * neither it nor any later word has been marked yet. */
#define miniflow_set_map(MF, OFS)            \
{                                            \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS));  \
    flowmap_set(&MF.map, (OFS), 1);          \
}

/* Asserts that 64-bit word index 'OFS' is the last one marked in 'MF.map'.
 * Enclosed in a block so that it expands to a single statement. */
#define miniflow_assert_in_map(MF, OFS)                  \
{                                                        \
    MINIFLOW_ASSERT(flowmap_is_set(&MF.map, (OFS)));     \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS) + 1);          \
}

/* In every macro below 'OFS' is a byte offset into struct flow.  It is always
 * parenthesized before being divided by 8, so that an expression argument
 * such as 'base + 4' selects the intended 64-bit word. */

/* Stores 'VALUE' as a whole 64-bit word and advances to the next word. */
#define miniflow_push_uint64_(MF, OFS, VALUE)              \
{                                                          \
    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0);   \
    *MF.data++ = (VALUE);                                  \
    miniflow_set_map(MF, (OFS) / 8);                       \
}

#define miniflow_push_be64_(MF, OFS, VALUE)                     \
    miniflow_push_uint64_(MF, OFS, (OVS_FORCE uint64_t)(VALUE))

/* Stores 'VALUE' into the first or second 32-bit half of the current word.
 * Writing the second half completes the word and advances to the next. */
#define miniflow_push_uint32_(MF, OFS, VALUE)   \
{                                               \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
                                                \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, (OFS) / 8);        \
        *(uint32_t *)MF.data = (VALUE);         \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, (OFS) / 8);  \
        *((uint32_t *)MF.data + 1) = (VALUE);   \
        MF.data++;                              \
    }                                           \
}

#define miniflow_push_be32_(MF, OFS, VALUE)                     \
    miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE))

/* Stores 'VALUE' into one of the four 16-bit slots of the current word.
 * Writing the last slot completes the word and advances to the next. */
#define miniflow_push_uint16_(MF, OFS, VALUE)   \
{                                               \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
                                                \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, (OFS) / 8);        \
        *(uint16_t *)MF.data = (VALUE);         \
    } else if ((OFS) % 8 == 2) {                \
        miniflow_assert_in_map(MF, (OFS) / 8);  \
        *((uint16_t *)MF.data + 1) = (VALUE);   \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, (OFS) / 8);  \
        *((uint16_t *)MF.data + 2) = (VALUE);   \
    } else if ((OFS) % 8 == 6) {                \
        miniflow_assert_in_map(MF, (OFS) / 8);  \
        *((uint16_t *)MF.data + 3) = (VALUE);   \
        MF.data++;                              \
    }                                           \
}

/* Stores 'VALUE' into one of the eight bytes of the current word.  Writing
 * the last byte completes the word and advances to the next. */
#define miniflow_push_uint8_(MF, OFS, VALUE)            \
{                                                       \
    MINIFLOW_ASSERT(MF.data < MF.end);                  \
                                                        \
    if ((OFS) % 8 == 0) {                               \
        miniflow_set_map(MF, (OFS) / 8);                \
        *(uint8_t *)MF.data = (VALUE);                  \
    } else if ((OFS) % 8 == 7) {                        \
        miniflow_assert_in_map(MF, (OFS) / 8);          \
        *((uint8_t *)MF.data + 7) = (VALUE);            \
        MF.data++;                                      \
    } else {                                            \
        miniflow_assert_in_map(MF, (OFS) / 8);          \
        *((uint8_t *)MF.data + ((OFS) % 8)) = (VALUE);  \
    }                                                   \
}

/* Zeroes the rest of the current word, starting at byte 'OFS' % 8, and
 * advances to the next word. */
#define miniflow_pad_to_64_(MF, OFS)                            \
{                                                               \
    MINIFLOW_ASSERT((OFS) % 8 != 0);                            \
    miniflow_assert_in_map(MF, (OFS) / 8);                      \
                                                                \
    memset((uint8_t *)MF.data + (OFS) % 8, 0, 8 - (OFS) % 8);   \
    MF.data++;                                                  \
}

/* Starts a new word whose first field is not 64-bit aligned: marks the word
 * in the map and zeroes its leading 'OFS' % 8 bytes. */
#define miniflow_pad_from_64_(MF, OFS)                          \
{                                                               \
    MINIFLOW_ASSERT(MF.data < MF.end);                          \
                                                                \
    MINIFLOW_ASSERT((OFS) % 8 != 0);                            \
    miniflow_set_map(MF, (OFS) / 8);                            \
                                                                \
    memset((uint8_t *)MF.data, 0, (OFS) % 8);                   \
}

/* 'VALUE' is parenthesized before the cast so the cast applies to the whole
 * argument expression.  No trailing semicolon: the expansion is a block. */
#define miniflow_push_be16_(MF, OFS, VALUE)                     \
    miniflow_push_uint16_(MF, OFS, (OVS_FORCE uint16_t)(VALUE))

#define miniflow_push_be8_(MF, OFS, VALUE)                     \
    miniflow_push_uint8_(MF, OFS, (OVS_FORCE uint8_t)(VALUE))

/* Marks 'N_WORDS' consecutive 64-bit words starting at word index 'OFS' as
 * present in 'MF.map'. */
#define miniflow_set_maps(MF, OFS, N_WORDS)                     \
{                                                               \
    size_t ofs = (OFS);                                         \
    size_t n_words = (N_WORDS);                                 \
                                                                \
    MINIFLOW_ASSERT(n_words && MF.data + n_words <= MF.end);    \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, ofs);                       \
    flowmap_set(&MF.map, ofs, n_words);                         \
}

/* Copies 'N_WORDS' 64-bit words from 'VALUEP' with memcpy().
 * Data at 'valuep' may be unaligned. */
#define miniflow_push_words_(MF, OFS, VALUEP, N_WORDS)          \
{                                                               \
    MINIFLOW_ASSERT((OFS) % 8 == 0);                            \
    miniflow_set_maps(MF, (OFS) / 8, (N_WORDS));                \
    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data);     \
    MF.data += (N_WORDS);                                       \
}

/* Push 32-bit words padded to 64-bits: when 'N_WORDS' is odd, the unused
 * upper half of the last 64-bit word is zeroed. */
#define miniflow_push_words_32_(MF, OFS, VALUEP, N_WORDS)               \
{                                                                       \
    miniflow_set_maps(MF, (OFS) / 8, DIV_ROUND_UP((N_WORDS), 2));       \
    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof(uint32_t));            \
    MF.data += DIV_ROUND_UP((N_WORDS), 2);                              \
    if ((N_WORDS) & 1) {                                                \
        *((uint32_t *)MF.data - 1) = 0;                                 \
    }                                                                   \
}

/* Data at 'valuep' may be unaligned. */
/* MACs start 64-aligned, and must be followed by other data or padding:
 * two words are marked in the map and both addresses are copied, but only
 * the first word is completed here. */
#define miniflow_push_macs_(MF, OFS, VALUEP)                    \
{                                                               \
    miniflow_set_maps(MF, (OFS) / 8, 2);                        \
    memcpy(MF.data, (VALUEP), 2 * ETH_ADDR_LEN);                \
    MF.data += 1;                   /* First word only. */      \
}

/* Field-name front ends: each derives the byte offset from the struct flow
 * member 'FIELD' and forwards to the offset-based macro above. */
#define miniflow_push_uint32(MF, FIELD, VALUE)                      \
    miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_be32(MF, FIELD, VALUE)                        \
    miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_uint16(MF, FIELD, VALUE)                      \
    miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_be16(MF, FIELD, VALUE)                        \
    miniflow_push_be16_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_uint8(MF, FIELD, VALUE)                      \
    miniflow_push_uint8_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_pad_to_64(MF, FIELD)                       \
    miniflow_pad_to_64_(MF, OFFSETOFEND(struct flow, FIELD))

#define miniflow_pad_from_64(MF, FIELD)                       \
    miniflow_pad_from_64_(MF, offsetof(struct flow, FIELD))

#define miniflow_push_words(MF, FIELD, VALUEP, N_WORDS)                 \
    miniflow_push_words_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)

#define miniflow_push_words_32(MF, FIELD, VALUEP, N_WORDS)              \
    miniflow_push_words_32_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)

#define miniflow_push_macs(MF, FIELD, VALUEP)                       \
    miniflow_push_macs_(MF, offsetof(struct flow, FIELD), VALUEP)

/* Return the pointer to the miniflow data when called BEFORE the corresponding
 * push. */
#define miniflow_pointer(MF, FIELD)                                       \
    ((void *)((uint8_t *)MF.data + ((offsetof(struct flow, FIELD)) % 8)))
330
331
/* Pulls the MPLS headers at '*datap' and returns the count of them. */
332
static inline int
333
parse_mpls(const void **datap, size_t *sizep)
334
924
{
335
924
    const struct mpls_hdr *mh;
336
924
    int count = 0;
337
338
1.14M
    while ((mh = data_try_pull(datap, sizep, sizeof *mh))) {
339
1.14M
        count++;
340
1.14M
        if (mh->mpls_lse.lo & htons(1 << MPLS_BOS_SHIFT)) {
341
250
            break;
342
250
        }
343
1.14M
    }
344
924
    return MIN(count, FLOW_MAX_MPLS_LABELS);
345
924
}
346
347
/* passed vlan_hdrs arg must be at least size FLOW_MAX_VLAN_HEADERS. */
348
static inline ALWAYS_INLINE size_t
349
parse_vlan(const void **datap, size_t *sizep, union flow_vlan_hdr *vlan_hdrs)
350
69.3k
{
351
69.3k
    const ovs_be16 *eth_type;
352
353
69.3k
    data_pull(datap, sizep, ETH_ADDR_LEN * 2);
354
355
69.3k
    eth_type = *datap;
356
357
69.3k
    size_t n;
358
70.4k
    for (n = 0; eth_type_vlan(*eth_type) && n < flow_vlan_limit; n++) {
359
1.09k
        if (OVS_UNLIKELY(*sizep < sizeof(ovs_be32) + sizeof(ovs_be16))) {
360
36
            break;
361
36
        }
362
363
1.06k
        memset(vlan_hdrs + n, 0, sizeof(union flow_vlan_hdr));
364
1.06k
        const ovs_16aligned_be32 *qp = data_pull(datap, sizep, sizeof *qp);
365
1.06k
        vlan_hdrs[n].qtag = get_16aligned_be32(qp);
366
1.06k
        vlan_hdrs[n].tci |= htons(VLAN_CFI);
367
1.06k
        eth_type = *datap;
368
1.06k
    }
369
69.3k
    return n;
370
69.3k
}
371
372
static inline ALWAYS_INLINE ovs_be16
373
parse_ethertype(const void **datap, size_t *sizep)
374
69.3k
{
375
69.3k
    const struct llc_snap_header *llc;
376
69.3k
    ovs_be16 proto;
377
378
69.3k
    proto = *(ovs_be16 *) data_pull(datap, sizep, sizeof proto);
379
69.3k
    if (OVS_LIKELY(ntohs(proto) >= ETH_TYPE_MIN)) {
380
62.3k
        return proto;
381
62.3k
    }
382
383
6.98k
    if (OVS_UNLIKELY(*sizep < sizeof *llc)) {
384
913
        return htons(FLOW_DL_TYPE_NONE);
385
913
    }
386
387
6.06k
    llc = *datap;
388
6.06k
    if (OVS_UNLIKELY(llc->llc.llc_dsap != LLC_DSAP_SNAP
389
6.06k
                     || llc->llc.llc_ssap != LLC_SSAP_SNAP
390
6.06k
                     || llc->llc.llc_cntl != LLC_CNTL_SNAP
391
6.06k
                     || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
392
6.06k
                               sizeof llc->snap.snap_org))) {
393
5.47k
        return htons(FLOW_DL_TYPE_NONE);
394
5.47k
    }
395
396
593
    data_pull(datap, sizep, sizeof *llc);
397
398
593
    if (OVS_LIKELY(ntohs(llc->snap.snap_type) >= ETH_TYPE_MIN)) {
399
181
        return llc->snap.snap_type;
400
181
    }
401
402
412
    return htons(FLOW_DL_TYPE_NONE);
403
593
}
404
405
/* Returns 'true' if the packet is an ND packet. In that case the '*nd_target'
406
 * and 'arp_buf[]' are filled in.  If the packet is not an ND packet, 'false'
407
 * is returned and no values are filled in on '*nd_target' or 'arp_buf[]'. */
408
static inline bool
409
parse_icmpv6(const void **datap, size_t *sizep,
410
             const struct icmp6_data_header *icmp6,
411
             ovs_be32 *rso_flags, const struct in6_addr **nd_target,
412
             struct eth_addr arp_buf[2], uint8_t *opt_type)
413
5.21k
{
414
5.21k
    if (icmp6->icmp6_base.icmp6_code != 0 ||
415
5.21k
        (icmp6->icmp6_base.icmp6_type != ND_NEIGHBOR_SOLICIT &&
416
4.39k
         icmp6->icmp6_base.icmp6_type != ND_NEIGHBOR_ADVERT)) {
417
1.46k
        return false;
418
1.46k
    }
419
420
3.75k
    arp_buf[0] = eth_addr_zero;
421
3.75k
    arp_buf[1] = eth_addr_zero;
422
3.75k
    *opt_type = 0;
423
424
3.75k
    *rso_flags = get_16aligned_be32(icmp6->icmp6_data.be32);
425
426
3.75k
    *nd_target = data_try_pull(datap, sizep, sizeof **nd_target);
427
3.75k
    if (OVS_UNLIKELY(!*nd_target)) {
428
224
        return true;
429
224
    }
430
431
11.6k
    while (*sizep >= 8) {
432
        /* The minimum size of an option is 8 bytes, which also is
433
         * the size of Ethernet link-layer options. */
434
10.3k
        const struct ovs_nd_lla_opt *lla_opt = *datap;
435
10.3k
        int opt_len = lla_opt->len * ND_LLA_OPT_LEN;
436
437
10.3k
        if (!opt_len || opt_len > *sizep) {
438
1.32k
            return true;
439
1.32k
        }
440
441
        /* Store the link layer address if the appropriate option is
442
         * provided.  It is considered an error if the same link
443
         * layer option is specified twice. */
444
9.06k
        if (lla_opt->type == ND_OPT_SOURCE_LINKADDR && opt_len == 8) {
445
1.83k
            if (OVS_LIKELY(eth_addr_is_zero(arp_buf[0]))) {
446
1.65k
                arp_buf[0] = lla_opt->mac;
447
                /* We use only first option type present in ND packet. */
448
1.65k
                if (*opt_type == 0) {
449
817
                    *opt_type = lla_opt->type;
450
817
                }
451
1.65k
            } else {
452
175
                goto invalid;
453
175
            }
454
7.23k
        } else if (lla_opt->type == ND_OPT_TARGET_LINKADDR && opt_len == 8) {
455
3.32k
            if (OVS_LIKELY(eth_addr_is_zero(arp_buf[1]))) {
456
2.56k
                arp_buf[1] = lla_opt->mac;
457
                /* We use only first option type present in ND packet. */
458
2.56k
                if (*opt_type == 0) {
459
1.44k
                    *opt_type = lla_opt->type;
460
1.44k
                }
461
2.56k
            } else {
462
764
                goto invalid;
463
764
            }
464
3.32k
        }
465
466
8.12k
        if (OVS_UNLIKELY(!data_try_pull(datap, sizep, opt_len))) {
467
0
            return true;
468
0
        }
469
8.12k
    }
470
1.26k
    return true;
471
472
939
invalid:
473
939
    *nd_target = NULL;
474
939
    arp_buf[0] = eth_addr_zero;
475
939
    arp_buf[1] = eth_addr_zero;
476
939
    return true;
477
3.52k
}
478
479
static inline bool
480
parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
481
                      uint8_t *nw_frag,
482
                      const struct ovs_16aligned_ip6_frag **frag_hdr)
483
38.2k
{
484
38.2k
    *frag_hdr = NULL;
485
328k
    while (1) {
486
328k
        if (OVS_LIKELY((*nw_proto != IPPROTO_HOPOPTS)
487
328k
                       && (*nw_proto != IPPROTO_ROUTING)
488
328k
                       && (*nw_proto != IPPROTO_DSTOPTS)
489
328k
                       && (*nw_proto != IPPROTO_AH)
490
328k
                       && (*nw_proto != IPPROTO_FRAGMENT))) {
491
            /* It's either a terminal header (e.g., TCP, UDP) or one we
492
             * don't understand.  In either case, we're done with the
493
             * packet, so use it to fill in 'nw_proto'. */
494
35.4k
            return true;
495
35.4k
        }
496
497
        /* We only verify that at least 8 bytes of the next header are
498
         * available, but many of these headers are longer.  Ensure that
499
         * accesses within the extension header are within those first 8
500
         * bytes. All extension headers are required to be at least 8
501
         * bytes. */
502
293k
        if (OVS_UNLIKELY(*sizep < 8)) {
503
1.03k
            return false;
504
1.03k
        }
505
506
292k
        if ((*nw_proto == IPPROTO_HOPOPTS)
507
292k
            || (*nw_proto == IPPROTO_ROUTING)
508
292k
            || (*nw_proto == IPPROTO_DSTOPTS)) {
509
            /* These headers, while different, have the fields we care
510
             * about in the same location and with the same
511
             * interpretation. */
512
285k
            const struct ip6_ext *ext_hdr = *datap;
513
285k
            *nw_proto = ext_hdr->ip6e_nxt;
514
285k
            if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
515
285k
                                            (ext_hdr->ip6e_len + 1) * 8))) {
516
1.61k
                return false;
517
1.61k
            }
518
285k
        } else if (*nw_proto == IPPROTO_AH) {
519
            /* A standard AH definition isn't available, but the fields
520
             * we care about are in the same location as the generic
521
             * option header--only the header length is calculated
522
             * differently. */
523
1.82k
            const struct ip6_ext *ext_hdr = *datap;
524
1.82k
            *nw_proto = ext_hdr->ip6e_nxt;
525
1.82k
            if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
526
1.82k
                                            (ext_hdr->ip6e_len + 2) * 4))) {
527
80
                return false;
528
80
            }
529
4.58k
        } else if (*nw_proto == IPPROTO_FRAGMENT) {
530
4.58k
            *frag_hdr = *datap;
531
532
4.58k
            *nw_proto = (*frag_hdr)->ip6f_nxt;
533
4.58k
            if (!data_try_pull(datap, sizep, sizeof **frag_hdr)) {
534
0
                return false;
535
0
            }
536
537
            /* We only process the first fragment. */
538
4.58k
            if ((*frag_hdr)->ip6f_offlg != htons(0)) {
539
2.55k
                *nw_frag = FLOW_NW_FRAG_ANY;
540
2.55k
                if (((*frag_hdr)->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
541
134
                    *nw_frag |= FLOW_NW_FRAG_LATER;
542
134
                    *nw_proto = IPPROTO_FRAGMENT;
543
134
                    return true;
544
134
                }
545
2.55k
            }
546
4.58k
        }
547
292k
    }
548
38.2k
}
549
550
/* Parses IPv6 extension headers until reaching a terminal header or one that
 * is not understood.
 *
 * On entry 'datap' points to the first extension header, 'sizep' holds the
 * number of bytes remaining and 'nw_proto' the type of that first header.
 * All three are updated as headers are consumed.
 *
 * '*frag_hdr' is set to the fragment header if one is found and to NULL
 * otherwise.  For a first fragment, parsing carries on past the fragment
 * header as usual.  For a later fragment, parsing stops, 'nw_proto' becomes
 * IPPROTO_FRAGMENT and FLOW_NW_FRAG_LATER is set in 'nw_frag'.  Both first
 * and later fragments have FLOW_NW_FRAG_ANY set in 'nw_frag'.
 *
 * Returns false if there was a problem parsing the extension headers, true
 * otherwise.  This is the externally visible entry point; the work is done
 * by parse_ipv6_ext_hdrs__(). */
bool
parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                    uint8_t *nw_frag,
                    const struct ovs_16aligned_ip6_frag **frag_hdr)
{
    return parse_ipv6_ext_hdrs__(datap, sizep, nw_proto, nw_frag, frag_hdr);
}
574
575
bool
576
parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
577
7.83k
{
578
7.83k
    const struct nsh_hdr *nsh = (const struct nsh_hdr *) *datap;
579
7.83k
    uint8_t version, length, flags, ttl;
580
581
    /* Check if it is long enough for NSH header, doesn't support
582
     * MD type 2 yet
583
     */
584
7.83k
    if (OVS_UNLIKELY(*sizep < NSH_BASE_HDR_LEN)) {
585
70
        return false;
586
70
    }
587
588
7.76k
    version = nsh_get_ver(nsh);
589
7.76k
    flags = nsh_get_flags(nsh);
590
7.76k
    length = nsh_hdr_len(nsh);
591
7.76k
    ttl = nsh_get_ttl(nsh);
592
593
7.76k
    if (OVS_UNLIKELY(length > *sizep || version != 0)) {
594
3.03k
        return false;
595
3.03k
    }
596
597
4.73k
    key->flags = flags;
598
4.73k
    key->ttl = ttl;
599
4.73k
    key->mdtype = nsh->md_type;
600
4.73k
    key->np = nsh->next_proto;
601
4.73k
    key->path_hdr = nsh_get_path_hdr(nsh);
602
603
4.73k
    switch (key->mdtype) {
604
329
        case NSH_M_TYPE1:
605
329
            if (length != NSH_M_TYPE1_LEN) {
606
150
                return false;
607
150
            }
608
895
            for (size_t i = 0; i < 4; i++) {
609
716
                key->context[i] = get_16aligned_be32(&nsh->md1.context[i]);
610
716
            }
611
179
            break;
612
116
        case NSH_M_TYPE2:
613
            /* Don't support MD type 2 metedata parsing yet */
614
116
            if (length < NSH_BASE_HDR_LEN) {
615
83
                return false;
616
83
            }
617
618
33
            memset(key->context, 0, sizeof(key->context));
619
33
            break;
620
4.28k
        default:
621
            /* We don't parse other context headers yet. */
622
4.28k
            memset(key->context, 0, sizeof(key->context));
623
4.28k
            break;
624
4.73k
    }
625
626
4.50k
    data_pull(datap, sizep, length);
627
628
4.50k
    return true;
629
4.73k
}
630
631
/* This does the same thing as miniflow_extract() with a full-size 'flow' as
632
 * the destination. */
633
void
634
flow_extract(struct dp_packet *packet, struct flow *flow)
635
120k
{
636
120k
    struct {
637
120k
        struct miniflow mf;
638
120k
        uint64_t buf[FLOW_U64S];
639
120k
    } m;
640
641
120k
    COVERAGE_INC(flow_extract);
642
643
120k
    miniflow_extract(packet, &m.mf);
644
120k
    miniflow_expand(&m.mf, flow);
645
120k
}
646
647
static inline bool
648
ipv4_sanity_check(const struct ip_header *nh, size_t size,
649
                  int *ip_lenp, uint16_t *tot_lenp)
650
13.7k
{
651
13.7k
    int ip_len;
652
13.7k
    uint16_t tot_len;
653
654
13.7k
    if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
655
295
        COVERAGE_INC(miniflow_extract_ipv4_pkt_too_short);
656
295
        return false;
657
295
    }
658
13.4k
    ip_len = IP_IHL(nh->ip_ihl_ver) * 4;
659
660
13.4k
    if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) {
661
226
        COVERAGE_INC(miniflow_extract_ipv4_pkt_len_error);
662
226
        return false;
663
226
    }
664
665
13.1k
    tot_len = ntohs(nh->ip_tot_len);
666
13.1k
    if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len ||
667
13.1k
                size - tot_len > UINT16_MAX)) {
668
1.71k
        COVERAGE_INC(miniflow_extract_ipv4_pkt_len_error);
669
1.71k
        return false;
670
1.71k
    }
671
672
11.4k
    *ip_lenp = ip_len;
673
11.4k
    *tot_lenp = tot_len;
674
675
11.4k
    return true;
676
13.1k
}
677
678
static inline uint8_t
679
ipv4_get_nw_frag(const struct ip_header *nh)
680
11.4k
{
681
11.4k
    uint8_t nw_frag = 0;
682
683
11.4k
    if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
684
2.03k
        nw_frag = FLOW_NW_FRAG_ANY;
685
2.03k
        if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
686
1.08k
            nw_frag |= FLOW_NW_FRAG_LATER;
687
1.08k
        }
688
2.03k
    }
689
690
11.4k
    return nw_frag;
691
11.4k
}
692
693
static inline bool
694
ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size)
695
40.4k
{
696
40.4k
    uint16_t plen;
697
698
40.4k
    if (OVS_UNLIKELY(size < sizeof *nh)) {
699
347
        COVERAGE_INC(miniflow_extract_ipv6_pkt_too_short);
700
347
        return false;
701
347
    }
702
703
40.1k
    plen = ntohs(nh->ip6_plen);
704
40.1k
    if (OVS_UNLIKELY(plen + IPV6_HEADER_LEN > size)) {
705
1.78k
        COVERAGE_INC(miniflow_extract_ipv6_pkt_len_error);
706
1.78k
        return false;
707
1.78k
    }
708
709
38.3k
    if (OVS_UNLIKELY(size - (plen + IPV6_HEADER_LEN) > UINT16_MAX)) {
710
44
        COVERAGE_INC(miniflow_extract_ipv6_pkt_len_error);
711
44
        return false;
712
44
    }
713
714
38.2k
    return true;
715
38.3k
}
716
717
static void
718
dump_invalid_packet(struct dp_packet *packet, const char *reason)
719
0
{
720
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
721
0
    struct ds ds = DS_EMPTY_INITIALIZER;
722
0
    size_t size;
723
724
0
    if (VLOG_DROP_DBG(&rl)) {
725
0
        return;
726
0
    }
727
0
    size = dp_packet_size(packet);
728
0
    ds_put_hex_dump(&ds, dp_packet_data(packet), size, 0, false);
729
0
    VLOG_DBG("invalid packet for %s: port %"PRIu32", size %"PRIuSIZE"\n%s",
730
0
             reason, packet->md.in_port.odp_port, size, ds_cstr(&ds));
731
0
    ds_destroy(&ds);
732
0
}
733
734
/* Initializes 'dst' from 'packet' and 'md', taking the packet type into
735
 * account.  'dst' must have enough space for FLOW_U64S * 8 bytes.
736
 *
737
 * Initializes the layer offsets as follows:
738
 *
739
 *    - packet->l2_5_ofs to the
740
 *          * the start of the MPLS shim header. Can be zero, if the
741
 *            packet is of type (OFPHTN_ETHERTYPE, ETH_TYPE_MPLS).
742
 *          * UINT16_MAX when there is no MPLS shim header.
743
 *
744
 *    - packet->l3_ofs is set to
745
 *          * zero if the packet_type is in name space OFPHTN_ETHERTYPE
746
 *            and there is no MPLS shim header.
747
 *          * just past the Ethernet header, or just past the vlan_header if
748
 *            one is present, to the first byte of the payload of the
749
 *            Ethernet frame if the packet type is Ethernet and there is
750
 *            no MPLS shim header.
751
 *          * just past the MPLS label stack to the first byte of the MPLS
752
 *            payload if there is at least one MPLS shim header.
753
 *          * UINT16_MAX if the packet type is Ethernet and the frame is
754
 *            too short to contain an Ethernet header.
755
 *
756
 *    - packet->l4_ofs is set to just past the IPv4 or IPv6 header, if one is
757
 *      present and the packet has at least the content used for the fields
758
 *      of interest for the flow, otherwise UINT16_MAX.
759
 */
760
void
761
miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
762
120k
{
763
    /* Add code to this function (or its callees) to extract new fields. */
764
120k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
765
766
120k
    const struct pkt_metadata *md = &packet->md;
767
120k
    const void *data = dp_packet_data(packet);
768
120k
    size_t size = dp_packet_size(packet);
769
120k
    ovs_be32 packet_type = packet->packet_type;
770
120k
    uint64_t *values = miniflow_values(dst);
771
120k
    struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
772
120k
                         values + FLOW_U64S };
773
120k
    const char *frame;
774
120k
    ovs_be16 dl_type = OVS_BE16_MAX;
775
120k
    uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
776
120k
    uint8_t *ct_nw_proto_p = NULL;
777
120k
    ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;
778
779
    /* Metadata. */
780
120k
    if (flow_tnl_dst_is_set(&md->tunnel)) {
781
0
        miniflow_push_words(mf, tunnel, &md->tunnel,
782
0
                            offsetof(struct flow_tnl, metadata) /
783
0
                            sizeof(uint64_t));
784
785
0
        if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
786
0
            if (md->tunnel.metadata.present.map) {
787
0
                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
788
0
                                    sizeof md->tunnel.metadata /
789
0
                                    sizeof(uint64_t));
790
0
            }
791
0
        } else {
792
0
            if (md->tunnel.metadata.present.len) {
793
0
                miniflow_push_words(mf, tunnel.metadata.present,
794
0
                                    &md->tunnel.metadata.present, 1);
795
0
                miniflow_push_words(mf, tunnel.metadata.opts.gnv,
796
0
                                    md->tunnel.metadata.opts.gnv,
797
0
                                    DIV_ROUND_UP(md->tunnel.metadata.present.len,
798
0
                                                 sizeof(uint64_t)));
799
0
            }
800
0
        }
801
0
    }
802
120k
    if (md->skb_priority || md->pkt_mark) {
803
0
        miniflow_push_uint32(mf, skb_priority, md->skb_priority);
804
0
        miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
805
0
    }
806
120k
    miniflow_push_uint32(mf, dp_hash, md->dp_hash);
807
120k
    miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
808
120k
    if (md->ct_state) {
809
0
        miniflow_push_uint32(mf, recirc_id, md->recirc_id);
810
0
        miniflow_push_uint8(mf, ct_state, md->ct_state);
811
0
        ct_nw_proto_p = miniflow_pointer(mf, ct_nw_proto);
812
0
        miniflow_push_uint8(mf, ct_nw_proto, 0);
813
0
        miniflow_push_uint16(mf, ct_zone, md->ct_zone);
814
0
        miniflow_push_uint32(mf, ct_mark, md->ct_mark);
815
0
        miniflow_push_be32(mf, packet_type, packet_type);
816
0
        if (!ovs_u128_is_zero(md->ct_label)) {
817
0
            miniflow_push_words(mf, ct_label, &md->ct_label,
818
0
                                sizeof md->ct_label / sizeof(uint64_t));
819
0
        }
820
120k
    } else {
821
120k
        if (md->recirc_id) {
822
0
            miniflow_push_uint32(mf, recirc_id, md->recirc_id);
823
0
            miniflow_pad_to_64(mf, recirc_id);
824
0
        }
825
120k
        miniflow_pad_from_64(mf, packet_type);
826
120k
        miniflow_push_be32(mf, packet_type, packet_type);
827
120k
    }
828
829
    /* Initialize packet's layer pointer and offsets. */
830
120k
    frame = data;
831
120k
    dp_packet_reset_offsets(packet);
832
833
120k
    if (packet_type == htonl(PT_ETH)) {
834
        /* Must have full Ethernet header to proceed. */
835
112k
        if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {
836
45.3k
            goto out;
837
67.3k
        } else {
838
            /* Link layer. */
839
67.3k
            ASSERT_SEQUENTIAL(dl_dst, dl_src);
840
67.3k
            miniflow_push_macs(mf, dl_dst, data);
841
842
            /* VLAN */
843
67.3k
            union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
844
67.3k
            size_t num_vlans = parse_vlan(&data, &size, vlans);
845
846
67.3k
            dl_type = parse_ethertype(&data, &size);
847
67.3k
            miniflow_push_be16(mf, dl_type, dl_type);
848
67.3k
            miniflow_pad_to_64(mf, dl_type);
849
67.3k
            if (num_vlans > 0) {
850
647
                miniflow_push_words_32(mf, vlans, vlans, num_vlans);
851
647
            }
852
853
67.3k
        }
854
112k
    } else {
855
        /* Take dl_type from packet_type. */
856
8.12k
        dl_type = pt_ns_type_be(packet_type);
857
8.12k
        miniflow_pad_from_64(mf, dl_type);
858
8.12k
        miniflow_push_be16(mf, dl_type, dl_type);
859
        /* Do not push vlan_tci, pad instead */
860
8.12k
        miniflow_pad_to_64(mf, dl_type);
861
8.12k
    }
862
863
    /* Parse mpls. */
864
75.5k
    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
865
924
        int count;
866
924
        const void *mpls = data;
867
868
924
        packet->l2_5_ofs = (char *)data - frame;
869
924
        count = parse_mpls(&data, &size);
870
924
        miniflow_push_words_32(mf, mpls_lse, mpls, count);
871
924
    }
872
873
    /* Network layer. */
874
75.5k
    packet->l3_ofs = (char *)data - frame;
875
876
75.5k
    nw_frag = 0;
877
75.5k
    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
878
13.0k
        const struct ip_header *nh = data;
879
13.0k
        int ip_len;
880
13.0k
        uint16_t tot_len;
881
882
13.0k
        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
883
2.14k
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
884
0
                dump_invalid_packet(packet, "ipv4_sanity_check");
885
0
            }
886
2.14k
            goto out;
887
2.14k
        }
888
10.8k
        dp_packet_set_l2_pad_size(packet, size - tot_len);
889
10.8k
        size = tot_len;   /* Never pull padding. */
890
891
        /* Push both source and destination address at once. */
892
10.8k
        miniflow_push_words(mf, nw_src, &nh->ip_src, 1);
893
10.8k
        if (ct_nw_proto_p && !md->ct_orig_tuple_ipv6) {
894
0
            *ct_nw_proto_p = md->ct_orig_tuple.ipv4.ipv4_proto;
895
0
            if (*ct_nw_proto_p) {
896
0
                miniflow_push_words(mf, ct_nw_src,
897
0
                                    &md->ct_orig_tuple.ipv4.ipv4_src, 1);
898
0
                ct_tp_src = md->ct_orig_tuple.ipv4.src_port;
899
0
                ct_tp_dst = md->ct_orig_tuple.ipv4.dst_port;
900
0
            }
901
0
        }
902
903
10.8k
        miniflow_push_be32(mf, ipv6_label, 0); /* Padding for IPv4. */
904
905
10.8k
        nw_tos = nh->ip_tos;
906
10.8k
        nw_ttl = nh->ip_ttl;
907
10.8k
        nw_proto = nh->ip_proto;
908
10.8k
        nw_frag = ipv4_get_nw_frag(nh);
909
10.8k
        data_pull(&data, &size, ip_len);
910
62.5k
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
911
39.7k
        const struct ovs_16aligned_ip6_hdr *nh = data;
912
39.7k
        ovs_be32 tc_flow;
913
39.7k
        uint16_t plen;
914
915
39.7k
        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
916
2.12k
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
917
0
                dump_invalid_packet(packet, "ipv6_sanity_check");
918
0
            }
919
2.12k
            goto out;
920
2.12k
        }
921
37.6k
        data_pull(&data, &size, sizeof *nh);
922
923
37.6k
        plen = ntohs(nh->ip6_plen);
924
37.6k
        dp_packet_set_l2_pad_size(packet, size - plen);
925
37.6k
        size = plen;   /* Never pull padding. */
926
927
37.6k
        miniflow_push_words(mf, ipv6_src, &nh->ip6_src,
928
37.6k
                            sizeof nh->ip6_src / 8);
929
37.6k
        miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
930
37.6k
                            sizeof nh->ip6_dst / 8);
931
37.6k
        if (ct_nw_proto_p && md->ct_orig_tuple_ipv6) {
932
0
            *ct_nw_proto_p = md->ct_orig_tuple.ipv6.ipv6_proto;
933
0
            if (*ct_nw_proto_p) {
934
0
                miniflow_push_words(mf, ct_ipv6_src,
935
0
                                    &md->ct_orig_tuple.ipv6.ipv6_src,
936
0
                                    2 *
937
0
                                    sizeof md->ct_orig_tuple.ipv6.ipv6_src / 8);
938
0
                ct_tp_src = md->ct_orig_tuple.ipv6.src_port;
939
0
                ct_tp_dst = md->ct_orig_tuple.ipv6.dst_port;
940
0
            }
941
0
        }
942
943
37.6k
        tc_flow = get_16aligned_be32(&nh->ip6_flow);
944
37.6k
        nw_tos = ntohl(tc_flow) >> 20;
945
37.6k
        nw_ttl = nh->ip6_hlim;
946
37.6k
        nw_proto = nh->ip6_nxt;
947
948
37.6k
        const struct ovs_16aligned_ip6_frag *frag_hdr;
949
37.6k
        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag,
950
37.6k
                                   &frag_hdr)) {
951
2.60k
            goto out;
952
2.60k
        }
953
954
        /* This needs to be after the parse_ipv6_ext_hdrs__() call because it
955
         * leaves the nw_frag word uninitialized. */
956
35.0k
        ASSERT_SEQUENTIAL(ipv6_label, nw_frag);
957
35.0k
        ovs_be32 label = tc_flow & htonl(IPV6_LABEL_MASK);
958
35.0k
        miniflow_push_be32(mf, ipv6_label, label);
959
35.0k
    } else {
960
22.7k
        if (dl_type == htons(ETH_TYPE_ARP) ||
961
22.7k
            dl_type == htons(ETH_TYPE_RARP)) {
962
1.25k
            struct eth_addr arp_buf[2];
963
1.25k
            const struct arp_eth_header *arp = (const struct arp_eth_header *)
964
1.25k
                data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);
965
966
1.25k
            if (OVS_LIKELY(arp) && OVS_LIKELY(arp->ar_hrd == htons(1))
967
1.25k
                && OVS_LIKELY(arp->ar_pro == htons(ETH_TYPE_IP))
968
1.25k
                && OVS_LIKELY(arp->ar_hln == ETH_ADDR_LEN)
969
1.25k
                && OVS_LIKELY(arp->ar_pln == 4)) {
970
51
                miniflow_push_be32(mf, nw_src,
971
51
                                   get_16aligned_be32(&arp->ar_spa));
972
51
                miniflow_push_be32(mf, nw_dst,
973
51
                                   get_16aligned_be32(&arp->ar_tpa));
974
975
                /* We only match on the lower 8 bits of the opcode. */
976
51
                if (OVS_LIKELY(ntohs(arp->ar_op) <= 0xff)) {
977
17
                    miniflow_push_be32(mf, ipv6_label, 0); /* Pad with ARP. */
978
17
                    miniflow_push_be32(mf, nw_frag, htonl(ntohs(arp->ar_op)));
979
17
                }
980
981
                /* Must be adjacent. */
982
51
                ASSERT_SEQUENTIAL(arp_sha, arp_tha);
983
984
51
                arp_buf[0] = arp->ar_sha;
985
51
                arp_buf[1] = arp->ar_tha;
986
51
                miniflow_push_macs(mf, arp_sha, arp_buf);
987
51
                miniflow_pad_to_64(mf, arp_tha);
988
51
            }
989
21.5k
        } else if (dl_type == htons(ETH_TYPE_NSH)) {
990
7.83k
            struct ovs_key_nsh nsh;
991
992
7.83k
            if (OVS_LIKELY(parse_nsh(&data, &size, &nsh))) {
993
4.50k
                miniflow_push_words(mf, nsh, &nsh,
994
4.50k
                                    sizeof(struct ovs_key_nsh) /
995
4.50k
                                    sizeof(uint64_t));
996
4.50k
            }
997
7.83k
        }
998
22.7k
        goto out;
999
22.7k
    }
1000
1001
45.8k
    packet->l4_ofs = (char *)data - frame;
1002
45.8k
    miniflow_push_be32(mf, nw_frag,
1003
45.8k
                       bytes_to_be32(nw_frag, nw_tos, nw_ttl, nw_proto));
1004
1005
45.8k
    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
1006
44.7k
        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
1007
15.1k
            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
1008
15.0k
                const struct tcp_header *tcp = data;
1009
15.0k
                size_t tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
1010
1011
15.0k
                if (OVS_LIKELY(tcp_hdr_len >= TCP_HEADER_LEN)
1012
15.0k
                    && OVS_LIKELY(size >= tcp_hdr_len)) {
1013
14.3k
                    miniflow_push_be32(mf, arp_tha.ea[2], 0);
1014
14.3k
                    miniflow_push_be32(mf, tcp_flags,
1015
14.3k
                                       TCP_FLAGS_BE32(tcp->tcp_ctl));
1016
14.3k
                    miniflow_push_be16(mf, tp_src, tcp->tcp_src);
1017
14.3k
                    miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
1018
14.3k
                    miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
1019
14.3k
                    miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1020
14.3k
                    if (dl_type == htons(ETH_TYPE_IP)) {
1021
2.82k
                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
1022
11.5k
                    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
1023
11.5k
                        dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
1024
11.5k
                    }
1025
14.3k
                }
1026
15.0k
            }
1027
29.6k
        } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
1028
14.5k
            if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
1029
14.3k
                const struct udp_header *udp = data;
1030
1031
14.3k
                miniflow_push_be16(mf, tp_src, udp->udp_src);
1032
14.3k
                miniflow_push_be16(mf, tp_dst, udp->udp_dst);
1033
14.3k
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
1034
14.3k
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1035
14.3k
                if (dl_type == htons(ETH_TYPE_IP)) {
1036
4.86k
                    dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
1037
9.47k
                } else if (dl_type == htons(ETH_TYPE_IPV6)) {
1038
9.47k
                    dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
1039
9.47k
                }
1040
14.3k
            }
1041
15.1k
        } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
1042
907
            if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
1043
867
                const struct sctp_header *sctp = data;
1044
1045
867
                miniflow_push_be16(mf, tp_src, sctp->sctp_src);
1046
867
                miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
1047
867
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
1048
867
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1049
867
            }
1050
14.2k
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
1051
5.28k
            if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
1052
4.72k
                const struct icmp_header *icmp = data;
1053
1054
4.72k
                miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
1055
4.72k
                miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
1056
4.72k
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
1057
4.72k
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1058
4.72k
            }
1059
8.93k
        } else if (OVS_LIKELY(nw_proto == IPPROTO_IGMP)) {
1060
2.64k
            if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
1061
1.82k
                const struct igmp_header *igmp = data;
1062
1063
1.82k
                miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
1064
1.82k
                miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
1065
1.82k
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
1066
1.82k
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1067
1.82k
                miniflow_push_be32(mf, igmp_group_ip4,
1068
1.82k
                                   get_16aligned_be32(&igmp->group));
1069
1.82k
                miniflow_pad_to_64(mf, igmp_group_ip4);
1070
1.82k
            }
1071
6.28k
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMPV6)) {
1072
5.32k
            if (OVS_LIKELY(size >= sizeof(struct icmp6_data_header))) {
1073
5.21k
                const struct in6_addr *nd_target;
1074
5.21k
                struct eth_addr arp_buf[2];
1075
                /* This will populate whether we received Option 1
1076
                 * or Option 2. */
1077
5.21k
                uint8_t opt_type;
1078
                /* This holds the ND Reserved field. */
1079
5.21k
                ovs_be32 rso_flags;
1080
5.21k
                const struct icmp6_data_header *icmp6;
1081
1082
5.21k
                icmp6 = data_pull(&data, &size, sizeof *icmp6);
1083
5.21k
                if (parse_icmpv6(&data, &size, icmp6,
1084
5.21k
                                 &rso_flags, &nd_target, arp_buf, &opt_type)) {
1085
3.75k
                    if (nd_target) {
1086
2.58k
                        miniflow_push_words(mf, nd_target, nd_target,
1087
2.58k
                                            sizeof *nd_target / sizeof(uint64_t));
1088
2.58k
                    }
1089
3.75k
                    miniflow_push_macs(mf, arp_sha, arp_buf);
1090
                    /* Populate options field and set the padding
1091
                     * accordingly. */
1092
3.75k
                    if (opt_type != 0) {
1093
2.26k
                        miniflow_push_be16(mf, tcp_flags, htons(opt_type));
1094
                        /* Pad to align with 64 bits.
1095
                         * This will zero out the pad3 field. */
1096
2.26k
                        miniflow_pad_to_64(mf, tcp_flags);
1097
2.26k
                    } else {
1098
                        /* Pad to align with 64 bits.
1099
                         * This will zero out the tcp_flags & pad3 field. */
1100
1.48k
                        miniflow_pad_to_64(mf, arp_tha);
1101
1.48k
                    }
1102
3.75k
                    miniflow_push_be16(mf, tp_src,
1103
3.75k
                                       htons(icmp6->icmp6_base.icmp6_type));
1104
3.75k
                    miniflow_push_be16(mf, tp_dst,
1105
3.75k
                                       htons(icmp6->icmp6_base.icmp6_code));
1106
3.75k
                    miniflow_pad_to_64(mf, tp_dst);
1107
                    /* Fill ND reserved field. */
1108
3.75k
                    miniflow_push_be32(mf, igmp_group_ip4, rso_flags);
1109
3.75k
                    miniflow_pad_to_64(mf, igmp_group_ip4);
1110
3.75k
                } else {
1111
                    /* ICMPv6 but not ND. */
1112
1.46k
                    miniflow_push_be16(mf, tp_src,
1113
1.46k
                                       htons(icmp6->icmp6_base.icmp6_type));
1114
1.46k
                    miniflow_push_be16(mf, tp_dst,
1115
1.46k
                                       htons(icmp6->icmp6_base.icmp6_code));
1116
1.46k
                    miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
1117
1.46k
                    miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
1118
1.46k
                }
1119
5.21k
            }
1120
5.32k
        }
1121
44.7k
    }
1122
120k
 out:
1123
120k
    dst->map = mf.map;
1124
120k
}
1125
1126
static ovs_be16
1127
parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p)
1128
1.99k
{
1129
1.99k
    union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
1130
1131
1.99k
    if (parse_vlan(datap, sizep, vlans) && first_vlan_tci_p) {
1132
0
        *first_vlan_tci_p = vlans[0].tci;
1133
0
    }
1134
1135
1.99k
    return parse_ethertype(datap, sizep);
1136
1.99k
}
1137
1138
/* Parses and return the TCP flags in 'packet', converted to host byte order.
1139
 * If 'packet' is not an Ethernet packet embedding TCP, returns 0.
1140
 * 'dl_type_p' will be set only if the 'packet' is an Ethernet packet.
1141
 * 'nw_frag_p' will be set only if the 'packet' is an IP packet.
1142
 * 'first_vlan_tci' will be set only if the 'packet' contains vlan header.
1143
 *
1144
 * The caller must ensure that 'packet' is at least ETH_HEADER_LEN bytes
1145
 * long.'*/
1146
uint16_t
1147
parse_tcp_flags(struct dp_packet *packet,
1148
                ovs_be16 *dl_type_p, uint8_t *nw_frag_p,
1149
                ovs_be16 *first_vlan_tci_p)
1150
1.99k
{
1151
1.99k
    const void *data = dp_packet_data(packet);
1152
1.99k
    const char *frame = (const char *)data;
1153
1.99k
    size_t size = dp_packet_size(packet);
1154
1.99k
    ovs_be16 dl_type;
1155
1.99k
    uint8_t nw_frag = 0, nw_proto = 0;
1156
1157
1.99k
    if (!dp_packet_is_eth(packet)) {
1158
0
        return 0;
1159
0
    }
1160
1161
1.99k
    dp_packet_reset_offsets(packet);
1162
1163
1.99k
    dl_type = parse_dl_type(&data, &size, first_vlan_tci_p);
1164
1.99k
    if (dl_type_p) {
1165
0
        *dl_type_p = dl_type;
1166
0
    }
1167
1.99k
    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
1168
55
        packet->l2_5_ofs = (char *)data - frame;
1169
55
    }
1170
1.99k
    packet->l3_ofs = (char *)data - frame;
1171
1.99k
    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
1172
715
        const struct ip_header *nh = data;
1173
715
        int ip_len;
1174
715
        uint16_t tot_len;
1175
1176
715
        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
1177
88
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
1178
0
                dump_invalid_packet(packet, "ipv4_sanity_check");
1179
0
            }
1180
88
            return 0;
1181
88
        }
1182
627
        dp_packet_set_l2_pad_size(packet, size - tot_len);
1183
627
        nw_proto = nh->ip_proto;
1184
627
        nw_frag = ipv4_get_nw_frag(nh);
1185
1186
627
        size = tot_len;   /* Never pull padding. */
1187
627
        data_pull(&data, &size, ip_len);
1188
1.28k
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
1189
725
        const struct ovs_16aligned_ip6_hdr *nh = data;
1190
725
        uint16_t plen;
1191
1192
725
        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
1193
51
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
1194
0
                dump_invalid_packet(packet, "ipv6_sanity_check");
1195
0
            }
1196
51
            return 0;
1197
51
        }
1198
674
        data_pull(&data, &size, sizeof *nh);
1199
1200
674
        plen = ntohs(nh->ip6_plen); /* Never pull padding. */
1201
674
        dp_packet_set_l2_pad_size(packet, size - plen);
1202
674
        size = plen;
1203
674
        const struct ovs_16aligned_ip6_frag *frag_hdr;
1204
674
        nw_proto = nh->ip6_nxt;
1205
674
        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag,
1206
674
            &frag_hdr)) {
1207
130
            return 0;
1208
130
        }
1209
674
    } else {
1210
556
        return 0;
1211
556
    }
1212
1213
1.17k
    if (nw_frag_p) {
1214
0
        *nw_frag_p = nw_frag;
1215
0
    }
1216
1217
1.17k
    packet->l4_ofs = (uint16_t)((char *)data - frame);
1218
1.17k
    if (!(nw_frag & FLOW_NW_FRAG_LATER) && nw_proto == IPPROTO_TCP &&
1219
1.17k
        size >= TCP_HEADER_LEN) {
1220
294
        const struct tcp_header *tcp = data;
1221
1222
294
        return TCP_FLAGS(tcp->tcp_ctl);
1223
294
    }
1224
1225
877
    return 0;
1226
1.17k
}
1227
1228
/* For every bit of a field that is wildcarded in 'wildcards', sets the
1229
 * corresponding bit in 'flow' to zero. */
1230
void
1231
flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
1232
264k
{
1233
264k
    uint64_t *flow_u64 = (uint64_t *) flow;
1234
264k
    const uint64_t *wc_u64 = (const uint64_t *) &wildcards->masks;
1235
264k
    size_t i;
1236
1237
22.4M
    for (i = 0; i < FLOW_U64S; i++) {
1238
22.2M
        flow_u64[i] &= wc_u64[i];
1239
22.2M
    }
1240
264k
}
1241
1242
void
1243
flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc)
1244
1.15k
{
1245
1.15k
    if (flow->nw_proto != IPPROTO_ICMP) {
1246
1.11k
        memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
1247
1.11k
        memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
1248
1.11k
    } else {
1249
37
        wc->masks.tp_src = htons(0xff);
1250
37
        wc->masks.tp_dst = htons(0xff);
1251
37
    }
1252
1.15k
}
1253
1254
/* Initializes 'flow_metadata' with the metadata found in 'flow'. */
1255
void
1256
flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
1257
2.03k
{
1258
2.03k
    int i;
1259
1260
2.03k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
1261
1262
2.03k
    match_init_catchall(flow_metadata);
1263
2.03k
    if (flow->tunnel.tun_id != htonll(0)) {
1264
0
        match_set_tun_id(flow_metadata, flow->tunnel.tun_id);
1265
0
    }
1266
2.03k
    if (flow->tunnel.flags & FLOW_TNL_PUB_F_MASK) {
1267
0
        match_set_tun_flags(flow_metadata,
1268
0
                            flow->tunnel.flags & FLOW_TNL_PUB_F_MASK);
1269
0
    }
1270
2.03k
    if (flow->tunnel.ip_src) {
1271
0
        match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
1272
0
    }
1273
2.03k
    if (flow->tunnel.ip_dst) {
1274
0
        match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
1275
0
    }
1276
2.03k
    if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
1277
0
        match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
1278
0
    }
1279
2.03k
    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
1280
0
        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
1281
0
    }
1282
2.03k
    if (flow->tunnel.gbp_id != htons(0)) {
1283
0
        match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
1284
0
    }
1285
2.03k
    if (flow->tunnel.gbp_flags) {
1286
0
        match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags);
1287
0
    }
1288
2.03k
    if (flow->tunnel.erspan_ver) {
1289
0
        match_set_tun_erspan_ver(flow_metadata, flow->tunnel.erspan_ver);
1290
0
    }
1291
2.03k
    if (flow->tunnel.erspan_idx) {
1292
0
        match_set_tun_erspan_idx(flow_metadata, flow->tunnel.erspan_idx);
1293
0
    }
1294
2.03k
    if (flow->tunnel.erspan_dir) {
1295
0
        match_set_tun_erspan_dir(flow_metadata, flow->tunnel.erspan_dir);
1296
0
    }
1297
2.03k
    if (flow->tunnel.erspan_hwid) {
1298
0
        match_set_tun_erspan_hwid(flow_metadata, flow->tunnel.erspan_hwid);
1299
0
    }
1300
2.03k
    if (flow->tunnel.gtpu_flags) {
1301
0
        match_set_tun_gtpu_flags(flow_metadata, flow->tunnel.gtpu_flags);
1302
0
    }
1303
2.03k
    if (flow->tunnel.gtpu_msgtype) {
1304
0
        match_set_tun_gtpu_msgtype(flow_metadata, flow->tunnel.gtpu_msgtype);
1305
0
    }
1306
2.03k
    tun_metadata_get_fmd(&flow->tunnel, flow_metadata);
1307
2.03k
    if (flow->metadata != htonll(0)) {
1308
0
        match_set_metadata(flow_metadata, flow->metadata);
1309
0
    }
1310
1311
34.6k
    for (i = 0; i < FLOW_N_REGS; i++) {
1312
32.5k
        if (flow->regs[i]) {
1313
0
            match_set_reg(flow_metadata, i, flow->regs[i]);
1314
0
        }
1315
32.5k
    }
1316
1317
2.03k
    if (flow->pkt_mark != 0) {
1318
0
        match_set_pkt_mark(flow_metadata, flow->pkt_mark);
1319
0
    }
1320
1321
2.03k
    match_set_in_port(flow_metadata, flow->in_port.ofp_port);
1322
2.03k
    if (flow->packet_type != htonl(PT_ETH)) {
1323
0
        match_set_packet_type(flow_metadata, flow->packet_type);
1324
0
    }
1325
1326
2.03k
    if (flow->ct_state != 0) {
1327
0
        match_set_ct_state(flow_metadata, flow->ct_state);
1328
        /* Match dl_type since it is required for the later interpretation of
1329
         * the conntrack metadata. */
1330
0
        match_set_dl_type(flow_metadata, flow->dl_type);
1331
0
        if (is_ct_valid(flow, NULL, NULL) && flow->ct_nw_proto != 0) {
1332
0
            if (flow->dl_type == htons(ETH_TYPE_IP)) {
1333
0
                match_set_ct_nw_src(flow_metadata, flow->ct_nw_src);
1334
0
                match_set_ct_nw_dst(flow_metadata, flow->ct_nw_dst);
1335
0
                match_set_ct_nw_proto(flow_metadata, flow->ct_nw_proto);
1336
0
                match_set_ct_tp_src(flow_metadata, flow->ct_tp_src);
1337
0
                match_set_ct_tp_dst(flow_metadata, flow->ct_tp_dst);
1338
0
            } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1339
0
                match_set_ct_ipv6_src(flow_metadata, &flow->ct_ipv6_src);
1340
0
                match_set_ct_ipv6_dst(flow_metadata, &flow->ct_ipv6_dst);
1341
0
                match_set_ct_nw_proto(flow_metadata, flow->ct_nw_proto);
1342
0
                match_set_ct_tp_src(flow_metadata, flow->ct_tp_src);
1343
0
                match_set_ct_tp_dst(flow_metadata, flow->ct_tp_dst);
1344
0
            }
1345
0
        }
1346
0
    }
1347
2.03k
    if (flow->ct_zone != 0) {
1348
0
        match_set_ct_zone(flow_metadata, flow->ct_zone);
1349
0
    }
1350
2.03k
    if (flow->ct_mark != 0) {
1351
0
        match_set_ct_mark(flow_metadata, flow->ct_mark);
1352
0
    }
1353
2.03k
    if (!ovs_u128_is_zero(flow->ct_label)) {
1354
0
        match_set_ct_label(flow_metadata, flow->ct_label);
1355
0
    }
1356
2.03k
}
1357
1358
const char *
1359
ct_state_to_string(uint32_t state)
1360
78.4k
{
1361
78.4k
    switch (state) {
1362
32.8k
#define CS_STATE(ENUM, INDEX, NAME) case CS_##ENUM: return NAME;
1363
0
        CS_STATES
1364
0
#undef CS_STATE
1365
45.6k
    default:
1366
45.6k
        return NULL;
1367
78.4k
    }
1368
78.4k
}
1369
1370
uint32_t
1371
ct_state_from_string(const char *s)
1372
0
{
1373
0
#define CS_STATE(ENUM, INDEX, NAME) \
1374
0
    if (!strcmp(s, NAME)) {         \
1375
0
        return CS_##ENUM;           \
1376
0
    }
1377
0
    CS_STATES
1378
0
#undef CS_STATE
1379
0
    return 0;
1380
0
}
1381
1382
/* Parses the connection tracking state flags named in 'state_str', a list of
 * flag names separated by commas and/or spaces, and ORs them into
 * 'default_state'.
 *
 * On success, stores the combined value in '*ct_state' and returns true.  If
 * any name is not recognized by ct_state_from_string(), appends an error
 * message to 'ds', leaves '*ct_state' untouched, and returns false. */
bool
parse_ct_state(const char *state_str, uint32_t default_state,
               uint32_t *ct_state, struct ds *ds)
{
    static const char delimiters[] = ", ";
    /* strtok_r() modifies its input, so tokenize a private copy. */
    char *copy = xstrdup(state_str);
    char *position = NULL;
    uint32_t combined = default_state;
    bool ok = true;

    char *token = strtok_r(copy, delimiters, &position);
    while (token) {
        uint32_t flag = ct_state_from_string(token);

        if (!flag) {
            ds_put_format(ds, "%s: unknown connection tracking state flag",
                          token);
            ok = false;
            break;
        }
        combined |= flag;
        token = strtok_r(NULL, delimiters, &position);
    }

    if (ok) {
        *ct_state = combined;
    }
    free(copy);

    return ok;
}
1410
1411
/* Checks the given conntrack state 'state' according to the constraints
1412
 * listed in ovs-fields (7).  Returns true if it is valid.  Otherwise, returns
1413
 * false, and reports error in 'ds'. */
1414
bool
1415
validate_ct_state(uint32_t state, struct ds *ds)
1416
0
{
1417
0
    bool valid_ct_state = true;
1418
0
    struct ds d_str = DS_EMPTY_INITIALIZER;
1419
1420
0
    format_flags(&d_str, ct_state_to_string, state, '|');
1421
1422
0
    if (state && !(state & CS_TRACKED)) {
1423
0
        ds_put_format(ds, "%s: invalid connection state: "
1424
0
                      "If \"trk\" is unset, no other flags are set\n",
1425
0
                      ds_cstr(&d_str));
1426
0
        valid_ct_state = false;
1427
0
    }
1428
0
    if (state & CS_INVALID && state & ~(CS_TRACKED | CS_INVALID)) {
1429
0
        ds_put_format(ds, "%s: invalid connection state: "
1430
0
                      "when \"inv\" is set, only \"trk\" may also be set\n",
1431
0
                      ds_cstr(&d_str));
1432
0
        valid_ct_state = false;
1433
0
    }
1434
0
    if (state & CS_NEW && state & CS_ESTABLISHED) {
1435
0
        ds_put_format(ds, "%s: invalid connection state: "
1436
0
                      "\"new\" and \"est\" are mutually exclusive\n",
1437
0
                      ds_cstr(&d_str));
1438
0
        valid_ct_state = false;
1439
0
    }
1440
0
    if (state & CS_NEW && state & CS_REPLY_DIR) {
1441
0
        ds_put_format(ds, "%s: invalid connection state: "
1442
0
                      "\"new\" and \"rpy\" are mutually exclusive\n",
1443
0
                      ds_cstr(&d_str));
1444
0
        valid_ct_state = false;
1445
0
    }
1446
1447
0
    ds_destroy(&d_str);
1448
0
    return valid_ct_state;
1449
0
}
1450
1451
/* Clears the fields in 'flow' associated with connection tracking. */
1452
void
1453
flow_clear_conntrack(struct flow *flow)
1454
0
{
1455
0
    flow->ct_state = 0;
1456
0
    flow->ct_zone = 0;
1457
0
    flow->ct_mark = 0;
1458
0
    flow->ct_label = OVS_U128_ZERO;
1459
1460
0
    flow->ct_nw_proto = 0;
1461
0
    flow->ct_tp_src = 0;
1462
0
    flow->ct_tp_dst = 0;
1463
0
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
1464
0
        flow->ct_nw_src = 0;
1465
0
        flow->ct_nw_dst = 0;
1466
0
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1467
0
        memset(&flow->ct_ipv6_src, 0, sizeof flow->ct_ipv6_src);
1468
0
        memset(&flow->ct_ipv6_dst, 0, sizeof flow->ct_ipv6_dst);
1469
0
    }
1470
0
}
1471
1472
char *
1473
flow_to_string(const struct flow *flow,
1474
               const struct ofputil_port_map *port_map)
1475
0
{
1476
0
    struct ds ds = DS_EMPTY_INITIALIZER;
1477
0
    flow_format(&ds, flow, port_map);
1478
0
    return ds_cstr(&ds);
1479
0
}
1480
1481
const char *
1482
flow_tun_flag_to_string(uint32_t flags)
1483
10.6k
{
1484
10.6k
    switch (flags) {
1485
1.33k
    case FLOW_TNL_F_DONT_FRAGMENT:
1486
1.33k
        return "df";
1487
799
    case FLOW_TNL_F_CSUM:
1488
799
        return "csum";
1489
2.74k
    case FLOW_TNL_F_KEY:
1490
2.74k
        return "key";
1491
1.71k
    case FLOW_TNL_F_OAM:
1492
1.71k
        return "oam";
1493
4.02k
    default:
1494
4.02k
        return NULL;
1495
10.6k
    }
1496
10.6k
}
1497
1498
/* Appends a textual form of the bit set 'flags' to 'ds'.
 *
 * Bits are visited from least to most significant.  Each bit for which
 * 'bit_to_string' returns a name is written as that name; all remaining bits
 * are collected and written last as a single hexadecimal number.  Items are
 * separated by 'del'.  If 'flags' is zero, a lone "0" is written. */
void
format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
             uint32_t flags, char del)
{
    uint32_t unnamed = 0;
    uint32_t remaining;

    if (flags == 0) {
        ds_put_char(ds, '0');
        return;
    }

    for (remaining = flags; remaining; ) {
        const uint32_t lowest = rightmost_1bit(remaining);
        const char *name = bit_to_string(lowest);

        if (name) {
            ds_put_format(ds, "%s%c", name, del);
        } else {
            unnamed |= lowest;
        }

        remaining &= ~lowest;
    }

    if (unnamed) {
        ds_put_format(ds, "0x%"PRIx32"%c", unnamed, del);
    }

    /* Every item above was followed by 'del'; drop the trailing one. */
    ds_chomp(ds, del);
}
1527
1528
void
1529
format_flags_masked(struct ds *ds, const char *name,
1530
                    const char *(*bit_to_string)(uint32_t), uint32_t flags,
1531
                    uint32_t mask, uint32_t max_mask)
1532
26.8k
{
1533
26.8k
    if (name) {
1534
15.6k
        ds_put_format(ds, "%s%s=%s", colors.param, name, colors.end);
1535
15.6k
    }
1536
1537
26.8k
    if (mask == max_mask) {
1538
22.9k
        format_flags(ds, bit_to_string, flags, '|');
1539
22.9k
        return;
1540
22.9k
    }
1541
1542
3.93k
    if (!mask) {
1543
216
        ds_put_cstr(ds, "0/0");
1544
216
        return;
1545
216
    }
1546
1547
65.7k
    while (mask) {
1548
62.0k
        uint32_t bit = rightmost_1bit(mask);
1549
62.0k
        const char *s = bit_to_string(bit);
1550
1551
62.0k
        ds_put_format(ds, "%s%s", (flags & bit) ? "+" : "-",
1552
62.0k
                      s ? s : "[Unknown]");
1553
62.0k
        mask &= ~bit;
1554
62.0k
    }
1555
3.71k
}
1556
1557
/* Appends the 16-bit 'value' under 'mask' to 's'.
 *
 * A zero 'mask' is written as "*".  Otherwise 'value' is written in decimal
 * when it is at most 9 and in "0x" hexadecimal when larger, followed by
 * "/0x<mask>" unless 'mask' is all-ones. */
static void
put_u16_masked(struct ds *s, uint16_t value, uint16_t mask)
{
    if (mask == 0) {
        ds_put_char(s, '*');
        return;
    }

    if (value <= 9) {
        ds_put_format(s, "%"PRIu16, value);
    } else {
        ds_put_format(s, "0x%"PRIx16, value);
    }

    if (mask != UINT16_MAX) {
        ds_put_format(s, "/0x%"PRIx16, mask);
    }
}
1574
1575
void
1576
format_packet_type_masked(struct ds *s, ovs_be32 value, ovs_be32 mask)
1577
18.4k
{
1578
18.4k
    if (value == htonl(PT_ETH) && mask == OVS_BE32_MAX) {
1579
2.14k
        ds_put_cstr(s, "eth");
1580
16.3k
    } else {
1581
16.3k
        ds_put_cstr(s, "packet_type=(");
1582
16.3k
        put_u16_masked(s, pt_ns(value), pt_ns(mask));
1583
16.3k
        ds_put_char(s, ',');
1584
16.3k
        put_u16_masked(s, pt_ns_type(value), pt_ns_type(mask));
1585
16.3k
        ds_put_char(s, ')');
1586
16.3k
    }
1587
18.4k
}
1588
1589
/* Scans a string 's' of flags to determine their numerical value and
 * returns the number of characters parsed using 'bit_to_string' to
 * lookup flag names. Scanning continues until the character 'end' is
 * reached.
 *
 * Three input forms are recognized, tried in this order:
 *
 *   - "<flags>/<mask>", both numeric (only if 'res_mask' is non-NULL).
 *
 *   - A sequence of "+name" and "-name" items (only if 'res_mask' is
 *     non-NULL and 's' starts with '+' or '-').  Each named flag is added to
 *     the mask; '+' additionally sets it in the flags.
 *
 *   - A '|'-separated list of flag names and/or numbers, all of which are
 *     set.  The resulting mask, if requested, is UINT32_MAX.
 *
 * In the event of a failure, a negative error code will be returned. In
 * addition, if 'res_string' is non-NULL then a descriptive string will
 * be returned incorporating the identifying string 'field_name'. This
 * error string must be freed by the caller.
 *
 * Upon success, the flag values will be stored in 'res_flags' and
 * optionally 'res_mask', if it is non-NULL (if it is NULL then any masks
 * present in the original string will be considered an error). The
 * caller may restrict the acceptable set of values through the mask
 * 'allowed'.  Note that '*res_string' is only reset to NULL on the success
 * path of the unmasked form. */
int
parse_flags(const char *s, const char *(*bit_to_string)(uint32_t),
            char end, const char *field_name, char **res_string,
            uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask)
{
    uint32_t result = 0;
    int n;

    /* Parse masked flags in numeric format? */
    if (res_mask && ovs_scan(s, "%"SCNi32"/%"SCNi32"%n",
                             res_flags, res_mask, &n) && n > 0) {
        if (*res_flags & ~allowed || *res_mask & ~allowed) {
            goto unknown;
        }
        return n;
    }

    n = 0;

    if (res_mask && (*s == '+' || *s == '-')) {
        uint32_t flags = 0, mask = 0;

        /* Parse masked flags. */
        while (s[0] != end) {
            bool set;
            uint32_t bit;
            size_t len;

            if (s[0] == '+') {
                set = true;
            } else if (s[0] == '-') {
                set = false;
            } else {
                if (res_string) {
                    *res_string = xasprintf("%s: %s must be preceded by '+' "
                                            "(for SET) or '-' (NOT SET)", s,
                                            field_name);
                }
                return -EINVAL;
            }
            s++;
            n++;

            /* Look for the flag whose name starts at 's' and is followed by
             * the next item's sign or by 'end'.  'bit' becomes 0 once all
             * 32 bits have been tried without a match. */
            for (bit = 1; bit; bit <<= 1) {
                const char *fname = bit_to_string(bit);

                if (!fname) {
                    continue;
                }

                len = strlen(fname);
                if (strncmp(s, fname, len) ||
                    (s[len] != '+' && s[len] != '-' && s[len] != end)) {
                    continue;
                }

                if (mask & bit) {
                    /* bit already set. */
                    if (res_string) {
                        *res_string = xasprintf("%s: Each %s flag can be "
                                                "specified only once", s,
                                                field_name);
                    }
                    return -EINVAL;
                }
                if (!(bit & allowed)) {
                    goto unknown;
                }
                if (set) {
                   flags |= bit;
                }
                mask |= bit;
                break;
            }

            if (!bit) {
                goto unknown;
            }
            s += len;
            n += len;
        }

        *res_flags = flags;
        *res_mask = mask;
        return n;
    }

    /* Parse unmasked flags.  If a flag is present, it is set, otherwise
     * it is not set. */
    while (s[n] != end) {
        long long int value;
        uint32_t bit;
        int n0;

        if (ovs_scan(&s[n], "%lli%n", &value, &n0)) {
            /* The number is scanned at 64 bits but flags are 32 bits wide.
             * '~allowed' is itself only 32 bits, so it cannot catch bits
             * above bit 31: reject anything that fits neither uint32_t nor
             * int32_t instead of silently truncating it.  Negative values
             * that fit in 32 bits keep their two's-complement meaning. */
            if (value < INT32_MIN || value > UINT32_MAX
                || ((uint32_t) value & ~allowed)) {
                goto unknown;
            }
            n += n0 + (s[n + n0] == '|');
            result |= (uint32_t) value;
            continue;
        }

        for (bit = 1; bit; bit <<= 1) {
            const char *name = bit_to_string(bit);
            size_t len;

            if (!name) {
                continue;
            }

            len = strlen(name);
            if (!strncmp(s + n, name, len) &&
                (s[n + len] == '|' || s[n + len] == end)) {
                if (!(bit & allowed)) {
                    goto unknown;
                }
                result |= bit;
                n += len + (s[n + len] == '|');
                break;
            }
        }

        if (!bit) {
            goto unknown;
        }
    }

    *res_flags = result;
    if (res_mask) {
        *res_mask = UINT32_MAX;
    }
    if (res_string) {
        *res_string = NULL;
    }
    return n;

unknown:
    if (res_string) {
        *res_string = xasprintf("%s: unknown %s flag(s)", s, field_name);
    }
    return -EINVAL;
}
1747
1748
void
1749
flow_format(struct ds *ds,
1750
            const struct flow *flow, const struct ofputil_port_map *port_map)
1751
116k
{
1752
116k
    struct match match;
1753
116k
    struct flow_wildcards *wc = &match.wc;
1754
1755
116k
    match_wc_init(&match, flow);
1756
1757
    /* As this function is most often used for formatting a packet in a
1758
     * packet-in message, skip formatting the packet context fields that are
1759
     * all-zeroes to make the print-out easier on the eyes.  This means that a
1760
     * missing context field implies a zero value for that field.  This is
1761
     * similar to OpenFlow encoding of these fields, as the specification
1762
     * states that all-zeroes context fields should not be encoded in the
1763
     * packet-in messages. */
1764
116k
    if (!flow->in_port.ofp_port) {
1765
116k
        WC_UNMASK_FIELD(wc, in_port);
1766
116k
    }
1767
116k
    if (!flow->skb_priority) {
1768
116k
        WC_UNMASK_FIELD(wc, skb_priority);
1769
116k
    }
1770
116k
    if (!flow->pkt_mark) {
1771
116k
        WC_UNMASK_FIELD(wc, pkt_mark);
1772
116k
    }
1773
116k
    if (!flow->recirc_id) {
1774
116k
        WC_UNMASK_FIELD(wc, recirc_id);
1775
116k
    }
1776
116k
    if (!flow->dp_hash) {
1777
116k
        WC_UNMASK_FIELD(wc, dp_hash);
1778
116k
    }
1779
116k
    if (!flow->ct_state) {
1780
116k
        WC_UNMASK_FIELD(wc, ct_state);
1781
116k
    }
1782
116k
    if (!flow->ct_zone) {
1783
116k
        WC_UNMASK_FIELD(wc, ct_zone);
1784
116k
    }
1785
116k
    if (!flow->ct_mark) {
1786
116k
        WC_UNMASK_FIELD(wc, ct_mark);
1787
116k
    }
1788
116k
    if (ovs_u128_is_zero(flow->ct_label)) {
1789
116k
        WC_UNMASK_FIELD(wc, ct_label);
1790
116k
    }
1791
116k
    if (!is_ct_valid(flow, &match.wc, NULL) || !flow->ct_nw_proto) {
1792
116k
        WC_UNMASK_FIELD(wc, ct_nw_proto);
1793
116k
        WC_UNMASK_FIELD(wc, ct_tp_src);
1794
116k
        WC_UNMASK_FIELD(wc, ct_tp_dst);
1795
116k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
1796
11.5k
            WC_UNMASK_FIELD(wc, ct_nw_src);
1797
11.5k
            WC_UNMASK_FIELD(wc, ct_nw_dst);
1798
105k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1799
38.3k
            WC_UNMASK_FIELD(wc, ct_ipv6_src);
1800
38.3k
            WC_UNMASK_FIELD(wc, ct_ipv6_dst);
1801
38.3k
        }
1802
116k
    }
1803
1.98M
    for (int i = 0; i < FLOW_N_REGS; i++) {
1804
1.86M
        if (!flow->regs[i]) {
1805
1.86M
            WC_UNMASK_FIELD(wc, regs[i]);
1806
1.86M
        }
1807
1.86M
    }
1808
116k
    if (!flow->metadata) {
1809
116k
        WC_UNMASK_FIELD(wc, metadata);
1810
116k
    }
1811
1812
116k
    match_format(&match, port_map, ds, OFP_DEFAULT_PRIORITY);
1813
116k
}
1814
1815
/* Writes the flow_to_string() form of 'flow' to 'stream', using 'port_map'
 * for port names.  No trailing newline is added. */
void
flow_print(FILE *stream,
           const struct flow *flow, const struct ofputil_port_map *port_map)
{
    char *text = flow_to_string(flow, port_map);

    fputs(text, stream);
    free(text);
}
1823

1824
/* flow_wildcards functions. */
1825
1826
/* Initializes 'wc' as a set of wildcards that matches every packet. */
1827
void
1828
flow_wildcards_init_catchall(struct flow_wildcards *wc)
1829
1.01M
{
1830
1.01M
    memset(&wc->masks, 0, sizeof wc->masks);
1831
1.01M
}
1832
1833
/* Converts a flow into flow wildcards.  It sets the wildcard masks based on
1834
 * the packet headers extracted to 'flow'.  It will not set the mask for fields
1835
 * that do not make sense for the packet type.  OpenFlow-only metadata is
1836
 * wildcarded, but other metadata is unconditionally exact-matched. */
1837
void
1838
flow_wildcards_init_for_packet(struct flow_wildcards *wc,
1839
                               const struct flow *flow)
1840
120k
{
1841
120k
    ovs_be16 dl_type = OVS_BE16_MAX;
1842
1843
120k
    memset(&wc->masks, 0x0, sizeof wc->masks);
1844
1845
    /* Update this function whenever struct flow changes. */
1846
120k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
1847
1848
120k
    if (flow_tnl_dst_is_set(&flow->tunnel)) {
1849
0
        if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
1850
0
            WC_MASK_FIELD(wc, tunnel.tun_id);
1851
0
        }
1852
0
        WC_MASK_FIELD(wc, tunnel.ip_src);
1853
0
        WC_MASK_FIELD(wc, tunnel.ip_dst);
1854
0
        WC_MASK_FIELD(wc, tunnel.ipv6_src);
1855
0
        WC_MASK_FIELD(wc, tunnel.ipv6_dst);
1856
0
        WC_MASK_FIELD(wc, tunnel.flags);
1857
0
        WC_MASK_FIELD(wc, tunnel.ip_tos);
1858
0
        WC_MASK_FIELD(wc, tunnel.ip_ttl);
1859
0
        WC_MASK_FIELD(wc, tunnel.tp_src);
1860
0
        WC_MASK_FIELD(wc, tunnel.tp_dst);
1861
0
        WC_MASK_FIELD(wc, tunnel.gbp_id);
1862
0
        WC_MASK_FIELD(wc, tunnel.gbp_flags);
1863
0
        WC_MASK_FIELD(wc, tunnel.erspan_ver);
1864
0
        WC_MASK_FIELD(wc, tunnel.erspan_idx);
1865
0
        WC_MASK_FIELD(wc, tunnel.erspan_dir);
1866
0
        WC_MASK_FIELD(wc, tunnel.erspan_hwid);
1867
0
        WC_MASK_FIELD(wc, tunnel.gtpu_flags);
1868
0
        WC_MASK_FIELD(wc, tunnel.gtpu_msgtype);
1869
1870
0
        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
1871
0
            if (flow->tunnel.metadata.present.map) {
1872
0
                wc->masks.tunnel.metadata.present.map =
1873
0
                                              flow->tunnel.metadata.present.map;
1874
0
                WC_MASK_FIELD(wc, tunnel.metadata.opts.u8);
1875
0
                WC_MASK_FIELD(wc, tunnel.metadata.tab);
1876
0
            }
1877
0
        } else {
1878
0
            WC_MASK_FIELD(wc, tunnel.metadata.present.len);
1879
0
            memset(wc->masks.tunnel.metadata.opts.gnv, 0xff,
1880
0
                   flow->tunnel.metadata.present.len);
1881
0
        }
1882
120k
    } else if (flow->tunnel.tun_id) {
1883
0
        WC_MASK_FIELD(wc, tunnel.tun_id);
1884
0
    }
1885
1886
    /* metadata, regs, and conj_id wildcarded. */
1887
1888
120k
    WC_MASK_FIELD(wc, skb_priority);
1889
120k
    WC_MASK_FIELD(wc, pkt_mark);
1890
120k
    WC_MASK_FIELD(wc, ct_state);
1891
120k
    WC_MASK_FIELD(wc, ct_zone);
1892
120k
    WC_MASK_FIELD(wc, ct_mark);
1893
120k
    WC_MASK_FIELD(wc, ct_label);
1894
120k
    WC_MASK_FIELD(wc, recirc_id);
1895
120k
    WC_MASK_FIELD(wc, dp_hash);
1896
120k
    WC_MASK_FIELD(wc, in_port);
1897
1898
    /* actset_output wildcarded. */
1899
1900
120k
    WC_MASK_FIELD(wc, packet_type);
1901
120k
    if (flow->packet_type == htonl(PT_ETH)) {
1902
112k
        WC_MASK_FIELD(wc, dl_dst);
1903
112k
        WC_MASK_FIELD(wc, dl_src);
1904
112k
        WC_MASK_FIELD(wc, dl_type);
1905
        /* No need to set mask of inner VLANs that don't exist. */
1906
113k
        for (int i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
1907
            /* Always show the first zero VLAN. */
1908
113k
            WC_MASK_FIELD(wc, vlans[i]);
1909
113k
            if (flow->vlans[i].tci == htons(0)) {
1910
112k
                break;
1911
112k
            }
1912
113k
        }
1913
112k
        dl_type = flow->dl_type;
1914
112k
    } else {
1915
8.12k
        dl_type = pt_ns_type_be(flow->packet_type);
1916
8.12k
    }
1917
1918
120k
    if (dl_type == htons(ETH_TYPE_IP)) {
1919
13.0k
        WC_MASK_FIELD(wc, nw_src);
1920
13.0k
        WC_MASK_FIELD(wc, nw_dst);
1921
13.0k
        WC_MASK_FIELD(wc, ct_nw_src);
1922
13.0k
        WC_MASK_FIELD(wc, ct_nw_dst);
1923
107k
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
1924
39.7k
        WC_MASK_FIELD(wc, ipv6_src);
1925
39.7k
        WC_MASK_FIELD(wc, ipv6_dst);
1926
39.7k
        WC_MASK_FIELD(wc, ipv6_label);
1927
39.7k
        if (is_nd(flow, wc)) {
1928
3.29k
            WC_MASK_FIELD(wc, arp_sha);
1929
3.29k
            WC_MASK_FIELD(wc, arp_tha);
1930
3.29k
            WC_MASK_FIELD(wc, nd_target);
1931
36.4k
        } else {
1932
36.4k
            WC_MASK_FIELD(wc, ct_ipv6_src);
1933
36.4k
            WC_MASK_FIELD(wc, ct_ipv6_dst);
1934
36.4k
        }
1935
68.1k
    } else if (dl_type == htons(ETH_TYPE_ARP) ||
1936
68.1k
               dl_type == htons(ETH_TYPE_RARP)) {
1937
1.25k
        WC_MASK_FIELD(wc, nw_src);
1938
1.25k
        WC_MASK_FIELD(wc, nw_dst);
1939
1.25k
        WC_MASK_FIELD(wc, nw_proto);
1940
1.25k
        WC_MASK_FIELD(wc, arp_sha);
1941
1.25k
        WC_MASK_FIELD(wc, arp_tha);
1942
1.25k
        return;
1943
66.8k
    } else if (eth_type_mpls(dl_type)) {
1944
3.23k
        for (int i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
1945
2.48k
            WC_MASK_FIELD(wc, mpls_lse[i]);
1946
2.48k
            if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
1947
170
                break;
1948
170
            }
1949
2.48k
        }
1950
924
        return;
1951
65.9k
    } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
1952
7.83k
        WC_MASK_FIELD(wc, nsh.flags);
1953
7.83k
        WC_MASK_FIELD(wc, nsh.ttl);
1954
7.83k
        WC_MASK_FIELD(wc, nsh.mdtype);
1955
7.83k
        WC_MASK_FIELD(wc, nsh.np);
1956
7.83k
        WC_MASK_FIELD(wc, nsh.path_hdr);
1957
7.83k
        WC_MASK_FIELD(wc, nsh.context);
1958
58.1k
    } else {
1959
58.1k
        return; /* Unknown ethertype. */
1960
58.1k
    }
1961
1962
    /* IPv4 or IPv6. */
1963
60.5k
    WC_MASK_FIELD_MASK(wc, nw_frag, FLOW_NW_FRAG_MASK);
1964
60.5k
    WC_MASK_FIELD(wc, nw_tos);
1965
60.5k
    WC_MASK_FIELD(wc, nw_ttl);
1966
60.5k
    WC_MASK_FIELD(wc, nw_proto);
1967
60.5k
    WC_MASK_FIELD(wc, ct_nw_proto);
1968
60.5k
    WC_MASK_FIELD(wc, ct_tp_src);
1969
60.5k
    WC_MASK_FIELD(wc, ct_tp_dst);
1970
1971
    /* No transport layer header in later fragments. */
1972
60.5k
    if (!(flow->nw_frag & FLOW_NW_FRAG_LATER) &&
1973
60.5k
        (flow->nw_proto == IPPROTO_ICMP ||
1974
59.4k
         flow->nw_proto == IPPROTO_ICMPV6 ||
1975
59.4k
         flow->nw_proto == IPPROTO_TCP ||
1976
59.4k
         flow->nw_proto == IPPROTO_UDP ||
1977
59.4k
         flow->nw_proto == IPPROTO_SCTP ||
1978
59.4k
         flow->nw_proto == IPPROTO_IGMP)) {
1979
43.8k
        WC_MASK_FIELD(wc, tp_src);
1980
43.8k
        WC_MASK_FIELD(wc, tp_dst);
1981
1982
43.8k
        if (flow->nw_proto == IPPROTO_TCP) {
1983
15.1k
            WC_MASK_FIELD(wc, tcp_flags);
1984
28.6k
        } else if (flow->nw_proto == IPPROTO_IGMP) {
1985
2.64k
            WC_MASK_FIELD(wc, igmp_group_ip4);
1986
2.64k
        }
1987
43.8k
    }
1988
60.5k
}
1989
1990
/* Return a map of possible fields for a packet of the same type as 'flow'.
1991
 * Including extra bits in the returned mask is not wrong, it is just less
1992
 * optimal.
1993
 *
1994
 * This is a less precise version of flow_wildcards_init_for_packet() above. */
1995
void
1996
flow_wc_map(const struct flow *flow, struct flowmap *map)
1997
2.03k
{
1998
    /* Update this function whenever struct flow changes. */
1999
2.03k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2000
2001
2.03k
    flowmap_init(map);
2002
2003
2.03k
    if (flow_tnl_dst_is_set(&flow->tunnel)) {
2004
0
        FLOWMAP_SET__(map, tunnel, offsetof(struct flow_tnl, metadata));
2005
0
        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
2006
0
            if (flow->tunnel.metadata.present.map) {
2007
0
                FLOWMAP_SET(map, tunnel.metadata);
2008
0
            }
2009
0
        } else {
2010
0
            FLOWMAP_SET(map, tunnel.metadata.present.len);
2011
0
            FLOWMAP_SET__(map, tunnel.metadata.opts.gnv,
2012
0
                          flow->tunnel.metadata.present.len);
2013
0
        }
2014
0
    }
2015
2016
    /* Metadata fields that can appear on packet input. */
2017
2.03k
    FLOWMAP_SET(map, skb_priority);
2018
2.03k
    FLOWMAP_SET(map, pkt_mark);
2019
2.03k
    FLOWMAP_SET(map, recirc_id);
2020
2.03k
    FLOWMAP_SET(map, dp_hash);
2021
2.03k
    FLOWMAP_SET(map, in_port);
2022
2.03k
    FLOWMAP_SET(map, dl_dst);
2023
2.03k
    FLOWMAP_SET(map, dl_src);
2024
2.03k
    FLOWMAP_SET(map, dl_type);
2025
2.03k
    FLOWMAP_SET(map, vlans);
2026
2.03k
    FLOWMAP_SET(map, ct_state);
2027
2.03k
    FLOWMAP_SET(map, ct_zone);
2028
2.03k
    FLOWMAP_SET(map, ct_mark);
2029
2.03k
    FLOWMAP_SET(map, ct_label);
2030
2.03k
    FLOWMAP_SET(map, packet_type);
2031
2032
    /* Ethertype-dependent fields. */
2033
2.03k
    if (OVS_LIKELY(flow->dl_type == htons(ETH_TYPE_IP))) {
2034
715
        FLOWMAP_SET(map, nw_src);
2035
715
        FLOWMAP_SET(map, nw_dst);
2036
715
        FLOWMAP_SET(map, nw_proto);
2037
715
        FLOWMAP_SET(map, nw_frag);
2038
715
        FLOWMAP_SET(map, nw_tos);
2039
715
        FLOWMAP_SET(map, nw_ttl);
2040
715
        FLOWMAP_SET(map, tp_src);
2041
715
        FLOWMAP_SET(map, tp_dst);
2042
715
        FLOWMAP_SET(map, ct_nw_proto);
2043
715
        FLOWMAP_SET(map, ct_nw_src);
2044
715
        FLOWMAP_SET(map, ct_nw_dst);
2045
715
        FLOWMAP_SET(map, ct_tp_src);
2046
715
        FLOWMAP_SET(map, ct_tp_dst);
2047
2048
715
        if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_IGMP)) {
2049
8
            FLOWMAP_SET(map, igmp_group_ip4);
2050
707
        } else {
2051
707
            FLOWMAP_SET(map, tcp_flags);
2052
707
        }
2053
1.32k
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2054
725
        FLOWMAP_SET(map, ipv6_src);
2055
725
        FLOWMAP_SET(map, ipv6_dst);
2056
725
        FLOWMAP_SET(map, ipv6_label);
2057
725
        FLOWMAP_SET(map, nw_proto);
2058
725
        FLOWMAP_SET(map, nw_frag);
2059
725
        FLOWMAP_SET(map, nw_tos);
2060
725
        FLOWMAP_SET(map, nw_ttl);
2061
725
        FLOWMAP_SET(map, tp_src);
2062
725
        FLOWMAP_SET(map, tp_dst);
2063
2064
725
        if (OVS_UNLIKELY(is_nd(flow, NULL))) {
2065
17
            FLOWMAP_SET(map, nd_target);
2066
17
            FLOWMAP_SET(map, arp_sha);
2067
17
            FLOWMAP_SET(map, arp_tha);
2068
17
            FLOWMAP_SET(map, tcp_flags);
2069
17
            FLOWMAP_SET(map, igmp_group_ip4);
2070
708
        } else {
2071
708
            FLOWMAP_SET(map, ct_nw_proto);
2072
708
            FLOWMAP_SET(map, ct_ipv6_src);
2073
708
            FLOWMAP_SET(map, ct_ipv6_dst);
2074
708
            FLOWMAP_SET(map, ct_tp_src);
2075
708
            FLOWMAP_SET(map, ct_tp_dst);
2076
708
            FLOWMAP_SET(map, tcp_flags);
2077
708
        }
2078
725
    } else if (eth_type_mpls(flow->dl_type)) {
2079
55
        FLOWMAP_SET(map, mpls_lse);
2080
542
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
2081
542
               flow->dl_type == htons(ETH_TYPE_RARP)) {
2082
74
        FLOWMAP_SET(map, nw_src);
2083
74
        FLOWMAP_SET(map, nw_dst);
2084
74
        FLOWMAP_SET(map, nw_proto);
2085
74
        FLOWMAP_SET(map, arp_sha);
2086
74
        FLOWMAP_SET(map, arp_tha);
2087
468
    } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
2088
97
        FLOWMAP_SET(map, nsh.flags);
2089
97
        FLOWMAP_SET(map, nsh.mdtype);
2090
97
        FLOWMAP_SET(map, nsh.np);
2091
97
        FLOWMAP_SET(map, nsh.path_hdr);
2092
97
        FLOWMAP_SET(map, nsh.context);
2093
97
    }
2094
2.03k
}
2095
2096
/* Clear the metadata and register wildcard masks. They are not packet
2097
 * header fields. */
2098
void
2099
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
2100
0
{
2101
    /* Update this function whenever struct flow changes. */
2102
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2103
2104
0
    memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
2105
0
    memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
2106
0
    wc->masks.actset_output = 0;
2107
0
    wc->masks.conj_id = 0;
2108
0
}
2109
2110
/* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or
2111
 * fields. */
2112
bool
2113
flow_wildcards_is_catchall(const struct flow_wildcards *wc)
2114
1.95k
{
2115
1.95k
    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2116
1.95k
    size_t i;
2117
2118
1.95k
    for (i = 0; i < FLOW_U64S; i++) {
2119
1.95k
        if (wc_u64[i]) {
2120
1.95k
            return false;
2121
1.95k
        }
2122
1.95k
    }
2123
0
    return true;
2124
1.95k
}
2125
2126
/* Sets 'dst' as the bitwise AND of wildcards in 'src1' and 'src2'.
2127
 * That is, a bit or a field is wildcarded in 'dst' if it is wildcarded
2128
 * in 'src1' or 'src2' or both.  */
2129
void
2130
flow_wildcards_and(struct flow_wildcards *dst,
2131
                   const struct flow_wildcards *src1,
2132
                   const struct flow_wildcards *src2)
2133
1.95k
{
2134
1.95k
    uint64_t *dst_u64 = (uint64_t *) &dst->masks;
2135
1.95k
    const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
2136
1.95k
    const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
2137
1.95k
    size_t i;
2138
2139
165k
    for (i = 0; i < FLOW_U64S; i++) {
2140
163k
        dst_u64[i] = src1_u64[i] & src2_u64[i];
2141
163k
    }
2142
1.95k
}
2143
2144
/* Sets 'dst' as the bitwise OR of wildcards in 'src1' and 'src2'.  That
2145
 * is, a bit or a field is wildcarded in 'dst' if it is neither
2146
 * wildcarded in 'src1' nor 'src2'. */
2147
void
2148
flow_wildcards_or(struct flow_wildcards *dst,
2149
                  const struct flow_wildcards *src1,
2150
                  const struct flow_wildcards *src2)
2151
0
{
2152
0
    uint64_t *dst_u64 = (uint64_t *) &dst->masks;
2153
0
    const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
2154
0
    const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
2155
0
    size_t i;
2156
2157
0
    for (i = 0; i < FLOW_U64S; i++) {
2158
0
        dst_u64[i] = src1_u64[i] | src2_u64[i];
2159
0
    }
2160
0
}
2161
2162
/* Returns a hash of the wildcards in 'wc'. */
2163
uint32_t
2164
flow_wildcards_hash(const struct flow_wildcards *wc, uint32_t basis)
2165
0
{
2166
0
    return flow_hash(&wc->masks, basis);
2167
0
}
2168
2169
/* Returns true if 'a' and 'b' represent the same wildcards, false if they are
2170
 * different. */
2171
bool
2172
flow_wildcards_equal(const struct flow_wildcards *a,
2173
                     const struct flow_wildcards *b)
2174
161k
{
2175
161k
    return flow_equal(&a->masks, &b->masks);
2176
161k
}
2177
2178
/* Returns true if at least one bit or field is wildcarded in 'a' but not in
2179
 * 'b', false otherwise. */
2180
bool
2181
flow_wildcards_has_extra(const struct flow_wildcards *a,
2182
                         const struct flow_wildcards *b)
2183
0
{
2184
0
    const uint64_t *a_u64 = (const uint64_t *) &a->masks;
2185
0
    const uint64_t *b_u64 = (const uint64_t *) &b->masks;
2186
0
    size_t i;
2187
2188
0
    for (i = 0; i < FLOW_U64S; i++) {
2189
0
        if ((a_u64[i] & b_u64[i]) != b_u64[i]) {
2190
0
            return true;
2191
0
        }
2192
0
    }
2193
0
    return false;
2194
0
}
2195
2196
/* Returns true if 'a' and 'b' are equal, except that 0-bits (wildcarded bits)
2197
 * in 'wc' do not need to be equal in 'a' and 'b'. */
2198
bool
2199
flow_equal_except(const struct flow *a, const struct flow *b,
2200
                  const struct flow_wildcards *wc)
2201
0
{
2202
0
    const uint64_t *a_u64 = (const uint64_t *) a;
2203
0
    const uint64_t *b_u64 = (const uint64_t *) b;
2204
0
    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2205
0
    size_t i;
2206
2207
0
    for (i = 0; i < FLOW_U64S; i++) {
2208
0
        if ((a_u64[i] ^ b_u64[i]) & wc_u64[i]) {
2209
0
            return false;
2210
0
        }
2211
0
    }
2212
0
    return true;
2213
0
}
2214
2215
/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2216
 * (A 0-bit indicates a wildcard bit.) */
2217
void
2218
flow_wildcards_set_reg_mask(struct flow_wildcards *wc, int idx, uint32_t mask)
2219
48.6k
{
2220
48.6k
    wc->masks.regs[idx] = mask;
2221
48.6k
}
2222
2223
/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2224
 * (A 0-bit indicates a wildcard bit.) */
2225
void
2226
flow_wildcards_set_xreg_mask(struct flow_wildcards *wc, int idx, uint64_t mask)
2227
18.7k
{
2228
18.7k
    flow_set_xreg(&wc->masks, idx, mask);
2229
18.7k
}
2230
2231
/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2232
 * (A 0-bit indicates a wildcard bit.) */
2233
void
2234
flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx,
2235
                              ovs_u128 mask)
2236
8.61k
{
2237
8.61k
    flow_set_xxreg(&wc->masks, idx, mask);
2238
8.61k
}
2239
2240
/* Calculates the 5-tuple hash from the given miniflow.
2241
 * This returns the same value as flow_hash_5tuple for the corresponding
2242
 * flow. */
2243
uint32_t
2244
miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
2245
1.95k
{
2246
1.95k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2247
1.95k
    uint32_t hash = basis;
2248
2249
1.95k
    if (flow) {
2250
1.95k
        ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
2251
1.95k
        uint8_t nw_proto;
2252
2253
1.95k
        if (dl_type == htons(ETH_TYPE_IPV6)) {
2254
683
            struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
2255
683
            uint64_t value;
2256
2257
683
            FLOWMAP_SET(&map, ipv6_src);
2258
683
            FLOWMAP_SET(&map, ipv6_dst);
2259
2260
2.73k
            MINIFLOW_FOR_EACH_IN_FLOWMAP(value, flow, map) {
2261
2.73k
                hash = hash_add64(hash, value);
2262
2.73k
            }
2263
1.26k
        } else if (dl_type == htons(ETH_TYPE_IP)
2264
1.26k
                   || dl_type == htons(ETH_TYPE_ARP)) {
2265
785
            hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_src));
2266
785
            hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_dst));
2267
785
        } else {
2268
483
            goto out;
2269
483
        }
2270
2271
1.46k
        nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
2272
1.46k
        hash = hash_add(hash, nw_proto);
2273
1.46k
        if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
2274
1.46k
            && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
2275
1.46k
            && nw_proto != IPPROTO_ICMPV6) {
2276
586
            goto out;
2277
586
        }
2278
2279
        /* Add both ports at once. */
2280
882
        hash = hash_add(hash, (OVS_FORCE uint32_t) miniflow_get_ports(flow));
2281
882
    }
2282
1.95k
out:
2283
1.95k
    return hash_finish(hash, 42);
2284
1.95k
}
2285
2286
ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);
2287
ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst);
2288
2289
/* Calculates the 5-tuple hash from the given flow. */
2290
uint32_t
2291
flow_hash_5tuple(const struct flow *flow, uint32_t basis)
2292
2.03k
{
2293
2.03k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2294
2.03k
    uint32_t hash = basis;
2295
2296
2.03k
    if (flow) {
2297
2298
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2299
725
            const uint64_t *flow_u64 = (const uint64_t *)flow;
2300
725
            int ofs = offsetof(struct flow, ipv6_src) / 8;
2301
725
            int end = ofs + 2 * sizeof flow->ipv6_src / 8;
2302
2303
3.62k
            for (;ofs < end; ofs++) {
2304
2.90k
                hash = hash_add64(hash, flow_u64[ofs]);
2305
2.90k
            }
2306
1.31k
        } else if (flow->dl_type == htons(ETH_TYPE_IP)
2307
1.31k
                   || flow->dl_type == htons(ETH_TYPE_ARP)) {
2308
774
            hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_src);
2309
774
            hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_dst);
2310
774
        } else {
2311
538
            goto out;
2312
538
        }
2313
2314
1.49k
        hash = hash_add(hash, flow->nw_proto);
2315
1.49k
        if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
2316
1.49k
            && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
2317
1.49k
            && flow->nw_proto != IPPROTO_ICMPV6) {
2318
613
            goto out;
2319
613
        }
2320
2321
        /* Add both ports at once. */
2322
886
        hash = hash_add(hash,
2323
886
                        ((const uint32_t *)flow)[offsetof(struct flow, tp_src)
2324
886
                                                 / sizeof(uint32_t)]);
2325
886
    }
2326
2.03k
out:
2327
2.03k
    return hash_finish(hash, 42); /* Arbitrary number. */
2328
2.03k
}
2329
2330
/* Hashes 'flow' based on its L2 through L4 protocol information. */
2331
uint32_t
2332
flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis)
2333
4.07k
{
2334
4.07k
    struct {
2335
4.07k
        union {
2336
4.07k
            ovs_be32 ipv4_addr;
2337
4.07k
            struct in6_addr ipv6_addr;
2338
4.07k
        };
2339
4.07k
        ovs_be16 eth_type;
2340
4.07k
        ovs_be16 vlan_tci;
2341
4.07k
        ovs_be16 tp_port;
2342
4.07k
        struct eth_addr eth_addr;
2343
4.07k
        uint8_t ip_proto;
2344
4.07k
    } fields;
2345
2346
4.07k
    int i;
2347
2348
4.07k
    memset(&fields, 0, sizeof fields);
2349
16.2k
    for (i = 0; i < ARRAY_SIZE(fields.eth_addr.be16); i++) {
2350
12.2k
        fields.eth_addr.be16[i] = flow->dl_src.be16[i] ^ flow->dl_dst.be16[i];
2351
12.2k
    }
2352
12.2k
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2353
8.14k
        fields.vlan_tci ^= flow->vlans[i].tci & htons(VLAN_VID_MASK);
2354
8.14k
    }
2355
4.07k
    fields.eth_type = flow->dl_type;
2356
2357
    /* UDP source and destination port are not taken into account because they
2358
     * will not necessarily be symmetric in a bidirectional flow. */
2359
4.07k
    if (fields.eth_type == htons(ETH_TYPE_IP)) {
2360
1.43k
        fields.ipv4_addr = flow->nw_src ^ flow->nw_dst;
2361
1.43k
        fields.ip_proto = flow->nw_proto;
2362
1.43k
        if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
2363
358
            fields.tp_port = flow->tp_src ^ flow->tp_dst;
2364
358
        }
2365
2.64k
    } else if (fields.eth_type == htons(ETH_TYPE_IPV6)) {
2366
1.45k
        const uint8_t *a = &flow->ipv6_src.s6_addr[0];
2367
1.45k
        const uint8_t *b = &flow->ipv6_dst.s6_addr[0];
2368
1.45k
        uint8_t *ipv6_addr = &fields.ipv6_addr.s6_addr[0];
2369
2370
24.6k
        for (i=0; i<16; i++) {
2371
23.2k
            ipv6_addr[i] = a[i] ^ b[i];
2372
23.2k
        }
2373
1.45k
        fields.ip_proto = flow->nw_proto;
2374
1.45k
        if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
2375
344
            fields.tp_port = flow->tp_src ^ flow->tp_dst;
2376
344
        }
2377
1.45k
    }
2378
4.07k
    return jhash_bytes(&fields, sizeof fields, basis);
2379
4.07k
}
2380
2381
/* Symmetrically Hashes non-IP 'flow' based on its L2 headers. */
2382
uint32_t
2383
flow_hash_symmetric_l2(const struct flow *flow, uint32_t basis)
2384
3.82k
{
2385
3.82k
    union {
2386
3.82k
        struct {
2387
3.82k
            ovs_be16 eth_type;
2388
3.82k
            ovs_be16 vlan_tci;
2389
3.82k
            struct eth_addr eth_addr;
2390
3.82k
            ovs_be16 pad;
2391
3.82k
        };
2392
3.82k
        uint32_t word[3];
2393
3.82k
    } fields;
2394
2395
3.82k
    uint32_t hash = basis;
2396
3.82k
    int i;
2397
2398
3.82k
    if (flow->packet_type != htonl(PT_ETH)) {
2399
        /* Cannot hash non-Ethernet flows */
2400
0
        return 0;
2401
0
    }
2402
2403
15.3k
    for (i = 0; i < ARRAY_SIZE(fields.eth_addr.be16); i++) {
2404
11.4k
        fields.eth_addr.be16[i] =
2405
11.4k
                flow->dl_src.be16[i] ^ flow->dl_dst.be16[i];
2406
11.4k
    }
2407
3.82k
    fields.vlan_tci = 0;
2408
11.4k
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2409
7.65k
        fields.vlan_tci ^= flow->vlans[i].tci & htons(VLAN_VID_MASK);
2410
7.65k
    }
2411
3.82k
    fields.eth_type = flow->dl_type;
2412
3.82k
    fields.pad = 0;
2413
2414
3.82k
    hash = hash_add(hash, fields.word[0]);
2415
3.82k
    hash = hash_add(hash, fields.word[1]);
2416
3.82k
    hash = hash_add(hash, fields.word[2]);
2417
3.82k
    return hash_finish(hash, basis);
2418
3.82k
}
2419
2420
/* Hashes 'flow' based on its L3 through L4 protocol information */
2421
uint32_t
2422
flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
2423
                         bool inc_udp_ports)
2424
6.11k
{
2425
6.11k
    uint32_t hash = basis;
2426
2427
    /* UDP source and destination port are also taken into account. */
2428
6.11k
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
2429
2.14k
        hash = hash_add(hash,
2430
2.14k
                        (OVS_FORCE uint32_t) (flow->nw_src ^ flow->nw_dst));
2431
3.96k
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2432
        /* IPv6 addresses are 64-bit aligned inside struct flow. */
2433
2.17k
        const uint64_t *a = ALIGNED_CAST(uint64_t *, flow->ipv6_src.s6_addr);
2434
2.17k
        const uint64_t *b = ALIGNED_CAST(uint64_t *, flow->ipv6_dst.s6_addr);
2435
2436
6.52k
        for (int i = 0; i < sizeof flow->ipv6_src / sizeof *a; i++) {
2437
4.35k
            hash = hash_add64(hash, a[i] ^ b[i]);
2438
4.35k
        }
2439
2.17k
    } else {
2440
        /* Revert to hashing L2 headers */
2441
1.79k
        return flow_hash_symmetric_l2(flow, basis);
2442
1.79k
    }
2443
4.32k
    hash = hash_add(hash, flow->nw_proto);
2444
4.32k
    if (!(flow->nw_frag & FLOW_NW_FRAG_MASK)
2445
4.32k
        && (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
2446
3.33k
            (inc_udp_ports && flow->nw_proto == IPPROTO_UDP))) {
2447
1.11k
        hash = hash_add(hash,
2448
1.11k
                        (OVS_FORCE uint16_t) (flow->tp_src ^ flow->tp_dst));
2449
1.11k
    }
2450
2451
4.32k
    return hash_finish(hash, basis);
2452
6.11k
}
2453
2454
/* Hashes 'flow' based on its nw_dst and nw_src for multipath. */
2455
uint32_t
2456
flow_hash_symmetric_l3(const struct flow *flow, uint32_t basis)
2457
4.07k
{
2458
4.07k
    struct {
2459
4.07k
        union {
2460
4.07k
            ovs_be32 ipv4_addr;
2461
4.07k
            struct in6_addr ipv6_addr;
2462
4.07k
        };
2463
4.07k
        ovs_be16 eth_type;
2464
4.07k
    } fields;
2465
2466
4.07k
    int i;
2467
2468
4.07k
    memset(&fields, 0, sizeof fields);
2469
4.07k
    fields.eth_type = flow->dl_type;
2470
2471
4.07k
    if (fields.eth_type == htons(ETH_TYPE_IP)) {
2472
1.43k
        fields.ipv4_addr = flow->nw_src ^ flow->nw_dst;
2473
2.64k
    } else if (fields.eth_type == htons(ETH_TYPE_IPV6)) {
2474
1.45k
        const uint8_t *a = &flow->ipv6_src.s6_addr[0];
2475
1.45k
        const uint8_t *b = &flow->ipv6_dst.s6_addr[0];
2476
1.45k
        uint8_t *ipv6_addr = &fields.ipv6_addr.s6_addr[0];
2477
2478
24.6k
        for (i = 0; i < 16; i++) {
2479
23.2k
            ipv6_addr[i] = a[i] ^ b[i];
2480
23.2k
        }
2481
1.45k
    }
2482
4.07k
    return jhash_bytes(&fields, sizeof fields, basis);
2483
4.07k
}
2484
2485
/* Initialize a flow with random fields that matter for nx_hash_fields. */
2486
void
2487
flow_random_hash_fields(struct flow *flow)
2488
0
{
2489
0
    uint16_t rnd = random_uint16();
2490
0
    int i;
2491
2492
    /* Initialize to all zeros. */
2493
0
    memset(flow, 0, sizeof *flow);
2494
2495
0
    eth_addr_random(&flow->dl_src);
2496
0
    eth_addr_random(&flow->dl_dst);
2497
2498
0
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2499
0
        uint16_t vlan = random_uint16() & VLAN_VID_MASK;
2500
0
        flow->vlans[i].tpid = htons(ETH_TYPE_VLAN_8021Q);
2501
0
        flow->vlans[i].tci = htons(vlan | VLAN_CFI);
2502
0
    }
2503
2504
    /* Make most of the random flows IPv4, some IPv6, and rest random. */
2505
0
    flow->dl_type = rnd < 0x8000 ? htons(ETH_TYPE_IP) :
2506
0
        rnd < 0xc000 ? htons(ETH_TYPE_IPV6) : (OVS_FORCE ovs_be16)rnd;
2507
2508
0
    if (dl_type_is_ip_any(flow->dl_type)) {
2509
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2510
0
            flow->nw_src = (OVS_FORCE ovs_be32)random_uint32();
2511
0
            flow->nw_dst = (OVS_FORCE ovs_be32)random_uint32();
2512
0
        } else {
2513
0
            random_bytes(&flow->ipv6_src, sizeof flow->ipv6_src);
2514
0
            random_bytes(&flow->ipv6_dst, sizeof flow->ipv6_dst);
2515
0
        }
2516
        /* Make most of IP flows TCP, some UDP or SCTP, and rest random. */
2517
0
        rnd = random_uint16();
2518
0
        flow->nw_proto = rnd < 0x8000 ? IPPROTO_TCP :
2519
0
            rnd < 0xc000 ? IPPROTO_UDP :
2520
0
            rnd < 0xd000 ? IPPROTO_SCTP : (uint8_t)rnd;
2521
0
        if (flow->nw_proto == IPPROTO_TCP ||
2522
0
            flow->nw_proto == IPPROTO_UDP ||
2523
0
            flow->nw_proto == IPPROTO_SCTP) {
2524
0
            flow->tp_src = (OVS_FORCE ovs_be16)random_uint16();
2525
0
            flow->tp_dst = (OVS_FORCE ovs_be16)random_uint16();
2526
0
        }
2527
0
    }
2528
0
}
2529
2530
/* Masks the fields in 'wc' that are used by the flow hash 'fields'. */
2531
void
2532
flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
2533
                      enum nx_hash_fields fields)
2534
14.2k
{
2535
14.2k
    int i;
2536
14.2k
    switch (fields) {
2537
2.03k
    case NX_HASH_FIELDS_ETH_SRC:
2538
2.03k
        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2539
2.03k
        break;
2540
2541
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L4:
2542
2.03k
        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2543
2.03k
        memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2544
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2545
715
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2546
715
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2547
1.32k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2548
725
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2549
725
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2550
725
        }
2551
2.03k
        if (is_ip_any(flow)) {
2552
1.44k
            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
2553
            /* Unwildcard port only for non-UDP packets as udp port
2554
             * numbers are not used in hash calculations.
2555
             */
2556
1.44k
            if (flow->nw_proto != IPPROTO_UDP) {
2557
1.15k
                flow_unwildcard_tp_ports(flow, wc);
2558
1.15k
            }
2559
1.44k
        }
2560
6.11k
        for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2561
4.07k
            wc->masks.vlans[i].tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2562
4.07k
        }
2563
2.03k
        break;
2564
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
2565
2.03k
        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP
2566
2.03k
            && !(flow->nw_frag & FLOW_NW_FRAG_MASK)) {
2567
281
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2568
281
            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
2569
281
        }
2570
        /* fall through */
2571
4.07k
    case NX_HASH_FIELDS_SYMMETRIC_L3L4:
2572
4.07k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2573
1.43k
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2574
1.43k
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2575
2.64k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2576
1.45k
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2577
1.45k
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2578
1.45k
        } else {
2579
1.19k
            break; /* non-IP flow */
2580
1.19k
        }
2581
2.88k
        memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
2582
2.88k
        if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP)
2583
2.88k
             && !(flow->nw_frag & FLOW_NW_FRAG_MASK)) {
2584
554
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2585
554
            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
2586
554
        }
2587
2.88k
        break;
2588
2589
2.03k
    case NX_HASH_FIELDS_NW_SRC:
2590
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2591
715
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2592
1.32k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2593
725
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2594
725
        }
2595
2.03k
        break;
2596
2597
2.03k
    case NX_HASH_FIELDS_NW_DST:
2598
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2599
715
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2600
1.32k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2601
725
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2602
725
        }
2603
2.03k
        break;
2604
2605
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L3:
2606
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2607
715
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2608
715
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2609
1.32k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2610
725
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2611
725
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2612
725
        }
2613
2.03k
        break;
2614
2615
0
    default:
2616
0
        OVS_NOT_REACHED();
2617
14.2k
    }
2618
14.2k
}
2619
2620
/* Hashes the portions of 'flow' designated by 'fields'. */
2621
uint32_t
2622
flow_hash_fields(const struct flow *flow, enum nx_hash_fields fields,
2623
                 uint16_t basis)
2624
14.2k
{
2625
14.2k
    switch (fields) {
2626
2627
2.03k
    case NX_HASH_FIELDS_ETH_SRC:
2628
2.03k
        return jhash_bytes(&flow->dl_src, sizeof flow->dl_src, basis);
2629
2630
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L4:
2631
2.03k
        return flow_hash_symmetric_l4(flow, basis);
2632
2633
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L3L4:
2634
2.03k
        return flow_hash_symmetric_l3l4(flow, basis, false);
2635
2636
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
2637
2.03k
        return flow_hash_symmetric_l3l4(flow, basis, true);
2638
2639
2.03k
    case NX_HASH_FIELDS_NW_SRC:
2640
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2641
715
            return jhash_bytes(&flow->nw_src, sizeof flow->nw_src, basis);
2642
1.32k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2643
725
            return jhash_bytes(&flow->ipv6_src, sizeof flow->ipv6_src, basis);
2644
725
        } else {
2645
597
            return basis;
2646
597
        }
2647
2648
2.03k
    case NX_HASH_FIELDS_NW_DST:
2649
2.03k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2650
715
            return jhash_bytes(&flow->nw_dst, sizeof flow->nw_dst, basis);
2651
1.32k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2652
725
            return jhash_bytes(&flow->ipv6_dst, sizeof flow->ipv6_dst, basis);
2653
725
        } else {
2654
597
            return basis;
2655
597
        }
2656
2657
2.03k
    case NX_HASH_FIELDS_SYMMETRIC_L3:
2658
2.03k
        return flow_hash_symmetric_l3(flow, basis);
2659
14.2k
    }
2660
2661
14.2k
    OVS_NOT_REACHED();
2662
14.2k
}
2663
2664
/* Returns a string representation of 'fields'. */
2665
const char *
2666
flow_hash_fields_to_str(enum nx_hash_fields fields)
2667
8.67k
{
2668
8.67k
    switch (fields) {
2669
648
    case NX_HASH_FIELDS_ETH_SRC: return "eth_src";
2670
916
    case NX_HASH_FIELDS_SYMMETRIC_L4: return "symmetric_l4";
2671
1
    case NX_HASH_FIELDS_SYMMETRIC_L3L4: return "symmetric_l3l4";
2672
1
    case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP: return "symmetric_l3l4+udp";
2673
4
    case NX_HASH_FIELDS_NW_SRC: return "nw_src";
2674
2.33k
    case NX_HASH_FIELDS_NW_DST: return "nw_dst";
2675
0
    case NX_HASH_FIELDS_SYMMETRIC_L3: return "symmetric_l3";
2676
4.77k
    default: return "<unknown>";
2677
8.67k
    }
2678
8.67k
}
2679
2680
/* Returns true if the value of 'fields' is supported. Otherwise false. */
2681
bool
2682
flow_hash_fields_valid(enum nx_hash_fields fields)
2683
19.2k
{
2684
19.2k
    return fields == NX_HASH_FIELDS_ETH_SRC
2685
19.2k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L4
2686
19.2k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L3L4
2687
19.2k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
2688
19.2k
        || fields == NX_HASH_FIELDS_NW_SRC
2689
19.2k
        || fields == NX_HASH_FIELDS_NW_DST
2690
19.2k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L3;
2691
19.2k
}
2692
2693
/* Returns a hash value for the bits of 'flow' that are active based on
2694
 * 'wc', given 'basis'. */
2695
uint32_t
2696
flow_hash_in_wildcards(const struct flow *flow,
2697
                       const struct flow_wildcards *wc, uint32_t basis)
2698
0
{
2699
0
    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2700
0
    const uint64_t *flow_u64 = (const uint64_t *) flow;
2701
0
    uint32_t hash;
2702
0
    size_t i;
2703
2704
0
    hash = basis;
2705
0
    for (i = 0; i < FLOW_U64S; i++) {
2706
0
        hash = hash_add64(hash, flow_u64[i] & wc_u64[i]);
2707
0
    }
2708
0
    return hash_finish(hash, 8 * FLOW_U64S);
2709
0
}
2710
2711
/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2712
 * OpenFlow 1.0 "dl_vlan" value:
2713
 *
2714
 *      - If it is in the range 0...4095, 'flow->vlans[0].tci' is set to match
2715
 *        that VLAN.  Any existing PCP match is unchanged (it becomes 0 if
2716
 *        'flow' previously matched packets without a VLAN header).
2717
 *
2718
 *      - If it is OFP_VLAN_NONE, 'flow->vlan_tci' is set to match a packet
2719
 *        without a VLAN tag.
2720
 *
2721
 *      - Other values of 'vid' should not be used. */
2722
void
2723
flow_set_dl_vlan(struct flow *flow, ovs_be16 vid, int id)
2724
805
{
2725
805
    if (vid == htons(OFP10_VLAN_NONE)) {
2726
207
        flow->vlans[id].tci = htons(0);
2727
598
    } else {
2728
598
        vid &= htons(VLAN_VID_MASK);
2729
598
        flow->vlans[id].tci &= ~htons(VLAN_VID_MASK);
2730
598
        flow->vlans[id].tci |= htons(VLAN_CFI) | vid;
2731
598
    }
2732
805
}
2733
2734
/* Sets the VLAN header TPID, which must be either ETH_TYPE_VLAN_8021Q or
2735
 * ETH_TYPE_VLAN_8021AD. */
2736
void
2737
flow_fix_vlan_tpid(struct flow *flow)
2738
0
{
2739
0
    if (flow->vlans[0].tpid == htons(0) && flow->vlans[0].tci != 0) {
2740
0
        flow->vlans[0].tpid = htons(ETH_TYPE_VLAN_8021Q);
2741
0
    }
2742
0
}
2743
2744
/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2745
 * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID
2746
 * plus CFI). */
2747
void
2748
flow_set_vlan_vid(struct flow *flow, ovs_be16 vid)
2749
1.38k
{
2750
1.38k
    ovs_be16 mask = htons(VLAN_VID_MASK | VLAN_CFI);
2751
1.38k
    flow->vlans[0].tci &= ~mask;
2752
1.38k
    flow->vlans[0].tci |= vid & mask;
2753
1.38k
}
2754
2755
/* Sets the VLAN PCP that 'flow' matches to 'pcp', which should be in the
2756
 * range 0...7.
2757
 *
2758
 * This function has no effect on the VLAN ID that 'flow' matches.
2759
 *
2760
 * After calling this function, 'flow' will not match packets without a VLAN
2761
 * header. */
2762
void
2763
flow_set_vlan_pcp(struct flow *flow, uint8_t pcp, int id)
2764
968
{
2765
968
    pcp &= 0x07;
2766
968
    flow->vlans[id].tci &= ~htons(VLAN_PCP_MASK);
2767
968
    flow->vlans[id].tci |= htons((pcp << VLAN_PCP_SHIFT) | VLAN_CFI);
2768
968
}
2769
2770
/* Counts the number of VLAN headers. */
2771
int
2772
flow_count_vlan_headers(const struct flow *flow)
2773
19.7k
{
2774
19.7k
    int i;
2775
2776
25.6k
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2777
24.8k
        if (!(flow->vlans[i].tci & htons(VLAN_CFI))) {
2778
19.0k
            break;
2779
19.0k
        }
2780
24.8k
    }
2781
19.7k
    return i;
2782
19.7k
}
2783
2784
/* Given '*p_an' and '*p_bn' pointing to one past the last VLAN header of
2785
 * 'a' and 'b' respectively, skip common VLANs so that they point to the
2786
 * first different VLAN counting from bottom. */
2787
void
2788
flow_skip_common_vlan_headers(const struct flow *a, int *p_an,
2789
                              const struct flow *b, int *p_bn)
2790
0
{
2791
0
    int an = *p_an, bn = *p_bn;
2792
2793
0
    for (an--, bn--; an >= 0 && bn >= 0; an--, bn--) {
2794
0
        if (a->vlans[an].qtag != b->vlans[bn].qtag) {
2795
0
            break;
2796
0
        }
2797
0
    }
2798
0
    *p_an = an;
2799
0
    *p_bn = bn;
2800
0
}
2801
2802
void
2803
flow_pop_vlan(struct flow *flow, struct flow_wildcards *wc)
2804
17.7k
{
2805
17.7k
    int n = flow_count_vlan_headers(flow);
2806
17.7k
    if (n > 1) {
2807
747
        if (wc) {
2808
0
            memset(&wc->masks.vlans[1], 0xff,
2809
0
                   sizeof(union flow_vlan_hdr) * (n - 1));
2810
0
        }
2811
747
        memmove(&flow->vlans[0], &flow->vlans[1],
2812
747
                sizeof(union flow_vlan_hdr) * (n - 1));
2813
747
    }
2814
17.7k
    if (n > 0) {
2815
5.00k
        memset(&flow->vlans[n - 1], 0, sizeof(union flow_vlan_hdr));
2816
5.00k
    }
2817
17.7k
}
2818
2819
void
2820
flow_push_vlan_uninit(struct flow *flow, struct flow_wildcards *wc)
2821
2.31k
{
2822
2.31k
    if (wc) {
2823
0
        int n = flow_count_vlan_headers(flow);
2824
0
        if (n) {
2825
0
            memset(wc->masks.vlans, 0xff, sizeof(union flow_vlan_hdr) * n);
2826
0
        }
2827
0
    }
2828
2.31k
    memmove(&flow->vlans[1], &flow->vlans[0],
2829
2.31k
            sizeof(union flow_vlan_hdr) * (FLOW_MAX_VLAN_HEADERS - 1));
2830
2.31k
    memset(&flow->vlans[0], 0, sizeof(union flow_vlan_hdr));
2831
2.31k
}
2832
2833
/* Returns the number of MPLS LSEs present in 'flow'
2834
 *
2835
 * Returns 0 if the 'dl_type' of 'flow' is not an MPLS ethernet type.
2836
 * Otherwise traverses 'flow''s MPLS label stack stopping at the
2837
 * first entry that has the BoS bit set. If no such entry exists then
2838
 * the maximum number of LSEs that can be stored in 'flow' is returned.
2839
 */
2840
int
2841
flow_count_mpls_labels(const struct flow *flow, struct flow_wildcards *wc)
2842
66
{
2843
    /* dl_type is always masked. */
2844
66
    if (eth_type_mpls(flow->dl_type)) {
2845
66
        int i;
2846
66
        int cnt;
2847
2848
66
        cnt = 0;
2849
259
        for (i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
2850
195
            if (wc) {
2851
0
                wc->masks.mpls_lse[i] |= htonl(MPLS_BOS_MASK);
2852
0
            }
2853
195
            if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
2854
2
                return i + 1;
2855
2
            }
2856
193
            if (flow->mpls_lse[i]) {
2857
43
                cnt++;
2858
43
            }
2859
193
        }
2860
64
        return cnt;
2861
66
    } else {
2862
0
        return 0;
2863
0
    }
2864
66
}
2865
2866
/* Returns the number consecutive of MPLS LSEs, starting at the
2867
 * innermost LSE, that are common in 'a' and 'b'.
2868
 *
2869
 * 'an' must be flow_count_mpls_labels(a).
2870
 * 'bn' must be flow_count_mpls_labels(b).
2871
 */
2872
int
2873
flow_count_common_mpls_labels(const struct flow *a, int an,
2874
                              const struct flow *b, int bn,
2875
                              struct flow_wildcards *wc)
2876
0
{
2877
0
    int min_n = MIN(an, bn);
2878
0
    if (min_n == 0) {
2879
0
        return 0;
2880
0
    } else {
2881
0
        int common_n = 0;
2882
0
        int a_last = an - 1;
2883
0
        int b_last = bn - 1;
2884
0
        int i;
2885
2886
0
        for (i = 0; i < min_n; i++) {
2887
0
            if (wc) {
2888
0
                wc->masks.mpls_lse[a_last - i] = OVS_BE32_MAX;
2889
0
                wc->masks.mpls_lse[b_last - i] = OVS_BE32_MAX;
2890
0
            }
2891
0
            if (a->mpls_lse[a_last - i] != b->mpls_lse[b_last - i]) {
2892
0
                break;
2893
0
            } else {
2894
0
                common_n++;
2895
0
            }
2896
0
        }
2897
2898
0
        return common_n;
2899
0
    }
2900
0
}
2901
2902
/* Adds a new outermost MPLS label to 'flow' and changes 'flow''s Ethernet type
2903
 * to 'mpls_eth_type', which must be an MPLS Ethertype.
2904
 *
2905
 * If the new label is the first MPLS label in 'flow', it is generated as;
2906
 *
2907
 *     - label: 2, if 'flow' is IPv6, otherwise 0.
2908
 *
2909
 *     - TTL: IPv4 or IPv6 TTL, if present and nonzero, otherwise 64.
2910
 *
2911
 *     - TC: IPv4 or IPv6 TOS, if present, otherwise 0.
2912
 *
2913
 *     - BoS: 1.
2914
 *
2915
 * If the new label is the second or later label MPLS label in 'flow', it is
2916
 * generated as;
2917
 *
2918
 *     - label: Copied from outer label.
2919
 *
2920
 *     - TTL: Copied from outer label.
2921
 *
2922
 *     - TC: Copied from outer label.
2923
 *
2924
 *     - BoS: 0.
2925
 *
2926
 * 'n' must be flow_count_mpls_labels(flow).  'n' must be less than
2927
 * FLOW_MAX_MPLS_LABELS (because otherwise flow->mpls_lse[] would overflow).
2928
 */
2929
void
2930
flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
2931
               struct flow_wildcards *wc, bool clear_flow_L3)
2932
0
{
2933
0
    ovs_assert(eth_type_mpls(mpls_eth_type));
2934
0
    ovs_assert(n < FLOW_MAX_MPLS_LABELS);
2935
2936
0
    if (n) {
2937
0
        int i;
2938
2939
0
        if (wc) {
2940
0
            memset(&wc->masks.mpls_lse, 0xff, sizeof *wc->masks.mpls_lse * n);
2941
0
        }
2942
0
        for (i = n; i >= 1; i--) {
2943
0
            flow->mpls_lse[i] = flow->mpls_lse[i - 1];
2944
0
        }
2945
0
        flow->mpls_lse[0] = (flow->mpls_lse[1] & htonl(~MPLS_BOS_MASK));
2946
0
    } else {
2947
0
        int label = 0;          /* IPv4 Explicit Null. */
2948
0
        int tc = 0;
2949
0
        int ttl = 64;
2950
2951
0
        if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2952
0
            label = 2;
2953
0
        }
2954
2955
0
        if (is_ip_any(flow)) {
2956
0
            tc = (flow->nw_tos & IP_DSCP_MASK) >> 2;
2957
0
            if (wc) {
2958
0
                wc->masks.nw_tos |= IP_DSCP_MASK;
2959
0
                wc->masks.nw_ttl = 0xff;
2960
0
            }
2961
2962
0
            if (flow->nw_ttl) {
2963
0
                ttl = flow->nw_ttl;
2964
0
            }
2965
0
        }
2966
2967
0
        flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
2968
2969
0
        if (clear_flow_L3) {
2970
            /* Clear all L3 and L4 fields and dp_hash. */
2971
0
            BUILD_ASSERT(FLOW_WC_SEQ == 42);
2972
0
            memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
2973
0
                   sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
2974
0
            flow->dp_hash = 0;
2975
0
        }
2976
0
    }
2977
0
    flow->dl_type = mpls_eth_type;
2978
0
}
2979
2980
/* Tries to remove the outermost MPLS label from 'flow'.  Returns true if
2981
 * successful, false otherwise.  On success, sets 'flow''s Ethernet type to
2982
 * 'eth_type'.
2983
 *
2984
 * 'n' must be flow_count_mpls_labels(flow). */
2985
bool
2986
flow_pop_mpls(struct flow *flow, int n, ovs_be16 eth_type,
2987
              struct flow_wildcards *wc)
2988
0
{
2989
0
    int i;
2990
2991
0
    if (n == 0) {
2992
        /* Nothing to pop. */
2993
0
        return false;
2994
0
    } else if (n == FLOW_MAX_MPLS_LABELS) {
2995
0
        if (wc) {
2996
0
            wc->masks.mpls_lse[n - 1] |= htonl(MPLS_BOS_MASK);
2997
0
        }
2998
0
        if (!(flow->mpls_lse[n - 1] & htonl(MPLS_BOS_MASK))) {
2999
            /* Can't pop because don't know what to fill in mpls_lse[n - 1]. */
3000
0
            return false;
3001
0
        }
3002
0
    }
3003
3004
0
    if (wc) {
3005
0
        memset(&wc->masks.mpls_lse[1], 0xff,
3006
0
               sizeof *wc->masks.mpls_lse * (n - 1));
3007
0
    }
3008
0
    for (i = 1; i < n; i++) {
3009
0
        flow->mpls_lse[i - 1] = flow->mpls_lse[i];
3010
0
    }
3011
0
    flow->mpls_lse[n - 1] = 0;
3012
0
    flow->dl_type = eth_type;
3013
0
    return true;
3014
0
}
3015
3016
/* Sets the MPLS Label that 'flow' matches to 'label', which is interpreted
3017
 * as an OpenFlow 1.1 "mpls_label" value. */
3018
void
3019
flow_set_mpls_label(struct flow *flow, int idx, ovs_be32 label)
3020
936
{
3021
936
    set_mpls_lse_label(&flow->mpls_lse[idx], label);
3022
936
}
3023
3024
/* Sets the MPLS TTL that 'flow' matches to 'ttl', which should be in the
3025
 * range 0...255. */
3026
void
3027
flow_set_mpls_ttl(struct flow *flow, int idx, uint8_t ttl)
3028
2.56k
{
3029
2.56k
    set_mpls_lse_ttl(&flow->mpls_lse[idx], ttl);
3030
2.56k
}
3031
3032
/* Sets the MPLS TC that 'flow' matches to 'tc', which should be in the
3033
 * range 0...7. */
3034
void
3035
flow_set_mpls_tc(struct flow *flow, int idx, uint8_t tc)
3036
1.61k
{
3037
1.61k
    set_mpls_lse_tc(&flow->mpls_lse[idx], tc);
3038
1.61k
}
3039
3040
/* Sets the MPLS BOS bit that 'flow' matches to which should be 0 or 1. */
3041
void
3042
flow_set_mpls_bos(struct flow *flow, int idx, uint8_t bos)
3043
838
{
3044
838
    set_mpls_lse_bos(&flow->mpls_lse[idx], bos);
3045
838
}
3046
3047
/* Sets the entire MPLS LSE. */
3048
void
3049
flow_set_mpls_lse(struct flow *flow, int idx, ovs_be32 lse)
3050
0
{
3051
0
    flow->mpls_lse[idx] = lse;
3052
0
}
3053
3054
/* Appends an L7 payload of 'l7_len' bytes to 'p'.
 *
 *    - If 'l7_len' is zero, nothing is appended.
 *
 *    - If 'l7' is nonnull, the payload is copied from 'l7'.
 *
 *    - Otherwise the payload is filled with a counting pattern: byte 'i'
 *      holds 'i' truncated to 8 bits. */
static void
flow_compose_l7(struct dp_packet *p, const void *l7, size_t l7_len)
{
    if (!l7_len) {
        return;
    }

    if (l7) {
        dp_packet_put(p, l7, l7_len);
        return;
    }

    uint8_t *fill = dp_packet_put_uninit(p, l7_len);
    size_t pos = 0;

    while (pos < l7_len) {
        fill[pos] = pos;
        pos++;
    }
}
3068
3069
static size_t
3070
flow_compose_l4(struct dp_packet *p, const struct flow *flow,
3071
                const void *l7, size_t l7_len)
3072
0
{
3073
0
    size_t orig_len = dp_packet_size(p);
3074
3075
0
    if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
3076
0
        || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
3077
0
        if (flow->nw_proto == IPPROTO_TCP) {
3078
0
            struct tcp_header *tcp = dp_packet_put_zeros(p, sizeof *tcp);
3079
0
            tcp->tcp_src = flow->tp_src;
3080
0
            tcp->tcp_dst = flow->tp_dst;
3081
0
            tcp->tcp_ctl = TCP_CTL(ntohs(flow->tcp_flags), 5);
3082
0
            if (!(flow->tcp_flags & htons(TCP_SYN | TCP_FIN | TCP_RST))) {
3083
0
                flow_compose_l7(p, l7, l7_len);
3084
0
            }
3085
0
        } else if (flow->nw_proto == IPPROTO_UDP) {
3086
0
            struct udp_header *udp = dp_packet_put_zeros(p, sizeof *udp);
3087
0
            udp->udp_src = flow->tp_src;
3088
0
            udp->udp_dst = flow->tp_dst;
3089
0
            udp->udp_len = htons(sizeof *udp + l7_len);
3090
0
            flow_compose_l7(p, l7, l7_len);
3091
0
        } else if (flow->nw_proto == IPPROTO_SCTP) {
3092
0
            struct sctp_header *sctp = dp_packet_put_zeros(p, sizeof *sctp);
3093
0
            sctp->sctp_src = flow->tp_src;
3094
0
            sctp->sctp_dst = flow->tp_dst;
3095
            /* XXX Someone should figure out what L7 data to include. */
3096
0
        } else if (flow->nw_proto == IPPROTO_ICMP) {
3097
0
            struct icmp_header *icmp = dp_packet_put_zeros(p, sizeof *icmp);
3098
0
            icmp->icmp_type = ntohs(flow->tp_src);
3099
0
            icmp->icmp_code = ntohs(flow->tp_dst);
3100
0
            if ((icmp->icmp_type == ICMP4_ECHO_REQUEST ||
3101
0
                 icmp->icmp_type == ICMP4_ECHO_REPLY)
3102
0
                && icmp->icmp_code == 0) {
3103
0
                flow_compose_l7(p, l7, l7_len);
3104
0
            } else {
3105
                /* XXX Add inner IP packet for e.g. destination unreachable? */
3106
0
            }
3107
0
        } else if (flow->nw_proto == IPPROTO_IGMP) {
3108
0
            struct igmp_header *igmp = dp_packet_put_zeros(p, sizeof *igmp);
3109
0
            igmp->igmp_type = ntohs(flow->tp_src);
3110
0
            igmp->igmp_code = ntohs(flow->tp_dst);
3111
0
            put_16aligned_be32(&igmp->group, flow->igmp_group_ip4);
3112
0
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
3113
0
            struct icmp6_data_header *icmp6;
3114
3115
0
            icmp6 = dp_packet_put_zeros(p, sizeof *icmp6);
3116
0
            icmp6->icmp6_base.icmp6_type = ntohs(flow->tp_src);
3117
0
            icmp6->icmp6_base.icmp6_code = ntohs(flow->tp_dst);
3118
0
            put_16aligned_be32(icmp6->icmp6_data.be32, flow->igmp_group_ip4);
3119
3120
0
            if (icmp6->icmp6_base.icmp6_code == 0 &&
3121
0
                (icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_SOLICIT ||
3122
0
                 icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_ADVERT)) {
3123
0
                struct in6_addr *nd_target;
3124
0
                struct ovs_nd_lla_opt *lla_opt;
3125
3126
0
                nd_target = dp_packet_put_zeros(p, sizeof *nd_target);
3127
0
                *nd_target = flow->nd_target;
3128
3129
0
                if (!eth_addr_is_zero(flow->arp_sha)) {
3130
0
                    lla_opt = dp_packet_put_zeros(p, 8);
3131
0
                    lla_opt->len = 1;
3132
0
                    lla_opt->type = ND_OPT_SOURCE_LINKADDR;
3133
0
                    lla_opt->mac = flow->arp_sha;
3134
0
                }
3135
0
                if (!eth_addr_is_zero(flow->arp_tha)) {
3136
0
                    lla_opt = dp_packet_put_zeros(p, 8);
3137
0
                    lla_opt->len = 1;
3138
0
                    lla_opt->type = ND_OPT_TARGET_LINKADDR;
3139
0
                    lla_opt->mac = flow->arp_tha;
3140
0
                }
3141
0
            } else if (icmp6->icmp6_base.icmp6_code == 0 &&
3142
0
                       (icmp6->icmp6_base.icmp6_type == ICMP6_ECHO_REQUEST ||
3143
0
                        icmp6->icmp6_base.icmp6_type == ICMP6_ECHO_REPLY)) {
3144
0
                flow_compose_l7(p, l7, l7_len);
3145
0
            } else {
3146
                /* XXX Add inner IP packet for e.g. destination unreachable? */
3147
0
            }
3148
0
        }
3149
0
    }
3150
3151
0
    return dp_packet_size(p) - orig_len;
3152
0
}
3153
3154
static void
3155
flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
3156
                     uint32_t pseudo_hdr_csum)
3157
0
{
3158
0
    size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
3159
3160
0
    if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
3161
0
        || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
3162
0
        if (flow->nw_proto == IPPROTO_TCP) {
3163
0
            struct tcp_header *tcp = dp_packet_l4(p);
3164
3165
0
            tcp->tcp_csum = 0;
3166
0
            tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
3167
0
                                                      tcp, l4_len));
3168
0
        } else if (flow->nw_proto == IPPROTO_UDP) {
3169
0
            struct udp_header *udp = dp_packet_l4(p);
3170
3171
0
            udp->udp_csum = 0;
3172
0
            udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
3173
0
                                                      udp, l4_len));
3174
0
            if (!udp->udp_csum) {
3175
0
                udp->udp_csum = htons(0xffff);
3176
0
            }
3177
0
        } else if (flow->nw_proto == IPPROTO_ICMP) {
3178
0
            struct icmp_header *icmp = dp_packet_l4(p);
3179
3180
0
            icmp->icmp_csum = 0;
3181
0
            icmp->icmp_csum = csum(icmp, l4_len);
3182
0
        } else if (flow->nw_proto == IPPROTO_IGMP) {
3183
0
            struct igmp_header *igmp = dp_packet_l4(p);
3184
3185
0
            igmp->igmp_csum = 0;
3186
0
            igmp->igmp_csum = csum(igmp, l4_len);
3187
0
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
3188
0
            struct icmp6_data_header *icmp6 = dp_packet_l4(p);
3189
3190
0
            icmp6->icmp6_base.icmp6_cksum = 0;
3191
0
            icmp6->icmp6_base.icmp6_cksum =
3192
0
                csum_finish(csum_continue(pseudo_hdr_csum, icmp6, l4_len));
3193
0
        }
3194
0
    }
3195
0
}
3196
3197
/* Increase the size of packet composed by 'flow_compose_minimal'
3198
 * up to 'size' bytes.  Fixes all the required packet headers like
3199
 * ip/udp lengths and l3/l4 checksums.
3200
 *
3201
 * 'size' needs to be larger then the current packet size.  */
3202
void
3203
packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
3204
0
{
3205
0
    size_t extra_size;
3206
3207
0
    ovs_assert(size > dp_packet_size(p));
3208
3209
0
    extra_size = size - dp_packet_size(p);
3210
0
    dp_packet_put_zeros(p, extra_size);
3211
3212
0
    if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
3213
0
        struct eth_header *eth = dp_packet_eth(p);
3214
3215
0
        eth->eth_type = htons(dp_packet_size(p));
3216
0
    } else if (dl_type_is_ip_any(flow->dl_type)) {
3217
0
        uint32_t pseudo_hdr_csum;
3218
0
        size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
3219
3220
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
3221
0
            struct ip_header *ip = dp_packet_l3(p);
3222
3223
0
            ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
3224
0
            ip->ip_csum = 0;
3225
0
            ip->ip_csum = csum(ip, sizeof *ip);
3226
3227
0
            pseudo_hdr_csum = packet_csum_pseudoheader(ip);
3228
0
        } else { /* ETH_TYPE_IPV6 */
3229
0
            struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
3230
3231
0
            nh->ip6_plen = htons(l4_len);
3232
0
            pseudo_hdr_csum = packet_csum_pseudoheader6(nh);
3233
0
        }
3234
3235
0
        if ((!(flow->nw_frag & FLOW_NW_FRAG_ANY)
3236
0
             || !(flow->nw_frag & FLOW_NW_FRAG_LATER))
3237
0
            && flow->nw_proto == IPPROTO_UDP) {
3238
0
            struct udp_header *udp = dp_packet_l4(p);
3239
3240
0
            udp->udp_len = htons(l4_len + extra_size);
3241
0
        }
3242
0
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
3243
0
    }
3244
0
}
3245
3246
/* Puts into 'p' a packet that flow_extract() would parse as having the given
3247
 * 'flow'.
3248
 *
3249
 * (This is useful only for testing, obviously, and the packet isn't really
3250
 * valid.  Lots of fields are just zeroed.)
3251
 *
3252
 * For packets whose protocols can encapsulate arbitrary L7 payloads, 'l7' and
3253
 * 'l7_len' determine that payload:
3254
 *
3255
 *    - If 'l7_len' is zero, no payload is included.
3256
 *
3257
 *    - If 'l7_len' is nonzero and 'l7' is null, an arbitrary payload 'l7_len'
3258
 *      bytes long is included.
3259
 *
3260
 *    - If 'l7_len' is nonzero and 'l7' is nonnull, the payload is copied
3261
 *      from 'l7'. */
3262
void
3263
flow_compose(struct dp_packet *p, const struct flow *flow,
3264
             const void *l7, size_t l7_len)
3265
0
{
3266
    /* Add code to this function (or its callees) for emitting new fields or
3267
     * protocols.  (This isn't essential, so it can be skipped for initial
3268
     * testing.) */
3269
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
3270
3271
0
    uint32_t pseudo_hdr_csum;
3272
0
    size_t l4_len;
3273
3274
    /* eth_compose() sets l3 pointer and makes sure it is 32-bit aligned. */
3275
0
    eth_compose(p, flow->dl_dst, flow->dl_src, ntohs(flow->dl_type), 0);
3276
0
    if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
3277
0
        struct eth_header *eth = dp_packet_eth(p);
3278
0
        eth->eth_type = htons(dp_packet_size(p));
3279
0
        return;
3280
0
    }
3281
3282
0
    for (int encaps = FLOW_MAX_VLAN_HEADERS - 1; encaps >= 0; encaps--) {
3283
0
        if (flow->vlans[encaps].tci & htons(VLAN_CFI)) {
3284
0
            eth_push_vlan(p, flow->vlans[encaps].tpid,
3285
0
                          flow->vlans[encaps].tci);
3286
0
        }
3287
0
    }
3288
3289
0
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
3290
0
        struct ip_header *ip;
3291
3292
0
        ip = dp_packet_put_zeros(p, sizeof *ip);
3293
0
        ip->ip_ihl_ver = IP_IHL_VER(5, 4);
3294
0
        ip->ip_tos = flow->nw_tos;
3295
0
        ip->ip_ttl = flow->nw_ttl;
3296
0
        ip->ip_proto = flow->nw_proto;
3297
0
        put_16aligned_be32(&ip->ip_src, flow->nw_src);
3298
0
        put_16aligned_be32(&ip->ip_dst, flow->nw_dst);
3299
3300
0
        if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
3301
0
            ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
3302
0
            if (flow->nw_frag & FLOW_NW_FRAG_LATER) {
3303
0
                ip->ip_frag_off |= htons(100);
3304
0
            }
3305
0
        }
3306
3307
0
        dp_packet_set_l4(p, dp_packet_tail(p));
3308
3309
0
        l4_len = flow_compose_l4(p, flow, l7, l7_len);
3310
3311
0
        ip = dp_packet_l3(p);
3312
0
        ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
3313
        /* Checksum has already been zeroed by put_zeros call. */
3314
0
        ip->ip_csum = csum(ip, sizeof *ip);
3315
3316
0
        pseudo_hdr_csum = packet_csum_pseudoheader(ip);
3317
0
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
3318
0
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
3319
0
        struct ovs_16aligned_ip6_hdr *nh;
3320
3321
0
        nh = dp_packet_put_zeros(p, sizeof *nh);
3322
0
        put_16aligned_be32(&nh->ip6_flow, htonl(6 << 28) |
3323
0
                           htonl(flow->nw_tos << 20) | flow->ipv6_label);
3324
0
        nh->ip6_hlim = flow->nw_ttl;
3325
0
        nh->ip6_nxt = flow->nw_proto;
3326
3327
0
        memcpy(&nh->ip6_src, &flow->ipv6_src, sizeof(nh->ip6_src));
3328
0
        memcpy(&nh->ip6_dst, &flow->ipv6_dst, sizeof(nh->ip6_dst));
3329
3330
0
        dp_packet_set_l4(p, dp_packet_tail(p));
3331
3332
0
        l4_len = flow_compose_l4(p, flow, l7, l7_len);
3333
3334
0
        nh = dp_packet_l3(p);
3335
0
        nh->ip6_plen = htons(l4_len);
3336
3337
0
        pseudo_hdr_csum = packet_csum_pseudoheader6(nh);
3338
0
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
3339
0
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
3340
0
               flow->dl_type == htons(ETH_TYPE_RARP)) {
3341
0
        struct arp_eth_header *arp;
3342
3343
0
        arp = dp_packet_put_zeros(p, sizeof *arp);
3344
0
        dp_packet_set_l3(p, arp);
3345
0
        arp->ar_hrd = htons(1);
3346
0
        arp->ar_pro = htons(ETH_TYPE_IP);
3347
0
        arp->ar_hln = ETH_ADDR_LEN;
3348
0
        arp->ar_pln = 4;
3349
0
        arp->ar_op = htons(flow->nw_proto);
3350
3351
0
        if (flow->nw_proto == ARP_OP_REQUEST ||
3352
0
            flow->nw_proto == ARP_OP_REPLY) {
3353
0
            put_16aligned_be32(&arp->ar_spa, flow->nw_src);
3354
0
            put_16aligned_be32(&arp->ar_tpa, flow->nw_dst);
3355
0
            arp->ar_sha = flow->arp_sha;
3356
0
            arp->ar_tha = flow->arp_tha;
3357
0
        }
3358
0
    }
3359
3360
0
    if (eth_type_mpls(flow->dl_type)) {
3361
0
        int n;
3362
3363
0
        p->l2_5_ofs = p->l3_ofs;
3364
0
        for (n = 1; n < FLOW_MAX_MPLS_LABELS; n++) {
3365
0
            if (flow->mpls_lse[n - 1] & htonl(MPLS_BOS_MASK)) {
3366
0
                break;
3367
0
            }
3368
0
        }
3369
0
        while (n > 0) {
3370
0
            push_mpls(p, flow->dl_type, flow->mpls_lse[--n]);
3371
0
        }
3372
0
    }
3373
0
}
3374

3375
/* Compressed flow. */
3376
3377
/* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
3378
 * the caller.  The caller must have already computed 'dst->map' properly to
3379
 * indicate the significant uint64_t elements of 'src'.
3380
 *
3381
 * Normally the significant elements are the ones that are non-zero.  However,
3382
 * when a miniflow is initialized from a (mini)mask, the values can be zeroes,
3383
 * so that the flow and mask always have the same maps. */
3384
void
3385
miniflow_init(struct miniflow *dst, const struct flow *src)
3386
194k
{
3387
194k
    uint64_t *dst_u64 = miniflow_values(dst);
3388
194k
    size_t idx;
3389
3390
764k
    FLOWMAP_FOR_EACH_INDEX(idx, dst->map) {
3391
764k
        *dst_u64++ = flow_u64_value(src, idx);
3392
764k
    }
3393
194k
}
3394
3395
/* Initialize the maps of 'flow' from 'src'. */
3396
void
3397
miniflow_map_init(struct miniflow *flow, const struct flow *src)
3398
106k
{
3399
    /* Initialize map, counting the number of nonzero elements. */
3400
106k
    flowmap_init(&flow->map);
3401
9.01M
    for (size_t i = 0; i < FLOW_U64S; i++) {
3402
8.90M
        if (flow_u64_value(src, i)) {
3403
432k
            flowmap_set(&flow->map, i, 1);
3404
432k
        }
3405
8.90M
    }
3406
106k
}
3407
3408
/* Allocates 'n' count of miniflows, consecutive in memory, initializing the
3409
 * map of each from 'src'.
3410
 * Returns the size of the miniflow data. */
3411
size_t
3412
miniflow_alloc(struct miniflow *dsts[], size_t n, const struct miniflow *src)
3413
107k
{
3414
107k
    size_t n_values = miniflow_n_values(src);
3415
107k
    size_t data_size = MINIFLOW_VALUES_SIZE(n_values);
3416
107k
    struct miniflow *dst = xmalloc(n * (sizeof *src + data_size));
3417
107k
    size_t i;
3418
3419
107k
    COVERAGE_INC(miniflow_malloc);
3420
3421
304k
    for (i = 0; i < n; i++) {
3422
196k
        *dst = *src;   /* Copy maps. */
3423
196k
        dsts[i] = dst;
3424
196k
        dst += 1;      /* Just past the maps. */
3425
196k
        dst = (struct miniflow *)((uint64_t *)dst + n_values); /* Skip data. */
3426
196k
    }
3427
107k
    return data_size;
3428
107k
}
3429
3430
/* Returns a miniflow copy of 'src'.  The caller must eventually free() the
3431
 * returned miniflow. */
3432
struct miniflow *
3433
miniflow_create(const struct flow *src)
3434
17.5k
{
3435
17.5k
    struct miniflow tmp;
3436
17.5k
    struct miniflow *dst;
3437
3438
17.5k
    miniflow_map_init(&tmp, src);
3439
3440
17.5k
    miniflow_alloc(&dst, 1, &tmp);
3441
17.5k
    miniflow_init(dst, src);
3442
17.5k
    return dst;
3443
17.5k
}
3444
3445
/* Initializes 'dst' as a copy of 'src'.  The caller must have allocated
3446
 * 'dst' to have inline space for 'n_values' data in 'src'. */
3447
void
3448
miniflow_clone(struct miniflow *dst, const struct miniflow *src,
3449
               size_t n_values)
3450
1.95k
{
3451
1.95k
    *dst = *src;   /* Copy maps. */
3452
1.95k
    memcpy(miniflow_values(dst), miniflow_get_values(src),
3453
1.95k
           MINIFLOW_VALUES_SIZE(n_values));
3454
1.95k
}
3455
3456
/* Initializes 'dst' as a copy of 'src'. */
3457
void
3458
miniflow_expand(const struct miniflow *src, struct flow *dst)
3459
323k
{
3460
323k
    memset(dst, 0, sizeof *dst);
3461
323k
    flow_union_with_miniflow(dst, src);
3462
323k
}
3463
3464
/* Returns true if 'a' and 'b' are equal miniflows, false otherwise. */
3465
bool
3466
miniflow_equal(const struct miniflow *a, const struct miniflow *b)
3467
3.90k
{
3468
3.90k
    const uint64_t *ap = miniflow_get_values(a);
3469
3.90k
    const uint64_t *bp = miniflow_get_values(b);
3470
3471
    /* This is mostly called after a matching hash, so it is highly likely that
3472
     * the maps are equal as well. */
3473
3.90k
    if (OVS_LIKELY(flowmap_equal(a->map, b->map))) {
3474
3.90k
        return !memcmp(ap, bp, miniflow_n_values(a) * sizeof *ap);
3475
3.90k
    } else {
3476
0
        size_t idx;
3477
3478
0
        FLOWMAP_FOR_EACH_INDEX (idx, flowmap_or(a->map, b->map)) {
3479
0
            if ((flowmap_is_set(&a->map, idx) ? *ap++ : 0)
3480
0
                != (flowmap_is_set(&b->map, idx) ? *bp++ : 0)) {
3481
0
                return false;
3482
0
            }
3483
0
        }
3484
0
    }
3485
3486
0
    return true;
3487
3.90k
}
3488
3489
/* Returns false if 'a' and 'b' differ at the places where there are 1-bits
3490
 * in 'mask', true otherwise. */
3491
bool
3492
miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b,
3493
                           const struct minimask *mask)
3494
3.90k
{
3495
3.90k
    const uint64_t *p = miniflow_get_values(&mask->masks);
3496
3.90k
    size_t idx;
3497
3498
32.3k
    FLOWMAP_FOR_EACH_INDEX(idx, mask->masks.map) {
3499
32.3k
        if ((miniflow_get(a, idx) ^ miniflow_get(b, idx)) & *p++) {
3500
1.95k
            return false;
3501
1.95k
        }
3502
32.3k
    }
3503
3504
1.95k
    return true;
3505
3.90k
}
3506
3507
/* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
3508
 * in 'mask', false if they differ. */
3509
bool
3510
miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
3511
                                const struct minimask *mask)
3512
3.90k
{
3513
3.90k
    const uint64_t *p = miniflow_get_values(&mask->masks);
3514
3.90k
    size_t idx;
3515
3516
32.3k
    FLOWMAP_FOR_EACH_INDEX(idx, mask->masks.map) {
3517
32.3k
        if ((miniflow_get(a, idx) ^ flow_u64_value(b, idx)) & *p++) {
3518
1.95k
            return false;
3519
1.95k
        }
3520
32.3k
    }
3521
3522
1.95k
    return true;
3523
3.90k
}
3524
3525

3526
void
3527
minimask_init(struct minimask *mask, const struct flow_wildcards *wc)
3528
88.5k
{
3529
88.5k
    miniflow_init(&mask->masks, &wc->masks);
3530
88.5k
}
3531
3532
/* Returns a minimask copy of 'wc'.  The caller must eventually free the
3533
 * returned minimask with free(). */
3534
struct minimask *
3535
minimask_create(const struct flow_wildcards *wc)
3536
13.6k
{
3537
13.6k
    return (struct minimask *)miniflow_create(&wc->masks);
3538
13.6k
}
3539
3540
/* Initializes 'dst_' as the bit-wise "and" of 'a_' and 'b_'.
3541
 *
3542
 * The caller must provide room for FLOW_U64S "uint64_t"s in 'storage', which
3543
 * must follow '*dst_' in memory, for use by 'dst_'.  The caller must *not*
3544
 * free 'dst_' free(). */
3545
void
3546
minimask_combine(struct minimask *dst_,
3547
                 const struct minimask *a_, const struct minimask *b_,
3548
                 uint64_t storage[FLOW_U64S])
3549
3.90k
{
3550
3.90k
    struct miniflow *dst = &dst_->masks;
3551
3.90k
    uint64_t *dst_values = storage;
3552
3.90k
    const struct miniflow *a = &a_->masks;
3553
3.90k
    const struct miniflow *b = &b_->masks;
3554
3.90k
    size_t idx;
3555
3556
3.90k
    flowmap_init(&dst->map);
3557
3558
3.90k
    FLOWMAP_FOR_EACH_INDEX(idx, flowmap_and(a->map, b->map)) {
3559
        /* Both 'a' and 'b' have non-zero data at 'idx'. */
3560
0
        uint64_t mask = *miniflow_get__(a, idx) & *miniflow_get__(b, idx);
3561
3562
0
        if (mask) {
3563
0
            flowmap_set(&dst->map, idx, 1);
3564
0
            *dst_values++ = mask;
3565
0
        }
3566
0
    }
3567
3.90k
}
3568
3569
/* Initializes 'wc' as a copy of 'mask'. */
3570
void
3571
minimask_expand(const struct minimask *mask, struct flow_wildcards *wc)
3572
100k
{
3573
100k
    miniflow_expand(&mask->masks, &wc->masks);
3574
100k
}
3575
3576
/* Returns true if 'a' and 'b' are the same flow mask, false otherwise.
3577
 * Minimasks may not have zero data values, so for the minimasks to be the
3578
 * same, they need to have the same map and the same data values. */
3579
bool
3580
minimask_equal(const struct minimask *a, const struct minimask *b)
3581
0
{
3582
    /* At first glance, it might seem that this can be reasonably optimized
3583
     * into a single memcmp() for the total size of the region.  Such an
3584
     * optimization will work OK with most implementations of memcmp() that
3585
     * proceed from the start of the regions to be compared to the end in
3586
     * reasonably sized chunks.  However, memcmp() is not required to be
3587
     * implemented that way, and an implementation that, for example, compares
3588
     * all of the bytes in both regions without early exit when it finds a
3589
     * difference, or one that compares, say, 64 bytes at a time, could access
3590
     * an unmapped region of memory if minimasks 'a' and 'b' have different
3591
     * lengths.  By first checking that the maps are the same with the first
3592
     * memcmp(), we verify that 'a' and 'b' have the same length and therefore
3593
     * ensure that the second memcmp() is safe. */
3594
0
    return (!memcmp(a, b, sizeof *a)
3595
0
            && !memcmp(a + 1, b + 1,
3596
0
                       MINIFLOW_VALUES_SIZE(miniflow_n_values(&a->masks))));
3597
0
}
3598
3599
/* Returns true if at least one bit matched by 'b' is wildcarded by 'a',
3600
 * false otherwise. */
3601
bool
3602
minimask_has_extra(const struct minimask *a, const struct minimask *b)
3603
7.74k
{
3604
7.74k
    const uint64_t *bp = miniflow_get_values(&b->masks);
3605
7.74k
    size_t idx;
3606
3607
23.3k
    FLOWMAP_FOR_EACH_INDEX(idx, b->masks.map) {
3608
23.3k
        uint64_t b_u64 = *bp++;
3609
3610
        /* 'b_u64' is non-zero, check if the data in 'a' is either zero
3611
         * or misses some of the bits in 'b_u64'. */
3612
23.3k
        if (!MINIFLOW_IN_MAP(&a->masks, idx)
3613
23.3k
            || ((*miniflow_get__(&a->masks, idx) & b_u64) != b_u64)) {
3614
3.84k
            return true; /* 'a' wildcards some bits 'b' doesn't. */
3615
3.84k
        }
3616
23.3k
    }
3617
3618
3.90k
    return false;
3619
7.74k
}
3620
3621
void
3622
flow_limit_vlans(int vlan_limit)
3623
0
{
3624
0
    if (vlan_limit <= 0) {
3625
0
        flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;
3626
0
    } else {
3627
0
        flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS);
3628
0
    }
3629
0
}
3630
3631
struct netdev *
3632
flow_get_tunnel_netdev(struct flow_tnl *tunnel)
3633
0
{
3634
0
    char iface[IFNAMSIZ];
3635
0
    struct in6_addr ip6;
3636
0
    struct in6_addr gw;
3637
3638
0
    if (tunnel->ip_src) {
3639
0
        in6_addr_set_mapped_ipv4(&ip6, tunnel->ip_src);
3640
0
    } else if (ipv6_addr_is_set(&tunnel->ipv6_src)) {
3641
0
        ip6 = tunnel->ipv6_src;
3642
0
    } else {
3643
0
        return NULL;
3644
0
    }
3645
3646
0
    if (!ovs_router_lookup(0, &ip6, iface, NULL, &gw)) {
3647
0
        return NULL;
3648
0
    }
3649
3650
0
    return netdev_from_name(iface);
3651
0
}