Coverage Report

Created: 2025-07-01 06:51

/src/openvswitch/lib/flow.c
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017, 2019 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <config.h>
#include <sys/types.h>
#include "flow.h"
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "byte-order.h"
#include "colors.h"
#include "coverage.h"
#include "csum.h"
#include "openvswitch/dynamic-string.h"
#include "hash.h"
#include "jhash.h"
#include "openvswitch/match.h"
#include "dp-packet.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "odp-util.h"
#include "random.h"
#include "unaligned.h"
#include "util.h"
#include "openvswitch/nsh.h"
#include "ovs-router.h"
#include "lib/netdev-provider.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(flow);

COVERAGE_DEFINE(flow_extract);
COVERAGE_DEFINE(miniflow_extract_ipv4_pkt_len_error);
COVERAGE_DEFINE(miniflow_extract_ipv4_pkt_too_short);
COVERAGE_DEFINE(miniflow_extract_ipv6_pkt_len_error);
COVERAGE_DEFINE(miniflow_extract_ipv6_pkt_too_short);
COVERAGE_DEFINE(miniflow_malloc);

/* U64 indices for segmented flow classification. */
const uint8_t flow_segment_u64s[4] = {
    FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),
    FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),
    FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),
    FLOW_U64S
};

int flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;

/* Asserts that field 'f1' follows immediately after 'f0' in struct flow,
 * without any intervening padding. */
#define ASSERT_SEQUENTIAL(f0, f1)                       \
    BUILD_ASSERT_DECL(offsetof(struct flow, f0)         \
                      + MEMBER_SIZEOF(struct flow, f0)  \
                      == offsetof(struct flow, f1))

/* Asserts that fields 'f0' and 'f1' are in the same 32-bit aligned word within
 * struct flow. */
#define ASSERT_SAME_WORD(f0, f1)                        \
    BUILD_ASSERT_DECL(offsetof(struct flow, f0) / 4     \
                      == offsetof(struct flow, f1) / 4)

/* Asserts that 'f0' and 'f1' are both sequential and within the same 32-bit
 * aligned word in struct flow. */
#define ASSERT_SEQUENTIAL_SAME_WORD(f0, f1)     \
    ASSERT_SEQUENTIAL(f0, f1);                  \
    ASSERT_SAME_WORD(f0, f1)

/* miniflow_extract() assumes the following to be true to optimize the
 * extraction process. */
ASSERT_SEQUENTIAL_SAME_WORD(nw_frag, nw_tos);
ASSERT_SEQUENTIAL_SAME_WORD(nw_tos, nw_ttl);
ASSERT_SEQUENTIAL_SAME_WORD(nw_ttl, nw_proto);

/* TCP flags in the middle of a BE64, zeroes in the other half. */
BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) % 8 == 4);

#if WORDS_BIGENDIAN
#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \
                                 << 16)
#else
#define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl))
#endif

ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);

/* Removes 'size' bytes from the head end of '*datap', of size '*sizep', which
 * must contain at least 'size' bytes of data.  Returns the first byte of data
 * removed. */
static inline const void *
data_pull(const void **datap, size_t *sizep, size_t size)
{
    const char *data = *datap;
    *datap = data + size;
    *sizep -= size;
    return data;
}

/* If '*datap' has at least 'size' bytes of data, removes that many bytes from
 * the head end of '*datap' and returns the first byte removed.  Otherwise,
 * returns a null pointer without modifying '*datap'. */
static inline const void *
data_try_pull(const void **datap, size_t *sizep, size_t size)
{
    return OVS_LIKELY(*sizep >= size) ? data_pull(datap, sizep, size) : NULL;
}

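/* An illustrative sketch (not part of the original file): how the two pull
 * helpers above are typically used to walk a packet buffer.  The function
 * name and the 16-bit payload are hypothetical; only data_try_pull() comes
 * from this file. */
static inline bool OVS_UNUSED
example_pull_u16(const void **datap, size_t *sizep, uint16_t *value)
{
    /* data_try_pull() returns NULL, leaving '*datap' and '*sizep' untouched,
     * when fewer than 'sizeof *value' bytes remain. */
    const uint8_t *p = data_try_pull(datap, sizep, sizeof *value);

    if (!p) {
        return false;
    }
    memcpy(value, p, sizeof *value);    /* 'p' may be unaligned. */
    return true;
}
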
/* Context for pushing data to a miniflow. */
struct mf_ctx {
    struct flowmap map;
    uint64_t *data;
    uint64_t * const end;
};

/* miniflow_push_* macros allow filling in miniflow data values in order.
 * Assertions are needed only when the layout of the struct flow is modified.
 * 'ofs' is a compile-time constant, which allows most of the code to be
 * optimized away.  Some GCC versions gave warnings on ALWAYS_INLINE, so
 * these are defined as macros. */

#if (FLOW_WC_SEQ != 42)
#define MINIFLOW_ASSERT(X) ovs_assert(X)
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
               "assertions enabled. Consider updating FLOW_WC_SEQ after "
               "testing")
#else
#define MINIFLOW_ASSERT(X)
#endif

/* Asserts that bit 'IDX' and all higher bits are not set. */
#define ASSERT_FLOWMAP_NOT_SET(FM, IDX)                                 \
{                                                                       \
    MINIFLOW_ASSERT(!((FM)->bits[(IDX) / MAP_T_BITS] &                  \
                      (MAP_MAX << ((IDX) % MAP_T_BITS))));              \
    for (size_t i = (IDX) / MAP_T_BITS + 1; i < FLOWMAP_UNITS; i++) {   \
        MINIFLOW_ASSERT(!(FM)->bits[i]);                                \
    }                                                                   \
}

#define miniflow_set_map(MF, OFS)            \
    {                                        \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS));  \
    flowmap_set(&MF.map, (OFS), 1);          \
}

#define miniflow_assert_in_map(MF, OFS)              \
    MINIFLOW_ASSERT(flowmap_is_set(&MF.map, (OFS))); \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS) + 1)

#define miniflow_push_uint64_(MF, OFS, VALUE)              \
{                                                          \
    MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 8 == 0);   \
    *MF.data++ = VALUE;                                    \
    miniflow_set_map(MF, OFS / 8);                         \
}

#define miniflow_push_be64_(MF, OFS, VALUE)                     \
    miniflow_push_uint64_(MF, OFS, (OVS_FORCE uint64_t)(VALUE))

#define miniflow_push_uint32_(MF, OFS, VALUE)   \
    {                                           \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
                                                \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, OFS / 8);          \
        *(uint32_t *)MF.data = VALUE;           \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint32_t *)MF.data + 1) = VALUE;     \
        MF.data++;                              \
    }                                           \
}

#define miniflow_push_be32_(MF, OFS, VALUE)                     \
    miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE))

#define miniflow_push_uint16_(MF, OFS, VALUE)   \
{                                               \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
                                                \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, OFS / 8);          \
        *(uint16_t *)MF.data = VALUE;           \
    } else if ((OFS) % 8 == 2) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint16_t *)MF.data + 1) = VALUE;     \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint16_t *)MF.data + 2) = VALUE;     \
    } else if ((OFS) % 8 == 6) {                \
        miniflow_assert_in_map(MF, OFS / 8);    \
        *((uint16_t *)MF.data + 3) = VALUE;     \
        MF.data++;                              \
    }                                           \
}

#define miniflow_push_uint8_(MF, OFS, VALUE)            \
{                                                       \
    MINIFLOW_ASSERT(MF.data < MF.end);                  \
                                                        \
    if ((OFS) % 8 == 0) {                               \
        miniflow_set_map(MF, OFS / 8);                  \
        *(uint8_t *)MF.data = VALUE;                    \
    } else if ((OFS) % 8 == 7) {                        \
        miniflow_assert_in_map(MF, OFS / 8);            \
        *((uint8_t *)MF.data + 7) = VALUE;              \
        MF.data++;                                      \
    } else {                                            \
        miniflow_assert_in_map(MF, OFS / 8);            \
        *((uint8_t *)MF.data + ((OFS) % 8)) = VALUE;    \
    }                                                   \
}

#define miniflow_pad_to_64_(MF, OFS)                            \
{                                                               \
    MINIFLOW_ASSERT((OFS) % 8 != 0);                            \
    miniflow_assert_in_map(MF, OFS / 8);                        \
                                                                \
    memset((uint8_t *)MF.data + (OFS) % 8, 0, 8 - (OFS) % 8);   \
    MF.data++;                                                  \
}

#define miniflow_pad_from_64_(MF, OFS)                          \
{                                                               \
    MINIFLOW_ASSERT(MF.data < MF.end);                          \
                                                                \
    MINIFLOW_ASSERT((OFS) % 8 != 0);                            \
    miniflow_set_map(MF, OFS / 8);                              \
                                                                \
    memset((uint8_t *)MF.data, 0, (OFS) % 8);                   \
}

#define miniflow_push_be16_(MF, OFS, VALUE)                     \
    miniflow_push_uint16_(MF, OFS, (OVS_FORCE uint16_t)VALUE);

#define miniflow_push_be8_(MF, OFS, VALUE)                     \
    miniflow_push_uint8_(MF, OFS, (OVS_FORCE uint8_t)VALUE);

#define miniflow_set_maps(MF, OFS, N_WORDS)                     \
{                                                               \
    size_t ofs = (OFS);                                         \
    size_t n_words = (N_WORDS);                                 \
                                                                \
    MINIFLOW_ASSERT(n_words && MF.data + n_words <= MF.end);    \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, ofs);                       \
    flowmap_set(&MF.map, ofs, n_words);                         \
}

/* Data at 'valuep' may be unaligned. */
#define miniflow_push_words_(MF, OFS, VALUEP, N_WORDS)          \
{                                                               \
    MINIFLOW_ASSERT((OFS) % 8 == 0);                            \
    miniflow_set_maps(MF, (OFS) / 8, (N_WORDS));                \
    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof *MF.data);     \
    MF.data += (N_WORDS);                                       \
}

/* Push 32-bit words padded to 64-bits. */
#define miniflow_push_words_32_(MF, OFS, VALUEP, N_WORDS)               \
{                                                                       \
    miniflow_set_maps(MF, (OFS) / 8, DIV_ROUND_UP(N_WORDS, 2));         \
    memcpy(MF.data, (VALUEP), (N_WORDS) * sizeof(uint32_t));            \
    MF.data += DIV_ROUND_UP(N_WORDS, 2);                                \
    if ((N_WORDS) & 1) {                                                \
        *((uint32_t *)MF.data - 1) = 0;                                 \
    }                                                                   \
}

/* Data at 'valuep' may be unaligned. */
/* MACs start 64-aligned, and must be followed by other data or padding. */
#define miniflow_push_macs_(MF, OFS, VALUEP)                    \
{                                                               \
    miniflow_set_maps(MF, (OFS) / 8, 2);                        \
    memcpy(MF.data, (VALUEP), 2 * ETH_ADDR_LEN);                \
    MF.data += 1;                   /* First word only. */      \
}

#define miniflow_push_uint32(MF, FIELD, VALUE)                      \
    miniflow_push_uint32_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_be32(MF, FIELD, VALUE)                        \
    miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_uint16(MF, FIELD, VALUE)                      \
    miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_be16(MF, FIELD, VALUE)                        \
    miniflow_push_be16_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_push_uint8(MF, FIELD, VALUE)                      \
    miniflow_push_uint8_(MF, offsetof(struct flow, FIELD), VALUE)

#define miniflow_pad_to_64(MF, FIELD)                       \
    miniflow_pad_to_64_(MF, OFFSETOFEND(struct flow, FIELD))

#define miniflow_pad_from_64(MF, FIELD)                       \
    miniflow_pad_from_64_(MF, offsetof(struct flow, FIELD))

#define miniflow_push_words(MF, FIELD, VALUEP, N_WORDS)                 \
    miniflow_push_words_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)

#define miniflow_push_words_32(MF, FIELD, VALUEP, N_WORDS)              \
    miniflow_push_words_32_(MF, offsetof(struct flow, FIELD), VALUEP, N_WORDS)

#define miniflow_push_macs(MF, FIELD, VALUEP)                       \
    miniflow_push_macs_(MF, offsetof(struct flow, FIELD), VALUEP)

/* Return the pointer to the miniflow data when called BEFORE the corresponding
 * push. */
#define miniflow_pointer(MF, FIELD)                                     \
    (void *)((uint8_t *)MF.data + ((offsetof(struct flow, FIELD)) % 8))

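/* An illustrative sketch (not part of the original file): the push macros
 * above must be invoked in increasing struct flow offset order, since each
 * push appends to 'MF.data' and marks the corresponding flowmap bit.  This
 * hypothetical helper mirrors the metadata pattern in miniflow_extract()
 * below; it assumes 'dst' is followed by at least FLOW_U64S uint64_t of
 * storage, as in flow_extract()'s stack buffer. */
static inline void OVS_UNUSED
example_push_port_metadata(struct miniflow *dst, uint32_t hash, uint32_t port)
{
    uint64_t *values = miniflow_values(dst);
    struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
                         values + FLOW_U64S };

    /* dp_hash and in_port share one 64-bit word, so two 32-bit pushes fill
     * it completely and no miniflow_pad_to_64() is needed. */
    miniflow_push_uint32(mf, dp_hash, hash);
    miniflow_push_uint32(mf, in_port, port);

    dst->map = mf.map;
}
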
/* Pulls the MPLS headers at '*datap' and returns the count of them. */
static inline int
parse_mpls(const void **datap, size_t *sizep)
{
    const struct mpls_hdr *mh;
    int count = 0;

    while ((mh = data_try_pull(datap, sizep, sizeof *mh))) {
        count++;
        if (mh->mpls_lse.lo & htons(1 << MPLS_BOS_SHIFT)) {
            break;
        }
    }
    return MIN(count, FLOW_MAX_MPLS_LABELS);
}

/* The passed 'vlan_hdrs' array must hold at least FLOW_MAX_VLAN_HEADERS
 * elements. */
static inline ALWAYS_INLINE size_t
parse_vlan(const void **datap, size_t *sizep, union flow_vlan_hdr *vlan_hdrs)
{
    const ovs_be16 *eth_type;

    data_pull(datap, sizep, ETH_ADDR_LEN * 2);

    eth_type = *datap;

    size_t n;
    for (n = 0; eth_type_vlan(*eth_type) && n < flow_vlan_limit; n++) {
        if (OVS_UNLIKELY(*sizep < sizeof(ovs_be32) + sizeof(ovs_be16))) {
            break;
        }

        memset(vlan_hdrs + n, 0, sizeof(union flow_vlan_hdr));
        const ovs_16aligned_be32 *qp = data_pull(datap, sizep, sizeof *qp);
        vlan_hdrs[n].qtag = get_16aligned_be32(qp);
        vlan_hdrs[n].tci |= htons(VLAN_CFI);
        eth_type = *datap;
    }
    return n;
}

static inline ALWAYS_INLINE ovs_be16
parse_ethertype(const void **datap, size_t *sizep)
{
    const struct llc_snap_header *llc;
    ovs_be16 proto;

    proto = *(ovs_be16 *) data_pull(datap, sizep, sizeof proto);
    if (OVS_LIKELY(ntohs(proto) >= ETH_TYPE_MIN)) {
        return proto;
    }

    if (OVS_UNLIKELY(*sizep < sizeof *llc)) {
        return htons(FLOW_DL_TYPE_NONE);
    }

    llc = *datap;
    if (OVS_UNLIKELY(llc->llc.llc_dsap != LLC_DSAP_SNAP
                     || llc->llc.llc_ssap != LLC_SSAP_SNAP
                     || llc->llc.llc_cntl != LLC_CNTL_SNAP
                     || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET,
                               sizeof llc->snap.snap_org))) {
        return htons(FLOW_DL_TYPE_NONE);
    }

    data_pull(datap, sizep, sizeof *llc);

    if (OVS_LIKELY(ntohs(llc->snap.snap_type) >= ETH_TYPE_MIN)) {
        return llc->snap.snap_type;
    }

    return htons(FLOW_DL_TYPE_NONE);
}

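/* An illustrative sketch (not part of the original file): combining the two
 * helpers above to find the Ethertype of a raw frame.  The function is
 * hypothetical and, like miniflow_extract(), assumes the caller has already
 * checked that 'len' is at least sizeof(struct eth_header). */
static inline ovs_be16 OVS_UNUSED
example_parse_l2(const void *frame, size_t len)
{
    const void *data = frame;
    union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];

    /* parse_vlan() consumes the two MAC addresses and any 802.1Q tags;
     * parse_ethertype() then consumes the type/length field, falling back
     * to LLC/SNAP parsing for frames with a length field. */
    parse_vlan(&data, &len, vlans);
    return parse_ethertype(&data, &len);
}
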
static inline bool
icmp6_is_nd(const struct icmp6_data_header *icmp6)
{
    return (icmp6->icmp6_base.icmp6_code == 0 &&
            (icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_SOLICIT ||
             icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_ADVERT));
}

/* Returns 'true' if the packet is an ND packet. In that case the '*nd_target'
 * and 'arp_buf[]' are filled in.  If the packet is not an ND packet, 'false'
 * is returned and no values are filled in on '*nd_target' or 'arp_buf[]'. */
static inline bool
parse_icmpv6(const void **datap, size_t *sizep,
             const struct icmp6_data_header *icmp6,
             ovs_be32 *rso_flags,
             const union ovs_16aligned_in6_addr **nd_target,
             struct eth_addr arp_buf[2], uint8_t *opt_type)
{
    if (!icmp6_is_nd(icmp6)) {
        return false;
    }

    arp_buf[0] = eth_addr_zero;
    arp_buf[1] = eth_addr_zero;
    *opt_type = 0;

    *rso_flags = get_16aligned_be32(icmp6->icmp6_data.be32);

    *nd_target = data_try_pull(datap, sizep, sizeof **nd_target);
    if (OVS_UNLIKELY(!*nd_target)) {
        return true;
    }

    while (*sizep >= 8) {
        /* The minimum size of an option is 8 bytes, which also is
         * the size of Ethernet link-layer options. */
        const struct ovs_nd_lla_opt *lla_opt = *datap;
        int opt_len = lla_opt->len * ND_LLA_OPT_LEN;

        if (!opt_len || opt_len > *sizep) {
            return true;
        }

        /* Store the link layer address if the appropriate option is
         * provided.  It is considered an error if the same link
         * layer option is specified twice. */
        if (lla_opt->type == ND_OPT_SOURCE_LINKADDR && opt_len == 8) {
            if (OVS_LIKELY(eth_addr_is_zero(arp_buf[0]))) {
                arp_buf[0] = lla_opt->mac;
                /* We use only first option type present in ND packet. */
                if (*opt_type == 0) {
                    *opt_type = lla_opt->type;
                }
            } else {
                goto invalid;
            }
        } else if (lla_opt->type == ND_OPT_TARGET_LINKADDR && opt_len == 8) {
            if (OVS_LIKELY(eth_addr_is_zero(arp_buf[1]))) {
                arp_buf[1] = lla_opt->mac;
                /* We use only first option type present in ND packet. */
                if (*opt_type == 0) {
                    *opt_type = lla_opt->type;
                }
            } else {
                goto invalid;
            }
        }

        if (OVS_UNLIKELY(!data_try_pull(datap, sizep, opt_len))) {
            return true;
        }
    }
    return true;

invalid:
    *nd_target = NULL;
    arp_buf[0] = eth_addr_zero;
    arp_buf[1] = eth_addr_zero;
    return true;
}

static inline bool
parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
                      uint8_t *nw_frag,
                      const struct ovs_16aligned_ip6_frag **frag_hdr,
                      const struct ip6_rt_hdr **rt_hdr)
{
    if (frag_hdr) {
        *frag_hdr = NULL;
    }

    if (rt_hdr) {
        *rt_hdr = NULL;
    }

    while (1) {
        if (OVS_LIKELY((*nw_proto != IPPROTO_HOPOPTS)
                       && (*nw_proto != IPPROTO_ROUTING)
                       && (*nw_proto != IPPROTO_DSTOPTS)
                       && (*nw_proto != IPPROTO_AH)
                       && (*nw_proto != IPPROTO_FRAGMENT))) {
            /* It's either a terminal header (e.g., TCP, UDP) or one we
             * don't understand.  In either case, we're done with the
             * packet, so use it to fill in 'nw_proto'. */
            return true;
        }

        /* We only verify that at least 8 bytes of the next header are
         * available, but many of these headers are longer.  Ensure that
         * accesses within the extension header are within those first 8
         * bytes. All extension headers are required to be at least 8
         * bytes. */
        if (OVS_UNLIKELY(*sizep < 8)) {
            return false;
        }

        if ((*nw_proto == IPPROTO_HOPOPTS)
            || (*nw_proto == IPPROTO_DSTOPTS)) {
            /* These headers, while different, have the fields we care
             * about in the same location and with the same
             * interpretation. */
            const struct ip6_ext *ext_hdr = *datap;
            *nw_proto = ext_hdr->ip6e_nxt;
            if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
                                            (ext_hdr->ip6e_len + 1) * 8))) {
                return false;
            }
        } else if (*nw_proto == IPPROTO_ROUTING) {
            const struct ip6_rt_hdr *tmp;
            if (!rt_hdr) {
                rt_hdr = &tmp;
            }

            *rt_hdr = *datap;
            *nw_proto = (*rt_hdr)->nexthdr;
            if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
                                            ((*rt_hdr)->hdrlen + 1) * 8))) {
                return false;
            }
        } else if (*nw_proto == IPPROTO_AH) {
            /* A standard AH definition isn't available, but the fields
             * we care about are in the same location as the generic
             * option header--only the header length is calculated
             * differently. */
            const struct ip6_ext *ext_hdr = *datap;
            *nw_proto = ext_hdr->ip6e_nxt;
            if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
                                            (ext_hdr->ip6e_len + 2) * 4))) {
                return false;
            }
        } else if (*nw_proto == IPPROTO_FRAGMENT) {
            const struct ovs_16aligned_ip6_frag *tmp;
            if (!frag_hdr) {
                frag_hdr = &tmp;
            }

            *frag_hdr = *datap;

            *nw_proto = (*frag_hdr)->ip6f_nxt;
            if (!data_try_pull(datap, sizep, sizeof **frag_hdr)) {
                return false;
            }

            /* We only process the first fragment. */
            if ((*frag_hdr)->ip6f_offlg != htons(0)) {
                *nw_frag = FLOW_NW_FRAG_ANY;
                if (((*frag_hdr)->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
                    *nw_frag |= FLOW_NW_FRAG_LATER;
                    *nw_proto = IPPROTO_FRAGMENT;
                    return true;
                }
            }
        }
    }
}

/* Parses IPv6 extension headers until a terminal header (or header we
 * don't understand) is found.  'datap' points to the first extension
 * header and advances as parsing occurs; 'sizep' is the remaining size
 * and is decreased accordingly.  'nw_proto' starts as the first
 * extension header to process and is updated as the extension headers
 * are parsed.
 *
 * If a fragment header is found, '*frag_hdr' is set to the fragment
 * header and otherwise set to NULL.  If it is the first fragment,
 * extension header parsing otherwise continues as usual.  If it's not
 * the first fragment, 'nw_proto' is set to IPPROTO_FRAGMENT and 'nw_frag'
 * has FLOW_NW_FRAG_LATER set.  Both first and later fragments have
 * FLOW_NW_FRAG_ANY set in 'nw_frag'.
 *
 * If a routing header is found, '*rt_hdr' is set to the routing
 * header and otherwise set to NULL.
 *
 * A return value of false indicates that there was a problem parsing
 * the extension headers. */
bool
parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                    uint8_t *nw_frag,
                    const struct ovs_16aligned_ip6_frag **frag_hdr,
                    const struct ip6_rt_hdr **rt_hdr)
{
    return parse_ipv6_ext_hdrs__(datap, sizep, nw_proto, nw_frag,
                                 frag_hdr, rt_hdr);
}

609
610
bool
611
parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
612
6.77k
{
613
6.77k
    const struct nsh_hdr *nsh = (const struct nsh_hdr *) *datap;
614
6.77k
    uint8_t version, length, flags, ttl;
615
616
    /* Check if it is long enough for NSH header, doesn't support
617
     * MD type 2 yet
618
     */
619
6.77k
    if (OVS_UNLIKELY(*sizep < NSH_BASE_HDR_LEN)) {
620
348
        return false;
621
348
    }
622
623
6.42k
    version = nsh_get_ver(nsh);
624
6.42k
    flags = nsh_get_flags(nsh);
625
6.42k
    length = nsh_hdr_len(nsh);
626
6.42k
    ttl = nsh_get_ttl(nsh);
627
628
6.42k
    if (OVS_UNLIKELY(length > *sizep || version != 0)) {
629
1.42k
        return false;
630
1.42k
    }
631
632
4.99k
    key->flags = flags;
633
4.99k
    key->ttl = ttl;
634
4.99k
    key->mdtype = nsh->md_type;
635
4.99k
    key->np = nsh->next_proto;
636
4.99k
    key->path_hdr = nsh_get_path_hdr(nsh);
637
638
4.99k
    switch (key->mdtype) {
639
663
        case NSH_M_TYPE1:
640
663
            if (length != NSH_M_TYPE1_LEN) {
641
507
                return false;
642
507
            }
643
780
            for (size_t i = 0; i < 4; i++) {
644
624
                key->context[i] = get_16aligned_be32(&nsh->md1.context[i]);
645
624
            }
646
156
            break;
647
91
        case NSH_M_TYPE2:
648
            /* Don't support MD type 2 metedata parsing yet */
649
91
            if (length < NSH_BASE_HDR_LEN) {
650
67
                return false;
651
67
            }
652
653
24
            memset(key->context, 0, sizeof(key->context));
654
24
            break;
655
4.24k
        default:
656
            /* We don't parse other context headers yet. */
657
4.24k
            memset(key->context, 0, sizeof(key->context));
658
4.24k
            break;
659
4.99k
    }
660
661
4.42k
    data_pull(datap, sizep, length);
662
663
4.42k
    return true;
664
4.99k
}
665
666
/* This does the same thing as miniflow_extract() with a full-size 'flow' as
667
 * the destination. */
668
void
669
flow_extract(struct dp_packet *packet, struct flow *flow)
670
66.1k
{
671
66.1k
    struct {
672
66.1k
        struct miniflow mf;
673
66.1k
        uint64_t buf[FLOW_U64S];
674
66.1k
    } m;
675
676
66.1k
    COVERAGE_INC(flow_extract);
677
678
66.1k
    miniflow_extract(packet, &m.mf);
679
66.1k
    miniflow_expand(&m.mf, flow);
680
66.1k
}
681
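/* An illustrative sketch (not part of the original file): a typical
 * flow_extract() call site.  The caller below is hypothetical; it assumes
 * 'packet' is an initialized dp_packet whose md.in_port was set upstream. */
static void OVS_UNUSED
example_extract_and_log(struct dp_packet *packet)
{
    struct flow flow;

    flow_extract(packet, &flow);
    VLOG_DBG("dl_type=0x%04"PRIx16" nw_proto=%"PRIu8,
             ntohs(flow.dl_type), flow.nw_proto);
}
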
static inline bool
ipv4_sanity_check(const struct ip_header *nh, size_t size,
                  int *ip_lenp, uint16_t *tot_lenp)
{
    int ip_len;
    uint16_t tot_len;

    if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
        COVERAGE_INC(miniflow_extract_ipv4_pkt_too_short);
        return false;
    }
    ip_len = IP_IHL(nh->ip_ihl_ver) * 4;

    if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN || size < ip_len)) {
        COVERAGE_INC(miniflow_extract_ipv4_pkt_len_error);
        return false;
    }

    tot_len = ntohs(nh->ip_tot_len);
    if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len ||
                size - tot_len > UINT16_MAX)) {
        COVERAGE_INC(miniflow_extract_ipv4_pkt_len_error);
        return false;
    }

    *ip_lenp = ip_len;
    *tot_lenp = tot_len;

    return true;
}

static inline uint8_t
ipv4_get_nw_frag(const struct ip_header *nh)
{
    uint8_t nw_frag = 0;

    if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
        nw_frag = FLOW_NW_FRAG_ANY;
        if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
            nw_frag |= FLOW_NW_FRAG_LATER;
        }
    }

    return nw_frag;
}

static inline bool
ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size)
{
    uint16_t plen;

    if (OVS_UNLIKELY(size < sizeof *nh)) {
        COVERAGE_INC(miniflow_extract_ipv6_pkt_too_short);
        return false;
    }

    plen = ntohs(nh->ip6_plen);
    if (OVS_UNLIKELY(plen + IPV6_HEADER_LEN > size)) {
        COVERAGE_INC(miniflow_extract_ipv6_pkt_len_error);
        return false;
    }

    if (OVS_UNLIKELY(size - (plen + IPV6_HEADER_LEN) > UINT16_MAX)) {
        COVERAGE_INC(miniflow_extract_ipv6_pkt_len_error);
        return false;
    }

    return true;
}

static void
dump_invalid_packet(struct dp_packet *packet, const char *reason)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    struct ds ds = DS_EMPTY_INITIALIZER;
    size_t size;

    if (VLOG_DROP_DBG(&rl)) {
        return;
    }
    size = dp_packet_size(packet);
    ds_put_hex_dump(&ds, dp_packet_data(packet), size, 0, false);
    VLOG_DBG("invalid packet for %s: port %"PRIu32", size %"PRIuSIZE"\n%s",
             reason, packet->md.in_port.odp_port, size, ds_cstr(&ds));
    ds_destroy(&ds);
}

/* Initializes 'dst' from 'packet' and 'md', taking the packet type into
 * account.  'dst' must have enough space for FLOW_U64S * 8 bytes.
 *
 * Initializes the layer offsets as follows:
 *
 *    - packet->l2_5_ofs is set to
 *          * the start of the MPLS shim header. Can be zero, if the
 *            packet is of type (OFPHTN_ETHERTYPE, ETH_TYPE_MPLS).
 *          * UINT16_MAX when there is no MPLS shim header.
 *
 *    - packet->l3_ofs is set to
 *          * zero if the packet_type is in name space OFPHTN_ETHERTYPE
 *            and there is no MPLS shim header.
 *          * just past the Ethernet header, or just past the vlan_header if
 *            one is present, to the first byte of the payload of the
 *            Ethernet frame if the packet type is Ethernet and there is
 *            no MPLS shim header.
 *          * just past the MPLS label stack to the first byte of the MPLS
 *            payload if there is at least one MPLS shim header.
 *          * UINT16_MAX if the packet type is Ethernet and the frame is
 *            too short to contain an Ethernet header.
 *
 *    - packet->l4_ofs is set to just past the IPv4 or IPv6 header, if one is
 *      present and the packet has at least the content used for the fields
 *      of interest for the flow, otherwise UINT16_MAX.
 */
void
miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
{
    /* Add code to this function (or its callees) to extract new fields. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);

    const struct pkt_metadata *md = &packet->md;
    const void *data = dp_packet_data(packet);
    size_t size = dp_packet_size(packet);
    ovs_be32 packet_type = packet->packet_type;
    uint64_t *values = miniflow_values(dst);
    struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
                         values + FLOW_U64S };
    const char *frame;
    ovs_be16 dl_type = OVS_BE16_MAX;
    uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;
    uint8_t *ct_nw_proto_p = NULL;
    ovs_be16 ct_tp_src = 0, ct_tp_dst = 0;

    /* Metadata. */
    if (flow_tnl_dst_is_set(&md->tunnel)) {
        miniflow_push_words(mf, tunnel, &md->tunnel,
                            offsetof(struct flow_tnl, metadata) /
                            sizeof(uint64_t));

        if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {
            if (md->tunnel.metadata.present.map) {
                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
                                    sizeof md->tunnel.metadata /
                                    sizeof(uint64_t));
            }
        } else {
            if (md->tunnel.metadata.present.len) {
                miniflow_push_words(mf, tunnel.metadata.present,
                                    &md->tunnel.metadata.present, 1);
                miniflow_push_words(mf, tunnel.metadata.opts.gnv,
                                    md->tunnel.metadata.opts.gnv,
                                    DIV_ROUND_UP(md->tunnel.metadata.present.len,
                                                 sizeof(uint64_t)));
            }
        }
    }
    if (md->skb_priority || md->pkt_mark) {
        miniflow_push_uint32(mf, skb_priority, md->skb_priority);
        miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
    }
    miniflow_push_uint32(mf, dp_hash, md->dp_hash);
    miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
    if (md->ct_state) {
        miniflow_push_uint32(mf, recirc_id, md->recirc_id);
        miniflow_push_uint8(mf, ct_state, md->ct_state);
        ct_nw_proto_p = miniflow_pointer(mf, ct_nw_proto);
        miniflow_push_uint8(mf, ct_nw_proto, 0);
        miniflow_push_uint16(mf, ct_zone, md->ct_zone);
        miniflow_push_uint32(mf, ct_mark, md->ct_mark);
        miniflow_push_be32(mf, packet_type, packet_type);
        if (!ovs_u128_is_zero(md->ct_label)) {
            miniflow_push_words(mf, ct_label, &md->ct_label,
                                sizeof md->ct_label / sizeof(uint64_t));
        }
    } else {
        if (md->recirc_id) {
            miniflow_push_uint32(mf, recirc_id, md->recirc_id);
            miniflow_pad_to_64(mf, recirc_id);
        }
        miniflow_pad_from_64(mf, packet_type);
        miniflow_push_be32(mf, packet_type, packet_type);
    }

    /* Initialize packet's layer pointer and offsets. */
    frame = data;
    if (dp_packet_tunnel(packet)) {
        /* Preserve inner offsets from previous circulation. */
        dp_packet_reset_outer_offsets(packet);
    } else {
        dp_packet_reset_offsets(packet);
    }

    if (packet_type == htonl(PT_ETH)) {
        /* Must have full Ethernet header to proceed. */
        if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {
            goto out;
        } else {
            /* Link layer. */
            ASSERT_SEQUENTIAL(dl_dst, dl_src);
            miniflow_push_macs(mf, dl_dst, data);

            /* VLAN */
            union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
            size_t num_vlans = parse_vlan(&data, &size, vlans);

            dl_type = parse_ethertype(&data, &size);
            miniflow_push_be16(mf, dl_type, dl_type);
            miniflow_pad_to_64(mf, dl_type);
            if (num_vlans > 0) {
                miniflow_push_words_32(mf, vlans, vlans, num_vlans);
            }

        }
    } else {
        /* Take dl_type from packet_type. */
        dl_type = pt_ns_type_be(packet_type);
        miniflow_pad_from_64(mf, dl_type);
        miniflow_push_be16(mf, dl_type, dl_type);
        /* Do not push vlan_tci, pad instead */
        miniflow_pad_to_64(mf, dl_type);
    }

    /* Parse MPLS. */
    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
        int count;
        const void *mpls = data;

        packet->l2_5_ofs = (char *)data - frame;
        count = parse_mpls(&data, &size);
        miniflow_push_words_32(mf, mpls_lse, mpls, count);
    }

    /* Network layer. */
    packet->l3_ofs = (char *)data - frame;

    nw_frag = 0;
    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
        const struct ip_header *nh = data;
        int ip_len;
        uint16_t tot_len;

        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
                dump_invalid_packet(packet, "ipv4_sanity_check");
            }
            goto out;
        }
        dp_packet_set_l2_pad_size(packet, size - tot_len);
        size = tot_len;   /* Never pull padding. */

        /* Push both source and destination address at once. */
        miniflow_push_words(mf, nw_src, &nh->ip_src, 1);
        if (ct_nw_proto_p && !md->ct_orig_tuple_ipv6) {
            *ct_nw_proto_p = md->ct_orig_tuple.ipv4.ipv4_proto;
            if (*ct_nw_proto_p) {
                miniflow_push_words(mf, ct_nw_src,
                                    &md->ct_orig_tuple.ipv4.ipv4_src, 1);
                ct_tp_src = md->ct_orig_tuple.ipv4.src_port;
                ct_tp_dst = md->ct_orig_tuple.ipv4.dst_port;
            }
        }

        miniflow_push_be32(mf, ipv6_label, 0); /* Padding for IPv4. */

        nw_tos = nh->ip_tos;
        nw_ttl = nh->ip_ttl;
        nw_proto = nh->ip_proto;
        nw_frag = ipv4_get_nw_frag(nh);
        data_pull(&data, &size, ip_len);
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
        const struct ovs_16aligned_ip6_hdr *nh = data;
        ovs_be32 tc_flow;
        uint16_t plen;

        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
                dump_invalid_packet(packet, "ipv6_sanity_check");
            }
            goto out;
        }
        data_pull(&data, &size, sizeof *nh);
        plen = ntohs(nh->ip6_plen);
        dp_packet_set_l2_pad_size(packet, size - plen);
        size = plen;   /* Never pull padding. */

        miniflow_push_words(mf, ipv6_src, &nh->ip6_src,
                            sizeof nh->ip6_src / 8);
        miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
                            sizeof nh->ip6_dst / 8);
        if (ct_nw_proto_p && md->ct_orig_tuple_ipv6) {
            *ct_nw_proto_p = md->ct_orig_tuple.ipv6.ipv6_proto;
            if (*ct_nw_proto_p) {
                miniflow_push_words(mf, ct_ipv6_src,
                                    &md->ct_orig_tuple.ipv6.ipv6_src,
                                    2 *
                                    sizeof md->ct_orig_tuple.ipv6.ipv6_src / 8);
                ct_tp_src = md->ct_orig_tuple.ipv6.src_port;
                ct_tp_dst = md->ct_orig_tuple.ipv6.dst_port;
            }
        }

        tc_flow = get_16aligned_be32(&nh->ip6_flow);
        nw_tos = ntohl(tc_flow) >> 20;
        nw_ttl = nh->ip6_hlim;
        nw_proto = nh->ip6_nxt;

        if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
                                 NULL, NULL)) {
            goto out;
        }

        /* This needs to be after the parse_ipv6_ext_hdrs__() call because it
         * leaves the nw_frag word uninitialized. */
        ASSERT_SEQUENTIAL(ipv6_label, nw_frag);
        ovs_be32 label = tc_flow & htonl(IPV6_LABEL_MASK);
        miniflow_push_be32(mf, ipv6_label, label);
    } else {
        if (dl_type == htons(ETH_TYPE_ARP) ||
            dl_type == htons(ETH_TYPE_RARP)) {
            struct eth_addr arp_buf[2];
            const struct arp_eth_header *arp = (const struct arp_eth_header *)
                data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);

            if (OVS_LIKELY(arp) && OVS_LIKELY(arp->ar_hrd == htons(1))
                && OVS_LIKELY(arp->ar_pro == htons(ETH_TYPE_IP))
                && OVS_LIKELY(arp->ar_hln == ETH_ADDR_LEN)
                && OVS_LIKELY(arp->ar_pln == 4)) {
                miniflow_push_be32(mf, nw_src,
                                   get_16aligned_be32(&arp->ar_spa));
                miniflow_push_be32(mf, nw_dst,
                                   get_16aligned_be32(&arp->ar_tpa));

                /* We only match on the lower 8 bits of the opcode. */
                if (OVS_LIKELY(ntohs(arp->ar_op) <= 0xff)) {
                    miniflow_push_be32(mf, ipv6_label, 0); /* Pad with ARP. */
                    miniflow_push_be32(mf, nw_frag, htonl(ntohs(arp->ar_op)));
                }

                /* Must be adjacent. */
                ASSERT_SEQUENTIAL(arp_sha, arp_tha);

                arp_buf[0] = arp->ar_sha;
                arp_buf[1] = arp->ar_tha;
                miniflow_push_macs(mf, arp_sha, arp_buf);
                miniflow_pad_to_64(mf, arp_tha);
            }
        } else if (dl_type == htons(ETH_TYPE_NSH)) {
            struct ovs_key_nsh nsh;

            if (OVS_LIKELY(parse_nsh(&data, &size, &nsh))) {
                miniflow_push_words(mf, nsh, &nsh,
                                    sizeof(struct ovs_key_nsh) /
                                    sizeof(uint64_t));
            }
        }
        goto out;
    }

    packet->l4_ofs = (char *)data - frame;
    miniflow_push_be32(mf, nw_frag,
                       bytes_to_be32(nw_frag, nw_tos, nw_ttl, nw_proto));

    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {
        if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
            if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
                const struct tcp_header *tcp = data;
                size_t tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;

                if (OVS_LIKELY(tcp_hdr_len >= TCP_HEADER_LEN)
                    && OVS_LIKELY(size >= tcp_hdr_len)) {
                    /* tcp_flags are not at the beginning of the block. */
                    miniflow_pad_from_64(mf, tcp_flags);
                    miniflow_push_be32(mf, tcp_flags,
                                       TCP_FLAGS_BE32(tcp->tcp_ctl));
                    miniflow_push_be16(mf, tp_src, tcp->tcp_src);
                    miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
                    miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                    miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                    if (dl_type == htons(ETH_TYPE_IP)) {
                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
                    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                        dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                    }
                    dp_packet_l4_proto_set_tcp(packet);
                }
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
            if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
                const struct udp_header *udp = data;

                miniflow_push_be16(mf, tp_src, udp->udp_src);
                miniflow_push_be16(mf, tp_dst, udp->udp_dst);
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                if (dl_type == htons(ETH_TYPE_IP)) {
                    dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
                } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                    dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                }
                dp_packet_l4_proto_set_udp(packet);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
            if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
                const struct sctp_header *sctp = data;

                miniflow_push_be16(mf, tp_src, sctp->sctp_src);
                miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                dp_packet_l4_proto_set_sctp(packet);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
            if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
                const struct icmp_header *icmp = data;

                miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
                miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
                miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_IGMP)) {
            if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
                const struct igmp_header *igmp = data;

                miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
                miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
                /* ct_tp_src/dst are not extracted for IGMP. */
                miniflow_pad_to_64(mf, tp_dst);
                miniflow_push_be32(mf, igmp_group_ip4,
                                   get_16aligned_be32(&igmp->group));
                miniflow_pad_to_64(mf, igmp_group_ip4);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMPV6)) {
            if (OVS_LIKELY(size >= sizeof(struct icmp6_data_header))) {
                const union ovs_16aligned_in6_addr *nd_target;
                struct eth_addr arp_buf[2];
                /* This will populate whether we received Option 1
                 * or Option 2. */
                uint8_t opt_type;
                /* This holds the ND Reserved field. */
                ovs_be32 rso_flags;
                const struct icmp6_data_header *icmp6;

                icmp6 = data_pull(&data, &size, sizeof *icmp6);
                if (parse_icmpv6(&data, &size, icmp6,
                                 &rso_flags, &nd_target, arp_buf, &opt_type)) {
                    if (nd_target) {
                        miniflow_push_words(mf, nd_target, nd_target,
                                            sizeof *nd_target / sizeof(uint64_t));
                    }
                    miniflow_push_macs(mf, arp_sha, arp_buf);
                    /* Populate options field and set the padding
                     * accordingly. */
                    if (opt_type != 0) {
                        miniflow_push_be16(mf, tcp_flags, htons(opt_type));
                        /* Pad to align with 64 bits.
                         * This will zero out the pad3 field. */
                        miniflow_pad_to_64(mf, tcp_flags);
                    } else {
                        /* Pad to align with 64 bits.
                         * This will zero out the tcp_flags & pad3 field. */
                        miniflow_pad_to_64(mf, arp_tha);
                    }
                    miniflow_push_be16(mf, tp_src,
                                       htons(icmp6->icmp6_base.icmp6_type));
                    miniflow_push_be16(mf, tp_dst,
                                       htons(icmp6->icmp6_base.icmp6_code));
                    miniflow_pad_to_64(mf, tp_dst);
                    /* Fill ND reserved field. */
                    miniflow_push_be32(mf, igmp_group_ip4, rso_flags);
                    miniflow_pad_to_64(mf, igmp_group_ip4);
                } else {
                    /* ICMPv6 but not ND. */
                    miniflow_push_be16(mf, tp_src,
                                       htons(icmp6->icmp6_base.icmp6_type));
                    miniflow_push_be16(mf, tp_dst,
                                       htons(icmp6->icmp6_base.icmp6_code));
                    miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                    miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                }
            }
        }
    } else if (ct_nw_proto_p &&
               (*ct_nw_proto_p == IPPROTO_TCP ||
                *ct_nw_proto_p == IPPROTO_UDP ||
                *ct_nw_proto_p == IPPROTO_SCTP ||
                *ct_nw_proto_p == IPPROTO_ICMP ||
                (*ct_nw_proto_p == IPPROTO_ICMPV6 && !icmp6_is_nd(data)))) {
        miniflow_pad_from_64(mf, ct_tp_src);
        miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
        miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
    }
 out:
    dst->map = mf.map;
}

1177
static ovs_be16
1178
parse_dl_type(const void **datap, size_t *sizep, ovs_be16 *first_vlan_tci_p)
1179
0
{
1180
0
    union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS];
1181
1182
0
    if (parse_vlan(datap, sizep, vlans) && first_vlan_tci_p) {
1183
0
        *first_vlan_tci_p = vlans[0].tci;
1184
0
    }
1185
1186
0
    return parse_ethertype(datap, sizep);
1187
0
}
1188
1189
/* Parses and return the TCP flags in 'packet', converted to host byte order.
1190
 * If 'packet' is not an Ethernet packet embedding TCP, returns 0.
1191
 * 'dl_type_p' will be set only if the 'packet' is an Ethernet packet.
1192
 * 'nw_frag_p' will be set only if the 'packet' is an IP packet.
1193
 * 'first_vlan_tci_p' will be set only if the 'packet' contains vlan header.
1194
 *
1195
 * The caller must ensure that 'packet' is at least ETH_HEADER_LEN bytes
1196
 * long.'*/
1197
uint16_t
1198
parse_tcp_flags(struct dp_packet *packet,
1199
                ovs_be16 *dl_type_p, uint8_t *nw_frag_p,
1200
                ovs_be16 *first_vlan_tci_p)
1201
0
{
1202
0
    const void *data = dp_packet_data(packet);
1203
0
    const char *frame = (const char *)data;
1204
0
    size_t size = dp_packet_size(packet);
1205
0
    ovs_be16 dl_type;
1206
0
    uint8_t nw_frag = 0, nw_proto = 0;
1207
1208
0
    if (!dp_packet_is_eth(packet)) {
1209
0
        return 0;
1210
0
    }
1211
1212
0
    dp_packet_reset_offsets(packet);
1213
1214
0
    dl_type = parse_dl_type(&data, &size, first_vlan_tci_p);
1215
0
    if (dl_type_p) {
1216
0
        *dl_type_p = dl_type;
1217
0
    }
1218
0
    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
1219
0
        packet->l2_5_ofs = (char *)data - frame;
1220
0
    }
1221
0
    packet->l3_ofs = (char *)data - frame;
1222
0
    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
1223
0
        const struct ip_header *nh = data;
1224
0
        int ip_len;
1225
0
        uint16_t tot_len;
1226
1227
0
        if (OVS_UNLIKELY(!ipv4_sanity_check(nh, size, &ip_len, &tot_len))) {
1228
0
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
1229
0
                dump_invalid_packet(packet, "ipv4_sanity_check");
1230
0
            }
1231
0
            return 0;
1232
0
        }
1233
0
        dp_packet_set_l2_pad_size(packet, size - tot_len);
1234
0
        nw_proto = nh->ip_proto;
1235
0
        nw_frag = ipv4_get_nw_frag(nh);
1236
1237
0
        size = tot_len;   /* Never pull padding. */
1238
0
        data_pull(&data, &size, ip_len);
1239
0
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
1240
0
        const struct ovs_16aligned_ip6_hdr *nh = data;
1241
0
        uint16_t plen;
1242
1243
0
        if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
1244
0
            if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
1245
0
                dump_invalid_packet(packet, "ipv6_sanity_check");
1246
0
            }
1247
0
            return 0;
1248
0
        }
1249
0
        data_pull(&data, &size, sizeof *nh);
1250
1251
0
        plen = ntohs(nh->ip6_plen); /* Never pull padding. */
1252
0
        dp_packet_set_l2_pad_size(packet, size - plen);
1253
0
        size = plen;
1254
0
        nw_proto = nh->ip6_nxt;
1255
0
        if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
1256
0
                                 NULL, NULL)) {
1257
0
            return 0;
1258
0
        }
1259
0
    } else {
1260
0
        return 0;
1261
0
    }
1262
1263
0
    if (nw_frag_p) {
1264
0
        *nw_frag_p = nw_frag;
1265
0
    }
1266
1267
0
    packet->l4_ofs = (uint16_t)((char *)data - frame);
1268
0
    if (!(nw_frag & FLOW_NW_FRAG_LATER)) {
1269
0
        if (nw_proto == IPPROTO_TCP && size >= TCP_HEADER_LEN) {
1270
0
            const struct tcp_header *tcp = data;
1271
1272
0
            dp_packet_l4_proto_set_tcp(packet);
1273
0
            return TCP_FLAGS(tcp->tcp_ctl);
1274
0
        } else if (nw_proto == IPPROTO_UDP && size >= UDP_HEADER_LEN) {
1275
0
            dp_packet_l4_proto_set_udp(packet);
1276
0
        } else if (nw_proto == IPPROTO_SCTP && size >= SCTP_HEADER_LEN) {
1277
0
            dp_packet_l4_proto_set_sctp(packet);
1278
0
        }
1279
0
    }
1280
1281
0
    return 0;
1282
0
}
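
As a usage illustration, a minimal sketch assuming the OVS tree headers are available; the helper name packet_is_tcp_syn() is hypothetical:

#include <stdbool.h>
#include "dp-packet.h"
#include "flow.h"
#include "packets.h"

/* Returns true if 'packet' is a TCP segment with SYN set and ACK clear,
 * i.e. the opening packet of a handshake.  Sketch only. */
static bool
packet_is_tcp_syn(struct dp_packet *packet)
{
    uint16_t tcp_flags = parse_tcp_flags(packet, NULL, NULL, NULL);

    return (tcp_flags & TCP_SYN) && !(tcp_flags & TCP_ACK);
}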
1283
1284
/* For every bit of a field that is wildcarded in 'wildcards', sets the
1285
 * corresponding bit in 'flow' to zero. */
1286
void
1287
flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards)
1288
133k
{
1289
133k
    uint64_t *flow_u64 = (uint64_t *) flow;
1290
133k
    const uint64_t *wc_u64 = (const uint64_t *) &wildcards->masks;
1291
133k
    size_t i;
1292
1293
11.3M
    for (i = 0; i < FLOW_U64S; i++) {
1294
11.2M
        flow_u64[i] &= wc_u64[i];
1295
11.2M
    }
1296
133k
}
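
A minimal sketch of the effect, assuming OVS tree headers: with catch-all wildcards (all-zero masks) every field of the flow is zeroed.

#include "flow.h"

static void
zero_wildcards_demo(struct flow *flow)
{
    struct flow_wildcards wc;

    flow_wildcards_init_catchall(&wc);  /* All-zero masks. */
    flow_zero_wildcards(flow, &wc);     /* 'flow' is now all zeroes. */
}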
1297
1298
void
1299
flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc)
1300
0
{
1301
0
    if (flow->nw_proto != IPPROTO_ICMP) {
1302
0
        memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
1303
0
        memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
1304
0
    } else {
1305
0
        wc->masks.tp_src = htons(0xff);
1306
0
        wc->masks.tp_dst = htons(0xff);
1307
0
    }
1308
0
}
1309
1310
/* Initializes 'flow_metadata' with the metadata found in 'flow'. */
1311
void
1312
flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
1313
0
{
1314
0
    int i;
1315
1316
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
1317
1318
0
    match_init_catchall(flow_metadata);
1319
0
    if (flow->tunnel.tun_id != htonll(0)) {
1320
0
        match_set_tun_id(flow_metadata, flow->tunnel.tun_id);
1321
0
    }
1322
0
    if (flow->tunnel.flags & FLOW_TNL_PUB_F_MASK) {
1323
0
        match_set_tun_flags(flow_metadata,
1324
0
                            flow->tunnel.flags & FLOW_TNL_PUB_F_MASK);
1325
0
    }
1326
0
    if (flow->tunnel.ip_src) {
1327
0
        match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
1328
0
    }
1329
0
    if (flow->tunnel.ip_dst) {
1330
0
        match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
1331
0
    }
1332
0
    if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
1333
0
        match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
1334
0
    }
1335
0
    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
1336
0
        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
1337
0
    }
1338
0
    if (flow->tunnel.gbp_id != htons(0)) {
1339
0
        match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
1340
0
    }
1341
0
    if (flow->tunnel.gbp_flags) {
1342
0
        match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags);
1343
0
    }
1344
0
    if (flow->tunnel.erspan_ver) {
1345
0
        match_set_tun_erspan_ver(flow_metadata, flow->tunnel.erspan_ver);
1346
0
    }
1347
0
    if (flow->tunnel.erspan_idx) {
1348
0
        match_set_tun_erspan_idx(flow_metadata, flow->tunnel.erspan_idx);
1349
0
    }
1350
0
    if (flow->tunnel.erspan_dir) {
1351
0
        match_set_tun_erspan_dir(flow_metadata, flow->tunnel.erspan_dir);
1352
0
    }
1353
0
    if (flow->tunnel.erspan_hwid) {
1354
0
        match_set_tun_erspan_hwid(flow_metadata, flow->tunnel.erspan_hwid);
1355
0
    }
1356
0
    if (flow->tunnel.gtpu_flags) {
1357
0
        match_set_tun_gtpu_flags(flow_metadata, flow->tunnel.gtpu_flags);
1358
0
    }
1359
0
    if (flow->tunnel.gtpu_msgtype) {
1360
0
        match_set_tun_gtpu_msgtype(flow_metadata, flow->tunnel.gtpu_msgtype);
1361
0
    }
1362
0
    tun_metadata_get_fmd(&flow->tunnel, flow_metadata);
1363
0
    if (flow->metadata != htonll(0)) {
1364
0
        match_set_metadata(flow_metadata, flow->metadata);
1365
0
    }
1366
1367
0
    for (i = 0; i < FLOW_N_REGS; i++) {
1368
0
        if (flow->regs[i]) {
1369
0
            match_set_reg(flow_metadata, i, flow->regs[i]);
1370
0
        }
1371
0
    }
1372
1373
0
    if (flow->pkt_mark != 0) {
1374
0
        match_set_pkt_mark(flow_metadata, flow->pkt_mark);
1375
0
    }
1376
1377
0
    match_set_in_port(flow_metadata, flow->in_port.ofp_port);
1378
0
    if (flow->packet_type != htonl(PT_ETH)) {
1379
0
        match_set_packet_type(flow_metadata, flow->packet_type);
1380
0
    }
1381
1382
0
    if (flow->ct_state != 0) {
1383
0
        match_set_ct_state(flow_metadata, flow->ct_state);
1384
        /* Match dl_type since it is required for the later interpretation of
1385
         * the conntrack metadata. */
1386
0
        match_set_dl_type(flow_metadata, flow->dl_type);
1387
0
        if (is_ct_valid(flow, NULL, NULL) && flow->ct_nw_proto != 0) {
1388
0
            if (flow->dl_type == htons(ETH_TYPE_IP)) {
1389
0
                match_set_ct_nw_src(flow_metadata, flow->ct_nw_src);
1390
0
                match_set_ct_nw_dst(flow_metadata, flow->ct_nw_dst);
1391
0
                match_set_ct_nw_proto(flow_metadata, flow->ct_nw_proto);
1392
0
                match_set_ct_tp_src(flow_metadata, flow->ct_tp_src);
1393
0
                match_set_ct_tp_dst(flow_metadata, flow->ct_tp_dst);
1394
0
            } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1395
0
                match_set_ct_ipv6_src(flow_metadata, &flow->ct_ipv6_src);
1396
0
                match_set_ct_ipv6_dst(flow_metadata, &flow->ct_ipv6_dst);
1397
0
                match_set_ct_nw_proto(flow_metadata, flow->ct_nw_proto);
1398
0
                match_set_ct_tp_src(flow_metadata, flow->ct_tp_src);
1399
0
                match_set_ct_tp_dst(flow_metadata, flow->ct_tp_dst);
1400
0
            }
1401
0
        }
1402
0
    }
1403
0
    if (flow->ct_zone != 0) {
1404
0
        match_set_ct_zone(flow_metadata, flow->ct_zone);
1405
0
    }
1406
0
    if (flow->ct_mark != 0) {
1407
0
        match_set_ct_mark(flow_metadata, flow->ct_mark);
1408
0
    }
1409
0
    if (!ovs_u128_is_zero(flow->ct_label)) {
1410
0
        match_set_ct_label(flow_metadata, flow->ct_label);
1411
0
    }
1412
0
}
1413
1414
const char *
1415
ct_state_to_string(uint32_t state)
1416
17.1k
{
1417
17.1k
    switch (state) {
1418
5.24k
#define CS_STATE(ENUM, INDEX, NAME) case CS_##ENUM: return NAME;
1419
0
        CS_STATES
1420
0
#undef CS_STATE
1421
11.9k
    default:
1422
11.9k
        return NULL;
1423
17.1k
    }
1424
17.1k
}
1425
1426
uint32_t
1427
ct_state_from_string(const char *s)
1428
0
{
1429
0
#define CS_STATE(ENUM, INDEX, NAME) \
1430
0
    if (!strcmp(s, NAME)) {         \
1431
0
        return CS_##ENUM;           \
1432
0
    }
1433
0
    CS_STATES
1434
0
#undef CS_STATE
1435
0
    return 0;
1436
0
}
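
A quick sketch of the single-bit contract shared by these two functions; note that ct_state_to_string() maps exactly one CS_* bit to a name and returns NULL for anything else:

#include <stdio.h>
#include "flow.h"
#include "packets.h"

static void
ct_state_round_trip(void)
{
    uint32_t bit = ct_state_from_string("est");

    /* Passing a combination of bits would return NULL instead. */
    printf("%s\n", ct_state_to_string(bit));  /* Prints "est". */
}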
1437
1438
/* Parses conntrack state from 'state_str'.  If it is parsed successfully,
1439
 * stores the parsed ct_state in 'ct_state', and returns true.  Otherwise,
1440
 * returns false, and reports an error message in 'ds'. */
1441
bool
1442
parse_ct_state(const char *state_str, uint32_t default_state,
1443
               uint32_t *ct_state, struct ds *ds)
1444
0
{
1445
0
    uint32_t state = default_state;
1446
0
    char *state_s = xstrdup(state_str);
1447
0
    char *save_ptr = NULL;
1448
1449
0
    for (char *cs = strtok_r(state_s, ", ", &save_ptr); cs;
1450
0
         cs = strtok_r(NULL, ", ", &save_ptr)) {
1451
0
        uint32_t bit = ct_state_from_string(cs);
1452
0
        if (!bit) {
1453
0
            ds_put_format(ds, "%s: unknown connection tracking state flag",
1454
0
                          cs);
1455
0
            free(state_s);
1456
0
            return false;
1457
0
        }
1458
0
        state |= bit;
1459
0
    }
1460
1461
0
    *ct_state = state;
1462
0
    free(state_s);
1463
1464
0
    return true;
1465
0
}
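
A hedged usage sketch; the wrapper name is hypothetical, and on failure the error text is left in the caller's dynamic string:

#include <stdbool.h>
#include <stdio.h>
#include "flow.h"
#include "openvswitch/dynamic-string.h"

static bool
ct_state_of(const char *spec, uint32_t *state)
{
    struct ds err = DS_EMPTY_INITIALIZER;
    bool ok = parse_ct_state(spec, 0, state, &err);  /* e.g. "trk,est". */

    if (!ok) {
        fprintf(stderr, "%s\n", ds_cstr(&err));
    }
    ds_destroy(&err);
    return ok;
}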
1466
1467
/* Checks the given conntrack state 'state' according to the constraints
1468
 * listed in ovs-fields(7).  Returns true if it is valid.  Otherwise, returns
1470
 * false, and reports an error in 'ds'. */
1470
bool
1471
validate_ct_state(uint32_t state, struct ds *ds)
1472
0
{
1473
0
    bool valid_ct_state = true;
1474
0
    struct ds d_str = DS_EMPTY_INITIALIZER;
1475
1476
0
    format_flags(&d_str, ct_state_to_string, state, '|');
1477
1478
0
    if (state && !(state & CS_TRACKED)) {
1479
0
        ds_put_format(ds, "%s: invalid connection state: "
1480
0
                      "If \"trk\" is unset, no other flags are set\n",
1481
0
                      ds_cstr(&d_str));
1482
0
        valid_ct_state = false;
1483
0
    }
1484
0
    if (state & CS_INVALID && state & ~(CS_TRACKED | CS_INVALID)) {
1485
0
        ds_put_format(ds, "%s: invalid connection state: "
1486
0
                      "when \"inv\" is set, only \"trk\" may also be set\n",
1487
0
                      ds_cstr(&d_str));
1488
0
        valid_ct_state = false;
1489
0
    }
1490
0
    if (state & CS_NEW && state & CS_ESTABLISHED) {
1491
0
        ds_put_format(ds, "%s: invalid connection state: "
1492
0
                      "\"new\" and \"est\" are mutually exclusive\n",
1493
0
                      ds_cstr(&d_str));
1494
0
        valid_ct_state = false;
1495
0
    }
1496
0
    if (state & CS_NEW && state & CS_REPLY_DIR) {
1497
0
        ds_put_format(ds, "%s: invalid connection state: "
1498
0
                      "\"new\" and \"rpy\" are mutually exclusive\n",
1499
0
                      ds_cstr(&d_str));
1500
0
        valid_ct_state = false;
1501
0
    }
1502
1503
0
    ds_destroy(&d_str);
1504
0
    return valid_ct_state;
1505
0
}
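
And a companion sketch for checking an already-assembled state value against the ovs-fields(7) rules; again the wrapper name is made up:

#include <stdbool.h>
#include <stdio.h>
#include "flow.h"
#include "openvswitch/dynamic-string.h"

static bool
ct_state_is_sane(uint32_t state)
{
    struct ds err = DS_EMPTY_INITIALIZER;
    bool ok = validate_ct_state(state, &err);

    if (!ok) {
        fprintf(stderr, "%s", ds_cstr(&err));  /* Messages end in '\n'. */
    }
    ds_destroy(&err);
    return ok;
}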
1506
1507
/* Clears the fields in 'flow' associated with connection tracking. */
1508
void
1509
flow_clear_conntrack(struct flow *flow)
1510
0
{
1511
0
    flow->ct_state = 0;
1512
0
    flow->ct_zone = 0;
1513
0
    flow->ct_mark = 0;
1514
0
    flow->ct_label = OVS_U128_ZERO;
1515
1516
0
    flow->ct_nw_proto = 0;
1517
0
    flow->ct_tp_src = 0;
1518
0
    flow->ct_tp_dst = 0;
1519
0
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
1520
0
        flow->ct_nw_src = 0;
1521
0
        flow->ct_nw_dst = 0;
1522
0
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1523
0
        memset(&flow->ct_ipv6_src, 0, sizeof flow->ct_ipv6_src);
1524
0
        memset(&flow->ct_ipv6_dst, 0, sizeof flow->ct_ipv6_dst);
1525
0
    }
1526
0
}
1527
1528
char *
1529
flow_to_string(const struct flow *flow,
1530
               const struct ofputil_port_map *port_map)
1531
0
{
1532
0
    struct ds ds = DS_EMPTY_INITIALIZER;
1533
0
    flow_format(&ds, flow, port_map);
1534
0
    return ds_cstr(&ds);
1535
0
}
1536
1537
const char *
1538
flow_tun_flag_to_string(uint32_t flags)
1539
1.51k
{
1540
1.51k
    switch (flags) {
1541
96
    case FLOW_TNL_F_DONT_FRAGMENT:
1542
96
        return "df";
1543
93
    case FLOW_TNL_F_CSUM:
1544
93
        return "csum";
1545
97
    case FLOW_TNL_F_KEY:
1546
97
        return "key";
1547
647
    case FLOW_TNL_F_OAM:
1548
647
        return "oam";
1549
579
    default:
1550
579
        return NULL;
1551
1.51k
    }
1552
1.51k
}
1553
1554
void
1555
format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
1556
             uint32_t flags, char del)
1557
13.7k
{
1558
13.7k
    uint32_t bad = 0;
1559
1560
13.7k
    if (!flags) {
1561
1.35k
        ds_put_char(ds, '0');
1562
1.35k
        return;
1563
1.35k
    }
1564
82.6k
    while (flags) {
1565
70.2k
        uint32_t bit = rightmost_1bit(flags);
1566
70.2k
        const char *s;
1567
1568
70.2k
        s = bit_to_string(bit);
1569
70.2k
        if (s) {
1570
67.9k
            ds_put_format(ds, "%s%c", s, del);
1571
67.9k
        } else {
1572
2.27k
            bad |= bit;
1573
2.27k
        }
1574
1575
70.2k
        flags &= ~bit;
1576
70.2k
    }
1577
1578
12.4k
    if (bad) {
1579
730
        ds_put_format(ds, "0x%"PRIx32"%c", bad, del);
1580
730
    }
1581
12.4k
    ds_chomp(ds, del);
1582
12.4k
}
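
A short sketch of the output format, reusing ct_state_to_string() from earlier in this file; bits are emitted lowest-first and unknown bits are folded into a single hex word:

#include "flow.h"
#include "openvswitch/dynamic-string.h"
#include "packets.h"

static void
format_flags_demo(void)
{
    struct ds s = DS_EMPTY_INITIALIZER;

    format_flags(&s, ct_state_to_string, CS_TRACKED | CS_ESTABLISHED, '|');
    /* ds_cstr(&s) is now "est|trk". */
    ds_destroy(&s);
}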
1583
1584
void
1585
format_flags_masked(struct ds *ds, const char *name,
1586
                    const char *(*bit_to_string)(uint32_t), uint32_t flags,
1587
                    uint32_t mask, uint32_t max_mask)
1588
14.9k
{
1589
14.9k
    if (name) {
1590
13.4k
        ds_put_format(ds, "%s%s=%s", colors.param, name, colors.end);
1591
13.4k
    }
1592
1593
14.9k
    if (mask == max_mask) {
1594
13.7k
        format_flags(ds, bit_to_string, flags, '|');
1595
13.7k
        return;
1596
13.7k
    }
1597
1598
1.20k
    if (!mask) {
1599
0
        ds_put_cstr(ds, "0/0");
1600
0
        return;
1601
0
    }
1602
1603
18.6k
    while (mask) {
1604
17.4k
        uint32_t bit = rightmost_1bit(mask);
1605
17.4k
        const char *s = bit_to_string(bit);
1606
1607
17.4k
        ds_put_format(ds, "%s%s", (flags & bit) ? "+" : "-",
1608
17.4k
                      s ? s : "[Unknown]");
1609
17.4k
        mask &= ~bit;
1610
17.4k
    }
1611
1.20k
}
1612
1613
static void
1614
put_u16_masked(struct ds *s, uint16_t value, uint16_t mask)
1615
35.4k
{
1616
35.4k
    if (!mask) {
1617
0
        ds_put_char(s, '*');
1618
35.4k
    } else {
1619
35.4k
        if (value > 9) {
1620
21.6k
            ds_put_format(s, "0x%"PRIx16, value);
1621
21.6k
        } else {
1622
13.7k
            ds_put_format(s, "%"PRIu16, value);
1623
13.7k
        }
1624
1625
35.4k
        if (mask != UINT16_MAX) {
1626
0
            ds_put_format(s, "/0x%"PRIx16, mask);
1627
0
        }
1628
35.4k
    }
1629
35.4k
}
1630
1631
void
1632
format_packet_type_masked(struct ds *s, ovs_be32 value, ovs_be32 mask)
1633
17.7k
{
1634
17.7k
    if (value == htonl(PT_ETH) && mask == OVS_BE32_MAX) {
1635
38
        ds_put_cstr(s, "eth");
1636
17.7k
    } else {
1637
17.7k
        ds_put_cstr(s, "packet_type=(");
1638
17.7k
        put_u16_masked(s, pt_ns(value), pt_ns(mask));
1639
17.7k
        ds_put_char(s, ',');
1640
17.7k
        put_u16_masked(s, pt_ns_type(value), pt_ns_type(mask));
1641
17.7k
        ds_put_char(s, ')');
1642
17.7k
    }
1643
17.7k
}
1644
1645
/* Scans a string 's' of flags to determine their numerical value and
1646
 * returns the number of characters parsed using 'bit_to_string' to
1647
 * lookup flag names. Scanning continues until the character 'end' is
1648
 * reached.
1649
 *
1650
 * In the event of a failure, a negative error code will be returned. In
1651
 * addition, if 'res_string' is non-NULL then a descriptive string will
1652
 * be returned incorporating the identifying string 'field_name'. This
1653
 * error string must be freed by the caller.
1654
 *
1655
 * Upon success, the flag values will be stored in 'res_flags' and
1656
 * optionally 'res_mask', if it is non-NULL (if it is NULL then any masks
1657
 * present in the original string will be considered an error). The
1658
 * caller may restrict the acceptable set of values through the mask
1659
 * 'allowed'. */
1660
int
1661
parse_flags(const char *s, const char *(*bit_to_string)(uint32_t),
1662
            char end, const char *field_name, char **res_string,
1663
            uint32_t *res_flags, uint32_t allowed, uint32_t *res_mask)
1664
0
{
1665
0
    uint32_t result = 0;
1666
0
    int n;
1667
1668
    /* Parse masked flags in numeric format? */
1669
0
    if (res_mask && ovs_scan(s, "%"SCNi32"/%"SCNi32"%n",
1670
0
                             res_flags, res_mask, &n) && n > 0) {
1671
0
        if (*res_flags & ~allowed || *res_mask & ~allowed) {
1672
0
            goto unknown;
1673
0
        }
1674
0
        return n;
1675
0
    }
1676
1677
0
    n = 0;
1678
1679
0
    if (res_mask && (*s == '+' || *s == '-')) {
1680
0
        uint32_t flags = 0, mask = 0;
1681
1682
        /* Parse masked flags. */
1683
0
        while (s[0] != end) {
1684
0
            bool set;
1685
0
            uint32_t bit;
1686
0
            size_t len;
1687
1688
0
            if (s[0] == '+') {
1689
0
                set = true;
1690
0
            } else if (s[0] == '-') {
1691
0
                set = false;
1692
0
            } else {
1693
0
                if (res_string) {
1694
0
                    *res_string = xasprintf("%s: %s must be preceded by '+' "
1695
0
                                            "(for SET) or '-' (NOT SET)", s,
1696
0
                                            field_name);
1697
0
                }
1698
0
                return -EINVAL;
1699
0
            }
1700
0
            s++;
1701
0
            n++;
1702
1703
0
            for (bit = 1; bit; bit <<= 1) {
1704
0
                const char *fname = bit_to_string(bit);
1705
1706
0
                if (!fname) {
1707
0
                    continue;
1708
0
                }
1709
1710
0
                len = strlen(fname);
1711
0
                if (strncmp(s, fname, len) ||
1712
0
                    (s[len] != '+' && s[len] != '-' && s[len] != end)) {
1713
0
                    continue;
1714
0
                }
1715
1716
0
                if (mask & bit) {
1717
                    /* bit already set. */
1718
0
                    if (res_string) {
1719
0
                        *res_string = xasprintf("%s: Each %s flag can be "
1720
0
                                                "specified only once", s,
1721
0
                                                field_name);
1722
0
                    }
1723
0
                    return -EINVAL;
1724
0
                }
1725
0
                if (!(bit & allowed)) {
1726
0
                    goto unknown;
1727
0
                }
1728
0
                if (set) {
1729
0
                   flags |= bit;
1730
0
                }
1731
0
                mask |= bit;
1732
0
                break;
1733
0
            }
1734
1735
0
            if (!bit) {
1736
0
                goto unknown;
1737
0
            }
1738
0
            s += len;
1739
0
            n += len;
1740
0
        }
1741
1742
0
        *res_flags = flags;
1743
0
        *res_mask = mask;
1744
0
        return n;
1745
0
    }
1746
1747
    /* Parse unmasked flags.  If a flag is present, it is set, otherwise
1748
     * it is not set. */
1749
0
    while (s[n] != end) {
1750
0
        unsigned long long int flags;
1751
0
        uint32_t bit;
1752
0
        int n0;
1753
1754
0
        if (ovs_scan(&s[n], "%lli%n", &flags, &n0)) {
1755
0
            if (flags & ~allowed) {
1756
0
                goto unknown;
1757
0
            }
1758
0
            n += n0 + (s[n + n0] == '|');
1759
0
            result |= flags;
1760
0
            continue;
1761
0
        }
1762
1763
0
        for (bit = 1; bit; bit <<= 1) {
1764
0
            const char *name = bit_to_string(bit);
1765
0
            size_t len;
1766
1767
0
            if (!name) {
1768
0
                continue;
1769
0
            }
1770
1771
0
            len = strlen(name);
1772
0
            if (!strncmp(s + n, name, len) &&
1773
0
                (s[n + len] == '|' || s[n + len] == end)) {
1774
0
                if (!(bit & allowed)) {
1775
0
                    goto unknown;
1776
0
                }
1777
0
                result |= bit;
1778
0
                n += len + (s[n + len] == '|');
1779
0
                break;
1780
0
            }
1781
0
        }
1782
1783
0
        if (!bit) {
1784
0
            goto unknown;
1785
0
        }
1786
0
    }
1787
1788
0
    *res_flags = result;
1789
0
    if (res_mask) {
1790
0
        *res_mask = UINT32_MAX;
1791
0
    }
1792
0
    if (res_string) {
1793
0
        *res_string = NULL;
1794
0
    }
1795
0
    return n;
1796
1797
0
unknown:
1798
0
    if (res_string) {
1799
0
        *res_string = xasprintf("%s: unknown %s flag(s)", s, field_name);
1800
0
    }
1801
0
    return -EINVAL;
1802
0
}
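
A hedged usage sketch, pairing parse_flags() with flow_tun_flag_to_string() from earlier in this file; the allowed set and wrapper name are illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include "flow.h"
#include "packets.h"

static int
parse_tun_flags_demo(const char *s, uint32_t *flags, uint32_t *mask)
{
    char *err = NULL;
    /* Accepts e.g. "df|csum", a number like "0x3", or "+df-csum". */
    int n = parse_flags(s, flow_tun_flag_to_string, '\0', "tunnel flags",
                        &err, flags,
                        FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM
                        | FLOW_TNL_F_KEY | FLOW_TNL_F_OAM, mask);

    if (n < 0) {
        fprintf(stderr, "%s\n", err);
        free(err);
    }
    return n;
}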
1803
1804
void
1805
flow_format(struct ds *ds,
1806
            const struct flow *flow, const struct ofputil_port_map *port_map)
1807
66.1k
{
1808
66.1k
    struct match match;
1809
66.1k
    struct flow_wildcards *wc = &match.wc;
1810
1811
66.1k
    match_wc_init(&match, flow);
1812
1813
    /* As this function is most often used for formatting a packet in a
1814
     * packet-in message, skip formatting the packet context fields that are
1815
     * all-zeroes to make the print-out easier on the eyes.  This means that a
1816
     * missing context field implies a zero value for that field.  This is
1817
     * similar to OpenFlow encoding of these fields, as the specification
1818
     * states that all-zeroes context fields should not be encoded in the
1819
     * packet-in messages. */
1820
66.1k
    if (!flow->in_port.ofp_port) {
1821
66.1k
        WC_UNMASK_FIELD(wc, in_port);
1822
66.1k
    }
1823
66.1k
    if (!flow->skb_priority) {
1824
66.1k
        WC_UNMASK_FIELD(wc, skb_priority);
1825
66.1k
    }
1826
66.1k
    if (!flow->pkt_mark) {
1827
66.1k
        WC_UNMASK_FIELD(wc, pkt_mark);
1828
66.1k
    }
1829
66.1k
    if (!flow->recirc_id) {
1830
66.1k
        WC_UNMASK_FIELD(wc, recirc_id);
1831
66.1k
    }
1832
66.1k
    if (!flow->dp_hash) {
1833
66.1k
        WC_UNMASK_FIELD(wc, dp_hash);
1834
66.1k
    }
1835
66.1k
    if (!flow->ct_state) {
1836
66.1k
        WC_UNMASK_FIELD(wc, ct_state);
1837
66.1k
    }
1838
66.1k
    if (!flow->ct_zone) {
1839
66.1k
        WC_UNMASK_FIELD(wc, ct_zone);
1840
66.1k
    }
1841
66.1k
    if (!flow->ct_mark) {
1842
66.1k
        WC_UNMASK_FIELD(wc, ct_mark);
1843
66.1k
    }
1844
66.1k
    if (ovs_u128_is_zero(flow->ct_label)) {
1845
66.1k
        WC_UNMASK_FIELD(wc, ct_label);
1846
66.1k
    }
1847
66.1k
    if (!is_ct_valid(flow, &match.wc, NULL) || !flow->ct_nw_proto) {
1848
66.1k
        WC_UNMASK_FIELD(wc, ct_nw_proto);
1849
66.1k
        WC_UNMASK_FIELD(wc, ct_tp_src);
1850
66.1k
        WC_UNMASK_FIELD(wc, ct_tp_dst);
1851
66.1k
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
1852
8.02k
            WC_UNMASK_FIELD(wc, ct_nw_src);
1853
8.02k
            WC_UNMASK_FIELD(wc, ct_nw_dst);
1854
58.1k
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
1855
14.2k
            WC_UNMASK_FIELD(wc, ct_ipv6_src);
1856
14.2k
            WC_UNMASK_FIELD(wc, ct_ipv6_dst);
1857
14.2k
        }
1858
66.1k
    }
1859
1.12M
    for (int i = 0; i < FLOW_N_REGS; i++) {
1860
1.05M
        if (!flow->regs[i]) {
1861
1.05M
            WC_UNMASK_FIELD(wc, regs[i]);
1862
1.05M
        }
1863
1.05M
    }
1864
66.1k
    if (!flow->metadata) {
1865
66.1k
        WC_UNMASK_FIELD(wc, metadata);
1866
66.1k
    }
1867
1868
66.1k
    match_format(&match, port_map, ds, OFP_DEFAULT_PRIORITY);
1869
66.1k
}
1870
1871
void
1872
flow_print(FILE *stream,
1873
           const struct flow *flow, const struct ofputil_port_map *port_map)
1874
0
{
1875
0
    char *s = flow_to_string(flow, port_map);
1876
0
    fputs(s, stream);
1877
0
    free(s);
1878
0
}
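
For completeness, a sketch wiring flow extraction to the printer above; flow_extract() is defined earlier in this file, and a NULL port map skips port-name resolution:

#include <stdio.h>
#include "dp-packet.h"
#include "flow.h"

static void
dump_flow_of_packet(struct dp_packet *packet)
{
    struct flow flow;

    flow_extract(packet, &flow);
    flow_print(stdout, &flow, NULL);
    putchar('\n');
}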
1879

1880
/* flow_wildcards functions. */
1881
1882
/* Initializes 'wc' as a set of wildcards that matches every packet. */
1883
void
1884
flow_wildcards_init_catchall(struct flow_wildcards *wc)
1885
448k
{
1886
448k
    memset(&wc->masks, 0, sizeof wc->masks);
1887
448k
}
1888
1889
/* Converts a flow into flow wildcards.  It sets the wildcard masks based on
1890
 * the packet headers extracted to 'flow'.  It will not set the mask for fields
1891
 * that do not make sense for the packet type.  OpenFlow-only metadata is
1892
 * wildcarded, but other metadata is unconditionally exact-matched. */
1893
void
1894
flow_wildcards_init_for_packet(struct flow_wildcards *wc,
1895
                               const struct flow *flow)
1896
66.1k
{
1897
66.1k
    ovs_be16 dl_type = OVS_BE16_MAX;
1898
1899
66.1k
    memset(&wc->masks, 0x0, sizeof wc->masks);
1900
1901
    /* Update this function whenever struct flow changes. */
1902
66.1k
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
1903
1904
66.1k
    if (flow_tnl_dst_is_set(&flow->tunnel)) {
1905
0
        if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
1906
0
            WC_MASK_FIELD(wc, tunnel.tun_id);
1907
0
        }
1908
0
        WC_MASK_FIELD(wc, tunnel.ip_src);
1909
0
        WC_MASK_FIELD(wc, tunnel.ip_dst);
1910
0
        WC_MASK_FIELD(wc, tunnel.ipv6_src);
1911
0
        WC_MASK_FIELD(wc, tunnel.ipv6_dst);
1912
0
        WC_MASK_FIELD(wc, tunnel.flags);
1913
0
        WC_MASK_FIELD(wc, tunnel.ip_tos);
1914
0
        WC_MASK_FIELD(wc, tunnel.ip_ttl);
1915
0
        WC_MASK_FIELD(wc, tunnel.tp_src);
1916
0
        WC_MASK_FIELD(wc, tunnel.tp_dst);
1917
0
        WC_MASK_FIELD(wc, tunnel.gbp_id);
1918
0
        WC_MASK_FIELD(wc, tunnel.gbp_flags);
1919
0
        WC_MASK_FIELD(wc, tunnel.erspan_ver);
1920
0
        WC_MASK_FIELD(wc, tunnel.erspan_idx);
1921
0
        WC_MASK_FIELD(wc, tunnel.erspan_dir);
1922
0
        WC_MASK_FIELD(wc, tunnel.erspan_hwid);
1923
0
        WC_MASK_FIELD(wc, tunnel.gtpu_flags);
1924
0
        WC_MASK_FIELD(wc, tunnel.gtpu_msgtype);
1925
1926
0
        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
1927
0
            if (flow->tunnel.metadata.present.map) {
1928
0
                wc->masks.tunnel.metadata.present.map =
1929
0
                                              flow->tunnel.metadata.present.map;
1930
0
                WC_MASK_FIELD(wc, tunnel.metadata.opts.u8);
1931
0
                WC_MASK_FIELD(wc, tunnel.metadata.tab);
1932
0
            }
1933
0
        } else {
1934
0
            WC_MASK_FIELD(wc, tunnel.metadata.present.len);
1935
0
            memset(wc->masks.tunnel.metadata.opts.gnv, 0xff,
1936
0
                   flow->tunnel.metadata.present.len);
1937
0
        }
1938
66.1k
    } else if (flow->tunnel.tun_id) {
1939
0
        WC_MASK_FIELD(wc, tunnel.tun_id);
1940
0
    }
1941
1942
    /* metadata, regs, and conj_id wildcarded. */
1943
1944
66.1k
    WC_MASK_FIELD(wc, skb_priority);
1945
66.1k
    WC_MASK_FIELD(wc, pkt_mark);
1946
66.1k
    WC_MASK_FIELD(wc, ct_state);
1947
66.1k
    WC_MASK_FIELD(wc, ct_zone);
1948
66.1k
    WC_MASK_FIELD(wc, ct_mark);
1949
66.1k
    WC_MASK_FIELD(wc, ct_label);
1950
66.1k
    WC_MASK_FIELD(wc, recirc_id);
1951
66.1k
    WC_MASK_FIELD(wc, dp_hash);
1952
66.1k
    WC_MASK_FIELD(wc, in_port);
1953
1954
    /* actset_output wildcarded. */
1955
1956
66.1k
    WC_MASK_FIELD(wc, packet_type);
1957
66.1k
    if (flow->packet_type == htonl(PT_ETH)) {
1958
58.4k
        WC_MASK_FIELD(wc, dl_dst);
1959
58.4k
        WC_MASK_FIELD(wc, dl_src);
1960
58.4k
        WC_MASK_FIELD(wc, dl_type);
1961
        /* No need to set mask of inner VLANs that don't exist. */
1962
59.5k
        for (int i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
1963
            /* Always show the first zero VLAN. */
1964
59.1k
            WC_MASK_FIELD(wc, vlans[i]);
1965
59.1k
            if (flow->vlans[i].tci == htons(0)) {
1966
58.0k
                break;
1967
58.0k
            }
1968
59.1k
        }
1969
58.4k
        dl_type = flow->dl_type;
1970
58.4k
    } else {
1971
7.74k
        dl_type = pt_ns_type_be(flow->packet_type);
1972
7.74k
    }
1973
1974
66.1k
    if (dl_type == htons(ETH_TYPE_IP)) {
1975
8.02k
        WC_MASK_FIELD(wc, nw_src);
1976
8.02k
        WC_MASK_FIELD(wc, nw_dst);
1977
8.02k
        WC_MASK_FIELD(wc, ct_nw_src);
1978
8.02k
        WC_MASK_FIELD(wc, ct_nw_dst);
1979
58.1k
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
1980
14.2k
        WC_MASK_FIELD(wc, ipv6_src);
1981
14.2k
        WC_MASK_FIELD(wc, ipv6_dst);
1982
14.2k
        WC_MASK_FIELD(wc, ipv6_label);
1983
14.2k
        if (is_nd(flow, wc)) {
1984
705
            WC_MASK_FIELD(wc, arp_sha);
1985
705
            WC_MASK_FIELD(wc, arp_tha);
1986
705
            WC_MASK_FIELD(wc, nd_target);
1987
13.5k
        } else {
1988
13.5k
            WC_MASK_FIELD(wc, ct_ipv6_src);
1989
13.5k
            WC_MASK_FIELD(wc, ct_ipv6_dst);
1990
13.5k
        }
1991
43.9k
    } else if (dl_type == htons(ETH_TYPE_ARP) ||
1992
43.9k
               dl_type == htons(ETH_TYPE_RARP)) {
1993
469
        WC_MASK_FIELD(wc, nw_src);
1994
469
        WC_MASK_FIELD(wc, nw_dst);
1995
469
        WC_MASK_FIELD(wc, nw_proto);
1996
469
        WC_MASK_FIELD(wc, arp_sha);
1997
469
        WC_MASK_FIELD(wc, arp_tha);
1998
469
        return;
1999
43.4k
    } else if (eth_type_mpls(dl_type)) {
2000
1.64k
        for (int i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
2001
1.26k
            WC_MASK_FIELD(wc, mpls_lse[i]);
2002
1.26k
            if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
2003
111
                break;
2004
111
            }
2005
1.26k
        }
2006
490
        return;
2007
42.9k
    } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
2008
6.77k
        WC_MASK_FIELD(wc, nsh.flags);
2009
6.77k
        WC_MASK_FIELD(wc, nsh.ttl);
2010
6.77k
        WC_MASK_FIELD(wc, nsh.mdtype);
2011
6.77k
        WC_MASK_FIELD(wc, nsh.np);
2012
6.77k
        WC_MASK_FIELD(wc, nsh.path_hdr);
2013
6.77k
        WC_MASK_FIELD(wc, nsh.context);
2014
36.1k
    } else {
2015
36.1k
        return; /* Unknown ethertype. */
2016
36.1k
    }
2017
2018
    /* IPv4 or IPv6. */
2019
29.0k
    WC_MASK_FIELD_MASK(wc, nw_frag, FLOW_NW_FRAG_MASK);
2020
29.0k
    WC_MASK_FIELD(wc, nw_tos);
2021
29.0k
    WC_MASK_FIELD(wc, nw_ttl);
2022
29.0k
    WC_MASK_FIELD(wc, nw_proto);
2023
29.0k
    WC_MASK_FIELD(wc, ct_nw_proto);
2024
29.0k
    WC_MASK_FIELD(wc, ct_tp_src);
2025
29.0k
    WC_MASK_FIELD(wc, ct_tp_dst);
2026
2027
    /* No transport layer header in later fragments. */
2028
29.0k
    if (!(flow->nw_frag & FLOW_NW_FRAG_LATER) &&
2029
29.0k
        (flow->nw_proto == IPPROTO_ICMP ||
2030
28.5k
         flow->nw_proto == IPPROTO_ICMPV6 ||
2031
28.5k
         flow->nw_proto == IPPROTO_TCP ||
2032
28.5k
         flow->nw_proto == IPPROTO_UDP ||
2033
28.5k
         flow->nw_proto == IPPROTO_SCTP ||
2034
28.5k
         flow->nw_proto == IPPROTO_IGMP)) {
2035
18.7k
        WC_MASK_FIELD(wc, tp_src);
2036
18.7k
        WC_MASK_FIELD(wc, tp_dst);
2037
2038
18.7k
        if (flow->nw_proto == IPPROTO_TCP) {
2039
12.7k
            WC_MASK_FIELD(wc, tcp_flags);
2040
12.7k
        } else if (flow->nw_proto == IPPROTO_IGMP) {
2041
1.64k
            WC_MASK_FIELD(wc, igmp_group_ip4);
2042
1.64k
        }
2043
18.7k
    }
2044
29.0k
}
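
A minimal sketch of the intended call pattern, pairing this with flow_extract() so the resulting masks cover exactly the headers this kind of packet can carry:

#include "dp-packet.h"
#include "flow.h"

static void
packet_headers_mask(struct dp_packet *packet, struct flow_wildcards *wc)
{
    struct flow flow;

    flow_extract(packet, &flow);
    flow_wildcards_init_for_packet(wc, &flow);
}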
2045
2046
/* Return a map of possible fields for a packet of the same type as 'flow'.
2047
 * Including extra bits in the returned mask is not wrong, it is just less
2048
 * optimal.
2049
 *
2050
 * This is a less precise version of flow_wildcards_init_for_packet() above. */
2051
void
2052
flow_wc_map(const struct flow *flow, struct flowmap *map)
2053
0
{
2054
    /* Update this function whenever struct flow changes. */
2055
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2056
2057
0
    flowmap_init(map);
2058
2059
0
    if (flow_tnl_dst_is_set(&flow->tunnel)) {
2060
0
        FLOWMAP_SET__(map, tunnel, offsetof(struct flow_tnl, metadata));
2061
0
        if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
2062
0
            if (flow->tunnel.metadata.present.map) {
2063
0
                FLOWMAP_SET(map, tunnel.metadata);
2064
0
            }
2065
0
        } else {
2066
0
            FLOWMAP_SET(map, tunnel.metadata.present.len);
2067
0
            FLOWMAP_SET__(map, tunnel.metadata.opts.gnv,
2068
0
                          flow->tunnel.metadata.present.len);
2069
0
        }
2070
0
    }
2071
2072
    /* Metadata fields that can appear on packet input. */
2073
0
    FLOWMAP_SET(map, skb_priority);
2074
0
    FLOWMAP_SET(map, pkt_mark);
2075
0
    FLOWMAP_SET(map, recirc_id);
2076
0
    FLOWMAP_SET(map, dp_hash);
2077
0
    FLOWMAP_SET(map, in_port);
2078
0
    FLOWMAP_SET(map, dl_dst);
2079
0
    FLOWMAP_SET(map, dl_src);
2080
0
    FLOWMAP_SET(map, dl_type);
2081
0
    FLOWMAP_SET(map, vlans);
2082
0
    FLOWMAP_SET(map, ct_state);
2083
0
    FLOWMAP_SET(map, ct_zone);
2084
0
    FLOWMAP_SET(map, ct_mark);
2085
0
    FLOWMAP_SET(map, ct_label);
2086
0
    FLOWMAP_SET(map, packet_type);
2087
2088
    /* Ethertype-dependent fields. */
2089
0
    if (OVS_LIKELY(flow->dl_type == htons(ETH_TYPE_IP))) {
2090
0
        FLOWMAP_SET(map, nw_src);
2091
0
        FLOWMAP_SET(map, nw_dst);
2092
0
        FLOWMAP_SET(map, nw_proto);
2093
0
        FLOWMAP_SET(map, nw_frag);
2094
0
        FLOWMAP_SET(map, nw_tos);
2095
0
        FLOWMAP_SET(map, nw_ttl);
2096
0
        FLOWMAP_SET(map, tp_src);
2097
0
        FLOWMAP_SET(map, tp_dst);
2098
0
        FLOWMAP_SET(map, ct_nw_proto);
2099
0
        FLOWMAP_SET(map, ct_nw_src);
2100
0
        FLOWMAP_SET(map, ct_nw_dst);
2101
0
        FLOWMAP_SET(map, ct_tp_src);
2102
0
        FLOWMAP_SET(map, ct_tp_dst);
2103
2104
0
        if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_IGMP)) {
2105
0
            FLOWMAP_SET(map, igmp_group_ip4);
2106
0
        } else {
2107
0
            FLOWMAP_SET(map, tcp_flags);
2108
0
        }
2109
0
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2110
0
        FLOWMAP_SET(map, ipv6_src);
2111
0
        FLOWMAP_SET(map, ipv6_dst);
2112
0
        FLOWMAP_SET(map, ipv6_label);
2113
0
        FLOWMAP_SET(map, nw_proto);
2114
0
        FLOWMAP_SET(map, nw_frag);
2115
0
        FLOWMAP_SET(map, nw_tos);
2116
0
        FLOWMAP_SET(map, nw_ttl);
2117
0
        FLOWMAP_SET(map, tp_src);
2118
0
        FLOWMAP_SET(map, tp_dst);
2119
2120
0
        if (OVS_UNLIKELY(is_nd(flow, NULL))) {
2121
0
            FLOWMAP_SET(map, nd_target);
2122
0
            FLOWMAP_SET(map, arp_sha);
2123
0
            FLOWMAP_SET(map, arp_tha);
2124
0
            FLOWMAP_SET(map, tcp_flags);
2125
0
            FLOWMAP_SET(map, igmp_group_ip4);
2126
0
        } else {
2127
0
            FLOWMAP_SET(map, ct_nw_proto);
2128
0
            FLOWMAP_SET(map, ct_ipv6_src);
2129
0
            FLOWMAP_SET(map, ct_ipv6_dst);
2130
0
            FLOWMAP_SET(map, ct_tp_src);
2131
0
            FLOWMAP_SET(map, ct_tp_dst);
2132
0
            FLOWMAP_SET(map, tcp_flags);
2133
0
        }
2134
0
    } else if (eth_type_mpls(flow->dl_type)) {
2135
0
        FLOWMAP_SET(map, mpls_lse);
2136
0
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
2137
0
               flow->dl_type == htons(ETH_TYPE_RARP)) {
2138
0
        FLOWMAP_SET(map, nw_src);
2139
0
        FLOWMAP_SET(map, nw_dst);
2140
0
        FLOWMAP_SET(map, nw_proto);
2141
0
        FLOWMAP_SET(map, arp_sha);
2142
0
        FLOWMAP_SET(map, arp_tha);
2143
0
    } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
2144
0
        FLOWMAP_SET(map, nsh.flags);
2145
0
        FLOWMAP_SET(map, nsh.mdtype);
2146
0
        FLOWMAP_SET(map, nsh.np);
2147
0
        FLOWMAP_SET(map, nsh.path_hdr);
2148
0
        FLOWMAP_SET(map, nsh.context);
2149
0
    }
2150
0
}
2151
2152
/* Clear the metadata and register wildcard masks. They are not packet
2153
 * header fields. */
2154
void
2155
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
2156
0
{
2157
    /* Update this function whenever struct flow changes. */
2158
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2159
2160
0
    memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
2161
0
    memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
2162
0
    wc->masks.actset_output = 0;
2163
0
    wc->masks.conj_id = 0;
2164
0
}
2165
2166
/* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or
2167
 * fields. */
2168
bool
2169
flow_wildcards_is_catchall(const struct flow_wildcards *wc)
2170
0
{
2171
0
    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2172
0
    size_t i;
2173
2174
0
    for (i = 0; i < FLOW_U64S; i++) {
2175
0
        if (wc_u64[i]) {
2176
0
            return false;
2177
0
        }
2178
0
    }
2179
0
    return true;
2180
0
}
2181
2182
/* Sets 'dst' as the bitwise AND of wildcards in 'src1' and 'src2'.
2183
 * That is, a bit or a field is wildcarded in 'dst' if it is wildcarded
2184
 * in 'src1' or 'src2' or both.  */
2185
void
2186
flow_wildcards_and(struct flow_wildcards *dst,
2187
                   const struct flow_wildcards *src1,
2188
                   const struct flow_wildcards *src2)
2189
0
{
2190
0
    uint64_t *dst_u64 = (uint64_t *) &dst->masks;
2191
0
    const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
2192
0
    const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
2193
0
    size_t i;
2194
2195
0
    for (i = 0; i < FLOW_U64S; i++) {
2196
0
        dst_u64[i] = src1_u64[i] & src2_u64[i];
2197
0
    }
2198
0
}
2199
2200
/* Sets 'dst' as the bitwise OR of wildcards in 'src1' and 'src2'.  That
2201
 * is, a bit or a field is wildcarded in 'dst' only if it is
2202
 * wildcarded in both 'src1' and 'src2'. */
2203
void
2204
flow_wildcards_or(struct flow_wildcards *dst,
2205
                  const struct flow_wildcards *src1,
2206
                  const struct flow_wildcards *src2)
2207
0
{
2208
0
    uint64_t *dst_u64 = (uint64_t *) &dst->masks;
2209
0
    const uint64_t *src1_u64 = (const uint64_t *) &src1->masks;
2210
0
    const uint64_t *src2_u64 = (const uint64_t *) &src2->masks;
2211
0
    size_t i;
2212
2213
0
    for (i = 0; i < FLOW_U64S; i++) {
2214
0
        dst_u64[i] = src1_u64[i] | src2_u64[i];
2215
0
    }
2216
0
}
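
A sketch of the mask algebra under the 0-bit-means-wildcard convention; the assertion holds because the OR'd masks always cover the AND'd masks:

#include "flow.h"
#include "util.h"

static void
wildcards_combine_demo(const struct flow_wildcards *a,
                       const struct flow_wildcards *b)
{
    struct flow_wildcards and_wc, or_wc;

    flow_wildcards_and(&and_wc, a, b);  /* Wildcarded if either input is. */
    flow_wildcards_or(&or_wc, a, b);    /* Wildcarded only if both are. */

    /* 'or_wc' never wildcards a bit that 'and_wc' matches exactly. */
    ovs_assert(!flow_wildcards_has_extra(&or_wc, &and_wc));
}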
2217
2218
/* Returns a hash of the wildcards in 'wc'. */
2219
uint32_t
2220
flow_wildcards_hash(const struct flow_wildcards *wc, uint32_t basis)
2221
0
{
2222
0
    return flow_hash(&wc->masks, basis);
2223
0
}
2224
2225
/* Returns true if 'a' and 'b' represent the same wildcards, false if they are
2226
 * different. */
2227
bool
2228
flow_wildcards_equal(const struct flow_wildcards *a,
2229
                     const struct flow_wildcards *b)
2230
69.4k
{
2231
69.4k
    return flow_equal(&a->masks, &b->masks);
2232
69.4k
}
2233
2234
/* Returns true if at least one bit or field is wildcarded in 'a' but not in
2235
 * 'b', false otherwise. */
2236
bool
2237
flow_wildcards_has_extra(const struct flow_wildcards *a,
2238
                         const struct flow_wildcards *b)
2239
0
{
2240
0
    const uint64_t *a_u64 = (const uint64_t *) &a->masks;
2241
0
    const uint64_t *b_u64 = (const uint64_t *) &b->masks;
2242
0
    size_t i;
2243
2244
0
    for (i = 0; i < FLOW_U64S; i++) {
2245
0
        if ((a_u64[i] & b_u64[i]) != b_u64[i]) {
2246
0
            return true;
2247
0
        }
2248
0
    }
2249
0
    return false;
2250
0
}
2251
2252
/* Returns true if 'a' and 'b' are equal, except that 0-bits (wildcarded bits)
2253
 * in 'wc' do not need to be equal in 'a' and 'b'. */
2254
bool
2255
flow_equal_except(const struct flow *a, const struct flow *b,
2256
                  const struct flow_wildcards *wc)
2257
0
{
2258
0
    const uint64_t *a_u64 = (const uint64_t *) a;
2259
0
    const uint64_t *b_u64 = (const uint64_t *) b;
2260
0
    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2261
0
    size_t i;
2262
2263
0
    for (i = 0; i < FLOW_U64S; i++) {
2264
0
        if ((a_u64[i] ^ b_u64[i]) & wc_u64[i]) {
2265
0
            return false;
2266
0
        }
2267
0
    }
2268
0
    return true;
2269
0
}
2270
2271
/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2272
 * (A 0-bit indicates a wildcard bit.) */
2273
void
2274
flow_wildcards_set_reg_mask(struct flow_wildcards *wc, int idx, uint32_t mask)
2275
10.2k
{
2276
10.2k
    wc->masks.regs[idx] = mask;
2277
10.2k
}
2278
2279
/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2280
 * (A 0-bit indicates a wildcard bit.) */
2281
void
2282
flow_wildcards_set_xreg_mask(struct flow_wildcards *wc, int idx, uint64_t mask)
2283
5.04k
{
2284
5.04k
    flow_set_xreg(&wc->masks, idx, mask);
2285
5.04k
}
2286
2287
/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
2288
 * (A 0-bit indicates a wildcard bit.) */
2289
void
2290
flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx,
2291
                              ovs_u128 mask)
2292
5.01k
{
2293
5.01k
    flow_set_xxreg(&wc->masks, idx, mask);
2294
5.01k
}
2295
2296
/* Calculates the 5-tuple hash from the given miniflow.
2297
 * This returns the same value as flow_hash_5tuple for the corresponding
2298
 * flow. */
2299
uint32_t
2300
miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
2301
0
{
2302
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2303
0
    uint32_t hash = basis;
2304
2305
0
    if (flow) {
2306
0
        ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
2307
0
        uint8_t nw_proto;
2308
2309
0
        if (dl_type == htons(ETH_TYPE_IPV6)) {
2310
0
            struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
2311
0
            uint64_t value;
2312
2313
0
            FLOWMAP_SET(&map, ipv6_src);
2314
0
            FLOWMAP_SET(&map, ipv6_dst);
2315
2316
0
            MINIFLOW_FOR_EACH_IN_FLOWMAP(value, flow, map) {
2317
0
                hash = hash_add64(hash, value);
2318
0
            }
2319
0
        } else if (dl_type == htons(ETH_TYPE_IP)
2320
0
                   || dl_type == htons(ETH_TYPE_ARP)) {
2321
0
            hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_src));
2322
0
            hash = hash_add(hash, MINIFLOW_GET_U32(flow, nw_dst));
2323
0
        } else {
2324
0
            goto out;
2325
0
        }
2326
2327
0
        nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
2328
0
        hash = hash_add(hash, nw_proto);
2329
0
        if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
2330
0
            && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
2331
0
            && nw_proto != IPPROTO_ICMPV6) {
2332
0
            goto out;
2333
0
        }
2334
2335
        /* Add both ports at once. */
2336
0
        hash = hash_add(hash, (OVS_FORCE uint32_t) miniflow_get_ports(flow));
2337
0
    }
2338
0
out:
2339
0
    return hash_finish(hash, 42);
2340
0
}
2341
2342
ASSERT_SEQUENTIAL_SAME_WORD(tp_src, tp_dst);
2343
ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst);
2344
2345
/* Calculates the 5-tuple hash from the given flow. */
2346
uint32_t
2347
flow_hash_5tuple(const struct flow *flow, uint32_t basis)
2348
0
{
2349
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
2350
0
    uint32_t hash = basis;
2351
2352
0
    if (flow) {
2353
2354
0
        if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2355
0
            const uint64_t *flow_u64 = (const uint64_t *)flow;
2356
0
            int ofs = offsetof(struct flow, ipv6_src) / 8;
2357
0
            int end = ofs + 2 * sizeof flow->ipv6_src / 8;
2358
2359
0
            for (;ofs < end; ofs++) {
2360
0
                hash = hash_add64(hash, flow_u64[ofs]);
2361
0
            }
2362
0
        } else if (flow->dl_type == htons(ETH_TYPE_IP)
2363
0
                   || flow->dl_type == htons(ETH_TYPE_ARP)) {
2364
0
            hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_src);
2365
0
            hash = hash_add(hash, (OVS_FORCE uint32_t) flow->nw_dst);
2366
0
        } else {
2367
0
            goto out;
2368
0
        }
2369
2370
0
        hash = hash_add(hash, flow->nw_proto);
2371
0
        if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP
2372
0
            && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP
2373
0
            && flow->nw_proto != IPPROTO_ICMPV6) {
2374
0
            goto out;
2375
0
        }
2376
2377
        /* Add both ports at once. */
2378
0
        hash = hash_add(hash,
2379
0
                        ((const uint32_t *)flow)[offsetof(struct flow, tp_src)
2380
0
                                                 / sizeof(uint32_t)]);
2381
0
    }
2382
0
out:
2383
0
    return hash_finish(hash, 42); /* Arbitrary number. */
2384
0
}
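
A typical consumer sketch: bucketing connections by their 5-tuple for per-flow load distribution; the names here are illustrative:

#include "flow.h"

static uint32_t
five_tuple_bucket(const struct flow *flow, uint32_t n_buckets)
{
    /* The same 5-tuple always lands in the same bucket; the basis
     * (0 here) just perturbs the hash. */
    return flow_hash_5tuple(flow, 0) % n_buckets;
}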
2385
2386
/* Hashes 'flow' based on its L2 through L4 protocol information. */
2387
uint32_t
2388
flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis)
2389
0
{
2390
0
    struct {
2391
0
        union {
2392
0
            ovs_be32 ipv4_addr;
2393
0
            struct in6_addr ipv6_addr;
2394
0
        };
2395
0
        ovs_be16 eth_type;
2396
0
        ovs_be16 vlan_tci;
2397
0
        ovs_be16 tp_port;
2398
0
        struct eth_addr eth_addr;
2399
0
        uint8_t ip_proto;
2400
0
    } fields;
2401
2402
0
    int i;
2403
2404
0
    memset(&fields, 0, sizeof fields);
2405
0
    for (i = 0; i < ARRAY_SIZE(fields.eth_addr.be16); i++) {
2406
0
        fields.eth_addr.be16[i] = flow->dl_src.be16[i] ^ flow->dl_dst.be16[i];
2407
0
    }
2408
0
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2409
0
        fields.vlan_tci ^= flow->vlans[i].tci & htons(VLAN_VID_MASK);
2410
0
    }
2411
0
    fields.eth_type = flow->dl_type;
2412
2413
    /* UDP source and destination ports are not taken into account because they
2414
     * will not necessarily be symmetric in a bidirectional flow. */
2415
0
    if (fields.eth_type == htons(ETH_TYPE_IP)) {
2416
0
        fields.ipv4_addr = flow->nw_src ^ flow->nw_dst;
2417
0
        fields.ip_proto = flow->nw_proto;
2418
0
        if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
2419
0
            fields.tp_port = flow->tp_src ^ flow->tp_dst;
2420
0
        }
2421
0
    } else if (fields.eth_type == htons(ETH_TYPE_IPV6)) {
2422
0
        const uint8_t *a = &flow->ipv6_src.s6_addr[0];
2423
0
        const uint8_t *b = &flow->ipv6_dst.s6_addr[0];
2424
0
        uint8_t *ipv6_addr = &fields.ipv6_addr.s6_addr[0];
2425
2426
0
        for (i=0; i<16; i++) {
2427
0
            ipv6_addr[i] = a[i] ^ b[i];
2428
0
        }
2429
0
        fields.ip_proto = flow->nw_proto;
2430
0
        if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
2431
0
            fields.tp_port = flow->tp_src ^ flow->tp_dst;
2432
0
        }
2433
0
    }
2434
0
    return jhash_bytes(&fields, sizeof fields, basis);
2435
0
}
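
The symmetry property can be spot-checked with a sketch like the following (IPv4 case only; an IPv6 check would also swap ipv6_src/ipv6_dst):

#include "flow.h"
#include "util.h"

static void
assert_l4_hash_symmetric(const struct flow *orig)
{
    struct flow rev = *orig;

    /* Reverse the packet direction: swap MACs, IPs, and ports. */
    rev.dl_src = orig->dl_dst;
    rev.dl_dst = orig->dl_src;
    rev.nw_src = orig->nw_dst;
    rev.nw_dst = orig->nw_src;
    rev.tp_src = orig->tp_dst;
    rev.tp_dst = orig->tp_src;

    ovs_assert(flow_hash_symmetric_l4(orig, 0)
               == flow_hash_symmetric_l4(&rev, 0));
}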
2436
2437
/* Symmetrically hashes non-IP 'flow' based on its L2 headers. */
2438
uint32_t
2439
flow_hash_symmetric_l2(const struct flow *flow, uint32_t basis)
2440
0
{
2441
0
    union {
2442
0
        struct {
2443
0
            ovs_be16 eth_type;
2444
0
            ovs_be16 vlan_tci;
2445
0
            struct eth_addr eth_addr;
2446
0
            ovs_be16 pad;
2447
0
        };
2448
0
        uint32_t word[3];
2449
0
    } fields;
2450
2451
0
    uint32_t hash = basis;
2452
0
    int i;
2453
2454
0
    if (flow->packet_type != htonl(PT_ETH)) {
2455
        /* Cannot hash non-Ethernet flows. */
2456
0
        return 0;
2457
0
    }
2458
2459
0
    for (i = 0; i < ARRAY_SIZE(fields.eth_addr.be16); i++) {
2460
0
        fields.eth_addr.be16[i] =
2461
0
                flow->dl_src.be16[i] ^ flow->dl_dst.be16[i];
2462
0
    }
2463
0
    fields.vlan_tci = 0;
2464
0
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2465
0
        fields.vlan_tci ^= flow->vlans[i].tci & htons(VLAN_VID_MASK);
2466
0
    }
2467
0
    fields.eth_type = flow->dl_type;
2468
0
    fields.pad = 0;
2469
2470
0
    hash = hash_add(hash, fields.word[0]);
2471
0
    hash = hash_add(hash, fields.word[1]);
2472
0
    hash = hash_add(hash, fields.word[2]);
2473
0
    return hash_finish(hash, basis);
2474
0
}
2475
2476
/* Hashes 'flow' based on its L3 through L4 protocol information. */
2477
uint32_t
2478
flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
2479
                         bool inc_udp_ports)
2480
0
{
2481
0
    uint32_t hash = basis;
2482
2483
    /* UDP ports are taken into account only if 'inc_udp_ports' is true. */
2484
0
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
2485
0
        hash = hash_add(hash,
2486
0
                        (OVS_FORCE uint32_t) (flow->nw_src ^ flow->nw_dst));
2487
0
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2488
        /* IPv6 addresses are 64-bit aligned inside struct flow. */
2489
0
        const uint64_t *a = ALIGNED_CAST(uint64_t *, flow->ipv6_src.s6_addr);
2490
0
        const uint64_t *b = ALIGNED_CAST(uint64_t *, flow->ipv6_dst.s6_addr);
2491
2492
0
        for (int i = 0; i < sizeof flow->ipv6_src / sizeof *a; i++) {
2493
0
            hash = hash_add64(hash, a[i] ^ b[i]);
2494
0
        }
2495
0
    } else {
2496
        /* Fall back to hashing L2 headers. */
2497
0
        return flow_hash_symmetric_l2(flow, basis);
2498
0
    }
2499
0
    hash = hash_add(hash, flow->nw_proto);
2500
0
    if (!(flow->nw_frag & FLOW_NW_FRAG_MASK)
2501
0
        && (flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP ||
2502
0
            (inc_udp_ports && flow->nw_proto == IPPROTO_UDP))) {
2503
0
        hash = hash_add(hash,
2504
0
                        (OVS_FORCE uint16_t) (flow->tp_src ^ flow->tp_dst));
2505
0
    }
2506
2507
0
    return hash_finish(hash, basis);
2508
0
}
2509
2510
/* Hashes 'flow' based on its nw_dst and nw_src for multipath. */
2511
uint32_t
2512
flow_hash_symmetric_l3(const struct flow *flow, uint32_t basis)
2513
0
{
2514
0
    struct {
2515
0
        union {
2516
0
            ovs_be32 ipv4_addr;
2517
0
            struct in6_addr ipv6_addr;
2518
0
        };
2519
0
        ovs_be16 eth_type;
2520
0
    } fields;
2521
2522
0
    int i;
2523
2524
0
    memset(&fields, 0, sizeof fields);
2525
0
    fields.eth_type = flow->dl_type;
2526
2527
0
    if (fields.eth_type == htons(ETH_TYPE_IP)) {
2528
0
        fields.ipv4_addr = flow->nw_src ^ flow->nw_dst;
2529
0
    } else if (fields.eth_type == htons(ETH_TYPE_IPV6)) {
2530
0
        const uint8_t *a = &flow->ipv6_src.s6_addr[0];
2531
0
        const uint8_t *b = &flow->ipv6_dst.s6_addr[0];
2532
0
        uint8_t *ipv6_addr = &fields.ipv6_addr.s6_addr[0];
2533
2534
0
        for (i = 0; i < 16; i++) {
2535
0
            ipv6_addr[i] = a[i] ^ b[i];
2536
0
        }
2537
0
    }
2538
0
    return jhash_bytes(&fields, sizeof fields, basis);
2539
0
}
2540
2541
/* Initialize a flow with random fields that matter for nx_hash_fields. */
2542
void
2543
flow_random_hash_fields(struct flow *flow)
2544
0
{
2545
0
    uint16_t rnd = random_uint16();
2546
0
    int i;
2547
2548
    /* Initialize to all zeros. */
2549
0
    memset(flow, 0, sizeof *flow);
2550
2551
0
    eth_addr_random(&flow->dl_src);
2552
0
    eth_addr_random(&flow->dl_dst);
2553
2554
0
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2555
0
        uint16_t vlan = random_uint16() & VLAN_VID_MASK;
2556
0
        flow->vlans[i].tpid = htons(ETH_TYPE_VLAN_8021Q);
2557
0
        flow->vlans[i].tci = htons(vlan | VLAN_CFI);
2558
0
    }
2559
2560
    /* Make most of the random flows IPv4, some IPv6, and the rest random. */
2561
0
    flow->dl_type = rnd < 0x8000 ? htons(ETH_TYPE_IP) :
2562
0
        rnd < 0xc000 ? htons(ETH_TYPE_IPV6) : (OVS_FORCE ovs_be16)rnd;
2563
2564
0
    if (dl_type_is_ip_any(flow->dl_type)) {
2565
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2566
0
            flow->nw_src = (OVS_FORCE ovs_be32)random_uint32();
2567
0
            flow->nw_dst = (OVS_FORCE ovs_be32)random_uint32();
2568
0
        } else {
2569
0
            random_bytes(&flow->ipv6_src, sizeof flow->ipv6_src);
2570
0
            random_bytes(&flow->ipv6_dst, sizeof flow->ipv6_dst);
2571
0
        }
2572
        /* Make most IP flows TCP, some UDP or SCTP, and the rest random. */
2573
0
        rnd = random_uint16();
2574
0
        flow->nw_proto = rnd < 0x8000 ? IPPROTO_TCP :
2575
0
            rnd < 0xc000 ? IPPROTO_UDP :
2576
0
            rnd < 0xd000 ? IPPROTO_SCTP : (uint8_t)rnd;
2577
0
        if (flow->nw_proto == IPPROTO_TCP ||
2578
0
            flow->nw_proto == IPPROTO_UDP ||
2579
0
            flow->nw_proto == IPPROTO_SCTP) {
2580
0
            flow->tp_src = (OVS_FORCE ovs_be16)random_uint16();
2581
0
            flow->tp_dst = (OVS_FORCE ovs_be16)random_uint16();
2582
0
        }
2583
0
    }
2584
0
}
2585
2586
/* Masks the fields in 'wc' that are used by the flow hash 'fields'. */
2587
void
2588
flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
2589
                      enum nx_hash_fields fields)
2590
0
{
2591
0
    int i;
2592
0
    switch (fields) {
2593
0
    case NX_HASH_FIELDS_ETH_SRC:
2594
0
        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2595
0
        break;
2596
2597
0
    case NX_HASH_FIELDS_SYMMETRIC_L4:
2598
0
        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2599
0
        memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2600
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2601
0
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2602
0
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2603
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2604
0
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2605
0
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2606
0
        }
2607
0
        if (is_ip_any(flow)) {
2608
0
            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
2609
            /* Unwildcard ports only for non-UDP packets, as UDP port
2610
             * numbers are not used in hash calculations.
2611
             */
2612
0
            if (flow->nw_proto != IPPROTO_UDP) {
2613
0
                flow_unwildcard_tp_ports(flow, wc);
2614
0
            }
2615
0
        }
2616
0
        for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2617
0
            wc->masks.vlans[i].tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2618
0
        }
2619
0
        break;
2620
0
    case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
2621
0
        if (is_ip_any(flow) && flow->nw_proto == IPPROTO_UDP
2622
0
            && !(flow->nw_frag & FLOW_NW_FRAG_MASK)) {
2623
0
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2624
0
            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
2625
0
        }
2626
        /* fall through */
2627
0
    case NX_HASH_FIELDS_SYMMETRIC_L3L4:
2628
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2629
0
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2630
0
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2631
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2632
0
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2633
0
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2634
0
        } else {
2635
0
            break; /* non-IP flow */
2636
0
        }
2637
0
        memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
2638
0
        if ((flow->nw_proto == IPPROTO_TCP || flow->nw_proto == IPPROTO_SCTP)
2639
0
             && !(flow->nw_frag & FLOW_NW_FRAG_MASK)) {
2640
0
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2641
0
            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
2642
0
        }
2643
0
        break;
2644
2645
0
    case NX_HASH_FIELDS_NW_SRC:
2646
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2647
0
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2648
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2649
0
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2650
0
        }
2651
0
        break;
2652
2653
0
    case NX_HASH_FIELDS_NW_DST:
2654
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2655
0
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2656
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2657
0
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2658
0
        }
2659
0
        break;
2660
2661
0
    case NX_HASH_FIELDS_SYMMETRIC_L3:
2662
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2663
0
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
2664
0
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2665
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2666
0
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
2667
0
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2668
0
        }
2669
0
        break;
2670
2671
0
    default:
2672
0
        OVS_NOT_REACHED();
2673
0
    }
2674
0
}
2675
2676
/* Hashes the portions of 'flow' designated by 'fields'. */
2677
uint32_t
2678
flow_hash_fields(const struct flow *flow, enum nx_hash_fields fields,
2679
                 uint16_t basis)
2680
0
{
2681
0
    switch (fields) {
2682
2683
0
    case NX_HASH_FIELDS_ETH_SRC:
2684
0
        return jhash_bytes(&flow->dl_src, sizeof flow->dl_src, basis);
2685
2686
0
    case NX_HASH_FIELDS_SYMMETRIC_L4:
2687
0
        return flow_hash_symmetric_l4(flow, basis);
2688
2689
0
    case NX_HASH_FIELDS_SYMMETRIC_L3L4:
2690
0
        return flow_hash_symmetric_l3l4(flow, basis, false);
2691
2692
0
    case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP:
2693
0
        return flow_hash_symmetric_l3l4(flow, basis, true);
2694
2695
0
    case NX_HASH_FIELDS_NW_SRC:
2696
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2697
0
            return jhash_bytes(&flow->nw_src, sizeof flow->nw_src, basis);
2698
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2699
0
            return jhash_bytes(&flow->ipv6_src, sizeof flow->ipv6_src, basis);
2700
0
        } else {
2701
0
            return basis;
2702
0
        }
2703
2704
0
    case NX_HASH_FIELDS_NW_DST:
2705
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
2706
0
            return jhash_bytes(&flow->nw_dst, sizeof flow->nw_dst, basis);
2707
0
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2708
0
            return jhash_bytes(&flow->ipv6_dst, sizeof flow->ipv6_dst, basis);
2709
0
        } else {
2710
0
            return basis;
2711
0
        }
2712
2713
0
    case NX_HASH_FIELDS_SYMMETRIC_L3:
2714
0
        return flow_hash_symmetric_l3(flow, basis);
2715
0
    }
2716
2717
0
    OVS_NOT_REACHED();
2718
0
}
2719
2720
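A usage sketch (illustrative only, not part of flow.c): selecting an output bucket from a flow hash, e.g. for multipath or bond rebalancing. 'example_pick_bucket' is a hypothetical helper; it assumes 'flow' was populated elsewhere, e.g. by flow_extract().

static uint32_t
example_pick_bucket(const struct flow *flow, size_t n_buckets)
{
    enum nx_hash_fields fields = NX_HASH_FIELDS_SYMMETRIC_L3L4;

    /* flow_hash_fields() aborts on unknown modes, so validate first. */
    ovs_assert(flow_hash_fields_valid(fields));

    return flow_hash_fields(flow, fields, 0) % n_buckets;
}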
/* Returns a string representation of 'fields'. */
2721
const char *
2722
flow_hash_fields_to_str(enum nx_hash_fields fields)
2723
2.58k
{
2724
2.58k
    switch (fields) {
2725
985
    case NX_HASH_FIELDS_ETH_SRC: return "eth_src";
2726
230
    case NX_HASH_FIELDS_SYMMETRIC_L4: return "symmetric_l4";
2727
3
    case NX_HASH_FIELDS_SYMMETRIC_L3L4: return "symmetric_l3l4";
2728
15
    case NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP: return "symmetric_l3l4+udp";
2729
37
    case NX_HASH_FIELDS_NW_SRC: return "nw_src";
2730
780
    case NX_HASH_FIELDS_NW_DST: return "nw_dst";
2731
2
    case NX_HASH_FIELDS_SYMMETRIC_L3: return "symmetric_l3";
2732
531
    default: return "<unknown>";
2733
2.58k
    }
2734
2.58k
}
2735
2736
/* Returns true if the value of 'fields' is supported, false otherwise. */
2737
bool
2738
flow_hash_fields_valid(enum nx_hash_fields fields)
2739
5.96k
{
2740
5.96k
    return fields == NX_HASH_FIELDS_ETH_SRC
2741
5.96k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L4
2742
5.96k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L3L4
2743
5.96k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP
2744
5.96k
        || fields == NX_HASH_FIELDS_NW_SRC
2745
5.96k
        || fields == NX_HASH_FIELDS_NW_DST
2746
5.96k
        || fields == NX_HASH_FIELDS_SYMMETRIC_L3;
2747
5.96k
}
2748
2749
/* Returns a hash value for the bits of 'flow' that are active based on
2750
 * 'wc', given 'basis'. */
2751
uint32_t
2752
flow_hash_in_wildcards(const struct flow *flow,
2753
                       const struct flow_wildcards *wc, uint32_t basis)
2754
0
{
2755
0
    const uint64_t *wc_u64 = (const uint64_t *) &wc->masks;
2756
0
    const uint64_t *flow_u64 = (const uint64_t *) flow;
2757
0
    uint32_t hash;
2758
0
    size_t i;
2759
2760
0
    hash = basis;
2761
0
    for (i = 0; i < FLOW_U64S; i++) {
2762
0
        hash = hash_add64(hash, flow_u64[i] & wc_u64[i]);
2763
0
    }
2764
0
    return hash_finish(hash, 8 * FLOW_U64S);
2765
0
}
2766
2767
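A usage sketch (illustrative only, not part of flow.c): two flows that differ only in wildcarded bits must hash identically under flow_hash_in_wildcards(), which is what makes the hash usable as a lookup key over masked flows. 'example_same_bucket' is a hypothetical helper.

static bool
example_same_bucket(const struct flow *a, const struct flow *b,
                    const struct flow_wildcards *wc)
{
    /* Equal masked bits imply equal hashes; the converse need not hold. */
    return (flow_hash_in_wildcards(a, wc, 0)
            == flow_hash_in_wildcards(b, wc, 0));
}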
/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2768
 * OpenFlow 1.0 "dl_vlan" value:
2769
 *
2770
 *      - If it is in the range 0...4095, 'flow->vlans[id].tci' is set to match
2771
 *        that VLAN.  Any existing PCP match is unchanged (it becomes 0 if
2772
 *        'flow' previously matched packets without a VLAN header).
2773
 *
2774
 *      - If it is OFP10_VLAN_NONE, 'flow->vlans[id].tci' is set to match a packet
2775
 *        without a VLAN tag.
2776
 *
2777
 *      - Other values of 'vid' should not be used. */
2778
void
2779
flow_set_dl_vlan(struct flow *flow, ovs_be16 vid, int id)
2780
0
{
2781
0
    if (vid == htons(OFP10_VLAN_NONE)) {
2782
0
        flow->vlans[id].tci = htons(0);
2783
0
    } else {
2784
0
        vid &= htons(VLAN_VID_MASK);
2785
0
        flow->vlans[id].tci &= ~htons(VLAN_VID_MASK);
2786
0
        flow->vlans[id].tci |= htons(VLAN_CFI) | vid;
2787
0
    }
2788
0
}
2789
2790
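A usage sketch (illustrative only, not part of flow.c): applying the OpenFlow 1.0 semantics above to the outermost tag. 'example_dl_vlan' is a hypothetical helper.

static void
example_dl_vlan(struct flow *flow)
{
    flow_set_dl_vlan(flow, htons(100), 0);              /* Match VID 100. */
    flow_set_dl_vlan(flow, htons(OFP10_VLAN_NONE), 0);  /* Match untagged. */
}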
/* Sets the VLAN header TPID, which must be either ETH_TYPE_VLAN_8021Q or
2791
 * ETH_TYPE_VLAN_8021AD. */
2792
void
2793
flow_fix_vlan_tpid(struct flow *flow)
2794
0
{
2795
0
    if (flow->vlans[0].tpid == htons(0) && flow->vlans[0].tci != 0) {
2796
0
        flow->vlans[0].tpid = htons(ETH_TYPE_VLAN_8021Q);
2797
0
    }
2798
0
}
2799
2800
/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
2801
 * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID
2802
 * plus CFI). */
2803
void
2804
flow_set_vlan_vid(struct flow *flow, ovs_be16 vid)
2805
195
{
2806
195
    ovs_be16 mask = htons(VLAN_VID_MASK | VLAN_CFI);
2807
195
    flow->vlans[0].tci &= ~mask;
2808
195
    flow->vlans[0].tci |= vid & mask;
2809
195
}
2810
2811
/* Sets the VLAN PCP that 'flow' matches to 'pcp', which should be in the
2812
 * range 0...7.
2813
 *
2814
 * This function has no effect on the VLAN ID that 'flow' matches.
2815
 *
2816
 * After calling this function, 'flow' will not match packets without a VLAN
2817
 * header. */
2818
void
2819
flow_set_vlan_pcp(struct flow *flow, uint8_t pcp, int id)
2820
18
{
2821
18
    pcp &= 0x07;
2822
18
    flow->vlans[id].tci &= ~htons(VLAN_PCP_MASK);
2823
18
    flow->vlans[id].tci |= htons((pcp << VLAN_PCP_SHIFT) | VLAN_CFI);
2824
18
}
2825
2826
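A usage sketch (illustrative only, not part of flow.c): combining the two setters above on the outermost tag. 'example_vlan_vid_pcp' is hypothetical; note that flow_set_vlan_vid() always operates on vlans[0], while flow_set_vlan_pcp() takes an explicit header index.

static void
example_vlan_vid_pcp(struct flow *flow)
{
    flow_set_vlan_vid(flow, htons(VLAN_CFI | 100));  /* VID 100 plus CFI. */
    flow_set_vlan_pcp(flow, 5, 0);                   /* PCP 5, outer tag. */
}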
/* Counts the number of VLAN headers. */
2827
int
2828
flow_count_vlan_headers(const struct flow *flow)
2829
21.1k
{
2830
21.1k
    int i;
2831
2832
31.7k
    for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
2833
31.3k
        if (!(flow->vlans[i].tci & htons(VLAN_CFI))) {
2834
20.7k
            break;
2835
20.7k
        }
2836
31.3k
    }
2837
21.1k
    return i;
2838
21.1k
}
2839
2840
/* Given '*p_an' and '*p_bn' pointing to one past the last VLAN header of
2841
 * 'a' and 'b' respectively, skip common VLANs so that they point to the
2842
 * first different VLAN, counting from the bottom. */
2843
void
2844
flow_skip_common_vlan_headers(const struct flow *a, int *p_an,
2845
                              const struct flow *b, int *p_bn)
2846
0
{
2847
0
    int an = *p_an, bn = *p_bn;
2848
2849
0
    for (an--, bn--; an >= 0 && bn >= 0; an--, bn--) {
2850
0
        if (a->vlans[an].qtag != b->vlans[bn].qtag) {
2851
0
            break;
2852
0
        }
2853
0
    }
2854
0
    *p_an = an;
2855
0
    *p_bn = bn;
2856
0
}
2857
2858
void
2859
flow_pop_vlan(struct flow *flow, struct flow_wildcards *wc)
2860
21.1k
{
2861
21.1k
    int n = flow_count_vlan_headers(flow);
2862
21.1k
    if (n > 1) {
2863
376
        if (wc) {
2864
0
            memset(&wc->masks.vlans[1], 0xff,
2865
0
                   sizeof(union flow_vlan_hdr) * (n - 1));
2866
0
        }
2867
376
        memmove(&flow->vlans[0], &flow->vlans[1],
2868
376
                sizeof(union flow_vlan_hdr) * (n - 1));
2869
376
    }
2870
21.1k
    if (n > 0) {
2871
10.2k
        memset(&flow->vlans[n - 1], 0, sizeof(union flow_vlan_hdr));
2872
10.2k
    }
2873
21.1k
}
2874
2875
void
2876
flow_push_vlan_uninit(struct flow *flow, struct flow_wildcards *wc)
2877
2.40k
{
2878
2.40k
    if (wc) {
2879
0
        int n = flow_count_vlan_headers(flow);
2880
0
        if (n) {
2881
0
            memset(wc->masks.vlans, 0xff, sizeof(union flow_vlan_hdr) * n);
2882
0
        }
2883
0
    }
2884
2.40k
    memmove(&flow->vlans[1], &flow->vlans[0],
2885
2.40k
            sizeof(union flow_vlan_hdr) * (FLOW_MAX_VLAN_HEADERS - 1));
2886
2.40k
    memset(&flow->vlans[0], 0, sizeof(union flow_vlan_hdr));
2887
2.40k
}
2888
2889
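A usage sketch (illustrative only, not part of flow.c): pushing a fresh outermost 802.1Q tag and popping it again. flow_push_vlan_uninit() leaves vlans[0] zeroed, so the caller fills in the TPID and TCI itself. 'example_vlan_push_pop' is a hypothetical helper.

static void
example_vlan_push_pop(struct flow *flow, struct flow_wildcards *wc)
{
    flow_push_vlan_uninit(flow, wc);
    flow->vlans[0].tpid = htons(ETH_TYPE_VLAN_8021Q);
    flow->vlans[0].tci = htons(VLAN_CFI | 100);

    ovs_assert(flow_count_vlan_headers(flow) >= 1);
    flow_pop_vlan(flow, wc);
}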
/* Returns the number of MPLS LSEs present in 'flow'.
2890
 *
2891
 * Returns 0 if the 'dl_type' of 'flow' is not an MPLS Ethernet type.
2892
 * Otherwise traverses 'flow''s MPLS label stack stopping at the
2893
 * first entry that has the BoS bit set. If no such entry exists then
2894
 * the maximum number of LSEs that can be stored in 'flow' is returned.
2895
 */
2896
int
2897
flow_count_mpls_labels(const struct flow *flow, struct flow_wildcards *wc)
2898
0
{
2899
    /* dl_type is always masked. */
2900
0
    if (eth_type_mpls(flow->dl_type)) {
2901
0
        int i;
2902
0
        int cnt;
2903
2904
0
        cnt = 0;
2905
0
        for (i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
2906
0
            if (wc) {
2907
0
                wc->masks.mpls_lse[i] |= htonl(MPLS_BOS_MASK);
2908
0
            }
2909
0
            if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
2910
0
                return i + 1;
2911
0
            }
2912
0
            if (flow->mpls_lse[i]) {
2913
0
                cnt++;
2914
0
            }
2915
0
        }
2916
0
        return cnt;
2917
0
    } else {
2918
0
        return 0;
2919
0
    }
2920
0
}
2921
2922
/* Returns the number of consecutive MPLS LSEs, starting at the
2923
 * innermost LSE, that are common in 'a' and 'b'.
2924
 *
2925
 * 'an' must be flow_count_mpls_labels(a).
2926
 * 'bn' must be flow_count_mpls_labels(b).
2927
 */
2928
int
2929
flow_count_common_mpls_labels(const struct flow *a, int an,
2930
                              const struct flow *b, int bn,
2931
                              struct flow_wildcards *wc)
2932
0
{
2933
0
    int min_n = MIN(an, bn);
2934
0
    if (min_n == 0) {
2935
0
        return 0;
2936
0
    } else {
2937
0
        int common_n = 0;
2938
0
        int a_last = an - 1;
2939
0
        int b_last = bn - 1;
2940
0
        int i;
2941
2942
0
        for (i = 0; i < min_n; i++) {
2943
0
            if (wc) {
2944
0
                wc->masks.mpls_lse[a_last - i] = OVS_BE32_MAX;
2945
0
                wc->masks.mpls_lse[b_last - i] = OVS_BE32_MAX;
2946
0
            }
2947
0
            if (a->mpls_lse[a_last - i] != b->mpls_lse[b_last - i]) {
2948
0
                break;
2949
0
            } else {
2950
0
                common_n++;
2951
0
            }
2952
0
        }
2953
2954
0
        return common_n;
2955
0
    }
2956
0
}
2957
2958
/* Adds a new outermost MPLS label to 'flow' and changes 'flow''s Ethernet type
2959
 * to 'mpls_eth_type', which must be an MPLS Ethertype.
2960
 *
2961
 * If the new label is the first MPLS label in 'flow', it is generated as:
2962
 *
2963
 *     - label: 2, if 'flow' is IPv6, otherwise 0.
2964
 *
2965
 *     - TTL: IPv4 or IPv6 TTL, if present and nonzero, otherwise 64.
2966
 *
2967
 *     - TC: IPv4 or IPv6 TOS, if present, otherwise 0.
2968
 *
2969
 *     - BoS: 1.
2970
 *
2971
 * If the new label is the second or later MPLS label in 'flow', it is
2972
 * generated as:
2973
 *
2974
 *     - label: Copied from outer label.
2975
 *
2976
 *     - TTL: Copied from outer label.
2977
 *
2978
 *     - TC: Copied from outer label.
2979
 *
2980
 *     - BoS: 0.
2981
 *
2982
 * 'n' must be flow_count_mpls_labels(flow).  'n' must be less than
2983
 * FLOW_MAX_MPLS_LABELS (because otherwise flow->mpls_lse[] would overflow).
2984
 */
2985
void
2986
flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
2987
               struct flow_wildcards *wc, bool clear_flow_L3)
2988
0
{
2989
0
    ovs_assert(eth_type_mpls(mpls_eth_type));
2990
0
    ovs_assert(n < FLOW_MAX_MPLS_LABELS);
2991
2992
0
    if (n) {
2993
0
        int i;
2994
2995
0
        if (wc) {
2996
0
            memset(&wc->masks.mpls_lse, 0xff, sizeof *wc->masks.mpls_lse * n);
2997
0
        }
2998
0
        for (i = n; i >= 1; i--) {
2999
0
            flow->mpls_lse[i] = flow->mpls_lse[i - 1];
3000
0
        }
3001
0
        flow->mpls_lse[0] = (flow->mpls_lse[1] & htonl(~MPLS_BOS_MASK));
3002
0
    } else {
3003
0
        int label = 0;          /* IPv4 Explicit Null. */
3004
0
        int tc = 0;
3005
0
        int ttl = 64;
3006
3007
0
        if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
3008
0
            label = 2;
3009
0
        }
3010
3011
0
        if (is_ip_any(flow)) {
3012
0
            tc = (flow->nw_tos & IP_DSCP_MASK) >> 2;
3013
0
            if (wc) {
3014
0
                wc->masks.nw_tos |= IP_DSCP_MASK;
3015
0
                wc->masks.nw_ttl = 0xff;
3016
0
            }
3017
3018
0
            if (flow->nw_ttl) {
3019
0
                ttl = flow->nw_ttl;
3020
0
            }
3021
0
        }
3022
3023
0
        flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
3024
3025
0
        if (clear_flow_L3) {
3026
            /* Clear all L3 and L4 fields and dp_hash. */
3027
0
            BUILD_ASSERT(FLOW_WC_SEQ == 42);
3028
0
            memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
3029
0
                   sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
3030
0
            flow->dp_hash = 0;
3031
0
        }
3032
0
    }
3033
0
    flow->dl_type = mpls_eth_type;
3034
0
}
3035
3036
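A usage sketch (illustrative only, not part of flow.c): pushing one MPLS label following the contract above, with 'n' obtained from flow_count_mpls_labels(), and then setting its fields. 'example_push_mpls' is a hypothetical helper; ETH_TYPE_MPLS comes from packets.h.

static void
example_push_mpls(struct flow *flow, struct flow_wildcards *wc)
{
    int n = flow_count_mpls_labels(flow, wc);

    if (n < FLOW_MAX_MPLS_LABELS) {
        flow_push_mpls(flow, n, htons(ETH_TYPE_MPLS), wc, false);
        flow_set_mpls_label(flow, 0, htonl(16));  /* Outermost LSE. */
        flow_set_mpls_ttl(flow, 0, 64);
    }
}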
/* Tries to remove the outermost MPLS label from 'flow'.  Returns true if
3037
 * successful, false otherwise.  On success, sets 'flow''s Ethernet type to
3038
 * 'eth_type'.
3039
 *
3040
 * 'n' must be flow_count_mpls_labels(flow). */
3041
bool
3042
flow_pop_mpls(struct flow *flow, int n, ovs_be16 eth_type,
3043
              struct flow_wildcards *wc)
3044
0
{
3045
0
    int i;
3046
3047
0
    if (n == 0) {
3048
        /* Nothing to pop. */
3049
0
        return false;
3050
0
    } else if (n == FLOW_MAX_MPLS_LABELS) {
3051
0
        if (wc) {
3052
0
            wc->masks.mpls_lse[n - 1] |= htonl(MPLS_BOS_MASK);
3053
0
        }
3054
0
        if (!(flow->mpls_lse[n - 1] & htonl(MPLS_BOS_MASK))) {
3055
            /* Can't pop because we don't know what to fill in mpls_lse[n - 1]. */
3056
0
            return false;
3057
0
        }
3058
0
    }
3059
3060
0
    if (wc) {
3061
0
        memset(&wc->masks.mpls_lse[1], 0xff,
3062
0
               sizeof *wc->masks.mpls_lse * (n - 1));
3063
0
    }
3064
0
    for (i = 1; i < n; i++) {
3065
0
        flow->mpls_lse[i - 1] = flow->mpls_lse[i];
3066
0
    }
3067
0
    flow->mpls_lse[n - 1] = 0;
3068
0
    flow->dl_type = eth_type;
3069
0
    return true;
3070
0
}
3071
3072
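A usage sketch (illustrative only, not part of flow.c): popping the outermost label and restoring an IPv4 Ethertype. flow_pop_mpls() can fail when the stack is full and the innermost LSE lacks the BoS bit, so the return value matters. 'example_pop_mpls' is a hypothetical helper.

static bool
example_pop_mpls(struct flow *flow, struct flow_wildcards *wc)
{
    int n = flow_count_mpls_labels(flow, wc);

    return n > 0 && flow_pop_mpls(flow, n, htons(ETH_TYPE_IP), wc);
}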
/* Sets the MPLS Label that 'flow' matches to 'label', which is interpreted
3073
 * as an OpenFlow 1.1 "mpls_label" value. */
3074
void
3075
flow_set_mpls_label(struct flow *flow, int idx, ovs_be32 label)
3076
1.24k
{
3077
1.24k
    set_mpls_lse_label(&flow->mpls_lse[idx], label);
3078
1.24k
}
3079
3080
/* Sets the MPLS TTL that 'flow' matches to 'ttl', which should be in the
3081
 * range 0...255. */
3082
void
3083
flow_set_mpls_ttl(struct flow *flow, int idx, uint8_t ttl)
3084
47
{
3085
47
    set_mpls_lse_ttl(&flow->mpls_lse[idx], ttl);
3086
47
}
3087
3088
/* Sets the MPLS TC that 'flow' matches to 'tc', which should be in the
3089
 * range 0...7. */
3090
void
3091
flow_set_mpls_tc(struct flow *flow, int idx, uint8_t tc)
3092
1.24k
{
3093
1.24k
    set_mpls_lse_tc(&flow->mpls_lse[idx], tc);
3094
1.24k
}
3095
3096
/* Sets the MPLS BoS bit that 'flow' matches to 'bos', which should be 0 or 1. */
3097
void
3098
flow_set_mpls_bos(struct flow *flow, int idx, uint8_t bos)
3099
108
{
3100
108
    set_mpls_lse_bos(&flow->mpls_lse[idx], bos);
3101
108
}
3102
3103
/* Sets the entire MPLS LSE. */
3104
void
3105
flow_set_mpls_lse(struct flow *flow, int idx, ovs_be32 lse)
3106
0
{
3107
0
    flow->mpls_lse[idx] = lse;
3108
0
}
3109
3110
static void
3111
flow_compose_l7(struct dp_packet *p, const void *l7, size_t l7_len)
3112
0
{
3113
0
    if (l7_len) {
3114
0
        if (l7) {
3115
0
            dp_packet_put(p, l7, l7_len);
3116
0
        } else {
3117
0
            uint8_t *payload = dp_packet_put_uninit(p, l7_len);
3118
0
            for (size_t i = 0; i < l7_len; i++) {
3119
0
                payload[i] = i;
3120
0
            }
3121
0
        }
3122
0
    }
3123
0
}
3124
3125
static size_t
3126
flow_compose_l4(struct dp_packet *p, const struct flow *flow,
3127
                const void *l7, size_t l7_len)
3128
0
{
3129
0
    size_t orig_len = dp_packet_size(p);
3130
3131
0
    if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
3132
0
        || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
3133
0
        if (flow->nw_proto == IPPROTO_TCP) {
3134
0
            struct tcp_header *tcp = dp_packet_put_zeros(p, sizeof *tcp);
3135
0
            tcp->tcp_src = flow->tp_src;
3136
0
            tcp->tcp_dst = flow->tp_dst;
3137
0
            tcp->tcp_ctl = TCP_CTL(ntohs(flow->tcp_flags), 5);
3138
0
            if (!(flow->tcp_flags & htons(TCP_SYN | TCP_FIN | TCP_RST))) {
3139
0
                flow_compose_l7(p, l7, l7_len);
3140
0
            }
3141
0
        } else if (flow->nw_proto == IPPROTO_UDP) {
3142
0
            struct udp_header *udp = dp_packet_put_zeros(p, sizeof *udp);
3143
0
            udp->udp_src = flow->tp_src;
3144
0
            udp->udp_dst = flow->tp_dst;
3145
0
            udp->udp_len = htons(sizeof *udp + l7_len);
3146
0
            flow_compose_l7(p, l7, l7_len);
3147
0
        } else if (flow->nw_proto == IPPROTO_SCTP) {
3148
0
            struct sctp_header *sctp = dp_packet_put_zeros(p, sizeof *sctp);
3149
0
            sctp->sctp_src = flow->tp_src;
3150
0
            sctp->sctp_dst = flow->tp_dst;
3151
            /* XXX Someone should figure out what L7 data to include. */
3152
0
        } else if (flow->nw_proto == IPPROTO_ICMP) {
3153
0
            struct icmp_header *icmp = dp_packet_put_zeros(p, sizeof *icmp);
3154
0
            icmp->icmp_type = ntohs(flow->tp_src);
3155
0
            icmp->icmp_code = ntohs(flow->tp_dst);
3156
0
            if ((icmp->icmp_type == ICMP4_ECHO_REQUEST ||
3157
0
                 icmp->icmp_type == ICMP4_ECHO_REPLY)
3158
0
                && icmp->icmp_code == 0) {
3159
0
                flow_compose_l7(p, l7, l7_len);
3160
0
            } else {
3161
                /* XXX Add inner IP packet for e.g. destination unreachable? */
3162
0
            }
3163
0
        } else if (flow->nw_proto == IPPROTO_IGMP) {
3164
0
            struct igmp_header *igmp = dp_packet_put_zeros(p, sizeof *igmp);
3165
0
            igmp->igmp_type = ntohs(flow->tp_src);
3166
0
            igmp->igmp_code = ntohs(flow->tp_dst);
3167
0
            put_16aligned_be32(&igmp->group, flow->igmp_group_ip4);
3168
0
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
3169
0
            struct icmp6_data_header *icmp6;
3170
3171
0
            icmp6 = dp_packet_put_zeros(p, sizeof *icmp6);
3172
0
            icmp6->icmp6_base.icmp6_type = ntohs(flow->tp_src);
3173
0
            icmp6->icmp6_base.icmp6_code = ntohs(flow->tp_dst);
3174
0
            put_16aligned_be32(icmp6->icmp6_data.be32, flow->igmp_group_ip4);
3175
3176
0
            if (icmp6->icmp6_base.icmp6_code == 0 &&
3177
0
                (icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_SOLICIT ||
3178
0
                 icmp6->icmp6_base.icmp6_type == ND_NEIGHBOR_ADVERT)) {
3179
0
                struct in6_addr *nd_target;
3180
0
                struct ovs_nd_lla_opt *lla_opt;
3181
3182
0
                nd_target = dp_packet_put_zeros(p, sizeof *nd_target);
3183
0
                *nd_target = flow->nd_target;
3184
3185
0
                if (!eth_addr_is_zero(flow->arp_sha)) {
3186
0
                    lla_opt = dp_packet_put_zeros(p, 8);
3187
0
                    lla_opt->len = 1;
3188
0
                    lla_opt->type = ND_OPT_SOURCE_LINKADDR;
3189
0
                    lla_opt->mac = flow->arp_sha;
3190
0
                }
3191
0
                if (!eth_addr_is_zero(flow->arp_tha)) {
3192
0
                    lla_opt = dp_packet_put_zeros(p, 8);
3193
0
                    lla_opt->len = 1;
3194
0
                    lla_opt->type = ND_OPT_TARGET_LINKADDR;
3195
0
                    lla_opt->mac = flow->arp_tha;
3196
0
                }
3197
0
            } else if (icmp6->icmp6_base.icmp6_code == 0 &&
3198
0
                       (icmp6->icmp6_base.icmp6_type == ICMP6_ECHO_REQUEST ||
3199
0
                        icmp6->icmp6_base.icmp6_type == ICMP6_ECHO_REPLY)) {
3200
0
                flow_compose_l7(p, l7, l7_len);
3201
0
            } else {
3202
                /* XXX Add inner IP packet for e.g. destination unreachable? */
3203
0
            }
3204
0
        }
3205
0
    }
3206
3207
0
    return dp_packet_size(p) - orig_len;
3208
0
}
3209
3210
static void
3211
flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
3212
                     uint32_t pseudo_hdr_csum)
3213
0
{
3214
0
    size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
3215
3216
0
    if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
3217
0
        || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
3218
0
        if (flow->nw_proto == IPPROTO_TCP) {
3219
0
            struct tcp_header *tcp = dp_packet_l4(p);
3220
3221
0
            tcp->tcp_csum = 0;
3222
0
            tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
3223
0
                                                      tcp, l4_len));
3224
0
            dp_packet_l4_checksum_set_good(p);
3225
0
        } else if (flow->nw_proto == IPPROTO_UDP) {
3226
0
            struct udp_header *udp = dp_packet_l4(p);
3227
3228
0
            udp->udp_csum = 0;
3229
0
            udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
3230
0
                                                      udp, l4_len));
3231
0
            if (!udp->udp_csum) {
3232
0
                udp->udp_csum = htons(0xffff);
3233
0
            }
3234
0
            dp_packet_l4_checksum_set_good(p);
3235
0
        } else if (flow->nw_proto == IPPROTO_ICMP) {
3236
0
            struct icmp_header *icmp = dp_packet_l4(p);
3237
3238
0
            icmp->icmp_csum = 0;
3239
0
            icmp->icmp_csum = csum(icmp, l4_len);
3240
0
        } else if (flow->nw_proto == IPPROTO_IGMP) {
3241
0
            struct igmp_header *igmp = dp_packet_l4(p);
3242
3243
0
            igmp->igmp_csum = 0;
3244
0
            igmp->igmp_csum = csum(igmp, l4_len);
3245
0
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
3246
0
            struct icmp6_data_header *icmp6 = dp_packet_l4(p);
3247
3248
0
            icmp6->icmp6_base.icmp6_cksum = 0;
3249
0
            icmp6->icmp6_base.icmp6_cksum =
3250
0
                csum_finish(csum_continue(pseudo_hdr_csum, icmp6, l4_len));
3251
0
        }
3252
0
    }
3253
0
}
3254
3255
/* Increase the size of packet composed by 'flow_compose_minimal'
3256
 * up to 'size' bytes.  Fixes all the required packet headers like
3257
 * ip/udp lengths and l3/l4 checksums.
3258
 *
3259
 * 'size' needs to be larger then the current packet size.  */
3260
void
3261
packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
3262
0
{
3263
0
    size_t extra_size;
3264
3265
0
    ovs_assert(size > dp_packet_size(p));
3266
3267
0
    extra_size = size - dp_packet_size(p);
3268
0
    dp_packet_put_zeros(p, extra_size);
3269
3270
0
    if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
3271
0
        struct eth_header *eth = dp_packet_eth(p);
3272
3273
0
        eth->eth_type = htons(dp_packet_size(p));
3274
0
    } else if (dl_type_is_ip_any(flow->dl_type)) {
3275
0
        uint32_t pseudo_hdr_csum;
3276
0
        size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
3277
3278
0
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
3279
0
            struct ip_header *ip = dp_packet_l3(p);
3280
3281
0
            ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
3282
0
            dp_packet_ip_set_header_csum(p, false);
3283
0
            pseudo_hdr_csum = packet_csum_pseudoheader(ip);
3284
0
        } else { /* ETH_TYPE_IPV6 */
3285
0
            struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
3286
3287
0
            nh->ip6_plen = htons(l4_len);
3288
0
            pseudo_hdr_csum = packet_csum_pseudoheader6(nh);
3289
0
        }
3290
3291
0
        if ((!(flow->nw_frag & FLOW_NW_FRAG_ANY)
3292
0
             || !(flow->nw_frag & FLOW_NW_FRAG_LATER))
3293
0
            && flow->nw_proto == IPPROTO_UDP) {
3294
0
            struct udp_header *udp = dp_packet_l4(p);
3295
3296
0
            udp->udp_len = htons(l4_len + extra_size);
3297
0
        }
3298
0
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
3299
0
    }
3300
0
}
3301
3302
/* Puts into 'p' a packet that flow_extract() would parse as having the given
3303
 * 'flow'.
3304
 *
3305
 * (This is useful only for testing, obviously, and the packet isn't really
3306
 * valid.  Lots of fields are just zeroed.)
3307
 *
3308
 * If 'bad_csum' is true, the final IP checksum is invalid.
3309
 *
3310
 * For packets whose protocols can encapsulate arbitrary L7 payloads, 'l7' and
3311
 * 'l7_len' determine that payload:
3312
 *
3313
 *    - If 'l7_len' is zero, no payload is included.
3314
 *
3315
 *    - If 'l7_len' is nonzero and 'l7' is null, an arbitrary payload 'l7_len'
3316
 *      bytes long is included.
3317
 *
3318
 *    - If 'l7_len' is nonzero and 'l7' is nonnull, the payload is copied
3319
 *      from 'l7'. */
3320
void
3321
flow_compose(struct dp_packet *p, const struct flow *flow,
3322
             const void *l7, size_t l7_len, bool bad_csum)
3323
0
{
3324
    /* Add code to this function (or its callees) for emitting new fields or
3325
     * protocols.  (This isn't essential, so it can be skipped for initial
3326
     * testing.) */
3327
0
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
3328
3329
0
    uint32_t pseudo_hdr_csum;
3330
0
    size_t l4_len;
3331
3332
    /* eth_compose() sets l3 pointer and makes sure it is 32-bit aligned. */
3333
0
    eth_compose(p, flow->dl_dst, flow->dl_src, ntohs(flow->dl_type), 0);
3334
0
    if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
3335
0
        struct eth_header *eth = dp_packet_eth(p);
3336
0
        eth->eth_type = htons(dp_packet_size(p));
3337
0
        return;
3338
0
    }
3339
3340
0
    for (int encaps = FLOW_MAX_VLAN_HEADERS - 1; encaps >= 0; encaps--) {
3341
0
        if (flow->vlans[encaps].tci & htons(VLAN_CFI)) {
3342
0
            eth_push_vlan(p, flow->vlans[encaps].tpid,
3343
0
                          flow->vlans[encaps].tci);
3344
0
        }
3345
0
    }
3346
3347
0
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
3348
0
        struct ip_header *ip;
3349
3350
0
        ip = dp_packet_put_zeros(p, sizeof *ip);
3351
0
        ip->ip_ihl_ver = IP_IHL_VER(5, 4);
3352
0
        ip->ip_tos = flow->nw_tos;
3353
0
        ip->ip_ttl = flow->nw_ttl;
3354
0
        ip->ip_proto = flow->nw_proto;
3355
0
        put_16aligned_be32(&ip->ip_src, flow->nw_src);
3356
0
        put_16aligned_be32(&ip->ip_dst, flow->nw_dst);
3357
3358
0
        if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
3359
0
            ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
3360
0
            if (flow->nw_frag & FLOW_NW_FRAG_LATER) {
3361
0
                ip->ip_frag_off |= htons(100);
3362
0
            }
3363
0
        }
3364
3365
0
        dp_packet_set_l4(p, dp_packet_tail(p));
3366
3367
0
        l4_len = flow_compose_l4(p, flow, l7, l7_len);
3368
3369
0
        ip = dp_packet_l3(p);
3370
0
        ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
3371
        /* Checksum has already been zeroed by put_zeros call. */
3372
0
        ip->ip_csum = csum(ip, sizeof *ip);
3373
3374
0
        if (bad_csum) {
3375
            /*
3376
             * Internet checksum is a sum complement to zero, so any other
3377
             * value will result in an invalid checksum. Here, we flip one
3378
             * bit.
3379
             */
3380
0
            ip->ip_csum ^= (OVS_FORCE ovs_be16) 0x1;
3381
0
            dp_packet_ip_checksum_set_bad(p);
3382
0
        } else {
3383
0
            dp_packet_ip_checksum_set_good(p);
3384
0
        }
3385
3386
0
        pseudo_hdr_csum = packet_csum_pseudoheader(ip);
3387
0
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
3388
0
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
3389
0
        struct ovs_16aligned_ip6_hdr *nh;
3390
3391
0
        nh = dp_packet_put_zeros(p, sizeof *nh);
3392
0
        put_16aligned_be32(&nh->ip6_flow, htonl(6 << 28) |
3393
0
                           htonl(flow->nw_tos << 20) | flow->ipv6_label);
3394
0
        nh->ip6_hlim = flow->nw_ttl;
3395
0
        nh->ip6_nxt = flow->nw_proto;
3396
3397
0
        memcpy(&nh->ip6_src, &flow->ipv6_src, sizeof(nh->ip6_src));
3398
0
        memcpy(&nh->ip6_dst, &flow->ipv6_dst, sizeof(nh->ip6_dst));
3399
3400
0
        dp_packet_set_l4(p, dp_packet_tail(p));
3401
3402
0
        l4_len = flow_compose_l4(p, flow, l7, l7_len);
3403
3404
0
        nh = dp_packet_l3(p);
3405
0
        nh->ip6_plen = htons(l4_len);
3406
3407
0
        pseudo_hdr_csum = packet_csum_pseudoheader6(nh);
3408
0
        flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
3409
0
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
3410
0
               flow->dl_type == htons(ETH_TYPE_RARP)) {
3411
0
        struct arp_eth_header *arp;
3412
3413
0
        arp = dp_packet_put_zeros(p, sizeof *arp);
3414
0
        dp_packet_set_l3(p, arp);
3415
0
        arp->ar_hrd = htons(1);
3416
0
        arp->ar_pro = htons(ETH_TYPE_IP);
3417
0
        arp->ar_hln = ETH_ADDR_LEN;
3418
0
        arp->ar_pln = 4;
3419
0
        arp->ar_op = htons(flow->nw_proto);
3420
3421
0
        if (flow->nw_proto == ARP_OP_REQUEST ||
3422
0
            flow->nw_proto == ARP_OP_REPLY) {
3423
0
            put_16aligned_be32(&arp->ar_spa, flow->nw_src);
3424
0
            put_16aligned_be32(&arp->ar_tpa, flow->nw_dst);
3425
0
            arp->ar_sha = flow->arp_sha;
3426
0
            arp->ar_tha = flow->arp_tha;
3427
0
        }
3428
0
    } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
3429
0
        struct nsh_hdr *nsh;
3430
3431
0
        nsh = dp_packet_put_zeros(p, sizeof *nsh);
3432
0
        dp_packet_set_l3(p, nsh);
3433
3434
0
        nsh_set_flags_ttl_len(nsh, flow->nsh.flags, flow->nsh.ttl,
3435
0
                              flow->nsh.mdtype == NSH_M_TYPE1
3436
0
                              ? NSH_M_TYPE1_LEN : NSH_BASE_HDR_LEN);
3437
0
        nsh->next_proto = flow->nsh.np;
3438
0
        nsh->md_type = flow->nsh.mdtype;
3439
0
        put_16aligned_be32(&nsh->path_hdr, flow->nsh.path_hdr);
3440
3441
0
        if (flow->nsh.mdtype == NSH_M_TYPE1) {
3442
0
            for (size_t i = 0; i < 4; i++) {
3443
0
                put_16aligned_be32(&nsh->md1.context[i], flow->nsh.context[i]);
3444
0
            }
3445
0
        }
3446
0
    }
3447
3448
0
    if (eth_type_mpls(flow->dl_type)) {
3449
0
        int n;
3450
3451
0
        p->l2_5_ofs = p->l3_ofs;
3452
0
        for (n = 1; n < FLOW_MAX_MPLS_LABELS; n++) {
3453
0
            if (flow->mpls_lse[n - 1] & htonl(MPLS_BOS_MASK)) {
3454
0
                break;
3455
0
            }
3456
0
        }
3457
0
        while (n > 0) {
3458
0
            push_mpls(p, flow->dl_type, flow->mpls_lse[--n]);
3459
0
        }
3460
0
    }
3461
0
}
3462
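A usage sketch (illustrative only, not part of flow.c): composing a test packet for a flow and padding it to a fixed size, roughly as the unit tests do. 'example_compose' is a hypothetical helper; dp_packet_new() and dp_packet_delete() come from dp-packet.h.

static void
example_compose(const struct flow *flow)
{
    struct dp_packet *p = dp_packet_new(64);

    flow_compose(p, flow, NULL, 32, false);  /* 32 bytes of synthetic L7. */
    if (dp_packet_size(p) < 128) {
        packet_expand(p, flow, 128);  /* Re-fixes lengths and checksums. */
    }
    dp_packet_delete(p);
}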

3463
/* Compressed flow. */
3464
3465
/* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by
3466
 * the caller.  The caller must have already computed 'dst->map' properly to
3467
 * indicate the significant uint64_t elements of 'src'.
3468
 *
3469
 * Normally the significant elements are the ones that are non-zero.  However,
3470
 * when a miniflow is initialized from a (mini)mask, the values can be zeroes,
3471
 * so that the flow and mask always have the same maps. */
3472
void
3473
miniflow_init(struct miniflow *dst, const struct flow *src)
3474
92.9k
{
3475
92.9k
    uint64_t *dst_u64 = miniflow_values(dst);
3476
92.9k
    size_t idx;
3477
3478
347k
    FLOWMAP_FOR_EACH_INDEX(idx, dst->map) {
3479
347k
        *dst_u64++ = flow_u64_value(src, idx);
3480
347k
    }
3481
92.9k
}
3482
3483
/* Initialize the maps of 'flow' from 'src'. */
3484
void
3485
miniflow_map_init(struct miniflow *flow, const struct flow *src)
3486
46.4k
{
3487
    /* Initialize map, counting the number of nonzero elements. */
3488
46.4k
    flowmap_init(&flow->map);
3489
3.95M
    for (size_t i = 0; i < FLOW_U64S; i++) {
3490
3.90M
        if (flow_u64_value(src, i)) {
3491
173k
            flowmap_set(&flow->map, i, 1);
3492
173k
        }
3493
3.90M
    }
3494
46.4k
}
3495
3496
/* Allocates 'n' miniflows, consecutive in memory, initializing the
3497
 * map of each from 'src'.
3498
 * Returns the size of the miniflow data. */
3499
size_t
3500
miniflow_alloc(struct miniflow *dsts[], size_t n, const struct miniflow *src)
3501
46.4k
{
3502
46.4k
    size_t n_values = miniflow_n_values(src);
3503
46.4k
    size_t data_size = MINIFLOW_VALUES_SIZE(n_values);
3504
46.4k
    struct miniflow *dst = xmalloc(n * (sizeof *src + data_size));
3505
46.4k
    size_t i;
3506
3507
46.4k
    COVERAGE_INC(miniflow_malloc);
3508
3509
139k
    for (i = 0; i < n; i++) {
3510
92.9k
        *dst = *src;   /* Copy maps. */
3511
92.9k
        dsts[i] = dst;
3512
92.9k
        dst += 1;      /* Just past the maps. */
3513
92.9k
        dst = (struct miniflow *)((uint64_t *)dst + n_values); /* Skip data. */
3514
92.9k
    }
3515
46.4k
    return data_size;
3516
46.4k
}
3517
3518
/* Returns a miniflow copy of 'src'.  The caller must eventually free() the
3519
 * returned miniflow. */
3520
struct miniflow *
3521
miniflow_create(const struct flow *src)
3522
0
{
3523
0
    struct miniflow tmp;
3524
0
    struct miniflow *dst;
3525
3526
0
    miniflow_map_init(&tmp, src);
3527
3528
0
    miniflow_alloc(&dst, 1, &tmp);
3529
0
    miniflow_init(dst, src);
3530
0
    return dst;
3531
0
}
3532
3533
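A usage sketch (illustrative only, not part of flow.c): a miniflow round trip. Since miniflow_expand() zeroes 'dst' before union-ing in the stored values, expanding a freshly created miniflow reproduces the original flow exactly. 'example_miniflow_roundtrip' is a hypothetical helper.

static void
example_miniflow_roundtrip(const struct flow *src)
{
    struct miniflow *mf = miniflow_create(src);
    struct flow copy;

    miniflow_expand(mf, &copy);
    ovs_assert(!memcmp(src, &copy, sizeof copy));
    free(mf);  /* miniflow_create() result must be freed by the caller. */
}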
/* Initializes 'dst' as a copy of 'src'.  The caller must have allocated
3534
 * 'dst' to have inline space for the 'n_values' data values in 'src'. */
3535
void
3536
miniflow_clone(struct miniflow *dst, const struct miniflow *src,
3537
               size_t n_values)
3538
0
{
3539
0
    *dst = *src;   /* Copy maps. */
3540
0
    memcpy(miniflow_values(dst), miniflow_get_values(src),
3541
0
           MINIFLOW_VALUES_SIZE(n_values));
3542
0
}
3543
3544
/* Initializes 'dst' as a copy of 'src'. */
3545
void
3546
miniflow_expand(const struct miniflow *src, struct flow *dst)
3547
159k
{
3548
159k
    memset(dst, 0, sizeof *dst);
3549
159k
    flow_union_with_miniflow(dst, src);
3550
159k
}
3551
3552
/* Returns true if 'a' and 'b' are equal miniflows, false otherwise. */
3553
bool
3554
miniflow_equal(const struct miniflow *a, const struct miniflow *b)
3555
0
{
3556
0
    const uint64_t *ap = miniflow_get_values(a);
3557
0
    const uint64_t *bp = miniflow_get_values(b);
3558
3559
    /* This is mostly called after a matching hash, so it is highly likely that
3560
     * the maps are equal as well. */
3561
0
    if (OVS_LIKELY(flowmap_equal(a->map, b->map))) {
3562
0
        return !memcmp(ap, bp, miniflow_n_values(a) * sizeof *ap);
3563
0
    } else {
3564
0
        size_t idx;
3565
3566
0
        FLOWMAP_FOR_EACH_INDEX (idx, flowmap_or(a->map, b->map)) {
3567
0
            if ((flowmap_is_set(&a->map, idx) ? *ap++ : 0)
3568
0
                != (flowmap_is_set(&b->map, idx) ? *bp++ : 0)) {
3569
0
                return false;
3570
0
            }
3571
0
        }
3572
0
    }
3573
3574
0
    return true;
3575
0
}
3576
3577
/* Returns false if 'a' and 'b' differ at the places where there are 1-bits
3578
 * in 'mask', true otherwise. */
3579
bool
3580
miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b,
3581
                           const struct minimask *mask)
3582
0
{
3583
0
    const uint64_t *p = miniflow_get_values(&mask->masks);
3584
0
    size_t idx;
3585
3586
0
    FLOWMAP_FOR_EACH_INDEX(idx, mask->masks.map) {
3587
0
        if ((miniflow_get(a, idx) ^ miniflow_get(b, idx)) & *p++) {
3588
0
            return false;
3589
0
        }
3590
0
    }
3591
3592
0
    return true;
3593
0
}
3594
3595
/* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
3596
 * in 'mask', false if they differ. */
3597
bool
3598
miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
3599
                                const struct minimask *mask)
3600
0
{
3601
0
    const uint64_t *p = miniflow_get_values(&mask->masks);
3602
0
    size_t idx;
3603
3604
0
    FLOWMAP_FOR_EACH_INDEX(idx, mask->masks.map) {
3605
0
        if ((miniflow_get(a, idx) ^ flow_u64_value(b, idx)) & *p++) {
3606
0
            return false;
3607
0
        }
3608
0
    }
3609
3610
0
    return true;
3611
0
}
3612
3613

3614
void
3615
minimask_init(struct minimask *mask, const struct flow_wildcards *wc)
3616
46.4k
{
3617
46.4k
    miniflow_init(&mask->masks, &wc->masks);
3618
46.4k
}
3619
3620
/* Returns a minimask copy of 'wc'.  The caller must eventually free the
3621
 * returned minimask with free(). */
3622
struct minimask *
3623
minimask_create(const struct flow_wildcards *wc)
3624
0
{
3625
0
    return (struct minimask *)miniflow_create(&wc->masks);
3626
0
}
3627
3628
/* Initializes 'dst_' as the bit-wise "and" of 'a_' and 'b_'.
3629
 *
3630
 * The caller must provide room for FLOW_U64S "uint64_t"s in 'storage', which
3631
 * must follow '*dst_' in memory, for use by 'dst_'.  The caller must *not*
3632
 * free 'dst_' with free(). */
3633
void
3634
minimask_combine(struct minimask *dst_,
3635
                 const struct minimask *a_, const struct minimask *b_,
3636
                 uint64_t storage[FLOW_U64S])
3637
0
{
3638
0
    struct miniflow *dst = &dst_->masks;
3639
0
    uint64_t *dst_values = storage;
3640
0
    const struct miniflow *a = &a_->masks;
3641
0
    const struct miniflow *b = &b_->masks;
3642
0
    size_t idx;
3643
3644
0
    flowmap_init(&dst->map);
3645
3646
0
    FLOWMAP_FOR_EACH_INDEX(idx, flowmap_and(a->map, b->map)) {
3647
        /* Both 'a' and 'b' have non-zero data at 'idx'. */
3648
0
        uint64_t mask = *miniflow_get__(a, idx) & *miniflow_get__(b, idx);
3649
3650
0
        if (mask) {
3651
0
            flowmap_set(&dst->map, idx, 1);
3652
0
            *dst_values++ = mask;
3653
0
        }
3654
0
    }
3655
0
}
3656
3657
/* Initializes 'wc' as a copy of 'mask'. */
3658
void
3659
minimask_expand(const struct minimask *mask, struct flow_wildcards *wc)
3660
46.4k
{
3661
46.4k
    miniflow_expand(&mask->masks, &wc->masks);
3662
46.4k
}
3663
3664
/* Returns true if 'a' and 'b' are the same flow mask, false otherwise.
3665
 * Minimasks may not have zero data values, so for the minimasks to be the
3666
 * same, they need to have the same map and the same data values. */
3667
bool
3668
minimask_equal(const struct minimask *a, const struct minimask *b)
3669
0
{
3670
    /* At first glance, it might seem that this can be reasonably optimized
3671
     * into a single memcmp() for the total size of the region.  Such an
3672
     * optimization will work OK with most implementations of memcmp() that
3673
     * proceed from the start of the regions to be compared to the end in
3674
     * reasonably sized chunks.  However, memcmp() is not required to be
3675
     * implemented that way, and an implementation that, for example, compares
3676
     * all of the bytes in both regions without early exit when it finds a
3677
     * difference, or one that compares, say, 64 bytes at a time, could access
3678
     * an unmapped region of memory if minimasks 'a' and 'b' have different
3679
     * lengths.  By first checking that the maps are the same with the first
3680
     * memcmp(), we verify that 'a' and 'b' have the same length and therefore
3681
     * ensure that the second memcmp() is safe. */
3682
0
    return (!memcmp(a, b, sizeof *a)
3683
0
            && !memcmp(a + 1, b + 1,
3684
0
                       MINIFLOW_VALUES_SIZE(miniflow_n_values(&a->masks))));
3685
0
}
3686
3687
/* Returns true if at least one bit matched by 'b' is wildcarded by 'a',
3688
 * false otherwise. */
3689
bool
3690
minimask_has_extra(const struct minimask *a, const struct minimask *b)
3691
0
{
3692
0
    const uint64_t *bp = miniflow_get_values(&b->masks);
3693
0
    size_t idx;
3694
3695
0
    FLOWMAP_FOR_EACH_INDEX(idx, b->masks.map) {
3696
0
        uint64_t b_u64 = *bp++;
3697
3698
        /* 'b_u64' is non-zero, check if the data in 'a' is either zero
3699
         * or misses some of the bits in 'b_u64'. */
3700
0
        if (!MINIFLOW_IN_MAP(&a->masks, idx)
3701
0
            || ((*miniflow_get__(&a->masks, idx) & b_u64) != b_u64)) {
3702
0
            return true; /* 'a' wildcards some bits 'b' doesn't. */
3703
0
        }
3704
0
    }
3705
3706
0
    return false;
3707
0
}
3708
3709
void
3710
flow_limit_vlans(int vlan_limit)
3711
0
{
3712
0
    if (vlan_limit <= 0) {
3713
0
        flow_vlan_limit = FLOW_MAX_VLAN_HEADERS;
3714
0
    } else {
3715
0
        flow_vlan_limit = MIN(vlan_limit, FLOW_MAX_VLAN_HEADERS);
3716
0
    }
3717
0
}
3718
3719
struct netdev *
3720
flow_get_tunnel_netdev(struct flow_tnl *tunnel)
3721
0
{
3722
0
    char iface[IFNAMSIZ];
3723
0
    struct in6_addr ip6;
3724
0
    struct in6_addr gw;
3725
3726
0
    if (tunnel->ip_src) {
3727
0
        in6_addr_set_mapped_ipv4(&ip6, tunnel->ip_src);
3728
0
    } else if (ipv6_addr_is_set(&tunnel->ipv6_src)) {
3729
0
        ip6 = tunnel->ipv6_src;
3730
0
    } else {
3731
0
        return NULL;
3732
0
    }
3733
3734
0
    if (!ovs_router_lookup(0, &ip6, iface, NULL, &gw)) {
3735
0
        return NULL;
3736
0
    }
3737
3738
0
    return netdev_from_name(iface);
3739
0
}