Coverage Report

Created: 2025-11-24 06:48

/src/openvswitch/lib/ipf.c
Line | Count | Source
1
/*
2
 * Copyright (c) 2019 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include <ctype.h>
19
#include <errno.h>
20
#include <sys/types.h>
21
#include <netinet/in.h>
22
#include <netinet/ip6.h>
23
#include <netinet/icmp6.h>
24
#include <string.h>
25
26
#include "coverage.h"
27
#include "csum.h"
28
#include "ipf.h"
29
#include "latch.h"
30
#include "openvswitch/hmap.h"
31
#include "openvswitch/poll-loop.h"
32
#include "openvswitch/types.h"
33
#include "openvswitch/vlog.h"
34
#include "ovs-atomic.h"
35
#include "packets.h"
36
#include "util.h"
37
38
VLOG_DEFINE_THIS_MODULE(ipf);
39
COVERAGE_DEFINE(ipf_stuck_frag_list_expired);
40
COVERAGE_DEFINE(ipf_stuck_frag_list_purged);
41
COVERAGE_DEFINE(ipf_l3csum_checked);
42
COVERAGE_DEFINE(ipf_l3csum_err);
43
44
enum {
45
    IPV4_PACKET_MAX_HDR_SIZE = 60,
46
    IPV4_PACKET_MAX_SIZE = 65535,
47
    IPV6_PACKET_MAX_DATA = 65535,
48
};
49
50
enum ipf_list_state {
51
    IPF_LIST_STATE_UNUSED,
52
    IPF_LIST_STATE_REASS_FAIL,
53
    IPF_LIST_STATE_OTHER_SEEN,
54
    IPF_LIST_STATE_FIRST_SEEN,
55
    IPF_LIST_STATE_LAST_SEEN,
56
    IPF_LIST_STATE_FIRST_LAST_SEEN,
57
    IPF_LIST_STATE_COMPLETED,
58
    IPF_LIST_STATE_NUM,
59
};
60
61
static char *ipf_state_name[IPF_LIST_STATE_NUM] =
62
    {"unused", "reassemble fail", "other frag", "first frag", "last frag",
63
     "first/last frag", "complete"};
64
65
enum ipf_list_type {
66
    IPF_FRAG_COMPLETED_LIST,
67
    IPF_FRAG_EXPIRY_LIST,
68
};
69
70
enum {
71
    IPF_INVALID_IDX = -1,
72
    IPF_V4_FRAG_SIZE_LBOUND = 400,
73
    IPF_V4_FRAG_SIZE_MIN_DEF = 1200,
74
    IPF_V6_FRAG_SIZE_LBOUND = 400, /* Useful for testing. */
75
    IPF_V6_FRAG_SIZE_MIN_DEF = 1280,
76
    IPF_MAX_FRAGS_DEFAULT = 1000,
77
    IPF_NFRAG_UBOUND = 5000,
78
};
79
80
enum ipf_counter_type {
81
    IPF_NFRAGS_ACCEPTED,
82
    IPF_NFRAGS_COMPL_SENT,
83
    IPF_NFRAGS_EXPIRED,
84
    IPF_NFRAGS_TOO_SMALL,
85
    IPF_NFRAGS_OVERLAP,
86
    IPF_NFRAGS_PURGED,
87
    IPF_NFRAGS_NUM_CNTS,
88
};
89
90
union ipf_addr {
91
    ovs_be32 ipv4;
92
    struct in6_addr ipv6;
93
};
94
95
/* Represents a single fragment; part of a list of fragments. */
96
struct ipf_frag {
97
    struct dp_packet *pkt;
98
    uint16_t start_data_byte;
99
    uint16_t end_data_byte;
100
};
101
102
/* The key for a collection of fragments potentially making up an unfragmented
103
 * packet. */
104
struct ipf_list_key {
105
    /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the first
106
     * two members. */
107
    union ipf_addr src_addr;
108
    union ipf_addr dst_addr;
109
    uint32_t recirc_id;
110
    ovs_be32 ip_id;   /* V6 is 32 bits. */
111
    ovs_be16 dl_type;
112
    uint16_t zone;
113
    uint8_t nw_proto;
114
};
115
116
/* A collection of fragments potentially making up an unfragmented packet. */
117
struct ipf_list {
118
    struct hmap_node node;         /* In struct ipf's 'frag_lists'. */
119
    struct ovs_list list_node;     /* In struct ipf's 'frag_exp_list' or
120
                                    * 'frag_complete_list'. */
121
    struct ipf_frag *frag_list;    /* List of fragments for this list. */
122
    struct ipf_list_key key;       /* The key for the fragment list. */
123
    struct dp_packet *reass_execute_ctx; /* Reassembled packet. */
124
    long long expiration;          /* In milliseconds. */
125
    int last_sent_idx;             /* Last sent fragment idx. */
126
    int last_inuse_idx;            /* Last inuse fragment idx. */
127
    int size;                      /* Fragment list size. */
128
    uint8_t state;                 /* Frag list state; see ipf_list_state. */
129
};
130
131
/* Represents a reassembled packet which typically is passed through
132
 * conntrack. */
133
struct reassembled_pkt {
134
    struct ovs_list rp_list_node;  /* In struct ipf's
135
                                    * 'reassembled_pkt_list'. */
136
    struct dp_packet *pkt;
137
    struct ipf_list *list;
138
};
139
140
struct ipf {
141
    /* The clean thread is used to clean up fragments in the 'ipf'
142
     * module if packet batches are no longer sent through its user. */
143
    pthread_t ipf_clean_thread;
144
    struct latch ipf_clean_thread_exit;
145
146
    int max_v4_frag_list_size;
147
148
    struct ovs_mutex ipf_lock; /* Protects all of the following. */
149
    /* These contain 'struct ipf_list's. */
150
    struct hmap frag_lists OVS_GUARDED;
151
    struct ovs_list frag_exp_list OVS_GUARDED;
152
    struct ovs_list frag_complete_list OVS_GUARDED;
153
    /* Contains 'struct reassembled_pkt's. */
154
    struct ovs_list reassembled_pkt_list OVS_GUARDED;
155
156
    /* Used to allow disabling fragmentation reassembly. */
157
    atomic_bool ifp_v4_enabled;
158
    atomic_bool ifp_v6_enabled;
159
160
    /* Will be clamped above 400 bytes; the value chosen should handle
161
     * alg control packets of interest that use string encoding of mutable
162
     * IP fields; meaning, the control packets should not be fragmented. */
163
    atomic_uint min_v4_frag_size;
164
    atomic_uint min_v6_frag_size;
165
166
    /* Configurable maximum allowable fragments in process. */
167
    atomic_uint nfrag_max;
168
169
    /* Number of fragments in process. */
170
    atomic_count nfrag;
171
172
    atomic_uint64_t n4frag_cnt[IPF_NFRAGS_NUM_CNTS];
173
    atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS];
174
};
175
176
static void
177
ipf_print_reass_packet(const char *es, const void *pkt)
178
0
{
179
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
180
0
    if (!VLOG_DROP_WARN(&rl)) {
181
0
        struct ds ds = DS_EMPTY_INITIALIZER;
182
0
        ds_put_hex_dump(&ds, pkt, 128, 0, false);
183
0
        VLOG_WARN("%s\n%s", es, ds_cstr(&ds));
184
0
        ds_destroy(&ds);
185
0
    }
186
0
}
187
188
static void
189
ipf_count(struct ipf *ipf, bool v6, enum ipf_counter_type cntr)
190
0
{
191
0
    atomic_count_inc64(v6 ? &ipf->n6frag_cnt[cntr] : &ipf->n4frag_cnt[cntr]);
192
0
}
193
194
static bool
195
ipf_get_v4_enabled(struct ipf *ipf)
196
0
{
197
0
    bool ifp_v4_enabled_;
198
0
    atomic_read_relaxed(&ipf->ifp_v4_enabled, &ifp_v4_enabled_);
199
0
    return ifp_v4_enabled_;
200
0
}
201
202
static bool
203
ipf_get_v6_enabled(struct ipf *ipf)
204
0
{
205
0
    bool ifp_v6_enabled_;
206
0
    atomic_read_relaxed(&ipf->ifp_v6_enabled, &ifp_v6_enabled_);
207
0
    return ifp_v6_enabled_;
208
0
}
209
210
static bool
211
ipf_get_enabled(struct ipf *ipf)
212
0
{
213
0
    return ipf_get_v4_enabled(ipf) || ipf_get_v6_enabled(ipf);
214
0
}
215
216
static uint32_t
217
ipf_addr_hash_add(uint32_t hash, const union ipf_addr *addr)
218
0
{
219
0
    BUILD_ASSERT_DECL(sizeof *addr % 4 == 0);
220
0
    return hash_add_bytes32(hash, (const uint32_t *) addr, sizeof *addr);
221
0
}
222
223
/* Adds a list of fragments to the list tracking expiry of yet to be
224
 * completed reassembled packets, hence subject to expiry. */
225
static void
226
ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list *ipf_list,
227
                    long long now)
228
   /* OVS_REQUIRES(ipf->ipf_lock) */
229
0
{
230
0
    enum {
231
0
        IPF_FRAG_LIST_TIMEOUT = 15000,
232
0
    };
233
234
0
    ipf_list->expiration = now + IPF_FRAG_LIST_TIMEOUT;
235
0
    ovs_list_push_back(frag_exp_list, &ipf_list->list_node);
236
0
}
237
238
/* Adds a list of fragments to the list of completed packets, which will be
239
 * subsequently transmitted. */
240
static void
241
ipf_completed_list_add(struct ovs_list *frag_complete_list,
242
                       struct ipf_list *ipf_list)
243
    /* OVS_REQUIRES(ipf_lock) */
244
0
{
245
0
    ovs_list_push_back(frag_complete_list, &ipf_list->list_node);
246
0
}
247
248
/* Adds a reassembled packet to the list of reassembled packets, awaiting some
249
 * processing, such as being sent through conntrack. */
250
static void
251
ipf_reassembled_list_add(struct ovs_list *reassembled_pkt_list,
252
                         struct reassembled_pkt *rp)
253
    /* OVS_REQUIRES(ipf_lock) */
254
0
{
255
0
    ovs_list_push_back(reassembled_pkt_list, &rp->rp_list_node);
256
0
}
257
258
/* Removes a frag list from tracking data structures and frees list heap
259
 * memory. */
260
static void
261
ipf_list_clean(struct hmap *frag_lists,
262
               struct ipf_list *ipf_list)
263
    /* OVS_REQUIRES(ipf_lock) */
264
0
{
265
0
    ovs_list_remove(&ipf_list->list_node);
266
0
    hmap_remove(frag_lists, &ipf_list->node);
267
0
    free(ipf_list->frag_list);
268
0
    free(ipf_list);
269
0
}
270
271
/* Removes a frag list sitting on the expiry list from tracking
272
 * data structures and frees list heap memory. */
273
static void
274
ipf_expiry_list_clean(struct hmap *frag_lists,
275
                      struct ipf_list *ipf_list)
276
    /* OVS_REQUIRES(ipf_lock) */
277
0
{
278
0
    ipf_list_clean(frag_lists, ipf_list);
279
0
}
280
281
/* Removes a frag list sitting on the completed list from tracking
282
 * data structures and frees list heap memory. */
283
static void
284
ipf_completed_list_clean(struct hmap *frag_lists,
285
                         struct ipf_list *ipf_list)
286
    /* OVS_REQUIRES(ipf_lock) */
287
0
{
288
0
    ipf_list_clean(frag_lists, ipf_list);
289
0
}
290
291
static void
292
ipf_expiry_list_remove(struct ipf_list *ipf_list)
293
    /* OVS_REQUIRES(ipf_lock) */
294
0
{
295
0
    ovs_list_remove(&ipf_list->list_node);
296
0
}
297
298
static void
299
ipf_reassembled_list_remove(struct reassembled_pkt *rp)
300
    /* OVS_REQUIRES(ipf_lock) */
301
0
{
302
0
    ovs_list_remove(&rp->rp_list_node);
303
0
}
304
305
/* Normally, access to ipf lists requires holding ipf_lock.  This is a special
306
 * function to work around this restriction.  As a consequence of not taking
307
 * any locks, the result of this function can be wrong.  Must only be used in
308
 * cases where the wrong result doesn't impact the overall correctness of the
309
 * logic, e.g., a quick check if there is any work to be done on current
310
 * iteration that otherwise would block all other threads by taking a lock.
311
 *
312
 * XXX: We need a proper thread-safe solution for this instead.
313
 */
314
static bool
315
ipf_list_is_empty_unsafe(struct ovs_list *list)
316
    OVS_NO_THREAD_SAFETY_ANALYSIS
317
0
{
318
0
    return ovs_list_is_empty(list);
319
0
}
320
321
/* Symmetric */
322
static uint32_t
323
ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis)
324
0
{
325
0
    uint32_t hsrc, hdst, hash;
326
0
    hsrc = hdst = basis;
327
0
    hsrc = ipf_addr_hash_add(hsrc, &key->src_addr);
328
0
    hdst = ipf_addr_hash_add(hdst, &key->dst_addr);
329
0
    hash = hsrc ^ hdst;
330
331
    /* Hash the rest of the key. */
332
0
    return hash_words((uint32_t *) (&key->dst_addr + 1),
333
0
                      (uint32_t *) (key + 1) -
334
0
                      (uint32_t *) (&key->dst_addr + 1),
335
0
                      hash);
336
0
}
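
The symmetry noted above comes from hashing the source and destination addresses independently from the same basis and XORing the results, so swapping the two addresses cannot change the hash. A minimal stand-alone sketch of that property (toy_hash() is an invented stand-in, not OVS's hash_add_bytes32(); the basis and addresses are made-up values):

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-in for accumulating an address into a hash. */
static uint32_t
toy_hash(uint32_t basis, uint32_t addr)
{
    return (basis ^ addr) * UINT32_C(0x9e3779b1);
}

int
main(void)
{
    uint32_t basis = 42;
    uint32_t src = 0x0a000001;   /* 10.0.0.1 */
    uint32_t dst = 0x0a000002;   /* 10.0.0.2 */

    /* Same construction as ipf_list_key_hash(): hash each address from the
     * same basis, then XOR the two partial hashes. */
    uint32_t fwd = toy_hash(basis, src) ^ toy_hash(basis, dst);
    uint32_t rev = toy_hash(basis, dst) ^ toy_hash(basis, src);

    assert(fwd == rev);          /* Swapping src/dst leaves the hash alone. */
    printf("%#"PRIx32"\n", fwd);
    return 0;
}
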
337
338
static bool
339
ipf_is_first_v4_frag(const struct dp_packet *pkt)
340
0
{
341
0
    const struct ip_header *l3 = dp_packet_l3(pkt);
342
0
    if (!(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) &&
343
0
        l3->ip_frag_off & htons(IP_MORE_FRAGMENTS)) {
344
0
        return true;
345
0
    }
346
0
    return false;
347
0
}
348
349
static bool
350
ipf_is_last_v4_frag(const struct dp_packet *pkt)
351
0
{
352
0
    const struct ip_header *l3 = dp_packet_l3(pkt);
353
0
    if (l3->ip_frag_off & htons(IP_FRAG_OFF_MASK) &&
354
0
        !(l3->ip_frag_off & htons(IP_MORE_FRAGMENTS))) {
355
0
        return true;
356
0
    }
357
0
    return false;
358
0
}
359
360
static bool
361
ipf_is_v6_frag(ovs_be16 ip6f_offlg)
362
0
{
363
0
    if (ip6f_offlg & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) {
364
0
        return true;
365
0
    }
366
0
    return false;
367
0
}
368
369
static bool
370
ipf_is_first_v6_frag(ovs_be16 ip6f_offlg)
371
0
{
372
0
    if (!(ip6f_offlg & IP6F_OFF_MASK) &&
373
0
        ip6f_offlg & IP6F_MORE_FRAG) {
374
0
        return true;
375
0
    }
376
0
    return false;
377
0
}
378
379
static bool
380
ipf_is_last_v6_frag(ovs_be16 ip6f_offlg)
381
0
{
382
0
    if ((ip6f_offlg & IP6F_OFF_MASK) &&
383
0
        !(ip6f_offlg & IP6F_MORE_FRAG)) {
384
0
        return true;
385
0
    }
386
0
    return false;
387
0
}
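
To make the four helpers above concrete, here is a stand-alone sketch of the same classification applied to the IPv4 fragment-offset word (the constants and sample values are simplified host-order stand-ins for OVS's IP_MORE_FRAGMENTS and IP_FRAG_OFF_MASK, which ipf.c compares in network byte order):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_MORE_FRAGMENTS 0x2000u   /* "MF" flag. */
#define TOY_FRAG_OFF_MASK  0x1fffu   /* 13-bit offset, in 8-byte units. */

static const char *
toy_classify(uint16_t frag_off_word)
{
    bool mf = frag_off_word & TOY_MORE_FRAGMENTS;
    bool off = frag_off_word & TOY_FRAG_OFF_MASK;

    if (mf && !off) {
        return "first fragment";     /* ipf_is_first_v4_frag() case. */
    } else if (!mf && off) {
        return "last fragment";      /* ipf_is_last_v4_frag() case. */
    } else if (mf && off) {
        return "middle fragment";
    } else {
        return "not a fragment";
    }
}

int
main(void)
{
    printf("%s\n", toy_classify(0x2000));        /* MF set, offset 0.     */
    printf("%s\n", toy_classify(0x2000 | 185));  /* MF set, offset 185.   */
    printf("%s\n", toy_classify(185));           /* MF clear, offset 185. */
    return 0;
}
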
388
389
/* Checks whether a collection of fragments forms a complete packet. */
390
static bool
391
ipf_list_complete(const struct ipf_list *ipf_list)
392
    /* OVS_REQUIRES(ipf_lock) */
393
0
{
394
0
    for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
395
0
        if (ipf_list->frag_list[i - 1].end_data_byte + 1
396
0
            != ipf_list->frag_list[i].start_data_byte) {
397
0
            return false;
398
0
        }
399
0
    }
400
0
    return true;
401
0
}
402
403
/* Runs O(n) for a sorted or almost sorted list. */
404
static void
405
ipf_sort(struct ipf_frag *frag_list, size_t last_idx)
406
    /* OVS_REQUIRES(ipf_lock) */
407
0
{
408
0
    for (int li = 1; li <= last_idx; li++) {
409
0
        struct ipf_frag ipf_frag = frag_list[li];
410
0
        int ci = li - 1;
411
0
        while (ci >= 0 &&
412
0
               frag_list[ci].start_data_byte > ipf_frag.start_data_byte) {
413
0
            frag_list[ci + 1] = frag_list[ci];
414
0
            ci--;
415
0
        }
416
0
        frag_list[ci + 1] = ipf_frag;
417
0
    }
418
0
}
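
A stand-alone sketch of the insertion sort above, with struct ipf_frag reduced to just the sort key (the fragment offsets are made-up). Because fragments usually arrive nearly in order, the inner while loop rarely runs more than a step or two:

#include <stdio.h>

/* Minimal stand-in for struct ipf_frag: only the sort key matters here. */
struct toy_frag {
    unsigned int start_data_byte;
};

/* Same insertion-sort shape as ipf_sort(); 'last_idx' is the index of the
 * final element, matching ipf_list->last_inuse_idx. */
static void
toy_sort(struct toy_frag *frag_list, size_t last_idx)
{
    for (size_t li = 1; li <= last_idx; li++) {
        struct toy_frag frag = frag_list[li];
        int ci = li - 1;
        while (ci >= 0 && frag_list[ci].start_data_byte > frag.start_data_byte) {
            frag_list[ci + 1] = frag_list[ci];
            ci--;
        }
        frag_list[ci + 1] = frag;
    }
}

int
main(void)
{
    /* Almost sorted: only the last two fragments arrived out of order,
     * so the inner while loop runs a single step. */
    struct toy_frag frags[] = { {0}, {1480}, {4440}, {2960} };

    toy_sort(frags, 3);
    for (size_t i = 0; i < 4; i++) {
        printf("%u ", frags[i].start_data_byte);
    }
    printf("\n");   /* Prints: 0 1480 2960 4440 */
    return 0;
}
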
419
420
/* Called on a sorted complete list of v4 fragments to reassemble them into
421
 * a single packet that can be processed, such as passing through conntrack.
422
 */
423
static struct dp_packet *
424
ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
425
    /* OVS_REQUIRES(ipf_lock) */
426
0
{
427
0
    struct ipf_frag *frag_list = ipf_list->frag_list;
428
0
    struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt);
429
0
    dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt));
430
0
    struct ip_header *l3 = dp_packet_l3(pkt);
431
0
    int len = ntohs(l3->ip_tot_len);
432
0
    int orig_len = dp_packet_size(pkt);
433
434
0
    int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
435
0
                   frag_list[1].start_data_byte + 1;
436
437
0
    if (orig_len + rest_len > IPV4_PACKET_MAX_SIZE) {
438
0
        ipf_print_reass_packet(
439
0
            "Unsupported big reassembled v4 packet; v4 hdr:", l3);
440
0
        dp_packet_delete(pkt);
441
0
        return NULL;
442
0
    }
443
444
0
    dp_packet_prealloc_tailroom(pkt, rest_len);
445
446
0
    for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
447
0
        size_t add_len = frag_list[i].end_data_byte -
448
0
                         frag_list[i].start_data_byte + 1;
449
0
        const char *l4 = dp_packet_l4(frag_list[i].pkt);
450
0
        dp_packet_put(pkt, l4, add_len);
451
0
    }
452
453
0
    len += rest_len;
454
0
    l3 = dp_packet_l3(pkt);
455
0
    ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
456
0
    if (dp_packet_ip_checksum_valid(pkt)) {
457
0
        dp_packet_ip_checksum_set_partial(pkt);
458
0
    } else {
459
0
        l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
460
0
                                    new_ip_frag_off);
461
0
        l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
462
0
    }
463
0
    l3->ip_tot_len = htons(len);
464
0
    l3->ip_frag_off = new_ip_frag_off;
465
0
    dp_packet_set_l2_pad_size(pkt, 0);
466
467
0
    return pkt;
468
0
}
469
470
/* Called on a sorted complete list of v6 fragments to reassemble them into
471
 * a single packet that can be processed, such as passing through conntrack.
472
 */
473
static struct dp_packet *
474
ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
475
    /* OVS_REQUIRES(ipf_lock) */
476
0
{
477
0
    struct ipf_frag *frag_list = ipf_list->frag_list;
478
0
    struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt);
479
0
    dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt));
480
0
    struct  ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
481
0
    int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag);
482
0
    int orig_len = dp_packet_size(pkt);
483
484
0
    int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
485
0
                   frag_list[1].start_data_byte + 1;
486
487
0
    if (orig_len + rest_len > IPV6_PACKET_MAX_DATA) {
488
0
        ipf_print_reass_packet(
489
0
             "Unsupported big reassembled v6 packet; v6 hdr:", l3);
490
0
        dp_packet_delete(pkt);
491
0
        return NULL;
492
0
    }
493
494
0
    dp_packet_prealloc_tailroom(pkt, rest_len);
495
496
0
    for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
497
0
        size_t add_len = frag_list[i].end_data_byte -
498
0
                          frag_list[i].start_data_byte + 1;
499
0
        const char *l4 = dp_packet_l4(frag_list[i].pkt);
500
0
        dp_packet_put(pkt, l4, add_len);
501
0
    }
502
503
0
    pl += rest_len;
504
0
    l3 = dp_packet_l3(pkt);
505
506
0
    uint8_t nw_proto = l3->ip6_nxt;
507
0
    uint8_t nw_frag = 0;
508
0
    const void *data = l3 + 1;
509
0
    size_t datasize = pl;
510
511
0
    const struct ovs_16aligned_ip6_frag *frag_hdr;
512
0
    if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr,
513
0
                             NULL) || !nw_frag || !frag_hdr) {
514
515
0
        ipf_print_reass_packet("Unparsed reassembled v6 packet; v6 hdr:", l3);
516
0
        dp_packet_delete(pkt);
517
0
        return NULL;
518
0
    }
519
520
0
    struct ovs_16aligned_ip6_frag *fh =
521
0
        CONST_CAST(struct ovs_16aligned_ip6_frag *, frag_hdr);
522
0
    fh->ip6f_offlg = 0;
523
0
    l3->ip6_plen = htons(pl);
524
0
    l3->ip6_ctlun.ip6_un1.ip6_un1_nxt = nw_proto;
525
0
    dp_packet_set_l2_pad_size(pkt, 0);
526
0
    return pkt;
527
0
}
528
529
/* Called when a frag list state transitions to another state. This is
530
* triggered by a new fragment for the list being received. Returns a reassembled
531
* packet if this fragment has completed one. */
532
static struct reassembled_pkt *
533
ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list,
534
                          bool ff, bool lf, bool v6)
535
    OVS_REQUIRES(ipf->ipf_lock)
536
0
{
537
0
    enum ipf_list_state curr_state = ipf_list->state;
538
0
    struct reassembled_pkt *ret = NULL;
539
0
    enum ipf_list_state next_state;
540
0
    switch (curr_state) {
541
0
    case IPF_LIST_STATE_UNUSED:
542
0
    case IPF_LIST_STATE_OTHER_SEEN:
543
0
        if (ff) {
544
0
            next_state = IPF_LIST_STATE_FIRST_SEEN;
545
0
        } else if (lf) {
546
0
            next_state = IPF_LIST_STATE_LAST_SEEN;
547
0
        } else {
548
0
            next_state = IPF_LIST_STATE_OTHER_SEEN;
549
0
        }
550
0
        break;
551
0
    case IPF_LIST_STATE_FIRST_SEEN:
552
0
        if (lf) {
553
0
            next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
554
0
        } else {
555
0
            next_state = IPF_LIST_STATE_FIRST_SEEN;
556
0
        }
557
0
        break;
558
0
    case IPF_LIST_STATE_LAST_SEEN:
559
0
        if (ff) {
560
0
            next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
561
0
        } else {
562
0
            next_state = IPF_LIST_STATE_LAST_SEEN;
563
0
        }
564
0
        break;
565
0
    case IPF_LIST_STATE_FIRST_LAST_SEEN:
566
0
        next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
567
0
        break;
568
0
    case IPF_LIST_STATE_COMPLETED:
569
0
    case IPF_LIST_STATE_REASS_FAIL:
570
0
    case IPF_LIST_STATE_NUM:
571
0
    default:
572
0
        OVS_NOT_REACHED();
573
0
    }
574
575
0
    if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN) {
576
0
        ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx);
577
0
        if (ipf_list_complete(ipf_list)) {
578
0
            struct dp_packet *reass_pkt = v6
579
0
                ? ipf_reassemble_v6_frags(ipf_list)
580
0
                : ipf_reassemble_v4_frags(ipf_list);
581
0
            if (reass_pkt) {
582
0
                struct reassembled_pkt *rp = xzalloc(sizeof *rp);
583
0
                rp->pkt = reass_pkt;
584
0
                rp->list = ipf_list;
585
0
                ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp);
586
0
                ipf_expiry_list_remove(ipf_list);
587
0
                next_state = IPF_LIST_STATE_COMPLETED;
588
0
                ret = rp;
589
0
            } else {
590
0
                next_state = IPF_LIST_STATE_REASS_FAIL;
591
0
            }
592
0
        }
593
0
    }
594
0
    ipf_list->state = next_state;
595
596
0
    return ret;
597
0
}
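
A condensed, stand-alone restatement of the transition logic above (the enum names are shortened, and the reassembly step that can further move FIRST_LAST_SEEN to COMPLETED or REASS_FAIL is deliberately left out):

#include <stdio.h>

enum toy_state { UNUSED, OTHER_SEEN, FIRST_SEEN, LAST_SEEN, FIRST_LAST_SEEN };

/* 'ff'/'lf' say whether the newly received fragment is the first/last one. */
static enum toy_state
toy_next_state(enum toy_state curr, int ff, int lf)
{
    switch (curr) {
    case UNUSED:
    case OTHER_SEEN:
        return ff ? FIRST_SEEN : lf ? LAST_SEEN : OTHER_SEEN;
    case FIRST_SEEN:
        return lf ? FIRST_LAST_SEEN : FIRST_SEEN;
    case LAST_SEEN:
        return ff ? FIRST_LAST_SEEN : LAST_SEEN;
    case FIRST_LAST_SEEN:
    default:
        return FIRST_LAST_SEEN;
    }
}

int
main(void)
{
    /* A middle fragment, then the last, then the first. */
    enum toy_state s = UNUSED;
    s = toy_next_state(s, 0, 0);   /* OTHER_SEEN */
    s = toy_next_state(s, 0, 1);   /* LAST_SEEN */
    s = toy_next_state(s, 1, 0);   /* FIRST_LAST_SEEN -> try reassembly. */
    printf("final toy state: %d\n", s);
    return 0;
}
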
598
599
/* Some sanity checks are redundant, but prudent, in case code paths for
600
 * fragments change in future. The processing cost for fragments is not
601
 * important. */
602
static bool
603
ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
604
0
{
605
0
    const struct eth_header *l2 = dp_packet_eth(pkt);
606
0
    const struct ip_header *l3 = dp_packet_l3(pkt);
607
608
0
    if (OVS_UNLIKELY(!l2 || !l3)) {
609
0
        goto invalid_pkt;
610
0
    }
611
612
0
    size_t l3_size = dp_packet_l3_size(pkt);
613
0
    if (OVS_UNLIKELY(l3_size < IP_HEADER_LEN)) {
614
0
        goto invalid_pkt;
615
0
    }
616
617
0
    if (!IP_IS_FRAGMENT(l3->ip_frag_off)) {
618
0
        return false;
619
0
    }
620
621
0
    uint16_t ip_tot_len = ntohs(l3->ip_tot_len);
622
0
    if (OVS_UNLIKELY(ip_tot_len != l3_size)) {
623
0
        goto invalid_pkt;
624
0
    }
625
626
0
    size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4;
627
0
    if (OVS_UNLIKELY(ip_hdr_len < IP_HEADER_LEN)) {
628
0
        goto invalid_pkt;
629
0
    }
630
0
    if (OVS_UNLIKELY(l3_size < ip_hdr_len)) {
631
0
        goto invalid_pkt;
632
0
    }
633
634
0
    bool bad_csum = dp_packet_ip_checksum_bad(pkt);
635
0
    if (OVS_UNLIKELY(!bad_csum && dp_packet_ip_checksum_unknown(pkt))) {
636
0
        COVERAGE_INC(ipf_l3csum_checked);
637
0
        if (csum(l3, ip_hdr_len)) {
638
0
            dp_packet_ip_checksum_set_bad(pkt);
639
0
            bad_csum = true;
640
0
        } else {
641
0
            dp_packet_ip_checksum_set_good(pkt);
642
0
        }
643
0
    }
644
0
    if (OVS_UNLIKELY(bad_csum)) {
645
0
        COVERAGE_INC(ipf_l3csum_err);
646
0
        goto invalid_pkt;
647
0
    }
648
649
0
    uint32_t min_v4_frag_size_;
650
0
    atomic_read_relaxed(&ipf->min_v4_frag_size, &min_v4_frag_size_);
651
0
    bool lf = ipf_is_last_v4_frag(pkt);
652
0
    if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v4_frag_size_)) {
653
0
        ipf_count(ipf, false, IPF_NFRAGS_TOO_SMALL);
654
0
        goto invalid_pkt;
655
0
    }
656
0
    return true;
657
658
0
invalid_pkt:
659
0
    pkt->md.ct_state = CS_INVALID;
660
0
    return false;
661
0
}
662
663
static bool
664
ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
665
                   struct ipf_list_key *key, uint16_t *start_data_byte,
666
                   uint16_t *end_data_byte, bool *ff, bool *lf)
667
0
{
668
0
    const struct ip_header *l3 = dp_packet_l3(pkt);
669
0
    uint16_t ip_tot_len = ntohs(l3->ip_tot_len);
670
0
    size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4;
671
672
0
    *start_data_byte = ntohs(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) * 8;
673
0
    *end_data_byte = *start_data_byte + ip_tot_len - ip_hdr_len - 1;
674
0
    *ff = ipf_is_first_v4_frag(pkt);
675
0
    *lf = ipf_is_last_v4_frag(pkt);
676
0
    memset(key, 0, sizeof *key);
677
0
    key->ip_id = be16_to_be32(l3->ip_id);
678
0
    key->dl_type = dl_type;
679
0
    key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src);
680
0
    key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst);
681
0
    key->nw_proto = l3->ip_proto;
682
0
    key->zone = zone;
683
0
    key->recirc_id = pkt->md.recirc_id;
684
0
    return true;
685
0
}
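
A worked example of the start/end arithmetic in ipf_v4_key_extract(), using made-up header values (20-byte header, 1500-byte total length, fragment offset field of 185 eight-byte units):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint16_t ip_tot_len = 1500;      /* Example total length. */
    uint16_t ip_hdr_len = 20;        /* Example header length. */
    uint16_t frag_off_units = 185;   /* Offset field, in 8-byte units. */

    /* Same arithmetic as ipf_v4_key_extract(). */
    uint16_t start_data_byte = frag_off_units * 8;                     /* 1480 */
    uint16_t end_data_byte = start_data_byte + ip_tot_len - ip_hdr_len - 1;

    printf("payload bytes [%u, %u]\n", start_data_byte, end_data_byte);
    /* [1480, 2959]: the next in-order fragment must start at byte 2960 for
     * ipf_list_complete() to consider the list contiguous. */
    return 0;
}
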
686
687
/* Some sanity checks are redundant, but prudent, in case code paths for
688
 * fragments change in future. The processing cost for fragments is not
689
 * important. */
690
static bool
691
ipf_is_valid_v6_frag(struct ipf *ipf, struct dp_packet *pkt)
692
0
{
693
0
    const struct eth_header *l2 = dp_packet_eth(pkt);
694
0
    const struct  ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
695
0
    const char *l4 = dp_packet_l4(pkt);
696
697
0
    if (OVS_UNLIKELY(!l2 || !l3 || !l4)) {
698
0
        goto invalid_pkt;
699
0
    }
700
701
0
    size_t l3_size = dp_packet_l3_size(pkt);
702
0
    size_t l3_hdr_size = sizeof *l3;
703
704
0
    if (OVS_UNLIKELY(l3_size < l3_hdr_size)) {
705
0
        goto invalid_pkt;
706
0
    }
707
708
0
    uint8_t nw_frag = 0;
709
0
    uint8_t nw_proto = l3->ip6_nxt;
710
0
    const void *data = l3 + 1;
711
0
    size_t datasize = l3_size - l3_hdr_size;
712
0
    const struct ovs_16aligned_ip6_frag *frag_hdr;
713
0
    if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag,
714
0
                             &frag_hdr, NULL) || !nw_frag || !frag_hdr) {
715
0
        return false;
716
0
    }
717
718
0
    int pl = ntohs(l3->ip6_plen);
719
0
    if (OVS_UNLIKELY(pl + l3_hdr_size != l3_size)) {
720
0
        goto invalid_pkt;
721
0
    }
722
723
0
    ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg;
724
0
    if (OVS_UNLIKELY(!ipf_is_v6_frag(ip6f_offlg))) {
725
0
        return false;
726
0
    }
727
728
0
    uint32_t min_v6_frag_size_;
729
0
    atomic_read_relaxed(&ipf->min_v6_frag_size, &min_v6_frag_size_);
730
0
    bool lf = ipf_is_last_v6_frag(ip6f_offlg);
731
732
0
    if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v6_frag_size_)) {
733
0
        ipf_count(ipf, true, IPF_NFRAGS_TOO_SMALL);
734
0
        goto invalid_pkt;
735
0
    }
736
737
0
    return true;
738
739
0
invalid_pkt:
740
0
    pkt->md.ct_state = CS_INVALID;
741
0
    return false;
742
743
0
}
744
745
static void
746
ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
747
                   struct ipf_list_key *key, uint16_t *start_data_byte,
748
                   uint16_t *end_data_byte, bool *ff, bool *lf)
749
0
{
750
0
    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
751
0
    uint8_t nw_frag = 0;
752
0
    uint8_t nw_proto = l3->ip6_nxt;
753
0
    const void *data = l3 + 1;
754
0
    size_t datasize = dp_packet_l3_size(pkt) - sizeof *l3;
755
0
    const struct ovs_16aligned_ip6_frag *frag_hdr;
756
757
0
    parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr,
758
0
                        NULL);
759
0
    ovs_assert(nw_frag && frag_hdr);
760
0
    ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg;
761
0
    *start_data_byte = ntohs(ip6f_offlg & IP6F_OFF_MASK) +
762
0
        sizeof (struct ovs_16aligned_ip6_frag);
763
0
    *end_data_byte = *start_data_byte + dp_packet_l4_size(pkt) - 1;
764
0
    *ff = ipf_is_first_v6_frag(ip6f_offlg);
765
0
    *lf = ipf_is_last_v6_frag(ip6f_offlg);
766
0
    memset(key, 0, sizeof *key);
767
0
    key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident);
768
0
    key->dl_type = dl_type;
769
0
    memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6);
770
    /* We do not support parsing of the routing header to use as the
771
     * dst address part of the key. */
772
0
    memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6);
773
0
    key->nw_proto = 0;   /* Not used for key for V6. */
774
0
    key->zone = zone;
775
0
    key->recirc_id = pkt->md.recirc_id;
776
0
}
777
778
static bool
779
ipf_list_key_eq(const struct ipf_list_key *key1,
780
                const struct ipf_list_key *key2)
781
    /* OVS_REQUIRES(ipf_lock) */
782
0
{
783
0
    if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) &&
784
0
        !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) &&
785
0
        key1->dl_type == key2->dl_type &&
786
0
        key1->ip_id == key2->ip_id &&
787
0
        key1->zone == key2->zone &&
788
0
        key1->nw_proto == key2->nw_proto &&
789
0
        key1->recirc_id == key2->recirc_id) {
790
0
        return true;
791
0
    }
792
0
    return false;
793
0
}
794
795
static struct ipf_list *
796
ipf_list_key_lookup(struct ipf *ipf, const struct ipf_list_key *key,
797
                    uint32_t hash)
798
    OVS_REQUIRES(ipf->ipf_lock)
799
0
{
800
0
    struct ipf_list *ipf_list;
801
0
    HMAP_FOR_EACH_WITH_HASH (ipf_list, node, hash, &ipf->frag_lists) {
802
0
        if (ipf_list_key_eq(&ipf_list->key, key)) {
803
0
            return ipf_list;
804
0
        }
805
0
    }
806
0
    return NULL;
807
0
}
808
809
static bool
810
ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx,
811
                  size_t start_data_byte, size_t end_data_byte)
812
    /* OVS_REQUIRES(ipf_lock) */
813
0
{
814
0
    for (int i = 0; i <= last_inuse_idx; i++) {
815
0
        if ((start_data_byte >= frag_list[i].start_data_byte &&
816
0
            start_data_byte <= frag_list[i].end_data_byte) ||
817
0
            (end_data_byte >= frag_list[i].start_data_byte &&
818
0
             end_data_byte <= frag_list[i].end_data_byte)) {
819
0
            return true;
820
0
        }
821
0
    }
822
0
    return false;
823
0
}
824
825
/* Adds a fragment to a list of fragments, if the fragment is not a
826
 * duplicate. If the fragment is a duplicate, that fragment is marked
827
 * invalid here, saving conntrack the work of marking the fragment as
828
 * invalid, which it would do in all cases. */
829
static bool
830
ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list,
831
                 struct dp_packet *pkt, uint16_t start_data_byte,
832
                 uint16_t end_data_byte, bool ff, bool lf, bool v6,
833
                 struct reassembled_pkt **rp)
834
    OVS_REQUIRES(ipf->ipf_lock)
835
0
{
836
0
    bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list,
837
0
        ipf_list->last_inuse_idx, start_data_byte, end_data_byte);
838
0
    int last_inuse_idx = ipf_list->last_inuse_idx;
839
840
0
    if (!duped_frag) {
841
0
        if (last_inuse_idx < ipf_list->size - 1) {
842
            /* In the case of dpdk, it would be unfortunate if we had
843
             * to create a clone fragment outside the dpdk mp due to the
844
             * mempool size being too limited. We will otherwise need to
845
             * recommend not setting the mempool number of buffers too low
846
             * and also clamp the number of fragments. */
847
0
            struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1];
848
0
            frag->pkt = pkt;
849
0
            frag->start_data_byte = start_data_byte;
850
0
            frag->end_data_byte = end_data_byte;
851
0
            ipf_list->last_inuse_idx++;
852
0
            atomic_count_inc(&ipf->nfrag);
853
0
            ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED);
854
0
            *rp = ipf_list_state_transition(ipf, ipf_list, ff, lf, v6);
855
0
        } else {
856
0
            OVS_NOT_REACHED();
857
0
        }
858
0
    } else {
859
0
        ipf_count(ipf, v6, IPF_NFRAGS_OVERLAP);
860
0
        pkt->md.ct_state = CS_INVALID;
861
0
        return false;
862
0
    }
863
0
    return true;
864
0
}
865
866
static void
867
ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key,
868
              int max_frag_list_size)
869
0
{
870
0
    ipf_list->key = *key;
871
0
    ipf_list->last_inuse_idx = IPF_INVALID_IDX;
872
0
    ipf_list->last_sent_idx = IPF_INVALID_IDX;
873
0
    ipf_list->reass_execute_ctx = NULL;
874
0
    ipf_list->state = IPF_LIST_STATE_UNUSED;
875
0
    ipf_list->size = max_frag_list_size;
876
0
    ipf_list->frag_list
877
0
        = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
878
0
}
879
880
/* Generates a fragment list key from a well formed fragment and either starts
881
 * a new fragment list or increases the size of the existing fragment list,
882
 * while checking that the maximum number of supported fragments is not
883
 * exceeded and that the list size is not impossibly big. Calls
884
 * 'ipf_process_frag()' to add a fragment to a list of fragments. */
885
static bool
886
ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type,
887
                uint16_t zone, long long now, uint32_t hash_basis,
888
                struct reassembled_pkt **rp)
889
    OVS_REQUIRES(ipf->ipf_lock)
890
0
{
891
0
    struct ipf_list_key key;
892
    /* Initialize 4 variables for some versions of GCC. */
893
0
    uint16_t start_data_byte = 0;
894
0
    uint16_t end_data_byte = 0;
895
0
    bool ff = false;
896
0
    bool lf = false;
897
0
    bool v6 = dl_type == htons(ETH_TYPE_IPV6);
898
899
0
    if (v6 && ipf_get_v6_enabled(ipf)) {
900
0
        ipf_v6_key_extract(pkt, dl_type, zone, &key, &start_data_byte,
901
0
                           &end_data_byte, &ff, &lf);
902
0
    } else if (!v6 && ipf_get_v4_enabled(ipf)) {
903
0
        ipf_v4_key_extract(pkt, dl_type, zone, &key, &start_data_byte,
904
0
                           &end_data_byte, &ff, &lf);
905
0
    } else {
906
0
        return false;
907
0
    }
908
909
0
    unsigned int nfrag_max;
910
0
    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max);
911
0
    if (atomic_count_get(&ipf->nfrag) >= nfrag_max) {
912
0
        return false;
913
0
    }
914
915
0
    uint32_t hash = ipf_list_key_hash(&key, hash_basis);
916
0
    struct ipf_list *ipf_list = ipf_list_key_lookup(ipf, &key, hash);
917
0
    enum {
918
0
        IPF_FRAG_LIST_MIN_INCREMENT = 4,
919
0
        IPF_IPV6_MAX_FRAG_LIST_SIZE = 65535,
920
0
    };
921
922
0
    int max_frag_list_size;
923
0
    if (v6) {
924
        /* Because the calculation with extension headers is variable,
925
         * we don't calculate a hard maximum fragment list size upfront.  The
926
         * fragment list size is practically limited by the code, however. */
927
0
        max_frag_list_size = IPF_IPV6_MAX_FRAG_LIST_SIZE;
928
0
    } else {
929
0
        max_frag_list_size = ipf->max_v4_frag_list_size;
930
0
    }
931
932
0
    if (!ipf_list) {
933
0
        ipf_list = xmalloc(sizeof *ipf_list);
934
0
        ipf_list_init(ipf_list, &key,
935
0
                      MIN(max_frag_list_size, IPF_FRAG_LIST_MIN_INCREMENT));
936
0
        hmap_insert(&ipf->frag_lists, &ipf_list->node, hash);
937
0
        ipf_expiry_list_add(&ipf->frag_exp_list, ipf_list, now);
938
0
    } else if (ipf_list->state == IPF_LIST_STATE_REASS_FAIL ||
939
0
               ipf_list->state == IPF_LIST_STATE_COMPLETED) {
940
        /* Bail out as early as possible. */
941
0
        return false;
942
0
    } else if (ipf_list->last_inuse_idx + 1 >= ipf_list->size) {
943
0
        int increment = MIN(IPF_FRAG_LIST_MIN_INCREMENT,
944
0
                            max_frag_list_size - ipf_list->size);
945
        /* Enforce limit. */
946
0
        if (increment > 0) {
947
0
            ipf_list->frag_list =
948
0
                xrealloc(ipf_list->frag_list, (ipf_list->size + increment) *
949
0
                  sizeof *ipf_list->frag_list);
950
0
            ipf_list->size += increment;
951
0
        } else {
952
0
            return false;
953
0
        }
954
0
    }
955
956
0
    return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte,
957
0
                            end_data_byte, ff, lf, v6, rp);
958
0
}
959
960
/* Filters out fragments from a batch of fragments and adjusts the batch. */
961
static void
962
ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
963
                             ovs_be16 dl_type, uint16_t zone, long long now,
964
                             uint32_t hash_basis)
965
0
{
966
0
    const size_t pb_cnt = dp_packet_batch_size(pb);
967
0
    int pb_idx; /* Index in a packet batch. */
968
0
    struct dp_packet *pkt;
969
970
0
    DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
971
0
        if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) &&
972
0
                          ipf_is_valid_v4_frag(ipf, pkt))
973
0
                          ||
974
0
                          (dl_type == htons(ETH_TYPE_IPV6) &&
975
0
                          ipf_is_valid_v6_frag(ipf, pkt)))) {
976
0
            struct reassembled_pkt *rp = NULL;
977
978
0
            ovs_mutex_lock(&ipf->ipf_lock);
979
0
            if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis,
980
0
                                 &rp)) {
981
0
                dp_packet_batch_refill(pb, pkt, pb_idx);
982
0
            } else {
983
0
                if (rp && !dp_packet_batch_is_full(pb)) {
984
0
                    dp_packet_batch_refill(pb, rp->pkt, pb_idx);
985
0
                    rp->list->reass_execute_ctx = rp->pkt;
986
0
                }
987
0
            }
988
0
            ovs_mutex_unlock(&ipf->ipf_lock);
989
0
        } else {
990
0
            dp_packet_batch_refill(pb, pkt, pb_idx);
991
0
        }
992
0
    }
993
0
}
994
995
/* In case of DPDK, a memory source check is done, as DPDK memory pool
996
 * management has trouble dealing with multiple source types.  The
997
 * check_source parameter is used to indicate when this check is needed. */
998
static bool
999
ipf_dp_packet_batch_add(struct dp_packet_batch *pb, struct dp_packet *pkt,
1000
                        bool check_source OVS_UNUSED)
1001
0
{
1002
#ifdef DPDK_NETDEV
1003
    if ((dp_packet_batch_is_full(pb)) ||
1004
        /* DPDK cannot handle multiple sources in a batch. */
1005
        (check_source && !dp_packet_batch_is_empty(pb)
1006
         && pb->packets[0]->source != pkt->source)) {
1007
#else
1008
0
    if (dp_packet_batch_is_full(pb)) {
1009
0
#endif
1010
0
        return false;
1011
0
    }
1012
1013
0
    dp_packet_batch_add(pb, pkt);
1014
0
    return true;
1015
0
}
1016
1017
/* This would be used in rare cases where a list cannot be sent. One rare
1018
 * reason known right now is a mempool source check, which exists due to DPDK
1019
 * support, where packets are no longer being received on any port with a
1020
 * source matching the fragment.  Another reason is a race where all
1021
 * conntrack rules are unconfigured when some fragments are yet to be
1022
 * flushed.
1023
 *
1024
 * Returns true if the list was purged. */
1025
static bool
1026
ipf_purge_list_check(struct ipf *ipf, struct ipf_list *ipf_list,
1027
                     long long now)
1028
    OVS_REQUIRES(ipf->ipf_lock)
1029
0
{
1030
0
    enum {
1031
0
        IPF_FRAG_LIST_PURGE_TIME_ADJ = 10000
1032
0
    };
1033
1034
0
    if (now < ipf_list->expiration + IPF_FRAG_LIST_PURGE_TIME_ADJ) {
1035
0
        return false;
1036
0
    }
1037
1038
0
    while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
1039
0
        struct dp_packet * pkt
1040
0
            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
1041
0
        dp_packet_delete(pkt);
1042
0
        atomic_count_dec(&ipf->nfrag);
1043
0
        COVERAGE_INC(ipf_stuck_frag_list_purged);
1044
0
        ipf_count(ipf, ipf_list->key.dl_type == htons(ETH_TYPE_IPV6),
1045
0
                  IPF_NFRAGS_PURGED);
1046
0
        ipf_list->last_sent_idx++;
1047
0
    }
1048
1049
0
    return true;
1050
0
}
1051
1052
/* Does the packet batch management and common accounting work associated
1053
 * with 'ipf_send_completed_frags()' and 'ipf_delete_expired_frags()'. */
1054
static bool
1055
ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
1056
                       struct dp_packet_batch *pb, bool v6, long long now)
1057
    OVS_REQUIRES(ipf->ipf_lock)
1058
0
{
1059
0
    if (ipf_purge_list_check(ipf, ipf_list, now)) {
1060
0
        return true;
1061
0
    }
1062
1063
0
    while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
1064
0
        struct dp_packet *pkt
1065
0
            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
1066
0
        if (ipf_dp_packet_batch_add(pb, pkt, true)) {
1067
0
            ipf_list->last_sent_idx++;
1068
0
            atomic_count_dec(&ipf->nfrag);
1069
1070
0
            ipf_count(ipf, v6, IPF_NFRAGS_COMPL_SENT);
1071
1072
0
            if (ipf_list->last_sent_idx == ipf_list->last_inuse_idx) {
1073
0
                return true;
1074
0
            }
1075
0
        } else {
1076
0
            return false;
1077
0
        }
1078
0
    }
1079
0
    OVS_NOT_REACHED();
1080
0
}
1081
1082
/* Adds fragments associated with a completed fragment list to a packet batch
1083
 * to be processed by the calling application, typically conntrack. Also
1084
 * cleans up the list context when it is empty. */
1085
static void
1086
ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
1087
                         long long now, bool v6, uint16_t zone,
1088
                         odp_port_t in_port)
1089
0
{
1090
0
    if (ipf_list_is_empty_unsafe(&ipf->frag_complete_list)) {
1091
0
        return;
1092
0
    }
1093
1094
0
    ovs_mutex_lock(&ipf->ipf_lock);
1095
0
    struct ipf_list *ipf_list;
1096
1097
0
    LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) {
1098
1099
0
        if ((ipf_list->key.dl_type == htons(ETH_TYPE_IPV6)) != v6) {
1100
0
            continue;
1101
0
        }
1102
0
        if (ipf_list->key.zone != zone) {
1103
0
            continue;
1104
0
        }
1105
1106
        /* Check that the batch's in_port matches. */
1107
0
        struct dp_packet *pkt
1108
0
            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
1109
0
        if (in_port != pkt->md.in_port.odp_port) {
1110
0
            continue;
1111
0
        }
1112
1113
0
        if (ipf_send_frags_in_list(ipf, ipf_list, pb, v6, now)) {
1114
0
            ipf_completed_list_clean(&ipf->frag_lists, ipf_list);
1115
0
        } else {
1116
0
            break;
1117
0
        }
1118
0
    }
1119
1120
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1121
0
}
1122
1123
/* Remove expired fragment lists and clean up the list context. */
1124
static void
1125
ipf_delete_expired_frags(struct ipf *ipf, long long now)
1126
0
{
1127
0
    enum {
1128
        /* Very conservative, due to DOS probability. */
1129
0
        IPF_FRAG_LIST_MAX_EXPIRED = 1,
1130
0
    };
1131
1132
1133
0
    if (ipf_list_is_empty_unsafe(&ipf->frag_exp_list)) {
1134
0
        return;
1135
0
    }
1136
1137
0
    ovs_mutex_lock(&ipf->ipf_lock);
1138
0
    struct ipf_list *ipf_list;
1139
0
    size_t lists_removed = 0;
1140
1141
0
    LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) {
1142
0
        if (now <= ipf_list->expiration ||
1143
0
            lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) {
1144
0
            break;
1145
0
        }
1146
1147
0
        while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
1148
0
            struct dp_packet * pkt
1149
0
                = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
1150
0
            dp_packet_delete(pkt);
1151
0
            atomic_count_dec(&ipf->nfrag);
1152
0
            COVERAGE_INC(ipf_stuck_frag_list_expired);
1153
0
            ipf_count(ipf, ipf_list->key.dl_type == htons(ETH_TYPE_IPV6),
1154
0
                      IPF_NFRAGS_EXPIRED);
1155
0
            ipf_list->last_sent_idx++;
1156
0
        }
1157
0
        ipf_expiry_list_clean(&ipf->frag_lists, ipf_list);
1158
0
        lists_removed++;
1159
0
    }
1160
1161
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1162
0
}
1163
1164
/* Adds a reassembled packet to a packet batch to be processed by the caller.
1165
 */
1166
static void
1167
ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb,
1168
                       ovs_be16 dl_type)
1169
0
{
1170
0
    if (ipf_list_is_empty_unsafe(&ipf->reassembled_pkt_list)) {
1171
0
        return;
1172
0
    }
1173
1174
0
    ovs_mutex_lock(&ipf->ipf_lock);
1175
0
    struct reassembled_pkt *rp;
1176
1177
0
    LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) {
1178
0
        if (!rp->list->reass_execute_ctx &&
1179
0
            rp->list->key.dl_type == dl_type &&
1180
0
            ipf_dp_packet_batch_add(pb, rp->pkt, false)) {
1181
0
            rp->list->reass_execute_ctx = rp->pkt;
1182
0
        }
1183
0
    }
1184
1185
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1186
0
}
1187
1188
/* Checks for reassembled packets after processing by conntrack and edits the
1189
 * fragments if needed based on what conntrack decided. */
1190
static void
1191
ipf_post_execute_reass_pkts(struct ipf *ipf,
1192
                            struct dp_packet_batch *pb, bool v6)
1193
0
{
1194
0
    if (ipf_list_is_empty_unsafe(&ipf->reassembled_pkt_list)) {
1195
0
        return;
1196
0
    }
1197
1198
0
    ovs_mutex_lock(&ipf->ipf_lock);
1199
0
    struct reassembled_pkt *rp;
1200
1201
0
    LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) {
1202
0
        const size_t pb_cnt = dp_packet_batch_size(pb);
1203
0
        int pb_idx;
1204
0
        struct dp_packet *pkt;
1205
        /* Inner batch loop is constant time since batch size is <=
1206
         * NETDEV_MAX_BURST. */
1207
0
        DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
1208
0
            if (rp && pkt == rp->list->reass_execute_ctx) {
1209
0
                const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
1210
0
                void *l4_frag = dp_packet_l4(frag_0->pkt);
1211
0
                void *l4_reass = dp_packet_l4(pkt);
1212
1213
                /* Complete all L4 checksums before reassembly. */
1214
0
                dp_packet_ol_send_prepare(pkt, 0);
1215
0
                memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt));
1216
1217
0
                for (int i = 0; i <= rp->list->last_inuse_idx; i++) {
1218
0
                    const struct ipf_frag *frag_i = &rp->list->frag_list[i];
1219
1220
0
                    frag_i->pkt->md.ct_label = pkt->md.ct_label;
1221
0
                    frag_i->pkt->md.ct_mark = pkt->md.ct_mark;
1222
0
                    frag_i->pkt->md.ct_state = pkt->md.ct_state;
1223
0
                    frag_i->pkt->md.ct_zone = pkt->md.ct_zone;
1224
0
                    frag_i->pkt->md.ct_orig_tuple_ipv6 =
1225
0
                        pkt->md.ct_orig_tuple_ipv6;
1226
0
                    if (pkt->md.ct_orig_tuple_ipv6) {
1227
0
                        frag_i->pkt->md.ct_orig_tuple.ipv6 =
1228
0
                            pkt->md.ct_orig_tuple.ipv6;
1229
0
                    } else {
1230
0
                        frag_i->pkt->md.ct_orig_tuple.ipv4 =
1231
0
                            pkt->md.ct_orig_tuple.ipv4;
1232
0
                    }
1233
0
                    if (v6) {
1234
0
                        struct ovs_16aligned_ip6_hdr *l3_frag
1235
0
                            = dp_packet_l3(frag_i->pkt);
1236
0
                        struct ovs_16aligned_ip6_hdr *l3_reass
1237
0
                            = dp_packet_l3(pkt);
1238
0
                        l3_frag->ip6_src = l3_reass->ip6_src;
1239
0
                        l3_frag->ip6_dst = l3_reass->ip6_dst;
1240
0
                    } else {
1241
0
                        struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
1242
0
                        struct ip_header *l3_reass = dp_packet_l3(pkt);
1243
0
                        if (dp_packet_ip_checksum_valid(frag_i->pkt)) {
1244
0
                            dp_packet_ip_checksum_set_partial(frag_i->pkt);
1245
0
                        } else {
1246
0
                            ovs_be32 reass_ip =
1247
0
                                get_16aligned_be32(&l3_reass->ip_src);
1248
0
                            ovs_be32 frag_ip =
1249
0
                                get_16aligned_be32(&l3_frag->ip_src);
1250
1251
0
                            l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
1252
0
                                                             frag_ip,
1253
0
                                                             reass_ip);
1254
0
                            reass_ip = get_16aligned_be32(&l3_reass->ip_dst);
1255
0
                            frag_ip = get_16aligned_be32(&l3_frag->ip_dst);
1256
0
                            l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
1257
0
                                                             frag_ip,
1258
0
                                                             reass_ip);
1259
0
                        }
1260
1261
0
                        l3_frag->ip_src = l3_reass->ip_src;
1262
0
                        l3_frag->ip_dst = l3_reass->ip_dst;
1263
0
                    }
1264
0
                }
1265
1266
0
                ipf_completed_list_add(&ipf->frag_complete_list, rp->list);
1267
0
                ipf_reassembled_list_remove(rp);
1268
0
                dp_packet_delete(rp->pkt);
1269
0
                free(rp);
1270
0
                rp = NULL;
1271
0
            } else {
1272
0
                dp_packet_batch_refill(pb, pkt, pb_idx);
1273
0
            }
1274
0
        }
1275
0
    }
1276
1277
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1278
0
}
1279
1280
/* Extracts any fragments from the batch and reassembles them when a
1281
 * complete packet is received.  Completed packets are added to the
1282
 * batch, when possible, to be sent through conntrack. */
1283
void
1284
ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
1285
                         long long now, ovs_be16 dl_type, uint16_t zone,
1286
                         uint32_t hash_basis)
1287
0
{
1288
0
    if (ipf_get_enabled(ipf)) {
1289
0
        ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now, hash_basis);
1290
0
    }
1291
1292
0
    if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
1293
0
        ipf_execute_reass_pkts(ipf, pb, dl_type);
1294
0
    }
1295
0
}
1296
1297
/* Updates fragments based on the processing of the reassembled packet sent
1298
 * through conntrack and adds these fragments to any batches seen.  Expired
1299
 * fragments are marked as invalid and also added to the batches seen
1300
 * with low priority.  Reassembled packets are freed. */
1301
void
1302
ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
1303
                          long long now, ovs_be16 dl_type, uint16_t zone,
1304
                          odp_port_t in_port)
1305
0
{
1306
0
    if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
1307
0
        bool v6 = dl_type == htons(ETH_TYPE_IPV6);
1308
0
        ipf_post_execute_reass_pkts(ipf, pb, v6);
1309
0
        ipf_send_completed_frags(ipf, pb, now, v6, zone, in_port);
1310
0
        ipf_delete_expired_frags(ipf, now);
1311
0
    }
1312
0
}
1313
1314
static void *
1315
ipf_clean_thread_main(void *f)
1316
0
{
1317
0
    struct ipf *ipf = f;
1318
1319
0
    enum {
1320
0
        IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000,
1321
0
    };
1322
1323
0
    while (!latch_is_set(&ipf->ipf_clean_thread_exit)) {
1324
1325
0
        long long now = time_msec();
1326
1327
0
        if (!ipf_list_is_empty_unsafe(&ipf->frag_exp_list) ||
1328
0
            !ipf_list_is_empty_unsafe(&ipf->frag_complete_list)) {
1329
1330
0
            ovs_mutex_lock(&ipf->ipf_lock);
1331
1332
0
            struct ipf_list *ipf_list;
1333
0
            LIST_FOR_EACH_SAFE (ipf_list, list_node,
1334
0
                                &ipf->frag_exp_list) {
1335
0
                if (ipf_purge_list_check(ipf, ipf_list, now)) {
1336
0
                    ipf_expiry_list_clean(&ipf->frag_lists, ipf_list);
1337
0
                }
1338
0
            }
1339
1340
0
            LIST_FOR_EACH_SAFE (ipf_list, list_node,
1341
0
                                &ipf->frag_complete_list) {
1342
0
                if (ipf_purge_list_check(ipf, ipf_list, now)) {
1343
0
                    ipf_completed_list_clean(&ipf->frag_lists, ipf_list);
1344
0
                }
1345
0
            }
1346
1347
0
            ovs_mutex_unlock(&ipf->ipf_lock);
1348
0
        }
1349
1350
0
        poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT);
1351
0
        latch_wait(&ipf->ipf_clean_thread_exit);
1352
0
        poll_block();
1353
0
    }
1354
1355
0
    return NULL;
1356
0
}
1357
1358
struct ipf *
1359
ipf_init(void)
1360
0
{
1361
0
    struct ipf *ipf = xzalloc(sizeof *ipf);
1362
1363
0
    ovs_mutex_init_adaptive(&ipf->ipf_lock);
1364
0
    ovs_mutex_lock(&ipf->ipf_lock);
1365
0
    hmap_init(&ipf->frag_lists);
1366
0
    ovs_list_init(&ipf->frag_exp_list);
1367
0
    ovs_list_init(&ipf->frag_complete_list);
1368
0
    ovs_list_init(&ipf->reassembled_pkt_list);
1369
0
    atomic_init(&ipf->min_v4_frag_size, IPF_V4_FRAG_SIZE_MIN_DEF);
1370
0
    atomic_init(&ipf->min_v6_frag_size, IPF_V6_FRAG_SIZE_MIN_DEF);
1371
0
    ipf->max_v4_frag_list_size = DIV_ROUND_UP(
1372
0
        IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
1373
0
        ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
1374
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1375
0
    atomic_count_init(&ipf->nfrag, 0);
1376
0
    for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) {
1377
0
        atomic_init(&ipf->n4frag_cnt[i], 0);
1378
0
        atomic_init(&ipf->n6frag_cnt[i], 0);
1379
0
    }
1380
0
    atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT);
1381
0
    atomic_init(&ipf->ifp_v4_enabled, true);
1382
0
    atomic_init(&ipf->ifp_v6_enabled, true);
1383
0
    latch_init(&ipf->ipf_clean_thread_exit);
1384
0
    ipf->ipf_clean_thread = ovs_thread_create("ipf_clean",
1385
0
                                         ipf_clean_thread_main, ipf);
1386
1387
0
    return ipf;
1388
0
}
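
With the defaults set above, max_v4_frag_list_size works out to DIV_ROUND_UP(65535 - 60, 1200 - 60) = 58 entries. For context on how a caller drives the module, here is a schematic caller-side sketch, not part of ipf.c: the wrapper function is hypothetical and the conntrack step is elided; only the ipf_* entry points defined in this file are real.

#include "ipf.h"

/* Hypothetical wrapper showing the intended call order around a
 * conntrack-style pass over 'batch'. */
static void
reassembly_aware_pass(struct ipf *ipf, struct dp_packet_batch *batch,
                      long long now, ovs_be16 dl_type, uint16_t zone,
                      uint32_t hash_basis, odp_port_t in_port)
{
    /* Pull fragments out of 'batch'; completed reassemblies may be added. */
    ipf_preprocess_conntrack(ipf, batch, now, dl_type, zone, hash_basis);

    /* ... the caller processes 'batch' (e.g., conntrack) here ... */

    /* Propagate the results back onto the fragments and re-emit them. */
    ipf_postprocess_conntrack(ipf, batch, now, dl_type, zone, in_port);
}
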
1389
1390
void
1391
ipf_destroy(struct ipf *ipf)
1392
0
{
1393
0
    latch_set(&ipf->ipf_clean_thread_exit);
1394
0
    pthread_join(ipf->ipf_clean_thread, NULL);
1395
0
    latch_destroy(&ipf->ipf_clean_thread_exit);
1396
1397
0
    ovs_mutex_lock(&ipf->ipf_lock);
1398
1399
0
    struct ipf_list *ipf_list;
1400
0
    HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) {
1401
0
        while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
1402
0
            struct dp_packet *pkt
1403
0
                = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
1404
0
            dp_packet_delete(pkt);
1405
0
            atomic_count_dec(&ipf->nfrag);
1406
0
            ipf_list->last_sent_idx++;
1407
0
        }
1408
0
        free(ipf_list->frag_list);
1409
0
        free(ipf_list);
1410
0
    }
1411
1412
0
    if (atomic_count_get(&ipf->nfrag)) {
1413
0
        VLOG_WARN("ipf destroy with non-zero fragment count. ");
1414
0
    }
1415
1416
0
    struct reassembled_pkt *rp;
1417
0
    LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) {
1418
0
        dp_packet_delete(rp->pkt);
1419
0
        free(rp);
1420
0
    }
1421
1422
0
    hmap_destroy(&ipf->frag_lists);
1423
0
    ovs_list_poison(&ipf->frag_exp_list);
1424
0
    ovs_list_poison(&ipf->frag_complete_list);
1425
0
    ovs_list_poison(&ipf->reassembled_pkt_list);
1426
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1427
0
    ovs_mutex_destroy(&ipf->ipf_lock);
1428
0
    free(ipf);
1429
0
}
1430
1431
int
1432
ipf_set_enabled(struct ipf *ipf, bool v6, bool enable)
1433
0
{
1434
0
    atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled,
1435
0
                         enable);
1436
0
    return 0;
1437
0
}
1438
1439
int
1440
ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value)
1441
0
{
1442
    /* If the user specifies an unreasonably large number, fragmentation
1443
     * will not work well but it will not blow up. */
1444
0
    if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND :  IPF_V4_FRAG_SIZE_LBOUND)) {
1445
0
        return 1;
1446
0
    }
1447
1448
0
    ovs_mutex_lock(&ipf->ipf_lock);
1449
0
    if (v6) {
1450
0
        atomic_store_relaxed(&ipf->min_v6_frag_size, value);
1451
0
    } else {
1452
0
        atomic_store_relaxed(&ipf->min_v4_frag_size, value);
1453
0
        ipf->max_v4_frag_list_size = DIV_ROUND_UP(
1454
0
            IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
1455
0
            ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
1456
0
    }
1457
0
    ovs_mutex_unlock(&ipf->ipf_lock);
1458
0
    return 0;
1459
0
}
1460
1461
int
1462
ipf_set_max_nfrags(struct ipf *ipf, uint32_t value)
1463
0
{
1464
0
    if (value > IPF_NFRAG_UBOUND) {
1465
0
        return 1;
1466
0
    }
1467
0
    atomic_store_relaxed(&ipf->nfrag_max, value);
1468
0
    return 0;
1469
0
}
1470
1471
int
1472
ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status)
1473
0
{
1474
0
    ipf_status->nfrag = atomic_count_get(&ipf->nfrag);
1475
0
    atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max);
1476
1477
0
    atomic_read_relaxed(&ipf->ifp_v4_enabled, &ipf_status->v4.enabled);
1478
0
    atomic_read_relaxed(&ipf->min_v4_frag_size,
1479
0
                        &ipf_status->v4.min_frag_size);
1480
0
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED],
1481
0
                        &ipf_status->v4.nfrag_accepted);
1482
0
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT],
1483
0
                        &ipf_status->v4.nfrag_completed_sent);
1484
0
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_EXPIRED],
1485
0
                        &ipf_status->v4.nfrag_expired_sent);
1486
0
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL],
1487
0
                        &ipf_status->v4.nfrag_too_small);
1488
0
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP],
1489
0
                        &ipf_status->v4.nfrag_overlap);
1490
0
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED],
1491
0
                        &ipf_status->v4.nfrag_purged);
1492
1493
0
    atomic_read_relaxed(&ipf->ifp_v6_enabled, &ipf_status->v6.enabled);
1494
0
    atomic_read_relaxed(&ipf->min_v6_frag_size,
1495
0
                        &ipf_status->v6.min_frag_size);
1496
0
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED],
1497
0
                        &ipf_status->v6.nfrag_accepted);
1498
0
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT],
1499
0
                        &ipf_status->v6.nfrag_completed_sent);
1500
0
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_EXPIRED],
1501
0
                        &ipf_status->v6.nfrag_expired_sent);
1502
0
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL],
1503
0
                        &ipf_status->v6.nfrag_too_small);
1504
0
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP],
1505
0
                        &ipf_status->v6.nfrag_overlap);
1506
0
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED],
1507
0
                        &ipf_status->v6.nfrag_purged);
1508
0
    return 0;
1509
0
}
1510
1511
struct ipf_dump_ctx {
1512
    struct hmap_position bucket_pos;
1513
};
1514
1515
/* Allocates an 'ipf_dump_ctx' to keep track of an hmap position. The
1516
 * caller must call ipf_dump_done() when dumping is finished. */
1517
int
1518
ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx)
1519
0
{
1520
0
    *ipf_dump_ctx = xzalloc(sizeof **ipf_dump_ctx);
1521
0
    return 0;
1522
0
}
1523
1524
/* Creates a string representation of the state of an 'ipf_list' and puts
1525
 * it in 'ds'. */
1526
static void
1527
ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
1528
0
{
1529
0
    ds_put_cstr(ds, "(");
1530
0
    if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) {
1531
0
        ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",",
1532
0
                      IP_ARGS(ipf_list->key.src_addr.ipv4),
1533
0
                      IP_ARGS(ipf_list->key.dst_addr.ipv4));
1534
0
    } else {
1535
0
        ds_put_cstr(ds, "src=");
1536
0
        ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds);
1537
0
        ds_put_cstr(ds, ",dst=");
1538
0
        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds);
1539
0
        ds_put_cstr(ds, ",");
1540
0
    }
1541
1542
0
    ds_put_format(ds, "recirc_id=%u,ip_id=%u,dl_type=0x%x,zone=%u,nw_proto=%u",
1543
0
                  ipf_list->key.recirc_id, ntohl(ipf_list->key.ip_id),
1544
0
                  ntohs(ipf_list->key.dl_type), ipf_list->key.zone,
1545
0
                  ipf_list->key.nw_proto);
1546
1547
0
    ds_put_format(ds, ",num_fragments=%u,state=%s",
1548
0
                  ipf_list->last_inuse_idx + 1,
1549
0
                  ipf_state_name[ipf_list->state]);
1550
1551
0
    ds_put_cstr(ds, ")");
1552
0
}
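
For reference, a line produced by ipf_dump_create() for an IPv4 list has roughly the shape below; every field value here is invented purely for illustration.

(src=10.1.1.1,dst=10.1.1.2,recirc_id=0,ip_id=1234,dl_type=0x800,zone=5,nw_proto=17,num_fragments=3,state=first frag)
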
1553
1554
/* Finds the next ipf list starting from 'ipf_dump_ctx->bucket_pos' and uses
1555
 * ipf_dump_create() to create a string representation of the state of an
1556
 * ipf list, to which 'dump' points.  Returns EOF when there are no
1557
 * more ipf lists. */
1558
int
1559
ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
1560
0
{
1561
0
    ovs_mutex_lock(&ipf->ipf_lock);
1562
1563
0
    struct hmap_node *node = hmap_at_position(&ipf->frag_lists,
1564
0
                                              &ipf_dump_ctx->bucket_pos);
1565
0
    if (!node) {
1566
0
        ovs_mutex_unlock(&ipf->ipf_lock);
1567
0
        return EOF;
1568
0
    } else {
1569
0
        struct ipf_list *ipf_list_;
1570
0
        INIT_CONTAINER(ipf_list_, node, node);
1571
0
        struct ipf_list ipf_list = *ipf_list_;
1572
0
        ovs_mutex_unlock(&ipf->ipf_lock);
1573
0
        struct ds ds = DS_EMPTY_INITIALIZER;
1574
0
        ipf_dump_create(&ipf_list, &ds);
1575
0
        *dump = ds_steal_cstr(&ds);
1576
0
        return 0;
1577
0
    }
1578
0
}
1579
1580
/* Frees 'ipf_dump_ctx' allocated by ipf_dump_start(). */
1581
int
1582
ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx)
1583
0
{
1584
0
    free(ipf_dump_ctx);
1585
0
    return 0;
1586
0
}