/src/openvswitch/lib/ipf.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019 Nicira, Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at: |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <config.h> |
18 | | #include <ctype.h> |
19 | | #include <errno.h> |
20 | | #include <sys/types.h> |
21 | | #include <netinet/in.h> |
22 | | #include <netinet/ip6.h> |
23 | | #include <netinet/icmp6.h> |
24 | | #include <string.h> |
25 | | |
26 | | #include "coverage.h" |
27 | | #include "csum.h" |
28 | | #include "ipf.h" |
29 | | #include "latch.h" |
30 | | #include "openvswitch/hmap.h" |
31 | | #include "openvswitch/poll-loop.h" |
32 | | #include "openvswitch/vlog.h" |
33 | | #include "ovs-atomic.h" |
34 | | #include "packets.h" |
35 | | #include "util.h" |
36 | | |
37 | | VLOG_DEFINE_THIS_MODULE(ipf); |
38 | | COVERAGE_DEFINE(ipf_stuck_frag_list_purged); |
39 | | COVERAGE_DEFINE(ipf_l3csum_err); |
40 | | |
41 | | enum { |
42 | | IPV4_PACKET_MAX_HDR_SIZE = 60, |
43 | | IPV4_PACKET_MAX_SIZE = 65535, |
44 | | IPV6_PACKET_MAX_DATA = 65535, |
45 | | }; |
46 | | |
47 | | enum ipf_list_state { |
48 | | IPF_LIST_STATE_UNUSED, |
49 | | IPF_LIST_STATE_REASS_FAIL, |
50 | | IPF_LIST_STATE_OTHER_SEEN, |
51 | | IPF_LIST_STATE_FIRST_SEEN, |
52 | | IPF_LIST_STATE_LAST_SEEN, |
53 | | IPF_LIST_STATE_FIRST_LAST_SEEN, |
54 | | IPF_LIST_STATE_COMPLETED, |
55 | | IPF_LIST_STATE_NUM, |
56 | | }; |
57 | | |
58 | | static char *ipf_state_name[IPF_LIST_STATE_NUM] = |
59 | | {"unused", "reassemble fail", "other frag", "first frag", "last frag", |
60 | | "first/last frag", "complete"}; |
61 | | |
62 | | enum ipf_list_type { |
63 | | IPF_FRAG_COMPLETED_LIST, |
64 | | IPF_FRAG_EXPIRY_LIST, |
65 | | }; |
66 | | |
67 | | enum { |
68 | | IPF_INVALID_IDX = -1, |
69 | | IPF_V4_FRAG_SIZE_LBOUND = 400, |
70 | | IPF_V4_FRAG_SIZE_MIN_DEF = 1200, |
71 | | IPF_V6_FRAG_SIZE_LBOUND = 400, /* Useful for testing. */ |
72 | | IPF_V6_FRAG_SIZE_MIN_DEF = 1280, |
73 | | IPF_MAX_FRAGS_DEFAULT = 1000, |
74 | | IPF_NFRAG_UBOUND = 5000, |
75 | | }; |
76 | | |
77 | | enum ipf_counter_type { |
78 | | IPF_NFRAGS_ACCEPTED, |
79 | | IPF_NFRAGS_COMPL_SENT, |
80 | | IPF_NFRAGS_EXPD_SENT, |
81 | | IPF_NFRAGS_TOO_SMALL, |
82 | | IPF_NFRAGS_OVERLAP, |
83 | | IPF_NFRAGS_PURGED, |
84 | | IPF_NFRAGS_NUM_CNTS, |
85 | | }; |
86 | | |
87 | | union ipf_addr { |
88 | | ovs_be32 ipv4; |
89 | | struct in6_addr ipv6; |
90 | | }; |
91 | | |
92 | | /* Represents a single fragment; part of a list of fragments. */ |
93 | | struct ipf_frag { |
94 | | struct dp_packet *pkt; |
95 | | uint16_t start_data_byte; |
96 | | uint16_t end_data_byte; |
97 | | }; |
98 | | |
99 | | /* The key for a collection of fragments potentially making up an unfragmented |
100 | | * packet. */ |
101 | | struct ipf_list_key { |
102 | | /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the first |
103 | | * two members. */ |
104 | | union ipf_addr src_addr; |
105 | | union ipf_addr dst_addr; |
106 | | uint32_t recirc_id; |
107 | | ovs_be32 ip_id; /* V6 is 32 bits. */ |
108 | | ovs_be16 dl_type; |
109 | | uint16_t zone; |
110 | | uint8_t nw_proto; |
111 | | }; |
112 | | |
113 | | /* A collection of fragments potentially making up an unfragmented packet. */ |
114 | | struct ipf_list { |
115 | | struct hmap_node node; /* In struct ipf's 'frag_lists'. */ |
116 | | struct ovs_list list_node; /* In struct ipf's 'frag_exp_list' or |
117 | | * 'frag_complete_list'. */ |
118 | | struct ipf_frag *frag_list; /* List of fragments for this list. */ |
119 | | struct ipf_list_key key; /* The key for the fragment list. */ |
120 | | struct dp_packet *reass_execute_ctx; /* Reassembled packet. */ |
121 | | long long expiration; /* In milliseconds. */ |
122 | | int last_sent_idx; /* Last sent fragment idx. */ |
123 | | int last_inuse_idx; /* Last inuse fragment idx. */ |
124 | | int size; /* Fragment list size. */ |
125 | | uint8_t state; /* Frag list state; see ipf_list_state. */ |
126 | | }; |
127 | | |
128 | | /* Represents a reassembled packet which is typically passed through |
129 | | * conntrack. */ |
130 | | struct reassembled_pkt { |
131 | | struct ovs_list rp_list_node; /* In struct ipf's |
132 | | * 'reassembled_pkt_list'. */ |
133 | | struct dp_packet *pkt; |
134 | | struct ipf_list *list; |
135 | | }; |
136 | | |
137 | | struct ipf { |
138 | | /* The clean thread is used to clean up fragments in the 'ipf' |
139 | | * module if packet batches are no longer sent through its user. */ |
140 | | pthread_t ipf_clean_thread; |
141 | | struct latch ipf_clean_thread_exit; |
142 | | |
143 | | int max_v4_frag_list_size; |
144 | | |
145 | | struct ovs_mutex ipf_lock; /* Protects all of the following. */ |
146 | | /* These contain 'struct ipf_list's. */ |
147 | | struct hmap frag_lists OVS_GUARDED; |
148 | | struct ovs_list frag_exp_list OVS_GUARDED; |
149 | | struct ovs_list frag_complete_list OVS_GUARDED; |
150 | | /* Contains 'struct reassembled_pkt's. */ |
151 | | struct ovs_list reassembled_pkt_list OVS_GUARDED; |
152 | | |
153 | | /* Used to allow disabling fragmentation reassembly. */ |
154 | | atomic_bool ifp_v4_enabled; |
155 | | atomic_bool ifp_v6_enabled; |
156 | | |
157 | | /* Will be clamped above 400 bytes; the value chosen should handle |
158 | | * alg control packets of interest that use string encoding of mutable |
159 | | * IP fields; meaning, the control packets should not be fragmented. */ |
160 | | atomic_uint min_v4_frag_size; |
161 | | atomic_uint min_v6_frag_size; |
162 | | |
163 | | /* Configurable maximum allowable fragments in process. */ |
164 | | atomic_uint nfrag_max; |
165 | | |
166 | | /* Number of fragments in process. */ |
167 | | atomic_count nfrag; |
168 | | |
169 | | atomic_uint64_t n4frag_cnt[IPF_NFRAGS_NUM_CNTS]; |
170 | | atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS]; |
171 | | }; |
172 | | |
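| | /* Rate-limited warning that hex dumps the first 128 bytes at 'pkt', prefixed |
| | * by the error string 'es'. */ |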
173 | | static void |
174 | | ipf_print_reass_packet(const char *es, const void *pkt) |
175 | 0 | { |
176 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); |
177 | 0 | if (!VLOG_DROP_WARN(&rl)) { |
178 | 0 | struct ds ds = DS_EMPTY_INITIALIZER; |
179 | 0 | ds_put_hex_dump(&ds, pkt, 128, 0, false); |
180 | 0 | VLOG_WARN("%s\n%s", es, ds_cstr(&ds)); |
181 | 0 | ds_destroy(&ds); |
182 | 0 | } |
183 | 0 | } |
184 | | |
185 | | static void |
186 | | ipf_count(struct ipf *ipf, bool v6, enum ipf_counter_type cntr) |
187 | 0 | { |
188 | 0 | atomic_count_inc64(v6 ? &ipf->n6frag_cnt[cntr] : &ipf->n4frag_cnt[cntr]); |
189 | 0 | } |
190 | | |
191 | | static bool |
192 | | ipf_get_v4_enabled(struct ipf *ipf) |
193 | 0 | { |
194 | 0 | bool ifp_v4_enabled_; |
195 | 0 | atomic_read_relaxed(&ipf->ifp_v4_enabled, &ifp_v4_enabled_); |
196 | 0 | return ifp_v4_enabled_; |
197 | 0 | } |
198 | | |
199 | | static bool |
200 | | ipf_get_v6_enabled(struct ipf *ipf) |
201 | 0 | { |
202 | 0 | bool ifp_v6_enabled_; |
203 | 0 | atomic_read_relaxed(&ipf->ifp_v6_enabled, &ifp_v6_enabled_); |
204 | 0 | return ifp_v6_enabled_; |
205 | 0 | } |
206 | | |
207 | | static bool |
208 | | ipf_get_enabled(struct ipf *ipf) |
209 | 0 | { |
210 | 0 | return ipf_get_v4_enabled(ipf) || ipf_get_v6_enabled(ipf); |
211 | 0 | } |
212 | | |
213 | | static uint32_t |
214 | | ipf_addr_hash_add(uint32_t hash, const union ipf_addr *addr) |
215 | 0 | { |
216 | 0 | BUILD_ASSERT_DECL(sizeof *addr % 4 == 0); |
217 | 0 | return hash_add_bytes32(hash, (const uint32_t *) addr, sizeof *addr); |
218 | 0 | } |
219 | | |
220 | | /* Adds a list of fragments to the list tracking expiry of yet to be |
221 | | * completed reassembled packets, hence subject to expiry. */ |
222 | | static void |
223 | | ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list *ipf_list, |
224 | | long long now) |
225 | | /* OVS_REQUIRES(ipf->ipf_lock) */ |
226 | 0 | { |
227 | 0 | enum { |
228 | 0 | IPF_FRAG_LIST_TIMEOUT = 15000, |
229 | 0 | }; |
230 | |
|
231 | 0 | ipf_list->expiration = now + IPF_FRAG_LIST_TIMEOUT; |
232 | 0 | ovs_list_push_back(frag_exp_list, &ipf_list->list_node); |
233 | 0 | } |
234 | | |
235 | | /* Adds a list of fragments to the list of completed packets, which will be |
236 | | * subsequently transmitted. */ |
237 | | static void |
238 | | ipf_completed_list_add(struct ovs_list *frag_complete_list, |
239 | | struct ipf_list *ipf_list) |
240 | | /* OVS_REQUIRES(ipf_lock) */ |
241 | 0 | { |
242 | 0 | ovs_list_push_back(frag_complete_list, &ipf_list->list_node); |
243 | 0 | } |
244 | | |
245 | | /* Adds a reassembled packet to the list of reassembled packets, awaiting some |
246 | | * processing, such as being sent through conntrack. */ |
247 | | static void |
248 | | ipf_reassembled_list_add(struct ovs_list *reassembled_pkt_list, |
249 | | struct reassembled_pkt *rp) |
250 | | /* OVS_REQUIRES(ipf_lock) */ |
251 | 0 | { |
252 | 0 | ovs_list_push_back(reassembled_pkt_list, &rp->rp_list_node); |
253 | 0 | } |
254 | | |
255 | | /* Removes a frag list from the tracking data structures and frees list heap |
256 | | * memory. */ |
257 | | static void |
258 | | ipf_list_clean(struct hmap *frag_lists, |
259 | | struct ipf_list *ipf_list) |
260 | | /* OVS_REQUIRES(ipf_lock) */ |
261 | 0 | { |
262 | 0 | ovs_list_remove(&ipf_list->list_node); |
263 | 0 | hmap_remove(frag_lists, &ipf_list->node); |
264 | 0 | free(ipf_list->frag_list); |
265 | 0 | free(ipf_list); |
266 | 0 | } |
267 | | |
268 | | /* Removes a frag list sitting on the expiry list from the tracking |
269 | | * data structures and frees list heap memory. */ |
270 | | static void |
271 | | ipf_expiry_list_clean(struct hmap *frag_lists, |
272 | | struct ipf_list *ipf_list) |
273 | | /* OVS_REQUIRES(ipf_lock) */ |
274 | 0 | { |
275 | 0 | ipf_list_clean(frag_lists, ipf_list); |
276 | 0 | } |
277 | | |
278 | | /* Removes a frag list sitting on the completed list from the tracking |
279 | | * data structures and frees list heap memory. */ |
280 | | static void |
281 | | ipf_completed_list_clean(struct hmap *frag_lists, |
282 | | struct ipf_list *ipf_list) |
283 | | /* OVS_REQUIRES(ipf_lock) */ |
284 | 0 | { |
285 | 0 | ipf_list_clean(frag_lists, ipf_list); |
286 | 0 | } |
287 | | |
288 | | static void |
289 | | ipf_expiry_list_remove(struct ipf_list *ipf_list) |
290 | | /* OVS_REQUIRES(ipf_lock) */ |
291 | 0 | { |
292 | 0 | ovs_list_remove(&ipf_list->list_node); |
293 | 0 | } |
294 | | |
295 | | static void |
296 | | ipf_reassembled_list_remove(struct reassembled_pkt *rp) |
297 | | /* OVS_REQUIRES(ipf_lock) */ |
298 | 0 | { |
299 | 0 | ovs_list_remove(&rp->rp_list_node); |
300 | 0 | } |
301 | | |
302 | | /* Symmetric: swapping 'src_addr' and 'dst_addr' yields the same hash. */ |
303 | | static uint32_t |
304 | | ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis) |
305 | 0 | { |
306 | 0 | uint32_t hsrc, hdst, hash; |
307 | 0 | hsrc = hdst = basis; |
308 | 0 | hsrc = ipf_addr_hash_add(hsrc, &key->src_addr); |
309 | 0 | hdst = ipf_addr_hash_add(hdst, &key->dst_addr); |
310 | 0 | hash = hsrc ^ hdst; |
311 | | |
312 | | /* Hash the rest of the key. */ |
313 | 0 | return hash_words((uint32_t *) (&key->dst_addr + 1), |
314 | 0 | (uint32_t *) (key + 1) - |
315 | 0 | (uint32_t *) (&key->dst_addr + 1), |
316 | 0 | hash); |
317 | 0 | } |
318 | | |
319 | | static bool |
320 | | ipf_is_first_v4_frag(const struct dp_packet *pkt) |
321 | 0 | { |
322 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
323 | 0 | if (!(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) && |
324 | 0 | l3->ip_frag_off & htons(IP_MORE_FRAGMENTS)) { |
325 | 0 | return true; |
326 | 0 | } |
327 | 0 | return false; |
328 | 0 | } |
329 | | |
330 | | static bool |
331 | | ipf_is_last_v4_frag(const struct dp_packet *pkt) |
332 | 0 | { |
333 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
334 | 0 | if (l3->ip_frag_off & htons(IP_FRAG_OFF_MASK) && |
335 | 0 | !(l3->ip_frag_off & htons(IP_MORE_FRAGMENTS))) { |
336 | 0 | return true; |
337 | 0 | } |
338 | 0 | return false; |
339 | 0 | } |
340 | | |
341 | | static bool |
342 | | ipf_is_v6_frag(ovs_be16 ip6f_offlg) |
343 | 0 | { |
344 | 0 | if (ip6f_offlg & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) { |
345 | 0 | return true; |
346 | 0 | } |
347 | 0 | return false; |
348 | 0 | } |
349 | | |
350 | | static bool |
351 | | ipf_is_first_v6_frag(ovs_be16 ip6f_offlg) |
352 | 0 | { |
353 | 0 | if (!(ip6f_offlg & IP6F_OFF_MASK) && |
354 | 0 | ip6f_offlg & IP6F_MORE_FRAG) { |
355 | 0 | return true; |
356 | 0 | } |
357 | 0 | return false; |
358 | 0 | } |
359 | | |
360 | | static bool |
361 | | ipf_is_last_v6_frag(ovs_be16 ip6f_offlg) |
362 | 0 | { |
363 | 0 | if ((ip6f_offlg & IP6F_OFF_MASK) && |
364 | 0 | !(ip6f_offlg & IP6F_MORE_FRAG)) { |
365 | 0 | return true; |
366 | 0 | } |
367 | 0 | return false; |
368 | 0 | } |
369 | | |
370 | | /* Checks whether a sorted list of fragments forms a complete packet. */ |
371 | | static bool |
372 | | ipf_list_complete(const struct ipf_list *ipf_list) |
373 | | /* OVS_REQUIRES(ipf_lock) */ |
374 | 0 | { |
375 | 0 | for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { |
376 | 0 | if (ipf_list->frag_list[i - 1].end_data_byte + 1 |
377 | 0 | != ipf_list->frag_list[i].start_data_byte) { |
378 | 0 | return false; |
379 | 0 | } |
380 | 0 | } |
381 | 0 | return true; |
382 | 0 | } |
383 | | |
384 | | /* Insertion sort; runs in O(n) for a sorted or almost sorted list. */ |
385 | | static void |
386 | | ipf_sort(struct ipf_frag *frag_list, size_t last_idx) |
387 | | /* OVS_REQUIRES(ipf_lock) */ |
388 | 0 | { |
389 | 0 | for (int li = 1; li <= last_idx; li++) { |
390 | 0 | struct ipf_frag ipf_frag = frag_list[li]; |
391 | 0 | int ci = li - 1; |
392 | 0 | while (ci >= 0 && |
393 | 0 | frag_list[ci].start_data_byte > ipf_frag.start_data_byte) { |
394 | 0 | frag_list[ci + 1] = frag_list[ci]; |
395 | 0 | ci--; |
396 | 0 | } |
397 | 0 | frag_list[ci + 1] = ipf_frag; |
398 | 0 | } |
399 | 0 | } |
400 | | |
401 | | /* Called on a sorted complete list of v4 fragments to reassemble them into |
402 | | * a single packet that can be processed, such as passing through conntrack. |
403 | | */ |
404 | | static struct dp_packet * |
405 | | ipf_reassemble_v4_frags(struct ipf_list *ipf_list) |
406 | | /* OVS_REQUIRES(ipf_lock) */ |
407 | 0 | { |
408 | 0 | struct ipf_frag *frag_list = ipf_list->frag_list; |
409 | 0 | struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt); |
410 | 0 | dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); |
411 | 0 | struct ip_header *l3 = dp_packet_l3(pkt); |
412 | 0 | int len = ntohs(l3->ip_tot_len); |
413 | |
|
414 | 0 | int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - |
415 | 0 | frag_list[1].start_data_byte + 1; |
416 | |
|
417 | 0 | if (len + rest_len > IPV4_PACKET_MAX_SIZE) { |
418 | 0 | ipf_print_reass_packet( |
419 | 0 | "Unsupported big reassembled v4 packet; v4 hdr:", l3); |
420 | 0 | dp_packet_delete(pkt); |
421 | 0 | return NULL; |
422 | 0 | } |
423 | | |
424 | 0 | dp_packet_prealloc_tailroom(pkt, rest_len); |
425 | |
|
426 | 0 | for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { |
427 | 0 | size_t add_len = frag_list[i].end_data_byte - |
428 | 0 | frag_list[i].start_data_byte + 1; |
429 | 0 | const char *l4 = dp_packet_l4(frag_list[i].pkt); |
430 | 0 | dp_packet_put(pkt, l4, add_len); |
431 | 0 | } |
432 | |
|
433 | 0 | len += rest_len; |
434 | 0 | l3 = dp_packet_l3(pkt); |
435 | 0 | ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS); |
436 | 0 | if (!dp_packet_hwol_is_ipv4(pkt)) { |
437 | 0 | l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off, |
438 | 0 | new_ip_frag_off); |
439 | 0 | l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len)); |
440 | 0 | } |
441 | 0 | l3->ip_tot_len = htons(len); |
442 | 0 | l3->ip_frag_off = new_ip_frag_off; |
443 | 0 | dp_packet_set_l2_pad_size(pkt, 0); |
444 | |
|
445 | 0 | return pkt; |
446 | 0 | } |
447 | | |
448 | | /* Called on a sorted complete list of v6 fragments to reassemble them into |
449 | | * a single packet that can be processed, such as passing through conntrack. |
450 | | */ |
451 | | static struct dp_packet * |
452 | | ipf_reassemble_v6_frags(struct ipf_list *ipf_list) |
453 | | /* OVS_REQUIRES(ipf_lock) */ |
454 | 0 | { |
455 | 0 | struct ipf_frag *frag_list = ipf_list->frag_list; |
456 | 0 | struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt); |
457 | 0 | dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); |
458 | 0 | struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); |
459 | 0 | int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag); |
460 | |
|
461 | 0 | int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - |
462 | 0 | frag_list[1].start_data_byte + 1; |
463 | |
|
464 | 0 | if (pl + rest_len > IPV6_PACKET_MAX_DATA) { |
465 | 0 | ipf_print_reass_packet( |
466 | 0 | "Unsupported big reassembled v6 packet; v6 hdr:", l3); |
467 | 0 | dp_packet_delete(pkt); |
468 | 0 | return NULL; |
469 | 0 | } |
470 | | |
471 | 0 | dp_packet_prealloc_tailroom(pkt, rest_len); |
472 | |
|
473 | 0 | for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { |
474 | 0 | size_t add_len = frag_list[i].end_data_byte - |
475 | 0 | frag_list[i].start_data_byte + 1; |
476 | 0 | const char *l4 = dp_packet_l4(frag_list[i].pkt); |
477 | 0 | dp_packet_put(pkt, l4, add_len); |
478 | 0 | } |
479 | |
|
480 | 0 | pl += rest_len; |
481 | 0 | l3 = dp_packet_l3(pkt); |
482 | |
|
483 | 0 | uint8_t nw_proto = l3->ip6_nxt; |
484 | 0 | uint8_t nw_frag = 0; |
485 | 0 | const void *data = l3 + 1; |
486 | 0 | size_t datasize = pl; |
487 | |
|
488 | 0 | const struct ovs_16aligned_ip6_frag *frag_hdr = NULL; |
489 | 0 | if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr) |
490 | 0 | || !nw_frag || !frag_hdr) { |
491 | |
|
492 | 0 | ipf_print_reass_packet("Unparsed reassembled v6 packet; v6 hdr:", l3); |
493 | 0 | dp_packet_delete(pkt); |
494 | 0 | return NULL; |
495 | 0 | } |
496 | | |
497 | 0 | struct ovs_16aligned_ip6_frag *fh = |
498 | 0 | CONST_CAST(struct ovs_16aligned_ip6_frag *, frag_hdr); |
499 | 0 | fh->ip6f_offlg = 0; |
500 | 0 | l3->ip6_plen = htons(pl); |
501 | 0 | l3->ip6_ctlun.ip6_un1.ip6_un1_nxt = nw_proto; |
502 | 0 | dp_packet_set_l2_pad_size(pkt, 0); |
503 | 0 | return pkt; |
504 | 0 | } |
505 | | |
506 | | /* Called when a frag list state transitions to another state. This is |
507 | | * triggered by a new fragment for the list being received. */ |
508 | | static void |
509 | | ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, |
510 | | bool ff, bool lf, bool v6) |
511 | | OVS_REQUIRES(ipf->ipf_lock) |
512 | 0 | { |
513 | 0 | enum ipf_list_state curr_state = ipf_list->state; |
514 | 0 | enum ipf_list_state next_state; |
515 | 0 | switch (curr_state) { |
516 | 0 | case IPF_LIST_STATE_UNUSED: |
517 | 0 | case IPF_LIST_STATE_OTHER_SEEN: |
518 | 0 | if (ff) { |
519 | 0 | next_state = IPF_LIST_STATE_FIRST_SEEN; |
520 | 0 | } else if (lf) { |
521 | 0 | next_state = IPF_LIST_STATE_LAST_SEEN; |
522 | 0 | } else { |
523 | 0 | next_state = IPF_LIST_STATE_OTHER_SEEN; |
524 | 0 | } |
525 | 0 | break; |
526 | 0 | case IPF_LIST_STATE_FIRST_SEEN: |
527 | 0 | if (lf) { |
528 | 0 | next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; |
529 | 0 | } else { |
530 | 0 | next_state = IPF_LIST_STATE_FIRST_SEEN; |
531 | 0 | } |
532 | 0 | break; |
533 | 0 | case IPF_LIST_STATE_LAST_SEEN: |
534 | 0 | if (ff) { |
535 | 0 | next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; |
536 | 0 | } else { |
537 | 0 | next_state = IPF_LIST_STATE_LAST_SEEN; |
538 | 0 | } |
539 | 0 | break; |
540 | 0 | case IPF_LIST_STATE_FIRST_LAST_SEEN: |
541 | 0 | next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; |
542 | 0 | break; |
543 | 0 | case IPF_LIST_STATE_COMPLETED: |
544 | 0 | case IPF_LIST_STATE_REASS_FAIL: |
545 | 0 | case IPF_LIST_STATE_NUM: |
546 | 0 | default: |
547 | 0 | OVS_NOT_REACHED(); |
548 | 0 | } |
549 | | |
550 | 0 | if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN) { |
551 | 0 | ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx); |
552 | 0 | if (ipf_list_complete(ipf_list)) { |
553 | 0 | struct dp_packet *reass_pkt = v6 |
554 | 0 | ? ipf_reassemble_v6_frags(ipf_list) |
555 | 0 | : ipf_reassemble_v4_frags(ipf_list); |
556 | 0 | if (reass_pkt) { |
557 | 0 | struct reassembled_pkt *rp = xzalloc(sizeof *rp); |
558 | 0 | rp->pkt = reass_pkt; |
559 | 0 | rp->list = ipf_list; |
560 | 0 | ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp); |
561 | 0 | ipf_expiry_list_remove(ipf_list); |
562 | 0 | next_state = IPF_LIST_STATE_COMPLETED; |
563 | 0 | } else { |
564 | 0 | next_state = IPF_LIST_STATE_REASS_FAIL; |
565 | 0 | } |
566 | 0 | } |
567 | 0 | } |
568 | 0 | ipf_list->state = next_state; |
569 | 0 | } |
570 | | |
571 | | /* Some sanity checks are redundant, but prudent, in case code paths for |
572 | | * fragments change in future. The processing cost for fragments is not |
573 | | * important. */ |
574 | | static bool |
575 | | ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt) |
576 | 0 | { |
577 | 0 | if (OVS_UNLIKELY(dp_packet_ip_checksum_bad(pkt))) { |
578 | 0 | COVERAGE_INC(ipf_l3csum_err); |
579 | 0 | goto invalid_pkt; |
580 | 0 | } |
581 | | |
582 | 0 | const struct eth_header *l2 = dp_packet_eth(pkt); |
583 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
584 | |
|
585 | 0 | if (OVS_UNLIKELY(!l2 || !l3)) { |
586 | 0 | goto invalid_pkt; |
587 | 0 | } |
588 | | |
589 | 0 | size_t l3_size = dp_packet_l3_size(pkt); |
590 | 0 | if (OVS_UNLIKELY(l3_size < IP_HEADER_LEN)) { |
591 | 0 | goto invalid_pkt; |
592 | 0 | } |
593 | | |
594 | 0 | if (!IP_IS_FRAGMENT(l3->ip_frag_off)) { |
595 | 0 | return false; |
596 | 0 | } |
597 | | |
598 | 0 | uint16_t ip_tot_len = ntohs(l3->ip_tot_len); |
599 | 0 | if (OVS_UNLIKELY(ip_tot_len != l3_size)) { |
600 | 0 | goto invalid_pkt; |
601 | 0 | } |
602 | | |
603 | 0 | size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4; |
604 | 0 | if (OVS_UNLIKELY(ip_hdr_len < IP_HEADER_LEN)) { |
605 | 0 | goto invalid_pkt; |
606 | 0 | } |
607 | 0 | if (OVS_UNLIKELY(l3_size < ip_hdr_len)) { |
608 | 0 | goto invalid_pkt; |
609 | 0 | } |
610 | | |
611 | 0 | if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(pkt) |
612 | 0 | && !dp_packet_hwol_is_ipv4(pkt) |
613 | 0 | && csum(l3, ip_hdr_len) != 0)) { |
614 | 0 | COVERAGE_INC(ipf_l3csum_err); |
615 | 0 | goto invalid_pkt; |
616 | 0 | } |
617 | | |
618 | 0 | uint32_t min_v4_frag_size_; |
619 | 0 | atomic_read_relaxed(&ipf->min_v4_frag_size, &min_v4_frag_size_); |
620 | 0 | bool lf = ipf_is_last_v4_frag(pkt); |
621 | 0 | if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v4_frag_size_)) { |
622 | 0 | ipf_count(ipf, false, IPF_NFRAGS_TOO_SMALL); |
623 | 0 | goto invalid_pkt; |
624 | 0 | } |
625 | 0 | return true; |
626 | | |
627 | 0 | invalid_pkt: |
628 | 0 | pkt->md.ct_state = CS_INVALID; |
629 | 0 | return false; |
630 | 0 | } |
631 | | |
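| | /* Extracts the fragment list key and the start/end data byte offsets from a |
| | * validated v4 fragment; sets 'ff' and 'lf' for first/last fragments. */ |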
632 | | static bool |
633 | | ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone, |
634 | | struct ipf_list_key *key, uint16_t *start_data_byte, |
635 | | uint16_t *end_data_byte, bool *ff, bool *lf) |
636 | 0 | { |
637 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
638 | 0 | uint16_t ip_tot_len = ntohs(l3->ip_tot_len); |
639 | 0 | size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4; |
640 | |
|
641 | 0 | *start_data_byte = ntohs(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) * 8; |
642 | 0 | *end_data_byte = *start_data_byte + ip_tot_len - ip_hdr_len - 1; |
643 | 0 | *ff = ipf_is_first_v4_frag(pkt); |
644 | 0 | *lf = ipf_is_last_v4_frag(pkt); |
645 | 0 | memset(key, 0, sizeof *key); |
646 | 0 | key->ip_id = be16_to_be32(l3->ip_id); |
647 | 0 | key->dl_type = dl_type; |
648 | 0 | key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src); |
649 | 0 | key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst); |
650 | 0 | key->nw_proto = l3->ip_proto; |
651 | 0 | key->zone = zone; |
652 | 0 | key->recirc_id = pkt->md.recirc_id; |
653 | 0 | return true; |
654 | 0 | } |
655 | | |
656 | | /* Some sanity checks are redundant, but prudent, in case code paths for |
657 | | * fragments change in future. The processing cost for fragments is not |
658 | | * important. */ |
659 | | static bool |
660 | | ipf_is_valid_v6_frag(struct ipf *ipf, struct dp_packet *pkt) |
661 | 0 | { |
662 | 0 | const struct eth_header *l2 = dp_packet_eth(pkt); |
663 | 0 | const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); |
664 | 0 | const char *l4 = dp_packet_l4(pkt); |
665 | |
|
666 | 0 | if (OVS_UNLIKELY(!l2 || !l3 || !l4)) { |
667 | 0 | goto invalid_pkt; |
668 | 0 | } |
669 | | |
670 | 0 | size_t l3_size = dp_packet_l3_size(pkt); |
671 | 0 | size_t l3_hdr_size = sizeof *l3; |
672 | |
|
673 | 0 | if (OVS_UNLIKELY(l3_size < l3_hdr_size)) { |
674 | 0 | goto invalid_pkt; |
675 | 0 | } |
676 | | |
677 | 0 | uint8_t nw_frag = 0; |
678 | 0 | uint8_t nw_proto = l3->ip6_nxt; |
679 | 0 | const void *data = l3 + 1; |
680 | 0 | size_t datasize = l3_size - l3_hdr_size; |
681 | 0 | const struct ovs_16aligned_ip6_frag *frag_hdr = NULL; |
682 | 0 | if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, |
683 | 0 | &frag_hdr) || !nw_frag || !frag_hdr) { |
684 | 0 | return false; |
685 | 0 | } |
686 | | |
687 | 0 | int pl = ntohs(l3->ip6_plen); |
688 | 0 | if (OVS_UNLIKELY(pl + l3_hdr_size != l3_size)) { |
689 | 0 | goto invalid_pkt; |
690 | 0 | } |
691 | | |
692 | 0 | ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg; |
693 | 0 | if (OVS_UNLIKELY(!ipf_is_v6_frag(ip6f_offlg))) { |
694 | 0 | return false; |
695 | 0 | } |
696 | | |
697 | 0 | uint32_t min_v6_frag_size_; |
698 | 0 | atomic_read_relaxed(&ipf->min_v6_frag_size, &min_v6_frag_size_); |
699 | 0 | bool lf = ipf_is_last_v6_frag(ip6f_offlg); |
700 | |
|
701 | 0 | if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v6_frag_size_)) { |
702 | 0 | ipf_count(ipf, true, IPF_NFRAGS_TOO_SMALL); |
703 | 0 | goto invalid_pkt; |
704 | 0 | } |
705 | | |
706 | 0 | return true; |
707 | | |
708 | 0 | invalid_pkt: |
709 | 0 | pkt->md.ct_state = CS_INVALID; |
710 | 0 | return false; |
711 | |
|
712 | 0 | } |
713 | | |
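| | /* Extracts the fragment list key and the start/end data byte offsets from a |
| | * validated v6 fragment; sets 'ff' and 'lf' for first/last fragments. */ |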
714 | | static void |
715 | | ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone, |
716 | | struct ipf_list_key *key, uint16_t *start_data_byte, |
717 | | uint16_t *end_data_byte, bool *ff, bool *lf) |
718 | 0 | { |
719 | 0 | const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); |
720 | 0 | uint8_t nw_frag = 0; |
721 | 0 | uint8_t nw_proto = l3->ip6_nxt; |
722 | 0 | const void *data = l3 + 1; |
723 | 0 | size_t datasize = dp_packet_l3_size(pkt) - sizeof *l3; |
724 | 0 | const struct ovs_16aligned_ip6_frag *frag_hdr = NULL; |
725 | |
|
726 | 0 | parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr); |
727 | 0 | ovs_assert(nw_frag && frag_hdr); |
728 | 0 | ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg; |
729 | 0 | *start_data_byte = ntohs(ip6f_offlg & IP6F_OFF_MASK) + |
730 | 0 | sizeof (struct ovs_16aligned_ip6_frag); |
731 | 0 | *end_data_byte = *start_data_byte + dp_packet_l4_size(pkt) - 1; |
732 | 0 | *ff = ipf_is_first_v6_frag(ip6f_offlg); |
733 | 0 | *lf = ipf_is_last_v6_frag(ip6f_offlg); |
734 | 0 | memset(key, 0, sizeof *key); |
735 | 0 | key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident); |
736 | 0 | key->dl_type = dl_type; |
737 | 0 | memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6); |
738 | | /* We do not support parsing the routing header to use as the |
739 | | * dst address part of the key. */ |
740 | 0 | memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6); |
741 | 0 | key->nw_proto = 0; /* Not used for key for V6. */ |
742 | 0 | key->zone = zone; |
743 | 0 | key->recirc_id = pkt->md.recirc_id; |
744 | 0 | } |
745 | | |
746 | | static bool |
747 | | ipf_list_key_eq(const struct ipf_list_key *key1, |
748 | | const struct ipf_list_key *key2) |
749 | | /* OVS_REQUIRES(ipf_lock) */ |
750 | 0 | { |
751 | 0 | if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) && |
752 | 0 | !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) && |
753 | 0 | key1->dl_type == key2->dl_type && |
754 | 0 | key1->ip_id == key2->ip_id && |
755 | 0 | key1->zone == key2->zone && |
756 | 0 | key1->nw_proto == key2->nw_proto && |
757 | 0 | key1->recirc_id == key2->recirc_id) { |
758 | 0 | return true; |
759 | 0 | } |
760 | 0 | return false; |
761 | 0 | } |
762 | | |
763 | | static struct ipf_list * |
764 | | ipf_list_key_lookup(struct ipf *ipf, const struct ipf_list_key *key, |
765 | | uint32_t hash) |
766 | | OVS_REQUIRES(ipf->ipf_lock) |
767 | 0 | { |
768 | 0 | struct ipf_list *ipf_list; |
769 | 0 | HMAP_FOR_EACH_WITH_HASH (ipf_list, node, hash, &ipf->frag_lists) { |
770 | 0 | if (ipf_list_key_eq(&ipf_list->key, key)) { |
771 | 0 | return ipf_list; |
772 | 0 | } |
773 | 0 | } |
774 | 0 | return NULL; |
775 | 0 | } |
776 | | |
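| | /* Returns true if the start or end byte of the new fragment falls within the |
| | * range of a fragment already stored in 'frag_list'. */ |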
777 | | static bool |
778 | | ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, |
779 | | size_t start_data_byte, size_t end_data_byte) |
780 | | /* OVS_REQUIRES(ipf_lock) */ |
781 | 0 | { |
782 | 0 | for (int i = 0; i <= last_inuse_idx; i++) { |
783 | 0 | if ((start_data_byte >= frag_list[i].start_data_byte && |
784 | 0 | start_data_byte <= frag_list[i].end_data_byte) || |
785 | 0 | (end_data_byte >= frag_list[i].start_data_byte && |
786 | 0 | end_data_byte <= frag_list[i].end_data_byte)) { |
787 | 0 | return true; |
788 | 0 | } |
789 | 0 | } |
790 | 0 | return false; |
791 | 0 | } |
792 | | |
793 | | /* Adds a fragment to a list of fragments, if the fragment is not a |
794 | | * duplicate. If the fragment is a duplicate, it is marked invalid here, |
795 | | * saving conntrack the work of marking it as invalid, which it would do |
796 | | * in all cases. */ |
797 | | static bool |
798 | | ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, |
799 | | struct dp_packet *pkt, uint16_t start_data_byte, |
800 | | uint16_t end_data_byte, bool ff, bool lf, bool v6) |
801 | | OVS_REQUIRES(ipf->ipf_lock) |
802 | 0 | { |
803 | 0 | bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, |
804 | 0 | ipf_list->last_inuse_idx, start_data_byte, end_data_byte); |
805 | 0 | int last_inuse_idx = ipf_list->last_inuse_idx; |
806 | |
|
807 | 0 | if (!duped_frag) { |
808 | 0 | if (last_inuse_idx < ipf_list->size - 1) { |
809 | | /* In the case of dpdk, it would be unfortunate if we had |
810 | | * to create a clone fragment outside the dpdk mp due to the |
811 | | * mempool size being too limited. We will otherwise need to |
812 | | * recommend not setting the mempool number of buffers too low |
813 | | * and also clamp the number of fragments. */ |
814 | 0 | struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1]; |
815 | 0 | frag->pkt = dp_packet_clone(pkt); |
816 | 0 | frag->start_data_byte = start_data_byte; |
817 | 0 | frag->end_data_byte = end_data_byte; |
818 | 0 | ipf_list->last_inuse_idx++; |
819 | 0 | atomic_count_inc(&ipf->nfrag); |
820 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); |
821 | 0 | ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); |
822 | 0 | } else { |
823 | 0 | OVS_NOT_REACHED(); |
824 | 0 | } |
825 | 0 | } else { |
826 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_OVERLAP); |
827 | 0 | pkt->md.ct_state = CS_INVALID; |
828 | 0 | return false; |
829 | 0 | } |
830 | 0 | return true; |
831 | 0 | } |
832 | | |
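| | /* Initializes a fragment list with 'key' and allocates an initial fragment |
| | * array of 'max_frag_list_size' entries. */ |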
833 | | static void |
834 | | ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key, |
835 | | int max_frag_list_size) |
836 | 0 | { |
837 | 0 | ipf_list->key = *key; |
838 | 0 | ipf_list->last_inuse_idx = IPF_INVALID_IDX; |
839 | 0 | ipf_list->last_sent_idx = IPF_INVALID_IDX; |
840 | 0 | ipf_list->reass_execute_ctx = NULL; |
841 | 0 | ipf_list->state = IPF_LIST_STATE_UNUSED; |
842 | 0 | ipf_list->size = max_frag_list_size; |
843 | 0 | ipf_list->frag_list |
844 | 0 | = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list); |
845 | 0 | } |
846 | | |
847 | | /* Generates a fragment list key from a well-formed fragment and either starts |
848 | | * a new fragment list or increases the size of the existing fragment list, |
849 | | * while checking that the maximum number of tracked fragments is not |
850 | | * exceeded and that the list size is not impossibly big. Calls |
851 | | * 'ipf_process_frag()' to add a fragment to a list of fragments. */ |
852 | | static bool |
853 | | ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, |
854 | | uint16_t zone, long long now, uint32_t hash_basis) |
855 | | OVS_REQUIRES(ipf->ipf_lock) |
856 | 0 | { |
857 | 0 | struct ipf_list_key key; |
858 | | /* Initialize 4 variables for some versions of GCC. */ |
859 | 0 | uint16_t start_data_byte = 0; |
860 | 0 | uint16_t end_data_byte = 0; |
861 | 0 | bool ff = false; |
862 | 0 | bool lf = false; |
863 | 0 | bool v6 = dl_type == htons(ETH_TYPE_IPV6); |
864 | |
|
865 | 0 | if (v6 && ipf_get_v6_enabled(ipf)) { |
866 | 0 | ipf_v6_key_extract(pkt, dl_type, zone, &key, &start_data_byte, |
867 | 0 | &end_data_byte, &ff, &lf); |
868 | 0 | } else if (!v6 && ipf_get_v4_enabled(ipf)) { |
869 | 0 | ipf_v4_key_extract(pkt, dl_type, zone, &key, &start_data_byte, |
870 | 0 | &end_data_byte, &ff, &lf); |
871 | 0 | } else { |
872 | 0 | return false; |
873 | 0 | } |
874 | | |
875 | 0 | unsigned int nfrag_max; |
876 | 0 | atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max); |
877 | 0 | if (atomic_count_get(&ipf->nfrag) >= nfrag_max) { |
878 | 0 | return false; |
879 | 0 | } |
880 | | |
881 | 0 | uint32_t hash = ipf_list_key_hash(&key, hash_basis); |
882 | 0 | struct ipf_list *ipf_list = ipf_list_key_lookup(ipf, &key, hash); |
883 | 0 | enum { |
884 | 0 | IPF_FRAG_LIST_MIN_INCREMENT = 4, |
885 | 0 | IPF_IPV6_MAX_FRAG_LIST_SIZE = 65535, |
886 | 0 | }; |
887 | |
|
888 | 0 | int max_frag_list_size; |
889 | 0 | if (v6) { |
890 | | /* Because the calculation with extension headers is variable, |
891 | | * we don't calculate a hard maximum fragment list size upfront. The |
892 | | * fragment list size is practically limited by the code, however. */ |
893 | 0 | max_frag_list_size = IPF_IPV6_MAX_FRAG_LIST_SIZE; |
894 | 0 | } else { |
895 | 0 | max_frag_list_size = ipf->max_v4_frag_list_size; |
896 | 0 | } |
897 | |
|
898 | 0 | if (!ipf_list) { |
899 | 0 | ipf_list = xmalloc(sizeof *ipf_list); |
900 | 0 | ipf_list_init(ipf_list, &key, |
901 | 0 | MIN(max_frag_list_size, IPF_FRAG_LIST_MIN_INCREMENT)); |
902 | 0 | hmap_insert(&ipf->frag_lists, &ipf_list->node, hash); |
903 | 0 | ipf_expiry_list_add(&ipf->frag_exp_list, ipf_list, now); |
904 | 0 | } else if (ipf_list->state == IPF_LIST_STATE_REASS_FAIL || |
905 | 0 | ipf_list->state == IPF_LIST_STATE_COMPLETED) { |
906 | | /* Bail out as early as possible. */ |
907 | 0 | return false; |
908 | 0 | } else if (ipf_list->last_inuse_idx + 1 >= ipf_list->size) { |
909 | 0 | int increment = MIN(IPF_FRAG_LIST_MIN_INCREMENT, |
910 | 0 | max_frag_list_size - ipf_list->size); |
911 | | /* Enforce limit. */ |
912 | 0 | if (increment > 0) { |
913 | 0 | ipf_list->frag_list = |
914 | 0 | xrealloc(ipf_list->frag_list, (ipf_list->size + increment) * |
915 | 0 | sizeof *ipf_list->frag_list); |
916 | 0 | ipf_list->size += increment; |
917 | 0 | } else { |
918 | 0 | return false; |
919 | 0 | } |
920 | 0 | } |
921 | | |
922 | 0 | return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, |
923 | 0 | end_data_byte, ff, lf, v6); |
924 | 0 | } |
925 | | |
926 | | /* Filters out fragments from a batch of fragments and adjusts the batch. */ |
927 | | static void |
928 | | ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, |
929 | | ovs_be16 dl_type, uint16_t zone, long long now, |
930 | | uint32_t hash_basis) |
931 | 0 | { |
932 | 0 | const size_t pb_cnt = dp_packet_batch_size(pb); |
933 | 0 | int pb_idx; /* Index in a packet batch. */ |
934 | 0 | struct dp_packet *pkt; |
935 | |
|
936 | 0 | DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { |
937 | 0 | if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) && |
938 | 0 | ipf_is_valid_v4_frag(ipf, pkt)) |
939 | 0 | || |
940 | 0 | (dl_type == htons(ETH_TYPE_IPV6) && |
941 | 0 | ipf_is_valid_v6_frag(ipf, pkt)))) { |
942 | |
|
943 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
944 | 0 | if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { |
945 | 0 | dp_packet_batch_refill(pb, pkt, pb_idx); |
946 | 0 | } else { |
947 | 0 | dp_packet_delete(pkt); |
948 | 0 | } |
949 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
950 | 0 | } else { |
951 | 0 | dp_packet_batch_refill(pb, pkt, pb_idx); |
952 | 0 | } |
953 | 0 | } |
954 | 0 | } |
955 | | |
956 | | /* In case of DPDK, a memory source check is done, as DPDK memory pool |
957 | | * management has trouble dealing with multiple source types. The |
958 | | * check_source parameter is used to indicate when this check is needed. */ |
959 | | static bool |
960 | | ipf_dp_packet_batch_add(struct dp_packet_batch *pb , struct dp_packet *pkt, |
961 | | bool check_source OVS_UNUSED) |
962 | 0 | { |
963 | | #ifdef DPDK_NETDEV |
964 | | if ((dp_packet_batch_is_full(pb)) || |
965 | | /* DPDK cannot handle multiple sources in a batch. */ |
966 | | (check_source && !dp_packet_batch_is_empty(pb) |
967 | | && pb->packets[0]->source != pkt->source)) { |
968 | | #else |
969 | 0 | if (dp_packet_batch_is_full(pb)) { |
970 | 0 | #endif |
971 | 0 | return false; |
972 | 0 | } |
973 | | |
974 | 0 | dp_packet_batch_add(pb, pkt); |
975 | 0 | return true; |
976 | 0 | } |
977 | | |
978 | | /* This would be used in rare cases where a list cannot be sent. One rare |
979 | | * reason known right now is a mempool source check, which exists due to DPDK |
980 | | * support, where packets are no longer being received on any port with a |
981 | | * source matching the fragment. Another reason is a race where all |
982 | | * conntrack rules are unconfigured when some fragments are yet to be |
983 | | * flushed. |
984 | | * |
985 | | * Returns true if the list was purged. */ |
986 | | static bool |
987 | | ipf_purge_list_check(struct ipf *ipf, struct ipf_list *ipf_list, |
988 | | long long now) |
989 | | OVS_REQUIRES(ipf->ipf_lock) |
990 | 0 | { |
991 | 0 | enum { |
992 | 0 | IPF_FRAG_LIST_PURGE_TIME_ADJ = 10000 |
993 | 0 | }; |
994 | |
|
995 | 0 | if (now < ipf_list->expiration + IPF_FRAG_LIST_PURGE_TIME_ADJ) { |
996 | 0 | return false; |
997 | 0 | } |
998 | | |
999 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1000 | 0 | struct dp_packet * pkt |
1001 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1002 | 0 | dp_packet_delete(pkt); |
1003 | 0 | atomic_count_dec(&ipf->nfrag); |
1004 | 0 | COVERAGE_INC(ipf_stuck_frag_list_purged); |
1005 | 0 | ipf_count(ipf, ipf_list->key.dl_type == htons(ETH_TYPE_IPV6), |
1006 | 0 | IPF_NFRAGS_PURGED); |
1007 | 0 | ipf_list->last_sent_idx++; |
1008 | 0 | } |
1009 | |
|
1010 | 0 | return true; |
1011 | 0 | } |
1012 | | |
1013 | | /* Does the packet batch management and common accounting work associated |
1014 | | * with 'ipf_send_completed_frags()' and 'ipf_send_expired_frags()'. */ |
1015 | | static bool |
1016 | | ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list, |
1017 | | struct dp_packet_batch *pb, |
1018 | | enum ipf_list_type list_type, bool v6, long long now) |
1019 | | OVS_REQUIRES(ipf->ipf_lock) |
1020 | 0 | { |
1021 | 0 | if (ipf_purge_list_check(ipf, ipf_list, now)) { |
1022 | 0 | return true; |
1023 | 0 | } |
1024 | | |
1025 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1026 | 0 | struct dp_packet *pkt |
1027 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1028 | 0 | if (ipf_dp_packet_batch_add(pb, pkt, true)) { |
1029 | 0 | ipf_list->last_sent_idx++; |
1030 | 0 | atomic_count_dec(&ipf->nfrag); |
1031 | |
|
1032 | 0 | if (list_type == IPF_FRAG_COMPLETED_LIST) { |
1033 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_COMPL_SENT); |
1034 | 0 | } else { |
1035 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_EXPD_SENT); |
1036 | 0 | pkt->md.ct_state = CS_INVALID; |
1037 | 0 | } |
1038 | |
|
1039 | 0 | if (ipf_list->last_sent_idx == ipf_list->last_inuse_idx) { |
1040 | 0 | return true; |
1041 | 0 | } |
1042 | 0 | } else { |
1043 | 0 | return false; |
1044 | 0 | } |
1045 | 0 | } |
1046 | 0 | OVS_NOT_REACHED(); |
1047 | 0 | } |
1048 | | |
1049 | | /* Adds fragments associated with a completed fragment list to a packet batch |
1050 | | * to be processed by the calling application, typically conntrack. Also |
1051 | | * cleans up the list context when it is empty. */ |
1052 | | static void |
1053 | | ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb, |
1054 | | long long now, bool v6) |
1055 | 0 | { |
1056 | 0 | if (ovs_list_is_empty(&ipf->frag_complete_list)) { |
1057 | 0 | return; |
1058 | 0 | } |
1059 | | |
1060 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1061 | 0 | struct ipf_list *ipf_list; |
1062 | |
|
1063 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) { |
1064 | 0 | if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_COMPLETED_LIST, |
1065 | 0 | v6, now)) { |
1066 | 0 | ipf_completed_list_clean(&ipf->frag_lists, ipf_list); |
1067 | 0 | } else { |
1068 | 0 | break; |
1069 | 0 | } |
1070 | 0 | } |
1071 | |
|
1072 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1073 | 0 | } |
1074 | | |
1075 | | /* Conservatively adds fragments associated with an expired fragment list to |
1076 | | * a packet batch to be processed by the calling application, typically |
1077 | | * conntrack. Also cleans up the list context when it is empty. */ |
1078 | | static void |
1079 | | ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb, |
1080 | | long long now, bool v6) |
1081 | 0 | { |
1082 | 0 | enum { |
1083 | | /* Very conservative, due to DOS probability. */ |
1084 | 0 | IPF_FRAG_LIST_MAX_EXPIRED = 1, |
1085 | 0 | }; |
1086 | | |
1087 | |
|
1088 | 0 | if (ovs_list_is_empty(&ipf->frag_exp_list)) { |
1089 | 0 | return; |
1090 | 0 | } |
1091 | | |
1092 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1093 | 0 | struct ipf_list *ipf_list; |
1094 | 0 | size_t lists_removed = 0; |
1095 | |
|
1096 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) { |
1097 | 0 | if (now <= ipf_list->expiration || |
1098 | 0 | lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) { |
1099 | 0 | break; |
1100 | 0 | } |
1101 | | |
1102 | 0 | if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_EXPIRY_LIST, |
1103 | 0 | v6, now)) { |
1104 | 0 | ipf_expiry_list_clean(&ipf->frag_lists, ipf_list); |
1105 | 0 | lists_removed++; |
1106 | 0 | } else { |
1107 | 0 | break; |
1108 | 0 | } |
1109 | 0 | } |
1110 | |
|
1111 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1112 | 0 | } |
1113 | | |
1114 | | /* Adds a reassembled packet to a packet batch to be processed by the caller. |
1115 | | */ |
1116 | | static void |
1117 | | ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb) |
1118 | 0 | { |
1119 | 0 | if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) { |
1120 | 0 | return; |
1121 | 0 | } |
1122 | | |
1123 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1124 | 0 | struct reassembled_pkt *rp; |
1125 | |
|
1126 | 0 | LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { |
1127 | 0 | if (!rp->list->reass_execute_ctx && |
1128 | 0 | ipf_dp_packet_batch_add(pb, rp->pkt, false)) { |
1129 | 0 | rp->list->reass_execute_ctx = rp->pkt; |
1130 | 0 | } |
1131 | 0 | } |
1132 | |
|
1133 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1134 | 0 | } |
1135 | | |
1136 | | /* Checks for reassembled packets post processing by conntrack and edits the |
1137 | | * fragments if needed based on what conntrack decided. */ |
1138 | | static void |
1139 | | ipf_post_execute_reass_pkts(struct ipf *ipf, |
1140 | | struct dp_packet_batch *pb, bool v6) |
1141 | 0 | { |
1142 | 0 | if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) { |
1143 | 0 | return; |
1144 | 0 | } |
1145 | | |
1146 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1147 | 0 | struct reassembled_pkt *rp; |
1148 | |
|
1149 | 0 | LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { |
1150 | 0 | const size_t pb_cnt = dp_packet_batch_size(pb); |
1151 | 0 | int pb_idx; |
1152 | 0 | struct dp_packet *pkt; |
1153 | | /* Inner batch loop is constant time since batch size is <= |
1154 | | * NETDEV_MAX_BURST. */ |
1155 | 0 | DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { |
1156 | 0 | if (rp && pkt == rp->list->reass_execute_ctx) { |
1157 | 0 | const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; |
1158 | 0 | void *l4_frag = dp_packet_l4(frag_0->pkt); |
1159 | 0 | void *l4_reass = dp_packet_l4(pkt); |
1160 | 0 | memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); |
1161 | |
|
1162 | 0 | for (int i = 0; i <= rp->list->last_inuse_idx; i++) { |
1163 | 0 | const struct ipf_frag *frag_i = &rp->list->frag_list[i]; |
1164 | |
|
1165 | 0 | frag_i->pkt->md.ct_label = pkt->md.ct_label; |
1166 | 0 | frag_i->pkt->md.ct_mark = pkt->md.ct_mark; |
1167 | 0 | frag_i->pkt->md.ct_state = pkt->md.ct_state; |
1168 | 0 | frag_i->pkt->md.ct_zone = pkt->md.ct_zone; |
1169 | 0 | frag_i->pkt->md.ct_orig_tuple_ipv6 = |
1170 | 0 | pkt->md.ct_orig_tuple_ipv6; |
1171 | 0 | if (pkt->md.ct_orig_tuple_ipv6) { |
1172 | 0 | frag_i->pkt->md.ct_orig_tuple.ipv6 = |
1173 | 0 | pkt->md.ct_orig_tuple.ipv6; |
1174 | 0 | } else { |
1175 | 0 | frag_i->pkt->md.ct_orig_tuple.ipv4 = |
1176 | 0 | pkt->md.ct_orig_tuple.ipv4; |
1177 | 0 | } |
1178 | 0 | if (v6) { |
1179 | 0 | struct ovs_16aligned_ip6_hdr *l3_frag |
1180 | 0 | = dp_packet_l3(frag_i->pkt); |
1181 | 0 | struct ovs_16aligned_ip6_hdr *l3_reass |
1182 | 0 | = dp_packet_l3(pkt); |
1183 | 0 | l3_frag->ip6_src = l3_reass->ip6_src; |
1184 | 0 | l3_frag->ip6_dst = l3_reass->ip6_dst; |
1185 | 0 | } else { |
1186 | 0 | struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt); |
1187 | 0 | struct ip_header *l3_reass = dp_packet_l3(pkt); |
1188 | 0 | if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) { |
1189 | 0 | ovs_be32 reass_ip = |
1190 | 0 | get_16aligned_be32(&l3_reass->ip_src); |
1191 | 0 | ovs_be32 frag_ip = |
1192 | 0 | get_16aligned_be32(&l3_frag->ip_src); |
1193 | |
|
1194 | 0 | l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, |
1195 | 0 | frag_ip, |
1196 | 0 | reass_ip); |
1197 | 0 | reass_ip = get_16aligned_be32(&l3_reass->ip_dst); |
1198 | 0 | frag_ip = get_16aligned_be32(&l3_frag->ip_dst); |
1199 | 0 | l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, |
1200 | 0 | frag_ip, |
1201 | 0 | reass_ip); |
1202 | 0 | } |
1203 | |
|
1204 | 0 | l3_frag->ip_src = l3_reass->ip_src; |
1205 | 0 | l3_frag->ip_dst = l3_reass->ip_dst; |
1206 | 0 | } |
1207 | 0 | } |
1208 | |
|
1209 | 0 | ipf_completed_list_add(&ipf->frag_complete_list, rp->list); |
1210 | 0 | ipf_reassembled_list_remove(rp); |
1211 | 0 | dp_packet_delete(rp->pkt); |
1212 | 0 | free(rp); |
1213 | 0 | rp = NULL; |
1214 | 0 | } else { |
1215 | 0 | dp_packet_batch_refill(pb, pkt, pb_idx); |
1216 | 0 | } |
1217 | 0 | } |
1218 | 0 | } |
1219 | |
|
1220 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1221 | 0 | } |
1222 | | |
1223 | | /* Extracts any fragments from the batch and reassembles them when a |
1224 | | * complete packet is received. Completed packets are attempted to |
1225 | | * be added to the batch to be sent through conntrack. */ |
1226 | | void |
1227 | | ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb, |
1228 | | long long now, ovs_be16 dl_type, uint16_t zone, |
1229 | | uint32_t hash_basis) |
1230 | 0 | { |
1231 | 0 | if (ipf_get_enabled(ipf)) { |
1232 | 0 | ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now, hash_basis); |
1233 | 0 | } |
1234 | |
|
1235 | 0 | if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) { |
1236 | 0 | ipf_execute_reass_pkts(ipf, pb); |
1237 | 0 | } |
1238 | 0 | } |
1239 | | |
1240 | | /* Updates fragments based on the processing of the reassembled packet sent |
1241 | | * through conntrack and adds these fragments to any batches seen. Expired |
1242 | | * fragments are marked as invalid and also added to the batches seen |
1243 | | * with low priority. Reassembled packets are freed. */ |
1244 | | void |
1245 | | ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb, |
1246 | | long long now, ovs_be16 dl_type) |
1247 | 0 | { |
1248 | 0 | if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) { |
1249 | 0 | bool v6 = dl_type == htons(ETH_TYPE_IPV6); |
1250 | 0 | ipf_post_execute_reass_pkts(ipf, pb, v6); |
1251 | 0 | ipf_send_completed_frags(ipf, pb, now, v6); |
1252 | 0 | ipf_send_expired_frags(ipf, pb, now, v6); |
1253 | 0 | } |
1254 | 0 | } |
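| |
| | /* Illustrative usage sketch (not part of this file); a caller typically wraps |
| | * its conntrack processing of a batch like this: |
| | * |
| | * struct ipf *ipf = ipf_init(); |
| | * ... |
| | * ipf_preprocess_conntrack(ipf, pb, now, dl_type, zone, hash_basis); |
| | * ... conntrack processes 'pb' here ... |
| | * ipf_postprocess_conntrack(ipf, pb, now, dl_type); |
| | * ... |
| | * ipf_destroy(ipf); |
| | */ |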
1255 | | |
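| | /* Main loop of the background clean thread: periodically purges stuck expiry |
| | * and completed fragment lists when batches are no longer flowing through |
| | * the module. */ |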
1256 | | static void * |
1257 | | ipf_clean_thread_main(void *f) |
1258 | 0 | { |
1259 | 0 | struct ipf *ipf = f; |
1260 | |
|
1261 | 0 | enum { |
1262 | 0 | IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000, |
1263 | 0 | }; |
1264 | |
|
1265 | 0 | while (!latch_is_set(&ipf->ipf_clean_thread_exit)) { |
1266 | |
|
1267 | 0 | long long now = time_msec(); |
1268 | |
|
1269 | 0 | if (!ovs_list_is_empty(&ipf->frag_exp_list) || |
1270 | 0 | !ovs_list_is_empty(&ipf->frag_complete_list)) { |
1271 | |
|
1272 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1273 | |
|
1274 | 0 | struct ipf_list *ipf_list; |
1275 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, |
1276 | 0 | &ipf->frag_exp_list) { |
1277 | 0 | if (ipf_purge_list_check(ipf, ipf_list, now)) { |
1278 | 0 | ipf_expiry_list_clean(&ipf->frag_lists, ipf_list); |
1279 | 0 | } |
1280 | 0 | } |
1281 | |
|
1282 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, |
1283 | 0 | &ipf->frag_complete_list) { |
1284 | 0 | if (ipf_purge_list_check(ipf, ipf_list, now)) { |
1285 | 0 | ipf_completed_list_clean(&ipf->frag_lists, ipf_list); |
1286 | 0 | } |
1287 | 0 | } |
1288 | |
|
1289 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1290 | 0 | } |
1291 | |
|
1292 | 0 | poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT); |
1293 | 0 | latch_wait(&ipf->ipf_clean_thread_exit); |
1294 | 0 | poll_block(); |
1295 | 0 | } |
1296 | |
|
1297 | 0 | return NULL; |
1298 | 0 | } |
1299 | | |
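| | /* Allocates and initializes the 'ipf' module state and starts the background |
| | * clean thread. */ |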
1300 | | struct ipf * |
1301 | | ipf_init(void) |
1302 | 0 | { |
1303 | 0 | struct ipf *ipf = xzalloc(sizeof *ipf); |
1304 | |
|
1305 | 0 | ovs_mutex_init_adaptive(&ipf->ipf_lock); |
1306 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1307 | 0 | hmap_init(&ipf->frag_lists); |
1308 | 0 | ovs_list_init(&ipf->frag_exp_list); |
1309 | 0 | ovs_list_init(&ipf->frag_complete_list); |
1310 | 0 | ovs_list_init(&ipf->reassembled_pkt_list); |
1311 | 0 | atomic_init(&ipf->min_v4_frag_size, IPF_V4_FRAG_SIZE_MIN_DEF); |
1312 | 0 | atomic_init(&ipf->min_v6_frag_size, IPF_V6_FRAG_SIZE_MIN_DEF); |
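| | /* With the defaults above this works out to |
| | * DIV_ROUND_UP(65535 - 60, 1200 - 60) = DIV_ROUND_UP(65475, 1140) = 58 |
| | * entries at most per v4 fragment list. */ |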
1313 | 0 | ipf->max_v4_frag_list_size = DIV_ROUND_UP( |
1314 | 0 | IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE, |
1315 | 0 | ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE); |
1316 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1317 | 0 | atomic_count_init(&ipf->nfrag, 0); |
1318 | 0 | for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) { |
1319 | 0 | atomic_init(&ipf->n4frag_cnt[i], 0); |
1320 | 0 | atomic_init(&ipf->n6frag_cnt[i], 0); |
1321 | 0 | } |
1322 | 0 | atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT); |
1323 | 0 | atomic_init(&ipf->ifp_v4_enabled, true); |
1324 | 0 | atomic_init(&ipf->ifp_v6_enabled, true); |
1325 | 0 | latch_init(&ipf->ipf_clean_thread_exit); |
1326 | 0 | ipf->ipf_clean_thread = ovs_thread_create("ipf_clean", |
1327 | 0 | ipf_clean_thread_main, ipf); |
1328 | |
|
1329 | 0 | return ipf; |
1330 | 0 | } |
1331 | | |
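| | /* Stops the clean thread, releases any remaining fragments and reassembled |
| | * packets, and frees the 'ipf' module state. */ |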
1332 | | void |
1333 | | ipf_destroy(struct ipf *ipf) |
1334 | 0 | { |
1335 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1336 | 0 | latch_set(&ipf->ipf_clean_thread_exit); |
1337 | 0 | pthread_join(ipf->ipf_clean_thread, NULL); |
1338 | 0 | latch_destroy(&ipf->ipf_clean_thread_exit); |
1339 | |
|
1340 | 0 | struct ipf_list *ipf_list; |
1341 | 0 | HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) { |
1342 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1343 | 0 | struct dp_packet *pkt |
1344 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1345 | 0 | dp_packet_delete(pkt); |
1346 | 0 | atomic_count_dec(&ipf->nfrag); |
1347 | 0 | ipf_list->last_sent_idx++; |
1348 | 0 | } |
1349 | 0 | free(ipf_list->frag_list); |
1350 | 0 | free(ipf_list); |
1351 | 0 | } |
1352 | |
|
1353 | 0 | if (atomic_count_get(&ipf->nfrag)) { |
1354 | 0 | VLOG_WARN("ipf destroy with non-zero fragment count. "); |
1355 | 0 | } |
1356 | |
|
1357 | 0 | struct reassembled_pkt *rp; |
1358 | 0 | LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) { |
1359 | 0 | dp_packet_delete(rp->pkt); |
1360 | 0 | free(rp); |
1361 | 0 | } |
1362 | |
|
1363 | 0 | hmap_destroy(&ipf->frag_lists); |
1364 | 0 | ovs_list_poison(&ipf->frag_exp_list); |
1365 | 0 | ovs_list_poison(&ipf->frag_complete_list); |
1366 | 0 | ovs_list_poison(&ipf->reassembled_pkt_list); |
1367 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1368 | 0 | ovs_mutex_destroy(&ipf->ipf_lock); |
1369 | 0 | free(ipf); |
1370 | 0 | } |
1371 | | |
1372 | | int |
1373 | | ipf_set_enabled(struct ipf *ipf, bool v6, bool enable) |
1374 | 0 | { |
1375 | 0 | atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled, |
1376 | 0 | enable); |
1377 | 0 | return 0; |
1378 | 0 | } |
1379 | | |
1380 | | int |
1381 | | ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value) |
1382 | 0 | { |
1383 | | /* If the user specifies an unreasonably large number, fragmentation |
1384 | | * will not work well but it will not blow up. */ |
1385 | 0 | if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND : IPF_V4_FRAG_SIZE_LBOUND)) { |
1386 | 0 | return 1; |
1387 | 0 | } |
1388 | | |
1389 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1390 | 0 | if (v6) { |
1391 | 0 | atomic_store_relaxed(&ipf->min_v6_frag_size, value); |
1392 | 0 | } else { |
1393 | 0 | atomic_store_relaxed(&ipf->min_v4_frag_size, value); |
1394 | 0 | ipf->max_v4_frag_list_size = DIV_ROUND_UP( |
1395 | 0 | IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE, |
1396 | 0 | ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE); |
1397 | 0 | } |
1398 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1399 | 0 | return 0; |
1400 | 0 | } |
1401 | | |
1402 | | int |
1403 | | ipf_set_max_nfrags(struct ipf *ipf, uint32_t value) |
1404 | 0 | { |
1405 | 0 | if (value > IPF_NFRAG_UBOUND) { |
1406 | 0 | return 1; |
1407 | 0 | } |
1408 | 0 | atomic_store_relaxed(&ipf->nfrag_max, value); |
1409 | 0 | return 0; |
1410 | 0 | } |
1411 | | |
1412 | | int |
1413 | | ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status) |
1414 | 0 | { |
1415 | 0 | ipf_status->nfrag = atomic_count_get(&ipf->nfrag); |
1416 | 0 | atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max); |
1417 | |
|
1418 | 0 | atomic_read_relaxed(&ipf->ifp_v4_enabled, &ipf_status->v4.enabled); |
1419 | 0 | atomic_read_relaxed(&ipf->min_v4_frag_size, |
1420 | 0 | &ipf_status->v4.min_frag_size); |
1421 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED], |
1422 | 0 | &ipf_status->v4.nfrag_accepted); |
1423 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT], |
1424 | 0 | &ipf_status->v4.nfrag_completed_sent); |
1425 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_EXPD_SENT], |
1426 | 0 | &ipf_status->v4.nfrag_expired_sent); |
1427 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL], |
1428 | 0 | &ipf_status->v4.nfrag_too_small); |
1429 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP], |
1430 | 0 | &ipf_status->v4.nfrag_overlap); |
1431 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED], |
1432 | 0 | &ipf_status->v4.nfrag_purged); |
1433 | |
|
1434 | 0 | atomic_read_relaxed(&ipf->ifp_v6_enabled, &ipf_status->v6.enabled); |
1435 | 0 | atomic_read_relaxed(&ipf->min_v6_frag_size, |
1436 | 0 | &ipf_status->v6.min_frag_size); |
1437 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED], |
1438 | 0 | &ipf_status->v6.nfrag_accepted); |
1439 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT], |
1440 | 0 | &ipf_status->v6.nfrag_completed_sent); |
1441 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_EXPD_SENT], |
1442 | 0 | &ipf_status->v6.nfrag_expired_sent); |
1443 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL], |
1444 | 0 | &ipf_status->v6.nfrag_too_small); |
1445 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP], |
1446 | 0 | &ipf_status->v6.nfrag_overlap); |
1447 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED], |
1448 | 0 | &ipf_status->v6.nfrag_purged); |
1449 | 0 | return 0; |
1450 | 0 | } |
1451 | | |
1452 | | struct ipf_dump_ctx { |
1453 | | struct hmap_position bucket_pos; |
1454 | | }; |
1455 | | |
1456 | | /* Allocates an 'ipf_dump_ctx' to keep track of an hmap position. The |
1457 | | * caller must call ipf_dump_done() when dumping is finished. */ |
1458 | | int |
1459 | | ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx) |
1460 | 0 | { |
1461 | 0 | *ipf_dump_ctx = xzalloc(sizeof **ipf_dump_ctx); |
1462 | 0 | return 0; |
1463 | 0 | } |
1464 | | |
1465 | | /* Creates a string representation of the state of an 'ipf_list' and puts |
1466 | | * it in 'ds'. */ |
1467 | | static void |
1468 | | ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds) |
1469 | 0 | { |
1470 | 0 | ds_put_cstr(ds, "("); |
1471 | 0 | if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) { |
1472 | 0 | ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",", |
1473 | 0 | IP_ARGS(ipf_list->key.src_addr.ipv4), |
1474 | 0 | IP_ARGS(ipf_list->key.dst_addr.ipv4)); |
1475 | 0 | } else { |
1476 | 0 | ds_put_cstr(ds, "src="); |
1477 | 0 | ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds); |
1478 | 0 | ds_put_cstr(ds, ",dst="); |
1479 | 0 | ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds); |
1480 | 0 | ds_put_cstr(ds, ","); |
1481 | 0 | } |
1482 | |
|
1483 | 0 | ds_put_format(ds, "recirc_id=%u,ip_id=%u,dl_type=0x%x,zone=%u,nw_proto=%u", |
1484 | 0 | ipf_list->key.recirc_id, ntohl(ipf_list->key.ip_id), |
1485 | 0 | ntohs(ipf_list->key.dl_type), ipf_list->key.zone, |
1486 | 0 | ipf_list->key.nw_proto); |
1487 | |
|
1488 | 0 | ds_put_format(ds, ",num_fragments=%u,state=%s", |
1489 | 0 | ipf_list->last_inuse_idx + 1, |
1490 | 0 | ipf_state_name[ipf_list->state]); |
1491 | |
|
1492 | 0 | ds_put_cstr(ds, ")"); |
1493 | 0 | } |
1494 | | |
1495 | | /* Finds the next ipf list starting from 'ipf_dump_ctx->bucket_pos' and uses |
1496 | | * ipf_dump_create() to create a string representation of the state of an |
1497 | | * ipf list, and points 'dump' at it. Returns EOF when there are no |
1498 | | * more ipf lists. */ |
1499 | | int |
1500 | | ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char **dump) |
1501 | 0 | { |
1502 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1503 | |
|
1504 | 0 | struct hmap_node *node = hmap_at_position(&ipf->frag_lists, |
1505 | 0 | &ipf_dump_ctx->bucket_pos); |
1506 | 0 | if (!node) { |
1507 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1508 | 0 | return EOF; |
1509 | 0 | } else { |
1510 | 0 | struct ipf_list *ipf_list_; |
1511 | 0 | INIT_CONTAINER(ipf_list_, node, node); |
1512 | 0 | struct ipf_list ipf_list = *ipf_list_; |
1513 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1514 | 0 | struct ds ds = DS_EMPTY_INITIALIZER; |
1515 | 0 | ipf_dump_create(&ipf_list, &ds); |
1516 | 0 | *dump = ds_steal_cstr(&ds); |
1517 | 0 | return 0; |
1518 | 0 | } |
1519 | 0 | } |
1520 | | |
1521 | | /* Frees 'ipf_dump_ctx' allocated by ipf_dump_start(). */ |
1522 | | int |
1523 | | ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx) |
1524 | 0 | { |
1525 | 0 | free(ipf_dump_ctx); |
1526 | 0 | return 0; |
1527 | 0 | } |