/src/openvswitch/lib/ipf.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019 Nicira, Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at: |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <config.h> |
18 | | #include <ctype.h> |
19 | | #include <errno.h> |
20 | | #include <sys/types.h> |
21 | | #include <netinet/in.h> |
22 | | #include <netinet/ip6.h> |
23 | | #include <netinet/icmp6.h> |
24 | | #include <string.h> |
25 | | |
26 | | #include "coverage.h" |
27 | | #include "csum.h" |
28 | | #include "ipf.h" |
29 | | #include "latch.h" |
30 | | #include "openvswitch/hmap.h" |
31 | | #include "openvswitch/poll-loop.h" |
32 | | #include "openvswitch/types.h" |
33 | | #include "openvswitch/vlog.h" |
34 | | #include "ovs-atomic.h" |
35 | | #include "packets.h" |
36 | | #include "util.h" |
37 | | |
38 | | VLOG_DEFINE_THIS_MODULE(ipf); |
39 | | COVERAGE_DEFINE(ipf_stuck_frag_list_expired); |
40 | | COVERAGE_DEFINE(ipf_stuck_frag_list_purged); |
41 | | COVERAGE_DEFINE(ipf_l3csum_checked); |
42 | | COVERAGE_DEFINE(ipf_l3csum_err); |
43 | | |
44 | | enum { |
45 | | IPV4_PACKET_MAX_HDR_SIZE = 60, |
46 | | IPV4_PACKET_MAX_SIZE = 65535, |
47 | | IPV6_PACKET_MAX_DATA = 65535, |
48 | | }; |
49 | | |
50 | | enum ipf_list_state { |
51 | | IPF_LIST_STATE_UNUSED, |
52 | | IPF_LIST_STATE_REASS_FAIL, |
53 | | IPF_LIST_STATE_OTHER_SEEN, |
54 | | IPF_LIST_STATE_FIRST_SEEN, |
55 | | IPF_LIST_STATE_LAST_SEEN, |
56 | | IPF_LIST_STATE_FIRST_LAST_SEEN, |
57 | | IPF_LIST_STATE_COMPLETED, |
58 | | IPF_LIST_STATE_NUM, |
59 | | }; |
60 | | |
61 | | static char *ipf_state_name[IPF_LIST_STATE_NUM] = |
62 | | {"unused", "reassemble fail", "other frag", "first frag", "last frag", |
63 | | "first/last frag", "complete"}; |
64 | | |
65 | | enum ipf_list_type { |
66 | | IPF_FRAG_COMPLETED_LIST, |
67 | | IPF_FRAG_EXPIRY_LIST, |
68 | | }; |
69 | | |
70 | | enum { |
71 | | IPF_INVALID_IDX = -1, |
72 | | IPF_V4_FRAG_SIZE_LBOUND = 400, |
73 | | IPF_V4_FRAG_SIZE_MIN_DEF = 1200, |
74 | | IPF_V6_FRAG_SIZE_LBOUND = 400, /* Useful for testing. */ |
75 | | IPF_V6_FRAG_SIZE_MIN_DEF = 1280, |
76 | | IPF_MAX_FRAGS_DEFAULT = 1000, |
77 | | IPF_NFRAG_UBOUND = 5000, |
78 | | }; |
79 | | |
80 | | enum ipf_counter_type { |
81 | | IPF_NFRAGS_ACCEPTED, |
82 | | IPF_NFRAGS_COMPL_SENT, |
83 | | IPF_NFRAGS_EXPIRED, |
84 | | IPF_NFRAGS_TOO_SMALL, |
85 | | IPF_NFRAGS_OVERLAP, |
86 | | IPF_NFRAGS_PURGED, |
87 | | IPF_NFRAGS_NUM_CNTS, |
88 | | }; |
89 | | |
90 | | union ipf_addr { |
91 | | ovs_be32 ipv4; |
92 | | struct in6_addr ipv6; |
93 | | }; |
94 | | |
95 | | /* Represents a single fragment; part of a list of fragments. */ |
96 | | struct ipf_frag { |
97 | | struct dp_packet *pkt; |
98 | | uint16_t start_data_byte; |
99 | | uint16_t end_data_byte; |
100 | | }; |
101 | | |
102 | | /* The key for a collection of fragments potentially making up an unfragmented |
103 | | * packet. */ |
104 | | struct ipf_list_key { |
105 | | /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the first |
106 | | * two members. */ |
107 | | union ipf_addr src_addr; |
108 | | union ipf_addr dst_addr; |
109 | | uint32_t recirc_id; |
110 | | ovs_be32 ip_id; /* V6 is 32 bits. */ |
111 | | ovs_be16 dl_type; |
112 | | uint16_t zone; |
113 | | uint8_t nw_proto; |
114 | | }; |
115 | | |
116 | | /* A collection of fragments potentially making up an unfragmented packet. */ |
117 | | struct ipf_list { |
118 | | struct hmap_node node; /* In struct ipf's 'frag_lists'. */ |
119 | | struct ovs_list list_node; /* In struct ipf's 'frag_exp_list' or |
120 | | * 'frag_complete_list'. */ |
121 | | struct ipf_frag *frag_list; /* List of fragments for this list. */ |
122 | | struct ipf_list_key key; /* The key for the fragment list. */ |
123 | | struct dp_packet *reass_execute_ctx; /* Reassembled packet. */ |
124 | | long long expiration; /* In milliseconds. */ |
125 | | int last_sent_idx; /* Last sent fragment idx. */ |
126 | | int last_inuse_idx; /* Last inuse fragment idx. */ |
127 | | int size; /* Fragment list size. */ |
128 | | uint8_t state; /* Frag list state; see ipf_list_state. */ |
129 | | }; |
130 | | |
131 | | /* Represents a reassembled packet which typically is passed through |
132 | | * conntrack. */ |
133 | | struct reassembled_pkt { |
134 | | struct ovs_list rp_list_node; /* In struct ipf's |
135 | | * 'reassembled_pkt_list'. */ |
136 | | struct dp_packet *pkt; |
137 | | struct ipf_list *list; |
138 | | }; |
139 | | |
140 | | struct ipf { |
141 | | /* The clean thread is used to clean up fragments in the 'ipf' |
142 | | * module if packet batches are no longer sent through its user. */ |
143 | | pthread_t ipf_clean_thread; |
144 | | struct latch ipf_clean_thread_exit; |
145 | | |
146 | | int max_v4_frag_list_size; |
147 | | |
148 | | struct ovs_mutex ipf_lock; /* Protects all of the following. */ |
149 | | /* These contain 'struct ipf_list's. */ |
150 | | struct hmap frag_lists OVS_GUARDED; |
151 | | struct ovs_list frag_exp_list OVS_GUARDED; |
152 | | struct ovs_list frag_complete_list OVS_GUARDED; |
153 | | /* Contains 'struct reassembled_pkt's. */ |
154 | | struct ovs_list reassembled_pkt_list OVS_GUARDED; |
155 | | |
156 | | /* Used to allow disabling fragmentation reassembly. */ |
157 | | atomic_bool ifp_v4_enabled; |
158 | | atomic_bool ifp_v6_enabled; |
159 | | |
160 | | /* Will be clamped to a lower bound of 400 bytes; the value chosen should |
161 | | * handle ALG control packets of interest that use string encoding of |
162 | | * mutable IP fields; that is, such control packets should not be fragmented. */ |
163 | | atomic_uint min_v4_frag_size; |
164 | | atomic_uint min_v6_frag_size; |
165 | | |
166 | | /* Configurable maximum allowable fragments in process. */ |
167 | | atomic_uint nfrag_max; |
168 | | |
169 | | /* Number of fragments in process. */ |
170 | | atomic_count nfrag; |
171 | | |
172 | | atomic_uint64_t n4frag_cnt[IPF_NFRAGS_NUM_CNTS]; |
173 | | atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS]; |
174 | | }; |
175 | | |
176 | | static void |
177 | | ipf_print_reass_packet(const char *es, const void *pkt) |
178 | 0 | { |
179 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); |
180 | 0 | if (!VLOG_DROP_WARN(&rl)) { |
181 | 0 | struct ds ds = DS_EMPTY_INITIALIZER; |
182 | 0 | ds_put_hex_dump(&ds, pkt, 128, 0, false); |
183 | 0 | VLOG_WARN("%s\n%s", es, ds_cstr(&ds)); |
184 | 0 | ds_destroy(&ds); |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | static void |
189 | | ipf_count(struct ipf *ipf, bool v6, enum ipf_counter_type cntr) |
190 | 0 | { |
191 | 0 | atomic_count_inc64(v6 ? &ipf->n6frag_cnt[cntr] : &ipf->n4frag_cnt[cntr]); |
192 | 0 | } |
193 | | |
194 | | static bool |
195 | | ipf_get_v4_enabled(struct ipf *ipf) |
196 | 0 | { |
197 | 0 | bool ifp_v4_enabled_; |
198 | 0 | atomic_read_relaxed(&ipf->ifp_v4_enabled, &ifp_v4_enabled_); |
199 | 0 | return ifp_v4_enabled_; |
200 | 0 | } |
201 | | |
202 | | static bool |
203 | | ipf_get_v6_enabled(struct ipf *ipf) |
204 | 0 | { |
205 | 0 | bool ifp_v6_enabled_; |
206 | 0 | atomic_read_relaxed(&ipf->ifp_v6_enabled, &ifp_v6_enabled_); |
207 | 0 | return ifp_v6_enabled_; |
208 | 0 | } |
209 | | |
210 | | static bool |
211 | | ipf_get_enabled(struct ipf *ipf) |
212 | 0 | { |
213 | 0 | return ipf_get_v4_enabled(ipf) || ipf_get_v6_enabled(ipf); |
214 | 0 | } |
215 | | |
216 | | static uint32_t |
217 | | ipf_addr_hash_add(uint32_t hash, const union ipf_addr *addr) |
218 | 0 | { |
219 | 0 | BUILD_ASSERT_DECL(sizeof *addr % 4 == 0); |
220 | 0 | return hash_add_bytes32(hash, (const uint32_t *) addr, sizeof *addr); |
221 | 0 | } |
222 | | |
223 | | /* Adds a fragment list to the list tracking the expiry of yet-to-be |
224 | | * completed reassembled packets, which are hence subject to expiry. */ |
225 | | static void |
226 | | ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list *ipf_list, |
227 | | long long now) |
228 | | /* OVS_REQUIRES(ipf->ipf_lock) */ |
229 | 0 | { |
230 | 0 | enum { |
231 | 0 | IPF_FRAG_LIST_TIMEOUT = 15000, |
232 | 0 | }; |
233 | |
|
234 | 0 | ipf_list->expiration = now + IPF_FRAG_LIST_TIMEOUT; |
235 | 0 | ovs_list_push_back(frag_exp_list, &ipf_list->list_node); |
236 | 0 | } |
237 | | |
238 | | /* Adds a list of fragments to the list of completed packets, which will be |
239 | | * subsequently transmitted. */ |
240 | | static void |
241 | | ipf_completed_list_add(struct ovs_list *frag_complete_list, |
242 | | struct ipf_list *ipf_list) |
243 | | /* OVS_REQUIRES(ipf_lock) */ |
244 | 0 | { |
245 | 0 | ovs_list_push_back(frag_complete_list, &ipf_list->list_node); |
246 | 0 | } |
247 | | |
248 | | /* Adds a reassembled packet to the list of reassembled packets, awaiting some |
249 | | * processing, such as being sent through conntrack. */ |
250 | | static void |
251 | | ipf_reassembled_list_add(struct ovs_list *reassembled_pkt_list, |
252 | | struct reassembled_pkt *rp) |
253 | | /* OVS_REQUIRES(ipf_lock) */ |
254 | 0 | { |
255 | 0 | ovs_list_push_back(reassembled_pkt_list, &rp->rp_list_node); |
256 | 0 | } |
257 | | |
258 | | /* Removes a frag list from the tracking data structures and frees the |
259 | | * list's heap memory. */ |
260 | | static void |
261 | | ipf_list_clean(struct hmap *frag_lists, |
262 | | struct ipf_list *ipf_list) |
263 | | /* OVS_REQUIRES(ipf_lock) */ |
264 | 0 | { |
265 | 0 | ovs_list_remove(&ipf_list->list_node); |
266 | 0 | hmap_remove(frag_lists, &ipf_list->node); |
267 | 0 | free(ipf_list->frag_list); |
268 | 0 | free(ipf_list); |
269 | 0 | } |
270 | | |
271 | | /* Removes a frag list sitting on the expiry list from the tracking |
272 | | * data structures and frees the list's heap memory. */ |
273 | | static void |
274 | | ipf_expiry_list_clean(struct hmap *frag_lists, |
275 | | struct ipf_list *ipf_list) |
276 | | /* OVS_REQUIRES(ipf_lock) */ |
277 | 0 | { |
278 | 0 | ipf_list_clean(frag_lists, ipf_list); |
279 | 0 | } |
280 | | |
281 | | /* Removes a frag list sitting on the completed list from the tracking |
282 | | * data structures and frees the list's heap memory. */ |
283 | | static void |
284 | | ipf_completed_list_clean(struct hmap *frag_lists, |
285 | | struct ipf_list *ipf_list) |
286 | | /* OVS_REQUIRES(ipf_lock) */ |
287 | 0 | { |
288 | 0 | ipf_list_clean(frag_lists, ipf_list); |
289 | 0 | } |
290 | | |
291 | | static void |
292 | | ipf_expiry_list_remove(struct ipf_list *ipf_list) |
293 | | /* OVS_REQUIRES(ipf_lock) */ |
294 | 0 | { |
295 | 0 | ovs_list_remove(&ipf_list->list_node); |
296 | 0 | } |
297 | | |
298 | | static void |
299 | | ipf_reassembled_list_remove(struct reassembled_pkt *rp) |
300 | | /* OVS_REQUIRES(ipf_lock) */ |
301 | 0 | { |
302 | 0 | ovs_list_remove(&rp->rp_list_node); |
303 | 0 | } |
304 | | |
305 | | /* Symmetric */ |
306 | | static uint32_t |
307 | | ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis) |
308 | 0 | { |
309 | 0 | uint32_t hsrc, hdst, hash; |
310 | 0 | hsrc = hdst = basis; |
311 | 0 | hsrc = ipf_addr_hash_add(hsrc, &key->src_addr); |
312 | 0 | hdst = ipf_addr_hash_add(hdst, &key->dst_addr); |
313 | 0 | hash = hsrc ^ hdst; |
314 | | |
315 | | /* Hash the rest of the key. */ |
316 | 0 | return hash_words((uint32_t *) (&key->dst_addr + 1), |
317 | 0 | (uint32_t *) (key + 1) - |
318 | 0 | (uint32_t *) (&key->dst_addr + 1), |
319 | 0 | hash); |
320 | 0 | } |
321 | | |
322 | | static bool |
323 | | ipf_is_first_v4_frag(const struct dp_packet *pkt) |
324 | 0 | { |
325 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
326 | 0 | if (!(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) && |
327 | 0 | l3->ip_frag_off & htons(IP_MORE_FRAGMENTS)) { |
328 | 0 | return true; |
329 | 0 | } |
330 | 0 | return false; |
331 | 0 | } |
332 | | |
333 | | static bool |
334 | | ipf_is_last_v4_frag(const struct dp_packet *pkt) |
335 | 0 | { |
336 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
337 | 0 | if (l3->ip_frag_off & htons(IP_FRAG_OFF_MASK) && |
338 | 0 | !(l3->ip_frag_off & htons(IP_MORE_FRAGMENTS))) { |
339 | 0 | return true; |
340 | 0 | } |
341 | 0 | return false; |
342 | 0 | } |
343 | | |
344 | | static bool |
345 | | ipf_is_v6_frag(ovs_be16 ip6f_offlg) |
346 | 0 | { |
347 | 0 | if (ip6f_offlg & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) { |
348 | 0 | return true; |
349 | 0 | } |
350 | 0 | return false; |
351 | 0 | } |
352 | | |
353 | | static bool |
354 | | ipf_is_first_v6_frag(ovs_be16 ip6f_offlg) |
355 | 0 | { |
356 | 0 | if (!(ip6f_offlg & IP6F_OFF_MASK) && |
357 | 0 | ip6f_offlg & IP6F_MORE_FRAG) { |
358 | 0 | return true; |
359 | 0 | } |
360 | 0 | return false; |
361 | 0 | } |
362 | | |
363 | | static bool |
364 | | ipf_is_last_v6_frag(ovs_be16 ip6f_offlg) |
365 | 0 | { |
366 | 0 | if ((ip6f_offlg & IP6F_OFF_MASK) && |
367 | 0 | !(ip6f_offlg & IP6F_MORE_FRAG)) { |
368 | 0 | return true; |
369 | 0 | } |
370 | 0 | return false; |
371 | 0 | } |
372 | | |
373 | | /* Checks whether a collection of fragments makes up a complete packet. */ |
374 | | static bool |
375 | | ipf_list_complete(const struct ipf_list *ipf_list) |
376 | | /* OVS_REQUIRES(ipf_lock) */ |
377 | 0 | { |
378 | 0 | for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { |
379 | 0 | if (ipf_list->frag_list[i - 1].end_data_byte + 1 |
380 | 0 | != ipf_list->frag_list[i].start_data_byte) { |
381 | 0 | return false; |
382 | 0 | } |
383 | 0 | } |
384 | 0 | return true; |
385 | 0 | } |
386 | | |
387 | | /* Runs O(n) for a sorted or almost sorted list. */ |
388 | | static void |
389 | | ipf_sort(struct ipf_frag *frag_list, size_t last_idx) |
390 | | /* OVS_REQUIRES(ipf_lock) */ |
391 | 0 | { |
392 | 0 | for (int li = 1; li <= last_idx; li++) { |
393 | 0 | struct ipf_frag ipf_frag = frag_list[li]; |
394 | 0 | int ci = li - 1; |
395 | 0 | while (ci >= 0 && |
396 | 0 | frag_list[ci].start_data_byte > ipf_frag.start_data_byte) { |
397 | 0 | frag_list[ci + 1] = frag_list[ci]; |
398 | 0 | ci--; |
399 | 0 | } |
400 | 0 | frag_list[ci + 1] = ipf_frag; |
401 | 0 | } |
402 | 0 | } |
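A standalone, unit-test-style sketch of the two helpers above; since both are static to this file, it uses a local struct in place of 'struct ipf_frag', and the struct and function names here are illustrative only.

/* Minimal sketch: sort three out-of-order fragments and check contiguity,
 * mirroring ipf_sort() and ipf_list_complete(). */
#include <stdbool.h>
#include <stdio.h>

struct frag_example {
    unsigned start;
    unsigned end;
};

int
main(void)
{
    /* Fragments arriving out of order: [1480,2959], [0,1479], [2960,3999]. */
    struct frag_example frags[] = {
        { 1480, 2959 }, { 0, 1479 }, { 2960, 3999 },
    };
    int last_idx = 2;

    /* Insertion sort, as in ipf_sort(): O(n) when nearly sorted. */
    for (int li = 1; li <= last_idx; li++) {
        struct frag_example f = frags[li];
        int ci = li - 1;
        while (ci >= 0 && frags[ci].start > f.start) {
            frags[ci + 1] = frags[ci];
            ci--;
        }
        frags[ci + 1] = f;
    }

    /* Contiguity test, as in ipf_list_complete(): each fragment must start
     * exactly one byte after the previous one ends. */
    bool complete = true;
    for (int i = 1; i <= last_idx; i++) {
        if (frags[i - 1].end + 1 != frags[i].start) {
            complete = false;
        }
    }
    printf("complete: %s\n", complete ? "yes" : "no");  /* Prints "yes". */
    return 0;
}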
403 | | |
404 | | /* Called on a sorted complete list of v4 fragments to reassemble them into |
405 | | * a single packet that can be processed, such as passing through conntrack. |
406 | | */ |
407 | | static struct dp_packet * |
408 | | ipf_reassemble_v4_frags(struct ipf_list *ipf_list) |
409 | | /* OVS_REQUIRES(ipf_lock) */ |
410 | 0 | { |
411 | 0 | struct ipf_frag *frag_list = ipf_list->frag_list; |
412 | 0 | struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt); |
413 | 0 | dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); |
414 | 0 | struct ip_header *l3 = dp_packet_l3(pkt); |
415 | 0 | int len = ntohs(l3->ip_tot_len); |
416 | 0 | int orig_len = dp_packet_size(pkt); |
417 | |
|
418 | 0 | int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - |
419 | 0 | frag_list[1].start_data_byte + 1; |
420 | |
|
421 | 0 | if (orig_len + rest_len > IPV4_PACKET_MAX_SIZE) { |
422 | 0 | ipf_print_reass_packet( |
423 | 0 | "Unsupported big reassembled v4 packet; v4 hdr:", l3); |
424 | 0 | dp_packet_delete(pkt); |
425 | 0 | return NULL; |
426 | 0 | } |
427 | | |
428 | 0 | dp_packet_prealloc_tailroom(pkt, rest_len); |
429 | |
|
430 | 0 | for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { |
431 | 0 | size_t add_len = frag_list[i].end_data_byte - |
432 | 0 | frag_list[i].start_data_byte + 1; |
433 | 0 | const char *l4 = dp_packet_l4(frag_list[i].pkt); |
434 | 0 | dp_packet_put(pkt, l4, add_len); |
435 | 0 | } |
436 | |
|
437 | 0 | len += rest_len; |
438 | 0 | l3 = dp_packet_l3(pkt); |
439 | 0 | ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS); |
440 | 0 | if (dp_packet_ip_checksum_valid(pkt)) { |
441 | 0 | dp_packet_ip_checksum_set_partial(pkt); |
442 | 0 | } else { |
443 | 0 | l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off, |
444 | 0 | new_ip_frag_off); |
445 | 0 | l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len)); |
446 | 0 | } |
447 | 0 | l3->ip_tot_len = htons(len); |
448 | 0 | l3->ip_frag_off = new_ip_frag_off; |
449 | 0 | dp_packet_set_l2_pad_size(pkt, 0); |
450 | |
|
451 | 0 | return pkt; |
452 | 0 | } |
453 | | |
454 | | /* Called on a sorted complete list of v6 fragments to reassemble them into |
455 | | * a single packet that can be processed, such as passing through conntrack. |
456 | | */ |
457 | | static struct dp_packet * |
458 | | ipf_reassemble_v6_frags(struct ipf_list *ipf_list) |
459 | | /* OVS_REQUIRES(ipf_lock) */ |
460 | 0 | { |
461 | 0 | struct ipf_frag *frag_list = ipf_list->frag_list; |
462 | 0 | struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt); |
463 | 0 | dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt)); |
464 | 0 | struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); |
465 | 0 | int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag); |
466 | 0 | int orig_len = dp_packet_size(pkt); |
467 | |
|
468 | 0 | int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte - |
469 | 0 | frag_list[1].start_data_byte + 1; |
470 | |
|
471 | 0 | if (orig_len + rest_len > IPV6_PACKET_MAX_DATA) { |
472 | 0 | ipf_print_reass_packet( |
473 | 0 | "Unsupported big reassembled v6 packet; v6 hdr:", l3); |
474 | 0 | dp_packet_delete(pkt); |
475 | 0 | return NULL; |
476 | 0 | } |
477 | | |
478 | 0 | dp_packet_prealloc_tailroom(pkt, rest_len); |
479 | |
|
480 | 0 | for (int i = 1; i <= ipf_list->last_inuse_idx; i++) { |
481 | 0 | size_t add_len = frag_list[i].end_data_byte - |
482 | 0 | frag_list[i].start_data_byte + 1; |
483 | 0 | const char *l4 = dp_packet_l4(frag_list[i].pkt); |
484 | 0 | dp_packet_put(pkt, l4, add_len); |
485 | 0 | } |
486 | |
|
487 | 0 | pl += rest_len; |
488 | 0 | l3 = dp_packet_l3(pkt); |
489 | |
|
490 | 0 | uint8_t nw_proto = l3->ip6_nxt; |
491 | 0 | uint8_t nw_frag = 0; |
492 | 0 | const void *data = l3 + 1; |
493 | 0 | size_t datasize = pl; |
494 | |
|
495 | 0 | const struct ovs_16aligned_ip6_frag *frag_hdr; |
496 | 0 | if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr, |
497 | 0 | NULL) || !nw_frag || !frag_hdr) { |
498 | |
|
499 | 0 | ipf_print_reass_packet("Unparsed reassembled v6 packet; v6 hdr:", l3); |
500 | 0 | dp_packet_delete(pkt); |
501 | 0 | return NULL; |
502 | 0 | } |
503 | | |
504 | 0 | struct ovs_16aligned_ip6_frag *fh = |
505 | 0 | CONST_CAST(struct ovs_16aligned_ip6_frag *, frag_hdr); |
506 | 0 | fh->ip6f_offlg = 0; |
507 | 0 | l3->ip6_plen = htons(pl); |
508 | 0 | l3->ip6_ctlun.ip6_un1.ip6_un1_nxt = nw_proto; |
509 | 0 | dp_packet_set_l2_pad_size(pkt, 0); |
510 | 0 | return pkt; |
511 | 0 | } |
512 | | |
513 | | /* Called when a frag list transitions to another state. This is triggered |
514 | | * by a new fragment being received for the list. Returns a reassembled |
515 | | * packet if this fragment has completed one. */ |
516 | | static struct reassembled_pkt * |
517 | | ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, |
518 | | bool ff, bool lf, bool v6) |
519 | | OVS_REQUIRES(ipf->ipf_lock) |
520 | 0 | { |
521 | 0 | enum ipf_list_state curr_state = ipf_list->state; |
522 | 0 | struct reassembled_pkt *ret = NULL; |
523 | 0 | enum ipf_list_state next_state; |
524 | 0 | switch (curr_state) { |
525 | 0 | case IPF_LIST_STATE_UNUSED: |
526 | 0 | case IPF_LIST_STATE_OTHER_SEEN: |
527 | 0 | if (ff) { |
528 | 0 | next_state = IPF_LIST_STATE_FIRST_SEEN; |
529 | 0 | } else if (lf) { |
530 | 0 | next_state = IPF_LIST_STATE_LAST_SEEN; |
531 | 0 | } else { |
532 | 0 | next_state = IPF_LIST_STATE_OTHER_SEEN; |
533 | 0 | } |
534 | 0 | break; |
535 | 0 | case IPF_LIST_STATE_FIRST_SEEN: |
536 | 0 | if (lf) { |
537 | 0 | next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; |
538 | 0 | } else { |
539 | 0 | next_state = IPF_LIST_STATE_FIRST_SEEN; |
540 | 0 | } |
541 | 0 | break; |
542 | 0 | case IPF_LIST_STATE_LAST_SEEN: |
543 | 0 | if (ff) { |
544 | 0 | next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; |
545 | 0 | } else { |
546 | 0 | next_state = IPF_LIST_STATE_LAST_SEEN; |
547 | 0 | } |
548 | 0 | break; |
549 | 0 | case IPF_LIST_STATE_FIRST_LAST_SEEN: |
550 | 0 | next_state = IPF_LIST_STATE_FIRST_LAST_SEEN; |
551 | 0 | break; |
552 | 0 | case IPF_LIST_STATE_COMPLETED: |
553 | 0 | case IPF_LIST_STATE_REASS_FAIL: |
554 | 0 | case IPF_LIST_STATE_NUM: |
555 | 0 | default: |
556 | 0 | OVS_NOT_REACHED(); |
557 | 0 | } |
558 | | |
559 | 0 | if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN) { |
560 | 0 | ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx); |
561 | 0 | if (ipf_list_complete(ipf_list)) { |
562 | 0 | struct dp_packet *reass_pkt = v6 |
563 | 0 | ? ipf_reassemble_v6_frags(ipf_list) |
564 | 0 | : ipf_reassemble_v4_frags(ipf_list); |
565 | 0 | if (reass_pkt) { |
566 | 0 | struct reassembled_pkt *rp = xzalloc(sizeof *rp); |
567 | 0 | rp->pkt = reass_pkt; |
568 | 0 | rp->list = ipf_list; |
569 | 0 | ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp); |
570 | 0 | ipf_expiry_list_remove(ipf_list); |
571 | 0 | next_state = IPF_LIST_STATE_COMPLETED; |
572 | 0 | ret = rp; |
573 | 0 | } else { |
574 | 0 | next_state = IPF_LIST_STATE_REASS_FAIL; |
575 | 0 | } |
576 | 0 | } |
577 | 0 | } |
578 | 0 | ipf_list->state = next_state; |
579 | |
|
580 | 0 | return ret; |
581 | 0 | } |
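For example, a list that receives an interior fragment, then the first fragment, then the last fragment moves UNUSED -> OTHER_SEEN -> FIRST_SEEN -> FIRST_LAST_SEEN; at that point the fragments are sorted and, if their byte ranges are contiguous, reassembly runs and the state becomes COMPLETED (or REASS_FAIL if reassembly could not produce a packet).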
582 | | |
583 | | /* Some sanity checks are redundant, but prudent, in case code paths for |
584 | | * fragments change in the future. The processing cost for fragments is |
585 | | * not a concern. */ |
586 | | static bool |
587 | | ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt) |
588 | 0 | { |
589 | 0 | const struct eth_header *l2 = dp_packet_eth(pkt); |
590 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
591 | |
|
592 | 0 | if (OVS_UNLIKELY(!l2 || !l3)) { |
593 | 0 | goto invalid_pkt; |
594 | 0 | } |
595 | | |
596 | 0 | size_t l3_size = dp_packet_l3_size(pkt); |
597 | 0 | if (OVS_UNLIKELY(l3_size < IP_HEADER_LEN)) { |
598 | 0 | goto invalid_pkt; |
599 | 0 | } |
600 | | |
601 | 0 | if (!IP_IS_FRAGMENT(l3->ip_frag_off)) { |
602 | 0 | return false; |
603 | 0 | } |
604 | | |
605 | 0 | uint16_t ip_tot_len = ntohs(l3->ip_tot_len); |
606 | 0 | if (OVS_UNLIKELY(ip_tot_len != l3_size)) { |
607 | 0 | goto invalid_pkt; |
608 | 0 | } |
609 | | |
610 | 0 | size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4; |
611 | 0 | if (OVS_UNLIKELY(ip_hdr_len < IP_HEADER_LEN)) { |
612 | 0 | goto invalid_pkt; |
613 | 0 | } |
614 | 0 | if (OVS_UNLIKELY(l3_size < ip_hdr_len)) { |
615 | 0 | goto invalid_pkt; |
616 | 0 | } |
617 | | |
618 | 0 | bool bad_csum = dp_packet_ip_checksum_bad(pkt); |
619 | 0 | if (OVS_UNLIKELY(!bad_csum && dp_packet_ip_checksum_unknown(pkt))) { |
620 | 0 | COVERAGE_INC(ipf_l3csum_checked); |
621 | 0 | if (csum(l3, ip_hdr_len)) { |
622 | 0 | dp_packet_ip_checksum_set_bad(pkt); |
623 | 0 | bad_csum = true; |
624 | 0 | } else { |
625 | 0 | dp_packet_ip_checksum_set_good(pkt); |
626 | 0 | } |
627 | 0 | } |
628 | 0 | if (OVS_UNLIKELY(bad_csum)) { |
629 | 0 | COVERAGE_INC(ipf_l3csum_err); |
630 | 0 | goto invalid_pkt; |
631 | 0 | } |
632 | | |
633 | 0 | uint32_t min_v4_frag_size_; |
634 | 0 | atomic_read_relaxed(&ipf->min_v4_frag_size, &min_v4_frag_size_); |
635 | 0 | bool lf = ipf_is_last_v4_frag(pkt); |
636 | 0 | if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v4_frag_size_)) { |
637 | 0 | ipf_count(ipf, false, IPF_NFRAGS_TOO_SMALL); |
638 | 0 | goto invalid_pkt; |
639 | 0 | } |
640 | 0 | return true; |
641 | | |
642 | 0 | invalid_pkt: |
643 | 0 | pkt->md.ct_state = CS_INVALID; |
644 | 0 | return false; |
645 | 0 | } |
646 | | |
647 | | static bool |
648 | | ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone, |
649 | | struct ipf_list_key *key, uint16_t *start_data_byte, |
650 | | uint16_t *end_data_byte, bool *ff, bool *lf) |
651 | 0 | { |
652 | 0 | const struct ip_header *l3 = dp_packet_l3(pkt); |
653 | 0 | uint16_t ip_tot_len = ntohs(l3->ip_tot_len); |
654 | 0 | size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4; |
655 | |
|
656 | 0 | *start_data_byte = ntohs(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) * 8; |
657 | 0 | *end_data_byte = *start_data_byte + ip_tot_len - ip_hdr_len - 1; |
658 | 0 | *ff = ipf_is_first_v4_frag(pkt); |
659 | 0 | *lf = ipf_is_last_v4_frag(pkt); |
660 | 0 | memset(key, 0, sizeof *key); |
661 | 0 | key->ip_id = be16_to_be32(l3->ip_id); |
662 | 0 | key->dl_type = dl_type; |
663 | 0 | key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src); |
664 | 0 | key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst); |
665 | 0 | key->nw_proto = l3->ip_proto; |
666 | 0 | key->zone = zone; |
667 | 0 | key->recirc_id = pkt->md.recirc_id; |
668 | 0 | return true; |
669 | 0 | } |
670 | | |
671 | | /* Some sanity checks are redundant, but prudent, in case code paths for |
672 | | * fragments change in the future. The processing cost for fragments is |
673 | | * not a concern. */ |
674 | | static bool |
675 | | ipf_is_valid_v6_frag(struct ipf *ipf, struct dp_packet *pkt) |
676 | 0 | { |
677 | 0 | const struct eth_header *l2 = dp_packet_eth(pkt); |
678 | 0 | const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); |
679 | 0 | const char *l4 = dp_packet_l4(pkt); |
680 | |
|
681 | 0 | if (OVS_UNLIKELY(!l2 || !l3 || !l4)) { |
682 | 0 | goto invalid_pkt; |
683 | 0 | } |
684 | | |
685 | 0 | size_t l3_size = dp_packet_l3_size(pkt); |
686 | 0 | size_t l3_hdr_size = sizeof *l3; |
687 | |
|
688 | 0 | if (OVS_UNLIKELY(l3_size < l3_hdr_size)) { |
689 | 0 | goto invalid_pkt; |
690 | 0 | } |
691 | | |
692 | 0 | uint8_t nw_frag = 0; |
693 | 0 | uint8_t nw_proto = l3->ip6_nxt; |
694 | 0 | const void *data = l3 + 1; |
695 | 0 | size_t datasize = l3_size - l3_hdr_size; |
696 | 0 | const struct ovs_16aligned_ip6_frag *frag_hdr; |
697 | 0 | if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, |
698 | 0 | &frag_hdr, NULL) || !nw_frag || !frag_hdr) { |
699 | 0 | return false; |
700 | 0 | } |
701 | | |
702 | 0 | int pl = ntohs(l3->ip6_plen); |
703 | 0 | if (OVS_UNLIKELY(pl + l3_hdr_size != l3_size)) { |
704 | 0 | goto invalid_pkt; |
705 | 0 | } |
706 | | |
707 | 0 | ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg; |
708 | 0 | if (OVS_UNLIKELY(!ipf_is_v6_frag(ip6f_offlg))) { |
709 | 0 | return false; |
710 | 0 | } |
711 | | |
712 | 0 | uint32_t min_v6_frag_size_; |
713 | 0 | atomic_read_relaxed(&ipf->min_v6_frag_size, &min_v6_frag_size_); |
714 | 0 | bool lf = ipf_is_last_v6_frag(ip6f_offlg); |
715 | |
|
716 | 0 | if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v6_frag_size_)) { |
717 | 0 | ipf_count(ipf, true, IPF_NFRAGS_TOO_SMALL); |
718 | 0 | goto invalid_pkt; |
719 | 0 | } |
720 | | |
721 | 0 | return true; |
722 | | |
723 | 0 | invalid_pkt: |
724 | 0 | pkt->md.ct_state = CS_INVALID; |
725 | 0 | return false; |
726 | |
|
727 | 0 | } |
728 | | |
729 | | static void |
730 | | ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone, |
731 | | struct ipf_list_key *key, uint16_t *start_data_byte, |
732 | | uint16_t *end_data_byte, bool *ff, bool *lf) |
733 | 0 | { |
734 | 0 | const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt); |
735 | 0 | uint8_t nw_frag = 0; |
736 | 0 | uint8_t nw_proto = l3->ip6_nxt; |
737 | 0 | const void *data = l3 + 1; |
738 | 0 | size_t datasize = dp_packet_l3_size(pkt) - sizeof *l3; |
739 | 0 | const struct ovs_16aligned_ip6_frag *frag_hdr; |
740 | |
|
741 | 0 | parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr, |
742 | 0 | NULL); |
743 | 0 | ovs_assert(nw_frag && frag_hdr); |
744 | 0 | ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg; |
745 | 0 | *start_data_byte = ntohs(ip6f_offlg & IP6F_OFF_MASK) + |
746 | 0 | sizeof (struct ovs_16aligned_ip6_frag); |
747 | 0 | *end_data_byte = *start_data_byte + dp_packet_l4_size(pkt) - 1; |
748 | 0 | *ff = ipf_is_first_v6_frag(ip6f_offlg); |
749 | 0 | *lf = ipf_is_last_v6_frag(ip6f_offlg); |
750 | 0 | memset(key, 0, sizeof *key); |
751 | 0 | key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident); |
752 | 0 | key->dl_type = dl_type; |
753 | 0 | memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6); |
754 | | /* We are not supporting parsing of the routing header to use as the |
755 | | * dst address part of the key. */ |
756 | 0 | memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6); |
757 | 0 | key->nw_proto = 0; /* Not used for key for V6. */ |
758 | 0 | key->zone = zone; |
759 | 0 | key->recirc_id = pkt->md.recirc_id; |
760 | 0 | } |
761 | | |
762 | | static bool |
763 | | ipf_list_key_eq(const struct ipf_list_key *key1, |
764 | | const struct ipf_list_key *key2) |
765 | | /* OVS_REQUIRES(ipf_lock) */ |
766 | 0 | { |
767 | 0 | if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) && |
768 | 0 | !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) && |
769 | 0 | key1->dl_type == key2->dl_type && |
770 | 0 | key1->ip_id == key2->ip_id && |
771 | 0 | key1->zone == key2->zone && |
772 | 0 | key1->nw_proto == key2->nw_proto && |
773 | 0 | key1->recirc_id == key2->recirc_id) { |
774 | 0 | return true; |
775 | 0 | } |
776 | 0 | return false; |
777 | 0 | } |
778 | | |
779 | | static struct ipf_list * |
780 | | ipf_list_key_lookup(struct ipf *ipf, const struct ipf_list_key *key, |
781 | | uint32_t hash) |
782 | | OVS_REQUIRES(ipf->ipf_lock) |
783 | 0 | { |
784 | 0 | struct ipf_list *ipf_list; |
785 | 0 | HMAP_FOR_EACH_WITH_HASH (ipf_list, node, hash, &ipf->frag_lists) { |
786 | 0 | if (ipf_list_key_eq(&ipf_list->key, key)) { |
787 | 0 | return ipf_list; |
788 | 0 | } |
789 | 0 | } |
790 | 0 | return NULL; |
791 | 0 | } |
792 | | |
793 | | static bool |
794 | | ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, |
795 | | size_t start_data_byte, size_t end_data_byte) |
796 | | /* OVS_REQUIRES(ipf_lock) */ |
797 | 0 | { |
798 | 0 | for (int i = 0; i <= last_inuse_idx; i++) { |
799 | 0 | if ((start_data_byte >= frag_list[i].start_data_byte && |
800 | 0 | start_data_byte <= frag_list[i].end_data_byte) || |
801 | 0 | (end_data_byte >= frag_list[i].start_data_byte && |
802 | 0 | end_data_byte <= frag_list[i].end_data_byte)) { |
803 | 0 | return true; |
804 | 0 | } |
805 | 0 | } |
806 | 0 | return false; |
807 | 0 | } |
808 | | |
809 | | /* Adds a fragment to a list of fragments, if the fragment is not a |
810 | | * duplicate. If the fragment is a duplicate, it is marked invalid here, |
811 | | * avoiding the work that conntrack would otherwise do to mark the |
812 | | * fragment as invalid, which it would in all cases. */ |
813 | | static bool |
814 | | ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, |
815 | | struct dp_packet *pkt, uint16_t start_data_byte, |
816 | | uint16_t end_data_byte, bool ff, bool lf, bool v6, |
817 | | struct reassembled_pkt **rp) |
818 | | OVS_REQUIRES(ipf->ipf_lock) |
819 | 0 | { |
820 | 0 | bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, |
821 | 0 | ipf_list->last_inuse_idx, start_data_byte, end_data_byte); |
822 | 0 | int last_inuse_idx = ipf_list->last_inuse_idx; |
823 | |
|
824 | 0 | if (!duped_frag) { |
825 | 0 | if (last_inuse_idx < ipf_list->size - 1) { |
826 | | /* In the case of DPDK, it would be unfortunate if we had |
827 | | * to clone a fragment outside the DPDK mempool because the |
828 | | * mempool size was too limited. To avoid that, we recommend |
829 | | * not setting the mempool number of buffers too low, and we |
830 | | * also clamp the number of fragments. */ |
831 | 0 | struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1]; |
832 | 0 | frag->pkt = pkt; |
833 | 0 | frag->start_data_byte = start_data_byte; |
834 | 0 | frag->end_data_byte = end_data_byte; |
835 | 0 | ipf_list->last_inuse_idx++; |
836 | 0 | atomic_count_inc(&ipf->nfrag); |
837 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); |
838 | 0 | *rp = ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); |
839 | 0 | } else { |
840 | 0 | OVS_NOT_REACHED(); |
841 | 0 | } |
842 | 0 | } else { |
843 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_OVERLAP); |
844 | 0 | pkt->md.ct_state = CS_INVALID; |
845 | 0 | return false; |
846 | 0 | } |
847 | 0 | return true; |
848 | 0 | } |
849 | | |
850 | | static void |
851 | | ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key, |
852 | | int max_frag_list_size) |
853 | 0 | { |
854 | 0 | ipf_list->key = *key; |
855 | 0 | ipf_list->last_inuse_idx = IPF_INVALID_IDX; |
856 | 0 | ipf_list->last_sent_idx = IPF_INVALID_IDX; |
857 | 0 | ipf_list->reass_execute_ctx = NULL; |
858 | 0 | ipf_list->state = IPF_LIST_STATE_UNUSED; |
859 | 0 | ipf_list->size = max_frag_list_size; |
860 | 0 | ipf_list->frag_list |
861 | 0 | = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list); |
862 | 0 | } |
863 | | |
864 | | /* Generates a fragment list key from a well-formed fragment and either starts |
865 | | * a new fragment list or grows the existing fragment list, while checking |
866 | | * that the maximum number of fragments in process is not exceeded and that |
867 | | * the list size is not impossibly big. Calls 'ipf_process_frag()' to add |
868 | | * the fragment to a list of fragments. */ |
869 | | static bool |
870 | | ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, |
871 | | uint16_t zone, long long now, uint32_t hash_basis, |
872 | | struct reassembled_pkt **rp) |
873 | | OVS_REQUIRES(ipf->ipf_lock) |
874 | 0 | { |
875 | 0 | struct ipf_list_key key; |
876 | | /* Initialize 4 variables for some versions of GCC. */ |
877 | 0 | uint16_t start_data_byte = 0; |
878 | 0 | uint16_t end_data_byte = 0; |
879 | 0 | bool ff = false; |
880 | 0 | bool lf = false; |
881 | 0 | bool v6 = dl_type == htons(ETH_TYPE_IPV6); |
882 | |
|
883 | 0 | if (v6 && ipf_get_v6_enabled(ipf)) { |
884 | 0 | ipf_v6_key_extract(pkt, dl_type, zone, &key, &start_data_byte, |
885 | 0 | &end_data_byte, &ff, &lf); |
886 | 0 | } else if (!v6 && ipf_get_v4_enabled(ipf)) { |
887 | 0 | ipf_v4_key_extract(pkt, dl_type, zone, &key, &start_data_byte, |
888 | 0 | &end_data_byte, &ff, &lf); |
889 | 0 | } else { |
890 | 0 | return false; |
891 | 0 | } |
892 | | |
893 | 0 | unsigned int nfrag_max; |
894 | 0 | atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max); |
895 | 0 | if (atomic_count_get(&ipf->nfrag) >= nfrag_max) { |
896 | 0 | return false; |
897 | 0 | } |
898 | | |
899 | 0 | uint32_t hash = ipf_list_key_hash(&key, hash_basis); |
900 | 0 | struct ipf_list *ipf_list = ipf_list_key_lookup(ipf, &key, hash); |
901 | 0 | enum { |
902 | 0 | IPF_FRAG_LIST_MIN_INCREMENT = 4, |
903 | 0 | IPF_IPV6_MAX_FRAG_LIST_SIZE = 65535, |
904 | 0 | }; |
905 | |
|
906 | 0 | int max_frag_list_size; |
907 | 0 | if (v6) { |
908 | | /* Because the calculation with extension headers is variable, |
909 | | * we don't calculate a hard maximum fragment list size upfront. The |
910 | | * fragment list size is practically limited by the code, however. */ |
911 | 0 | max_frag_list_size = IPF_IPV6_MAX_FRAG_LIST_SIZE; |
912 | 0 | } else { |
913 | 0 | max_frag_list_size = ipf->max_v4_frag_list_size; |
914 | 0 | } |
915 | |
|
916 | 0 | if (!ipf_list) { |
917 | 0 | ipf_list = xmalloc(sizeof *ipf_list); |
918 | 0 | ipf_list_init(ipf_list, &key, |
919 | 0 | MIN(max_frag_list_size, IPF_FRAG_LIST_MIN_INCREMENT)); |
920 | 0 | hmap_insert(&ipf->frag_lists, &ipf_list->node, hash); |
921 | 0 | ipf_expiry_list_add(&ipf->frag_exp_list, ipf_list, now); |
922 | 0 | } else if (ipf_list->state == IPF_LIST_STATE_REASS_FAIL || |
923 | 0 | ipf_list->state == IPF_LIST_STATE_COMPLETED) { |
924 | | /* Bail out as early as possible. */ |
925 | 0 | return false; |
926 | 0 | } else if (ipf_list->last_inuse_idx + 1 >= ipf_list->size) { |
927 | 0 | int increment = MIN(IPF_FRAG_LIST_MIN_INCREMENT, |
928 | 0 | max_frag_list_size - ipf_list->size); |
929 | | /* Enforce limit. */ |
930 | 0 | if (increment > 0) { |
931 | 0 | ipf_list->frag_list = |
932 | 0 | xrealloc(ipf_list->frag_list, (ipf_list->size + increment) * |
933 | 0 | sizeof *ipf_list->frag_list); |
934 | 0 | ipf_list->size += increment; |
935 | 0 | } else { |
936 | 0 | return false; |
937 | 0 | } |
938 | 0 | } |
939 | | |
940 | 0 | return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, |
941 | 0 | end_data_byte, ff, lf, v6, rp); |
942 | 0 | } |
943 | | |
944 | | /* Filters out fragments from a batch of fragments and adjusts the batch. */ |
945 | | static void |
946 | | ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, |
947 | | ovs_be16 dl_type, uint16_t zone, long long now, |
948 | | uint32_t hash_basis) |
949 | 0 | { |
950 | 0 | const size_t pb_cnt = dp_packet_batch_size(pb); |
951 | 0 | int pb_idx; /* Index in a packet batch. */ |
952 | 0 | struct dp_packet *pkt; |
953 | |
|
954 | 0 | DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { |
955 | 0 | if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) && |
956 | 0 | ipf_is_valid_v4_frag(ipf, pkt)) |
957 | 0 | || |
958 | 0 | (dl_type == htons(ETH_TYPE_IPV6) && |
959 | 0 | ipf_is_valid_v6_frag(ipf, pkt)))) { |
960 | 0 | struct reassembled_pkt *rp = NULL; |
961 | |
|
962 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
963 | 0 | if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis, |
964 | 0 | &rp)) { |
965 | 0 | dp_packet_batch_refill(pb, pkt, pb_idx); |
966 | 0 | } else { |
967 | 0 | if (rp && !dp_packet_batch_is_full(pb)) { |
968 | 0 | dp_packet_batch_refill(pb, rp->pkt, pb_idx); |
969 | 0 | rp->list->reass_execute_ctx = rp->pkt; |
970 | 0 | } |
971 | 0 | } |
972 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
973 | 0 | } else { |
974 | 0 | dp_packet_batch_refill(pb, pkt, pb_idx); |
975 | 0 | } |
976 | 0 | } |
977 | 0 | } |
978 | | |
979 | | /* In the case of DPDK, a memory source check is done, as DPDK memory pool |
980 | | * management has trouble dealing with multiple source types. The |
981 | | * 'check_source' parameter is used to indicate when this check is needed. */ |
982 | | static bool |
983 | | ipf_dp_packet_batch_add(struct dp_packet_batch *pb, struct dp_packet *pkt, |
984 | | bool check_source OVS_UNUSED) |
985 | 0 | { |
986 | | #ifdef DPDK_NETDEV |
987 | | if ((dp_packet_batch_is_full(pb)) || |
988 | | /* DPDK cannot handle multiple sources in a batch. */ |
989 | | (check_source && !dp_packet_batch_is_empty(pb) |
990 | | && pb->packets[0]->source != pkt->source)) { |
991 | | #else |
992 | 0 | if (dp_packet_batch_is_full(pb)) { |
993 | 0 | #endif |
994 | 0 | return false; |
995 | 0 | } |
996 | | |
997 | 0 | dp_packet_batch_add(pb, pkt); |
998 | 0 | return true; |
999 | 0 | } |
1000 | | |
1001 | | /* This is used in rare cases where a list cannot be sent. One known |
1002 | | * reason is the mempool source check, which exists due to DPDK support: |
1003 | | * packets are no longer being received on any port with a source |
1004 | | * matching the fragments. Another reason is a race where all |
1005 | | * conntrack rules are unconfigured while some fragments are yet to be |
1006 | | * flushed. |
1007 | | * |
1008 | | * Returns true if the list was purged. */ |
1009 | | static bool |
1010 | | ipf_purge_list_check(struct ipf *ipf, struct ipf_list *ipf_list, |
1011 | | long long now) |
1012 | | OVS_REQUIRES(ipf->ipf_lock) |
1013 | 0 | { |
1014 | 0 | enum { |
1015 | 0 | IPF_FRAG_LIST_PURGE_TIME_ADJ = 10000 |
1016 | 0 | }; |
1017 | |
|
1018 | 0 | if (now < ipf_list->expiration + IPF_FRAG_LIST_PURGE_TIME_ADJ) { |
1019 | 0 | return false; |
1020 | 0 | } |
1021 | | |
1022 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1023 | 0 | struct dp_packet * pkt |
1024 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1025 | 0 | dp_packet_delete(pkt); |
1026 | 0 | atomic_count_dec(&ipf->nfrag); |
1027 | 0 | COVERAGE_INC(ipf_stuck_frag_list_purged); |
1028 | 0 | ipf_count(ipf, ipf_list->key.dl_type == htons(ETH_TYPE_IPV6), |
1029 | 0 | IPF_NFRAGS_PURGED); |
1030 | 0 | ipf_list->last_sent_idx++; |
1031 | 0 | } |
1032 | |
|
1033 | 0 | return true; |
1034 | 0 | } |
1035 | | |
1036 | | /* Does the packet batch management and common accounting work associated |
1037 | | * with 'ipf_send_completed_frags()' and 'ipf_send_expired_frags()'. */ |
1038 | | static bool |
1039 | | ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list, |
1040 | | struct dp_packet_batch *pb, bool v6, long long now) |
1041 | | OVS_REQUIRES(ipf->ipf_lock) |
1042 | 0 | { |
1043 | 0 | if (ipf_purge_list_check(ipf, ipf_list, now)) { |
1044 | 0 | return true; |
1045 | 0 | } |
1046 | | |
1047 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1048 | 0 | struct dp_packet *pkt |
1049 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1050 | 0 | if (ipf_dp_packet_batch_add(pb, pkt, true)) { |
1051 | 0 | ipf_list->last_sent_idx++; |
1052 | 0 | atomic_count_dec(&ipf->nfrag); |
1053 | |
|
1054 | 0 | ipf_count(ipf, v6, IPF_NFRAGS_COMPL_SENT); |
1055 | |
|
1056 | 0 | if (ipf_list->last_sent_idx == ipf_list->last_inuse_idx) { |
1057 | 0 | return true; |
1058 | 0 | } |
1059 | 0 | } else { |
1060 | 0 | return false; |
1061 | 0 | } |
1062 | 0 | } |
1063 | 0 | OVS_NOT_REACHED(); |
1064 | 0 | } |
1065 | | |
1066 | | /* Adds fragments associated with a completed fragment list to a packet batch |
1067 | | * to be processed by the calling application, typically conntrack. Also |
1068 | | * cleans up the list context when it is empty. */ |
1069 | | static void |
1070 | | ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb, |
1071 | | long long now, bool v6, uint16_t zone, |
1072 | | odp_port_t in_port) |
1073 | 0 | { |
1074 | 0 | if (ovs_list_is_empty(&ipf->frag_complete_list)) { |
1075 | 0 | return; |
1076 | 0 | } |
1077 | | |
1078 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1079 | 0 | struct ipf_list *ipf_list; |
1080 | |
|
1081 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) { |
1082 | |
|
1083 | 0 | if ((ipf_list->key.dl_type == htons(ETH_TYPE_IPV6)) != v6) { |
1084 | 0 | continue; |
1085 | 0 | } |
1086 | 0 | if (ipf_list->key.zone != zone) { |
1087 | 0 | continue; |
1088 | 0 | } |
1089 | | |
1090 | | /* Check that the batch's in_port matches. */ |
1091 | 0 | struct dp_packet *pkt |
1092 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1093 | 0 | if (in_port != pkt->md.in_port.odp_port) { |
1094 | 0 | continue; |
1095 | 0 | } |
1096 | | |
1097 | 0 | if (ipf_send_frags_in_list(ipf, ipf_list, pb, v6, now)) { |
1098 | 0 | ipf_completed_list_clean(&ipf->frag_lists, ipf_list); |
1099 | 0 | } else { |
1100 | 0 | break; |
1101 | 0 | } |
1102 | 0 | } |
1103 | |
|
1104 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1105 | 0 | } |
1106 | | |
1107 | | /* Removes expired fragment lists and cleans up the list context. */ |
1108 | | static void |
1109 | | ipf_delete_expired_frags(struct ipf *ipf, long long now) |
1110 | 0 | { |
1111 | 0 | enum { |
1112 | | /* Very conservative, due to DOS probability. */ |
1113 | 0 | IPF_FRAG_LIST_MAX_EXPIRED = 1, |
1114 | 0 | }; |
1115 | | |
1116 | |
|
1117 | 0 | if (ovs_list_is_empty(&ipf->frag_exp_list)) { |
1118 | 0 | return; |
1119 | 0 | } |
1120 | | |
1121 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1122 | 0 | struct ipf_list *ipf_list; |
1123 | 0 | size_t lists_removed = 0; |
1124 | |
|
1125 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) { |
1126 | 0 | if (now <= ipf_list->expiration || |
1127 | 0 | lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) { |
1128 | 0 | break; |
1129 | 0 | } |
1130 | | |
1131 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1132 | 0 | struct dp_packet * pkt |
1133 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1134 | 0 | dp_packet_delete(pkt); |
1135 | 0 | atomic_count_dec(&ipf->nfrag); |
1136 | 0 | COVERAGE_INC(ipf_stuck_frag_list_expired); |
1137 | 0 | ipf_count(ipf, ipf_list->key.dl_type == htons(ETH_TYPE_IPV6), |
1138 | 0 | IPF_NFRAGS_EXPIRED); |
1139 | 0 | ipf_list->last_sent_idx++; |
1140 | 0 | } |
1141 | 0 | ipf_expiry_list_clean(&ipf->frag_lists, ipf_list); |
1142 | 0 | lists_removed++; |
1143 | 0 | } |
1144 | |
|
1145 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1146 | 0 | } |
1147 | | |
1148 | | /* Adds a reassembled packet to a packet batch to be processed by the caller. |
1149 | | */ |
1150 | | static void |
1151 | | ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb, |
1152 | | ovs_be16 dl_type) |
1153 | 0 | { |
1154 | 0 | if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) { |
1155 | 0 | return; |
1156 | 0 | } |
1157 | | |
1158 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1159 | 0 | struct reassembled_pkt *rp; |
1160 | |
|
1161 | 0 | LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { |
1162 | 0 | if (!rp->list->reass_execute_ctx && |
1163 | 0 | rp->list->key.dl_type == dl_type && |
1164 | 0 | ipf_dp_packet_batch_add(pb, rp->pkt, false)) { |
1165 | 0 | rp->list->reass_execute_ctx = rp->pkt; |
1166 | 0 | } |
1167 | 0 | } |
1168 | |
|
1169 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1170 | 0 | } |
1171 | | |
1172 | | /* Checks for reassembled packets after processing by conntrack and edits |
1173 | | * the fragments, if needed, based on what conntrack decided. */ |
1174 | | static void |
1175 | | ipf_post_execute_reass_pkts(struct ipf *ipf, |
1176 | | struct dp_packet_batch *pb, bool v6) |
1177 | 0 | { |
1178 | 0 | if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) { |
1179 | 0 | return; |
1180 | 0 | } |
1181 | | |
1182 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1183 | 0 | struct reassembled_pkt *rp; |
1184 | |
|
1185 | 0 | LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { |
1186 | 0 | const size_t pb_cnt = dp_packet_batch_size(pb); |
1187 | 0 | int pb_idx; |
1188 | 0 | struct dp_packet *pkt; |
1189 | | /* Inner batch loop is constant time since batch size is <= |
1190 | | * NETDEV_MAX_BURST. */ |
1191 | 0 | DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { |
1192 | 0 | if (rp && pkt == rp->list->reass_execute_ctx) { |
1193 | 0 | const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; |
1194 | 0 | void *l4_frag = dp_packet_l4(frag_0->pkt); |
1195 | 0 | void *l4_reass = dp_packet_l4(pkt); |
1196 | 0 | memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); |
1197 | |
|
1198 | 0 | for (int i = 0; i <= rp->list->last_inuse_idx; i++) { |
1199 | 0 | const struct ipf_frag *frag_i = &rp->list->frag_list[i]; |
1200 | |
|
1201 | 0 | frag_i->pkt->md.ct_label = pkt->md.ct_label; |
1202 | 0 | frag_i->pkt->md.ct_mark = pkt->md.ct_mark; |
1203 | 0 | frag_i->pkt->md.ct_state = pkt->md.ct_state; |
1204 | 0 | frag_i->pkt->md.ct_zone = pkt->md.ct_zone; |
1205 | 0 | frag_i->pkt->md.ct_orig_tuple_ipv6 = |
1206 | 0 | pkt->md.ct_orig_tuple_ipv6; |
1207 | 0 | if (pkt->md.ct_orig_tuple_ipv6) { |
1208 | 0 | frag_i->pkt->md.ct_orig_tuple.ipv6 = |
1209 | 0 | pkt->md.ct_orig_tuple.ipv6; |
1210 | 0 | } else { |
1211 | 0 | frag_i->pkt->md.ct_orig_tuple.ipv4 = |
1212 | 0 | pkt->md.ct_orig_tuple.ipv4; |
1213 | 0 | } |
1214 | 0 | if (v6) { |
1215 | 0 | struct ovs_16aligned_ip6_hdr *l3_frag |
1216 | 0 | = dp_packet_l3(frag_i->pkt); |
1217 | 0 | struct ovs_16aligned_ip6_hdr *l3_reass |
1218 | 0 | = dp_packet_l3(pkt); |
1219 | 0 | l3_frag->ip6_src = l3_reass->ip6_src; |
1220 | 0 | l3_frag->ip6_dst = l3_reass->ip6_dst; |
1221 | 0 | } else { |
1222 | 0 | struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt); |
1223 | 0 | struct ip_header *l3_reass = dp_packet_l3(pkt); |
1224 | 0 | if (dp_packet_ip_checksum_valid(frag_i->pkt)) { |
1225 | 0 | dp_packet_ip_checksum_set_partial(frag_i->pkt); |
1226 | 0 | } else { |
1227 | 0 | ovs_be32 reass_ip = |
1228 | 0 | get_16aligned_be32(&l3_reass->ip_src); |
1229 | 0 | ovs_be32 frag_ip = |
1230 | 0 | get_16aligned_be32(&l3_frag->ip_src); |
1231 | |
|
1232 | 0 | l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, |
1233 | 0 | frag_ip, |
1234 | 0 | reass_ip); |
1235 | 0 | reass_ip = get_16aligned_be32(&l3_reass->ip_dst); |
1236 | 0 | frag_ip = get_16aligned_be32(&l3_frag->ip_dst); |
1237 | 0 | l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, |
1238 | 0 | frag_ip, |
1239 | 0 | reass_ip); |
1240 | 0 | } |
1241 | |
|
1242 | 0 | l3_frag->ip_src = l3_reass->ip_src; |
1243 | 0 | l3_frag->ip_dst = l3_reass->ip_dst; |
1244 | 0 | } |
1245 | 0 | } |
1246 | |
|
1247 | 0 | ipf_completed_list_add(&ipf->frag_complete_list, rp->list); |
1248 | 0 | ipf_reassembled_list_remove(rp); |
1249 | 0 | dp_packet_delete(rp->pkt); |
1250 | 0 | free(rp); |
1251 | 0 | rp = NULL; |
1252 | 0 | } else { |
1253 | 0 | dp_packet_batch_refill(pb, pkt, pb_idx); |
1254 | 0 | } |
1255 | 0 | } |
1256 | 0 | } |
1257 | |
|
1258 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1259 | 0 | } |
1260 | | |
1261 | | /* Extracts any fragments from the batch and reassembles them when a |
1262 | | * complete packet is received. Completed packets are added to the batch, |
1263 | | * when possible, to be sent through conntrack. */ |
1264 | | void |
1265 | | ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb, |
1266 | | long long now, ovs_be16 dl_type, uint16_t zone, |
1267 | | uint32_t hash_basis) |
1268 | 0 | { |
1269 | 0 | if (ipf_get_enabled(ipf)) { |
1270 | 0 | ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now, hash_basis); |
1271 | 0 | } |
1272 | |
|
1273 | 0 | if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) { |
1274 | 0 | ipf_execute_reass_pkts(ipf, pb, dl_type); |
1275 | 0 | } |
1276 | 0 | } |
1277 | | |
1278 | | /* Updates fragments based on the processing of the reassembled packet sent |
1279 | | * through conntrack and adds these fragments to any batches seen. Expired |
1280 | | * fragments are marked as invalid and also added to the batches seen |
1281 | | * with low priority. Reassembled packets are freed. */ |
1282 | | void |
1283 | | ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb, |
1284 | | long long now, ovs_be16 dl_type, uint16_t zone, |
1285 | | odp_port_t in_port) |
1286 | 0 | { |
1287 | 0 | if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) { |
1288 | 0 | bool v6 = dl_type == htons(ETH_TYPE_IPV6); |
1289 | 0 | ipf_post_execute_reass_pkts(ipf, pb, v6); |
1290 | 0 | ipf_send_completed_frags(ipf, pb, now, v6, zone, in_port); |
1291 | 0 | ipf_delete_expired_frags(ipf, now); |
1292 | 0 | } |
1293 | 0 | } |
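A minimal sketch of the intended calling pattern around conntrack. Only the ipf_*() calls below come from this file; process_batch_through_conntrack() is a hypothetical stand-in for whatever the datapath actually does with the batch.

/* Hypothetical caller-side step; not part of this module. */
void process_batch_through_conntrack(struct dp_packet_batch *batch);

static void
example_ct_step(struct ipf *ipf, struct dp_packet_batch *batch,
                ovs_be16 dl_type, uint16_t zone, uint32_t hash_basis,
                odp_port_t in_port)
{
    long long now = time_msec();

    /* Pull fragments out of the batch; a completed reassembly may be added
     * back in their place. */
    ipf_preprocess_conntrack(ipf, batch, now, dl_type, zone, hash_basis);

    process_batch_through_conntrack(batch);

    /* Copy conntrack's verdict back onto the fragments, re-add them to the
     * batch, and expire anything that has waited too long. */
    ipf_postprocess_conntrack(ipf, batch, now, dl_type, zone, in_port);
}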
1294 | | |
1295 | | static void * |
1296 | | ipf_clean_thread_main(void *f) |
1297 | 0 | { |
1298 | 0 | struct ipf *ipf = f; |
1299 | |
|
1300 | 0 | enum { |
1301 | 0 | IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000, |
1302 | 0 | }; |
1303 | |
|
1304 | 0 | while (!latch_is_set(&ipf->ipf_clean_thread_exit)) { |
1305 | |
|
1306 | 0 | long long now = time_msec(); |
1307 | |
|
1308 | 0 | if (!ovs_list_is_empty(&ipf->frag_exp_list) || |
1309 | 0 | !ovs_list_is_empty(&ipf->frag_complete_list)) { |
1310 | |
|
1311 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1312 | |
|
1313 | 0 | struct ipf_list *ipf_list; |
1314 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, |
1315 | 0 | &ipf->frag_exp_list) { |
1316 | 0 | if (ipf_purge_list_check(ipf, ipf_list, now)) { |
1317 | 0 | ipf_expiry_list_clean(&ipf->frag_lists, ipf_list); |
1318 | 0 | } |
1319 | 0 | } |
1320 | |
|
1321 | 0 | LIST_FOR_EACH_SAFE (ipf_list, list_node, |
1322 | 0 | &ipf->frag_complete_list) { |
1323 | 0 | if (ipf_purge_list_check(ipf, ipf_list, now)) { |
1324 | 0 | ipf_completed_list_clean(&ipf->frag_lists, ipf_list); |
1325 | 0 | } |
1326 | 0 | } |
1327 | |
|
1328 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1329 | 0 | } |
1330 | |
|
1331 | 0 | poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT); |
1332 | 0 | latch_wait(&ipf->ipf_clean_thread_exit); |
1333 | 0 | poll_block(); |
1334 | 0 | } |
1335 | |
|
1336 | 0 | return NULL; |
1337 | 0 | } |
1338 | | |
1339 | | struct ipf * |
1340 | | ipf_init(void) |
1341 | 0 | { |
1342 | 0 | struct ipf *ipf = xzalloc(sizeof *ipf); |
1343 | |
|
1344 | 0 | ovs_mutex_init_adaptive(&ipf->ipf_lock); |
1345 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1346 | 0 | hmap_init(&ipf->frag_lists); |
1347 | 0 | ovs_list_init(&ipf->frag_exp_list); |
1348 | 0 | ovs_list_init(&ipf->frag_complete_list); |
1349 | 0 | ovs_list_init(&ipf->reassembled_pkt_list); |
1350 | 0 | atomic_init(&ipf->min_v4_frag_size, IPF_V4_FRAG_SIZE_MIN_DEF); |
1351 | 0 | atomic_init(&ipf->min_v6_frag_size, IPF_V6_FRAG_SIZE_MIN_DEF); |
1352 | 0 | ipf->max_v4_frag_list_size = DIV_ROUND_UP( |
1353 | 0 | IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE, |
1354 | 0 | ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE); |
1355 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1356 | 0 | atomic_count_init(&ipf->nfrag, 0); |
1357 | 0 | for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) { |
1358 | 0 | atomic_init(&ipf->n4frag_cnt[i], 0); |
1359 | 0 | atomic_init(&ipf->n6frag_cnt[i], 0); |
1360 | 0 | } |
1361 | 0 | atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT); |
1362 | 0 | atomic_init(&ipf->ifp_v4_enabled, true); |
1363 | 0 | atomic_init(&ipf->ifp_v6_enabled, true); |
1364 | 0 | latch_init(&ipf->ipf_clean_thread_exit); |
1365 | 0 | ipf->ipf_clean_thread = ovs_thread_create("ipf_clean", |
1366 | 0 | ipf_clean_thread_main, ipf); |
1367 | |
|
1368 | 0 | return ipf; |
1369 | 0 | } |
1370 | | |
1371 | | void |
1372 | | ipf_destroy(struct ipf *ipf) |
1373 | 0 | { |
1374 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1375 | 0 | latch_set(&ipf->ipf_clean_thread_exit); |
1376 | 0 | pthread_join(ipf->ipf_clean_thread, NULL); |
1377 | 0 | latch_destroy(&ipf->ipf_clean_thread_exit); |
1378 | |
|
1379 | 0 | struct ipf_list *ipf_list; |
1380 | 0 | HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) { |
1381 | 0 | while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { |
1382 | 0 | struct dp_packet *pkt |
1383 | 0 | = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; |
1384 | 0 | dp_packet_delete(pkt); |
1385 | 0 | atomic_count_dec(&ipf->nfrag); |
1386 | 0 | ipf_list->last_sent_idx++; |
1387 | 0 | } |
1388 | 0 | free(ipf_list->frag_list); |
1389 | 0 | free(ipf_list); |
1390 | 0 | } |
1391 | |
|
1392 | 0 | if (atomic_count_get(&ipf->nfrag)) { |
1393 | 0 | VLOG_WARN("ipf destroy with non-zero fragment count. "); |
1394 | 0 | } |
1395 | |
|
1396 | 0 | struct reassembled_pkt *rp; |
1397 | 0 | LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) { |
1398 | 0 | dp_packet_delete(rp->pkt); |
1399 | 0 | free(rp); |
1400 | 0 | } |
1401 | |
|
1402 | 0 | hmap_destroy(&ipf->frag_lists); |
1403 | 0 | ovs_list_poison(&ipf->frag_exp_list); |
1404 | 0 | ovs_list_poison(&ipf->frag_complete_list); |
1405 | 0 | ovs_list_poison(&ipf->reassembled_pkt_list); |
1406 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1407 | 0 | ovs_mutex_destroy(&ipf->ipf_lock); |
1408 | 0 | free(ipf); |
1409 | 0 | } |
1410 | | |
1411 | | int |
1412 | | ipf_set_enabled(struct ipf *ipf, bool v6, bool enable) |
1413 | 0 | { |
1414 | 0 | atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled, |
1415 | 0 | enable); |
1416 | 0 | return 0; |
1417 | 0 | } |
1418 | | |
1419 | | int |
1420 | | ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value) |
1421 | 0 | { |
1422 | | /* If the user specifies an unreasonably large number, fragmentation |
1423 | | * will not work well but it will not blow up. */ |
1424 | 0 | if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND : IPF_V4_FRAG_SIZE_LBOUND)) { |
1425 | 0 | return 1; |
1426 | 0 | } |
1427 | | |
1428 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1429 | 0 | if (v6) { |
1430 | 0 | atomic_store_relaxed(&ipf->min_v6_frag_size, value); |
1431 | 0 | } else { |
1432 | 0 | atomic_store_relaxed(&ipf->min_v4_frag_size, value); |
1433 | 0 | ipf->max_v4_frag_list_size = DIV_ROUND_UP( |
1434 | 0 | IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE, |
1435 | 0 | ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE); |
1436 | 0 | } |
1437 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1438 | 0 | return 0; |
1439 | 0 | } |
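For example, with the default minimum v4 fragment size of 1200 bytes, max_v4_frag_list_size = DIV_ROUND_UP(65535 - 60, 1200 - 60) = DIV_ROUND_UP(65475, 1140) = 58, i.e. at most 58 list entries are ever needed for a maximum-size IPv4 packet.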
1440 | | |
1441 | | int |
1442 | | ipf_set_max_nfrags(struct ipf *ipf, uint32_t value) |
1443 | 0 | { |
1444 | 0 | if (value > IPF_NFRAG_UBOUND) { |
1445 | 0 | return 1; |
1446 | 0 | } |
1447 | 0 | atomic_store_relaxed(&ipf->nfrag_max, value); |
1448 | 0 | return 0; |
1449 | 0 | } |
1450 | | |
1451 | | int |
1452 | | ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status) |
1453 | 0 | { |
1454 | 0 | ipf_status->nfrag = atomic_count_get(&ipf->nfrag); |
1455 | 0 | atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max); |
1456 | |
|
1457 | 0 | atomic_read_relaxed(&ipf->ifp_v4_enabled, &ipf_status->v4.enabled); |
1458 | 0 | atomic_read_relaxed(&ipf->min_v4_frag_size, |
1459 | 0 | &ipf_status->v4.min_frag_size); |
1460 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED], |
1461 | 0 | &ipf_status->v4.nfrag_accepted); |
1462 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT], |
1463 | 0 | &ipf_status->v4.nfrag_completed_sent); |
1464 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_EXPIRED], |
1465 | 0 | &ipf_status->v4.nfrag_expired_sent); |
1466 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL], |
1467 | 0 | &ipf_status->v4.nfrag_too_small); |
1468 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP], |
1469 | 0 | &ipf_status->v4.nfrag_overlap); |
1470 | 0 | atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED], |
1471 | 0 | &ipf_status->v4.nfrag_purged); |
1472 | |
|
1473 | 0 | atomic_read_relaxed(&ipf->ifp_v6_enabled, &ipf_status->v6.enabled); |
1474 | 0 | atomic_read_relaxed(&ipf->min_v6_frag_size, |
1475 | 0 | &ipf_status->v6.min_frag_size); |
1476 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED], |
1477 | 0 | &ipf_status->v6.nfrag_accepted); |
1478 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT], |
1479 | 0 | &ipf_status->v6.nfrag_completed_sent); |
1480 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_EXPIRED], |
1481 | 0 | &ipf_status->v6.nfrag_expired_sent); |
1482 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL], |
1483 | 0 | &ipf_status->v6.nfrag_too_small); |
1484 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP], |
1485 | 0 | &ipf_status->v6.nfrag_overlap); |
1486 | 0 | atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED], |
1487 | 0 | &ipf_status->v6.nfrag_purged); |
1488 | 0 | return 0; |
1489 | 0 | } |
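A minimal lifecycle sketch for configuring the module and reading back its status; every function and 'struct ipf_status' field used below appears in this file or its header, though the wrapper function itself is illustrative.

static void
example_ipf_setup(void)
{
    struct ipf *ipf = ipf_init();

    ipf_set_enabled(ipf, false, true);   /* Enable v4 reassembly. */
    ipf_set_enabled(ipf, true, true);    /* Enable v6 reassembly. */
    ipf_set_min_frag(ipf, false, 1200);  /* Returns nonzero below the
                                          * 400-byte lower bound. */
    ipf_set_max_nfrags(ipf, 2000);       /* Returns nonzero above 5000. */

    struct ipf_status status;
    ipf_get_status(ipf, &status);
    VLOG_INFO("fragments in process: %u of max %u",
              status.nfrag, status.nfrag_max);

    ipf_destroy(ipf);
}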
1490 | | |
1491 | | struct ipf_dump_ctx { |
1492 | | struct hmap_position bucket_pos; |
1493 | | }; |
1494 | | |
1495 | | /* Allocates an 'ipf_dump_ctx' to keep track of an hmap position. The |
1496 | | * caller must call ipf_dump_done() when dumping is finished. */ |
1497 | | int |
1498 | | ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx) |
1499 | 0 | { |
1500 | 0 | *ipf_dump_ctx = xzalloc(sizeof **ipf_dump_ctx); |
1501 | 0 | return 0; |
1502 | 0 | } |
1503 | | |
1504 | | /* Creates a string representation of the state of an 'ipf_list' and puts |
1505 | | * it in 'ds'. */ |
1506 | | static void |
1507 | | ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds) |
1508 | 0 | { |
1509 | 0 | ds_put_cstr(ds, "("); |
1510 | 0 | if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) { |
1511 | 0 | ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",", |
1512 | 0 | IP_ARGS(ipf_list->key.src_addr.ipv4), |
1513 | 0 | IP_ARGS(ipf_list->key.dst_addr.ipv4)); |
1514 | 0 | } else { |
1515 | 0 | ds_put_cstr(ds, "src="); |
1516 | 0 | ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds); |
1517 | 0 | ds_put_cstr(ds, ",dst="); |
1518 | 0 | ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds); |
1519 | 0 | ds_put_cstr(ds, ","); |
1520 | 0 | } |
1521 | |
|
1522 | 0 | ds_put_format(ds, "recirc_id=%u,ip_id=%u,dl_type=0x%x,zone=%u,nw_proto=%u", |
1523 | 0 | ipf_list->key.recirc_id, ntohl(ipf_list->key.ip_id), |
1524 | 0 | ntohs(ipf_list->key.dl_type), ipf_list->key.zone, |
1525 | 0 | ipf_list->key.nw_proto); |
1526 | |
|
1527 | 0 | ds_put_format(ds, ",num_fragments=%u,state=%s", |
1528 | 0 | ipf_list->last_inuse_idx + 1, |
1529 | 0 | ipf_state_name[ipf_list->state]); |
1530 | |
|
1531 | 0 | ds_put_cstr(ds, ")"); |
1532 | 0 | } |
1533 | | |
1534 | | /* Finds the next ipf list starting from 'ipf_dump_ctx->bucket_pos' and uses |
1535 | | * ipf_dump_create() to create a string representation of the state of an |
1536 | | * ipf list, to which 'dump' is pointed. Returns EOF when there are no |
1537 | | * more ipf lists. */ |
1538 | | int |
1539 | | ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char **dump) |
1540 | 0 | { |
1541 | 0 | ovs_mutex_lock(&ipf->ipf_lock); |
1542 | |
|
1543 | 0 | struct hmap_node *node = hmap_at_position(&ipf->frag_lists, |
1544 | 0 | &ipf_dump_ctx->bucket_pos); |
1545 | 0 | if (!node) { |
1546 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1547 | 0 | return EOF; |
1548 | 0 | } else { |
1549 | 0 | struct ipf_list *ipf_list_; |
1550 | 0 | INIT_CONTAINER(ipf_list_, node, node); |
1551 | 0 | struct ipf_list ipf_list = *ipf_list_; |
1552 | 0 | ovs_mutex_unlock(&ipf->ipf_lock); |
1553 | 0 | struct ds ds = DS_EMPTY_INITIALIZER; |
1554 | 0 | ipf_dump_create(&ipf_list, &ds); |
1555 | 0 | *dump = ds_steal_cstr(&ds); |
1556 | 0 | return 0; |
1557 | 0 | } |
1558 | 0 | } |
1559 | | |
1560 | | /* Frees 'ipf_dump_ctx' allocated by ipf_dump_start(). */ |
1561 | | int |
1562 | | ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx) |
1563 | 0 | { |
1564 | 0 | free(ipf_dump_ctx); |
1565 | 0 | return 0; |
1566 | 0 | } |
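A minimal sketch of walking the fragment lists with the dump API above; each string returned by ipf_dump_next() is built with ds_steal_cstr(), so the caller owns it and must free it. The wrapper function name is illustrative.

static void
example_ipf_dump(struct ipf *ipf)
{
    struct ipf_dump_ctx *ctx;
    char *dump;

    ipf_dump_start(&ctx);
    while (ipf_dump_next(ipf, ctx, &dump) != EOF) {
        VLOG_INFO("%s", dump);
        free(dump);
    }
    ipf_dump_done(ctx);
}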