/src/SockFuzzer/third_party/xnu/bsd/netinet/mptcp.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2012-2018 Apple Inc. All rights reserved. |
3 | | * |
4 | | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | | * |
6 | | * This file contains Original Code and/or Modifications of Original Code |
7 | | * as defined in and that are subject to the Apple Public Source License |
8 | | * Version 2.0 (the 'License'). You may not use this file except in |
9 | | * compliance with the License. The rights granted to you under the License |
10 | | * may not be used to create, or enable the creation or redistribution of, |
11 | | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | | * circumvent, violate, or enable the circumvention or violation of, any |
13 | | * terms of an Apple operating system software license agreement. |
14 | | * |
15 | | * Please obtain a copy of the License at |
16 | | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | | * |
18 | | * The Original Code and all software distributed under the License are |
19 | | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | | * Please see the License for the specific language governing rights and |
24 | | * limitations under the License. |
25 | | * |
26 | | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | | */ |
28 | | |
29 | | /* |
30 | | * A note on the MPTCP/NECP-interactions: |
31 | | * |
32 | | * MPTCP uses NECP-callbacks to get notified of interface/policy events. |
33 | | * MPTCP registers for interface-events at the MPTCP-layer |
34 | | * through a call to necp_client_register_multipath_cb. |
35 | | * To get per-flow events (aka per TCP-subflow), we register with |
36 | | * necp_client_register_socket_flow. Both registrations use the |
37 | | * necp-client-uuid that comes from the app. |
38 | | * |
39 | | * The locking is rather tricky. In general, we expect the lock-ordering to |
40 | | * happen from necp-fd -> necp-client -> mpp_lock. |
41 | | * |
42 | | * There are however some subtleties. |
43 | | * |
44 | | * 1. When registering the multipath_cb, we are holding the mpp_lock. This is |
45 | | * safe, because it is the very first time this MPTCP-connection goes into NECP. |
46 | | * As we go into NECP we take the NECP-locks, so no NECP-event can |
47 | | * deadlock us, because those NECP-events also take the NECP-locks first. |
48 | | * Either they win the race and thus won't find our MPTCP-connection, |
49 | | * or MPTCP wins the race and safely installs the callbacks while |
50 | | * holding the NECP lock. |
51 | | * |
52 | | * 2. When registering the subflow-callbacks we must unlock the mpp_lock, |
53 | | * because we have already registered callbacks and might race against an |
54 | | * NECP-event that will match on our socket. So, we have to unlock to be safe. |
55 | | * |
56 | | * 3. When removing the multipath_cb, we do it in mp_pcbdispose(), after the |
57 | | * so_usecount has reached 0. We must be careful not to remove the mpp_socket |
58 | | * pointers before we have unregistered the callback, because again we might |
59 | | * be racing against an NECP-event. Unregistering must happen with an unlocked |
60 | | * mpp_lock, because of the lock-ordering constraint. An NECP-event could |
61 | | * thus trigger before we had a chance to unregister. That's why |
62 | | * we need to check the so_usecount in mptcp_session_necp_cb. If we get |
63 | | * there while the socket is being garbage-collected, the use-count will |
64 | | * have dropped to 0 and we exit. Removal of the multipath_cb again happens by taking |
65 | | * the NECP-locks so any running NECP-events will finish first and exit cleanly. |
66 | | * |
67 | | * 4. When removing the subflow-callback, we do it in in_pcbdispose(). Again, |
68 | | * the socket-lock must be unlocked for lock-ordering constraints. This gets |
69 | | * a bit tricky, as in tcp_garbage_collect we hold both the mp_so and the so lock. |
70 | | * So, we drop the mp_so-lock as soon as the subflow is unlinked with |
71 | | * mptcp_subflow_del. Then, in in_pcbdispose we drop the subflow-lock. |
72 | | * If an NECP-event was waiting on the lock in mptcp_subflow_necp_cb, when it |
73 | | * gets it, it will realize that the subflow became non-MPTCP and retry (see |
74 | | * tcp_lock). Then it waits again on the subflow-lock. When we drop this lock |
75 | | * in in_pcbdispose, and enter necp_inpcb_dispose, this one will have to wait |
76 | | * for the NECP-lock (held by the other thread that is taking care of the NECP- |
77 | | * event). So, the event now finally gets the subflow-lock and then hits an |
78 | | * so_usecount that is 0 and exits. Eventually, we can remove the subflow from |
79 | | * the NECP callback. |
80 | | */ |
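/*
 * Illustration (not from the original file): the deadlock-freedom argument
 * above boils down to every thread honoring the same global lock order.
 * A minimal user-space sketch of that rule, using hypothetical pthread
 * mutexes standing in for the necp-fd, necp-client and mpp locks:
 */
#include <pthread.h>

static pthread_mutex_t necp_fd_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t necp_client_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mpp_lock = PTHREAD_MUTEX_INITIALIZER;

/* Every path acquires in necp-fd -> necp-client -> mpp_lock order. */
static void
deliver_event_locked(void (*deliver)(void))
{
	pthread_mutex_lock(&necp_fd_lock);
	pthread_mutex_lock(&necp_client_lock);
	pthread_mutex_lock(&mpp_lock);

	deliver();

	pthread_mutex_unlock(&mpp_lock);
	pthread_mutex_unlock(&necp_client_lock);
	pthread_mutex_unlock(&necp_fd_lock);
}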
81 | | |
82 | | #include <sys/param.h> |
83 | | #include <sys/systm.h> |
84 | | #include <sys/kernel.h> |
85 | | #include <sys/mbuf.h> |
86 | | #include <sys/mcache.h> |
87 | | #include <sys/socket.h> |
88 | | #include <sys/socketvar.h> |
89 | | #include <sys/syslog.h> |
90 | | #include <sys/protosw.h> |
91 | | |
92 | | #include <kern/zalloc.h> |
93 | | #include <kern/locks.h> |
94 | | |
95 | | #include <mach/sdt.h> |
96 | | |
97 | | #include <net/if.h> |
98 | | #include <netinet/in.h> |
99 | | #include <netinet/in_var.h> |
100 | | #include <netinet/tcp.h> |
101 | | #include <netinet/tcp_fsm.h> |
102 | | #include <netinet/tcp_seq.h> |
103 | | #include <netinet/tcp_var.h> |
104 | | #include <netinet/mptcp_var.h> |
105 | | #include <netinet/mptcp.h> |
106 | | #include <netinet/mptcp_seq.h> |
107 | | #include <netinet/mptcp_opt.h> |
108 | | #include <netinet/mptcp_timer.h> |
109 | | |
110 | | int mptcp_enable = 1; |
111 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED, |
112 | | &mptcp_enable, 0, "Enable Multipath TCP Support"); |
113 | | |
114 | | /* |
115 | | * Number of times to try negotiating MPTCP on SYN retransmissions. |
116 | | * We haven't seen any reports of a middlebox that is dropping all SYN-segments |
117 | | * that have an MPTCP-option. Thus, let's be generous and retransmit it 4 times. |
118 | | */ |
119 | | int mptcp_mpcap_retries = 4; |
120 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr, |
121 | | CTLFLAG_RW | CTLFLAG_LOCKED, |
122 | | &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries"); |
123 | | |
124 | | /* |
125 | | * By default, DSS checksum is turned off; revisit if we ever do |
126 | | * MPTCP for non-SSL traffic. |
127 | | */ |
128 | | int mptcp_dss_csum = 0; |
129 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED, |
130 | | &mptcp_dss_csum, 0, "Enable DSS checksum"); |
131 | | |
132 | | /* |
133 | | * When mptcp_fail_thresh number of retransmissions are sent, subflow failover |
134 | | * is attempted on a different path. |
135 | | */ |
136 | | int mptcp_fail_thresh = 1; |
137 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED, |
138 | | &mptcp_fail_thresh, 0, "Failover threshold"); |
139 | | |
140 | | /* |
141 | | * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime |
142 | | * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout. |
143 | | * Some carrier networks have a timeout of 10 or 15 minutes. |
144 | | */ |
145 | | int mptcp_subflow_keeptime = 60 * 14; |
146 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED, |
147 | | &mptcp_subflow_keeptime, 0, "Keepalive in seconds"); |
148 | | |
149 | | int mptcp_rtthist_rtthresh = 600; |
150 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, |
151 | | &mptcp_rtthist_rtthresh, 0, "Rtt threshold"); |
152 | | |
153 | | int mptcp_rtothresh = 1500; |
154 | | SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED, |
155 | | &mptcp_rtothresh, 0, "RTO threshold"); |
156 | | |
157 | | /* |
158 | | * Probe the preferred path when it is not in use. |
159 | | */ |
160 | | uint32_t mptcp_probeto = 1000; |
161 | | SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED, |
162 | | &mptcp_probeto, 0, "Disable probing by setting to 0"); |
163 | | |
164 | | uint32_t mptcp_probecnt = 5; |
165 | | SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED, |
166 | | &mptcp_probecnt, 0, "Number of probe writes"); |
167 | | |
168 | | static int |
169 | | mptcp_reass_present(struct socket *mp_so) |
170 | 0 | { |
171 | 0 | struct mptses *mpte = mpsotompte(mp_so); |
172 | 0 | struct mptcb *mp_tp = mpte->mpte_mptcb; |
173 | 0 | struct tseg_qent *q; |
174 | 0 | int dowakeup = 0; |
175 | 0 | int flags = 0; |
176 | | |
177 | | /* |
178 | | * Present data to user, advancing rcv_nxt through |
179 | | * completed sequence space. |
180 | | */ |
181 | 0 | if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { |
182 | 0 | return flags; |
183 | 0 | } |
184 | 0 | q = LIST_FIRST(&mp_tp->mpt_segq); |
185 | 0 | if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) { |
186 | 0 | return flags; |
187 | 0 | } |
188 | | |
189 | | /* |
190 | | * If there is already another thread doing reassembly for this |
191 | | * connection, it is better to let it finish the job -- |
192 | | * (radar 16316196) |
193 | | */ |
194 | 0 | if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) { |
195 | 0 | return flags; |
196 | 0 | } |
197 | | |
198 | 0 | mp_tp->mpt_flags |= MPTCPF_REASS_INPROG; |
199 | |
200 | 0 | do { |
201 | 0 | mp_tp->mpt_rcvnxt += q->tqe_len; |
202 | 0 | LIST_REMOVE(q, tqe_q); |
203 | 0 | if (mp_so->so_state & SS_CANTRCVMORE) { |
204 | 0 | m_freem(q->tqe_m); |
205 | 0 | } else { |
206 | 0 | flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN); |
207 | 0 | if (sbappendstream_rcvdemux(mp_so, q->tqe_m)) { |
208 | 0 | dowakeup = 1; |
209 | 0 | } |
210 | 0 | } |
211 | 0 | zfree(tcp_reass_zone, q); |
212 | 0 | mp_tp->mpt_reassqlen--; |
213 | 0 | q = LIST_FIRST(&mp_tp->mpt_segq); |
214 | 0 | } while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt); |
215 | 0 | mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG; |
216 | |
217 | 0 | if (dowakeup) { |
218 | 0 | sorwakeup(mp_so); /* done with socket lock held */ |
219 | 0 | } |
220 | 0 | return flags; |
221 | 0 | } |
222 | | |
223 | | static int |
224 | | mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *m) |
225 | 0 | { |
226 | 0 | struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb; |
227 | 0 | u_int64_t mb_dsn = phdr->mp_dsn; |
228 | 0 | struct tseg_qent *q; |
229 | 0 | struct tseg_qent *p = NULL; |
230 | 0 | struct tseg_qent *nq; |
231 | 0 | struct tseg_qent *te = NULL; |
232 | 0 | uint32_t qlimit; |
233 | | |
234 | | /* |
235 | | * Limit the number of segments in the reassembly queue to prevent |
236 | | * holding on to too many segments (and thus running out of mbufs). |
237 | | * Make sure to let through the missing segment that caused this |
238 | | * queue to build up. Always keep one global queue entry spare to be able |
239 | | * to process that missing segment. |
240 | | */ |
241 | 0 | qlimit = MIN(MAX(100, mp_so->so_rcv.sb_hiwat >> 10), |
242 | 0 | (tcp_autorcvbuf_max >> 10)); |
243 | 0 | if (mb_dsn != mp_tp->mpt_rcvnxt && |
244 | 0 | (mp_tp->mpt_reassqlen + 1) >= qlimit) { |
245 | 0 | tcpstat.tcps_mptcp_rcvmemdrop++; |
246 | 0 | m_freem(m); |
247 | 0 | *tlenp = 0; |
248 | 0 | return 0; |
249 | 0 | } |
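/*
 * Worked example (illustrative numbers): with sb_hiwat = 131072,
 * sb_hiwat >> 10 = 128, so MAX(100, 128) = 128; with
 * tcp_autorcvbuf_max = 2097152, 2097152 >> 10 = 2048; thus
 * qlimit = MIN(128, 2048) = 128 reassembly-queue entries.
 */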
250 | | |
251 | | /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */ |
252 | 0 | te = (struct tseg_qent *) zalloc(tcp_reass_zone); |
253 | 0 | if (te == NULL) { |
254 | 0 | tcpstat.tcps_mptcp_rcvmemdrop++; |
255 | 0 | m_freem(m); |
256 | 0 | return 0; |
257 | 0 | } |
258 | | |
259 | 0 | mp_tp->mpt_reassqlen++; |
260 | | |
261 | | /* |
262 | | * Find a segment which begins after this one does. |
263 | | */ |
264 | 0 | LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) { |
265 | 0 | if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) { |
266 | 0 | break; |
267 | 0 | } |
268 | 0 | p = q; |
269 | 0 | } |
270 | | |
271 | | /* |
272 | | * If there is a preceding segment, it may provide some of |
273 | | * our data already. If so, drop the data from the incoming |
274 | | * segment. If it provides all of our data, drop us. |
275 | | */ |
276 | 0 | if (p != NULL) { |
277 | 0 | int64_t i; |
278 | | /* conversion to int (in i) handles seq wraparound */ |
279 | 0 | i = p->tqe_m->m_pkthdr.mp_dsn + p->tqe_len - mb_dsn; |
280 | 0 | if (i > 0) { |
281 | 0 | if (i >= *tlenp) { |
282 | 0 | tcpstat.tcps_mptcp_rcvduppack++; |
283 | 0 | m_freem(m); |
284 | 0 | zfree(tcp_reass_zone, te); |
285 | 0 | te = NULL; |
286 | 0 | mp_tp->mpt_reassqlen--; |
287 | | /* |
288 | | * Try to present any queued data |
289 | | * at the left window edge to the user. |
290 | | * This is needed after the 3-WHS |
291 | | * completes. |
292 | | */ |
293 | 0 | goto out; |
294 | 0 | } |
295 | 0 | VERIFY(i <= INT_MAX); |
296 | 0 | m_adj(m, (int)i); |
297 | 0 | *tlenp -= i; |
298 | 0 | phdr->mp_dsn += i; |
299 | 0 | } |
300 | 0 | } |
301 | | |
302 | 0 | tcpstat.tcps_mp_oodata++; |
303 | | |
304 | | /* |
305 | | * While we overlap succeeding segments trim them or, |
306 | | * if they are completely covered, dequeue them. |
307 | | */ |
308 | 0 | while (q) { |
309 | 0 | int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn; |
310 | 0 | if (i <= 0) { |
311 | 0 | break; |
312 | 0 | } |
313 | | |
314 | 0 | if (i < q->tqe_len) { |
315 | 0 | q->tqe_m->m_pkthdr.mp_dsn += i; |
316 | 0 | q->tqe_len -= i; |
317 | |
318 | 0 | VERIFY(i <= INT_MAX); |
319 | 0 | m_adj(q->tqe_m, (int)i); |
320 | 0 | break; |
321 | 0 | } |
322 | | |
323 | 0 | nq = LIST_NEXT(q, tqe_q); |
324 | 0 | LIST_REMOVE(q, tqe_q); |
325 | 0 | m_freem(q->tqe_m); |
326 | 0 | zfree(tcp_reass_zone, q); |
327 | 0 | mp_tp->mpt_reassqlen--; |
328 | 0 | q = nq; |
329 | 0 | } |
330 | | |
331 | | /* Insert the new segment queue entry into place. */ |
332 | 0 | te->tqe_m = m; |
333 | 0 | te->tqe_th = NULL; |
334 | 0 | te->tqe_len = *tlenp; |
335 | |
336 | 0 | if (p == NULL) { |
337 | 0 | LIST_INSERT_HEAD(&mp_tp->mpt_segq, te, tqe_q); |
338 | 0 | } else { |
339 | 0 | LIST_INSERT_AFTER(p, te, tqe_q); |
340 | 0 | } |
341 | | |
342 | 0 | out: |
343 | 0 | return mptcp_reass_present(mp_so); |
344 | 0 | } |
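/*
 * Illustration (not part of the original file): the core of the reassembly
 * logic above, reduced to a user-space sketch. Segments are kept sorted by
 * 64-bit DSN; an incoming segment is trimmed against its predecessor and
 * dropped entirely when the predecessor already covers it. Names are
 * hypothetical, successor trimming is omitted, and the queue is simplified
 * to a singly-linked list of (dsn, len) descriptors.
 */
#include <stdint.h>
#include <stdlib.h>

struct seg {
	uint64_t dsn;     /* data sequence number of the first byte */
	uint32_t len;     /* number of bytes */
	struct seg *next;
};

/* Returns 0 if inserted, -1 if the segment was fully duplicated. */
static int
seg_insert(struct seg **head, struct seg *te)
{
	struct seg *q = *head, *p = NULL;

	/* Find the first segment that begins after the new one. */
	while (q != NULL && q->dsn <= te->dsn) {
		p = q;
		q = q->next;
	}

	/* Trim against the predecessor; drop if fully covered. */
	if (p != NULL) {
		int64_t i = (int64_t)(p->dsn + p->len - te->dsn);
		if (i > 0) {
			if ((uint32_t)i >= te->len) {
				free(te);
				return -1;
			}
			te->dsn += i;
			te->len -= (uint32_t)i;
		}
	}

	/* Link the (possibly trimmed) segment into place. */
	if (p == NULL) {
		te->next = *head;
		*head = te;
	} else {
		te->next = p->next;
		p->next = te;
	}
	return 0;
}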
345 | | |
346 | | /* |
347 | | * MPTCP input, called when data has been read from a subflow socket. |
348 | | */ |
349 | | void |
350 | | mptcp_input(struct mptses *mpte, struct mbuf *m) |
351 | 0 | { |
352 | 0 | struct socket *mp_so; |
353 | 0 | struct mptcb *mp_tp = NULL; |
354 | 0 | int count = 0, wakeup = 0; |
355 | 0 | struct mbuf *save = NULL, *prev = NULL; |
356 | 0 | struct mbuf *freelist = NULL, *tail = NULL; |
357 | |
358 | 0 | VERIFY(m->m_flags & M_PKTHDR); |
359 | | |
360 | 0 | mp_so = mptetoso(mpte); |
361 | 0 | mp_tp = mpte->mpte_mptcb; |
362 | |
363 | 0 | socket_lock_assert_owned(mp_so); |
364 | |
365 | 0 | DTRACE_MPTCP(input); |
366 | |
367 | 0 | mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp); |
368 | | |
369 | | /* |
370 | | * Each mbuf contains MPTCP Data Sequence Map |
371 | | * Process the data for reassembly, delivery to MPTCP socket |
372 | | * client, etc. |
373 | | * |
374 | | */ |
375 | 0 | count = mp_so->so_rcv.sb_cc; |
376 | | |
377 | | /* |
378 | | * In the degraded fallback case, data is accepted without DSS map |
379 | | */ |
380 | 0 | if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { |
381 | 0 | struct mbuf *iter; |
382 | 0 | int mb_dfin = 0; |
383 | 0 | fallback: |
384 | 0 | mptcp_sbrcv_grow(mp_tp); |
385 | |
386 | 0 | iter = m; |
387 | 0 | while (iter) { |
388 | 0 | if ((iter->m_flags & M_PKTHDR) && |
389 | 0 | (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) { |
390 | 0 | mb_dfin = 1; |
391 | 0 | } |
392 | |
393 | 0 | if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) { |
394 | | /* Don't add zero-length packets, so skip it! */ |
395 | 0 | if (prev == NULL) { |
396 | 0 | m = iter->m_next; |
397 | 0 | m_free(iter); |
398 | 0 | iter = m; |
399 | 0 | } else { |
400 | 0 | prev->m_next = iter->m_next; |
401 | 0 | m_free(iter); |
402 | 0 | iter = prev->m_next; |
403 | 0 | } |
404 | | |
405 | | /* It was a zero-length packet so next one must be a pkthdr */ |
406 | 0 | VERIFY(iter == NULL || iter->m_flags & M_PKTHDR); |
407 | 0 | } else { |
408 | 0 | prev = iter; |
409 | 0 | iter = iter->m_next; |
410 | 0 | } |
411 | 0 | } |
412 | | |
413 | | /* |
414 | | * Assume a degraded flow, as this may be the first packet |
415 | | * without DSS, and the subflow state is not updated yet. |
416 | | */ |
417 | 0 | if (sbappendstream_rcvdemux(mp_so, m)) { |
418 | 0 | sorwakeup(mp_so); |
419 | 0 | } |
420 | |
421 | 0 | DTRACE_MPTCP5(receive__degraded, struct mbuf *, m, |
422 | 0 | struct socket *, mp_so, |
423 | 0 | struct sockbuf *, &mp_so->so_rcv, |
424 | 0 | struct sockbuf *, &mp_so->so_snd, |
425 | 0 | struct mptses *, mpte); |
426 | 0 | count = mp_so->so_rcv.sb_cc - count; |
427 | |
428 | 0 | mp_tp->mpt_rcvnxt += count; |
429 | |
430 | 0 | if (mb_dfin) { |
431 | 0 | mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN); |
432 | 0 | socantrcvmore(mp_so); |
433 | 0 | } |
434 | 0 | return; |
435 | 0 | } |
436 | | |
437 | 0 | do { |
438 | 0 | u_int64_t mb_dsn; |
439 | 0 | int32_t mb_datalen; |
440 | 0 | int64_t todrop; |
441 | 0 | int mb_dfin = 0; |
442 | |
443 | 0 | VERIFY(m->m_flags & M_PKTHDR); |
444 | | |
445 | | /* If fallback occurs, mbufs will not have PKTF_MPTCP set */ |
446 | 0 | if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { |
447 | 0 | goto fallback; |
448 | 0 | } |
449 | | |
450 | 0 | save = m->m_next; |
451 | | /* |
452 | | * A single TCP packet formed of multiple mbufs |
453 | | * holds DSS mapping in the first mbuf of the chain. |
454 | | * Other mbufs in the chain may have M_PKTHDR set |
455 | | * even though they belong to the same TCP packet |
456 | | * and therefore use the DSS mapping stored in the |
457 | | * first mbuf of the mbuf chain. mptcp_input() can |
458 | | * get an mbuf chain with multiple TCP packets. |
459 | | */ |
460 | 0 | while (save && (!(save->m_flags & M_PKTHDR) || |
461 | 0 | !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) { |
462 | 0 | prev = save; |
463 | 0 | save = save->m_next; |
464 | 0 | } |
465 | 0 | if (prev) { |
466 | 0 | prev->m_next = NULL; |
467 | 0 | } else { |
468 | 0 | m->m_next = NULL; |
469 | 0 | } |
470 | |
471 | 0 | mb_dsn = m->m_pkthdr.mp_dsn; |
472 | 0 | mb_datalen = m->m_pkthdr.mp_rlen; |
473 | |
474 | 0 | todrop = (mb_dsn + mb_datalen) - (mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd); |
475 | 0 | if (todrop > 0) { |
476 | 0 | tcpstat.tcps_mptcp_rcvpackafterwin++; |
477 | |
478 | 0 | os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n", |
479 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
480 | 0 | (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt, |
481 | 0 | mp_tp->mpt_rcvwnd, todrop); |
482 | |
483 | 0 | if (todrop >= mb_datalen) { |
484 | 0 | if (freelist == NULL) { |
485 | 0 | freelist = m; |
486 | 0 | } else { |
487 | 0 | tail->m_next = m; |
488 | 0 | } |
489 | |
490 | 0 | if (prev != NULL) { |
491 | 0 | tail = prev; |
492 | 0 | } else { |
493 | 0 | tail = m; |
494 | 0 | } |
495 | |
496 | 0 | m = save; |
497 | 0 | prev = save = NULL; |
498 | 0 | continue; |
499 | 0 | } else { |
500 | 0 | VERIFY(todrop <= INT_MAX); |
501 | 0 | m_adj(m, (int)-todrop); |
502 | 0 | mb_datalen -= todrop; |
503 | 0 | m->m_pkthdr.mp_rlen -= todrop; |
504 | 0 | } |
505 | | |
506 | | /* |
507 | | * We drop from the right edge of the mbuf, thus the |
508 | | * DATA_FIN is dropped as well |
509 | | */ |
510 | 0 | m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN; |
511 | 0 | } |
512 | | |
513 | 0 | if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) { |
514 | 0 | if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), |
515 | 0 | mp_tp->mpt_rcvnxt)) { |
516 | 0 | if (freelist == NULL) { |
517 | 0 | freelist = m; |
518 | 0 | } else { |
519 | 0 | tail->m_next = m; |
520 | 0 | } |
521 | |
522 | 0 | if (prev != NULL) { |
523 | 0 | tail = prev; |
524 | 0 | } else { |
525 | 0 | tail = m; |
526 | 0 | } |
527 | |
528 | 0 | m = save; |
529 | 0 | prev = save = NULL; |
530 | 0 | continue; |
531 | 0 | } else { |
532 | 0 | VERIFY((mp_tp->mpt_rcvnxt - mb_dsn) <= INT_MAX); |
533 | 0 | m_adj(m, (int)(mp_tp->mpt_rcvnxt - mb_dsn)); |
534 | 0 | mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn); |
535 | 0 | mb_dsn = mp_tp->mpt_rcvnxt; |
536 | 0 | VERIFY(mb_datalen >= 0 && mb_datalen <= USHRT_MAX); |
537 | 0 | m->m_pkthdr.mp_rlen = (uint16_t)mb_datalen; |
538 | 0 | m->m_pkthdr.mp_dsn = mb_dsn; |
539 | 0 | } |
540 | 0 | } |
541 | | |
542 | 0 | if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) || |
543 | 0 | !LIST_EMPTY(&mp_tp->mpt_segq)) { |
544 | 0 | mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m); |
545 | |
546 | 0 | goto next; |
547 | 0 | } |
548 | 0 | mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN); |
549 | |
550 | 0 | mptcp_sbrcv_grow(mp_tp); |
551 | |
552 | 0 | if (sbappendstream_rcvdemux(mp_so, m)) { |
553 | 0 | wakeup = 1; |
554 | 0 | } |
555 | |
556 | 0 | DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so, |
557 | 0 | struct sockbuf *, &mp_so->so_rcv, |
558 | 0 | struct sockbuf *, &mp_so->so_snd, |
559 | 0 | struct mptses *, mpte, |
560 | 0 | struct mptcb *, mp_tp); |
561 | 0 | count = mp_so->so_rcv.sb_cc - count; |
562 | 0 | tcpstat.tcps_mp_rcvtotal++; |
563 | 0 | tcpstat.tcps_mp_rcvbytes += count; |
564 | |
565 | 0 | mp_tp->mpt_rcvnxt += count; |
566 | |
567 | 0 | next: |
568 | 0 | if (mb_dfin) { |
569 | 0 | mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN); |
570 | 0 | socantrcvmore(mp_so); |
571 | 0 | } |
572 | 0 | m = save; |
573 | 0 | prev = save = NULL; |
574 | 0 | count = mp_so->so_rcv.sb_cc; |
575 | 0 | } while (m); |
576 | | |
577 | 0 | if (freelist) { |
578 | 0 | m_freem(freelist); |
579 | 0 | } |
580 | |
581 | 0 | if (wakeup) { |
582 | 0 | sorwakeup(mp_so); |
583 | 0 | } |
584 | 0 | } |
585 | | |
586 | | boolean_t |
587 | | mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject) |
588 | 0 | { |
589 | 0 | struct socket *mp_so = mptetoso(mp_tp->mpt_mpte); |
590 | | |
591 | | /* |
592 | | * Always send if there is data in the reinject-queue. |
593 | | */ |
594 | 0 | if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) { |
595 | 0 | return TRUE; |
596 | 0 | } |
597 | | |
598 | | /* |
599 | | * Don't send, if: |
600 | | * |
601 | | * 1. snd_nxt >= snd_max : Means, basically everything has been sent. |
602 | | * Except when using TFO, we might be doing a 0-byte write. |
603 | | * 2. snd_una + snd_wnd <= snd_nxt: No space in the receiver's window |
604 | | * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled. |
605 | | */ |
606 | | |
607 | 0 | if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) { |
608 | 0 | return FALSE; |
609 | 0 | } |
610 | | |
611 | 0 | if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) { |
612 | 0 | return FALSE; |
613 | 0 | } |
614 | | |
615 | 0 | if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) { |
616 | 0 | return FALSE; |
617 | 0 | } |
618 | | |
619 | 0 | if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) { |
620 | 0 | return FALSE; |
621 | 0 | } |
622 | | |
623 | 0 | return TRUE; |
624 | 0 | } |
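/*
 * Worked example (illustrative numbers) for condition 2 above: with
 * snd_una = 1000, snd_wnd = 0 and snd_nxt = 1000, we have
 * snd_una + snd_wnd <= snd_nxt, so the peer's window is exhausted and
 * mptcp_can_send_more() returns FALSE even though snd_nxt < snd_max.
 */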
625 | | |
626 | | /* |
627 | | * MPTCP output. |
628 | | */ |
629 | | int |
630 | | mptcp_output(struct mptses *mpte) |
631 | 0 | { |
632 | 0 | struct mptcb *mp_tp; |
633 | 0 | struct mptsub *mpts; |
634 | 0 | struct mptsub *mpts_tried = NULL; |
635 | 0 | struct socket *mp_so; |
636 | 0 | struct mptsub *preferred_mpts = NULL; |
637 | 0 | uint64_t old_snd_nxt; |
638 | 0 | int error = 0; |
639 | |
640 | 0 | mp_so = mptetoso(mpte); |
641 | 0 | mp_tp = mpte->mpte_mptcb; |
642 | |
643 | 0 | socket_lock_assert_owned(mp_so); |
644 | |
645 | 0 | if (mp_so->so_flags & SOF_DEFUNCT) { |
646 | 0 | return 0; |
647 | 0 | } |
648 | | |
649 | 0 | VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)); |
650 | 0 | mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL; |
651 | |
652 | 0 | old_snd_nxt = mp_tp->mpt_sndnxt; |
653 | | // nedwill: limit attempts to avoid infinite loop |
654 | 0 | int attempts = 0; |
655 | 0 | while (mptcp_can_send_more(mp_tp, FALSE) && attempts++ < 16) { |
656 | | /* get the "best" subflow to be used for transmission */ |
657 | 0 | mpts = mptcp_get_subflow(mpte, &preferred_mpts); |
658 | 0 | if (mpts == NULL) { |
659 | 0 | mptcplog((LOG_INFO, "%s: no subflow\n", __func__), |
660 | 0 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); |
661 | 0 | break; |
662 | 0 | } |
663 | | |
664 | | /* In case there's just one flow, we reattempt later */ |
665 | 0 | if (mpts_tried != NULL && |
666 | 0 | (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) { |
667 | 0 | mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER; |
668 | 0 | mpts_tried->mpts_flags |= MPTSF_ACTIVE; |
669 | 0 | mptcp_start_timer(mpte, MPTT_REXMT); |
670 | 0 | break; |
671 | 0 | } |
672 | | |
673 | | /* |
674 | | * Automatic sizing of send socket buffer. Increase the send |
675 | | * socket buffer size if all of the following criteria are met: |
676 | | * 1. the receiver has enough buffer space for this data, and |
677 | | * 2. the send buffer is filled to 7/8th with data (so we actually |
678 | | * have data to make use of it). |
679 | | */ |
680 | 0 | if ((mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE && |
681 | 0 | tcp_cansbgrow(&mp_so->so_snd)) { |
682 | 0 | if ((mp_tp->mpt_sndwnd / 4 * 5) >= mp_so->so_snd.sb_hiwat && |
683 | 0 | mp_so->so_snd.sb_cc >= (mp_so->so_snd.sb_hiwat / 8 * 7)) { |
684 | 0 | if (sbreserve(&mp_so->so_snd, |
685 | 0 | min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc, |
686 | 0 | tcp_autosndbuf_max)) == 1) { |
687 | 0 | mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat; |
688 | 0 | } |
689 | 0 | } |
690 | 0 | } |
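/*
 * Worked example (illustrative numbers) for the growth conditions above:
 * with sb_hiwat = 100000, growing requires mpt_sndwnd / 4 * 5 >= 100000,
 * i.e. the peer advertises at least 80000 bytes of window, and
 * sb_cc >= 100000 / 8 * 7 = 87500 bytes already queued locally.
 */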
691 | |
692 | 0 | DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts, |
693 | 0 | struct socket *, mp_so); |
694 | 0 | error = mptcp_subflow_output(mpte, mpts, 0); |
695 | 0 | if (error) { |
696 | | /* can be a temporary loss of source address or other error */ |
697 | 0 | mpts->mpts_flags |= MPTSF_FAILINGOVER; |
698 | 0 | mpts->mpts_flags &= ~MPTSF_ACTIVE; |
699 | 0 | mpts_tried = mpts; |
700 | 0 | if (error != ECANCELED) { |
701 | 0 | os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n", |
702 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), |
703 | 0 | error, mpts->mpts_flags); |
704 | 0 | } |
705 | 0 | break; |
706 | 0 | } |
707 | | /* The model is to have only one active flow at a time */ |
708 | 0 | mpts->mpts_flags |= MPTSF_ACTIVE; |
709 | 0 | mpts->mpts_probesoon = mpts->mpts_probecnt = 0; |
710 | | |
711 | | /* Allows us to update the smoothed rtt */ |
712 | 0 | if (mptcp_probeto && mpts != preferred_mpts && preferred_mpts != NULL) { |
713 | 0 | if (preferred_mpts->mpts_probesoon) { |
714 | 0 | if ((tcp_now - preferred_mpts->mpts_probesoon) > mptcp_probeto) { |
715 | 0 | mptcp_subflow_output(mpte, preferred_mpts, MPTCP_SUBOUT_PROBING); |
716 | 0 | if (preferred_mpts->mpts_probecnt >= mptcp_probecnt) { |
717 | 0 | preferred_mpts->mpts_probesoon = 0; |
718 | 0 | preferred_mpts->mpts_probecnt = 0; |
719 | 0 | } |
720 | 0 | } |
721 | 0 | } else { |
722 | 0 | preferred_mpts->mpts_probesoon = tcp_now; |
723 | 0 | preferred_mpts->mpts_probecnt = 0; |
724 | 0 | } |
725 | 0 | } |
726 | |
727 | 0 | if (mpte->mpte_active_sub == NULL) { |
728 | 0 | mpte->mpte_active_sub = mpts; |
729 | 0 | } else if (mpte->mpte_active_sub != mpts) { |
730 | 0 | mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE; |
731 | 0 | mpte->mpte_active_sub = mpts; |
732 | |
733 | 0 | mptcpstats_inc_switch(mpte, mpts); |
734 | 0 | } |
735 | 0 | } |
736 | |
737 | 0 | if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) { |
738 | 0 | if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && |
739 | 0 | mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) { |
740 | 0 | mptcp_finish_usrclosed(mpte); |
741 | 0 | } |
742 | 0 | } |
743 | |
744 | 0 | mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL); |
745 | | |
746 | | /* subflow errors should not be percolated back up */ |
747 | 0 | return 0; |
748 | 0 | } |
749 | | |
750 | | |
751 | | static struct mptsub * |
752 | | mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt) |
753 | 0 | { |
754 | 0 | struct tcpcb *tp = sototcpcb(mpts->mpts_socket); |
755 | | |
756 | | /* |
757 | | * Lower RTT? Take it, if it's our first one, or if it |
758 | | * doesn't have any loss, or if the current best has |
759 | | * loss as well. |
760 | | */ |
761 | 0 | if (tp->t_srtt && *currtt > tp->t_srtt && |
762 | 0 | (curbest == NULL || tp->t_rxtshift == 0 || |
763 | 0 | sototcpcb(curbest->mpts_socket)->t_rxtshift)) { |
764 | 0 | *currtt = tp->t_srtt; |
765 | 0 | return mpts; |
766 | 0 | } |
767 | | |
768 | | /* |
769 | | * If we find a subflow without loss, take it always! |
770 | | */ |
771 | 0 | if (curbest && |
772 | 0 | sototcpcb(curbest->mpts_socket)->t_rxtshift && |
773 | 0 | tp->t_rxtshift == 0) { |
774 | 0 | *currtt = tp->t_srtt; |
775 | 0 | return mpts; |
776 | 0 | } |
777 | | |
778 | 0 | return curbest != NULL ? curbest : mpts; |
779 | 0 | } |
780 | | |
781 | | static struct mptsub * |
782 | | mptcp_return_subflow(struct mptsub *mpts) |
783 | 0 | { |
784 | 0 | if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) { |
785 | 0 | return NULL; |
786 | 0 | } |
787 | | |
788 | 0 | return mpts; |
789 | 0 | } |
790 | | |
791 | | static boolean_t |
792 | | mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts) |
793 | 0 | { |
794 | 0 | struct tcpcb *tp = sototcpcb(mpts->mpts_socket); |
795 | 0 | int fail_thresh = mptcp_fail_thresh; |
796 | |
797 | 0 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) { |
798 | 0 | fail_thresh *= 2; |
799 | 0 | } |
800 | |
801 | 0 | return tp->t_rxtshift >= fail_thresh && |
802 | 0 | (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq); |
803 | 0 | } |
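/*
 * Worked example (illustrative): with the default mptcp_fail_thresh of 1,
 * a handover service-type doubles fail_thresh to 2, so a subflow only
 * counts as slow once it has retransmitted twice in a row
 * (t_rxtshift >= 2) while data is still queued in the send buffer or in
 * the reinject-queue.
 */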
804 | | |
805 | | /* |
806 | | * Return the most eligible subflow to be used for sending data. |
807 | | */ |
808 | | struct mptsub * |
809 | | mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred) |
810 | 0 | { |
811 | 0 | struct tcpcb *besttp, *secondtp; |
812 | 0 | struct inpcb *bestinp, *secondinp; |
813 | 0 | struct mptsub *mpts; |
814 | 0 | struct mptsub *best = NULL; |
815 | 0 | struct mptsub *second_best = NULL; |
816 | 0 | int exp_rtt = INT_MAX, cheap_rtt = INT_MAX; |
817 | | |
818 | | /* |
819 | | * First Step: |
820 | | * Choose the best subflow for cellular and non-cellular interfaces. |
821 | | */ |
822 | |
823 | 0 | TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { |
824 | 0 | struct socket *so = mpts->mpts_socket; |
825 | 0 | struct tcpcb *tp = sototcpcb(so); |
826 | 0 | struct inpcb *inp = sotoinpcb(so); |
827 | |
828 | 0 | mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n", |
829 | 0 | __func__, mpts->mpts_connid, mpts->mpts_flags, |
830 | 0 | INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state, |
831 | 0 | inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1, |
832 | 0 | tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt, |
833 | 0 | mptcp_subflow_cwnd_space(so)), |
834 | 0 | MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); |
835 | | |
836 | | /* |
837 | | * First, the hard conditions to reject subflows |
838 | | * (e.g., not connected,...) |
839 | | */ |
840 | 0 | if (inp->inp_last_outifp == NULL) { |
841 | 0 | continue; |
842 | 0 | } |
843 | | |
844 | 0 | if (INP_WAIT_FOR_IF_FEEDBACK(inp)) { |
845 | 0 | continue; |
846 | 0 | } |
847 | | |
848 | | /* There can only be one subflow in degraded state */ |
849 | 0 | if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { |
850 | 0 | best = mpts; |
851 | 0 | break; |
852 | 0 | } |
853 | | |
854 | | /* |
855 | | * If this subflow is waiting to finally send, do it! |
856 | | */ |
857 | 0 | if (so->so_flags1 & SOF1_PRECONNECT_DATA) { |
858 | 0 | return mptcp_return_subflow(mpts); |
859 | 0 | } |
860 | | |
861 | | /* |
862 | | * Only send if the subflow is MP_CAPABLE. The exceptions to |
863 | | * this rule (degraded or TFO) have been taken care of above. |
864 | | */ |
865 | 0 | if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) { |
866 | 0 | continue; |
867 | 0 | } |
868 | | |
869 | 0 | if ((so->so_state & SS_ISDISCONNECTED) || |
870 | 0 | !(so->so_state & SS_ISCONNECTED) || |
871 | 0 | !TCPS_HAVEESTABLISHED(tp->t_state) || |
872 | 0 | tp->t_state > TCPS_CLOSE_WAIT) { |
873 | 0 | continue; |
874 | 0 | } |
875 | | |
876 | | /* |
877 | | * Second, the soft conditions to find the subflow with best |
878 | | * conditions for each set (aka cellular vs non-cellular) |
879 | | */ |
880 | 0 | if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) { |
881 | 0 | second_best = mptcp_choose_subflow(mpts, second_best, |
882 | 0 | &exp_rtt); |
883 | 0 | } else { |
884 | 0 | best = mptcp_choose_subflow(mpts, best, &cheap_rtt); |
885 | 0 | } |
886 | 0 | } |
887 | | |
888 | | /* |
889 | | * If there is no preferred or backup subflow, and there is no active |
890 | | * subflow, use the last usable subflow. |
891 | | */ |
892 | 0 | if (best == NULL) { |
893 | 0 | return mptcp_return_subflow(second_best); |
894 | 0 | } |
895 | | |
896 | 0 | if (second_best == NULL) { |
897 | 0 | return mptcp_return_subflow(best); |
898 | 0 | } |
899 | | |
900 | 0 | besttp = sototcpcb(best->mpts_socket); |
901 | 0 | bestinp = sotoinpcb(best->mpts_socket); |
902 | 0 | secondtp = sototcpcb(second_best->mpts_socket); |
903 | 0 | secondinp = sotoinpcb(second_best->mpts_socket); |
904 | |
905 | 0 | if (preferred != NULL) { |
906 | 0 | *preferred = mptcp_return_subflow(best); |
907 | 0 | } |
908 | | |
909 | | /* |
910 | | * Second Step: Among best and second_best. Choose the one that is |
911 | | * most appropriate for this particular service-type. |
912 | | */ |
913 | 0 | if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) { |
914 | 0 | return mptcp_return_subflow(best); |
915 | 0 | } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) { |
916 | | /* |
917 | | * Only handover if Symptoms tells us to do so. |
918 | | */ |
919 | 0 | if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) && |
920 | 0 | mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) { |
921 | 0 | return mptcp_return_subflow(second_best); |
922 | 0 | } |
923 | | |
924 | 0 | return mptcp_return_subflow(best); |
925 | 0 | } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) { |
926 | 0 | int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT; |
927 | 0 | int rto_thresh = mptcp_rtothresh; |
928 | | |
929 | | /* Adjust with symptoms information */ |
930 | 0 | if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) && |
931 | 0 | mptcp_is_wifi_unusable_for_session(mpte) != 0) { |
932 | 0 | rtt_thresh /= 2; |
933 | 0 | rto_thresh /= 2; |
934 | 0 | } |
935 | |
936 | 0 | if (besttp->t_srtt && secondtp->t_srtt && |
937 | 0 | besttp->t_srtt >= rtt_thresh && |
938 | 0 | secondtp->t_srtt < rtt_thresh) { |
939 | 0 | tcpstat.tcps_mp_sel_rtt++; |
940 | 0 | mptcplog((LOG_DEBUG, "%s: best cid %d at rtt %d, second cid %d at rtt %d\n", __func__, |
941 | 0 | best->mpts_connid, besttp->t_srtt >> TCP_RTT_SHIFT, |
942 | 0 | second_best->mpts_connid, |
943 | 0 | secondtp->t_srtt >> TCP_RTT_SHIFT), |
944 | 0 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); |
945 | 0 | return mptcp_return_subflow(second_best); |
946 | 0 | } |
947 | | |
948 | 0 | if (mptcp_subflow_is_slow(mpte, best) && |
949 | 0 | secondtp->t_rxtshift == 0) { |
950 | 0 | return mptcp_return_subflow(second_best); |
951 | 0 | } |
952 | | |
953 | | /* Compare RTOs, select second_best if best's rto exceeds rtothresh */ |
954 | 0 | if (besttp->t_rxtcur && secondtp->t_rxtcur && |
955 | 0 | besttp->t_rxtcur >= rto_thresh && |
956 | 0 | secondtp->t_rxtcur < rto_thresh) { |
957 | 0 | tcpstat.tcps_mp_sel_rto++; |
958 | 0 | mptcplog((LOG_DEBUG, "%s: best cid %d at rto %d, second cid %d at rto %d\n", __func__, |
959 | 0 | best->mpts_connid, besttp->t_rxtcur, |
960 | 0 | second_best->mpts_connid, secondtp->t_rxtcur), |
961 | 0 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); |
962 | |
963 | 0 | return mptcp_return_subflow(second_best); |
964 | 0 | } |
965 | | |
966 | | /* |
967 | | * None of the above conditions for sending on the secondary |
968 | | * were true. So, let's schedule on the best one, if it still |
969 | | * has some space in the congestion-window. |
970 | | */ |
971 | 0 | return mptcp_return_subflow(best); |
972 | 0 | } else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) { |
973 | 0 | struct mptsub *tmp; |
974 | | |
975 | | /* |
976 | | * We only care about RTT when aggregating |
977 | | */ |
978 | 0 | if (besttp->t_srtt > secondtp->t_srtt) { |
979 | 0 | tmp = best; |
980 | 0 | best = second_best; |
981 | 0 | besttp = secondtp; |
982 | 0 | bestinp = secondinp; |
983 | |
984 | 0 | second_best = tmp; |
985 | 0 | secondtp = sototcpcb(second_best->mpts_socket); |
986 | 0 | secondinp = sotoinpcb(second_best->mpts_socket); |
987 | 0 | } |
988 | | |
989 | | /* Is there still space in the congestion window? */ |
990 | 0 | if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) { |
991 | 0 | return mptcp_return_subflow(second_best); |
992 | 0 | } |
993 | | |
994 | 0 | return mptcp_return_subflow(best); |
995 | 0 | } else { |
996 | 0 | panic("Unknown service-type configured for MPTCP"); |
997 | 0 | } |
998 | | |
999 | 0 | return NULL; |
1000 | 0 | } |
1001 | | |
1002 | | static const char * |
1003 | | mptcp_event_to_str(uint32_t event) |
1004 | 0 | { |
1005 | 0 | const char *c = "UNDEFINED"; |
1006 | 0 | switch (event) { |
1007 | 0 | case MPCE_CLOSE: |
1008 | 0 | c = "MPCE_CLOSE"; |
1009 | 0 | break; |
1010 | 0 | case MPCE_RECV_DATA_ACK: |
1011 | 0 | c = "MPCE_RECV_DATA_ACK"; |
1012 | 0 | break; |
1013 | 0 | case MPCE_RECV_DATA_FIN: |
1014 | 0 | c = "MPCE_RECV_DATA_FIN"; |
1015 | 0 | break; |
1016 | 0 | } |
1017 | 0 | return c; |
1018 | 0 | } |
1019 | | |
1020 | | static const char * |
1021 | | mptcp_state_to_str(mptcp_state_t state) |
1022 | 0 | { |
1023 | 0 | const char *c = "UNDEFINED"; |
1024 | 0 | switch (state) { |
1025 | 0 | case MPTCPS_CLOSED: |
1026 | 0 | c = "MPTCPS_CLOSED"; |
1027 | 0 | break; |
1028 | 0 | case MPTCPS_LISTEN: |
1029 | 0 | c = "MPTCPS_LISTEN"; |
1030 | 0 | break; |
1031 | 0 | case MPTCPS_ESTABLISHED: |
1032 | 0 | c = "MPTCPS_ESTABLISHED"; |
1033 | 0 | break; |
1034 | 0 | case MPTCPS_CLOSE_WAIT: |
1035 | 0 | c = "MPTCPS_CLOSE_WAIT"; |
1036 | 0 | break; |
1037 | 0 | case MPTCPS_FIN_WAIT_1: |
1038 | 0 | c = "MPTCPS_FIN_WAIT_1"; |
1039 | 0 | break; |
1040 | 0 | case MPTCPS_CLOSING: |
1041 | 0 | c = "MPTCPS_CLOSING"; |
1042 | 0 | break; |
1043 | 0 | case MPTCPS_LAST_ACK: |
1044 | 0 | c = "MPTCPS_LAST_ACK"; |
1045 | 0 | break; |
1046 | 0 | case MPTCPS_FIN_WAIT_2: |
1047 | 0 | c = "MPTCPS_FIN_WAIT_2"; |
1048 | 0 | break; |
1049 | 0 | case MPTCPS_TIME_WAIT: |
1050 | 0 | c = "MPTCPS_TIME_WAIT"; |
1051 | 0 | break; |
1052 | 0 | case MPTCPS_TERMINATE: |
1053 | 0 | c = "MPTCPS_TERMINATE"; |
1054 | 0 | break; |
1055 | 0 | } |
1056 | 0 | return c; |
1057 | 0 | } |
1058 | | |
1059 | | void |
1060 | | mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) |
1061 | 0 | { |
1062 | 0 | struct socket *mp_so = mptetoso(mp_tp->mpt_mpte); |
1063 | |
1064 | 0 | socket_lock_assert_owned(mp_so); |
1065 | |
1066 | 0 | mptcp_state_t old_state = mp_tp->mpt_state; |
1067 | |
1068 | 0 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, |
1069 | 0 | uint32_t, event); |
1070 | |
1071 | 0 | switch (mp_tp->mpt_state) { |
1072 | 0 | case MPTCPS_CLOSED: |
1073 | 0 | case MPTCPS_LISTEN: |
1074 | 0 | mp_tp->mpt_state = MPTCPS_TERMINATE; |
1075 | 0 | break; |
1076 | | |
1077 | 0 | case MPTCPS_ESTABLISHED: |
1078 | 0 | if (event == MPCE_CLOSE) { |
1079 | 0 | mp_tp->mpt_state = MPTCPS_FIN_WAIT_1; |
1080 | 0 | mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ |
1081 | 0 | } else if (event == MPCE_RECV_DATA_FIN) { |
1082 | 0 | mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ |
1083 | 0 | mp_tp->mpt_state = MPTCPS_CLOSE_WAIT; |
1084 | 0 | } |
1085 | 0 | break; |
1086 | | |
1087 | 0 | case MPTCPS_CLOSE_WAIT: |
1088 | 0 | if (event == MPCE_CLOSE) { |
1089 | 0 | mp_tp->mpt_state = MPTCPS_LAST_ACK; |
1090 | 0 | mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ |
1091 | 0 | } |
1092 | 0 | break; |
1093 | | |
1094 | 0 | case MPTCPS_FIN_WAIT_1: |
1095 | 0 | if (event == MPCE_RECV_DATA_ACK) { |
1096 | 0 | mp_tp->mpt_state = MPTCPS_FIN_WAIT_2; |
1097 | 0 | } else if (event == MPCE_RECV_DATA_FIN) { |
1098 | 0 | mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ |
1099 | 0 | mp_tp->mpt_state = MPTCPS_CLOSING; |
1100 | 0 | } |
1101 | 0 | break; |
1102 | | |
1103 | 0 | case MPTCPS_CLOSING: |
1104 | 0 | if (event == MPCE_RECV_DATA_ACK) { |
1105 | 0 | mp_tp->mpt_state = MPTCPS_TIME_WAIT; |
1106 | 0 | } |
1107 | 0 | break; |
1108 | | |
1109 | 0 | case MPTCPS_LAST_ACK: |
1110 | 0 | if (event == MPCE_RECV_DATA_ACK) { |
1111 | 0 | mptcp_close(mp_tp->mpt_mpte, mp_tp); |
1112 | 0 | } |
1113 | 0 | break; |
1114 | | |
1115 | 0 | case MPTCPS_FIN_WAIT_2: |
1116 | 0 | if (event == MPCE_RECV_DATA_FIN) { |
1117 | 0 | mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */ |
1118 | 0 | mp_tp->mpt_state = MPTCPS_TIME_WAIT; |
1119 | 0 | } |
1120 | 0 | break; |
1121 | | |
1122 | 0 | case MPTCPS_TIME_WAIT: |
1123 | 0 | case MPTCPS_TERMINATE: |
1124 | 0 | break; |
1125 | | |
1126 | 0 | default: |
1127 | 0 | VERIFY(0); |
1128 | | /* NOTREACHED */ |
1129 | 0 | } |
1130 | 0 | DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, |
1131 | 0 | uint32_t, event); |
1132 | 0 | mptcplog((LOG_INFO, "%s: %s to %s on event %s\n", __func__, |
1133 | 0 | mptcp_state_to_str(old_state), |
1134 | 0 | mptcp_state_to_str(mp_tp->mpt_state), |
1135 | 0 | mptcp_event_to_str(event)), |
1136 | 0 | MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG); |
1137 | 0 | } |
1138 | | |
1139 | | /* If you change this function, match up mptcp_update_rcv_state_f */ |
1140 | | void |
1141 | | mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp, |
1142 | | uint16_t csum) |
1143 | 0 | { |
1144 | 0 | struct mptcb *mp_tp = tptomptp(tp); |
1145 | 0 | u_int64_t full_dsn = 0; |
1146 | |
1147 | 0 | NTOHL(dss_info->mdss_dsn); |
1148 | 0 | NTOHL(dss_info->mdss_subflow_seqn); |
1149 | 0 | NTOHS(dss_info->mdss_data_len); |
1150 | | |
1151 | | /* XXX for autosndbuf grow sb here */ |
1152 | 0 | MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); |
1153 | 0 | mptcp_update_rcv_state_meat(mp_tp, tp, |
1154 | 0 | full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len, |
1155 | 0 | csum); |
1156 | 0 | } |
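/*
 * Illustration (not part of the original file): MPTCP_EXTEND_DSN above
 * widens the 32-bit DSN carried in the option to 64 bits, relative to the
 * connection's current 64-bit rcv_nxt. A sketch of that idea with a
 * hypothetical helper, assuming the reference value is well away from the
 * ends of the 64-bit space:
 */
#include <stdint.h>

static uint64_t
extend_dsn32(uint64_t ref, uint32_t dsn32)
{
	/* Start from the same 2^32 window as the reference. */
	uint64_t cand = (ref & 0xffffffff00000000ULL) | dsn32;

	/* Shift by one window if that brings cand closer to ref. */
	if (cand + 0x80000000ULL < ref) {
		cand += 0x100000000ULL;   /* dsn32 already wrapped past ref */
	} else if (cand > ref + 0x80000000ULL) {
		cand -= 0x100000000ULL;   /* dsn32 is from before the wrap */
	}
	return cand;
}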
1157 | | |
1158 | | void |
1159 | | mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp, |
1160 | | u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len, |
1161 | | uint16_t csum) |
1162 | 0 | { |
1163 | 0 | if (mdss_data_len == 0) { |
1164 | 0 | os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n", |
1165 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte)); |
1166 | |
1167 | 0 | if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) { |
1168 | 0 | os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n", |
1169 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum); |
1170 | 0 | } |
1171 | 0 | mptcp_notify_mpfail(tp->t_inpcb->inp_socket); |
1172 | 0 | return; |
1173 | 0 | } |
1174 | | |
1175 | 0 | mptcp_notify_mpready(tp->t_inpcb->inp_socket); |
1176 | |
1177 | 0 | tp->t_rcv_map.mpt_dsn = full_dsn; |
1178 | 0 | tp->t_rcv_map.mpt_sseq = seqn; |
1179 | 0 | tp->t_rcv_map.mpt_len = mdss_data_len; |
1180 | 0 | tp->t_rcv_map.mpt_csum = csum; |
1181 | 0 | tp->t_mpflags |= TMPF_EMBED_DSN; |
1182 | 0 | } |
1183 | | |
1184 | | |
1185 | | static int |
1186 | | mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m, |
1187 | | int hdrlen) |
1188 | 0 | { |
1189 | 0 | u_int32_t datalen; |
1190 | |
1191 | 0 | if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { |
1192 | 0 | return 0; |
1193 | 0 | } |
1194 | | |
1195 | 0 | datalen = m->m_pkthdr.mp_rlen; |
1196 | | |
1197 | | /* unacceptable DSS option, fallback to TCP */ |
1198 | 0 | if (m->m_pkthdr.len > ((int) datalen + hdrlen)) { |
1199 | 0 | os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d", |
1200 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen); |
1201 | 0 | } else { |
1202 | 0 | return 0; |
1203 | 0 | } |
1204 | 0 | tp->t_mpflags |= TMPF_SND_MPFAIL; |
1205 | 0 | mptcp_notify_mpfail(so); |
1206 | 0 | m_freem(m); |
1207 | 0 | return -1; |
1208 | 0 | } |
1209 | | |
1210 | | int |
1211 | | mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, |
1212 | | int drop_hdrlen) |
1213 | 0 | { |
1214 | 0 | mptcp_insert_rmap(tp, m, th); |
1215 | 0 | if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m, |
1216 | 0 | drop_hdrlen) != 0) { |
1217 | 0 | return -1; |
1218 | 0 | } |
1219 | 0 | return 0; |
1220 | 0 | } |
1221 | | |
1222 | | static uint16_t |
1223 | | mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq, |
1224 | | uint16_t dlen, uint16_t csum, int dfin) |
1225 | 0 | { |
1226 | 0 | struct mptcb *mp_tp = tptomptp(tp); |
1227 | 0 | int real_len = dlen - dfin; |
1228 | 0 | uint32_t sum = 0; |
1229 | |
1230 | 0 | VERIFY(real_len >= 0); |
1231 | | |
1232 | 0 | if (mp_tp == NULL) { |
1233 | 0 | return 0; |
1234 | 0 | } |
1235 | | |
1236 | 0 | if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) { |
1237 | 0 | return 0; |
1238 | 0 | } |
1239 | | |
1240 | 0 | if (tp->t_mpflags & TMPF_TCP_FALLBACK) { |
1241 | 0 | return 0; |
1242 | 0 | } |
1243 | | |
1244 | | /* |
1245 | | * The remote side may send a packet with fewer bytes than the |
1246 | | * claimed DSS checksum length. |
1247 | | */ |
1248 | 0 | if ((int)m_length2(m, NULL) < real_len) { |
1249 | 0 | return 0xffff; |
1250 | 0 | } |
1251 | | |
1252 | 0 | if (real_len != 0) { |
1253 | 0 | sum = m_sum16(m, 0, real_len); |
1254 | 0 | } |
1255 | |
1256 | 0 | sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum); |
1257 | 0 | ADDCARRY(sum); |
1258 | |
1259 | 0 | DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, |
1260 | 0 | uint32_t, sum); |
1261 | |
1262 | 0 | return ~sum & 0xffff; |
1263 | 0 | } |
1264 | | |
1265 | | /* |
1266 | | * MPTCP Checksum support |
1267 | | * The checksum is calculated whenever the MPTCP DSS option is included |
1268 | | * in the TCP packet. The checksum includes the sum of the MPTCP pseudo- |
1269 | | * header and the actual data indicated by the length specified in the |
1270 | | * DSS option. |
1271 | | */ |
1272 | | |
1273 | | int |
1274 | | mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, |
1275 | | uint32_t sseq, uint16_t dlen, uint16_t csum, int dfin) |
1276 | 0 | { |
1277 | 0 | uint16_t mptcp_csum; |
1278 | |
1279 | 0 | mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum, dfin); |
1280 | 0 | if (mptcp_csum) { |
1281 | 0 | tp->t_mpflags |= TMPF_SND_MPFAIL; |
1282 | 0 | mptcp_notify_mpfail(tp->t_inpcb->inp_socket); |
1283 | 0 | m_freem(m); |
1284 | 0 | tcpstat.tcps_mp_badcsum++; |
1285 | 0 | return -1; |
1286 | 0 | } |
1287 | 0 | return 0; |
1288 | 0 | } |
1289 | | |
1290 | | uint16_t |
1291 | | mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen) |
1292 | 0 | { |
1293 | 0 | uint32_t sum = 0; |
1294 | |
1295 | 0 | if (dlen) { |
1296 | 0 | sum = m_sum16(m, 0, dlen); |
1297 | 0 | } |
1298 | |
1299 | 0 | dss_val = mptcp_hton64(dss_val); |
1300 | 0 | sseq = htonl(sseq); |
1301 | 0 | dlen = htons(dlen); |
1302 | 0 | sum += in_pseudo64(dss_val, sseq, dlen); |
1303 | |
1304 | 0 | ADDCARRY(sum); |
1305 | 0 | sum = ~sum & 0xffff; |
1306 | 0 | DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum); |
1307 | 0 | mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum), |
1308 | 0 | MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE); |
1309 | |
1310 | 0 | return (uint16_t)sum; |
1311 | 0 | } |
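/*
 * Illustration (not part of the original file): the same pseudo-header
 * checksum in portable user-space C, playing the role of in_pseudo64() and
 * ADDCARRY() in the code above. The 64-bit DSN, 32-bit subflow sequence
 * number and 16-bit length are summed as big-endian 16-bit words together
 * with the payload, then the one's-complement sum is folded and inverted.
 * The DATA_FIN adjustment is omitted; the helper name is hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

static uint16_t
dss_csum(const uint8_t *data, size_t len, uint64_t dsn, uint32_t sseq,
    uint16_t dlen)
{
	uint32_t sum = 0;
	size_t i;

	/* Sum the payload as big-endian 16-bit words (pad odd byte with 0). */
	for (i = 0; i + 1 < len; i += 2) {
		sum += (uint32_t)(data[i] << 8 | data[i + 1]);
	}
	if (len & 1) {
		sum += (uint32_t)(data[len - 1] << 8);
	}

	/* Pseudo-header: DSN (4 words), subflow seq (2 words), length. */
	sum += (uint32_t)(dsn >> 48) & 0xffff;
	sum += (uint32_t)(dsn >> 32) & 0xffff;
	sum += (uint32_t)(dsn >> 16) & 0xffff;
	sum += (uint32_t)dsn & 0xffff;
	sum += (sseq >> 16) & 0xffff;
	sum += sseq & 0xffff;
	sum += dlen;

	/* Fold carries and return the one's complement. */
	while (sum >> 16) {
		sum = (sum & 0xffff) + (sum >> 16);
	}
	return (uint16_t)~sum;
}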
1312 | | |
1313 | | /* |
1314 | | * When the WiFi signal starts fading, there is more loss and there are |
1315 | | * RTT spikes. Check whether there has been a large spike by comparing |
1316 | | * against a tolerable RTT spike threshold. |
1317 | | */ |
1318 | | boolean_t |
1319 | | mptcp_no_rto_spike(struct socket *so) |
1320 | 0 | { |
1321 | 0 | struct tcpcb *tp = intotcpcb(sotoinpcb(so)); |
1322 | 0 | int32_t spike = 0; |
1323 | |
1324 | 0 | if (tp->t_rxtcur > mptcp_rtothresh) { |
1325 | 0 | spike = tp->t_rxtcur - mptcp_rtothresh; |
1326 | |
1327 | 0 | mptcplog((LOG_DEBUG, "%s: spike = %d rto = %d best = %d cur = %d\n", |
1328 | 0 | __func__, spike, |
1329 | 0 | tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT, |
1330 | 0 | tp->t_rttcur), |
1331 | 0 | (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG); |
1332 | 0 | } |
1333 | |
1334 | 0 | if (spike > 0) { |
1335 | 0 | return FALSE; |
1336 | 0 | } else { |
1337 | 0 | return TRUE; |
1338 | 0 | } |
1339 | 0 | } |
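/*
 * Worked example (illustrative numbers): with the default mptcp_rtothresh
 * of 1500, a subflow whose t_rxtcur has grown to 2100 yields a spike of
 * 600, so mptcp_no_rto_spike() returns FALSE; with t_rxtcur at 1200 the
 * spike stays 0 and it returns TRUE.
 */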
1340 | | |
1341 | | void |
1342 | | mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag) |
1343 | 0 | { |
1344 | 0 | VERIFY(mpp->mpp_flags & flag); |
1345 | 0 | mpp->mpp_flags &= ~flag; |
1346 | |
1347 | 0 | if (mptcp_should_defer_upcall(mpp)) { |
1348 | 0 | return; |
1349 | 0 | } |
1350 | | |
1351 | 0 | if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) { |
1352 | 0 | mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP; |
1353 | |
1354 | 0 | mptcp_subflow_workloop(mpp->mpp_pcbe); |
1355 | 0 | } |
1356 | |
1357 | 0 | if (mpp->mpp_flags & MPP_SHOULD_RWAKEUP) { |
1358 | 0 | mpp->mpp_flags &= ~MPP_SHOULD_RWAKEUP; |
1359 | |
1360 | 0 | sorwakeup(mpp->mpp_socket); |
1361 | 0 | } |
1362 | |
1363 | 0 | if (mpp->mpp_flags & MPP_SHOULD_WWAKEUP) { |
1364 | 0 | mpp->mpp_flags &= ~MPP_SHOULD_WWAKEUP; |
1365 | |
1366 | 0 | sowwakeup(mpp->mpp_socket); |
1367 | 0 | } |
1368 | 0 | } |
1369 | | |
1370 | | static void |
1371 | | mptcp_reset_itfinfo(struct mpt_itf_info *info) |
1372 | 0 | { |
1373 | 0 | memset(info, 0, sizeof(*info)); |
1374 | 0 | } |
1375 | | |
1376 | | void |
1377 | | mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index, |
1378 | | uint32_t necp_flags, __unused bool *viable) |
1379 | 0 | { |
1380 | 0 | boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4); |
1381 | 0 | boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6); |
1382 | 0 | boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64); |
1383 | 0 | boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER); |
1384 | 0 | struct mppcb *mp = (struct mppcb *)handle; |
1385 | 0 | struct mptses *mpte = mptompte(mp); |
1386 | 0 | struct socket *mp_so; |
1387 | 0 | struct mptcb *mp_tp; |
1388 | 0 | uint32_t i, ifindex; |
1389 | 0 | struct ifnet *ifp; |
1390 | 0 | int locked = 0; |
1391 | |
1392 | 0 | ifindex = interface_index; |
1393 | 0 | VERIFY(ifindex != IFSCOPE_NONE); |
1394 | | |
1395 | | /* About to be garbage-collected (see note about MPTCP/NECP interactions) */ |
1396 | 0 | if (mp->mpp_socket->so_usecount == 0) { |
1397 | 0 | return; |
1398 | 0 | } |
1399 | | |
1400 | 0 | mp_so = mptetoso(mpte); |
1401 | |
1402 | 0 | if (action != NECP_CLIENT_CBACTION_INITIAL) { |
1403 | 0 | socket_lock(mp_so, 1); |
1404 | 0 | locked = 1; |
1405 | | |
1406 | | /* Check again, because it might have changed while waiting */ |
1407 | 0 | if (mp->mpp_socket->so_usecount == 0) { |
1408 | 0 | goto out; |
1409 | 0 | } |
1410 | 0 | } |
1411 | | |
1412 | 0 | socket_lock_assert_owned(mp_so); |
1413 | |
1414 | 0 | mp_tp = mpte->mpte_mptcb; |
1415 | |
1416 | 0 | ifnet_head_lock_shared(); |
1417 | 0 | ifp = ifindex2ifnet[ifindex]; |
1418 | 0 | ifnet_head_done(); |
1419 | |
1420 | 0 | os_log(mptcp_log_handle, "%s - %lx: action: %u ifindex %u delegated to %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n", |
1421 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex, |
1422 | 0 | ifp && ifp->if_delegated.ifp ? ifp->if_delegated.ifp->if_index : IFSCOPE_NONE, |
1423 | 0 | mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state, |
1424 | 0 | has_v4, has_v6, has_nat64, low_power); |
1425 | | |
1426 | | /* Not needed on fallen-back sockets */ |
1427 | 0 | if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { |
1428 | 0 | goto out; |
1429 | 0 | } |
1430 | | |
1431 | | /* |
1432 | | * When the interface goes in low-power mode we don't want to establish |
1433 | | * new subflows on it. Thus, mark it internally as non-viable. |
1434 | | */ |
1435 | 0 | if (low_power) { |
1436 | 0 | action = NECP_CLIENT_CBACTION_NONVIABLE; |
1437 | 0 | } |
1438 | |
1439 | 0 | if (action == NECP_CLIENT_CBACTION_NONVIABLE) { |
1440 | 0 | for (i = 0; i < mpte->mpte_itfinfo_size; i++) { |
1441 | 0 | if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) { |
1442 | 0 | continue; |
1443 | 0 | } |
1444 | | |
1445 | 0 | if (mpte->mpte_itfinfo[i].ifindex == ifindex) { |
1446 | 0 | mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]); |
1447 | 0 | } |
1448 | 0 | } |
1449 | |
1450 | 0 | mptcp_sched_create_subflows(mpte); |
1451 | 0 | } else if (action == NECP_CLIENT_CBACTION_VIABLE || |
1452 | 0 | action == NECP_CLIENT_CBACTION_INITIAL) { |
1453 | 0 | int found_slot = 0, slot_index = -1; |
1454 | 0 | struct sockaddr *dst; |
1455 | |
1456 | 0 | if (ifp == NULL) { |
1457 | 0 | goto out; |
1458 | 0 | } |
1459 | | |
1460 | 0 | if (IFNET_IS_COMPANION_LINK(ifp)) { |
1461 | 0 | goto out; |
1462 | 0 | } |
1463 | | |
1464 | 0 | if (IFNET_IS_EXPENSIVE(ifp) && |
1465 | 0 | (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) { |
1466 | 0 | goto out; |
1467 | 0 | } |
1468 | | |
1469 | 0 | if (IFNET_IS_CONSTRAINED(ifp) && |
1470 | 0 | (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) { |
1471 | 0 | goto out; |
1472 | 0 | } |
1473 | | |
1474 | 0 | if (IFNET_IS_CELLULAR(ifp) && |
1475 | 0 | (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { |
1476 | 0 | goto out; |
1477 | 0 | } |
1478 | | |
1479 | 0 | if (IS_INTF_CLAT46(ifp)) { |
1480 | 0 | has_v4 = FALSE; |
1481 | 0 | } |
1482 | | |
1483 | | /* Look for the slot on where to store/update the interface-info. */ |
1484 | 0 | for (i = 0; i < mpte->mpte_itfinfo_size; i++) { |
1485 | | /* Found a potential empty slot where we can put it */ |
1486 | 0 | if (mpte->mpte_itfinfo[i].ifindex == 0) { |
1487 | 0 | found_slot = 1; |
1488 | 0 | slot_index = i; |
1489 | 0 | } |
1490 | | |
1491 | | /* |
1492 | | * The interface is already in our array. Check if we |
1493 | | * need to update it. |
1494 | | */ |
1495 | 0 | if (mpte->mpte_itfinfo[i].ifindex == ifindex && |
1496 | 0 | (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 || |
1497 | 0 | mpte->mpte_itfinfo[i].has_v6_conn != has_v6 || |
1498 | 0 | mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) { |
1499 | 0 | found_slot = 1; |
1500 | 0 | slot_index = i; |
1501 | 0 | break; |
1502 | 0 | } |
1503 | | |
1504 | 0 | if (mpte->mpte_itfinfo[i].ifindex == ifindex) { |
1505 | | /* |
1506 | | * Ok, it's already there and we don't need |
1507 | | * to update it |
1508 | | */ |
1509 | 0 | goto out; |
1510 | 0 | } |
1511 | 0 | } |
1512 | | |
1513 | 0 | dst = mptcp_get_session_dst(mpte, has_v6, has_v4); |
1514 | 0 | if (dst && dst->sa_family == AF_INET && |
1515 | 0 | has_v6 && !has_nat64 && !has_v4) { |
1516 | 0 | if (found_slot) { |
1517 | 0 | mpte->mpte_itfinfo[slot_index].ifindex = ifindex; |
1518 | 0 | mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4; |
1519 | 0 | mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6; |
1520 | 0 | mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64; |
1521 | 0 | } |
1522 | 0 | goto out; |
1523 | 0 | } |
1524 | | |
1525 | 0 | if (found_slot == 0) { |
1526 | 0 | int new_size = mpte->mpte_itfinfo_size * 2; |
1527 | 0 | struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO); |
1528 | |
1529 | 0 | if (info == NULL) { |
1530 | 0 | os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n", |
1531 | 0 | __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size); |
1532 | 0 | goto out; |
1533 | 0 | } |
1534 | | |
1535 | 0 | memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info)); |
1536 | |
1537 | 0 | if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) { |
1538 | 0 | _FREE(mpte->mpte_itfinfo, M_TEMP); |
1539 | 0 | } |
1540 | | |
1541 | | /* We allocated a new one, thus the first must be empty */ |
1542 | 0 | slot_index = mpte->mpte_itfinfo_size; |
1543 | |
1544 | 0 | mpte->mpte_itfinfo = info; |
1545 | 0 | mpte->mpte_itfinfo_size = new_size; |
1546 | 0 | } |
1547 | | |
1548 | 0 | VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size); |
1549 | 0 | mpte->mpte_itfinfo[slot_index].ifindex = ifindex; |
1550 | 0 | mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4; |
1551 | 0 | mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6; |
1552 | 0 | mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64; |
1553 | |
1554 | 0 | mptcp_sched_create_subflows(mpte); |
1555 | 0 | } |
1556 | | |
1557 | 0 | out: |
1558 | 0 | if (locked) { |
1559 | 0 | socket_unlock(mp_so, 1); |
1560 | 0 | } |
1561 | 0 | } |
1562 | | |
1563 | | void |
1564 | | mptcp_set_restrictions(struct socket *mp_so) |
1565 | 0 | { |
1566 | 0 | struct mptses *mpte = mpsotompte(mp_so); |
1567 | 0 | uint32_t i; |
1568 | |
1569 | 0 | socket_lock_assert_owned(mp_so); |
1570 | |
1571 | 0 | ifnet_head_lock_shared(); |
1572 | |
1573 | 0 | for (i = 0; i < mpte->mpte_itfinfo_size; i++) { |
1574 | 0 | struct mpt_itf_info *info = &mpte->mpte_itfinfo[i]; |
1575 | 0 | uint32_t ifindex = info->ifindex; |
1576 | 0 | struct ifnet *ifp; |
1577 | |
1578 | 0 | if (ifindex == IFSCOPE_NONE) { |
1579 | 0 | continue; |
1580 | 0 | } |
1581 | | |
1582 | 0 | ifp = ifindex2ifnet[ifindex]; |
1583 | 0 | if (ifp == NULL) { |
1584 | 0 | continue; |
1585 | 0 | } |
1586 | | |
1587 | 0 | if (IFNET_IS_EXPENSIVE(ifp) && |
1588 | 0 | (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) { |
1589 | 0 | info->ifindex = IFSCOPE_NONE; |
1590 | 0 | } |
1591 | |
1592 | 0 | if (IFNET_IS_CONSTRAINED(ifp) && |
1593 | 0 | (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) { |
1594 | 0 | info->ifindex = IFSCOPE_NONE; |
1595 | 0 | } |
1596 | |
1597 | 0 | if (IFNET_IS_CELLULAR(ifp) && |
1598 | 0 | (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { |
1599 | 0 | info->ifindex = IFSCOPE_NONE; |
1600 | 0 | } |
1601 | 0 | } |
1602 | |
1603 | 0 | ifnet_head_done(); |
1604 | 0 | } |