/src/openthread/third_party/tcplp/bsdtcp/tcp_timewait.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 |
3 | | * The Regents of the University of California. All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * 4. Neither the name of the University nor the names of its contributors |
14 | | * may be used to endorse or promote products derived from this software |
15 | | * without specific prior written permission. |
16 | | * |
17 | | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
18 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
21 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 | | * SUCH DAMAGE. |
28 | | * |
29 | | * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 |
30 | | */ |
31 | | |
32 | | #include <string.h> |
33 | | |
34 | | #include "tcp.h" |
35 | | #include "tcp_fsm.h" |
36 | | #include "tcp_seq.h" |
37 | | #include "tcp_timer.h" |
38 | | #include "tcp_var.h" |
39 | | |
40 | | #include "tcp_const.h" |
41 | | #include <openthread/ip6.h> |
42 | | #include <openthread/message.h> |
43 | | |
44 | | /* |
45 | | * samkumar: The V_nolocaltimewait variable corresponds to the |
46 | | * net.inet.tcp.nolocaltimewait option in FreeBSD. When set to 1, it skips the |
47 | | * TIME-WAIT state for TCP connections where both endpoints are local IP |
48 | | * addresses, to save resources on HTTP accelerators, database servers/clients, |
49 | | * etc. In TCPlp, I eliminated support for this feature, but I have kept the |
50 | | * code for it, commented out with "#if 0", in case we choose to bring it back |
51 | | * at a later time. |
52 | | * |
53 | | * See also the "#if 0" block in tcp_twstart. |
54 | | */ |
55 | | #if 0 |
56 | | enum tcp_timewait_consts { |
57 | | V_nolocaltimewait = 0 |
58 | | }; |
59 | | #endif |
60 | | |
61 | | /* |
62 | | * samkumar: The FreeBSD code used a separate, smaller structure, called |
63 | | * struct tcptw, to represent connections in the TIME-WAIT state. In TCPlp, |
64 | | * we use the full struct tcpcb structure even in the TIME-WAIT state. This |
65 | | * consumes more memory, but switching to a different structure like |
66 | | * struct tcptw to save memory would be difficult because the host system or |
67 | | * application has allocated these structures; we can't simply "free" the |
68 | | * struct tcpcb. It would have to have been done via a callback or something, |
69 | | * and in the common case of statically allocated sockets, this would actually |
70 | | * result in more memory (since an application would need to allocate both the |
71 | | * struct tcpcb and the struct tcptw, if it uses a static allocation approach). |
72 | | * |
73 | | * Below, I've changed the function signatures to accept "struct tcpcb* tp" |
74 | | * instead of "struct tcptw *tw" and I have reimplemented the functions |
75 | | * to work using tp (of type struct tcpcb) instead of tw (of type |
76 | | * struct tcptw). |
77 | | * |
78 | | * Conceptually, the biggest change is in how timers are handled. The FreeBSD |
79 | | * code had a 2MSL timer, which was set for sockets that enter certain |
80 | | * "closing" states of the TCP state machine. But when the TIME-WAIT state was |
81 | | * entered, the state was transferred from struct tcpcb into struct tcptw. |
82 | | * The final timeout was handled as follows: the function tcp_tw_2msl_scan was |
83 | | * called periodically on the slow timer, and it iterated over a linked list |
84 | | * of all the struct tcptw and checked the tw->tw_time field to identify which |
85 | | * TIME-WAIT sockets had expired. |
86 | | * |
87 | | * In our switch to using struct tcpcb even in the TIME-WAIT state, we rely on |
88 | | * the timer system for struct tcpcb. I modified the 2msl callback in |
89 | | * tcp_timer.c to check for the TIME-WAIT case and handle it correctly. |
90 | | */ |
91 | | |
92 | | static void |
93 | | tcp_tw_2msl_reset(struct tcpcb* tp, int rearm) |
94 | 0 | { |
95 | | /* |
96 | | * samkumar: This function used to set tw->tw_time to ticks + 2 * tcp_msl |
97 | | * and insert tw into the linked list V_twq_2msl. I've replaced this, along |
98 | | * with the associated locking logic, with the following call, which uses |
99 | | * the timer system in place for full TCBs. |
100 | | */ |
101 | 0 | tcp_timer_activate(tp, TT_2MSL, 2 * tcp_msl); |
102 | 0 | } |
103 | | |
104 | | /* |
105 | | * samkumar: I've rewritten this code since I need to send out packets via the |
106 | | * host system for TCPlp: allocating buffers from the host system, populate |
107 | | * them, and then pass them back to the host system. I simplified the code by |
108 | | * only using the logic that was fully necessary, eliminating the code for IPv4 |
109 | | * packets and keeping only the code for IPv6 packets. I also removed all of |
110 | | * the mbuf logic, instead using the logic for using the host system's |
111 | | * buffering. |
112 | | * |
113 | | * This rewritten code always returns 0. The original code would return |
114 | | * whatever is returned by ip_output or ip6_output (FreeBSD's functions for |
115 | | * sending out IP packets). I believe 0 indicates success, and a nonzero |
116 | | * value represents an error code. It seems that the return value of |
117 | | * tcp_twrespond is ignored by all instances of its use in TCPlp (maybe even |
118 | | * in all of FreeBSD), so this is a moot point. |
119 | | */ |
120 | | static int |
121 | | tcp_twrespond(struct tcpcb* tp, int flags) |
122 | 0 | { |
123 | 0 | struct tcphdr* nth; |
124 | 0 | struct tcpopt to; |
125 | 0 | uint32_t optlen = 0; |
126 | 0 | uint8_t opt[TCP_MAXOLEN]; |
127 | |
|
128 | 0 | to.to_flags = 0; |
129 | | |
130 | | /* |
131 | | * Send a timestamp and echo-reply if both our side and our peer |
132 | | * have sent timestamps in our SYN's and this is not a RST. |
133 | | */ |
134 | 0 | if ((tp->t_flags & TF_RCVD_TSTMP) && flags == TH_ACK) { |
135 | 0 | to.to_flags |= TOF_TS; |
136 | 0 | to.to_tsval = tcp_ts_getticks() + tp->ts_offset; |
137 | 0 | to.to_tsecr = tp->ts_recent; |
138 | 0 | } |
139 | 0 | optlen = tcp_addoptions(&to, opt); |
140 | |
|
141 | 0 | otMessage* message = tcplp_sys_new_message(tp->instance); |
142 | 0 | if (message == NULL) { |
143 | 0 | return 0; // drop the message |
144 | 0 | } |
145 | 0 | if (otMessageSetLength(message, sizeof(struct tcphdr) + optlen) != OT_ERROR_NONE) { |
146 | 0 | tcplp_sys_free_message(tp->instance, message); |
147 | 0 | return 0; // drop the message |
148 | 0 | } |
149 | | |
150 | 0 | char outbuf[sizeof(struct tcphdr) + optlen]; |
151 | 0 | nth = (struct tcphdr*) &outbuf[0]; |
152 | 0 | otMessageInfo ip6info; |
153 | 0 | memset(&ip6info, 0x00, sizeof(ip6info)); |
154 | |
|
155 | 0 | memcpy(&ip6info.mSockAddr, &tp->laddr, sizeof(ip6info.mSockAddr)); |
156 | 0 | memcpy(&ip6info.mPeerAddr, &tp->faddr, sizeof(ip6info.mPeerAddr)); |
157 | 0 | nth->th_sport = tp->lport; |
158 | 0 | nth->th_dport = tp->fport; |
159 | 0 | nth->th_seq = htonl(tp->snd_nxt); |
160 | 0 | nth->th_ack = htonl(tp->rcv_nxt); |
161 | 0 | nth->th_off_x2 = ((sizeof(struct tcphdr) + optlen) >> 2) << TH_OFF_SHIFT; |
162 | 0 | nth->th_flags = flags; |
163 | 0 | nth->th_win = htons(tp->tw_last_win); |
164 | 0 | nth->th_urp = 0; |
165 | 0 | nth->th_sum = 0; |
166 | |
|
167 | 0 | memcpy(nth + 1, opt, optlen); |
168 | 0 | otMessageWrite(message, 0, outbuf, sizeof(struct tcphdr) + optlen); |
169 | 0 | tcplp_sys_send_message(tp->instance, message, &ip6info); |
170 | |
|
171 | 0 | return 0; |
172 | 0 | } |
173 | | |
174 | | /* |
175 | | * Move a TCP connection into TIME_WAIT state. |
176 | | * tcbinfo is locked. |
177 | | * inp is locked, and is unlocked before returning. |
178 | | */ |
179 | | /* |
180 | | * samkumar: Locking is removed (so above comments regarding locks are |
181 | | * not relevant for TCPlp). Rather than allocating a struct tcptw and |
182 | | * discarding the struct tcpcb, this function just switches the tcpcb state |
183 | | * to correspond to TIME-WAIT (updating variables as appropriate). We also |
184 | | * eliminate the "V_nolocaltimewait" optimization. |
185 | | */ |
186 | | void |
187 | | tcp_twstart(struct tcpcb *tp) |
188 | 0 | { |
189 | 0 | int acknow; |
190 | | |
191 | | /* |
192 | | * samkumar: The following code, commented out using "#if 0", handles the |
193 | | * net.inet.tcp.nolocaltimewait option in FreeBSD. The option skips the |
194 | | * TIME-WAIT state for TCP connections where both endpoints are local. |
195 | | * I'm removing this optimization for TCPlp, but I've left the code |
196 | | * commented out as it's a potentially useful feature that we may choose |
197 | | * to restore later. |
198 | | * |
199 | | * See also the "#if 0" block near the top of this file. |
200 | | */ |
201 | | #if 0 |
202 | | if (V_nolocaltimewait) { |
203 | | int error = 0; |
204 | | #ifdef INET6 |
205 | | if (isipv6) |
206 | | error = in6_localaddr(&inp->in6p_faddr); |
207 | | #endif |
208 | | #if defined(INET6) && defined(INET) |
209 | | else |
210 | | #endif |
211 | | #ifdef INET |
212 | | error = in_localip(inp->inp_faddr); |
213 | | #endif |
214 | | if (error) { |
215 | | tp = tcp_close_tcb(tp); |
216 | | if (tp != NULL) |
217 | | INP_WUNLOCK(inp); |
218 | | return; |
219 | | } |
220 | | } |
221 | | #endif |
222 | | |
223 | | /* |
224 | | * For use only by DTrace. We do not reference the state |
225 | | * after this point so modifying it in place is not a problem. |
226 | | */ |
227 | | /* |
228 | | * samkumar: The above comment is not true anymore. I use this state, since |
229 | | * I don't associate every struct tcpcb with a struct inpcb. |
230 | | */ |
231 | 0 | tcp_state_change(tp, TCPS_TIME_WAIT); |
232 | | |
233 | | /* |
234 | | * samkumar: There used to be code here to allocate a struct tcptw |
235 | | * using "tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);" and if it fails, close |
236 | | * an existing TIME-WAIT connection, in LRU fashion, to allocate memory. |
237 | | */ |
238 | | |
239 | | /* |
240 | | * Recover last window size sent. |
241 | | */ |
242 | 0 | if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) |
243 | 0 | tp->tw_last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; |
244 | 0 | else |
245 | 0 | tp->tw_last_win = 0; |
246 | | |
247 | | /* |
248 | | * Set t_recent if timestamps are used on the connection. |
249 | | */ |
250 | 0 | if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == |
251 | 0 | (TF_REQ_TSTMP|TF_RCVD_TSTMP)) { |
252 | | /* |
253 | | * samkumar: This used to do: |
254 | | * tw->t_recent = tp->ts_recent; |
255 | | * tw->ts_offset = tp->ts_offset; |
256 | | * But since we're keeping the state in tp, we don't need to do this |
257 | | * anymore. */ |
258 | 0 | } else { |
259 | 0 | tp->ts_recent = 0; |
260 | 0 | tp->ts_offset = 0; |
261 | 0 | } |
262 | | |
263 | | /* |
264 | | * samkumar: There used to be code here to populate various fields in |
265 | | * tw based on their values in tp, but there's no need for that now since |
266 | | * we can just read the values from tp. tw->tw_time was set to 0, but we |
267 | | * don't need to do that either since we're relying on the old timer system |
268 | | * anyway. |
269 | | */ |
270 | | |
271 | | /* XXX |
272 | | * If this code will |
273 | | * be used for fin-wait-2 state also, then we may need |
274 | | * a ts_recent from the last segment. |
275 | | */ |
276 | 0 | acknow = tp->t_flags & TF_ACKNOW; |
277 | | |
278 | | /* |
279 | | * First, discard tcpcb state, which includes stopping its timers and |
280 | | * freeing it. tcp_discardcb() used to also release the inpcb, but |
281 | | * that work is now done in the caller. |
282 | | * |
283 | | * Note: soisdisconnected() call used to be made in tcp_discardcb(), |
284 | | * and might not be needed here any longer. |
285 | | */ |
286 | | /* |
287 | | * samkumar: Below, I removed the code to discard tp, update inpcb and |
288 | | * release a reference to socket, but kept the rest. I also added a call |
289 | | * to cancel any pending timers on the TCB (which discarding it, as the |
290 | | * original code did, would have done). |
291 | | */ |
292 | 0 | tcp_cancel_timers(tp); |
293 | 0 | if (acknow) |
294 | 0 | tcp_twrespond(tp, TH_ACK); |
295 | 0 | tcp_tw_2msl_reset(tp, 0); |
296 | 0 | } |
297 | | |
298 | | /* |
299 | | * Returns 1 if the TIME_WAIT state was killed and we should start over, |
300 | | * looking for a pcb in the listen state. Returns 0 otherwise. |
301 | | */ |
302 | | /* |
303 | | * samkumar: Old signature was |
304 | | * int |
305 | | * tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, |
306 | | * struct mbuf *m, int tlen) |
307 | | */ |
308 | | int |
309 | | tcp_twcheck(struct tcpcb* tp, struct tcphdr *th, int tlen) |
310 | 0 | { |
311 | 0 | int thflags; |
312 | 0 | tcp_seq seq; |
313 | | |
314 | | /* |
315 | | * samkumar: There used to be code here that obtains the struct tcptw from |
316 | | * the inpcb, and does "goto drop" if that fails. |
317 | | */ |
318 | |
|
319 | 0 | thflags = th->th_flags; |
320 | | |
321 | | /* |
322 | | * NOTE: for FIN_WAIT_2 (to be added later), |
323 | | * must validate sequence number before accepting RST |
324 | | */ |
325 | | |
326 | | /* |
327 | | * If the segment contains RST: |
328 | | * Drop the segment - see Stevens, vol. 2, p. 964 and |
329 | | * RFC 1337. |
330 | | */ |
331 | 0 | if (thflags & TH_RST) |
332 | 0 | goto drop; |
333 | | |
334 | | /* |
335 | | * samkumar: This was commented out (using #if 0) in the original FreeBSD |
336 | | * code. |
337 | | */ |
338 | | #if 0 |
339 | | /* PAWS not needed at the moment */ |
340 | | /* |
341 | | * RFC 1323 PAWS: If we have a timestamp reply on this segment |
342 | | * and it's less than ts_recent, drop it. |
343 | | */ |
344 | | if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && |
345 | | TSTMP_LT(to.to_tsval, tp->ts_recent)) { |
346 | | if ((thflags & TH_ACK) == 0) |
347 | | goto drop; |
348 | | goto ack; |
349 | | } |
350 | | /* |
351 | | * ts_recent is never updated because we never accept new segments. |
352 | | */ |
353 | | #endif |
354 | | |
355 | | /* |
356 | | * If a new connection request is received |
357 | | * while in TIME_WAIT, drop the old connection |
358 | | * and start over if the sequence numbers |
359 | | * are above the previous ones. |
360 | | */ |
361 | 0 | if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tp->rcv_nxt)) { |
362 | | /* |
363 | | * samkumar: The FreeBSD code would call tcp_twclose(tw, 0); but we |
364 | | * do it as below since TCPlp represents TIME-WAIT connects as |
365 | | * struct tcpcb's. |
366 | | */ |
367 | 0 | tcp_close_tcb(tp); |
368 | 0 | tcplp_sys_connection_lost(tp, CONN_LOST_NORMAL); |
369 | 0 | return (1); |
370 | 0 | } |
371 | | |
372 | | /* |
373 | | * Drop the segment if it does not contain an ACK. |
374 | | */ |
375 | 0 | if ((thflags & TH_ACK) == 0) |
376 | 0 | goto drop; |
377 | | |
378 | | /* |
379 | | * Reset the 2MSL timer if this is a duplicate FIN. |
380 | | */ |
381 | 0 | if (thflags & TH_FIN) { |
382 | 0 | seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); |
383 | 0 | if (seq + 1 == tp->rcv_nxt) |
384 | 0 | tcp_tw_2msl_reset(tp, 1); |
385 | 0 | } |
386 | | |
387 | | /* |
388 | | * Acknowledge the segment if it has data or is not a duplicate ACK. |
389 | | */ |
390 | 0 | if (thflags != TH_ACK || tlen != 0 || |
391 | 0 | th->th_seq != tp->rcv_nxt || th->th_ack != tp->snd_nxt) |
392 | 0 | tcp_twrespond(tp, TH_ACK); |
393 | 0 | drop: |
394 | 0 | return (0); |
395 | 0 | } |