Coverage Report

Created: 2025-12-31 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openthread/third_party/tcplp/bsdtcp/tcp_input.c
Line
Count
Source
1
/*-
2
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
3
 *  The Regents of the University of California.  All rights reserved.
4
 * Copyright (c) 2007-2008,2010
5
 *  Swinburne University of Technology, Melbourne, Australia.
6
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
7
 * Copyright (c) 2010 The FreeBSD Foundation
8
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
9
 * All rights reserved.
10
 *
11
 * Portions of this software were developed at the Centre for Advanced Internet
12
 * Architectures, Swinburne University of Technology, by Lawrence Stewart,
13
 * James Healy and David Hayes, made possible in part by a grant from the Cisco
14
 * University Research Program Fund at Community Foundation Silicon Valley.
15
 *
16
 * Portions of this software were developed at the Centre for Advanced
17
 * Internet Architectures, Swinburne University of Technology, Melbourne,
18
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
19
 *
20
 * Portions of this software were developed by Robert N. M. Watson under
21
 * contract to Juniper Networks, Inc.
22
 *
23
 * Redistribution and use in source and binary forms, with or without
24
 * modification, are permitted provided that the following conditions
25
 * are met:
26
 * 1. Redistributions of source code must retain the above copyright
27
 *    notice, this list of conditions and the following disclaimer.
28
 * 2. Redistributions in binary form must reproduce the above copyright
29
 *    notice, this list of conditions and the following disclaimer in the
30
 *    documentation and/or other materials provided with the distribution.
31
 * 4. Neither the name of the University nor the names of its contributors
32
 *    may be used to endorse or promote products derived from this software
33
 *    without specific prior written permission.
34
 *
35
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45
 * SUCH DAMAGE.
46
 *
47
 *  @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
48
 */
49
50
51
/*
52
 * Determine a reasonable value for maxseg size.
53
 * If the route is known, check route for mtu.
54
 * If none, use an mss that can be handled on the outgoing interface
55
 * without forcing IP to fragment.  If no route is found, route has no mtu,
56
 * or the destination isn't local, use a default, hopefully conservative
57
 * size (usually 512 or the default IP max size, but no more than the mtu
58
 * of the interface), as we can't discover anything about intervening
59
 * gateways or networks.  We also initialize the congestion/slow start
60
 * window to be a single segment if the destination isn't local.
61
 * While looking at the routing entry, we also initialize other path-dependent
62
 * parameters from pre-set or cached values in the routing entry.
63
 *
64
 * Also take into account the space needed for options that we
65
 * send regularly.  Make maxseg shorter by that amount to assure
66
 * that we can send maxseg amount of data even when the options
67
 * are present.  Store the upper limit of the length of options plus
68
 * data in maxopd.
69
 *
70
 * NOTE that this routine is only called when we process an incoming
71
 * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
72
 * settings are handled in tcp_mssopt().
73
 */
74
75
#include <errno.h>
76
#include <string.h>
77
#include <strings.h>
78
79
#include "tcp.h"
80
#include "tcp_fsm.h"
81
#include "tcp_seq.h"
82
#include "tcp_timer.h"
83
#include "tcp_var.h"
84
#include "tcp_fastopen.h"
85
#include "../lib/bitmap.h"
86
#include "../lib/cbuf.h"
87
#include "icmp_var.h"
88
#include "ip.h"
89
#include "ip6.h"
90
#include "sys/queue.h"
91
92
#include "tcp_const.h"
93
94
/* samkumar: Copied from in.h */
95
0
#define IPPROTO_DONE 267
96
97
/* samkumar: Copied from sys/libkern.h */
98
0
static int imax(int a, int b) { return (a > b ? a : b); }
99
0
static int imin(int a, int b) { return (a < b ? a : b); }
100
101
0
static int min(int a, int b) { return imin(a, b); }
102
103
static void  tcp_dooptions(struct tcpopt *, uint8_t *, int, int);
104
static void
105
tcp_do_segment(struct ip6_hdr* ip6, struct tcphdr *th, otMessage* msg,
106
    struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
107
    struct tcplp_signals* sig);
108
static void  tcp_xmit_timer(struct tcpcb *, int);
109
void tcp_hc_get(/*struct in_conninfo *inc*/ struct tcpcb* tp, struct hc_metrics_lite *hc_metrics_lite);
110
static void  tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
111
112
/*
113
 * CC wrapper hook functions
114
 */
115
static inline void
116
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
117
0
{
118
0
  tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
119
0
  if (tp->snd_cwnd <= tp->snd_wnd)
120
0
    tp->ccv->flags |= CCF_CWND_LIMITED;
121
0
  else
122
0
    tp->ccv->flags &= ~CCF_CWND_LIMITED;
123
124
0
  if (type == CC_ACK) {
125
0
    if (tp->snd_cwnd > tp->snd_ssthresh) {
126
0
      tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
127
0
           V_tcp_abc_l_var * tp->t_maxseg);
128
0
      if (tp->t_bytes_acked >= tp->snd_cwnd) {
129
0
        tp->t_bytes_acked -= tp->snd_cwnd;
130
0
        tp->ccv->flags |= CCF_ABC_SENTAWND;
131
0
      }
132
0
    } else {
133
0
        tp->ccv->flags &= ~CCF_ABC_SENTAWND;
134
0
        tp->t_bytes_acked = 0;
135
0
    }
136
0
  }
137
138
0
  if (CC_ALGO(tp)->ack_received != NULL) {
139
    /* XXXLAS: Find a way to live without this */
140
0
    tp->ccv->curack = th->th_ack;
141
0
    CC_ALGO(tp)->ack_received(tp->ccv, type);
142
0
  }
143
0
}
144
145
static inline void
146
cc_conn_init(struct tcpcb *tp)
147
0
{
148
0
  struct hc_metrics_lite metrics;
149
0
  int rtt;
150
151
  /*
152
   * samkumar: remove locks, inpcb, and stats.
153
   */
154
155
  /* samkumar: Used to take &inp->inp_inc as an argument. */
156
0
  tcp_hc_get(tp, &metrics);
157
158
0
  if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
159
0
    tp->t_srtt = rtt;
160
0
    tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
161
0
    if (metrics.rmx_rttvar) {
162
0
      tp->t_rttvar = metrics.rmx_rttvar;
163
0
    } else {
164
      /* default variation is +- 1 rtt */
165
0
      tp->t_rttvar =
166
0
          tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
167
0
    }
168
0
    TCPT_RANGESET(tp->t_rxtcur,
169
0
        ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
170
0
        tp->t_rttmin, TCPTV_REXMTMAX);
171
0
  }
172
0
  if (metrics.rmx_ssthresh) {
173
    /*
174
     * There's some sort of gateway or interface
175
     * buffer limit on the path.  Use this to set
176
     * the slow start threshhold, but set the
177
     * threshold to no less than 2*mss.
178
     */
179
0
    tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh);
180
0
  }
181
182
  /*
183
   * Set the initial slow-start flight size.
184
   *
185
   * RFC5681 Section 3.1 specifies the default conservative values.
186
   * RFC3390 specifies slightly more aggressive values.
187
   * RFC6928 increases it to ten segments.
188
   * Support for user specified value for initial flight size.
189
   *
190
   * If a SYN or SYN/ACK was lost and retransmitted, we have to
191
   * reduce the initial CWND to one segment as congestion is likely
192
   * requiring us to be cautious.
193
   */
194
0
  if (tp->snd_cwnd == 1)
195
0
    tp->snd_cwnd = tp->t_maxseg;   /* SYN(-ACK) lost */
196
0
  else if (V_tcp_initcwnd_segments)
197
0
    tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg,
198
0
        max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460));
199
0
  else if (V_tcp_do_rfc3390)
200
0
    tp->snd_cwnd = min(4 * tp->t_maxseg,
201
0
        max(2 * tp->t_maxseg, 4380));
202
0
  else {
203
    /* Per RFC5681 Section 3.1 */
204
0
    if (tp->t_maxseg > 2190)
205
0
      tp->snd_cwnd = 2 * tp->t_maxseg;
206
0
    else if (tp->t_maxseg > 1095)
207
0
      tp->snd_cwnd = 3 * tp->t_maxseg;
208
0
    else
209
0
      tp->snd_cwnd = 4 * tp->t_maxseg;
210
0
  }
211
212
0
  if (CC_ALGO(tp)->conn_init != NULL)
213
0
    CC_ALGO(tp)->conn_init(tp->ccv);
214
215
  /* samkumar: print statement for debugging. Resurrect with DEBUG macro? */
216
#ifdef INSTRUMENT_TCP
217
  tcplp_sys_log("TCP CC_INIT %u %d %d", (unsigned int) tcplp_sys_get_millis(), (int) tp->snd_cwnd, (int) tp->snd_ssthresh);
218
#endif
219
0
}
220
221
inline void
222
cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
223
0
{
224
  /* samkumar: Remove locks and stats from this function. */
225
226
0
  switch(type) {
227
0
  case CC_NDUPACK:
228
0
    if (!IN_FASTRECOVERY(tp->t_flags)) {
229
0
      tp->snd_recover = tp->snd_max;
230
0
      if (tp->t_flags & TF_ECN_PERMIT)
231
0
        tp->t_flags |= TF_ECN_SND_CWR;
232
0
    }
233
0
    break;
234
0
  case CC_ECN:
235
0
    if (!IN_CONGRECOVERY(tp->t_flags)) {
236
0
      tp->snd_recover = tp->snd_max;
237
0
      if (tp->t_flags & TF_ECN_PERMIT)
238
0
        tp->t_flags |= TF_ECN_SND_CWR;
239
0
    }
240
0
    break;
241
0
  case CC_RTO:
242
0
    tp->t_dupacks = 0;
243
0
    tp->t_bytes_acked = 0;
244
0
    EXIT_RECOVERY(tp->t_flags);
245
    /*
246
     * samkumar: I added the cast to uint64_t below to fix an OpenThread
247
     * code scanning alert relating to integer overflow in multiplication.
248
     */
249
0
    tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 /
250
0
        tp->t_maxseg) * ((uint64_t) tp->t_maxseg);
251
0
    tp->snd_cwnd = tp->t_maxseg;
252
253
    /*
254
     * samkumar: Stats for TCPlp: count the number of timeouts (RTOs).
255
     * I've commented this out (with #if 0) because it isn't part of TCP
256
     * functionality. At some point, we may want to bring it back to
257
     * measure performance.
258
     */
259
#if 0
260
    tcplp_timeoutRexmitCnt++;
261
#endif
262
#ifdef INSTRUMENT_TCP
263
    tcplp_sys_log("TCP CC_RTO %u %d %d", (unsigned int) tcplp_sys_get_millis(), (int) tp->snd_cwnd, (int) tp->snd_ssthresh);
264
#endif
265
0
    break;
266
0
  case CC_RTO_ERR:
267
    /* RTO was unnecessary, so reset everything. */
268
0
    tp->snd_cwnd = tp->snd_cwnd_prev;
269
0
    tp->snd_ssthresh = tp->snd_ssthresh_prev;
270
0
    tp->snd_recover = tp->snd_recover_prev;
271
0
    if (tp->t_flags & TF_WASFRECOVERY)
272
0
      ENTER_FASTRECOVERY(tp->t_flags);
273
0
    if (tp->t_flags & TF_WASCRECOVERY)
274
0
      ENTER_CONGRECOVERY(tp->t_flags);
275
0
    tp->snd_nxt = tp->snd_max;
276
0
    tp->t_flags &= ~TF_PREVVALID;
277
0
    tp->t_badrxtwin = 0;
278
#ifdef INSTRUMENT_TCP
279
    tcplp_sys_log("TCP CC_RTO_ERR %u %d %d", (unsigned int) tcplp_sys_get_millis(), (int) tp->snd_cwnd, (int) tp->snd_ssthresh);
280
#endif
281
0
    break;
282
0
  }
283
284
0
  if (CC_ALGO(tp)->cong_signal != NULL) {
285
0
    if (th != NULL)
286
0
      tp->ccv->curack = th->th_ack;
287
0
    CC_ALGO(tp)->cong_signal(tp->ccv, type);
288
0
  }
289
0
}
290
291
static inline void
292
cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
293
0
{
294
  /* samkumar: remove lock */
295
296
  /* XXXLAS: KASSERT that we're in recovery? */
297
0
  if (CC_ALGO(tp)->post_recovery != NULL) {
298
0
    tp->ccv->curack = th->th_ack;
299
0
    CC_ALGO(tp)->post_recovery(tp->ccv);
300
0
  }
301
  /* XXXLAS: EXIT_RECOVERY ? */
302
0
  tp->t_bytes_acked = 0;
303
0
}
304
305
306
/*
307
 * Indicate whether this ack should be delayed.  We can delay the ack if
308
 * following conditions are met:
309
 *  - There is no delayed ack timer in progress.
310
 *  - Our last ack wasn't a 0-sized window. We never want to delay
311
 *    the ack that opens up a 0-sized window.
312
 *  - LRO wasn't used for this segment. We make sure by checking that the
313
 *    segment size is not larger than the MSS.
314
 *  - Delayed acks are enabled or this is a half-synchronized T/TCP
315
 *    connection.
316
 */
317
#define DELAY_ACK(tp, tlen)           \
318
0
  ((!tcp_timer_active(tp, TT_DELACK) &&        \
319
0
      (tp->t_flags & TF_RXWIN0SENT) == 0) &&      \
320
0
      (tlen <= tp->t_maxopd) &&         \
321
0
      (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
322
323
static inline void
324
cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
325
0
{
326
  /* samkumar: remove lock */
327
328
0
  if (CC_ALGO(tp)->ecnpkt_handler != NULL) {
329
0
    switch (iptos & IPTOS_ECN_MASK) {
330
0
    case IPTOS_ECN_CE:
331
0
      tp->ccv->flags |= CCF_IPHDR_CE;
332
0
      break;
333
0
    case IPTOS_ECN_ECT0:
334
0
      tp->ccv->flags &= ~CCF_IPHDR_CE;
335
0
      break;
336
0
    case IPTOS_ECN_ECT1:
337
0
      tp->ccv->flags &= ~CCF_IPHDR_CE;
338
0
      break;
339
0
    }
340
341
0
    if (th->th_flags & TH_CWR)
342
0
      tp->ccv->flags |= CCF_TCPHDR_CWR;
343
0
    else
344
0
      tp->ccv->flags &= ~CCF_TCPHDR_CWR;
345
346
0
    if (tp->t_flags & TF_DELACK)
347
0
      tp->ccv->flags |= CCF_DELACK;
348
0
    else
349
0
      tp->ccv->flags &= ~CCF_DELACK;
350
351
0
    CC_ALGO(tp)->ecnpkt_handler(tp->ccv);
352
353
0
    if (tp->ccv->flags & CCF_ACKNOW)
354
0
      tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
355
0
  }
356
0
}
357
358
/*
359
 * External function: look up an entry in the hostcache and fill out the
360
 * supplied TCP metrics structure.  Fills in NULL when no entry was found or
361
 * a value is not set.
362
 */
363
/*
364
 * samkumar: This function is taken from tcp_hostcache.c. We have no host cache
365
 * in TCPlp, so I changed this to always act as if there is a miss. I removed
366
 * the first argument, formerly "struct in_coninfo *inc".
367
 */
368
void
369
tcp_hc_get(struct tcpcb* tp, struct hc_metrics_lite *hc_metrics_lite)
370
0
{
371
0
  bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
372
0
}
373
374
/*
375
 * External function: look up an entry in the hostcache and return the
376
 * discovered path MTU.  Returns NULL if no entry is found or value is not
377
 * set.
378
 */
379
 /*
380
  * samkumar: This function is taken from tcp_hostcache.c. We have no host cache
381
  * in TCPlp, so I changed this to always act as if there is a miss.
382
  */
383
uint64_t
384
tcp_hc_getmtu(struct tcpcb* tp)
385
0
{
386
0
  return 0;
387
0
}
388
389
390
/*
391
 * Issue RST and make ACK acceptable to originator of segment.
392
 * The mbuf must still include the original packet header.
393
 * tp may be NULL.
394
 */
395
/*
396
 * samkumar: Original signature was:
397
 * static void tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
398
 *    int tlen, int rstreason)
399
 */
400
void
401
tcp_dropwithreset(struct ip6_hdr* ip6, struct tcphdr *th, struct tcpcb *tp, otInstance* instance,
402
    int tlen, int rstreason)
403
40
{
404
  /*
405
   * samkumar: I removed logic to skip this for broadcast or multicast
406
   * packets. In the FreeBSD version of this function, it would just
407
   * call m_freem(m), if m->m_flags has M_BCAST or M_MCAST set, and not
408
   * send a response packet.
409
   * I also removed bandwidth limiting.
410
   */
411
40
  if (th->th_flags & TH_RST)
412
13
    return;
413
414
  /* tcp_respond consumes the mbuf chain. */
415
27
  if (th->th_flags & TH_ACK) {
416
9
    tcp_respond(tp, instance, ip6, th, (tcp_seq) 0, th->th_ack, TH_RST);
417
18
  } else {
418
18
    if (th->th_flags & TH_SYN)
419
9
      tlen++;
420
18
    tcp_respond(tp, instance, ip6, th, th->th_seq + tlen, (tcp_seq) 0, TH_RST | TH_ACK);
421
18
  }
422
27
  return;
423
40
}
424
425
/*
426
 * TCP input handling is split into multiple parts:
427
 *   tcp6_input is a thin wrapper around tcplp_input for the extended
428
 *  ip6_protox[] call format in ip6_input
429
 *   tcplp_input handles primary segment validation, inpcb lookup and
430
 *  SYN processing on listen sockets
431
 *   tcp_do_segment processes the ACK and text of the segment for
432
 *  establishing, established and closing connections
433
 */
434
/* samkumar: The signature of this function was originally:
435
   tcp_input(struct mbuf **mp, int *offp, int proto) */
436
/* NOTE: tcp_fields_to_host(th) must be called before this function is called. */
437
int
438
tcplp_input(struct ip6_hdr* ip6, struct tcphdr* th, otMessage* msg, struct tcpcb* tp, struct tcpcb_listen* tpl,
439
          struct tcplp_signals* sig)
440
0
{
441
  /*
442
   * samkumar: I significantly modified this function, compared to the
443
   * FreeBSD version. This function used to be reponsible for matching an
444
   * incoming TCP segment to its TCB. That functionality is now done by
445
   * TCPlp, and this function is only called once a match has been
446
   * identified.
447
   *
448
   * The tp and tpl arguments are used to indicate the match. Exactly one of
449
   * them must be NULL, and the other must be set. If tp is non-NULL, then
450
   * this function assumes that the packet was matched to an active socket
451
   * (connection endpoint). If tpl is non-NULL, then this function assumes
452
   * that this packet is a candidate match for a passive socket (listener)
453
   * and attempts to set up a new connection if the flags, sequence numbers,
454
   * etc. look OK.
455
   *
456
   * TCPlp assumes that the packets are IPv6, so I removed any logic specific
457
   * to IPv4.
458
   *
459
   * And of course, all code pertaining to locks and stats has been removed.
460
   */
461
0
  int tlen = 0, off;
462
0
  int thflags;
463
0
  uint8_t iptos = 0;
464
0
  int drop_hdrlen;
465
0
  int rstreason = 0;
466
0
  struct tcpopt to;   /* options in this segment */
467
0
  uint8_t* optp = NULL;
468
0
  int optlen = 0;
469
0
  to.to_flags = 0;
470
0
  KASSERT(tp || tpl, ("One of tp and tpl must be positive"));
471
472
  /*
473
   * samkumar: Here, there used to be code that handled preprocessing:
474
   * calling m_pullup(m, sizeof(*ip6) + sizeof(*th)) to get the headers
475
   * contiguous in memory, setting the ip6 and th pointers, validating the
476
   * checksum, and dropping packets with unspecified source address. In
477
   * TCPlp, all of this is done for a packet before this function is called.
478
   */
479
480
0
  tlen = ntohs(ip6->ip6_plen); // assume *off == sizeof(*ip6)
481
482
  /*
483
   * samkumar: Logic that handled IPv4 was deleted below. I won't add a
484
   * comment every time this is done, but I'm putting it here (one of the
485
   * first instances of this) for clarity.
486
   */
487
0
  iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
488
489
  /*
490
   * Check that TCP offset makes sense,
491
   * pull out TCP options and adjust length.    XXX
492
   */
493
0
  off = (th->th_off_x2 >> TH_OFF_SHIFT) << 2;
494
0
  if (off < sizeof (struct tcphdr) || off > tlen) {
495
0
    goto drop;
496
0
  }
497
0
  tlen -= off;  /* tlen is used instead of ti->ti_len */
498
  /* samkumar: now, tlen is the length of the data */
499
500
0
  if (off > sizeof (struct tcphdr)) {
501
    /*
502
     * samkumar: I removed a call to IP6_EXTHDR_CHECK, which I believe
503
     * checks for IPv6 extension headers. In TCPlp, we assume that these
504
     * are handled elsewhere in the networking stack, before the incoming
505
     * packet is processed at the TCP layer. I also removed the followup
506
     * calls to reassign the ip6 and th pointers.
507
     */
508
0
    optlen = off - sizeof (struct tcphdr);
509
0
    optp = (uint8_t *)(th + 1);
510
0
  }
511
512
0
  thflags = th->th_flags;
513
514
  /*
515
   * samkumar: There used to be a call here to tcp_fields_to_host(th), which
516
   * changes the byte order of various fields to host format. I removed this
517
   * call from there and handle it in TCPlp, before calling this. The reason
518
   * is that it's possible for this function to be called twice by TCPlp's
519
   * logic (e.g., if the packet matches a TIME-WAIT socket this function
520
   * returns early, and the packet may then match a listening socket, at
521
   * which ppoint this function will be called again). Thus, any operations
522
   * like this, which mutate the packet itself, need to happen before calling
523
   * this function.
524
   */
525
526
  /*
527
   * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
528
   *
529
   * samkumar: My TCP header is in a different buffer from the IP header.
530
   * drop_hdrlen is only meaningful as an offset into the TCP buffer,
531
   * because it is used to determine how much of the packet to discard
532
   * before copying it into the receive buffer. Therefore, my offset does
533
   * not include the length of IP header and options, only the length of
534
   * the TCP header and options.
535
   */
536
0
  drop_hdrlen = /*off0 +*/ off;
537
538
  /*
539
   * Locate pcb for segment; if we're likely to add or remove a
540
   * connection then first acquire pcbinfo lock.  There are three cases
541
   * where we might discover later we need a write lock despite the
542
   * flags: ACKs moving a connection out of the syncache, ACKs for a
543
   * connection in TIMEWAIT and SYNs not targeting a listening socket.
544
   */
545
546
  /*
547
   * samkumar: Locking code is removed, invalidating most of the above
548
   * comment.
549
   */
550
551
  /*
552
   * samkumar: The FreeBSD code at logic here to check m->m_flags for the
553
   * M_IP6_NEXTHOP flag, and search for the PACKET_TAG_IPFORWARD tag and
554
   * store it in fwd_tag if so. In TCPlp, we assume that the IPv6 layer of
555
   * the host network stack handles this kind of IPv6-related functionality,
556
   * so this logic has been removed.
557
   */
558
559
  /*
560
   * samkumar: Here, there was code to match the packet to an inpcb and reply
561
   * with an RST segment if no match is found. This included taking the
562
   * fwd_tag into account, if set above (see the previous comment). I removed
563
   * this code because, in TCPlp, this is done before calling this function.
564
   */
565
566
  /*
567
   * A previous connection in TIMEWAIT state is supposed to catch stray
568
   * or duplicate segments arriving late.  If this segment was a
569
   * legitimate new connection attempt, the old INPCB gets removed and
570
   * we can try again to find a listening socket.
571
   *
572
   * At this point, due to earlier optimism, we may hold only an inpcb
573
   * lock, and not the inpcbinfo write lock.  If so, we need to try to
574
   * acquire it, or if that fails, acquire a reference on the inpcb,
575
   * drop all locks, acquire a global write lock, and then re-acquire
576
   * the inpcb lock.  We may at that point discover that another thread
577
   * has tried to free the inpcb, in which case we need to loop back
578
   * and try to find a new inpcb to deliver to.
579
   *
580
   * XXXRW: It may be time to rethink timewait locking.
581
   */
582
  /*
583
   * samkumar: The original code checked inp->inp_flags & INP_TIMEWAIT. I
584
   * changed it to instead check tp->t_state, since we don't use inpcbs in
585
   * TCPlp.
586
   */
587
0
  if (tp && tp->t_state == TCP6S_TIME_WAIT) {
588
    /*
589
     * samkumar: There's nothing wrong with the call to tcp_dooptions call
590
     * that I've commented out below; it's just that the modified
591
     * "tcp_twcheck" function no longer needs the options structure, so
592
     * I figured that there's no longer a good reason to parse the options.
593
     * In fact, this call was probably unnecessary even in the original
594
     * FreeBSD TCP code, since tcp_twcheck, even without my modifications,
595
     * did not use the pointer to the options structure!
596
     */
597
    //if (thflags & TH_SYN)
598
      //tcp_dooptions(&to, optp, optlen, TO_SYN);
599
    /*
600
     * samkumar: The original code would "goto findpcb;" if this branch is
601
     * taken. Matching with a TCB is done outside of this function in
602
     * TCPlp, so we instead return a special value so that the caller knows
603
     * to try re-matching this packet to a socket.
604
     */
605
0
    if (tcp_twcheck(tp,/*inp, &to,*/ th, /*m,*/ tlen))
606
0
      return (RELOOKUP_REQUIRED);
607
0
    return (IPPROTO_DONE);
608
0
  }
609
  /*
610
   * The TCPCB may no longer exist if the connection is winding
611
   * down or it is in the CLOSED state.  Either way we drop the
612
   * segment and send an appropriate response.
613
   */
614
  /*
615
   * samkumar: There used to be code here that grabs the tp from the inpcb
616
   * and drops with reset if the connection is in the closed state or if
617
   * the tp is NULL. In TCPlp, the equivalent logic is done before entering
618
   * this function. There was also code here to handle TCP offload, which
619
   * TCPlp does not handle.
620
   */
621
622
  /*
623
   * We've identified a valid inpcb, but it could be that we need an
624
   * inpcbinfo write lock but don't hold it.  In this case, attempt to
625
   * acquire using the same strategy as the TIMEWAIT case above.  If we
626
   * relock, we have to jump back to 'relocked' as the connection might
627
   * now be in TIMEWAIT.
628
   */
629
  /*
630
   * samkumar: There used to be some code here for synchronization, MAC
631
   * management, and debugging.
632
   */
633
634
  /*
635
   * When the socket is accepting connections (the INPCB is in LISTEN
636
   * state) we look into the SYN cache if this is a new connection
637
   * attempt or the completion of a previous one. Instead of checking
638
   * so->so_options to check if the socket is listening, we rely on the
639
   * arguments passed to this function (if tp == NULL, then tpl is not NULL
640
   * and is the matching listen socket).
641
   */
642
643
0
  if (/*so->so_options & SO_ACCEPTCONN*/tp == NULL) {
644
0
    int tfo_cookie_valid = 0;
645
0
    uint64_t tfo_response_cookie;
646
    // int tfo_response_cookie_valid = 0;
647
648
    /* samkumar: NULL check isn't needed but prevents a compiler warning */
649
0
    KASSERT(tpl != NULL && tpl->t_state == TCP6S_LISTEN, ("listen socket must be in listening state!"));
650
651
    /*
652
     * samkumar: There used to be some code here that checks if the
653
     * received segment is an ACK, and if so, searches the SYN cache to
654
     * find an entry whose connection establishment handshake this segment
655
     * completes. If such an entry is found, then a socket is created and
656
     * then tcp_do_segment is called to actually run the code to mark the
657
     * connection as established. If the received segment is an RST, then
658
     * that is processed in the syncache as well. In TCPlp we do not use a
659
     * SYN cache, so I've removed that code. The actual connection
660
     * establishment/processing logic happens in tcp_do_segment anyway,
661
     * which is called at the bottom of this function, so there's no need
662
     * to rewrite this code with special-case logic for that.
663
     */
664
665
    /*
666
     * We can't do anything without SYN.
667
     */
668
0
    if ((thflags & TH_SYN) == 0) {
669
      /*
670
       * samkumar: Here, and in several other instances, the FreeBSD
671
       * code would call tcp_log_addrs. Improving logging in these
672
       * edge cases in TCPlp is left for the future --- for now, I just
673
       * put "<addrs go here>" where the address string would go.
674
       */
675
0
      tcplp_sys_log("%s; %s: Listen socket: "
676
0
          "SYN is missing, segment ignored",
677
0
          "<addrs go here>", __func__);
678
0
      goto dropunlock;
679
0
    }
680
    /*
681
     * (SYN|ACK) is bogus on a listen socket.
682
     */
683
0
    if (thflags & TH_ACK) {
684
      /* samkumar: See above comment regarding tcp_log_addrs. */
685
0
      tcplp_sys_log("%s; %s: Listen socket: "
686
0
          "SYN|ACK invalid, segment rejected",
687
0
          "<addrs go here>", __func__);
688
      /* samkumar: Removed call to syncache_badack(&inc); */
689
0
      rstreason = BANDLIM_RST_OPENPORT;
690
0
      goto dropwithreset;
691
0
    }
692
    /*
693
     * If the drop_synfin option is enabled, drop all
694
     * segments with both the SYN and FIN bits set.
695
     * This prevents e.g. nmap from identifying the
696
     * TCP/IP stack.
697
     * XXX: Poor reasoning.  nmap has other methods
698
     * and is constantly refining its stack detection
699
     * strategies.
700
     * XXX: This is a violation of the TCP specification
701
     * and was used by RFC1644.
702
     */
703
0
    if ((thflags & TH_FIN) && V_drop_synfin) {
704
      /* samkumar: See above comment regarding tcp_log_addrs. */
705
0
      tcplp_sys_log("%s; %s: Listen socket: "
706
0
          "SYN|FIN segment ignored (based on "
707
0
          "sysctl setting)", "<addrs go here>", __func__);
708
0
      goto dropunlock;
709
0
    }
710
    /*
711
     * Segment's flags are (SYN) or (SYN|FIN).
712
     *
713
     * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
714
     * as they do not affect the state of the TCP FSM.
715
     * The data pointed to by TH_URG and th_urp is ignored.
716
     */
717
0
    KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
718
0
        ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
719
0
    KASSERT(thflags & (TH_SYN),
720
0
        ("%s: Listen socket: TH_SYN not set", __func__));
721
722
    /*
723
     * samkumar: There used to be some code here to reject incoming
724
     * SYN packets for deprecated interface addresses unless
725
     * V_ip6_use_deprecated is true. Rejecting the packet, in this case,
726
     * means to "goto dropwithreset". I removed this functionality.
727
     */
728
729
    /*
730
     * Basic sanity checks on incoming SYN requests:
731
     *   Don't respond if the destination is a link layer
732
     *  broadcast according to RFC1122 4.2.3.10, p. 104.
733
     *   If it is from this socket it must be forged.
734
     *   Don't respond if the source or destination is a
735
     *  global or subnet broad- or multicast address.
736
     *   Note that it is quite possible to receive unicast
737
     *  link-layer packets with a broadcast IP address. Use
738
     *  in_broadcast() to find them.
739
     */
740
741
    /*
742
     * samkumar: There used to be a sanity check that drops (via
743
     * "goto dropunlock") any broadcast or multicast packets. This check is
744
     * done by checking m->m_flags for (M_BAST|M_MCAST). The original
745
     * FreeBSD code for this has been removed (since checking m->m_flags
746
     * isn't really useful to us anyway). Note that other FreeBSD code that
747
     * checks for multicast source/destination addresses is retained below
748
     * (but only for the IPv6 case; the original FreeBSD code also handled
749
     * it for IPv4 addresses).
750
     */
751
752
0
    if (th->th_dport == th->th_sport &&
753
0
        IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
754
      /* samkumar: See above comment regarding tcp_log_addrs. */
755
0
      tcplp_sys_log("%s; %s: Listen socket: "
756
0
      "Connection attempt to/from self "
757
0
      "ignored", "<addrs go here>", __func__);
758
0
      goto dropunlock;
759
0
    }
760
0
    if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
761
0
        IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
762
      /* samkumar: See above comment regarding tcp_log_addrs. */
763
0
      tcplp_sys_log("%s; %s: Listen socket: "
764
0
      "Connection attempt from/to multicast "
765
0
      "address ignored", "<addrs go here>", __func__);
766
0
      goto dropunlock;
767
0
    }
768
769
    /*
770
     * samkumar: The FreeBSD code would call
771
     * syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
772
     * to add an entry to the SYN cache at this point. TCPlp doesn't use a
773
     * syncache, so we initialize the new socket right away. The code to
774
     * initialize the socket is taken from the syncache_socket function.
775
     */
776
    /*
777
     * samkumar: As of FreeBSD 10.3, the syncache_add function returns
778
     * a flag indicating if a "fast open" code path should be taken.
779
     * In that case, there is a "goto" statement to the removed logic
780
     * above that calls tcp_do_segment after expanding a syncache entry.
781
     * Analogous logic is implemented below.
782
     */
783
0
    tcp_dooptions(&to, optp, optlen, TO_SYN);
784
785
    /*
786
     * samkumar: TCP Fast Open logic taken from syncache_add in
787
     * FreeBSD 12.0.
788
     */
789
0
    if (V_tcp_fastopen_server_enable && /*IS_FASTOPEN(tp->t_flags) &&
790
      (tp->t_tfo_pending != NULL) && */
791
0
      (to.to_flags & TOF_FASTOPEN)) {
792
      /*
793
       * Limit the number of pending TFO connections to
794
       * approximately half of the queue limit.  This prevents TFO
795
       * SYN floods from starving the service by filling the
796
       * listen queue with bogus TFO connections.
797
       */
798
      /*
799
       * samkumar: Since we let the application handle the listen
800
       * queue it doesn't make sense to limit the number of pending
801
       * TFO connections as above. Long term, I think the best fix
802
       * is to let applications know if an incoming connection is
803
       * TFO, so that they can handle the case appropriately (e.g.,
804
       * by disabling TFO or by declining the connection).
805
       */
806
0
      int result = tcp_fastopen_check_cookie(NULL,
807
0
        to.to_tfo_cookie, to.to_tfo_len,
808
0
        &tfo_response_cookie);
809
0
      tfo_cookie_valid = (result > 0);
810
      // tfo_response_cookie_valid = (result >= 0);
811
0
    }
812
813
0
    tp = tcplp_sys_accept_ready(tpl, &ip6->ip6_src, th->th_sport); // Try to allocate an active socket to accept into
814
0
    if (tp == NULL) {
815
      /* If we couldn't allocate, just ignore the SYN. */
816
0
      return IPPROTO_DONE;
817
0
    }
818
0
    if (tp == (struct tcpcb *) -1) {
819
0
      rstreason = ECONNREFUSED;
820
0
      tp = NULL;
821
0
      goto dropwithreset;
822
0
    }
823
0
    sig->accepted_connection = tp;
824
0
    tcp_state_change(tp, TCPS_SYN_RECEIVED);
825
0
    tpmarkpassiveopen(tp);
826
0
    tp->iss = tcp_new_isn(tp);
827
0
    tp->irs = th->th_seq;
828
0
    tcp_rcvseqinit(tp);
829
0
    tcp_sendseqinit(tp);
830
0
    tp->snd_wl1 = th->th_seq;
831
    /*
832
     * samkumar: We remove the "+ 1"s below since we use
833
     * tcplp_output to send the appropriate SYN-ACK. For
834
     * example, syncache_tfo_expand eliminates the "+ 1"s
835
     * too. My understanding is that syncache_socket has
836
     * the "+ 1"s because it's normally called once the
837
     * SYN-ACK has already been ACKed, which is not how
838
     * TCPlp operates.
839
     */
840
0
    tp->snd_max = tp->iss/* + 1*/;
841
0
    tp->snd_nxt = tp->iss/* + 1*/;
842
0
    tp->rcv_up = th->th_seq + 1;
843
0
    tp->rcv_wnd = imin(imax(cbuf_free_space(&tp->recvbuf), 0), TCP_MAXWIN);
844
0
    tp->rcv_adv += tp->rcv_wnd;
845
0
    tp->last_ack_sent = tp->rcv_nxt;
846
0
    memcpy(&tp->laddr, &ip6->ip6_dst, sizeof(tp->laddr));
847
0
    memcpy(&tp->faddr, &ip6->ip6_src, sizeof(tp->faddr));
848
0
    tp->fport = th->th_sport;
849
0
    tp->lport = tpl->lport;
850
851
    /*
852
     * samkumar: Several of the checks below (taken from syncache_socket!)
853
     * check for flags in sc->sc_flags. They have been written to directly
854
     * check for the conditions on the TCP options structure or in the TCP
855
     * header that would ordinarily be used to set flags in sc->sc_flags
856
     * when adding an entry to the SYN cache.
857
     *
858
     * In effect, we combine the logic in syncache_add to set elements of
859
     * sc with the logic in syncache_socket to transfer state from sc
860
     * to the socket, but short-circuit the process to avoid ever storing
861
     * data in sc. Since this isn't just adding or deleting code, I decided
862
     * that it's better to keep comments indicating exactly how I composed
863
     * these two functions.
864
     */
865
0
    tp->t_flags = tp->t_flags & (TF_NOPUSH | TF_NODELAY | TF_NOOPT);
866
//    tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
867
//    if (sc->sc_flags & SCF_NOOPT)
868
//      tp->t_flags |= TF_NOOPT;
869
//    else {
870
0
    if (!(tp->t_flags & TF_NOOPT) && V_tcp_do_rfc1323) {
871
0
      if (/*sc->sc_flags & SCF_WINSCALE*/to.to_flags & TOF_SCALE) {
872
0
        int wscale = 0;
873
874
        /*
875
         * Pick the smallest possible scaling factor that
876
         * will still allow us to scale up to sb_max, aka
877
         * kern.ipc.maxsockbuf.
878
         *
879
         * We do this because there are broken firewalls that
880
         * will corrupt the window scale option, leading to
881
         * the other endpoint believing that our advertised
882
         * window is unscaled.  At scale factors larger than
883
         * 5 the unscaled window will drop below 1500 bytes,
884
         * leading to serious problems when traversing these
885
         * broken firewalls.
886
         *
887
         * With the default maxsockbuf of 256K, a scale factor
888
         * of 3 will be chosen by this algorithm.  Those who
889
         * choose a larger maxsockbuf should watch out
890
         * for the compatibility problems mentioned above.
891
         *
892
         * RFC1323: The Window field in a SYN (i.e., a <SYN>
893
         * or <SYN,ACK>) segment itself is never scaled.
894
         */
895
896
        /*
897
         * samkumar: The original logic, taken from syncache_add, is
898
         * listed below, commented out. In practice, we just use
899
         * wscale = 0 because in TCPlp we assume that the buffers
900
         * aren't big enough for window scaling to be all that useful.
901
         */
902
#if 0
903
        while (wscale < TCP_MAX_WINSHIFT &&
904
          (TCP_MAXWIN << wscale) < sb_max)
905
          wscale++;
906
#endif
907
908
0
        tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
909
0
        tp->snd_scale = /*sc->sc_requested_s_scale*/to.to_wscale;
910
0
        tp->request_r_scale = wscale;
911
0
      }
912
0
      if (/*sc->sc_flags & SCF_TIMESTAMP*/to.to_flags & TOF_TS) {
913
0
        tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
914
0
        tp->ts_recent = /*sc->sc_tsreflect*/to.to_tsval;
915
0
        tp->ts_recent_age = tcp_ts_getticks();
916
0
        tp->ts_offset = /*sc->sc_tsoff*/0; // No syncookies, so this should always be 0
917
0
      }
918
919
      /*
920
       * samkumar: there used to be code here that would set the
921
       * TF_SIGNATURE flag on tp->t_flags if SCF_SIGNATURE is set on
922
       * sc->sc_flags. I've left it in below, commented out.
923
       */
924
#if 0
925
  #ifdef TCP_SIGNATURE
926
      if (sc->sc_flags & SCF_SIGNATURE)
927
        tp->t_flags |= TF_SIGNATURE;
928
  #endif
929
#endif
930
0
      if (/*sc->sc_flags & SCF_SACK*/ to.to_flags & TOF_SACKPERM)
931
0
        tp->t_flags |= TF_SACK_PERMIT;
932
0
    }
933
0
    if (/*sc->sc_flags & SCF_ECN*/(th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
934
0
      tp->t_flags |= TF_ECN_PERMIT;
935
936
    /*
937
     * Set up MSS and get cached values from tcp_hostcache.
938
     * This might overwrite some of the defaults we just set.
939
     */
940
0
    tcp_mss(tp, /*sc->sc_peer_mss*/(to.to_flags & TOF_MSS) ? to.to_mss : 0);
941
942
0
    if (tfo_cookie_valid) {
943
      /*
944
       * samkumar: The code below is taken from syncache_tfo_socket.
945
       * It calls syncache_socket (upon which the above code is based)
946
       * so it makes sense for this logic to go here.
947
       */
948
0
      tp->t_flags |= TF_FASTOPEN;
949
0
      tp->t_tfo_cookie.server = tfo_response_cookie;
950
0
      tp->snd_max = tp->iss;
951
0
      tp->snd_nxt = tp->iss;
952
      // tp->tfo_pending = pending_counter;
953
      /* This would normally "goto" labeled code that calls tcp_do_segment. */
954
0
      tcp_do_segment(ip6, th, msg, tp, drop_hdrlen, tlen, iptos, sig);
955
956
0
      tp->accepted_from = tpl;
957
0
      return (IPPROTO_DONE);
958
0
    } else {
959
0
      tp->t_flags |= TF_ACKNOW; // samkumar: my addition
960
0
    }
961
962
0
    tcplp_output(tp); // to send the SYN-ACK
963
964
0
    tp->accepted_from = tpl;
965
0
    return (IPPROTO_DONE);
966
0
  } else if (tp->t_state == TCPS_LISTEN) {
967
    /*
968
     * When a listen socket is torn down the SO_ACCEPTCONN
969
     * flag is removed first while connections are drained
970
     * from the accept queue in a unlock/lock cycle of the
971
     * ACCEPT_LOCK, opening a race condition allowing a SYN
972
     * attempt to go through unhandled.
973
     */
974
0
    goto dropunlock;
975
0
  }
976
977
0
  KASSERT(tp, ("tp is still NULL!"));
978
979
  /*
980
   * samkumar: There used to be code here to verify TCP signatures. We don't
981
   * support TCP signatures in TCPlp.
982
   */
983
984
  /*
985
   * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
986
   * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
987
   * the inpcb, and unlocks pcbinfo.
988
   */
989
0
  tcp_do_segment(ip6, th, msg, tp, drop_hdrlen, tlen, iptos, sig);
990
0
  return (IPPROTO_DONE);
991
992
  /*
993
   * samkumar: Removed some locking and debugging code under all three of
994
   * these labels: dropwithreset, dropunlock, and drop. I also removed some
995
   * memory management code (e.g., calling m_freem(m) if m != NULL) since
996
   * the caller of this function will take care of that kind of memory
997
   * management in TCPlp.
998
   */
999
0
dropwithreset:
1000
1001
  /*
1002
   * samkumar: The check against inp != NULL is now a check on tp != NULL.
1003
   */
1004
0
  if (tp != NULL) {
1005
0
    tcp_dropwithreset(ip6, th, tp, tp->instance, tlen, rstreason);
1006
0
  } else
1007
0
    tcp_dropwithreset(ip6, th, NULL, tpl->instance, tlen, rstreason);
1008
0
  goto drop;
1009
1010
0
dropunlock:
1011
0
drop:
1012
0
  return (IPPROTO_DONE);
1013
0
}
1014
1015
/*
1016
 * samkumar: Original signature
1017
 * static void
1018
 * tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
1019
 *     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
1020
 *     int ti_locked)
1021
 */
1022
static void
1023
tcp_do_segment(struct ip6_hdr* ip6, struct tcphdr *th, otMessage* msg,
1024
    struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
1025
    struct tcplp_signals* sig)
1026
0
{
1027
  /*
1028
   * samkumar: All code pertaining to locks, stats, and debug has been
1029
   * removed from this function.
1030
   */
1031
1032
0
  int thflags, acked, ourfinisacked, needoutput = 0;
1033
0
  int rstreason, todrop, win;
1034
0
  uint64_t tiwin;
1035
0
  struct tcpopt to;
1036
0
  int tfo_syn;
1037
0
  uint32_t ticks = tcplp_sys_get_ticks();
1038
0
  otInstance* instance = tp->instance;
1039
0
  thflags = th->th_flags;
1040
0
  tp->sackhint.last_sack_ack = 0;
1041
1042
  /*
1043
   * If this is either a state-changing packet or current state isn't
1044
   * established, we require a write lock on tcbinfo.  Otherwise, we
1045
   * allow the tcbinfo to be in either locked or unlocked, as the
1046
   * caller may have unnecessarily acquired a write lock due to a race.
1047
   */
1048
1049
  /* samkumar: There used to be synchronization code here. */
1050
0
  KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
1051
0
      __func__));
1052
0
  KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
1053
0
      __func__));
1054
1055
  /*
1056
   * Segment received on connection.
1057
   * Reset idle time and keep-alive timer.
1058
   * XXX: This should be done after segment
1059
   * validation to ignore broken/spoofed segs.
1060
   */
1061
0
  tp->t_rcvtime = ticks;
1062
0
  if (TCPS_HAVEESTABLISHED(tp->t_state))
1063
0
    tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
1064
1065
  /*
1066
   * Scale up the window into a 32-bit value.
1067
   * For the SYN_SENT state the scale is zero.
1068
   */
1069
0
  tiwin = th->th_win << tp->snd_scale;
1070
1071
  /*
1072
   * TCP ECN processing.
1073
   */
1074
  /*
1075
   * samkumar: I intentionally left the TCPSTAT_INC lines below commented
1076
   * out, to avoid altering the structure of the code too much by
1077
   * reorganizing the switch statement.
1078
   */
1079
0
  if (tp->t_flags & TF_ECN_PERMIT) {
1080
0
    if (thflags & TH_CWR)
1081
0
      tp->t_flags &= ~TF_ECN_SND_ECE;
1082
0
    switch (iptos & IPTOS_ECN_MASK) {
1083
0
    case IPTOS_ECN_CE:
1084
0
      tp->t_flags |= TF_ECN_SND_ECE;
1085
      //TCPSTAT_INC(tcps_ecn_ce);
1086
0
      break;
1087
0
    case IPTOS_ECN_ECT0:
1088
      //TCPSTAT_INC(tcps_ecn_ect0);
1089
0
      break;
1090
0
    case IPTOS_ECN_ECT1:
1091
      //TCPSTAT_INC(tcps_ecn_ect1);
1092
0
      break;
1093
0
    }
1094
1095
    /* Process a packet differently from RFC3168. */
1096
0
    cc_ecnpkt_handler(tp, th, iptos);
1097
1098
    /* Congestion experienced. */
1099
0
    if (thflags & TH_ECE) {
1100
0
      cc_cong_signal(tp, th, CC_ECN);
1101
0
    }
1102
0
  }
1103
1104
  /*
1105
   * Parse options on any incoming segment.
1106
   */
1107
0
  tcp_dooptions(&to, (uint8_t *)(th + 1),
1108
0
      ((th->th_off_x2 >> TH_OFF_SHIFT) << 2) - sizeof(struct tcphdr),
1109
0
      (thflags & TH_SYN) ? TO_SYN : 0);
1110
1111
  /*
1112
   * If echoed timestamp is later than the current time,
1113
   * fall back to non RFC1323 RTT calculation.  Normalize
1114
   * timestamp if syncookies were used when this connection
1115
   * was established.
1116
   */
1117
1118
0
  if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
1119
0
    to.to_tsecr -= tp->ts_offset;
1120
0
    if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
1121
0
      to.to_tsecr = 0;
1122
0
  }
1123
  /*
1124
   * If timestamps were negotiated during SYN/ACK they should
1125
   * appear on every segment during this session and vice versa.
1126
   */
1127
0
  if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
1128
    /* samkumar: See above comment regarding tcp_log_addrs. */
1129
0
    tcplp_sys_log("%s; %s: Timestamp missing, "
1130
0
      "no action", "<addrs go here>", __func__);
1131
0
  }
1132
0
  if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
1133
    /* samkumar: See above comment regarding tcp_log_addrs. */
1134
0
    tcplp_sys_log("%s; %s: Timestamp not expected, "
1135
0
      "no action", "<addrs go here>", __func__);
1136
0
  }
1137
1138
  /*
1139
   * Process options only when we get SYN/ACK back. The SYN case
1140
   * for incoming connections is handled in tcp_syncache.
1141
   * According to RFC1323 the window field in a SYN (i.e., a <SYN>
1142
   * or <SYN,ACK>) segment itself is never scaled.
1143
   * XXX this is traditional behavior, may need to be cleaned up.
1144
   */
1145
0
  if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
1146
0
    if ((to.to_flags & TOF_SCALE) &&
1147
0
        (tp->t_flags & TF_REQ_SCALE)) {
1148
0
      tp->t_flags |= TF_RCVD_SCALE;
1149
0
      tp->snd_scale = to.to_wscale;
1150
0
    }
1151
    /*
1152
     * Initial send window.  It will be updated with
1153
     * the next incoming segment to the scaled value.
1154
     */
1155
0
    tp->snd_wnd = th->th_win;
1156
0
    if (to.to_flags & TOF_TS) {
1157
0
      tp->t_flags |= TF_RCVD_TSTMP;
1158
0
      tp->ts_recent = to.to_tsval;
1159
0
      tp->ts_recent_age = tcp_ts_getticks();
1160
0
    }
1161
0
    if (to.to_flags & TOF_MSS)
1162
0
      tcp_mss(tp, to.to_mss);
1163
0
    if ((tp->t_flags & TF_SACK_PERMIT) &&
1164
0
        (to.to_flags & TOF_SACKPERM) == 0)
1165
0
      tp->t_flags &= ~TF_SACK_PERMIT;
1166
    /*
1167
     * samkumar: TCP Fast Open logic from FreeBSD 12.0.
1168
     */
1169
0
    if (IS_FASTOPEN(tp->t_flags)) {
1170
0
      if (to.to_flags & TOF_FASTOPEN) {
1171
0
        uint16_t mss;
1172
1173
0
        if (to.to_flags & TOF_MSS)
1174
0
          mss = to.to_mss;
1175
0
        else
1176
          /*
1177
           * samkumar: The original code here would set
1178
           * mss to either TCP6_MAXSS or TCP_MAXSS depending
1179
           * on whether the INP_IPV6 flag is present in
1180
           * tp->t_inpcb->inp_vflag. In TCPlp, we always
1181
           * assume IPv6.
1182
           */
1183
0
          mss = TCP6_MAXSS;
1184
0
        tcp_fastopen_update_cache(tp, mss,
1185
0
            to.to_tfo_len, to.to_tfo_cookie);
1186
0
      } else
1187
0
        tcp_fastopen_disable_path(tp);
1188
0
    }
1189
0
  }
1190
  /*
1191
   * Header prediction: check for the two common cases
1192
   * of a uni-directional data xfer.  If the packet has
1193
   * no control flags, is in-sequence, the window didn't
1194
   * change and we're not retransmitting, it's a
1195
   * candidate.  If the length is zero and the ack moved
1196
   * forward, we're the sender side of the xfer.  Just
1197
   * free the data acked & wake any higher level process
1198
   * that was blocked waiting for space.  If the length
1199
   * is non-zero and the ack didn't move, we're the
1200
   * receiver side.  If we're getting packets in-order
1201
   * (the reassembly queue is empty), add the data to
1202
   * the socket buffer and note that we need a delayed ack.
1203
   * Make sure that the hidden state-flags are also off.
1204
   * Since we check for TCPS_ESTABLISHED first, it can only
1205
   * be TH_NEEDSYN.
1206
   */
1207
  /*
1208
   * samkumar: Replaced LIST_EMPTY(&tp->tsegq with the call to bmp_isempty).
1209
   */
1210
0
  if (tp->t_state == TCPS_ESTABLISHED &&
1211
0
      th->th_seq == tp->rcv_nxt &&
1212
0
      (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
1213
0
      tp->snd_nxt == tp->snd_max &&
1214
0
      tiwin && tiwin == tp->snd_wnd &&
1215
0
      ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
1216
0
      bmp_isempty(tp->reassbmp, REASSBMP_SIZE(tp)) &&
1217
0
      ((to.to_flags & TOF_TS) == 0 ||
1218
0
       TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
1219
1220
    /*
1221
     * If last ACK falls within this segment's sequence numbers,
1222
     * record the timestamp.
1223
     * NOTE that the test is modified according to the latest
1224
     * proposal of the tcplw@cray.com list (Braden 1993/04/26).
1225
     */
1226
0
    if ((to.to_flags & TOF_TS) != 0 &&
1227
0
        SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
1228
0
      tp->ts_recent_age = tcp_ts_getticks();
1229
0
      tp->ts_recent = to.to_tsval;
1230
0
    }
1231
1232
0
    if (tlen == 0) {
1233
0
      if (SEQ_GT(th->th_ack, tp->snd_una) &&
1234
0
          SEQ_LEQ(th->th_ack, tp->snd_max) &&
1235
0
          !IN_RECOVERY(tp->t_flags) &&
1236
0
          (to.to_flags & TOF_SACK) == 0 &&
1237
0
          TAILQ_EMPTY(&tp->snd_holes)) {
1238
        /*
1239
         * This is a pure ack for outstanding data.
1240
         */
1241
1242
        /*
1243
         * "bad retransmit" recovery.
1244
         */
1245
0
        if (tp->t_rxtshift == 1 &&
1246
0
            tp->t_flags & TF_PREVVALID &&
1247
0
            (int)(ticks - tp->t_badrxtwin) < 0) {
1248
0
          cc_cong_signal(tp, th, CC_RTO_ERR);
1249
0
        }
1250
1251
        /*
1252
         * Recalculate the transmit timer / rtt.
1253
         *
1254
         * Some boxes send broken timestamp replies
1255
         * during the SYN+ACK phase, ignore
1256
         * timestamps of 0 or we could calculate a
1257
         * huge RTT and blow up the retransmit timer.
1258
         */
1259
1260
0
        if ((to.to_flags & TOF_TS) != 0 &&
1261
0
            to.to_tsecr) {
1262
0
          uint32_t t;
1263
1264
0
          t = tcp_ts_getticks() - to.to_tsecr;
1265
0
          if (!tp->t_rttlow || tp->t_rttlow > t)
1266
0
            tp->t_rttlow = t;
1267
0
          tcp_xmit_timer(tp,
1268
0
              TCP_TS_TO_TICKS(t) + 1);
1269
0
        } else if (tp->t_rtttime &&
1270
0
            SEQ_GT(th->th_ack, tp->t_rtseq)) {
1271
0
          if (!tp->t_rttlow ||
1272
0
              tp->t_rttlow > ticks - tp->t_rtttime)
1273
0
            tp->t_rttlow = ticks - tp->t_rtttime;
1274
0
          tcp_xmit_timer(tp,
1275
0
              ticks - tp->t_rtttime);
1276
0
        }
1277
1278
0
        acked = BYTES_THIS_ACK(tp, th);
1279
1280
        /*
1281
         * samkumar: Replaced sbdrop(&so->so_snd, acked) with this call
1282
         * to lbuf_pop.
1283
         */
1284
0
        {
1285
0
          uint32_t poppedbytes = lbuf_pop(&tp->sendbuf, acked, &sig->links_popped);
1286
0
          KASSERT(poppedbytes == acked, ("More bytes were acked than are in the send buffer"));
1287
0
          sig->bytes_acked += poppedbytes;
1288
0
        }
1289
0
        if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
1290
0
            SEQ_LEQ(th->th_ack, tp->snd_recover))
1291
0
          tp->snd_recover = th->th_ack - 1;
1292
1293
        /*
1294
         * Let the congestion control algorithm update
1295
         * congestion control related information. This
1296
         * typically means increasing the congestion
1297
         * window.
1298
         */
1299
0
        cc_ack_received(tp, th, CC_ACK);
1300
1301
0
        tp->snd_una = th->th_ack;
1302
        /*
1303
         * Pull snd_wl2 up to prevent seq wrap relative
1304
         * to th_ack.
1305
         */
1306
0
        tp->snd_wl2 = th->th_ack;
1307
0
        tp->t_dupacks = 0;
1308
1309
        /*
1310
         * If all outstanding data are acked, stop
1311
         * retransmit timer, otherwise restart timer
1312
         * using current (possibly backed-off) value.
1313
         * If process is waiting for space,
1314
         * wakeup/selwakeup/signal.  If data
1315
         * are ready to send, let tcplp_output
1316
         * decide between more output or persist.
1317
         */
1318
1319
0
        if (tp->snd_una == tp->snd_max)
1320
0
          tcp_timer_activate(tp, TT_REXMT, 0);
1321
0
        else if (!tcp_timer_active(tp, TT_PERSIST))
1322
0
          tcp_timer_activate(tp, TT_REXMT,
1323
0
                  tp->t_rxtcur);
1324
1325
        /*
1326
         * samkumar: There used to be a call to sowwakeup(so); here,
1327
         * which wakes up any threads waiting for the socket to
1328
         * become ready for writing. TCPlp handles its send buffer
1329
         * differently so we do not need to replace this call with
1330
         * specialized code to handle this.
1331
         */
1332
1333
        /*
1334
         * samkumar: Replaced sbavail(&so->so_snd) with this call to
1335
         * lbuf_used_space.
1336
         */
1337
0
        if (lbuf_used_space(&tp->sendbuf))
1338
0
          (void) tcplp_output(tp);
1339
0
        goto check_delack;
1340
0
      }
1341
0
    } else if (th->th_ack == tp->snd_una &&
1342
      /*
1343
       * samkumar: Replaced sbspace(&so->so_rcv) with this call to
1344
       * cbuf_free_space.
1345
       */
1346
0
        tlen <= cbuf_free_space(&tp->recvbuf)) {
1347
1348
      /*
1349
       * This is a pure, in-sequence data packet with
1350
       * nothing on the reassembly queue and we have enough
1351
       * buffer space to take it.
1352
       */
1353
      /* Clean receiver SACK report if present */
1354
0
      if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
1355
0
        tcp_clean_sackreport(tp);
1356
1357
0
      tp->rcv_nxt += tlen;
1358
      /*
1359
       * Pull snd_wl1 up to prevent seq wrap relative to
1360
       * th_seq.
1361
       */
1362
0
      tp->snd_wl1 = th->th_seq;
1363
      /*
1364
       * Pull rcv_up up to prevent seq wrap relative to
1365
       * rcv_nxt.
1366
       */
1367
0
      tp->rcv_up = tp->rcv_nxt;
1368
1369
    /*
1370
     * Automatic sizing of receive socket buffer.  Often the send
1371
     * buffer size is not optimally adjusted to the actual network
1372
     * conditions at hand (delay bandwidth product).  Setting the
1373
     * buffer size too small limits throughput on links with high
1374
     * bandwidth and high delay (eg. trans-continental/oceanic links).
1375
     *
1376
     * On the receive side the socket buffer memory is only rarely
1377
     * used to any significant extent.  This allows us to be much
1378
     * more aggressive in scaling the receive socket buffer.  For
1379
     * the case that the buffer space is actually used to a large
1380
     * extent and we run out of kernel memory we can simply drop
1381
     * the new segments; TCP on the sender will just retransmit it
1382
     * later.  Setting the buffer size too big may only consume too
1383
     * much kernel memory if the application doesn't read() from
1384
     * the socket or packet loss or reordering makes use of the
1385
     * reassembly queue.
1386
     *
1387
     * The criteria to step up the receive buffer one notch are:
1388
     *  1. Application has not set receive buffer size with
1389
     *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
1390
     *  2. the number of bytes received during the time it takes
1391
     *     one timestamp to be reflected back to us (the RTT);
1392
     *  3. received bytes per RTT is within seven eighth of the
1393
     *     current socket buffer size;
1394
     *  4. receive buffer size has not hit maximal automatic size;
1395
     *
1396
     * This algorithm does one step per RTT at most and only if
1397
     * we receive a bulk stream w/o packet losses or reorderings.
1398
     * Shrinking the buffer during idle times is not necessary as
1399
     * it doesn't consume any memory when idle.
1400
     *
1401
     * TODO: Only step up if the application is actually serving
1402
     * the buffer to better manage the socket buffer resources.
1403
     */
1404
1405
      /*
1406
       * samkumar: There used to be code here to dynamically size the
1407
       * receive buffer (tp->rfbuf_ts, rp->rfbuf_cnt, and the local
1408
       * newsize variable). In TCPlp, we don't support this, as the user
1409
       * allocates the receive buffer and its size can't be changed here.
1410
       * Therefore, I removed the code that does this. Note that the
1411
       * actual resizing of the buffer is done using sbreserve_locked,
1412
       * whose call comes later (not exactly where this comment is).
1413
       */
1414
1415
      /* Add data to socket buffer. */
1416
1417
      /*
1418
       * samkumar: The code that was here would just free the mbuf
1419
       * (with m_freem(m)) if SBS_CANTRCVMORE is set in
1420
       * so->so_rcv.sb_state. Otherwise, it would cut drop_hdrlen bytes
1421
       * from the mbuf (using m_adj(m, drop_hdrlen)) to discard the
1422
       * headers and then append the mbuf to the receive buffer using
1423
       * sbappendstream_locked(&so->so_rcv, m, 0). I've rewritten this
1424
       * to work the TCPlp way. The check to so->so_rcv.sb_state is
1425
       * replaced by a tcpiscantrcv call, and we copy bytes into
1426
       * TCPlp's circular buffer (since we designed it to avoid
1427
       * having dynamically-allocated memory for the receive buffer).
1428
       */
1429
1430
0
      if (!tpiscantrcv(tp)) {
1431
0
        cbuf_write(&tp->recvbuf, msg, otMessageGetOffset(msg) + drop_hdrlen, tlen, cbuf_copy_from_message);
1432
0
        if (tlen > 0) {
1433
0
          sig->recvbuf_added = true;
1434
0
        }
1435
0
      } else {
1436
        /*
1437
         * samkumar: We already know tlen != 0, so if we got here, then
1438
         * it means that we got data after we called SHUT_RD, or after
1439
         * receiving a FIN. I'm going to drop the connection in this
1440
         * case. I think FreeBSD might have just dropped the packet
1441
         * silently, but Linux handles it this way; this seems to be
1442
         * the right approach to me.
1443
         */
1444
0
        tcp_drop(tp, ECONNABORTED);
1445
0
        goto drop;
1446
0
      }
1447
      /* NB: sorwakeup_locked() does an implicit unlock. */
1448
      /*
1449
       * samkumar: There used to be a call to sorwakeup_locked(so); here,
1450
       * which wakes up any threads waiting for the socket to become
1451
       * ready for reading. TCPlp handles its buffering
1452
       * differently so we do not need to replace this call with
1453
       * specialized code to handle this.
1454
       */
1455
0
      if (DELAY_ACK(tp, tlen)) {
1456
0
        tp->t_flags |= TF_DELACK;
1457
0
      } else {
1458
0
        tp->t_flags |= TF_ACKNOW;
1459
0
        tcplp_output(tp);
1460
0
      }
1461
0
      goto check_delack;
1462
0
    }
1463
0
  }
1464
1465
  /*
1466
   * Calculate amount of space in receive window,
1467
   * and then do TCP input processing.
1468
   * Receive window is amount of space in rcv queue,
1469
   * but not less than advertised window.
1470
   */
1471
  /* samkumar: Replaced sbspace(&so->so_rcv) with call to cbuf_free_space. */
1472
0
  win = cbuf_free_space(&tp->recvbuf);
1473
0
  if (win < 0)
1474
0
    win = 0;
1475
0
  tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
1476
1477
  /* Reset receive buffer auto scaling when not in bulk receive mode. */
1478
  /* samkumar: Removed this receive buffer autoscaling code. */
1479
1480
0
  switch (tp->t_state) {
1481
1482
  /*
1483
   * If the state is SYN_RECEIVED:
1484
   *  if seg contains an ACK, but not for our SYN/ACK, send a RST.
1485
   *  (Added by Sam) if seg is resending the original SYN, resend the SYN/ACK
1486
   */
1487
  /*
1488
   * samkumar: If we receive a retransmission of the original SYN, then
1489
   * resend the SYN/ACK segment. This case was probably handled by the
1490
   * SYN cache. Because TCPlp does not use a SYN cache, we need to write
1491
   * custom logic for it. It is handled in the "else if" clause below.
1492
   */
1493
0
  case TCPS_SYN_RECEIVED:
1494
0
    if ((thflags & TH_ACK) &&
1495
0
        (SEQ_LEQ(th->th_ack, tp->snd_una) ||
1496
0
         SEQ_GT(th->th_ack, tp->snd_max))) {
1497
0
        rstreason = BANDLIM_RST_OPENPORT;
1498
0
        goto dropwithreset;
1499
0
    } else if (!IS_FASTOPEN(tp->t_flags) && (thflags & TH_SYN) && !(thflags & TH_ACK) && (th->th_seq == tp->irs)) {
1500
0
      tp->t_flags |= TF_ACKNOW;
1501
0
    }
1502
    /*
1503
     * samkumar: TCP Fast Open Logic from FreeBSD 12.0.
1504
     */
1505
0
    if (IS_FASTOPEN(tp->t_flags)) {
1506
      /*
1507
       * When a TFO connection is in SYN_RECEIVED, the
1508
       * only valid packets are the initial SYN, a
1509
       * retransmit/copy of the initial SYN (possibly with
1510
       * a subset of the original data), a valid ACK, a
1511
       * FIN, or a RST.
1512
       */
1513
0
      if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
1514
0
        rstreason = BANDLIM_RST_OPENPORT;
1515
0
        goto dropwithreset;
1516
0
      } else if (thflags & TH_SYN) {
1517
        /* non-initial SYN is ignored */
1518
0
        if ((tcp_timer_active(tp, TT_DELACK) || 
1519
0
             tcp_timer_active(tp, TT_REXMT)))
1520
0
          goto drop;
1521
0
      } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
1522
0
        goto drop;
1523
0
      }
1524
0
    }
1525
0
    break;
1526
1527
  /*
1528
   * If the state is SYN_SENT:
1529
   *  if seg contains an ACK, but not for our SYN, drop the input.
1530
   *  if seg contains a RST, then drop the connection.
1531
   *  if seg does not contain SYN, then drop it.
1532
   * Otherwise this is an acceptable SYN segment
1533
   *  initialize tp->rcv_nxt and tp->irs
1534
   *  if seg contains ack then advance tp->snd_una
1535
   *  if seg contains an ECE and ECN support is enabled, the stream
1536
   *      is ECN capable.
1537
   *  if SYN has been acked change to ESTABLISHED else SYN_RCVD state
1538
   *  arrange for segment to be acked (eventually)
1539
   *  continue processing rest of data/controls, beginning with URG
1540
   */
1541
0
  case TCPS_SYN_SENT:
1542
0
    if ((thflags & TH_ACK) &&
1543
0
        (SEQ_LEQ(th->th_ack, tp->iss) ||
1544
0
         SEQ_GT(th->th_ack, tp->snd_max))) {
1545
0
      rstreason = BANDLIM_UNLIMITED;
1546
0
      goto dropwithreset;
1547
0
    }
1548
0
    if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
1549
0
      tp = tcp_drop(tp, ECONNREFUSED);
1550
0
    }
1551
0
    if (thflags & TH_RST)
1552
0
      goto drop;
1553
0
    if (!(thflags & TH_SYN))
1554
0
      goto drop;
1555
1556
0
    tp->irs = th->th_seq;
1557
0
    tcp_rcvseqinit(tp);
1558
0
    if (thflags & TH_ACK) {
1559
0
      int tfo_partial_ack = 0;
1560
1561
      /*
1562
       * samkumar: Removed call to soisconnected(so), since TCPlp has its
1563
       * own buffering.
1564
       */
1565
1566
      /* Do window scaling on this connection? */
1567
0
      if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1568
0
        (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1569
0
        tp->rcv_scale = tp->request_r_scale;
1570
0
      }
1571
0
      tp->rcv_adv += imin(tp->rcv_wnd,
1572
0
          TCP_MAXWIN << tp->rcv_scale);
1573
0
      tp->snd_una++;    /* SYN is acked */
1574
      /*
1575
       * If not all the data that was sent in the TFO SYN
1576
       * has been acked, resend the remainder right away.
1577
       */
1578
0
      if (IS_FASTOPEN(tp->t_flags) &&
1579
0
          (tp->snd_una != tp->snd_max)) {
1580
0
        tp->snd_nxt = th->th_ack;
1581
0
        tfo_partial_ack = 1;
1582
0
      }
1583
      /*
1584
       * If there's data, delay ACK; if there's also a FIN
1585
       * ACKNOW will be turned on later.
1586
       */
1587
0
      if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial_ack)
1588
0
        tcp_timer_activate(tp, TT_DELACK,
1589
0
            tcp_delacktime);
1590
0
      else
1591
0
        tp->t_flags |= TF_ACKNOW;
1592
1593
0
      if ((thflags & TH_ECE) && V_tcp_do_ecn) {
1594
0
        tp->t_flags |= TF_ECN_PERMIT;
1595
0
      }
1596
1597
      /*
1598
       * Received <SYN,ACK> in SYN_SENT[*] state.
1599
       * Transitions:
1600
       *  SYN_SENT  --> ESTABLISHED
1601
       *  SYN_SENT* --> FIN_WAIT_1
1602
       */
1603
0
      tp->t_starttime = ticks;
1604
0
      if (tp->t_flags & TF_NEEDFIN) {
1605
0
        tcp_state_change(tp, TCPS_FIN_WAIT_1);
1606
0
        tp->t_flags &= ~TF_NEEDFIN;
1607
0
        thflags &= ~TH_SYN;
1608
0
      } else {
1609
0
        tcp_state_change(tp, TCPS_ESTABLISHED);
1610
        /* samkumar: Set conn_established signal for TCPlp. */
1611
0
        sig->conn_established = true;
1612
0
        cc_conn_init(tp);
1613
0
        tcp_timer_activate(tp, TT_KEEP,
1614
0
            TP_KEEPIDLE(tp));
1615
0
      }
1616
0
    } else {
1617
      /*
1618
       * Received initial SYN in SYN-SENT[*] state =>
1619
       * simultaneous open.
1620
       * If it succeeds, connection is half-synchronized.
1621
       * Otherwise, do 3-way handshake:
1622
       *        SYN-SENT -> SYN-RECEIVED
1623
       *        SYN-SENT* -> SYN-RECEIVED*
1624
       */
1625
0
      tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
1626
0
      tcp_timer_activate(tp, TT_REXMT, 0);
1627
0
      tcp_state_change(tp, TCPS_SYN_RECEIVED);
1628
      /*
1629
       * samkumar: We would have incremented snd_next in tcplp_output when
1630
       * we sent the original SYN, so decrement it here. (Another
1631
       * consequence of removing the SYN cache.)
1632
       */
1633
0
      tp->snd_nxt--;
1634
0
    }
1635
1636
    /*
1637
     * Advance th->th_seq to correspond to first data byte.
1638
     * If data, trim to stay within window,
1639
     * dropping FIN if necessary.
1640
     */
1641
0
    th->th_seq++;
1642
0
    if (tlen > tp->rcv_wnd) {
1643
0
      todrop = tlen - tp->rcv_wnd;
1644
      /*
1645
       * samkumar: I removed a call to m_adj(m, -todrop), which intends
1646
       * to trim the data so it fits in the window. We can just read less
1647
       * when copying into the receive buffer in TCPlp, so we don't need
1648
       * to do this.
1649
       */
1650
0
      (void) todrop; /* samkumar: Prevent a compiler warning */
1651
0
      tlen = tp->rcv_wnd;
1652
0
      thflags &= ~TH_FIN;
1653
0
    }
1654
0
    tp->snd_wl1 = th->th_seq - 1;
1655
0
    tp->rcv_up = th->th_seq;
1656
    /*
1657
     * Client side of transaction: already sent SYN and data.
1658
     * If the remote host used T/TCP to validate the SYN,
1659
     * our data will be ACK'd; if so, enter normal data segment
1660
     * processing in the middle of step 5, ack processing.
1661
     * Otherwise, goto step 6.
1662
     */
1663
0
    if (thflags & TH_ACK)
1664
0
      goto process_ACK;
1665
1666
0
    goto step6;
1667
1668
  /*
1669
   * If the state is LAST_ACK or CLOSING or TIME_WAIT:
1670
   *      do normal processing.
1671
   *
1672
   * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
1673
   */
1674
0
  case TCPS_LAST_ACK:
1675
0
  case TCPS_CLOSING:
1676
0
    break;  /* continue normal processing */
1677
0
  }
1678
1679
  /*
1680
   * States other than LISTEN or SYN_SENT.
1681
   * First check the RST flag and sequence number since reset segments
1682
   * are exempt from the timestamp and connection count tests.  This
1683
   * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
1684
   * below which allowed reset segments in half the sequence space
1685
   * to fall though and be processed (which gives forged reset
1686
   * segments with a random sequence number a 50 percent chance of
1687
   * killing a connection).
1688
   * Then check timestamp, if present.
1689
   * Then check the connection count, if present.
1690
   * Then check that at least some bytes of segment are within
1691
   * receive window.  If segment begins before rcv_nxt,
1692
   * drop leading data (and SYN); if nothing left, just ack.
1693
   */
1694
0
  if (thflags & TH_RST) {
1695
    /*
1696
     * RFC5961 Section 3.2
1697
     *
1698
     * - RST drops connection only if SEG.SEQ == RCV.NXT.
1699
     * - If RST is in window, we send challenge ACK.
1700
     *
1701
     * Note: to take into account delayed ACKs, we should
1702
     *   test against last_ack_sent instead of rcv_nxt.
1703
     * Note 2: we handle special case of closed window, not
1704
     *   covered by the RFC.
1705
     */
1706
0
    if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
1707
0
        SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
1708
0
        (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
1709
1710
      /*
1711
       * samkumar: This if statement used to also be prefaced with
1712
       * "V_tcp_insecure_rst ||". But I removed it, since there's no
1713
       * reason to support an insecure option in TCPlp (my guess is that
1714
       * FreeBSD supported it for legacy reasons).
1715
       */
1716
0
      if (tp->last_ack_sent == th->th_seq) {
1717
        /*
1718
         * samkumar: Normally, the error number would be stored in
1719
         * so->so_error. Instead, we put it in this "droperror" local
1720
         * variable and then pass it to tcplp_sys_connection_lost.
1721
         */
1722
0
        int droperror = 0;
1723
        /* Drop the connection. */
1724
0
        switch (tp->t_state) {
1725
0
        case TCPS_SYN_RECEIVED:
1726
0
          droperror = ECONNREFUSED;
1727
0
          goto close;
1728
0
        case TCPS_ESTABLISHED:
1729
0
        case TCPS_FIN_WAIT_1:
1730
0
        case TCPS_FIN_WAIT_2:
1731
0
        case TCPS_CLOSE_WAIT:
1732
0
          droperror = ECONNRESET;
1733
0
        close:
1734
0
          tcp_state_change(tp, TCPS_CLOSED);
1735
          /* FALLTHROUGH */
1736
0
        default:
1737
0
          tp = tcp_close_tcb(tp);
1738
0
          tcplp_sys_connection_lost(tp, droperror);
1739
0
        }
1740
0
      } else {
1741
        /* Send challenge ACK. */
1742
0
        tcp_respond(tp, tp->instance, ip6, th, tp->rcv_nxt, tp->snd_nxt, TH_ACK);
1743
0
        tp->last_ack_sent = tp->rcv_nxt;
1744
0
      }
1745
0
    }
1746
0
    goto drop;
1747
0
  }
1748
1749
  /*
1750
   * RFC5961 Section 4.2
1751
   * Send challenge ACK for any SYN in synchronized state.
1752
   */
1753
  /*
1754
   * samkumar: I added the check for the SYN-RECEIVED state in this if
1755
   * statement (another consequence of removing the SYN cache).
1756
   */
1757
0
  if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && tp->t_state != TCP6S_SYN_RECEIVED) {
1758
    /*
1759
     * samkumar: The modern way to handle this is to send a Challenge ACK.
1760
     * FreeBSD supports this, but it also has this V_tcp_insecure_syn
1761
     * options that will cause it to drop the connection if the SYN falls
1762
     * in the receive window. In TCPlp we *only* support Challenge ACKs
1763
     * (the secure way of doing it), so I've removed code for the insecure
1764
     * way. (Presumably the reason why FreeBSD supports the insecure way is
1765
     * for legacy code, which we don't really care about in TCPlp).
1766
     */
1767
    /* Send challenge ACK. */
1768
0
    tcplp_sys_log("Sending challenge ACK");
1769
0
    tcp_respond(tp, tp->instance, ip6, th, tp->rcv_nxt, tp->snd_nxt, TH_ACK);
1770
0
    tp->last_ack_sent = tp->rcv_nxt;
1771
0
    goto drop;
1772
0
  }
1773
1774
  /*
1775
   * RFC 1323 PAWS: If we have a timestamp reply on this segment
1776
   * and it's less than ts_recent, drop it.
1777
   */
1778
0
  if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
1779
0
      TSTMP_LT(to.to_tsval, tp->ts_recent)) {
1780
1781
    /* Check to see if ts_recent is over 24 days old.  */
1782
0
    if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
1783
      /*
1784
       * Invalidate ts_recent.  If this segment updates
1785
       * ts_recent, the age will be reset later and ts_recent
1786
       * will get a valid value.  If it does not, setting
1787
       * ts_recent to zero will at least satisfy the
1788
       * requirement that zero be placed in the timestamp
1789
       * echo reply when ts_recent isn't valid.  The
1790
       * age isn't reset until we get a valid ts_recent
1791
       * because we don't want out-of-order segments to be
1792
       * dropped when ts_recent is old.
1793
       */
1794
0
      tp->ts_recent = 0;
1795
0
    } else {
1796
0
      if (tlen)
1797
0
        goto dropafterack;
1798
0
      goto drop;
1799
0
    }
1800
0
  }
1801
1802
  /*
1803
   * In the SYN-RECEIVED state, validate that the packet belongs to
1804
   * this connection before trimming the data to fit the receive
1805
   * window.  Check the sequence number versus IRS since we know
1806
   * the sequence numbers haven't wrapped.  This is a partial fix
1807
   * for the "LAND" DoS attack.
1808
   */
1809
0
  if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
1810
0
    rstreason = BANDLIM_RST_OPENPORT;
1811
0
    goto dropwithreset;
1812
0
  }
1813
1814
0
  todrop = tp->rcv_nxt - th->th_seq;
1815
0
  if (todrop > 0) {
1816
0
    if (thflags & TH_SYN) {
1817
0
      thflags &= ~TH_SYN;
1818
0
      th->th_seq++;
1819
0
      if (th->th_urp > 1)
1820
0
        th->th_urp--;
1821
0
      else
1822
0
        thflags &= ~TH_URG;
1823
0
      todrop--;
1824
0
    }
1825
    /*
1826
     * Following if statement from Stevens, vol. 2, p. 960.
1827
     */
1828
0
    if (todrop > tlen
1829
0
        || (todrop == tlen && (thflags & TH_FIN) == 0)) {
1830
      /*
1831
       * Any valid FIN must be to the left of the window.
1832
       * At this point the FIN must be a duplicate or out
1833
       * of sequence; drop it.
1834
       */
1835
0
      thflags &= ~TH_FIN;
1836
1837
      /*
1838
       * Send an ACK to resynchronize and drop any data.
1839
       * But keep on processing for RST or ACK.
1840
       */
1841
0
      tp->t_flags |= TF_ACKNOW;
1842
0
      todrop = tlen;
1843
0
    }
1844
    /* samkumar: There was an else case that only collected stats. */
1845
0
    drop_hdrlen += todrop;  /* drop from the top afterwards */
1846
0
    th->th_seq += todrop;
1847
0
    tlen -= todrop;
1848
0
    if (th->th_urp > todrop)
1849
0
      th->th_urp -= todrop;
1850
0
    else {
1851
0
      thflags &= ~TH_URG;
1852
0
      th->th_urp = 0;
1853
0
    }
1854
0
  }
1855
1856
  /*
1857
   * If new data are received on a connection after the
1858
   * user processes are gone, then RST the other end.
1859
   */
1860
  /*
1861
   * samkumar: TCPlp is designed for embedded systems where there is no
1862
   * concept of a "process" that has allocated a TCP socket. Therefore, we
1863
   * do not implement the functionality in the above comment (the code for
1864
   * it used to be here, and I removed it).
1865
   */
1866
  /*
1867
   * If segment ends after window, drop trailing data
1868
   * (and PUSH and FIN); if nothing left, just ACK.
1869
   */
1870
0
  todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
1871
0
  if (todrop > 0) {
1872
0
    if (todrop >= tlen) {
1873
      /*
1874
       * If window is closed can only take segments at
1875
       * window edge, and have to drop data and PUSH from
1876
       * incoming segments.  Continue processing, but
1877
       * remember to ack.  Otherwise, drop segment
1878
       * and ack.
1879
       */
1880
0
      if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
1881
0
        tp->t_flags |= TF_ACKNOW;
1882
0
      } else
1883
0
        goto dropafterack;
1884
0
    }
1885
    /*
1886
     * samkumar: I removed a call to m_adj(m, -todrop), which intends
1887
     * to trim the data so it fits in the window. We can just read less
1888
     * when copying into the receive buffer in TCPlp, so we don't need
1889
     * to do this. Subtracting it from tlen gives us enough information to
1890
     * do this later. In FreeBSD, this isn't possible because the mbuf
1891
     * itself becomes part of the receive buffer, so the mbuf has to be
1892
     * trimmed in order for this to work out.
1893
     */
1894
0
    tlen -= todrop;
1895
0
    thflags &= ~(TH_PUSH|TH_FIN);
1896
0
  }
1897
1898
  /*
1899
   * If last ACK falls within this segment's sequence numbers,
1900
   * record its timestamp.
1901
   * NOTE:
1902
   * 1) That the test incorporates suggestions from the latest
1903
   *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
1904
   * 2) That updating only on newer timestamps interferes with
1905
   *    our earlier PAWS tests, so this check should be solely
1906
   *    predicated on the sequence space of this segment.
1907
   * 3) That we modify the segment boundary check to be
1908
   *        Last.ACK.Sent <= SEG.SEQ + SEG.Len
1909
   *    instead of RFC1323's
1910
   *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
1911
   *    This modified check allows us to overcome RFC1323's
1912
   *    limitations as described in Stevens TCP/IP Illustrated
1913
   *    Vol. 2 p.869. In such cases, we can still calculate the
1914
   *    RTT correctly when RCV.NXT == Last.ACK.Sent.
1915
   */
1916
1917
0
  if ((to.to_flags & TOF_TS) != 0 &&
1918
0
      SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
1919
0
      SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
1920
0
    ((thflags & (TH_SYN|TH_FIN)) != 0))) {
1921
0
    tp->ts_recent_age = tcp_ts_getticks();
1922
0
    tp->ts_recent = to.to_tsval;
1923
0
  }
1924
1925
  /*
1926
   * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
1927
   * flag is on (half-synchronized state), then queue data for
1928
   * later processing; else drop segment and return.
1929
   */
1930
0
  if ((thflags & TH_ACK) == 0) {
1931
0
    if (tp->t_state == TCPS_SYN_RECEIVED ||
1932
0
        (tp->t_flags & TF_NEEDSYN)) {
1933
0
      if (tp->t_state == TCPS_SYN_RECEIVED &&
1934
0
          IS_FASTOPEN(tp->t_flags)) {
1935
0
        tp->snd_wnd = tiwin;
1936
0
        cc_conn_init(tp);
1937
0
      }
1938
0
      goto step6;
1939
0
    } else if (tp->t_flags & TF_ACKNOW)
1940
0
      goto dropafterack;
1941
0
    else
1942
0
      goto drop;
1943
0
  }
1944
1945
0
  tcplp_sys_log("Processing ACK");
1946
1947
  /*
1948
   * Ack processing.
1949
   */
1950
0
  switch (tp->t_state) {
1951
1952
  /*
1953
   * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
1954
   * ESTABLISHED state and continue processing.
1955
   * The ACK was checked above.
1956
   */
1957
0
  case TCPS_SYN_RECEIVED:
1958
    /*
1959
     * samkumar: Removed call to soisconnected(so), since TCPlp has its
1960
     * own buffering.
1961
     */
1962
    /* Do window scaling? */
1963
0
    if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1964
0
      (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1965
0
      tp->rcv_scale = tp->request_r_scale;
1966
0
      tp->snd_wnd = tiwin;
1967
0
    }
1968
    /*
1969
     * Make transitions:
1970
     *      SYN-RECEIVED  -> ESTABLISHED
1971
     *      SYN-RECEIVED* -> FIN-WAIT-1
1972
     */
1973
0
    tp->t_starttime = ticks;
1974
    /*
1975
     * samkumar: I'm eliminating the TFO pending counter.
1976
     */
1977
0
    if (IS_FASTOPEN(tp->t_flags)/* && tp->t_tfo_pending */) {\
1978
      /*
1979
      tcp_fastopen_decrement_counter(tp->t_tfo_pending);
1980
      tp->t_tfo_pending = NULL;
1981
      */
1982
1983
      /*
1984
       * Account for the ACK of our SYN prior to
1985
       * regular ACK processing below.
1986
       */ 
1987
0
      tp->snd_una++;
1988
0
    }
1989
0
    if (tp->t_flags & TF_NEEDFIN) {
1990
0
      tcp_state_change(tp, TCPS_FIN_WAIT_1);
1991
0
      tp->t_flags &= ~TF_NEEDFIN;
1992
0
    } else {
1993
0
      tcp_state_change(tp, TCPS_ESTABLISHED);
1994
      /* samkumar: Set conn_established signal for TCPlp. */
1995
0
      sig->conn_established = true;
1996
      /*
1997
       * TFO connections call cc_conn_init() during SYN
1998
       * processing.  Calling it again here for such
1999
       * connections is not harmless as it would undo the
2000
       * snd_cwnd reduction that occurs when a TFO SYN|ACK
2001
       * is retransmitted.
2002
       */
2003
0
      if (!IS_FASTOPEN(tp->t_flags))
2004
0
        cc_conn_init(tp);
2005
0
      tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
2006
      /*
2007
       * samkumar: I added this check to account for simultaneous open.
2008
       * If this socket was opened actively, then the fact that we are
2009
       * in SYN-RECEIVED indicates that we are in simultaneous open.
2010
       * Therefore, don't ACK the SYN-ACK (unless it contains data or
2011
       * something, which will be processed later).
2012
       */
2013
0
      if (!tpispassiveopen(tp)) {
2014
0
        tp->t_flags &= ~TF_ACKNOW;
2015
0
      } else {
2016
        /*
2017
         * samkumar: Otherwise, we entered the ESTABLISHED state by
2018
         * accepting a connection, so call the appropriate callback in
2019
         * TCPlp. TODO: consider using signals to handle this?
2020
         */
2021
0
         bool accepted = tcplp_sys_accepted_connection(tp->accepted_from, tp, &ip6->ip6_src, th->th_sport);
2022
0
         if (!accepted) {
2023
0
           rstreason = ECONNREFUSED;
2024
0
           goto dropwithreset;
2025
0
         }
2026
0
       }
2027
0
    }
2028
    /*
2029
     * If segment contains data or ACK, will call tcp_reass()
2030
     * later; if not, do so now to pass queued data to user.
2031
     */
2032
0
    if (tlen == 0 && (thflags & TH_FIN) == 0)
2033
0
      (void) tcp_reass(tp, (struct tcphdr *)0, 0,
2034
0
          (otMessage*)0, 0, sig);
2035
2036
0
    tp->snd_wl1 = th->th_seq - 1;
2037
    /* FALLTHROUGH */
2038
2039
  /*
2040
   * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
2041
   * ACKs.  If the ack is in the range
2042
   *  tp->snd_una < th->th_ack <= tp->snd_max
2043
   * then advance tp->snd_una to th->th_ack and drop
2044
   * data from the retransmission queue.  If this ACK reflects
2045
   * more up to date window information we update our window information.
2046
   */
2047
0
  case TCPS_ESTABLISHED:
2048
0
  case TCPS_FIN_WAIT_1:
2049
0
  case TCPS_FIN_WAIT_2:
2050
0
  case TCPS_CLOSE_WAIT:
2051
0
  case TCPS_CLOSING:
2052
0
  case TCPS_LAST_ACK:
2053
0
    if (SEQ_GT(th->th_ack, tp->snd_max)) {
2054
0
      goto dropafterack;
2055
0
    }
2056
2057
0
    if ((tp->t_flags & TF_SACK_PERMIT) &&
2058
0
        ((to.to_flags & TOF_SACK) ||
2059
0
         !TAILQ_EMPTY(&tp->snd_holes)))
2060
0
      tcp_sack_doack(tp, &to, th->th_ack);
2061
2062
0
    if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
2063
0
      if (tlen == 0 && tiwin == tp->snd_wnd) {
2064
        /*
2065
         * If this is the first time we've seen a
2066
         * FIN from the remote, this is not a
2067
         * duplicate and it needs to be processed
2068
         * normally.  This happens during a
2069
         * simultaneous close.
2070
         */
2071
0
        if ((thflags & TH_FIN) &&
2072
0
            (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
2073
0
          tp->t_dupacks = 0;
2074
0
          break;
2075
0
        }
2076
        /*
2077
         * If we have outstanding data (other than
2078
         * a window probe), this is a completely
2079
         * duplicate ack (ie, window info didn't
2080
         * change and FIN isn't set),
2081
         * the ack is the biggest we've
2082
         * seen and we've seen exactly our rexmt
2083
         * threshhold of them, assume a packet
2084
         * has been dropped and retransmit it.
2085
         * Kludge snd_nxt & the congestion
2086
         * window so we send only this one
2087
         * packet.
2088
         *
2089
         * We know we're losing at the current
2090
         * window size so do congestion avoidance
2091
         * (set ssthresh to half the current window
2092
         * and pull our congestion window back to
2093
         * the new ssthresh).
2094
         *
2095
         * Dup acks mean that packets have left the
2096
         * network (they're now cached at the receiver)
2097
         * so bump cwnd by the amount in the receiver
2098
         * to keep a constant cwnd packets in the
2099
         * network.
2100
         *
2101
         * When using TCP ECN, notify the peer that
2102
         * we reduced the cwnd.
2103
         */
2104
0
        if (!tcp_timer_active(tp, TT_REXMT) ||
2105
0
            th->th_ack != tp->snd_una)
2106
0
          tp->t_dupacks = 0;
2107
0
        else if (++tp->t_dupacks > tcprexmtthresh ||
2108
0
             IN_FASTRECOVERY(tp->t_flags)) {
2109
0
          cc_ack_received(tp, th, CC_DUPACK);
2110
0
          if ((tp->t_flags & TF_SACK_PERMIT) &&
2111
0
              IN_FASTRECOVERY(tp->t_flags)) {
2112
0
            int awnd;
2113
2114
            /*
2115
             * Compute the amount of data in flight first.
2116
             * We can inject new data into the pipe iff
2117
             * we have less than 1/2 the original window's
2118
             * worth of data in flight.
2119
             */
2120
0
            awnd = (tp->snd_nxt - tp->snd_fack) +
2121
0
              tp->sackhint.sack_bytes_rexmit;
2122
0
            if (awnd < tp->snd_ssthresh) {
2123
0
              tp->snd_cwnd += tp->t_maxseg;
2124
0
              if (tp->snd_cwnd > tp->snd_ssthresh)
2125
0
                tp->snd_cwnd = tp->snd_ssthresh;
2126
0
            }
2127
0
          } else
2128
0
            tp->snd_cwnd += tp->t_maxseg;
2129
#ifdef INSTRUMENT_TCP
2130
          tcplp_sys_log("TCP DUPACK");
2131
#endif
2132
0
          (void) tcplp_output(tp);
2133
0
          goto drop;
2134
0
        } else if (tp->t_dupacks == tcprexmtthresh) {
2135
0
          tcp_seq onxt = tp->snd_nxt;
2136
2137
          /*
2138
           * If we're doing sack, check to
2139
           * see if we're already in sack
2140
           * recovery. If we're not doing sack,
2141
           * check to see if we're in newreno
2142
           * recovery.
2143
           */
2144
0
          if (tp->t_flags & TF_SACK_PERMIT) {
2145
0
            if (IN_FASTRECOVERY(tp->t_flags)) {
2146
0
              tp->t_dupacks = 0;
2147
0
              break;
2148
0
            }
2149
0
          } else {
2150
0
            if (SEQ_LEQ(th->th_ack,
2151
0
                tp->snd_recover)) {
2152
0
              tp->t_dupacks = 0;
2153
0
              break;
2154
0
            }
2155
0
          }
2156
          /* Congestion signal before ack. */
2157
0
          cc_cong_signal(tp, th, CC_NDUPACK);
2158
0
          cc_ack_received(tp, th, CC_DUPACK);
2159
0
          tcp_timer_activate(tp, TT_REXMT, 0);
2160
0
          tp->t_rtttime = 0;
2161
2162
#ifdef INSTRUMENT_TCP
2163
          tcplp_sys_log("TCP DUPACK_THRESH");
2164
#endif
2165
0
          if (tp->t_flags & TF_SACK_PERMIT) {
2166
0
            tp->sack_newdata = tp->snd_nxt;
2167
0
            tp->snd_cwnd = tp->t_maxseg;
2168
0
            (void) tcplp_output(tp);
2169
0
            goto drop;
2170
0
          }
2171
2172
0
          tp->snd_nxt = th->th_ack;
2173
0
          tp->snd_cwnd = tp->t_maxseg;
2174
0
          (void) tcplp_output(tp);
2175
          /*
2176
           * samkumar: I added casts to uint64_t below to
2177
           * fix an OpenThread code scanning alert relating
2178
           * to integer overflow in multiplication.
2179
           */
2180
0
          tp->snd_cwnd = tp->snd_ssthresh +
2181
0
               ((uint64_t) tp->t_maxseg) *
2182
0
               ((uint64_t) (tp->t_dupacks - tp->snd_limited));
2183
#ifdef INSTRUMENT_TCP
2184
          tcplp_sys_log("TCP SET_cwnd %d", (int) tp->snd_cwnd);
2185
#endif
2186
0
          if (SEQ_GT(onxt, tp->snd_nxt))
2187
0
            tp->snd_nxt = onxt;
2188
0
          goto drop;
2189
0
        } else if (V_tcp_do_rfc3042) {
2190
          /*
2191
           * Process first and second duplicate
2192
           * ACKs. Each indicates a segment
2193
           * leaving the network, creating room
2194
           * for more. Make sure we can send a
2195
           * packet on reception of each duplicate
2196
           * ACK by increasing snd_cwnd by one
2197
           * segment. Restore the original
2198
           * snd_cwnd after packet transmission.
2199
           */
2200
0
          uint64_t oldcwnd;
2201
0
          tcp_seq oldsndmax;
2202
0
          uint32_t sent;
2203
0
          int avail;
2204
0
          cc_ack_received(tp, th, CC_DUPACK);
2205
0
          oldcwnd = tp->snd_cwnd;
2206
0
          oldsndmax = tp->snd_max;
2207
2208
#ifdef INSTRUMENT_TCP
2209
          tcplp_sys_log("TCP LIM_TRANS");
2210
#endif
2211
2212
0
          KASSERT(tp->t_dupacks == 1 ||
2213
0
              tp->t_dupacks == 2,
2214
0
              ("%s: dupacks not 1 or 2",
2215
0
              __func__));
2216
0
          if (tp->t_dupacks == 1)
2217
0
            tp->snd_limited = 0;
2218
0
          tp->snd_cwnd =
2219
0
              (tp->snd_nxt - tp->snd_una) +
2220
0
              (tp->t_dupacks - tp->snd_limited) *
2221
0
              tp->t_maxseg;
2222
          /*
2223
           * Only call tcplp_output when there
2224
           * is new data available to be sent.
2225
           * Otherwise we would send pure ACKs.
2226
           */
2227
          /*
2228
           * samkumar: Replace sbavail(&so->so_snd) with the call to
2229
           * lbuf_used_space.
2230
           */
2231
0
          avail = lbuf_used_space(&tp->sendbuf) -
2232
0
              (tp->snd_nxt - tp->snd_una);
2233
0
          if (avail > 0)
2234
0
            (void) tcplp_output(tp);
2235
0
          sent = tp->snd_max - oldsndmax;
2236
0
          if (sent > tp->t_maxseg) {
2237
0
            KASSERT((tp->t_dupacks == 2 &&
2238
0
                tp->snd_limited == 0) ||
2239
0
               (sent == tp->t_maxseg + 1 &&
2240
0
                tp->t_flags & TF_SENTFIN),
2241
0
                ("%s: sent too much",
2242
0
                __func__));
2243
0
            tp->snd_limited = 2;
2244
0
          } else if (sent > 0)
2245
0
            ++tp->snd_limited;
2246
0
          tp->snd_cwnd = oldcwnd;
2247
#ifdef INSTRUMENT_TCP
2248
          tcplp_sys_log("TCP RESET_cwnd %d", (int) tp->snd_cwnd);
2249
#endif
2250
0
          goto drop;
2251
0
        }
2252
0
      } else
2253
0
        tp->t_dupacks = 0;
2254
0
      break;
2255
0
    }
2256
2257
0
    KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
2258
0
        ("%s: th_ack <= snd_una", __func__));
2259
2260
    /*
2261
     * If the congestion window was inflated to account
2262
     * for the other side's cached packets, retract it.
2263
     */
2264
0
    if (IN_FASTRECOVERY(tp->t_flags)) {
2265
0
      if (SEQ_LT(th->th_ack, tp->snd_recover)) {
2266
0
        if (tp->t_flags & TF_SACK_PERMIT)
2267
0
          tcp_sack_partialack(tp, th);
2268
0
        else
2269
0
          tcp_newreno_partial_ack(tp, th);
2270
0
      } else
2271
0
        cc_post_recovery(tp, th);
2272
0
    }
2273
2274
0
    tp->t_dupacks = 0;
2275
    /*
2276
     * If we reach this point, ACK is not a duplicate,
2277
     *     i.e., it ACKs something we sent.
2278
     */
2279
0
    if (tp->t_flags & TF_NEEDSYN) {
2280
      /*
2281
       * T/TCP: Connection was half-synchronized, and our
2282
       * SYN has been ACK'd (so connection is now fully
2283
       * synchronized).  Go to non-starred state,
2284
       * increment snd_una for ACK of SYN, and check if
2285
       * we can do window scaling.
2286
       */
2287
0
      tp->t_flags &= ~TF_NEEDSYN;
2288
0
      tp->snd_una++;
2289
      /* Do window scaling? */
2290
0
      if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
2291
0
        (TF_RCVD_SCALE|TF_REQ_SCALE)) {
2292
0
        tp->rcv_scale = tp->request_r_scale;
2293
        /* Send window already scaled. */
2294
0
      }
2295
0
    }
2296
2297
0
process_ACK:
2298
0
    acked = BYTES_THIS_ACK(tp, th);
2299
2300
0
    tcplp_sys_log("Bytes acked: %d", acked);
2301
    /*
2302
     * If we just performed our first retransmit, and the ACK
2303
     * arrives within our recovery window, then it was a mistake
2304
     * to do the retransmit in the first place.  Recover our
2305
     * original cwnd and ssthresh, and proceed to transmit where
2306
     * we left off.
2307
     */
2308
0
    if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
2309
0
        (int)(ticks - tp->t_badrxtwin) < 0)
2310
0
      cc_cong_signal(tp, th, CC_RTO_ERR);
2311
2312
    /*
2313
     * If we have a timestamp reply, update smoothed
2314
     * round trip time.  If no timestamp is present but
2315
     * transmit timer is running and timed sequence
2316
     * number was acked, update smoothed round trip time.
2317
     * Since we now have an rtt measurement, cancel the
2318
     * timer backoff (cf., Phil Karn's retransmit alg.).
2319
     * Recompute the initial retransmit timer.
2320
     *
2321
     * Some boxes send broken timestamp replies
2322
     * during the SYN+ACK phase, ignore
2323
     * timestamps of 0 or we could calculate a
2324
     * huge RTT and blow up the retransmit timer.
2325
     */
2326
2327
0
    if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
2328
0
      uint32_t t;
2329
2330
0
      t = tcp_ts_getticks() - to.to_tsecr;
2331
0
      if (!tp->t_rttlow || tp->t_rttlow > t)
2332
0
        tp->t_rttlow = t;
2333
0
      tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
2334
0
    } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
2335
0
      if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
2336
0
        tp->t_rttlow = ticks - tp->t_rtttime;
2337
0
      tcp_xmit_timer(tp, ticks - tp->t_rtttime);
2338
0
    }
2339
2340
    /*
2341
     * If all outstanding data is acked, stop retransmit
2342
     * timer and remember to restart (more output or persist).
2343
     * If there is more data to be acked, restart retransmit
2344
     * timer, using current (possibly backed-off) value.
2345
     */
2346
0
    if (th->th_ack == tp->snd_max) {
2347
0
      tcp_timer_activate(tp, TT_REXMT, 0);
2348
0
      needoutput = 1;
2349
0
    } else if (!tcp_timer_active(tp, TT_PERSIST)) {
2350
0
      tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
2351
0
    }
2352
2353
    /*
2354
     * If no data (only SYN) was ACK'd,
2355
     *    skip rest of ACK processing.
2356
     */
2357
0
    if (acked == 0)
2358
0
      goto step6;
2359
2360
    /*
2361
     * Let the congestion control algorithm update congestion
2362
     * control related information. This typically means increasing
2363
     * the congestion window.
2364
     */
2365
0
    cc_ack_received(tp, th, CC_ACK);
2366
2367
    /*
2368
     * samkumar: I replaced the calls to sbavail(&so->so_snd) with new
2369
     * calls to lbuf_used_space, and then I modified the code to actually
2370
     * remove data from the send buffer, formerly done via
2371
     * sbcut_locked(&so->so_snd, (int)sbavail(&so->so_snd)) in the if case
2372
     * and sbcut_locked(&so->so_snd, acked) in the else case, to use the
2373
     * data structures for TCPlp's data buffering.
2374
     */
2375
0
    if (acked > lbuf_used_space(&tp->sendbuf)) {
2376
0
      uint32_t poppedbytes;
2377
0
      uint32_t usedspace = lbuf_used_space(&tp->sendbuf);
2378
0
      tp->snd_wnd -= usedspace;
2379
0
      poppedbytes = lbuf_pop(&tp->sendbuf, usedspace, &sig->links_popped);
2380
0
      KASSERT(poppedbytes == usedspace, ("Could not fully empty send buffer"));
2381
0
      sig->bytes_acked += poppedbytes;
2382
0
      ourfinisacked = 1;
2383
0
    } else {
2384
0
      uint32_t poppedbytes = lbuf_pop(&tp->sendbuf, acked, &sig->links_popped);
2385
0
      KASSERT(poppedbytes == acked, ("Could not remove acked bytes from send buffer"));
2386
0
      sig->bytes_acked += poppedbytes;
2387
0
      tp->snd_wnd -= acked;
2388
0
      ourfinisacked = 0;
2389
0
    }
2390
    /* NB: sowwakeup_locked() does an implicit unlock. */
2391
    /*
2392
     * samkumar: There used to be a call to sowwakeup(so); here,
2393
     * which wakes up any threads waiting for the socket to
2394
     * become ready for writing. TCPlp handles its send buffer
2395
     * differently so we do not need to replace this call with
2396
     * specialized code to handle this.
2397
     */
2398
    /* Detect una wraparound. */
2399
0
    if (!IN_RECOVERY(tp->t_flags) &&
2400
0
        SEQ_GT(tp->snd_una, tp->snd_recover) &&
2401
0
        SEQ_LEQ(th->th_ack, tp->snd_recover))
2402
0
      tp->snd_recover = th->th_ack - 1;
2403
    /* XXXLAS: Can this be moved up into cc_post_recovery? */
2404
0
    if (IN_RECOVERY(tp->t_flags) &&
2405
0
        SEQ_GEQ(th->th_ack, tp->snd_recover)) {
2406
0
      EXIT_RECOVERY(tp->t_flags);
2407
0
    }
2408
0
    tp->snd_una = th->th_ack;
2409
0
    if (tp->t_flags & TF_SACK_PERMIT) {
2410
0
      if (SEQ_GT(tp->snd_una, tp->snd_recover))
2411
0
        tp->snd_recover = tp->snd_una;
2412
0
    }
2413
0
    if (SEQ_LT(tp->snd_nxt, tp->snd_una))
2414
0
      tp->snd_nxt = tp->snd_una;
2415
2416
0
    switch (tp->t_state) {
2417
2418
    /*
2419
     * In FIN_WAIT_1 STATE in addition to the processing
2420
     * for the ESTABLISHED state if our FIN is now acknowledged
2421
     * then enter FIN_WAIT_2.
2422
     */
2423
0
    case TCPS_FIN_WAIT_1:
2424
0
      if (ourfinisacked) {
2425
        /*
2426
         * If we can't receive any more
2427
         * data, then closing user can proceed.
2428
         * Starting the timer is contrary to the
2429
         * specification, but if we don't get a FIN
2430
         * we'll hang forever.
2431
         *
2432
         * XXXjl:
2433
         * we should release the tp also, and use a
2434
         * compressed state.
2435
         */
2436
        /*
2437
         * samkumar: I replaced a check for the SBS_CANTRCVMORE flag
2438
         * in so->so_rcv.sb_state with a call to tcpiscantrcv.
2439
         */
2440
0
        if (tpiscantrcv(tp)) {
2441
          /* samkumar: Removed a call to soisdisconnected(so). */
2442
0
          tcp_timer_activate(tp, TT_2MSL,
2443
0
              (tcp_fast_finwait2_recycle ?
2444
0
              tcp_finwait2_timeout :
2445
0
              TP_MAXIDLE(tp)));
2446
0
        }
2447
0
        tcp_state_change(tp, TCPS_FIN_WAIT_2);
2448
0
      }
2449
0
      break;
2450
2451
    /*
2452
     * In CLOSING STATE in addition to the processing for
2453
     * the ESTABLISHED state if the ACK acknowledges our FIN
2454
     * then enter the TIME-WAIT state, otherwise ignore
2455
     * the segment.
2456
     */
2457
0
    case TCPS_CLOSING:
2458
0
      if (ourfinisacked) {
2459
        /*
2460
         * samkumar: I added the line below. We need to avoid sending
2461
         * an ACK in the TIME-WAIT state, since we don't want to
2462
         * ACK ACKs. This edge case appears because TCPlp, unlike the
2463
         * original FreeBSD code, uses tcpcbs for connections in the
2464
         * TIME-WAIT state (FreeBSD uses a different, smaller
2465
         * structure).
2466
         */
2467
0
        tp->t_flags &= ~TF_ACKNOW;
2468
0
        tcp_twstart(tp);
2469
0
        return;
2470
0
      }
2471
0
      break;
2472
2473
    /*
2474
     * In LAST_ACK, we may still be waiting for data to drain
2475
     * and/or to be acked, as well as for the ack of our FIN.
2476
     * If our FIN is now acknowledged, delete the TCB,
2477
     * enter the closed state and return.
2478
     */
2479
0
    case TCPS_LAST_ACK:
2480
0
      if (ourfinisacked) {
2481
0
        tp = tcp_close_tcb(tp);
2482
0
        tcplp_sys_connection_lost(tp, CONN_LOST_NORMAL);
2483
0
        goto drop;
2484
0
      }
2485
0
      break;
2486
0
    }
2487
0
  }
2488
2489
0
step6:
2490
2491
  /*
2492
   * Update window information.
2493
   * Don't look at window if no ACK: TAC's send garbage on first SYN.
2494
   */
2495
0
  if ((thflags & TH_ACK) &&
2496
0
      (SEQ_LT(tp->snd_wl1, th->th_seq) ||
2497
0
      (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
2498
0
       (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
2499
    /* keep track of pure window updates */
2500
    /*
2501
     * samkumar: There used to be an if statement here that would check if
2502
     * this is a "pure" window update (tlen == 0 &&
2503
     * tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) and keep
2504
     * statistics for how often that happens.
2505
     */
2506
0
    tp->snd_wnd = tiwin;
2507
0
    tp->snd_wl1 = th->th_seq;
2508
0
    tp->snd_wl2 = th->th_ack;
2509
0
    if (tp->snd_wnd > tp->max_sndwnd)
2510
0
      tp->max_sndwnd = tp->snd_wnd;
2511
0
    needoutput = 1;
2512
0
  }
2513
2514
  /*
2515
   * Process segments with URG.
2516
   */
2517
  /*
2518
   * samkumar: TCPlp does not support the urgent pointer, so we omit all
2519
   * urgent-pointer-related processing and buffering. The code below is the
2520
   * code that was in the "else" case that handles no valid urgent data in
2521
   * the received packet.
2522
   */
2523
0
  {
2524
    /*
2525
     * If no out of band data is expected,
2526
     * pull receive urgent pointer along
2527
     * with the receive window.
2528
     */
2529
0
    if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
2530
0
      tp->rcv_up = tp->rcv_nxt;
2531
0
  }
2532
2533
  /*
2534
   * Process the segment text, merging it into the TCP sequencing queue,
2535
   * and arranging for acknowledgment of receipt if necessary.
2536
   * This process logically involves adjusting tp->rcv_wnd as data
2537
   * is presented to the user (this happens in tcp_usrreq.c,
2538
   * case PRU_RCVD).  If a FIN has already been received on this
2539
   * connection then we just ignore the text.
2540
   */
2541
0
  tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
2542
0
       IS_FASTOPEN(tp->t_flags));
2543
0
  if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
2544
0
      TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2545
0
    tcp_seq save_start = th->th_seq;
2546
    /*
2547
     * samkumar: I removed a call to m_adj(m, drop_hdrlen), which intends
2548
     * to drop data from the mbuf so it can be chained into the receive
2549
     * header. This is not necessary for TCPlp because we copy the data
2550
     * anyway; we just add the offset when copying data into the receive
2551
     * buffer.
2552
     */
2553
    /*
2554
     * Insert segment which includes th into TCP reassembly queue
2555
     * with control block tp.  Set thflags to whether reassembly now
2556
     * includes a segment with FIN.  This handles the common case
2557
     * inline (segment is the next to be received on an established
2558
     * connection, and the queue is empty), avoiding linkage into
2559
     * and removal from the queue and repetition of various
2560
     * conversions.
2561
     * Set DELACK for segments received in order, but ack
2562
     * immediately when segments are out of order (so
2563
     * fast retransmit can work).
2564
     */
2565
    /*
2566
     * samkumar: I replaced LIST_EMPTY(&tp->t_segq) with the calls to
2567
     * tpiscantrcv and bmp_isempty on the second line below.
2568
     */
2569
0
    if (th->th_seq == tp->rcv_nxt &&
2570
0
        (tpiscantrcv(tp) || bmp_isempty(tp->reassbmp, REASSBMP_SIZE(tp))) &&
2571
0
        (TCPS_HAVEESTABLISHED(tp->t_state) ||
2572
0
       tfo_syn)) {
2573
0
      if (DELAY_ACK(tp, tlen) || tfo_syn)
2574
0
        tp->t_flags |= TF_DELACK;
2575
0
      else
2576
0
        tp->t_flags |= TF_ACKNOW;
2577
0
      tp->rcv_nxt += tlen;
2578
0
      thflags = th->th_flags & TH_FIN;
2579
2580
      /*
2581
       * samkumar: I replaced the code that used to be here (which would
2582
       * free the mbuf with m_freem(m) if the SBS_CANTRCVMORE flag is set
2583
       * on so->so_rcv.sb_state, and otherwise call
2584
       * sbappendstream_locked(&so->so_rcv, m, 0);).
2585
       */
2586
0
      if (!tpiscantrcv(tp)) {
2587
0
        cbuf_write(&tp->recvbuf, msg, otMessageGetOffset(msg) + drop_hdrlen, tlen, cbuf_copy_from_message);
2588
0
        if (tlen > 0) {
2589
0
          sig->recvbuf_added = true;
2590
0
        }
2591
0
      } else if (tlen > 0) {
2592
        /*
2593
         * samkumar: We already know tlen != 0, so if we got here, then
2594
         * it means that we got data after we called SHUT_RD, or after
2595
         * receiving a FIN. I'm going to drop the connection in this
2596
         * case. I think FreeBSD might have just dropped the packet
2597
         * silently, but Linux handles it this way; this seems to be
2598
         * the right approach to me.
2599
         */
2600
0
        tcp_drop(tp, ECONNABORTED);
2601
0
        goto drop;
2602
0
      }
2603
      /* NB: sorwakeup_locked() does an implicit unlock. */
2604
      /*
2605
       * samkumar: There used to be a call to sorwakeup_locked(so); here,
2606
       * which wakes up any threads waiting for the socket to become
2607
       * ready for reading. TCPlp handles its buffering
2608
       * differently so we do not need to replace this call with
2609
       * specialized code to handle this.
2610
       */
2611
0
    } else if (tpiscantrcv(tp)) {
2612
      /*
2613
       * samkumar: We will reach this point if we get out-of-order data
2614
       * on a socket which was shut down with SHUT_RD, or where we
2615
       * already received a FIN. My response here is to drop the segment
2616
       * and send an RST.
2617
       */
2618
0
      tcp_drop(tp, ECONNABORTED);
2619
0
      goto drop;
2620
0
    } else {
2621
      /*
2622
       * XXX: Due to the header drop above "th" is
2623
       * theoretically invalid by now.  Fortunately
2624
       * m_adj() doesn't actually free any mbufs
2625
       * when trimming from the head.
2626
       */
2627
0
      thflags = tcp_reass(tp, th, &tlen, msg, otMessageGetOffset(msg) + drop_hdrlen, sig);
2628
0
      tp->t_flags |= TF_ACKNOW;
2629
0
    }
2630
    // Only place tlen is used after the call to tcp_reass is below
2631
0
    if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
2632
0
      tcp_update_sack_list(tp, save_start, save_start + tlen);
2633
    /*
2634
     * samkumar: This is not me commenting things out; this was already
2635
     * commented out in the FreeBSD code.
2636
     */
2637
#if 0
2638
    /*
2639
     * Note the amount of data that peer has sent into
2640
     * our window, in order to estimate the sender's
2641
     * buffer size.
2642
     * XXX: Unused.
2643
     */
2644
    if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
2645
      len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
2646
    else
2647
      len = so->so_rcv.sb_hiwat;
2648
#endif
2649
0
  } else {
2650
0
    thflags &= ~TH_FIN;
2651
0
  }
2652
2653
  /*
2654
   * If FIN is received ACK the FIN and let the user know
2655
   * that the connection is closing.
2656
   */
2657
0
  if (thflags & TH_FIN) {
2658
0
    tcplp_sys_log("FIN Processing start");
2659
0
    if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2660
      /* samkumar: replace socantrcvmore with tpcantrcvmore */
2661
0
      tpcantrcvmore(tp);
2662
      /*
2663
       * If connection is half-synchronized
2664
       * (ie NEEDSYN flag on) then delay ACK,
2665
       * so it may be piggybacked when SYN is sent.
2666
       * Otherwise, since we received a FIN then no
2667
       * more input can be expected, send ACK now.
2668
       */
2669
0
      if (tp->t_flags & TF_NEEDSYN)
2670
0
        tp->t_flags |= TF_DELACK;
2671
0
      else
2672
0
        tp->t_flags |= TF_ACKNOW;
2673
0
      tp->rcv_nxt++;
2674
0
    }
2675
    /*
2676
     * samkumar: This -2 state is added by me, so that we do not consider
2677
     * any more FINs in reassembly.
2678
     */
2679
0
    if (tp->reass_fin_index != -2) {
2680
0
      sig->rcvd_fin = true;
2681
0
      tp->reass_fin_index = -2;
2682
0
    }
2683
0
    switch (tp->t_state) {
2684
2685
    /*
2686
     * In SYN_RECEIVED and ESTABLISHED STATES
2687
     * enter the CLOSE_WAIT state.
2688
     */
2689
0
    case TCPS_SYN_RECEIVED:
2690
0
      tp->t_starttime = ticks;
2691
      /* FALLTHROUGH */
2692
0
    case TCPS_ESTABLISHED:
2693
0
      tcp_state_change(tp, TCPS_CLOSE_WAIT);
2694
0
      break;
2695
2696
    /*
2697
     * If still in FIN_WAIT_1 STATE FIN has not been acked so
2698
     * enter the CLOSING state.
2699
     */
2700
0
    case TCPS_FIN_WAIT_1:
2701
0
      tcp_state_change(tp, TCPS_CLOSING);
2702
0
      break;
2703
2704
    /*
2705
     * In FIN_WAIT_2 state enter the TIME_WAIT state,
2706
     * starting the time-wait timer, turning off the other
2707
     * standard timers.
2708
     */
2709
0
    case TCPS_FIN_WAIT_2:
2710
0
      tcp_twstart(tp);
2711
0
      return;
2712
0
    }
2713
0
  }
2714
2715
  /*
2716
   * samkumar: Remove code for synchronization and debugging, here and in
2717
   * the labels below. I also removed the line to free the mbuf if it hasn't
2718
   * been freed already (the line was "m_freem(m)").
2719
   */
2720
  /*
2721
   * Return any desired output.
2722
   */
2723
0
  if (needoutput || (tp->t_flags & TF_ACKNOW))
2724
0
    (void) tcplp_output(tp);
2725
2726
0
check_delack:
2727
0
  if (tp->t_flags & TF_DELACK) {
2728
0
    tp->t_flags &= ~TF_DELACK;
2729
0
    tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
2730
0
  }
2731
0
  return;
2732
2733
0
dropafterack:
2734
  /*
2735
   * Generate an ACK dropping incoming segment if it occupies
2736
   * sequence space, where the ACK reflects our state.
2737
   *
2738
   * We can now skip the test for the RST flag since all
2739
   * paths to this code happen after packets containing
2740
   * RST have been dropped.
2741
   *
2742
   * In the SYN-RECEIVED state, don't send an ACK unless the
2743
   * segment we received passes the SYN-RECEIVED ACK test.
2744
   * If it fails send a RST.  This breaks the loop in the
2745
   * "LAND" DoS attack, and also prevents an ACK storm
2746
   * between two listening ports that have been sent forged
2747
   * SYN segments, each with the source address of the other.
2748
   */
2749
0
  if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
2750
0
      (SEQ_GT(tp->snd_una, th->th_ack) ||
2751
0
       SEQ_GT(th->th_ack, tp->snd_max)) ) {
2752
0
    rstreason = BANDLIM_RST_OPENPORT;
2753
0
    goto dropwithreset;
2754
0
  }
2755
2756
0
  tp->t_flags |= TF_ACKNOW;
2757
0
  (void) tcplp_output(tp);
2758
0
  return;
2759
2760
0
dropwithreset:
2761
0
  if (tp != NULL) {
2762
0
    tcp_dropwithreset(ip6, th, tp, instance, tlen, rstreason);
2763
0
  } else
2764
0
    tcp_dropwithreset(ip6, th, NULL, instance, tlen, rstreason);
2765
0
  return;
2766
2767
0
drop:
2768
0
  return;
2769
0
}
2770
2771
/*
 * Parse TCP options and place in tcpopt.
 *
 * to    - output; parsed option values.  to->to_flags accumulates a TOF_*
 *         bit for each option recognized.
 * cp    - pointer to the first byte of the TCP options area.
 * cnt   - number of option bytes available at cp.
 * flags - TO_SYN if the segment carries SYN; several options (MSS, window
 *         scale, SACK-permitted, fast-open) are only accepted on SYN
 *         segments, and SACK blocks only on non-SYN segments.
 *
 * Parsing stops at TCPOPT_EOL or at the first structurally malformed
 * option (truncated or bad length).  Individual options with an
 * unexpected length, unknown kinds, and options not valid for this
 * segment type are skipped without aborting the scan.
 */
static void
tcp_dooptions(struct tcpopt *to, uint8_t *cp, int cnt, int flags)
{
	int opt, optlen;

	to->to_flags = 0;
	/* Each iteration consumes one whole option (optlen bytes). */
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			/* NOP has no length byte; it is a single pad byte. */
			optlen = 1;
		else {
			/* Need at least kind + length bytes. */
			if (cnt < 2)
				break;
			optlen = cp[1];
			/* Reject lengths that are too small or overrun the buffer. */
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			if (optlen != TCPOLEN_MAXSEG)
				continue;
			if (!(flags & TO_SYN))
				continue;
			to->to_flags |= TOF_MSS;
			/* bcopy: the option payload may be unaligned. */
			bcopy((char *)cp + 2,
			    (char *)&to->to_mss, sizeof(to->to_mss));
			to->to_mss = ntohs(to->to_mss);
			break;
		case TCPOPT_WINDOW:
			if (optlen != TCPOLEN_WINDOW)
				continue;
			if (!(flags & TO_SYN))
				continue;
			to->to_flags |= TOF_SCALE;
			/* Clamp the peer's shift count to the protocol maximum. */
			to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
			break;
		case TCPOPT_TIMESTAMP:
			if (optlen != TCPOLEN_TIMESTAMP)
				continue;
			to->to_flags |= TOF_TS;
			/* TSval at offset 2, TSecr at offset 6; both unaligned. */
			bcopy((char *)cp + 2,
			    (char *)&to->to_tsval, sizeof(to->to_tsval));
			to->to_tsval = ntohl(to->to_tsval);
			bcopy((char *)cp + 6,
			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
			to->to_tsecr = ntohl(to->to_tsecr);
			break;
#ifdef TCP_SIGNATURE
		/*
		 * XXX In order to reply to a host which has set the
		 * TCP_SIGNATURE option in its initial SYN, we have to
		 * record the fact that the option was observed here
		 * for the syncache code to perform the correct response.
		 */
		case TCPOPT_SIGNATURE:
			if (optlen != TCPOLEN_SIGNATURE)
				continue;
			to->to_flags |= TOF_SIGNATURE;
			to->to_signature = cp + 2;
			break;
#endif
		case TCPOPT_SACK_PERMITTED:
			if (optlen != TCPOLEN_SACK_PERMITTED)
				continue;
			if (!(flags & TO_SYN))
				continue;
			if (!V_tcp_do_sack)
				continue;
			to->to_flags |= TOF_SACKPERM;
			break;
		case TCPOPT_SACK:
			/* Payload must be a whole number of SACK blocks. */
			if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
				continue;
			if (flags & TO_SYN)
				continue;
			to->to_flags |= TOF_SACK;
			to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
			/* Blocks are left in place; caller parses to_sacks. */
			to->to_sacks = cp + 2;
			break;
		case TCPOPT_FAST_OPEN:
			/*
			 * Cookie length validation is performed by the
			 * server side cookie checking code or the client
			 * side cookie cache update code.
			 */
			if (!(flags & TO_SYN))
				continue;
			if (!V_tcp_fastopen_client_enable &&
			    !V_tcp_fastopen_server_enable)
				continue;
			to->to_flags |= TOF_FASTOPEN;
			to->to_tfo_len = optlen - 2;
			/* A SYN may carry a zero-length cookie request. */
			to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
			break;
		default:
			continue;
		}
	}
}
2875
2876
2877
/*
 * Collect new round-trip time estimate
 * and update averages and current timeout.
 *
 * tp  - connection whose smoothed RTT state (t_srtt, t_rttvar, t_rxtcur)
 *       is updated.
 * rtt - the measured round trip, in ticks, offset by 1 (callers pass
 *       measured-ticks + 1 so that a zero-tick RTT is representable).
 *
 * Side effects: resets t_rtttime (no measurement in progress), clears the
 * retransmit backoff (t_rxtshift) and t_softerror, and recomputes t_rxtcur.
 */
static void
tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
	int delta;

	tp->t_rttupdated++;
	if (tp->t_srtt != 0) {
		/*
		 * srtt is stored as fixed point with 5 bits after the
		 * binary point (i.e., scaled by 8).  The following magic
		 * is equivalent to the smoothing algorithm in rfc793 with
		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
		 * point).  Adjust rtt to origin 0.
		 */
		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));

		/* Keep srtt strictly positive; 0 means "no estimate yet". */
		if ((tp->t_srtt += delta) <= 0)
			tp->t_srtt = 1;

		/*
		 * We accumulate a smoothed rtt variance (actually, a
		 * smoothed mean difference), then set the retransmit
		 * timer to smoothed rtt + 4 times the smoothed variance.
		 * rttvar is stored as fixed point with 4 bits after the
		 * binary point (scaled by 16).  The following is
		 * equivalent to rfc793 smoothing with an alpha of .75
		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
		 * rfc793's wired-in beta.
		 */
		if (delta < 0)
			delta = -delta;
		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
		if ((tp->t_rttvar += delta) <= 0)
			tp->t_rttvar = 1;
		/* Track the best (lowest) srtt+rttvar seen so far. */
		if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
		    tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
	} else {
		/*
		 * No rtt measurement yet - use the unsmoothed rtt.
		 * Set the variance to half the rtt (so our first
		 * retransmit happens at 3*rtt).
		 */
		tp->t_srtt = rtt << TCP_RTT_SHIFT;
		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
	}
	tp->t_rtttime = 0;
	tp->t_rxtshift = 0;

	/*
	 * the retransmit should happen at rtt + 4 * rttvar.
	 * Because of the way we do the smoothing, srtt and rttvar
	 * will each average +1/2 tick of bias.  When we compute
	 * the retransmit timer, we want 1/2 tick of rounding and
	 * 1 extra tick because of +-1/2 tick uncertainty in the
	 * firing of the timer.  The bias will give us exactly the
	 * 1.5 tick we need.  But, because the bias is
	 * statistical, we have to test that we don't drop below
	 * the minimum feasible timer (which is 2 ticks).
	 */
	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);

#ifdef INSTRUMENT_TCP
	tcplp_sys_log("TCP timer %u %d %d %d", (unsigned int) tcplp_sys_get_millis(), rtt, (int) tp->t_srtt, (int) tp->t_rttvar);
#endif

	/*
	 * We received an ack for a packet that wasn't retransmitted;
	 * it is probably safe to discard any error indications we've
	 * received recently.  This isn't quite right, but close enough
	 * for now (a route might have failed after we sent a segment,
	 * and the return path might not be symmetrical).
	 */
	tp->t_softerror = 0;
}
2959
2960
/*
 * samkumar: Adapted from netinet6/in6.c.
 *
 * Reports whether in6 is "obviously" an address of the local host. Only the
 * loopback and link-local checks from the original function are retained;
 * the checks against addresses assigned to interfaces were removed because
 * it is difficult for TCPlp to enumerate them. The result is used purely as
 * a hint when clamping the MSS (to V_tcp_v6mssdflt for non-local peers), so
 * a false negative at worst clamps the MTU conservatively -- and the
 * relevant constants are chosen so that even that makes no practical
 * difference.
 */
int
in6_localaddr(struct in6_addr *in6)
{
	int is_local = 0;

	if (IN6_IS_ADDR_LOOPBACK(in6))
		is_local = 1;
	else if (IN6_IS_ADDR_LINKLOCAL(in6))
		is_local = 1;

	return (is_local);
}
2982
2983
/*
 * Determine a reasonable value for maxseg size.
 * If the route is known, check route for mtu.
 * If none, use an mss that can be handled on the outgoing interface
 * without forcing IP to fragment.  If no route is found, route has no mtu,
 * or the destination isn't local, use a default, hopefully conservative
 * size (usually 512 or the default IP max size, but no more than the mtu
 * of the interface), as we can't discover anything about intervening
 * gateways or networks.  We also initialize the congestion/slow start
 * window to be a single segment if the destination isn't local.
 * While looking at the routing entry, we also initialize other path-dependent
 * parameters from pre-set or cached values in the routing entry.
 *
 * Also take into account the space needed for options that we
 * send regularly.  Make maxseg shorter by that amount to assure
 * that we can send maxseg amount of data even when the options
 * are present.  Store the upper limit of the length of options plus
 * data in maxopd.
 *
 * NOTE that this routine is only called when we process an incoming
 * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
 * settings are handled in tcp_mssopt().
 */
/*
 * samkumar: Using struct tcpcb instead of the inpcb.
 */
/*
 * Parameters:
 *   tp        - TCP control block to update; writes tp->t_maxopd and
 *               tp->t_maxseg.
 *   offer     - MSS value offered by the peer; 0 means no MSS option was
 *               present on the SYN, -1 means no SYN received yet.
 *   mtuoffer  - path MTU hint (e.g. from an ICMP need-fragmentation
 *               message); -1 if none. Mutually exclusive with offer
 *               (enforced by the KASSERT below).
 *   metricptr - if non-NULL, receives the host-cache metrics for this peer
 *               (zeroed if we bail out early with no usable MTU).
 *   cap       - interface capability information, forwarded to
 *               tcp_maxmtu6().
 */
void
tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
    struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
{
	/*
	 * samkumar: I removed all IPv4-specific logic and cases, including logic
	 * to check for IPv4 vs. IPv6, as well as all locking and debugging code.
	 */
	int mss = 0;
	uint64_t maxmtu = 0;
	struct hc_metrics_lite metrics;
	int origoffer;
	/* Fixed per-packet overhead: IPv6 header plus base TCP header. */
	size_t min_protoh = IP6HDR_SIZE + sizeof (struct tcphdr);

	if (mtuoffer != -1) {
		KASSERT(offer == -1, ("%s: conflict", __func__));
		/* Convert the MTU hint into an equivalent MSS offer. */
		offer = mtuoffer - min_protoh;
	}
	/* Remember the raw offer; -1 later means "no segment received yet". */
	origoffer = offer;

	maxmtu = tcp_maxmtu6(tp, cap);
	/* Start from the conservative IPv6 default; refined below. */
	tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;

	/*
	 * No route to sender, stay with default mss and return.
	 */
	if (maxmtu == 0) {
		/*
		 * In case we return early we need to initialize metrics
		 * to a defined state as tcp_hc_get() would do for us
		 * if there was no cache hit.
		 */
		if (metricptr != NULL)
			bzero(metricptr, sizeof(struct hc_metrics_lite));
		return;
	}

	/* What have we got? */
	switch (offer) {
		case 0:
			/*
			 * Offer == 0 means that there was no MSS on the SYN
			 * segment, in this case we use tcp_mssdflt as
			 * already assigned to t_maxopd above.
			 */
			offer = tp->t_maxopd;
			break;

		case -1:
			/*
			 * Offer == -1 means that we didn't receive SYN yet.
			 */
			/* FALLTHROUGH */

		default:
			/*
			 * Prevent DoS attack with too small MSS. Round up
			 * to at least minmss.
			 */
			offer = max(offer, V_tcp_minmss);
	}

	/*
	 * rmx information is now retrieved from tcp_hostcache.
	 */
	tcp_hc_get(tp, &metrics);
	if (metricptr != NULL)
		bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));

	/*
	 * If there's a discovered mtu in tcp hostcache, use it.
	 * Else, use the link mtu.
	 */
	if (metrics.rmx_mtu)
		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
	else {
		mss = maxmtu - min_protoh;
		if (!V_path_mtu_discovery &&
		    !in6_localaddr(&tp->faddr))
			mss = min(mss, V_tcp_v6mssdflt);
		/*
		 * XXX - The above conditional (mss = maxmtu - min_protoh)
		 * probably violates the TCP spec.
		 * The problem is that, since we don't know the
		 * other end's MSS, we are supposed to use a conservative
		 * default.  But, if we do that, then MTU discovery will
		 * never actually take place, because the conservative
		 * default is much less than the MTUs typically seen
		 * on the Internet today.  For the moment, we'll sweep
		 * this under the carpet.
		 *
		 * The conservative default might not actually be a problem
		 * if the only case this occurs is when sending an initial
		 * SYN with options and data to a host we've never talked
		 * to before.  Then, they will reply with an MSS value which
		 * will get recorded and the new parameters should get
		 * recomputed.  For Further Study.
		 */
	}
	/* Never exceed what the peer (or the MTU hint) offered. */
	mss = min(mss, offer);

	/*
	 * Sanity check: make sure that maxopd will be large
	 * enough to allow some data on segments even if the
	 * all the option space is used (40bytes).  Otherwise
	 * funny things may happen in tcplp_output.
	 */
	/*
	 * samkumar: When I was experimenting with different MSS values, I had
	 * changed this to "mss = max(mss, TCP_MAXOLEN + 1);" but I am changing it
	 * back for the version that will be merged into OpenThread.
	 */
	mss = max(mss, 64);

	/*
	 * maxopd stores the maximum length of data AND options
	 * in a segment; maxseg is the amount of data in a normal
	 * segment.  We need to store this value (maxopd) apart
	 * from maxseg, because now every segment carries options
	 * and thus we normally have somewhat less data in segments.
	 */
	tp->t_maxopd = mss;

	/*
	 * origoffer==-1 indicates that no segments were received yet.
	 * In this case we just guess.
	 */
	/*
	 * Reserve room for the timestamp option when we request timestamps
	 * and the peer either echoed them or hasn't spoken yet.
	 */
	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
	    (origoffer == -1 ||
	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
		mss -= TCPOLEN_TSTAMP_APPA;

	tp->t_maxseg = mss;
}
3143
3144
void
3145
tcp_mss(struct tcpcb *tp, int offer)
3146
0
{
3147
0
  struct hc_metrics_lite metrics;
3148
0
  struct tcp_ifcap cap;
3149
3150
0
  KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
3151
3152
0
  bzero(&cap, sizeof(cap));
3153
0
  tcp_mss_update(tp, offer, -1, &metrics, &cap);
3154
3155
  /*
3156
   * samkumar: There used to be code below that might modify the MSS, but I
3157
   * removed all of it (see the comments below for the reason). It used to
3158
   * read tp->t_maxseg into the local variable mss, modify mss, and then
3159
   * reassign tp->t_maxseg to mss. I've kept the assignments, commented out,
3160
   * for clarity.
3161
   */
3162
  //mss = tp->t_maxseg;
3163
3164
  /*
3165
   * If there's a pipesize, change the socket buffer to that size,
3166
   * don't change if sb_hiwat is different than default (then it
3167
   * has been changed on purpose with setsockopt).
3168
   * Make the socket buffers an integral number of mss units;
3169
   * if the mss is larger than the socket buffer, decrease the mss.
3170
   */
3171
3172
  /*
3173
   * samkumar: There used to be code here would would limit the MSS to at
3174
   * most the size of the send buffer, and then round up the send buffer to
3175
   * a multiple of the MSS using
3176
   * "sbreserve_locked(&so->so_snd, bufsize, so, NULL);". With TCPlp, we do
3177
   * not do this, because the linked buffer used at the send buffer doesn't
3178
   * have a real limit. Had we used a circular buffer, then limiting the MSS
3179
   * to the buffer size would have made sense, but we still would not be able
3180
   * to resize the send buffer because it is not allocated by TCPlp.
3181
   */
3182
3183
  /*
3184
   * samkumar: See the comment above about me removing code that modifies
3185
   * the MSS, making this assignment and the one above both unnecessary.
3186
   */
3187
  //tp->t_maxseg = mss;
3188
3189
  /*
3190
   * samkumar: There used to be code here that would round up the receive
3191
   * buffer size to a multiple of the MSS, assuming that the receive buffer
3192
   * size is bigger than the MSS. The new buffer size is set using
3193
   * "sbreserve_locked(&so->so_rcv, bufsize, so, NULL);". In TCPlp, the
3194
   * buffer is not allocated by TCPlp so I removed the code for this.
3195
   */
3196
  /*
3197
   * samkumar: There used to be code here to handle TCP Segmentation
3198
   * Offloading (TSO); I removed it becuase we don't support that in TCPlp.
3199
   */
3200
0
}
3201
3202
/*
3203
 * Determine the MSS option to send on an outgoing SYN.
3204
 */
3205
/*
3206
 * samkumar: In the signature, changed "struct in_conninfo *inc" to
3207
 * "struct tcpcb* tp".
3208
 */
3209
int
3210
tcp_mssopt(struct tcpcb* tp)
3211
0
{
3212
  /*
3213
   * samkumar: I removed all processing code specific to IPv4, or to decide
3214
   * between IPv4 and IPv6. This is OK because TCPlp assumes IPv6.
3215
   */
3216
0
  int mss = 0;
3217
0
  uint64_t maxmtu = 0;
3218
0
  uint64_t thcmtu = 0;
3219
0
  size_t min_protoh;
3220
3221
0
  KASSERT(tp != NULL, ("tcp_mssopt with NULL tcpcb pointer"));
3222
3223
0
  mss = V_tcp_v6mssdflt;
3224
0
  maxmtu = tcp_maxmtu6(tp, NULL);
3225
0
  min_protoh = IP6HDR_SIZE + sizeof(struct tcphdr);
3226
3227
0
  thcmtu = tcp_hc_getmtu(tp); /* IPv4 and IPv6 */
3228
3229
0
  if (maxmtu && thcmtu)
3230
0
    mss = min(maxmtu, thcmtu) - min_protoh;
3231
0
  else if (maxmtu || thcmtu)
3232
0
    mss = max(maxmtu, thcmtu) - min_protoh;
3233
3234
0
  return (mss);
3235
0
}
3236
3237
/*
 * On a partial ack arrives, force the retransmission of the
 * next unacknowledged segment.  Do not clear tp->t_dupacks.
 * By setting snd_nxt to ti_ack, this forces retransmission timer to
 * be started again.
 */
/*
 * NOTE(review): the statement ordering here is deliberate. snd_cwnd is
 * temporarily clamped around the tcplp_output() call so that exactly the
 * lost segment is retransmitted, then restored; and both the clamp and the
 * final window deflation rely on tp->snd_una not yet having been advanced
 * by the caller.
 */
static void
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
	/* Save state that is temporarily overridden and restored below. */
	tcp_seq onxt = tp->snd_nxt;
	uint64_t  ocwnd = tp->snd_cwnd;

	/* Stop the retransmit timer; it restarts when the segment is sent. */
	tcp_timer_activate(tp, TT_REXMT, 0);
	tp->t_rtttime = 0;
	/* Rewind snd_nxt so the first unacknowledged segment is resent. */
	tp->snd_nxt = th->th_ack;
	/*
	 * Set snd_cwnd to one segment beyond acknowledged offset.
	 * (tp->snd_una has not yet been updated when this function is called.)
	 */
	tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
	tp->t_flags |= TF_ACKNOW;
#ifdef INSTRUMENT_TCP
	tcplp_sys_log("TCP Partial_ACK");
#endif
	(void) tcplp_output(tp);
	/* Restore the real congestion window after the forced retransmit. */
	tp->snd_cwnd = ocwnd;
	if (SEQ_GT(onxt, tp->snd_nxt))
		tp->snd_nxt = onxt;
	/*
	 * Partial window deflation.  Relies on fact that tp->snd_una
	 * not updated yet.
	 */
	if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th))
		tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
	else
		tp->snd_cwnd = 0;
	/* Grow the deflated window by one segment (NewReno partial-ack rule). */
	tp->snd_cwnd += tp->t_maxseg;
#ifdef INSTRUMENT_TCP
	tcplp_sys_log("TCP Partial_ACK_final %d", (int) tp->snd_cwnd);
#endif
}