Coverage Report

Created: 2025-10-28 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openthread/third_party/tcplp/bsdtcp/tcp_output.c
Line
Count
Source
1
/*-
2
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3
 *  The Regents of the University of California.  All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 * 4. Neither the name of the University nor the names of its contributors
14
 *    may be used to endorse or promote products derived from this software
15
 *    without specific prior written permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 *  @(#)tcp_output.c  8.4 (Berkeley) 5/24/95
30
 */
31
32
#include <errno.h>
33
#include <string.h>
34
35
#include "../tcplp.h"
36
#include "tcp.h"
37
#include "tcp_fastopen.h"
38
#include "tcp_fsm.h"
39
#include "tcp_var.h"
40
#include "tcp_seq.h"
41
#include "tcp_timer.h"
42
#include "ip.h"
43
#include "../lib/cbuf.h"
44
45
#include "tcp_const.h"
46
47
#include <openthread/ip6.h>
48
#include <openthread/message.h>
49
#include <openthread/tcp.h>
50
51
static inline void
52
cc_after_idle(struct tcpcb *tp)
53
0
{
54
  /* samkumar: Removed synchronization. */
55
0
  if (CC_ALGO(tp)->after_idle != NULL)
56
0
    CC_ALGO(tp)->after_idle(tp->ccv);
57
0
}
58
59
0
long min(long a, long b) {
60
0
  if (a < b) {
61
0
    return a;
62
0
  } else {
63
0
    return b;
64
0
  }
65
0
}
66
67
0
unsigned long ulmin(unsigned long a, unsigned long b) {
68
0
  if (a < b) {
69
0
    return a;
70
0
  } else {
71
0
    return b;
72
0
  }
73
0
}
74
75
0
#define lmin(a, b) min(a, b)
76
77
void
78
tcp_setpersist(struct tcpcb *tp)
79
0
{
80
0
  int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
81
0
  int tt;
82
83
0
  tp->t_flags &= ~TF_PREVVALID;
84
0
  if (tcp_timer_active(tp, TT_REXMT))
85
0
    tcplp_sys_panic("PANIC: tcp_setpersist: retransmit pending");
86
  /*
87
   * Start/restart persistence timer.
88
   */
89
0
  TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
90
0
          TCPTV_PERSMIN, TCPTV_PERSMAX);
91
0
  tcp_timer_activate(tp, TT_PERSIST, tt);
92
0
  if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
93
0
    tp->t_rxtshift++;
94
0
}
95
96
/*
97
 * Tcp output routine: figure out what should be sent and send it.
98
 */
99
int
100
tcplp_output(struct tcpcb *tp)
101
0
{
102
  /*
103
   * samkumar: The biggest change in this function is in how outgoing
104
   * segments are built and sent out. That code has been updated to account
105
   * for TCPlp's buffering, and using otMessages rather than mbufs to
106
   * construct the outgoing segments.
107
   *
108
   * And, of course, all code corresponding to locks, stats, and debugging
109
   * has been removed, and all code specific to IPv4 or to decide between
110
   * IPv6 and IPv4 handling has been removed.
111
   */
112
113
0
  struct tcphdr* th = NULL;
114
0
  int idle;
115
0
  long len, recwin, sendwin;
116
0
  int off, flags, error = 0;  /* Keep compiler happy */
117
0
  int sendalot, mtu;
118
0
  int sack_rxmit, sack_bytes_rxmt;
119
0
  struct sackhole* p;
120
0
  unsigned ipoptlen, optlen, hdrlen;
121
0
  struct tcpopt to;
122
0
  unsigned int wanted_cookie = 0;
123
0
  unsigned int dont_sendalot = 0;
124
0
  uint8_t opt[TCP_MAXOLEN];
125
0
  uint32_t ticks = tcplp_sys_get_ticks();
126
127
  /* samkumar: Code for TCP offload has been removed. */
128
129
  /*
130
   * For TFO connections in SYN_SENT or SYN_RECEIVED,
131
   * only allow the initial SYN or SYN|ACK and those sent
132
   * by the retransmit timer.
133
   */
134
0
  if (IS_FASTOPEN(tp->t_flags) &&
135
0
      ((tp->t_state == TCPS_SYN_SENT) ||
136
0
       (tp->t_state == TCPS_SYN_RECEIVED)) &&
137
0
      SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
138
0
      (tp->snd_nxt != tp->snd_una))       /* not a retransmit */
139
0
    return (0);
140
141
  /*
142
   * Determine length of data that should be transmitted,
143
   * and flags that will be used.
144
   * If there is some data or critical controls (SYN, RST)
145
   * to send, then transmit; otherwise, investigate further.
146
   */
147
0
  idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
148
0
  if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur)
149
0
    cc_after_idle(tp);
150
151
0
  tp->t_flags &= ~TF_LASTIDLE;
152
0
  if (idle) {
153
0
    if (tp->t_flags & TF_MORETOCOME) {
154
0
      tp->t_flags |= TF_LASTIDLE;
155
0
      idle = 0;
156
0
    }
157
0
  }
158
  /* samkumar: This would be printed once per _window_ that is transmitted. */
159
#ifdef INSTRUMENT_TCP
160
  tcplp_sys_log("TCP output %u %d %d", (unsigned int) tcplp_sys_get_millis(), (int) tp->snd_wnd, (int) tp->snd_cwnd);
161
#endif
162
163
0
again:
164
  /*
165
   * If we've recently taken a timeout, snd_max will be greater than
166
   * snd_nxt.  There may be SACK information that allows us to avoid
167
   * resending already delivered data.  Adjust snd_nxt accordingly.
168
   */
169
0
  if ((tp->t_flags & TF_SACK_PERMIT) &&
170
0
      SEQ_LT(tp->snd_nxt, tp->snd_max))
171
0
    tcp_sack_adjust(tp);
172
0
  sendalot = 0;
173
  /* samkumar: Removed code for supporting TSO. */
174
0
  mtu = 0;
175
0
  off = tp->snd_nxt - tp->snd_una;
176
0
  sendwin = min(tp->snd_wnd, tp->snd_cwnd);
177
178
0
  flags = tcp_outflags[tp->t_state];
179
  /*
180
   * Send any SACK-generated retransmissions.  If we're explicitly trying
181
   * to send out new data (when sendalot is 1), bypass this function.
182
   * If we retransmit in fast recovery mode, decrement snd_cwnd, since
183
   * we're replacing a (future) new transmission with a retransmission
184
   * now, and we previously incremented snd_cwnd in tcplp_input().
185
   */
186
  /*
187
 * Still in sack recovery, reset rxmit flag to zero.
188
   */
189
0
  sack_rxmit = 0;
190
0
  sack_bytes_rxmt = 0;
191
0
  len = 0;
192
0
  p = NULL;
193
0
  if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
194
0
      (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
195
0
    long cwin;
196
197
0
    cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
198
0
    if (cwin < 0)
199
0
      cwin = 0;
200
    /* Do not retransmit SACK segments beyond snd_recover */
201
0
    if (SEQ_GT(p->end, tp->snd_recover)) {
202
      /*
203
       * (At least) part of sack hole extends beyond
204
       * snd_recover. Check to see if we can rexmit data
205
       * for this hole.
206
       */
207
0
      if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
208
        /*
209
         * Can't rexmit any more data for this hole.
210
         * That data will be rexmitted in the next
211
         * sack recovery episode, when snd_recover
212
         * moves past p->rxmit.
213
         */
214
0
        p = NULL;
215
0
        goto after_sack_rexmit;
216
0
      } else
217
        /* Can rexmit part of the current hole */
218
0
        len = ((long)ulmin(cwin,
219
0
               tp->snd_recover - p->rxmit));
220
0
    } else
221
0
      len = ((long)ulmin(cwin, p->end - p->rxmit));
222
0
    off = p->rxmit - tp->snd_una;
223
0
    KASSERT(off >= 0,("%s: sack block to the left of una : %d",
224
0
        __func__, off));
225
0
    if (len > 0) {
226
0
      sack_rxmit = 1;
227
0
      sendalot = 1;
228
0
    }
229
0
  }
230
0
after_sack_rexmit:
231
  /*
232
   * Get standard flags, and add SYN or FIN if requested by 'hidden'
233
   * state flags.
234
   */
235
0
  if (tp->t_flags & TF_NEEDFIN)
236
0
    flags |= TH_FIN;
237
0
  if (tp->t_flags & TF_NEEDSYN)
238
0
    flags |= TH_SYN;
239
240
  /*
241
   * If in persist timeout with window of 0, send 1 byte.
242
   * Otherwise, if window is small but nonzero
243
   * and timer expired, we will send what we can
244
   * and go to transmit state.
245
   */
246
0
  if (tp->t_flags & TF_FORCEDATA) {
247
0
    if (sendwin == 0) {
248
      /*
249
       * If we still have some data to send, then
250
       * clear the FIN bit.  Usually this would
251
       * happen below when it realizes that we
252
       * aren't sending all the data.  However,
253
       * if we have exactly 1 byte of unsent data,
254
       * then it won't clear the FIN bit below,
255
       * and if we are in persist state, we wind
256
       * up sending the packet without recording
257
       * that we sent the FIN bit.
258
       *
259
       * We can't just blindly clear the FIN bit,
260
       * because if we don't have any more data
261
       * to send then the probe will be the FIN
262
       * itself.
263
       */
264
      /*
265
       * samkumar: Replaced call to sbused(&so->so_snd) with the call to
266
       * lbuf_used_space below.
267
       */
268
0
      if (off < lbuf_used_space(&tp->sendbuf))
269
0
        flags &= ~TH_FIN;
270
0
      sendwin = 1;
271
0
    } else {
272
0
      tcp_timer_activate(tp, TT_PERSIST, 0);
273
0
      tp->t_rxtshift = 0;
274
0
    }
275
0
  }
276
277
  /*
278
   * If snd_nxt == snd_max and we have transmitted a FIN, the
279
   * offset will be > 0 even if so_snd.sb_cc is 0, resulting in
280
   * a negative length.  This can also occur when TCP opens up
281
   * its congestion window while receiving additional duplicate
282
   * acks after fast-retransmit because TCP will reset snd_nxt
283
   * to snd_max after the fast-retransmit.
284
   *
285
   * In the normal retransmit-FIN-only case, however, snd_nxt will
286
   * be set to snd_una, the offset will be 0, and the length may
287
   * wind up 0.
288
   *
289
   * If sack_rxmit is true we are retransmitting from the scoreboard
290
   * in which case len is already set.
291
   */
292
0
  if (sack_rxmit == 0) {
293
0
    if (sack_bytes_rxmt == 0)
294
      /*
295
       * samkumar: Replaced sbavail(&so->so_snd) with this call to
296
       * lbuf_used_space.
297
       */
298
0
      len = ((long)ulmin(lbuf_used_space(&tp->sendbuf), sendwin) -
299
0
          off);
300
0
    else {
301
0
      long cwin;
302
303
      /*
304
       * We are inside of a SACK recovery episode and are
305
       * sending new data, having retransmitted all the
306
       * data possible in the scoreboard.
307
       */
308
      /*
309
       * samkumar: Replaced sbavail(&so->so_snd) with this call to
310
       * lbuf_used_space.
311
       */
312
0
      len = ((long)ulmin(lbuf_used_space(&tp->sendbuf), tp->snd_wnd) -
313
0
          off);
314
      /*
315
       * Don't remove this (len > 0) check !
316
       * We explicitly check for len > 0 here (although it
317
       * isn't really necessary), to work around a gcc
318
       * optimization issue - to force gcc to compute
319
       * len above. Without this check, the computation
320
       * of len is bungled by the optimizer.
321
       */
322
0
      if (len > 0) {
323
0
        cwin = tp->snd_cwnd -
324
0
          (tp->snd_nxt - tp->sack_newdata) -
325
0
          sack_bytes_rxmt;
326
0
        if (cwin < 0)
327
0
          cwin = 0;
328
0
        len = lmin(len, cwin);
329
0
      }
330
0
    }
331
0
  }
332
333
  /*
334
   * Lop off SYN bit if it has already been sent.  However, if this
335
   * is SYN-SENT state and if segment contains data and if we don't
336
   * know that foreign host supports TAO, suppress sending segment.
337
   */
338
0
  if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
339
0
    if (tp->t_state != TCPS_SYN_RECEIVED)
340
0
      flags &= ~TH_SYN;
341
    /*
342
     * When sending additional segments following a TFO SYN|ACK,
343
     * do not include the SYN bit.
344
     */
345
0
    if (IS_FASTOPEN(tp->t_flags) &&
346
0
        (tp->t_state == TCPS_SYN_RECEIVED))
347
0
      flags &= ~TH_SYN;
348
0
    off--, len++;
349
0
  }
350
351
  /*
352
   * Be careful not to send data and/or FIN on SYN segments.
353
   * This measure is needed to prevent interoperability problems
354
   * with not fully conformant TCP implementations.
355
   */
356
0
  if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
357
0
    len = 0;
358
0
    flags &= ~TH_FIN;
359
0
  }
360
361
  /*
362
   * On TFO sockets, ensure no data is sent in the following cases:
363
   *
364
   *  - When retransmitting SYN|ACK on a passively-created socket
365
   *
366
   *  - When retransmitting SYN on an actively created socket
367
   *
368
   *  - When sending a zero-length cookie (cookie request) on an
369
   *    actively created socket
370
   *
371
   *  - When the socket is in the CLOSED state (RST is being sent)
372
   */
373
  /*
374
   * samkumar: I commented out the check to ensure no data is sent
375
   * on a TFO cookie request. As far as I am aware, this is still
376
   * compliant with the RFC.
377
   */
378
0
  if (IS_FASTOPEN(tp->t_flags) &&
379
0
      (((flags & TH_SYN) && (tp->t_rxtshift > 0)) ||
380
       /*((tp->t_state == TCPS_SYN_SENT) &&
381
        (tp->t_tfo_client_cookie_len == 0)) ||*/
382
0
       (flags & TH_RST)))
383
0
    len = 0;
384
0
  if (len <= 0) {
385
    /*
386
     * If FIN has been sent but not acked,
387
     * but we haven't been called to retransmit,
388
     * len will be < 0.  Otherwise, window shrank
389
     * after we sent into it.  If window shrank to 0,
390
     * cancel pending retransmit, pull snd_nxt back
391
     * to (closed) window, and set the persist timer
392
     * if it isn't already going.  If the window didn't
393
     * close completely, just wait for an ACK.
394
     *
395
     * We also do a general check here to ensure that
396
     * we will set the persist timer when we have data
397
     * to send, but a 0-byte window. This makes sure
398
     * the persist timer is set even if the packet
399
     * hits one of the "goto send" lines below.
400
     */
401
0
    len = 0;
402
    /*
403
     * samkumar: Replaced sbavail(&so->so_snd) with this call to
404
     * lbuf_used_space.
405
     */
406
0
    if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
407
0
      (off < (int) lbuf_used_space(&tp->sendbuf))) {
408
0
      tcp_timer_activate(tp, TT_REXMT, 0);
409
0
      tp->t_rxtshift = 0;
410
0
      tp->snd_nxt = tp->snd_una;
411
0
      if (!tcp_timer_active(tp, TT_PERSIST)) {
412
0
        tcp_setpersist(tp);
413
0
      }
414
0
    }
415
0
  }
416
417
418
  /* len will be >= 0 after this point. */
419
0
  KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
420
421
  /*
422
   * Automatic sizing of send socket buffer.  Often the send buffer
423
   * size is not optimally adjusted to the actual network conditions
424
   * at hand (delay bandwidth product).  Setting the buffer size too
425
   * small limits throughput on links with high bandwidth and high
426
   * delay (eg. trans-continental/oceanic links).  Setting the
427
   * buffer size too big consumes too much real kernel memory,
428
   * especially with many connections on busy servers.
429
   *
430
   * The criteria to step up the send buffer one notch are:
431
   *  1. receive window of remote host is larger than send buffer
432
   *     (with a fudge factor of 5/4th);
433
   *  2. send buffer is filled to 7/8th with data (so we actually
434
   *     have data to make use of it);
435
   *  3. send buffer fill has not hit maximal automatic size;
436
   *  4. our send window (slow start and congestion controlled) is
437
   *     larger than sent but unacknowledged data in send buffer.
438
   *
439
   * The remote host receive window scaling factor may limit the
440
   * growing of the send buffer before it reaches its allowed
441
   * maximum.
442
   *
443
   * It scales directly with slow start or congestion window
444
   * and does at most one step per received ACK.  This fast
445
   * scaling has the drawback of growing the send buffer beyond
446
   * what is strictly necessary to make full use of a given
447
   * delay*bandwidth product.  However testing has shown this not
448
   * to be much of a problem.  At worst we are trading wasting
449
   * of available bandwidth (the non-use of it) for wasting some
450
   * socket buffer memory.
451
   *
452
   * TODO: Shrink send buffer during idle periods together
453
   * with congestion window.  Requires another timer.  Has to
454
   * wait for upcoming tcp timer rewrite.
455
   *
456
   * XXXGL: should there be used sbused() or sbavail()?
457
   */
458
   /*
459
   * samkumar: There used to be code here to dynamically size the
460
   * send buffer (by calling sbreserve_locked). In TCPlp, we don't support
461
   * this, as the send buffer doesn't have a well-defined size (and even if
462
   * we were to use a circular buffer, it would be a fixed-size buffer
463
   * allocated by the application). Therefore, I removed the code that does
464
   * this.
465
   */
466
467
   /*
468
   * samkumar: There used to be code here to handle TCP Segmentation
469
   * Offloading (TSO); I removed it because we don't support that in TCPlp.
470
   */
471
472
0
  if (sack_rxmit) {
473
    /*
474
     * samkumar: Replaced sbused(&so->so_snd) with this call to
475
     * lbuf_used_space.
476
     */
477
0
    if (SEQ_LT(p->rxmit + len, tp->snd_una + lbuf_used_space(&tp->sendbuf)))
478
0
      flags &= ~TH_FIN;
479
0
  } else {
480
0
    if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
481
      /*
482
       * samkumar: Replaced sbused(&so->so_snd) with this call to
483
       * lbuf_used_space.
484
       */
485
0
      lbuf_used_space(&tp->sendbuf)))
486
0
      flags &= ~TH_FIN;
487
0
  }
488
489
  /*
490
   * samkumar: Replaced sbspace(&so->so_rcv) with this call to
491
   * cbuf_free_space.
492
   */
493
0
  recwin = cbuf_free_space(&tp->recvbuf);
494
495
  /*
496
   * Sender silly window avoidance.   We transmit under the following
497
   * conditions when len is non-zero:
498
   *
499
   *  - We have a full segment (or more with TSO)
500
   *  - This is the last buffer in a write()/send() and we are
501
   *    either idle or running NODELAY
502
   *  - we've timed out (e.g. persist timer)
503
   *  - we have more then 1/2 the maximum send window's worth of
504
   *    data (receiver may be limited the window size)
505
   *  - we need to retransmit
506
   */
507
0
  if (len) {
508
0
    if (len >= tp->t_maxseg)
509
0
      goto send;
510
    /*
511
     * NOTE! on localhost connections an 'ack' from the remote
512
     * end may occur synchronously with the output and cause
513
     * us to flush a buffer queued with moretocome.  XXX
514
     *
515
     * note: the len + off check is almost certainly unnecessary.
516
     */
517
    /*
518
     * samkumar: Replaced sbavail(&so->so_snd) with this call to
519
     * lbuf_used_space.
520
     */
521
0
    if (!(tp->t_flags & TF_MORETOCOME) &&  /* normal case */
522
0
        (idle || (tp->t_flags & TF_NODELAY)) &&
523
0
        len + off >= lbuf_used_space(&tp->sendbuf) &&
524
0
        (tp->t_flags & TF_NOPUSH) == 0) {
525
0
      goto send;
526
0
    }
527
0
    if (tp->t_flags & TF_FORCEDATA)   /* typ. timeout case */
528
0
      goto send;
529
0
    if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
530
0
      goto send;
531
0
    if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */
532
0
      goto send;
533
0
    if (sack_rxmit)
534
0
      goto send;
535
0
  }
536
537
  /*
538
   * Sending of standalone window updates.
539
   *
540
   * Window updates are important when we close our window due to a
541
   * full socket buffer and are opening it again after the application
542
   * reads data from it.  Once the window has opened again and the
543
   * remote end starts to send again the ACK clock takes over and
544
   * provides the most current window information.
545
   *
546
   * We must avoid the silly window syndrome whereby every read
547
   * from the receive buffer, no matter how small, causes a window
548
   * update to be sent.  We also should avoid sending a flurry of
549
   * window updates when the socket buffer had queued a lot of data
550
   * and the application is doing small reads.
551
   *
552
   * Prevent a flurry of pointless window updates by only sending
553
   * an update when we can increase the advertised window by more
554
   * than 1/4th of the socket buffer capacity.  When the buffer is
555
   * getting full or is very small be more aggressive and send an
556
   * update whenever we can increase by two mss sized segments.
557
   * In all other situations the ACK's to new incoming data will
558
   * carry further window increases.
559
   *
560
   * Don't send an independent window update if a delayed
561
   * ACK is pending (it will get piggy-backed on it) or the
562
   * remote side already has done a half-close and won't send
563
   * more data.  Skip this if the connection is in T/TCP
564
   * half-open state.
565
   */
566
0
  if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
567
0
      !(tp->t_flags & TF_DELACK) &&
568
0
      !TCPS_HAVERCVDFIN(tp->t_state)) {
569
    /*
570
     * "adv" is the amount we could increase the window,
571
     * taking into account that we are limited by
572
     * TCP_MAXWIN << tp->rcv_scale.
573
     */
574
0
    long adv;
575
0
    int oldwin;
576
577
0
    adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale);
578
0
    if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
579
0
      oldwin = (tp->rcv_adv - tp->rcv_nxt);
580
0
      adv -= oldwin;
581
0
    } else
582
0
      oldwin = 0;
583
584
    /*
585
     * If the new window size ends up being the same as the old
586
     * size when it is scaled, then don't force a window update.
587
     */
588
0
    if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
589
0
      goto dontupdate;
590
591
    /*
592
     * samkumar: Here, FreeBSD has some heuristics to decide whether or
593
     * not to send a window update. The code for the original heuristics
594
     * is commented out, using #if 0. These heuristics compare "adv,"
595
     * the size of the window update, with the size of the local receive
596
     * buffer. The FreeBSD heuristics aren't applicable because they are
597
     * orders of magnitude off from what we see in TCPlp. For example,
598
     * FreeBSD only sends a window update if it is at least two segments
599
     * big. Note that, in the experiments I did, the second case did not
600
     * filter window updates further because, in the experiments, the
601
     * receive buffer was smaller than 8 segments.
602
     *
603
     * I replaced these heuristics with a simpler version, which you can
604
     * see below. For the experiments I did, the first condition
605
     * (checking if adv >= (long)(2 * tp->t_maxseg)) wasn't included; this
606
     * did not matter because the receive buffer was smaller than 8
607
     * segments, so any condition that would have triggered the first
608
     * condition would have triggered the second one anyway. I've included
609
     * the first condition in this version in an effort to be more robust,
610
     * in case someone does try to run TCPlp with a large receive buffer.
611
     *
612
     * It may be worth studying this more and revisiting the heuristic to
613
     * use here. In case we try to resurrect the old FreeBSD heuristics,
614
     * note that so->so_rcv.sb_hiwat in FreeBSD corresponds roughly to
615
     * cbuf_size(&tp->recvbuf) in TCPlp.
616
     */
617
#if 0
618
    if (adv >= (long)(2 * tp->t_maxseg) &&
619
        (adv >= (long)(so->so_rcv.sb_hiwat / 4) ||
620
         recwin <= (long)(so->so_rcv.sb_hiwat / 8) ||
621
         so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg))
622
      goto send;
623
#endif
624
0
    if (adv >= (long)(2 * tp->t_maxseg) ||
625
0
        adv >= (long)cbuf_size(&tp->recvbuf) / 4)
626
0
      goto send;
627
0
  }
628
0
dontupdate:
629
630
  /*
631
   * Send if we owe the peer an ACK, RST, SYN, or urgent data.  ACKNOW
632
   * is also a catch-all for the retransmit timer timeout case.
633
   */
634
0
  if (tp->t_flags & TF_ACKNOW) {
635
0
    goto send;
636
0
  }
637
0
  if ((flags & TH_RST) ||
638
0
      ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
639
0
    goto send;
640
0
  if (SEQ_GT(tp->snd_up, tp->snd_una))
641
0
    goto send;
642
  /*
643
   * If our state indicates that FIN should be sent
644
   * and we have not yet done so, then we need to send.
645
   */
646
0
  if (flags & TH_FIN &&
647
0
      ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
648
0
    goto send;
649
  /*
650
   * In SACK, it is possible for tcplp_output to fail to send a segment
651
   * after the retransmission timer has been turned off.  Make sure
652
   * that the retransmission timer is set.
653
   */
654
0
  if ((tp->t_flags & TF_SACK_PERMIT) &&
655
0
      SEQ_GT(tp->snd_max, tp->snd_una) &&
656
0
      !tcp_timer_active(tp, TT_REXMT) &&
657
0
      !tcp_timer_active(tp, TT_PERSIST)) {
658
0
    tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
659
0
    goto just_return;
660
0
  }
661
662
  /*
663
   * TCP window updates are not reliable, rather a polling protocol
664
   * using ``persist'' packets is used to ensure receipt of window
665
   * updates.  The three ``states'' for the output side are:
666
   *  idle      not doing retransmits or persists
667
   *  persisting    to move a small or zero window
668
   *  (re)transmitting  and thereby not persisting
669
   *
670
   * tcp_timer_active(tp, TT_PERSIST)
671
   *  is true when we are in persist state.
672
   * (tp->t_flags & TF_FORCEDATA)
673
   *  is set when we are called to send a persist packet.
674
   * tcp_timer_active(tp, TT_REXMT)
675
   *  is set when we are retransmitting
676
   * The output side is idle when both timers are zero.
677
   *
678
   * If send window is too small, there is data to transmit, and no
679
   * retransmit or persist is pending, then go to persist state.
680
   * If nothing happens soon, send when timer expires:
681
   * if window is nonzero, transmit what we can,
682
   * otherwise force out a byte.
683
   */
684
  /*
685
   * samkumar: Replaced sbavail(&so->so_snd) with this call to
686
   * lbuf_used_space.
687
   */
688
0
  if (lbuf_used_space(&tp->sendbuf) && !tcp_timer_active(tp, TT_REXMT) &&
689
0
      !tcp_timer_active(tp, TT_PERSIST)) {
690
0
    tp->t_rxtshift = 0;
691
0
    tcp_setpersist(tp);
692
0
  }
693
694
  /*
695
   * No reason to send a segment, just return.
696
   */
697
0
just_return:
698
0
  return (0);
699
700
0
send:
701
0
  if (len > 0) {
702
0
    if (len >= tp->t_maxseg)
703
0
      tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
704
0
    else
705
0
      tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
706
0
  }
707
  /*
708
   * Before ESTABLISHED, force sending of initial options
709
   * unless TCP set not to do any options.
710
   * NOTE: we assume that the IP/TCP header plus TCP options
711
   * always fit in a single mbuf, leaving room for a maximum
712
   * link header, i.e.
713
   *  max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
714
   */
715
0
  optlen = 0;
716
0
  hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
717
718
  /*
719
   * Compute options for segment.
720
   * We only have to care about SYN and established connection
721
   * segments.  Options for SYN-ACK segments are handled in TCP
722
   * syncache.
723
   */
724
  /*
725
   * samkumar: I've done away with the syncache. However, it
726
   * seems that the existing logic works fine for SYN-ACK as
727
   * well.
728
   */
729
0
  to.to_flags = 0;
730
0
  if ((tp->t_flags & TF_NOOPT) == 0) {
731
    /* Maximum segment size. */
732
0
    if (flags & TH_SYN) {
733
0
      tp->snd_nxt = tp->iss;
734
0
      to.to_mss = tcp_mssopt(tp);
735
0
      to.to_flags |= TOF_MSS;
736
737
      /*
738
       * On SYN or SYN|ACK transmits on TFO connections,
739
       * only include the TFO option if it is not a
740
       * retransmit, as the presence of the TFO option may
741
       * have caused the original SYN or SYN|ACK to have
742
       * been dropped by a middlebox.
743
       */
744
0
      if (IS_FASTOPEN(tp->t_flags) &&
745
0
          (tp->t_rxtshift == 0)) {
746
0
        if (tp->t_state == TCPS_SYN_RECEIVED) {
747
0
          to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
748
0
          to.to_tfo_cookie =
749
0
              (u_int8_t *)&tp->t_tfo_cookie.server;
750
0
          to.to_flags |= TOF_FASTOPEN;
751
0
          wanted_cookie = 1;
752
0
        } else if (tp->t_state == TCPS_SYN_SENT) {
753
0
          to.to_tfo_len =
754
0
              tp->t_tfo_client_cookie_len;
755
0
          to.to_tfo_cookie =
756
0
              tp->t_tfo_cookie.client;
757
0
          to.to_flags |= TOF_FASTOPEN;
758
0
          wanted_cookie = 1;
759
          /*
760
           * If we wind up having more data to
761
           * send with the SYN than can fit in
762
           * one segment, don't send any more
763
           * until the SYN|ACK comes back from
764
           * the other end.
765
           */
766
0
          dont_sendalot = 1;
767
0
        }
768
0
      }
769
0
    }
770
    /* Window scaling. */
771
0
    if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
772
0
      to.to_wscale = tp->request_r_scale;
773
0
      to.to_flags |= TOF_SCALE;
774
0
    }
775
    /* Timestamps. */
776
0
    if ((tp->t_flags & TF_RCVD_TSTMP) ||
777
0
        ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
778
0
      to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
779
0
      to.to_tsecr = tp->ts_recent;
780
0
      to.to_flags |= TOF_TS;
781
      /*
782
       * samkumar: I removed the code to set the timestamp tp->rfbuf_ts
783
       * for receive buffer autosizing, since we don't do autosizing on
784
       * the receive buffer in TCPlp.
785
       */
786
0
    }
787
788
    /* Selective ACK's. */
789
0
    if (tp->t_flags & TF_SACK_PERMIT) {
790
0
      if (flags & TH_SYN)
791
0
        to.to_flags |= TOF_SACKPERM;
792
0
      else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
793
0
          (tp->t_flags & TF_SACK_PERMIT) &&
794
0
          tp->rcv_numsacks > 0) {
795
0
        to.to_flags |= TOF_SACK;
796
0
        to.to_nsacks = tp->rcv_numsacks;
797
0
        to.to_sacks = (uint8_t *)tp->sackblks;
798
0
      }
799
0
    }
800
801
    /*
802
     * samkumar: Remove logic to set TOF_SIGNATURE flag in to.to_flags,
803
     * since TCPlp does not support TCP signatures.
804
     */
805
806
    /* Processing the options. */
807
0
    hdrlen += optlen = tcp_addoptions(&to, opt);
808
    /*
809
     * If we wanted a TFO option to be added, but it was unable
810
     * to fit, ensure no data is sent.
811
     */
812
0
    if (IS_FASTOPEN(tp->t_flags) && wanted_cookie &&
813
0
        !(to.to_flags & TOF_FASTOPEN))
814
0
      len = 0;
815
0
  }
816
  /*
817
   * samkumar: This used to be set to ip6_optlen(tp->t_inpcb), instead of 0,
818
   * along with some additional code to handle IPSEC. In TCPlp we don't set
819
   * IPv6 options here; we expect those to be set by the host network stack.
820
   * Of course, code that supports IPv4 has been removed as well.
821
   */
822
0
  ipoptlen = 0;
823
824
  /*
825
   * Adjust data length if insertion of options will
826
   * bump the packet length beyond the t_maxopd length.
827
   * Clear the FIN bit because we cut off the tail of
828
   * the segment.
829
   */
830
0
  if (len + optlen + ipoptlen > tp->t_maxopd) {
831
0
    flags &= ~TH_FIN;
832
    /*
833
     * samkumar: Remove code for TCP segmentation offloading.
834
     */
835
0
    len = tp->t_maxopd - optlen - ipoptlen;
836
0
    sendalot = 1;
837
0
    if (dont_sendalot)
838
0
        sendalot = 0;
839
0
  }
840
  /*
841
   * samkumar: The else case of the above "if" statement would set tso to 0.
842
   * Removing this since we no longer need a tso variable.
843
   */
844
0
  KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
845
0
      ("%s: len > IP_MAXPACKET", __func__));
846
847
  /*
848
   * This KASSERT is here to catch edge cases at a well defined place.
849
   * Before, those had triggered (random) panic conditions further down.
850
   */
851
0
  KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
852
853
  /*
854
   * Grab a header mbuf, attaching a copy of data to
855
   * be transmitted, and initialize the header from
856
   * the template for sends on this connection.
857
   */
858
859
  /*
860
   * samkumar: The code to allocate, build, and send outgoing segments has
861
   * been rewritten. I've left the original code to build the output mbuf
862
   * here in a comment, for reference. The new code is below.
863
   */
864
#if 0
865
  if (len) {
866
    struct mbuf *mb;
867
    uint32_t moff;
868
869
    if ((tp->t_flags & TF_FORCEDATA) && len == 1)
870
      TCPSTAT_INC(tcps_sndprobe);
871
    else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
872
      tp->t_sndrexmitpack++;
873
      TCPSTAT_INC(tcps_sndrexmitpack);
874
      TCPSTAT_ADD(tcps_sndrexmitbyte, len);
875
    } else {
876
      TCPSTAT_INC(tcps_sndpack);
877
      TCPSTAT_ADD(tcps_sndbyte, len);
878
    }
879
#ifdef INET6
880
    if (MHLEN < hdrlen + max_linkhdr)
881
      m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
882
    else
883
#endif
884
      m = m_gethdr(M_NOWAIT, MT_DATA);
885
886
    if (m == NULL) {
887
      SOCKBUF_UNLOCK(&so->so_snd);
888
      error = ENOBUFS;
889
      sack_rxmit = 0;
890
      goto out;
891
    }
892
893
    m->m_data += max_linkhdr;
894
    m->m_len = hdrlen;
895
896
    /*
897
     * Start the m_copy functions from the closest mbuf
898
     * to the offset in the socket buffer chain.
899
     */
900
    mb = sbsndptr(&so->so_snd, off, len, &moff);
901
902
    if (len <= MHLEN - hdrlen - max_linkhdr) {
903
      m_copydata(mb, moff, (int)len,
904
          mtod(m, caddr_t) + hdrlen);
905
      m->m_len += len;
906
    } else {
907
      m->m_next = m_copy(mb, moff, (int)len);
908
      if (m->m_next == NULL) {
909
        SOCKBUF_UNLOCK(&so->so_snd);
910
        (void) m_free(m);
911
        error = ENOBUFS;
912
        sack_rxmit = 0;
913
        goto out;
914
      }
915
    }
916
917
    /*
918
     * If we're sending everything we've got, set PUSH.
919
     * (This will keep happy those implementations which only
920
     * give data to the user when a buffer fills or
921
     * a PUSH comes in.)
922
     */
923
    if (off + len == sbused(&so->so_snd))
924
      flags |= TH_PUSH;
925
    SOCKBUF_UNLOCK(&so->so_snd);
926
  } else {
927
    SOCKBUF_UNLOCK(&so->so_snd);
928
    if (tp->t_flags & TF_ACKNOW)
929
      TCPSTAT_INC(tcps_sndacks);
930
    else if (flags & (TH_SYN|TH_FIN|TH_RST))
931
      TCPSTAT_INC(tcps_sndctrl);
932
    else if (SEQ_GT(tp->snd_up, tp->snd_una))
933
      TCPSTAT_INC(tcps_sndurg);
934
    else
935
      TCPSTAT_INC(tcps_sndwinup);
936
937
    m = m_gethdr(M_NOWAIT, MT_DATA);
938
    if (m == NULL) {
939
      error = ENOBUFS;
940
      sack_rxmit = 0;
941
      goto out;
942
    }
943
#ifdef INET6
944
    if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
945
        MHLEN >= hdrlen) {
946
      M_ALIGN(m, hdrlen);
947
    } else
948
#endif
949
    m->m_data += max_linkhdr;
950
    m->m_len = hdrlen;
951
  }
952
#endif
953
954
0
  KASSERT(ipoptlen == 0, ("No IP options supported")); // samkumar
955
956
0
  otMessage* message = tcplp_sys_new_message(tp->instance);
957
0
  if (message == NULL) {
958
0
    error = ENOBUFS;
959
0
    sack_rxmit = 0;
960
0
    goto out;
961
0
  }
962
0
  if (otMessageSetLength(message, sizeof(struct tcphdr) + optlen + len) != OT_ERROR_NONE) {
963
0
    tcplp_sys_free_message(tp->instance, message);
964
0
    error = ENOBUFS;
965
0
    sack_rxmit = 0;
966
0
    goto out;
967
0
  }
968
0
  if (len) {
969
0
      uint32_t used_space = lbuf_used_space(&tp->sendbuf);
970
971
    /*
972
     * The TinyOS version has a way to avoid the copying we have to do here.
973
     * Because it is possible to send iovecs directly in the BLIP stack, and
974
     * an lbuf is made of iovecs, we could just "save" the starting and ending
975
     * iovecs, modify them to get exactly the slice we want, call "send" on
976
     * the resulting chain, and then restore the starting and ending iovecs
977
     * once "send" returns.
978
     *
979
     * In RIOT, pktsnips have additional behavior regarding memory management
980
     * that precludes this optimization. But, now that we have moved to
981
     * cbufs, this is not relevant anymore.
982
     */
983
0
    {
984
0
      otLinkedBuffer* start;
985
0
      size_t start_offset;
986
0
      otLinkedBuffer* end;
987
0
      size_t end_offset;
988
0
      otLinkedBuffer* curr;
989
0
      int rv = lbuf_getrange(&tp->sendbuf, off, len, &start, &start_offset, &end, &end_offset);
990
0
      size_t message_offset = otMessageGetOffset(message) + sizeof(struct tcphdr) + optlen;
991
0
      KASSERT(rv == 0, ("Reading send buffer out of range!"));
992
0
      for (curr = start; curr != end->mNext; curr = curr->mNext) {
993
0
        const uint8_t* data_to_copy = curr->mData;
994
0
        size_t length_to_copy = curr->mLength;
995
0
        if (curr == start) {
996
0
          data_to_copy += start_offset;
997
0
          length_to_copy -= start_offset;
998
0
        }
999
0
        if (curr == end) {
1000
0
          length_to_copy -= end_offset;
1001
0
        }
1002
0
        otMessageWrite(message, message_offset, data_to_copy, length_to_copy);
1003
0
        message_offset += length_to_copy;
1004
0
      }
1005
0
    }
1006
1007
    /*
1008
     * If we're sending everything we've got, set PUSH.
1009
     * (This will keep happy those implementations which only
1010
     * give data to the user when a buffer fills or
1011
     * a PUSH comes in.)
1012
     */
1013
    /* samkumar: Replaced call to sbused(&so->so_snd) with used_space. */
1014
0
    if (off + len == used_space)
1015
0
      flags |= TH_PUSH;
1016
0
  }
1017
1018
0
  char outbuf[sizeof(struct tcphdr) + TCP_MAXOLEN];
1019
0
  th = (struct tcphdr*) (&outbuf[0]);
1020
1021
  /*
1022
   * samkumar: I replaced the original call to tcpip_fillheaders with the
1023
   * one below.
1024
   */
1025
0
  otMessageInfo ip6info;
1026
0
  tcpip_fillheaders(tp, &ip6info, th);
1027
1028
  /*
1029
   * Fill in fields, remembering maximum advertised
1030
   * window for use in delaying messages about window sizes.
1031
   * If resending a FIN, be sure not to use a new sequence number.
1032
   */
1033
0
  if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
1034
0
      tp->snd_nxt == tp->snd_max)
1035
0
    tp->snd_nxt--;
1036
  /*
1037
   * If we are starting a connection, send ECN setup
1038
   * SYN packet. If we are on a retransmit, we may
1039
   * resend those bits a number of times as per
1040
   * RFC 3168.
1041
   */
1042
0
  if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
1043
0
    if (tp->t_rxtshift >= 1) {
1044
0
      if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
1045
0
        flags |= TH_ECE|TH_CWR;
1046
0
    } else
1047
0
      flags |= TH_ECE|TH_CWR;
1048
0
  }
1049
1050
  /*
1051
   * samkumar: Make tcplp_output reply with ECE flag in the SYN-ACK for
1052
   * ECN-enabled connections. The existing code in FreeBSD didn't have to do
1053
   * this, because it didn't use tcplp_output to send the SYN-ACK; it
1054
   * constructed the SYN-ACK segment manually. Yet another consequnce of
1055
   * removing the SYN cache...
1056
   */
1057
0
  if (tp->t_state == TCPS_SYN_RECEIVED && tp->t_flags & TF_ECN_PERMIT &&
1058
0
    V_tcp_do_ecn) {
1059
0
    flags |= TH_ECE;
1060
0
  }
1061
1062
0
  if (tp->t_state == TCPS_ESTABLISHED &&
1063
0
      (tp->t_flags & TF_ECN_PERMIT)) {
1064
    /*
1065
     * If the peer has ECN, mark data packets with
1066
     * ECN capable transmission (ECT).
1067
     * Ignore pure ack packets, retransmissions and window probes.
1068
     */
1069
0
    if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
1070
0
        !((tp->t_flags & TF_FORCEDATA) && len == 1)) {
1071
      /*
1072
       * samkumar: Replaced ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
1073
       * with the following code, which will cause OpenThread to set the
1074
       * ECT0 bit in the header.
1075
       */
1076
0
      ip6info.mEcn = OT_ECN_CAPABLE_0;
1077
0
    }
1078
1079
    /*
1080
     * Reply with proper ECN notifications.
1081
     */
1082
0
    if (tp->t_flags & TF_ECN_SND_CWR) {
1083
0
      flags |= TH_CWR;
1084
0
      tp->t_flags &= ~TF_ECN_SND_CWR;
1085
0
    }
1086
0
    if (tp->t_flags & TF_ECN_SND_ECE)
1087
0
      flags |= TH_ECE;
1088
0
  }
1089
1090
  /*
1091
   * If we are doing retransmissions, then snd_nxt will
1092
   * not reflect the first unsent octet.  For ACK only
1093
   * packets, we do not want the sequence number of the
1094
   * retransmitted packet, we want the sequence number
1095
   * of the next unsent octet.  So, if there is no data
1096
   * (and no SYN or FIN), use snd_max instead of snd_nxt
1097
   * when filling in ti_seq.  But if we are in persist
1098
   * state, snd_max might reflect one byte beyond the
1099
   * right edge of the window, so use snd_nxt in that
1100
   * case, since we know we aren't doing a retransmission.
1101
   * (retransmit and persist are mutually exclusive...)
1102
   */
1103
0
  if (sack_rxmit == 0) {
1104
0
    if (len || (flags & (TH_SYN|TH_FIN)) ||
1105
0
        tcp_timer_active(tp, TT_PERSIST))
1106
0
      th->th_seq = htonl(tp->snd_nxt);
1107
0
    else
1108
0
      th->th_seq = htonl(tp->snd_max);
1109
0
  } else {
1110
0
    th->th_seq = htonl(p->rxmit);
1111
0
    p->rxmit += len;
1112
0
    tp->sackhint.sack_bytes_rexmit += len;
1113
0
  }
1114
1115
  /*
1116
   * samkumar: Check if this is a retransmission (added as part of TCPlp).
1117
   * This kind of stats collection is useful but not necessary for TCP, so
1118
   * I've left it as a comment in case we want to bring this back to measure
1119
   * performance.
1120
   */
1121
#if 0
1122
  if (len > 0 && !tcp_timer_active(tp, TT_PERSIST) && SEQ_LT(ntohl(th->th_seq), tp->snd_max)) {
1123
    tcplp_totalRexmitCnt++;
1124
  }
1125
#endif
1126
1127
0
  th->th_ack = htonl(tp->rcv_nxt);
1128
0
  if (optlen) {
1129
0
    bcopy(opt, th + 1, optlen);
1130
0
    th->th_off_x2 = ((sizeof (struct tcphdr) + optlen) >> 2) << TH_OFF_SHIFT;
1131
0
  }
1132
0
  th->th_flags = flags;
1133
  /*
1134
   * Calculate receive window.  Don't shrink window,
1135
   * but avoid silly window syndrome.
1136
   */
1137
  /* samkumar: Replaced so->so_rcv.sb_hiwat with this call to cbuf_size. */
1138
0
  if (recwin < (long)(cbuf_size(&tp->recvbuf) / 4) &&
1139
0
      recwin < (long)tp->t_maxseg)
1140
0
    recwin = 0;
1141
0
  if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
1142
0
      recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
1143
0
    recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
1144
0
  if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
1145
0
    recwin = (long)TCP_MAXWIN << tp->rcv_scale;
1146
1147
  /*
1148
   * According to RFC1323 the window field in a SYN (i.e., a <SYN>
1149
   * or <SYN,ACK>) segment itself is never scaled.  The <SYN,ACK>
1150
   * case is handled in syncache.
1151
   */
1152
0
  if (flags & TH_SYN)
1153
0
    th->th_win = htons((uint16_t)
1154
0
        (min(cbuf_size(&tp->recvbuf), TCP_MAXWIN)));
1155
0
  else
1156
0
    th->th_win = htons((uint16_t)(recwin >> tp->rcv_scale));
1157
1158
  /*
1159
   * Adjust the RXWIN0SENT flag - indicate that we have advertised
1160
   * a 0 window.  This may cause the remote transmitter to stall.  This
1161
   * flag tells soreceive() to disable delayed acknowledgements when
1162
   * draining the buffer.  This can occur if the receiver is attempting
1163
   * to read more data than can be buffered prior to transmitting on
1164
   * the connection.
1165
   */
1166
0
  if (th->th_win == 0) {
1167
0
    tp->t_flags |= TF_RXWIN0SENT;
1168
0
  } else
1169
0
    tp->t_flags &= ~TF_RXWIN0SENT;
1170
0
  if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
1171
0
    th->th_urp = htons((uint16_t)(tp->snd_up - tp->snd_nxt));
1172
0
    th->th_flags |= TH_URG;
1173
0
  } else
1174
    /*
1175
     * If no urgent pointer to send, then we pull
1176
     * the urgent pointer to the left edge of the send window
1177
     * so that it doesn't drift into the send window on sequence
1178
     * number wraparound.
1179
     */
1180
0
    tp->snd_up = tp->snd_una;   /* drag it along */
1181
1182
  /*
1183
   * samkumar: Removed code for TCP signatures.
1184
   */
1185
  /*
1186
   * Put TCP length in extended header, and then
1187
   * checksum extended header and data.
1188
   */
1189
  /*
1190
   * samkumar: The code to implement the above comment isn't relevant to us.
1191
   * Checksum computation is not handled using FreeBSD code, so we don't need
1192
   * to build an extended header.
1193
   */
1194
  /*
1195
   * samkumar: Removed code for TCP Segmentation Offloading.
1196
   */
1197
  /* samkumar: Removed mbuf-specific assertions an debug code. */
1198
  /*
1199
   * Fill in IP length and desired time to live and
1200
   * send to IP level.  There should be a better way
1201
   * to handle ttl and tos; we could keep them in
1202
   * the template, but need a way to checksum without them.
1203
   */
1204
  /*
1205
   * m->m_pkthdr.len should have been set before checksum calculation,
1206
   * because in6_cksum() need it.
1207
   */
1208
  /*
1209
   * samkumar: The IPv6 packet length and hop limit are handled by the host
1210
   * network stack, not by TCPlp. I've also removed code for Path MTU
1211
   * discovery. And of course, I've removed debug code as well.
1212
   */
1213
  /* samkumar: I've replaced the call to ip6_output with the following. */
1214
0
  otMessageWrite(message, 0, outbuf, sizeof(struct tcphdr) + optlen);
1215
0
  tcplp_sys_send_message(tp->instance, message, &ip6info);
1216
1217
0
out:
1218
  /*
1219
   * In transmit state, time the transmission and arrange for
1220
   * the retransmit.  In persist state, just set snd_max.
1221
   */
1222
0
  if ((tp->t_flags & TF_FORCEDATA) == 0 ||
1223
0
      !tcp_timer_active(tp, TT_PERSIST)) {
1224
0
    tcp_seq startseq = tp->snd_nxt;
1225
1226
    /*
1227
     * Advance snd_nxt over sequence space of this segment.
1228
     */
1229
0
    if (flags & (TH_SYN|TH_FIN)) {
1230
0
      if (flags & TH_SYN)
1231
0
        tp->snd_nxt++;
1232
0
      if (flags & TH_FIN) {
1233
0
        tp->snd_nxt++;
1234
0
        tp->t_flags |= TF_SENTFIN;
1235
0
      }
1236
0
    }
1237
0
    if (sack_rxmit)
1238
0
      goto timer;
1239
0
    tp->snd_nxt += len;
1240
0
    if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
1241
0
      tp->snd_max = tp->snd_nxt;
1242
      /*
1243
       * Time this transmission if not a retransmission and
1244
       * not currently timing anything.
1245
       */
1246
0
      if (tp->t_rtttime == 0) {
1247
0
        tp->t_rtttime = ticks;
1248
0
        tp->t_rtseq = startseq;
1249
0
      }
1250
0
    }
1251
1252
    /*
1253
     * Set retransmit timer if not currently set,
1254
     * and not doing a pure ack or a keep-alive probe.
1255
     * Initial value for retransmit timer is smoothed
1256
     * round-trip time + 2 * round-trip time variance.
1257
     * Initialize shift counter which is used for backoff
1258
     * of retransmit time.
1259
     */
1260
0
timer:
1261
0
    if (!tcp_timer_active(tp, TT_REXMT) &&
1262
0
        ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
1263
0
         (tp->snd_nxt != tp->snd_una))) {
1264
0
      if (tcp_timer_active(tp, TT_PERSIST)) {
1265
0
        tcp_timer_activate(tp, TT_PERSIST, 0);
1266
0
        tp->t_rxtshift = 0;
1267
0
      }
1268
0
      tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
1269
      /*
1270
       * samkumar: Replaced sbavail(&so->so_snd) with this call to
1271
       * lbuf_used_space.
1272
       */
1273
0
    } else if (len == 0 && lbuf_used_space(&tp->sendbuf) &&
1274
0
        !tcp_timer_active(tp, TT_REXMT) &&
1275
0
        !tcp_timer_active(tp, TT_PERSIST)) {
1276
      /*
1277
       * Avoid a situation where we do not set persist timer
1278
       * after a zero window condition. For example:
1279
       * 1) A -> B: packet with enough data to fill the window
1280
       * 2) B -> A: ACK for #1 + new data (0 window
1281
       *    advertisement)
1282
       * 3) A -> B: ACK for #2, 0 len packet
1283
       *
1284
       * In this case, A will not activate the persist timer,
1285
       * because it chose to send a packet. Unless tcplp_output
1286
       * is called for some other reason (delayed ack timer,
1287
       * another input packet from B, socket syscall), A will
1288
       * not send zero window probes.
1289
       *
1290
       * So, if you send a 0-length packet, but there is data
1291
       * in the socket buffer, and neither the rexmt or
1292
       * persist timer is already set, then activate the
1293
       * persist timer.
1294
       */
1295
0
      tp->t_rxtshift = 0;
1296
0
      tcp_setpersist(tp);
1297
0
    }
1298
0
  } else {
1299
    /*
1300
     * Persist case, update snd_max but since we are in
1301
     * persist mode (no window) we do not update snd_nxt.
1302
     */
1303
0
    int xlen = len;
1304
0
    if (flags & TH_SYN)
1305
0
      ++xlen;
1306
0
    if (flags & TH_FIN) {
1307
0
      ++xlen;
1308
0
      tp->t_flags |= TF_SENTFIN;
1309
0
    }
1310
0
    if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
1311
0
      tp->snd_max = tp->snd_nxt + len;
1312
0
  }
1313
1314
0
  if (error) {
1315
1316
    /*
1317
     * We know that the packet was lost, so back out the
1318
     * sequence number advance, if any.
1319
     *
1320
     * If the error is EPERM the packet got blocked by the
1321
     * local firewall.  Normally we should terminate the
1322
     * connection but the blocking may have been spurious
1323
     * due to a firewall reconfiguration cycle.  So we treat
1324
     * it like a packet loss and let the retransmit timer and
1325
     * timeouts do their work over time.
1326
     * XXX: It is a POLA question whether calling tcp_drop right
1327
     * away would be the really correct behavior instead.
1328
     */
1329
0
    if (((tp->t_flags & TF_FORCEDATA) == 0 ||
1330
0
        !tcp_timer_active(tp, TT_PERSIST)) &&
1331
0
        ((flags & TH_SYN) == 0) &&
1332
0
        (error != EPERM)) {
1333
0
      if (sack_rxmit) {
1334
0
        p->rxmit -= len;
1335
0
        tp->sackhint.sack_bytes_rexmit -= len;
1336
0
        KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
1337
0
            ("sackhint bytes rtx >= 0"));
1338
0
      } else
1339
0
        tp->snd_nxt -= len;
1340
0
    }
1341
0
    switch (error) {
1342
0
    case EPERM:
1343
0
      tp->t_softerror = error;
1344
0
      return (error);
1345
0
    case ENOBUFS:
1346
0
                  if (!tcp_timer_active(tp, TT_REXMT) &&
1347
0
          !tcp_timer_active(tp, TT_PERSIST))
1348
0
                          tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
1349
0
      tp->snd_cwnd = tp->t_maxseg;
1350
#ifdef INSTRUMENT_TCP
1351
      tcplp_sys_log("TCP ALLOCFAIL %u %d", (unsigned int) tcplp_sys_get_millis(), (int) tp->snd_cwnd);
1352
#endif
1353
0
      return (0);
1354
0
    case EMSGSIZE:
1355
      /*
1356
       * For some reason the interface we used initially
1357
       * to send segments changed to another or lowered
1358
       * its MTU.
1359
       * If TSO was active we either got an interface
1360
       * without TSO capabilits or TSO was turned off.
1361
       * If we obtained mtu from ip_output() then update
1362
       * it and try again.
1363
       */
1364
      /* samkumar: Removed code for TCP Segmentation Offloading. */
1365
0
      if (mtu != 0) {
1366
0
        tcp_mss_update(tp, -1, mtu, NULL, NULL);
1367
0
        goto again;
1368
0
      }
1369
0
      return (error);
1370
0
    case EHOSTDOWN:
1371
0
    case EHOSTUNREACH:
1372
0
    case ENETDOWN:
1373
0
    case ENETUNREACH:
1374
0
      if (TCPS_HAVERCVDSYN(tp->t_state)) {
1375
0
        tp->t_softerror = error;
1376
0
        return (0);
1377
0
      }
1378
      /* FALLTHROUGH */
1379
0
    default:
1380
0
      return (error);
1381
0
    }
1382
0
  }
1383
1384
  /*
1385
   * Data sent (as far as we can tell).
1386
   * If this advertises a larger window than any other segment,
1387
   * then remember the size of the advertised window.
1388
   * Any pending ACK has now been sent.
1389
   */
1390
0
  if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
1391
0
    tp->rcv_adv = tp->rcv_nxt + recwin;
1392
0
  tp->last_ack_sent = tp->rcv_nxt;
1393
0
  tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
1394
0
  if (tcp_timer_active(tp, TT_DELACK))
1395
0
    tcp_timer_activate(tp, TT_DELACK, 0);
1396
1397
  /*
1398
   * samkumar: This was already commented out (using #if 0) in the original
1399
   * FreeBSD code.
1400
   */
1401
#if 0
1402
  /*
1403
   * This completely breaks TCP if newreno is turned on.  What happens
1404
   * is that if delayed-acks are turned on on the receiver, this code
1405
   * on the transmitter effectively destroys the TCP window, forcing
1406
   * it to four packets (1.5Kx4 = 6K window).
1407
   */
1408
  if (sendalot && --maxburst)
1409
    goto again;
1410
#endif
1411
0
  if (sendalot)
1412
0
    goto again;
1413
0
  return (0);
1414
0
}
1415
1416
/*
1417
 * Insert TCP options according to the supplied parameters to the place
1418
 * optp in a consistent way.  Can handle unaligned destinations.
1419
 *
1420
 * The order of the option processing is crucial for optimal packing and
1421
 * alignment for the scarce option space.
1422
 *
1423
 * The optimal order for a SYN/SYN-ACK segment is:
1424
 *   MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
1425
 *   Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
1426
 *
1427
 * The SACK options should be last.  SACK blocks consume 8*n+2 bytes.
1428
 * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
1429
 * At minimum we need 10 bytes (to generate 1 SACK block).  If both
1430
 * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
1431
 * we only have 10 bytes for SACK options (40 - (12 + 18)).
1432
 */
1433
int
1434
tcp_addoptions(struct tcpopt *to, uint8_t *optp)
1435
0
{
1436
0
  uint32_t mask, optlen = 0;
1437
1438
0
  for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
1439
0
    if ((to->to_flags & mask) != mask)
1440
0
      continue;
1441
0
    if (optlen == TCP_MAXOLEN)
1442
0
      break;
1443
0
    switch (to->to_flags & mask) {
1444
0
    case TOF_MSS:
1445
0
      while (optlen % 4) {
1446
0
        optlen += TCPOLEN_NOP;
1447
0
        *optp++ = TCPOPT_NOP;
1448
0
      }
1449
0
      if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG)
1450
0
        continue;
1451
0
      optlen += TCPOLEN_MAXSEG;
1452
0
      *optp++ = TCPOPT_MAXSEG;
1453
0
      *optp++ = TCPOLEN_MAXSEG;
1454
0
      to->to_mss = htons(to->to_mss);
1455
0
      bcopy((uint8_t *)&to->to_mss, optp, sizeof(to->to_mss));
1456
0
      optp += sizeof(to->to_mss);
1457
0
      break;
1458
0
    case TOF_SCALE:
1459
0
      while (!optlen || optlen % 2 != 1) {
1460
0
        optlen += TCPOLEN_NOP;
1461
0
        *optp++ = TCPOPT_NOP;
1462
0
      }
1463
0
      if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW)
1464
0
        continue;
1465
0
      optlen += TCPOLEN_WINDOW;
1466
0
      *optp++ = TCPOPT_WINDOW;
1467
0
      *optp++ = TCPOLEN_WINDOW;
1468
0
      *optp++ = to->to_wscale;
1469
0
      break;
1470
0
    case TOF_SACKPERM:
1471
0
      while (optlen % 2) {
1472
0
        optlen += TCPOLEN_NOP;
1473
0
        *optp++ = TCPOPT_NOP;
1474
0
      }
1475
0
      if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED)
1476
0
        continue;
1477
0
      optlen += TCPOLEN_SACK_PERMITTED;
1478
0
      *optp++ = TCPOPT_SACK_PERMITTED;
1479
0
      *optp++ = TCPOLEN_SACK_PERMITTED;
1480
0
      break;
1481
0
    case TOF_TS:
1482
0
      while (!optlen || optlen % 4 != 2) {
1483
0
        optlen += TCPOLEN_NOP;
1484
0
        *optp++ = TCPOPT_NOP;
1485
0
      }
1486
0
      if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP)
1487
0
        continue;
1488
0
      optlen += TCPOLEN_TIMESTAMP;
1489
0
      *optp++ = TCPOPT_TIMESTAMP;
1490
0
      *optp++ = TCPOLEN_TIMESTAMP;
1491
0
      to->to_tsval = htonl(to->to_tsval);
1492
0
      to->to_tsecr = htonl(to->to_tsecr);
1493
0
      bcopy((uint8_t *)&to->to_tsval, optp, sizeof(to->to_tsval));
1494
0
      optp += sizeof(to->to_tsval);
1495
0
      bcopy((uint8_t *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
1496
0
      optp += sizeof(to->to_tsecr);
1497
0
      break;
1498
0
    case TOF_SIGNATURE:
1499
0
      {
1500
0
      int siglen = TCPOLEN_SIGNATURE - 2;
1501
1502
0
      while (!optlen || optlen % 4 != 2) {
1503
0
        optlen += TCPOLEN_NOP;
1504
0
        *optp++ = TCPOPT_NOP;
1505
0
      }
1506
0
      if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE)
1507
0
        continue;
1508
0
      optlen += TCPOLEN_SIGNATURE;
1509
0
      *optp++ = TCPOPT_SIGNATURE;
1510
0
      *optp++ = TCPOLEN_SIGNATURE;
1511
0
      to->to_signature = optp;
1512
0
      while (siglen--)
1513
0
         *optp++ = 0;
1514
0
      break;
1515
0
      }
1516
0
    case TOF_SACK:
1517
0
      {
1518
0
      int sackblks = 0;
1519
0
      struct sackblk *sack = (struct sackblk *)to->to_sacks;
1520
0
      tcp_seq sack_seq;
1521
1522
0
      while (!optlen || optlen % 4 != 2) {
1523
0
        optlen += TCPOLEN_NOP;
1524
0
        *optp++ = TCPOPT_NOP;
1525
0
      }
1526
0
      if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK)
1527
0
        continue;
1528
0
      optlen += TCPOLEN_SACKHDR;
1529
0
      *optp++ = TCPOPT_SACK;
1530
0
      sackblks = min(to->to_nsacks,
1531
0
          (TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
1532
0
      *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
1533
0
      while (sackblks--) {
1534
0
        sack_seq = htonl(sack->start);
1535
0
        bcopy((uint8_t *)&sack_seq, optp, sizeof(sack_seq));
1536
0
        optp += sizeof(sack_seq);
1537
0
        sack_seq = htonl(sack->end);
1538
0
        bcopy((uint8_t *)&sack_seq, optp, sizeof(sack_seq));
1539
0
        optp += sizeof(sack_seq);
1540
0
        optlen += TCPOLEN_SACK;
1541
0
        sack++;
1542
0
      }
1543
      /* samkumar: Removed TCPSTAT_INC(tcps_sack_send_blocks); */
1544
0
      break;
1545
0
      }
1546
0
    case TOF_FASTOPEN:
1547
0
      {
1548
0
      int total_len;
1549
1550
      /* XXX is there any point to aligning this option? */
1551
0
      total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
1552
0
      if (TCP_MAXOLEN - optlen < total_len) {
1553
0
        to->to_flags &= ~TOF_FASTOPEN;
1554
0
        continue;
1555
0
      }
1556
0
      *optp++ = TCPOPT_FAST_OPEN;
1557
0
      *optp++ = total_len;
1558
0
      if (to->to_tfo_len > 0) {
1559
0
        bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
1560
0
        optp += to->to_tfo_len;
1561
0
      }
1562
0
      optlen += total_len;
1563
0
      break;
1564
0
      }
1565
0
    default:
1566
0
      tcplp_sys_panic("PANIC: %s: unknown TCP option type", __func__);
1567
0
      break;
1568
0
    }
1569
0
  }
1570
1571
  /* Terminate and pad TCP options to a 4 byte boundary. */
1572
0
  if (optlen % 4) {
1573
0
    optlen += TCPOLEN_EOL;
1574
0
    *optp++ = TCPOPT_EOL;
1575
0
  }
1576
  /*
1577
   * According to RFC 793 (STD0007):
1578
   *   "The content of the header beyond the End-of-Option option
1579
   *    must be header padding (i.e., zero)."
1580
   *   and later: "The padding is composed of zeros."
1581
   */
1582
0
  while (optlen % 4) {
1583
0
    optlen += TCPOLEN_PAD;
1584
0
    *optp++ = TCPOPT_PAD;
1585
0
  }
1586
1587
0
  KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
1588
0
  return (optlen);
1589
0
}