Coverage Report

Created: 2025-06-13 06:12

/src/haproxy/src/proto_tcp.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
3
 *
4
 * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version
9
 * 2 of the License, or (at your option) any later version.
10
 *
11
 */
12
13
/* this is to have tcp_info defined on systems using musl
14
 * library, such as Alpine Linux.
15
 */
16
#define _GNU_SOURCE
17
18
#include <ctype.h>
19
#include <errno.h>
20
#include <stdio.h>
21
#include <stdlib.h>
22
#include <string.h>
23
#include <time.h>
24
25
#include <sys/param.h>
26
#include <sys/socket.h>
27
#include <sys/types.h>
28
29
#include <netinet/tcp.h>
30
#include <netinet/in.h>
31
32
#include <haproxy/api.h>
33
#include <haproxy/arg.h>
34
#include <haproxy/connection.h>
35
#include <haproxy/errors.h>
36
#include <haproxy/fd.h>
37
#include <haproxy/global.h>
38
#include <haproxy/list.h>
39
#include <haproxy/listener.h>
40
#include <haproxy/log.h>
41
#include <haproxy/namespace.h>
42
#include <haproxy/port_range.h>
43
#include <haproxy/proto_tcp.h>
44
#include <haproxy/protocol.h>
45
#include <haproxy/proxy-t.h>
46
#include <haproxy/sock.h>
47
#include <haproxy/sock_inet.h>
48
#include <haproxy/tools.h>
49
50
51
static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
52
static int tcp_suspend_receiver(struct receiver *rx);
53
static int tcp_resume_receiver(struct receiver *rx);
54
static void tcp_enable_listener(struct listener *listener);
55
static void tcp_disable_listener(struct listener *listener);
56
static int tcp_get_info(struct connection *conn, long long int *info, int info_num);
57
58
/* Note: must not be declared <const> as its list will be overwritten */
59
struct protocol proto_tcpv4 = {
60
  .name           = "tcpv4",
61
62
  /* connection layer */
63
  .xprt_type      = PROTO_TYPE_STREAM,
64
  .listen         = tcp_bind_listener,
65
  .enable         = tcp_enable_listener,
66
  .disable        = tcp_disable_listener,
67
  .add            = default_add_listener,
68
  .unbind         = default_unbind_listener,
69
  .suspend        = default_suspend_listener,
70
  .resume         = default_resume_listener,
71
  .accept_conn    = sock_accept_conn,
72
  .ctrl_init      = sock_conn_ctrl_init,
73
  .ctrl_close     = sock_conn_ctrl_close,
74
  .connect        = tcp_connect_server,
75
  .drain          = sock_drain,
76
  .check_events   = sock_check_events,
77
  .ignore_events  = sock_ignore_events,
78
  .get_info       = tcp_get_info,
79
80
  /* binding layer */
81
  .rx_suspend     = tcp_suspend_receiver,
82
  .rx_resume      = tcp_resume_receiver,
83
84
  /* address family */
85
  .fam            = &proto_fam_inet4,
86
87
  /* socket layer */
88
  .proto_type     = PROTO_TYPE_STREAM,
89
  .sock_type      = SOCK_STREAM,
90
  .sock_prot      = IPPROTO_TCP,
91
  .rx_enable      = sock_enable,
92
  .rx_disable     = sock_disable,
93
  .rx_unbind      = sock_unbind,
94
  .rx_listening   = sock_accepting_conn,
95
  .default_iocb   = sock_accept_iocb,
96
#ifdef SO_REUSEPORT
97
  .flags          = PROTO_F_REUSEPORT_SUPPORTED,
98
#endif
99
};
100
101
INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv4);
102
103
/* Note: must not be declared <const> as its list will be overwritten */
104
struct protocol proto_tcpv6 = {
105
  .name           = "tcpv6",
106
107
  /* connection layer */
108
  .xprt_type      = PROTO_TYPE_STREAM,
109
  .listen         = tcp_bind_listener,
110
  .enable         = tcp_enable_listener,
111
  .disable        = tcp_disable_listener,
112
  .add            = default_add_listener,
113
  .unbind         = default_unbind_listener,
114
  .suspend        = default_suspend_listener,
115
  .resume         = default_resume_listener,
116
  .accept_conn    = sock_accept_conn,
117
  .ctrl_init      = sock_conn_ctrl_init,
118
  .ctrl_close     = sock_conn_ctrl_close,
119
  .connect        = tcp_connect_server,
120
  .drain          = sock_drain,
121
  .check_events   = sock_check_events,
122
  .ignore_events  = sock_ignore_events,
123
  .get_info       = tcp_get_info,
124
125
  /* binding layer */
126
  .rx_suspend     = tcp_suspend_receiver,
127
  .rx_resume      = tcp_resume_receiver,
128
129
  /* address family */
130
  .fam            = &proto_fam_inet6,
131
132
  /* socket layer */
133
  .proto_type     = PROTO_TYPE_STREAM,
134
  .sock_type      = SOCK_STREAM,
135
  .sock_prot      = IPPROTO_TCP,
136
  .rx_enable      = sock_enable,
137
  .rx_disable     = sock_disable,
138
  .rx_unbind      = sock_unbind,
139
  .rx_listening   = sock_accepting_conn,
140
  .default_iocb   = sock_accept_iocb,
141
#ifdef SO_REUSEPORT
142
  .flags          = PROTO_F_REUSEPORT_SUPPORTED,
143
#endif
144
};
145
146
INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv6);
147
148
#ifdef HA_HAVE_MPTCP
149
/* Most fields are copied from proto_tcpv4 */
150
struct protocol proto_mptcpv4 = {
151
  .name           = "mptcpv4",
152
153
  /* connection layer */
154
  .xprt_type      = PROTO_TYPE_STREAM,
155
  .listen         = tcp_bind_listener,
156
  .enable         = tcp_enable_listener,
157
  .disable        = tcp_disable_listener,
158
  .add            = default_add_listener,
159
  .unbind         = default_unbind_listener,
160
  .suspend        = default_suspend_listener,
161
  .resume         = default_resume_listener,
162
  .accept_conn    = sock_accept_conn,
163
  .ctrl_init      = sock_conn_ctrl_init,
164
  .ctrl_close     = sock_conn_ctrl_close,
165
  .connect        = tcp_connect_server,
166
  .drain          = sock_drain,
167
  .check_events   = sock_check_events,
168
  .ignore_events  = sock_ignore_events,
169
  .get_info       = tcp_get_info,
170
171
  /* binding layer */
172
  .rx_suspend     = tcp_suspend_receiver,
173
  .rx_resume      = tcp_resume_receiver,
174
175
  /* address family */
176
  .fam            = &proto_fam_inet4,
177
178
  /* socket layer */
179
  .proto_type     = PROTO_TYPE_STREAM,
180
  .sock_type      = SOCK_STREAM,
181
  .sock_prot      = IPPROTO_MPTCP,    /* MPTCP specific */
182
  .rx_enable      = sock_enable,
183
  .rx_disable     = sock_disable,
184
  .rx_unbind      = sock_unbind,
185
  .rx_listening   = sock_accepting_conn,
186
  .default_iocb   = sock_accept_iocb,
187
#ifdef SO_REUSEPORT
188
  .flags          = PROTO_F_REUSEPORT_SUPPORTED,
189
#endif
190
};
191
192
INITCALL1(STG_REGISTER, protocol_register, &proto_mptcpv4);
193
194
/* Most fields are copied from proto_tcpv6 */
195
struct protocol proto_mptcpv6 = {
196
  .name           = "mptcpv6",
197
198
  /* connection layer */
199
  .xprt_type      = PROTO_TYPE_STREAM,
200
  .listen         = tcp_bind_listener,
201
  .enable         = tcp_enable_listener,
202
  .disable        = tcp_disable_listener,
203
  .add            = default_add_listener,
204
  .unbind         = default_unbind_listener,
205
  .suspend        = default_suspend_listener,
206
  .resume         = default_resume_listener,
207
  .accept_conn    = sock_accept_conn,
208
  .ctrl_init      = sock_conn_ctrl_init,
209
  .ctrl_close     = sock_conn_ctrl_close,
210
  .connect        = tcp_connect_server,
211
  .drain          = sock_drain,
212
  .check_events   = sock_check_events,
213
  .ignore_events  = sock_ignore_events,
214
  .get_info       = tcp_get_info,
215
216
  /* binding layer */
217
  .rx_suspend     = tcp_suspend_receiver,
218
  .rx_resume      = tcp_resume_receiver,
219
220
  /* address family */
221
  .fam            = &proto_fam_inet6,
222
223
  /* socket layer */
224
  .proto_type     = PROTO_TYPE_STREAM,
225
  .sock_type      = SOCK_STREAM,
226
  .sock_prot      = IPPROTO_MPTCP,    /* MPTCP specific */
227
  .rx_enable      = sock_enable,
228
  .rx_disable     = sock_disable,
229
  .rx_unbind      = sock_unbind,
230
  .rx_listening   = sock_accepting_conn,
231
  .default_iocb   = sock_accept_iocb,
232
#ifdef SO_REUSEPORT
233
  .flags          = PROTO_F_REUSEPORT_SUPPORTED,
234
#endif
235
};
236
237
INITCALL1(STG_REGISTER, protocol_register, &proto_mptcpv6);
238
#endif
239
240
/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
241
 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
242
 *  - 0 : ignore remote address (may even be a NULL pointer)
243
 *  - 1 : use provided address
244
 *  - 2 : use provided port
245
 *  - 3 : use both
246
 *
247
 * The function supports multiple foreign binding methods :
248
 *   - linux_tproxy: we directly bind to the foreign address
249
 * The second one can be used as a fallback for the first one.
250
 * This function returns 0 when everything's OK, 1 if it could not bind, to the
251
 * local address, 2 if it could not bind to the foreign address.
252
 */
253
int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
254
0
{
255
0
  struct sockaddr_storage bind_addr;
256
0
  int foreign_ok = 0;
257
0
  int ret;
258
0
  static THREAD_LOCAL int ip_transp_working = 1;
259
0
  static THREAD_LOCAL int ip6_transp_working = 1;
260
261
0
  switch (local->ss_family) {
262
0
  case AF_INET:
263
0
    if (flags && ip_transp_working) {
264
      /* This deserves some explanation. Some platforms will support
265
       * multiple combinations of certain methods, so we try the
266
       * supported ones until one succeeds.
267
       */
268
0
      if (sock_inet4_make_foreign(fd))
269
0
        foreign_ok = 1;
270
0
      else
271
0
        ip_transp_working = 0;
272
0
    }
273
0
    break;
274
0
  case AF_INET6:
275
0
    if (flags && ip6_transp_working) {
276
0
      if (sock_inet6_make_foreign(fd))
277
0
        foreign_ok = 1;
278
0
      else
279
0
        ip6_transp_working = 0;
280
0
    }
281
0
    break;
282
0
  }
283
284
0
  if (flags) {
285
0
    memset(&bind_addr, 0, sizeof(bind_addr));
286
0
    bind_addr.ss_family = remote->ss_family;
287
0
    switch (remote->ss_family) {
288
0
    case AF_INET:
289
0
      if (flags & 1)
290
0
        ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
291
0
      if (flags & 2)
292
0
        ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
293
0
      break;
294
0
    case AF_INET6:
295
0
      if (flags & 1)
296
0
        ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
297
0
      if (flags & 2)
298
0
        ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
299
0
      break;
300
0
    default:
301
      /* we don't want to try to bind to an unknown address family */
302
0
      foreign_ok = 0;
303
0
    }
304
0
  }
305
306
0
  setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
307
0
  if (foreign_ok) {
308
0
    if (is_inet_addr(&bind_addr)) {
309
0
      ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
310
0
      if (ret < 0)
311
0
        return 2;
312
0
    }
313
0
  }
314
0
  else {
315
0
    if (is_inet_addr(local)) {
316
0
      ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
317
0
      if (ret < 0)
318
0
        return 1;
319
0
    }
320
0
  }
321
322
0
  if (!flags)
323
0
    return 0;
324
325
0
  if (!foreign_ok)
326
    /* we could not bind to a foreign address */
327
0
    return 2;
328
329
0
  return 0;
330
0
}
331
332
/*
333
 * This function initiates a TCP connection establishment to the target assigned
334
 * to connection <conn> using (si->{target,dst}). A source address may be
335
 * pointed to by conn->src in case of transparent proxying. Normal source
336
 * bind addresses are still determined locally (due to the possible need of a
337
 * source port). conn->target may point either to a valid server or to a backend,
338
 * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
339
 * supported. The <data> parameter is a boolean indicating whether there are data
340
 * waiting for being sent or not, in order to adjust data write polling and on
341
 * some platforms, the ability to avoid an empty initial ACK. The <flags> argument
342
 * allows the caller to force using a delayed ACK when establishing the connection
343
 *   - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
344
 *   - CONNECT_DELACK_SMART_CONNECT = delayed ACK if backend has tcp-smart-connect, regardless of data
345
 *   - CONNECT_DELACK_ALWAYS = delayed ACK regardless of backend options
346
 *
347
 * Note that a pending send_proxy message accounts for data.
348
 *
349
 * It can return one of :
350
 *  - SF_ERR_NONE if everything's OK
351
 *  - SF_ERR_SRVTO if there are no more servers
352
 *  - SF_ERR_SRVCL if the connection was refused by the server
353
 *  - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
354
 *  - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
355
 *  - SF_ERR_INTERNAL for any other purely internal errors
356
 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
357
 *
358
 * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
359
 * it's invalid and the caller has nothing to do.
360
 */
361
362
int tcp_connect_server(struct connection *conn, int flags)
363
0
{
364
0
  int fd, stream_err;
365
0
  struct server *srv;
366
0
  struct proxy *be;
367
0
  struct conn_src *src;
368
0
  int use_fastopen = 0;
369
0
  struct sockaddr_storage *addr;
370
371
0
  BUG_ON(!conn->dst);
372
373
0
  conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
374
375
0
  switch (obj_type(conn->target)) {
376
0
  case OBJ_TYPE_PROXY:
377
0
    be = __objt_proxy(conn->target);
378
0
    srv = NULL;
379
0
    break;
380
0
  case OBJ_TYPE_SERVER:
381
0
    srv = __objt_server(conn->target);
382
0
    be = srv->proxy;
383
    /* Make sure we check that we have data before activating
384
     * TFO, or we could trigger a kernel issue whereby after
385
     * a successful connect() == 0, any subsequent connect()
386
     * will return EINPROGRESS instead of EISCONN.
387
     */
388
0
    use_fastopen = (srv->flags & SRV_F_FASTOPEN) &&
389
0
                   ((flags & (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA)) ==
390
0
        (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA));
391
0
    break;
392
0
  default:
393
0
    conn->flags |= CO_FL_ERROR;
394
0
    return SF_ERR_INTERNAL;
395
0
  }
396
397
398
399
  /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */
400
0
  fd = conn->handle.fd = sock_create_server_socket(conn, be, PROTO_TYPE_STREAM, SOCK_STREAM, &stream_err);
401
0
  if (fd == -1)
402
0
    return stream_err;
403
404
  /* FD is OK, continue with protocol specific settings */
405
0
  if (be->options & PR_O_TCP_SRV_KA) {
406
0
    setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
407
408
0
#ifdef TCP_KEEPCNT
409
0
    if (be->srvtcpka_cnt)
410
0
      setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &be->srvtcpka_cnt, sizeof(be->srvtcpka_cnt));
411
0
#endif
412
413
0
#ifdef TCP_KEEPIDLE
414
0
    if (be->srvtcpka_idle)
415
0
      setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &be->srvtcpka_idle, sizeof(be->srvtcpka_idle));
416
0
#endif
417
418
0
#ifdef TCP_KEEPINTVL
419
0
    if (be->srvtcpka_intvl)
420
0
      setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &be->srvtcpka_intvl, sizeof(be->srvtcpka_intvl));
421
0
#endif
422
0
  }
423
424
  /* allow specific binding :
425
   * - server-specific at first
426
   * - proxy-specific next
427
   */
428
0
  if (srv && srv->conn_src.opts & CO_SRC_BIND)
429
0
    src = &srv->conn_src;
430
0
  else if (be->conn_src.opts & CO_SRC_BIND)
431
0
    src = &be->conn_src;
432
0
  else
433
0
    src = NULL;
434
435
0
  if (src) {
436
0
    int ret, flags = 0;
437
438
0
    if (conn->src && is_inet_addr(conn->src)) {
439
0
      switch (src->opts & CO_SRC_TPROXY_MASK) {
440
0
      case CO_SRC_TPROXY_CLI:
441
0
      case CO_SRC_TPROXY_ADDR:
442
0
        flags = 3;
443
0
        break;
444
0
      case CO_SRC_TPROXY_CIP:
445
0
      case CO_SRC_TPROXY_DYN:
446
0
        flags = 1;
447
0
        break;
448
0
      }
449
0
    }
450
451
0
#ifdef SO_BINDTODEVICE
452
    /* Note: this might fail if not CAP_NET_RAW */
453
0
    if (src->iface_name)
454
0
      setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
455
0
#endif
456
457
0
    if (src->sport_range) {
458
0
      int attempts = 10; /* should be more than enough to find a spare port */
459
0
      struct sockaddr_storage sa;
460
461
0
      ret = 1;
462
0
      memcpy(&sa, &src->source_addr, sizeof(sa));
463
464
0
      do {
465
        /* note: in case of retry, we may have to release a previously
466
         * allocated port, hence this loop's construct.
467
         */
468
0
        port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
469
0
        fdinfo[fd].port_range = NULL;
470
471
0
        if (!attempts)
472
0
          break;
473
0
        attempts--;
474
475
0
        fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
476
0
        if (!fdinfo[fd].local_port) {
477
0
          conn->err_code = CO_ER_PORT_RANGE;
478
0
          break;
479
0
        }
480
481
0
        fdinfo[fd].port_range = src->sport_range;
482
0
        set_host_port(&sa, fdinfo[fd].local_port);
483
484
0
        ret = tcp_bind_socket(fd, flags, &sa, conn->src);
485
0
        if (ret != 0)
486
0
          conn->err_code = CO_ER_CANT_BIND;
487
0
      } while (ret != 0); /* binding NOK */
488
0
    }
489
0
    else {
490
0
#ifdef IP_BIND_ADDRESS_NO_PORT
491
0
      static THREAD_LOCAL int bind_address_no_port = 1;
492
0
      setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
493
0
#endif
494
0
      ret = tcp_bind_socket(fd, flags, &src->source_addr, conn->src);
495
0
      if (ret != 0)
496
0
        conn->err_code = CO_ER_CANT_BIND;
497
0
    }
498
499
0
    if (unlikely(ret != 0)) {
500
0
      port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
501
0
      fdinfo[fd].port_range = NULL;
502
0
      close(fd);
503
504
0
      if (ret == 1) {
505
0
        ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
506
0
           be->id);
507
0
        send_log(be, LOG_EMERG,
508
0
           "Cannot bind to source address before connect() for backend %s.\n",
509
0
           be->id);
510
0
      } else {
511
0
        ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
512
0
           be->id);
513
0
        send_log(be, LOG_EMERG,
514
0
           "Cannot bind to tproxy source address before connect() for backend %s.\n",
515
0
           be->id);
516
0
      }
517
0
      conn->flags |= CO_FL_ERROR;
518
0
      return SF_ERR_RESOURCE;
519
0
    }
520
0
  }
521
522
0
#if defined(TCP_QUICKACK)
523
  /* disabling tcp quick ack now allows the first request to leave the
524
   * machine with the first ACK. We only do this if there are pending
525
   * data in the buffer.
526
   */
527
0
  if (flags & (CONNECT_DELACK_ALWAYS) ||
528
0
      ((flags & CONNECT_DELACK_SMART_CONNECT ||
529
0
        (flags & CONNECT_HAS_DATA) || conn->send_proxy_ofs) &&
530
0
       (be->options2 & PR_O2_SMARTCON)))
531
0
                setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
532
0
#endif
533
534
0
#ifdef TCP_USER_TIMEOUT
535
  /* there is not much more we can do here when it fails, it's still minor */
536
0
  if (srv && srv->tcp_ut)
537
0
    setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
538
0
#endif
539
540
0
  if (use_fastopen) {
541
0
#if defined(TCP_FASTOPEN_CONNECT)
542
0
                setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
543
0
#endif
544
0
  }
545
0
  if (global.tune.server_sndbuf)
546
0
                setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
547
548
0
#if defined(TCP_NOTSENT_LOWAT)
549
0
  if (global.tune.server_notsent_lowat)
550
0
    setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT, &global.tune.server_notsent_lowat, sizeof(global.tune.server_notsent_lowat));
551
0
#endif
552
553
0
  if (global.tune.server_rcvbuf)
554
0
                setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
555
556
0
  addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
557
0
  if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
558
0
    if (errno == EINPROGRESS || errno == EALREADY) {
559
      /* common case, let's wait for connect status */
560
0
      conn->flags |= CO_FL_WAIT_L4_CONN;
561
0
    }
562
0
    else if (errno == EISCONN) {
563
      /* should normally not happen but if so, indicates that it's OK */
564
0
      conn->flags &= ~CO_FL_WAIT_L4_CONN;
565
0
    }
566
0
    else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
567
0
      char *msg;
568
0
      if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
569
0
        msg = "no free ports";
570
0
        conn->err_code = CO_ER_FREE_PORTS;
571
0
      }
572
0
      else {
573
0
        msg = "local address already in use";
574
0
        conn->err_code = CO_ER_ADDR_INUSE;
575
0
      }
576
577
0
      qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
578
0
      port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
579
0
      fdinfo[fd].port_range = NULL;
580
0
      close(fd);
581
0
      send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
582
0
      conn->flags |= CO_FL_ERROR;
583
0
      return SF_ERR_RESOURCE;
584
0
    } else if (errno == ETIMEDOUT) {
585
      //qfprintf(stderr,"Connect(): ETIMEDOUT");
586
0
      port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
587
0
      fdinfo[fd].port_range = NULL;
588
0
      close(fd);
589
0
      conn->err_code = CO_ER_SOCK_ERR;
590
0
      conn->flags |= CO_FL_ERROR;
591
0
      return SF_ERR_SRVTO;
592
0
    } else {
593
      // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
594
      //qfprintf(stderr,"Connect(): %d", errno);
595
0
      port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
596
0
      fdinfo[fd].port_range = NULL;
597
0
      close(fd);
598
0
      conn->err_code = CO_ER_SOCK_ERR;
599
0
      conn->flags |= CO_FL_ERROR;
600
0
      return SF_ERR_SRVCL;
601
0
    }
602
0
  }
603
0
  else {
604
    /* connect() == 0, this is great! */
605
0
    conn->flags &= ~CO_FL_WAIT_L4_CONN;
606
0
  }
607
608
0
  conn_ctrl_init(conn);       /* registers the FD */
609
0
  HA_ATOMIC_OR(&fdtab[fd].state, FD_LINGER_RISK);  /* close hard if needed */
610
611
0
  if (conn->flags & CO_FL_WAIT_L4_CONN) {
612
0
    fd_want_send(fd);
613
0
    fd_cant_send(fd);
614
0
    fd_cant_recv(fd);
615
0
  }
616
617
0
  return SF_ERR_NONE;  /* connection is OK */
618
0
}
619
620
/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
621
 * an error message in <errmsg> if the message is at most <errlen> bytes long
622
 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
623
 * The return value is composed from ERR_ABORT, ERR_WARN,
624
 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
625
 * was alright and that no message was returned. ERR_RETRYABLE means that an
626
 * error occurred but that it may vanish after a retry (eg: port in use), and
627
 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
628
 * the meaning of the error, but just indicate that a message is present which
629
 * should be displayed with the respective level. Last, ERR_ABORT indicates
630
 * that it's pointless to try to start other listeners. No error message is
631
 * returned if errlen is NULL.
632
 */
633
int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
634
0
{
635
0
  int fd, err;
636
0
  int ready;
637
0
  struct buffer *msg = alloc_trash_chunk();
638
639
0
  err = ERR_NONE;
640
641
0
  if (!msg) {
642
0
    if (errlen)
643
0
      snprintf(errmsg, errlen, "out of memory");
644
0
    return ERR_ALERT | ERR_FATAL;
645
0
  }
646
647
  /* ensure we never return garbage */
648
0
  if (errlen)
649
0
    *errmsg = 0;
650
651
0
  if (listener->state != LI_ASSIGNED)
652
0
    return ERR_NONE; /* already bound */
653
654
0
  if (!(listener->rx.flags & RX_F_BOUND)) {
655
0
    chunk_appendf(msg, "%sreceiving socket not bound", msg->data ? ", " : "");
656
0
    goto tcp_return;
657
0
  }
658
659
0
  if (listener->rx.flags & RX_F_MUST_DUP)
660
0
    goto done;
661
662
0
  fd = listener->rx.fd;
663
664
0
  if (listener->bind_conf->options & BC_O_NOLINGER)
665
0
    setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
666
0
  else {
667
0
    struct linger tmplinger;
668
0
    socklen_t len = sizeof(tmplinger);
669
0
    if (getsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger, &len) == 0 &&
670
0
        (tmplinger.l_onoff == 1 || tmplinger.l_linger == 0)) {
671
0
      tmplinger.l_onoff = 0;
672
0
      tmplinger.l_linger = 0;
673
0
      setsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger,
674
0
          sizeof(tmplinger));
675
0
    }
676
0
  }
677
678
0
#if defined(TCP_MAXSEG)
679
0
  if (listener->bind_conf->maxseg > 0) {
680
0
    if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
681
0
             &listener->bind_conf->maxseg, sizeof(listener->bind_conf->maxseg)) == -1) {
682
0
      chunk_appendf(msg, "%scannot set MSS to %d, (%s)", msg->data ? ", " : "", listener->bind_conf->maxseg,
683
0
              strerror(errno));
684
0
      err |= ERR_WARN;
685
0
    }
686
0
  } else {
687
    /* we may want to try to restore the default MSS if the socket was inherited */
688
0
    int tmpmaxseg = -1;
689
0
    int defaultmss;
690
0
    int v4 = listener->rx.addr.ss_family == AF_INET;
691
0
    socklen_t len = sizeof(tmpmaxseg);
692
693
0
    if (listener->rx.proto->sock_prot == IPPROTO_MPTCP) {
694
0
      if (v4)
695
0
        defaultmss = sock_inet_mptcp_maxseg_default;
696
0
      else
697
0
        defaultmss = sock_inet6_mptcp_maxseg_default;
698
0
    } else {
699
0
      if (v4)
700
0
        defaultmss = sock_inet_tcp_maxseg_default;
701
0
      else
702
0
        defaultmss = sock_inet6_tcp_maxseg_default;
703
0
    }
704
705
0
    getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
706
0
    if (defaultmss > 0 &&
707
0
        tmpmaxseg != defaultmss &&
708
0
        setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &defaultmss, sizeof(defaultmss)) == -1) {
709
0
      chunk_appendf(msg, "%scannot set MSS to %d, (%s)", msg->data ? ", " : "", defaultmss,
710
0
              strerror(errno));
711
0
      err |= ERR_WARN;
712
0
    }
713
0
  }
714
0
#endif
715
0
#if defined(TCP_USER_TIMEOUT)
716
0
  if (listener->bind_conf->tcp_ut) {
717
0
    if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
718
0
             &listener->bind_conf->tcp_ut, sizeof(listener->bind_conf->tcp_ut)) == -1) {
719
0
      chunk_appendf(msg, "%scannot set TCP User Timeout, (%s)", msg->data ? ", " : "",
720
0
              strerror(errno));
721
0
      err |= ERR_WARN;
722
0
    }
723
0
  } else
724
0
    setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &zero,
725
0
        sizeof(zero));
726
0
#endif
727
0
#if defined(TCP_DEFER_ACCEPT)
728
0
  if (listener->bind_conf->options & BC_O_DEF_ACCEPT) {
729
    /* defer accept by up to one second */
730
0
    int accept_delay = 1;
731
0
    if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
732
0
      chunk_appendf(msg, "%scannot enable DEFER_ACCEPT, (%s)", msg->data ? ", " : "",
733
0
              strerror(errno));
734
0
      err |= ERR_WARN;
735
0
    }
736
0
  } else
737
0
    setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &zero,
738
0
        sizeof(zero));
739
0
#endif
740
0
#if defined(TCP_FASTOPEN)
741
0
  if (listener->bind_conf->options & BC_O_TCP_FO) {
742
    /* TFO needs a queue length, let's use the configured backlog */
743
0
    int qlen = listener_backlog(listener);
744
0
    if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
745
0
      chunk_appendf(msg, "%scannot enable TCP_FASTOPEN, (%s)", msg->data ? ", " : "",
746
0
              strerror(errno));
747
0
      err |= ERR_WARN;
748
0
    }
749
0
  } else {
750
0
    socklen_t len;
751
0
    int qlen;
752
0
    len = sizeof(qlen);
753
    /* Only disable fast open if it was enabled, we don't want
754
     * the kernel to create a fast open queue if there's none.
755
     */
756
0
    if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) == 0 &&
757
0
        qlen != 0) {
758
0
      if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &zero,
759
0
          sizeof(zero)) == -1) {
760
0
        chunk_appendf(msg, "%scannot disable TCP_FASTOPEN, (%s)", msg->data ? ", " : "",
761
0
                strerror(errno));
762
0
        err |= ERR_WARN;
763
0
      }
764
0
    }
765
0
  }
766
0
#endif
767
768
0
  ready = sock_accepting_conn(&listener->rx) > 0;
769
770
0
  if (!ready && /* only listen if not already done by external process */
771
0
      listen(fd, listener_backlog(listener)) == -1) {
772
0
    err |= ERR_RETRYABLE | ERR_ALERT;
773
0
    chunk_appendf(msg, "%scannot listen to socket: (%s)", msg->data ? ", " : "",
774
0
            strerror(errno));
775
0
    goto tcp_close_return;
776
0
  }
777
778
#if !defined(TCP_DEFER_ACCEPT) && defined(SO_ACCEPTFILTER)
779
  /* the socket needs to listen first */
780
  if (listener->bind_conf->options & BC_O_DEF_ACCEPT) {
781
    struct accept_filter_arg accept;
782
    memset(&accept, 0, sizeof(accept));
783
    strlcpy2(accept.af_name, "dataready", sizeof(accept.af_name));
784
    if (setsockopt(fd, SOL_SOCKET, SO_ACCEPTFILTER, &accept, sizeof(accept)) == -1) {
785
      chunk_appendf(msg, "%scannot enable ACCEPT_FILTER, (%s)", msg->data ? ", " : "",
786
              strerror(errno));
787
      err |= ERR_WARN;
788
    }
789
  }
790
#endif
791
0
#if defined(TCP_QUICKACK)
792
0
  if (listener->bind_conf->options & BC_O_NOQUICKACK)
793
0
    setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
794
0
  else
795
0
    setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
796
0
#endif
797
798
0
 done:
799
  /* the socket is ready */
800
0
  listener_set_state(listener, LI_LISTEN);
801
0
  goto tcp_return;
802
803
0
 tcp_close_return:
804
0
  fd_delete(fd);
805
0
 tcp_return:
806
0
  if (msg && errlen && msg->data) {
807
0
    char pn[INET6_ADDRSTRLEN];
808
809
0
    addr_to_str(&listener->rx.addr, pn, sizeof(pn));
810
0
    snprintf(errmsg, errlen, "%s for [%s:%d]", msg->area, pn, get_host_port(&listener->rx.addr));
811
0
  }
812
0
  free_trash_chunk(msg);
813
0
  msg = NULL;
814
0
  return err;
815
0
}
816
817
/* Enable receipt of incoming connections for listener <l>. The receiver must
818
 * still be valid.
819
 */
820
static void tcp_enable_listener(struct listener *l)
821
0
{
822
0
  fd_want_recv_safe(l->rx.fd);
823
0
}
824
825
/* Disable receipt of incoming connections for listener <l>. The receiver must
826
 * still be valid.
827
 */
828
static void tcp_disable_listener(struct listener *l)
829
0
{
830
0
  fd_stop_recv(l->rx.fd);
831
0
}
832
833
/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
834
 * was totally stopped, or > 0 if correctly suspended. Note that inherited FDs
835
 * are neither suspended nor resumed, we only enable/disable polling on them.
836
 */
837
static int tcp_suspend_receiver(struct receiver *rx)
838
0
{
839
0
  const struct sockaddr sa = { .sa_family = AF_UNSPEC };
840
0
  int ret;
841
842
  /* We never disconnect a shared FD otherwise we'd break it in the
843
   * parent process and any possible subsequent worker inheriting it.
844
   * Thus we just stop receiving from it.
845
   */
846
0
  if (rx->flags & RX_F_INHERITED)
847
0
    goto done;
848
849
0
  if (connect(rx->fd, &sa, sizeof(sa)) < 0)
850
0
    goto check_already_done;
851
0
 done:
852
0
  fd_stop_recv(rx->fd);
853
0
  return 1;
854
855
0
 check_already_done:
856
  /* in case one of the shutdown() above fails, it might be because we're
857
   * dealing with a socket that is shared with other processes doing the
858
   * same. Let's check if it's still accepting connections.
859
   */
860
0
  ret = sock_accepting_conn(rx);
861
0
  if (ret <= 0) {
862
    /* unrecoverable or paused by another process */
863
0
    fd_stop_recv(rx->fd);
864
0
    return ret == 0;
865
0
  }
866
867
  /* still listening, that's not good */
868
0
  return -1;
869
0
}
870
871
/* Resume a receiver. Returns < 0 in case of failure, 0 if the receiver
872
 * was totally stopped, or > 0 if correctly resumed. Note that inherited FDs
873
 * are neither suspended nor resumed, we only enable/disable polling on them.
874
 */
875
static int tcp_resume_receiver(struct receiver *rx)
876
0
{
877
0
  struct listener *l = LIST_ELEM(rx, struct listener *, rx);
878
879
0
  if (rx->fd < 0)
880
0
    return 0;
881
882
0
  if ((rx->flags & RX_F_INHERITED) || listen(rx->fd, listener_backlog(l)) == 0) {
883
0
    fd_want_recv(l->rx.fd);
884
0
    return 1;
885
0
  }
886
0
  return -1;
887
0
}
888
889
#ifdef TCP_INFO
890
/* Returns some tcp_info data if it's available for <conn> connection into <*info>.
891
 * "info_num" represents the required value.
892
 * If the function fails it returns 0, otherwise it returns 1 and "result" is filled.
893
 */
894
static int tcp_get_info(struct connection *conn, long long int *info, int info_num)
895
0
{
896
0
  struct tcp_info tcp_info;
897
0
  socklen_t optlen;
898
899
  /* The fd may not be available for the tcp_info struct, and the
900
    syscal can fail. */
901
0
  optlen = sizeof(tcp_info);
902
0
  if ((conn->flags & CO_FL_FDLESS) ||
903
0
      getsockopt(conn->handle.fd, IPPROTO_TCP, TCP_INFO, &tcp_info, &optlen) == -1)
904
0
    return 0;
905
906
0
  switch (info_num) {
907
#if defined(__APPLE__)
908
  case 0:  *info = tcp_info.tcpi_rttcur;         break;
909
  case 1:  *info = tcp_info.tcpi_rttvar;         break;
910
  case 2:  *info = tcp_info.tcpi_tfo_syn_data_acked; break;
911
  case 4:  *info = tcp_info.tcpi_tfo_syn_loss;   break;
912
  case 5:  *info = tcp_info.tcpi_rto;            break;
913
#else
914
  /* all other platforms supporting TCP_INFO have these ones */
915
0
  case 0:  *info = tcp_info.tcpi_rtt;            break;
916
0
  case 1:  *info = tcp_info.tcpi_rttvar;         break;
917
0
# if defined(__linux__)
918
  /* these ones are common to all Linux versions */
919
0
  case 2:  *info = tcp_info.tcpi_unacked;        break;
920
0
  case 3:  *info = tcp_info.tcpi_sacked;         break;
921
0
  case 4:  *info = tcp_info.tcpi_lost;           break;
922
0
  case 5:  *info = tcp_info.tcpi_retrans;        break;
923
0
  case 6:  *info = tcp_info.tcpi_fackets;        break;
924
0
  case 7:  *info = tcp_info.tcpi_reordering;     break;
925
# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
926
  /* the ones are found on FreeBSD, NetBSD and OpenBSD featuring TCP_INFO */
927
  case 2:  *info = tcp_info.__tcpi_unacked;      break;
928
  case 3:  *info = tcp_info.__tcpi_sacked;       break;
929
  case 4:  *info = tcp_info.__tcpi_lost;         break;
930
  case 5:  *info = tcp_info.__tcpi_retrans;      break;
931
  case 6:  *info = tcp_info.__tcpi_fackets;      break;
932
  case 7:  *info = tcp_info.__tcpi_reordering;   break;
933
# endif
934
0
#endif // apple
935
0
  default: return 0;
936
0
  }
937
938
0
  return 1;
939
0
}
940
#else
941
static int tcp_get_info(struct connection *conn, long long int *info, int info_num)
942
{
943
  return 0;
944
}
945
#endif /* TCP_INFO */
946
947
948
/*
949
 * Local variables:
950
 *  c-indent-level: 8
951
 *  c-basic-offset: 8
952
 * End:
953
 */