Coverage Report

Created: 2025-07-23 07:16

/src/wget/src/connect.c
Line
Count
Source (jump to first uncovered line)
1
/* Establishing and handling network connections.
2
   Copyright (C) 1995-2011, 2015, 2018-2024 Free Software Foundation,
3
   Inc.
4
5
This file is part of GNU Wget.
6
7
GNU Wget is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3 of the License, or
10
 (at your option) any later version.
11
12
GNU Wget is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
GNU General Public License for more details.
16
17
You should have received a copy of the GNU General Public License
18
along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20
Additional permission under GNU GPL version 3 section 7
21
22
If you modify this program, or any covered work, by linking or
23
combining it with the OpenSSL project's OpenSSL library (or a
24
modified version of that library), containing parts covered by the
25
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26
grants you additional permission to convey the resulting work.
27
Corresponding Source for a non-source form of such a combination
28
shall include the source code for the parts of OpenSSL used as well
29
as that of the covered work.  */
30
31
#include "wget.h"
32
33
#include "exits.h"
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <unistd.h>
37
#include <assert.h>
38
39
#include <sys/socket.h>
40
#include <sys/select.h>
41
42
#ifndef WINDOWS
43
# ifdef __VMS
44
#  include "vms_ip.h"
45
# else /* def __VMS */
46
#  include <netdb.h>
47
# endif /* def __VMS [else] */
48
# include <netinet/in.h>
49
# ifndef __BEOS__
50
#  include <arpa/inet.h>
51
# endif
52
#endif /* not WINDOWS */
53
54
#include <errno.h>
55
#include <string.h>
56
#include <sys/time.h>
57
58
#include "utils.h"
59
#include "host.h"
60
#include "connect.h"
61
#include "hash.h"
62
63
#include <stdint.h>
64
65
/* Define sockaddr_storage where unavailable (presumably on IPv4-only
66
   hosts).  */
67
68
#ifndef ENABLE_IPV6
69
# ifndef HAVE_STRUCT_SOCKADDR_STORAGE
70
#  define sockaddr_storage sockaddr_in
71
# endif
72
#endif /* ENABLE_IPV6 */
73
74
/* Fill SA as per the data in IP and PORT.  SA should point to struct
75
   sockaddr_storage if ENABLE_IPV6 is defined, to struct sockaddr_in
76
   otherwise.  */
77
78
static void
79
sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port)
80
0
{
81
0
  switch (ip->family)
82
0
    {
83
0
    case AF_INET:
84
0
      {
85
0
        struct sockaddr_in *sin = (struct sockaddr_in *)sa;
86
0
        xzero (*sin);
87
0
        sin->sin_family = AF_INET;
88
0
        sin->sin_port = htons (port);
89
0
        sin->sin_addr = ip->data.d4;
90
0
        break;
91
0
      }
92
0
#ifdef ENABLE_IPV6
93
0
    case AF_INET6:
94
0
      {
95
0
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
96
0
        xzero (*sin6);
97
0
        sin6->sin6_family = AF_INET6;
98
0
        sin6->sin6_port = htons (port);
99
0
        sin6->sin6_addr = ip->data.d6;
100
0
#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
101
0
        sin6->sin6_scope_id = ip->ipv6_scope;
102
0
#endif
103
0
        break;
104
0
      }
105
0
#endif /* ENABLE_IPV6 */
106
0
    default:
107
0
      abort ();
108
0
    }
109
0
}
110
111
/* Get the data of SA, specifically the IP address and the port.  If
112
   you're not interested in one or the other information, pass NULL as
113
   the pointer.  */
114
115
static void
116
sockaddr_get_data (const struct sockaddr *sa, ip_address *ip, int *port)
117
0
{
118
0
  switch (sa->sa_family)
119
0
    {
120
0
    case AF_INET:
121
0
      {
122
0
        struct sockaddr_in *sin = (struct sockaddr_in *)sa;
123
0
        if (ip)
124
0
          {
125
0
            ip->family = AF_INET;
126
0
            ip->data.d4 = sin->sin_addr;
127
0
          }
128
0
        if (port)
129
0
          *port = ntohs (sin->sin_port);
130
0
        break;
131
0
      }
132
0
#ifdef ENABLE_IPV6
133
0
    case AF_INET6:
134
0
      {
135
0
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
136
0
        if (ip)
137
0
          {
138
0
            ip->family = AF_INET6;
139
0
            ip->data.d6 = sin6->sin6_addr;
140
0
#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
141
0
            ip->ipv6_scope = sin6->sin6_scope_id;
142
0
#endif
143
0
          }
144
0
        if (port)
145
0
          *port = ntohs (sin6->sin6_port);
146
0
        break;
147
0
      }
148
0
#endif
149
0
    default:
150
0
      abort ();
151
0
    }
152
0
}
153
154
/* Return the number of bytes occupied by the concrete sockaddr
   structure that SA's family implies.  Aborts on a family this build
   does not support.  */

static socklen_t
sockaddr_size (const struct sockaddr *sa)
{
  if (sa->sa_family == AF_INET)
    return sizeof (struct sockaddr_in);
#ifdef ENABLE_IPV6
  if (sa->sa_family == AF_INET6)
    return sizeof (struct sockaddr_in6);
#endif
  abort ();
}
172
173
/* Resolve the bind address specified via --bind-address and store it
174
   to SA.  The resolved value is stored in a static variable and
175
   reused after the first invocation of this function.
176
177
   Returns true on success, false on failure.  */
178
179
static bool
180
resolve_bind_address (struct sockaddr *sa)
181
0
{
182
0
  struct address_list *al;
183
184
  /* Make sure this is called only once.  opt.bind_address doesn't
185
     change during a Wget run.  */
186
0
  static bool called, should_bind;
187
0
  static ip_address ip;
188
0
  if (called)
189
0
    {
190
0
      if (should_bind)
191
0
        sockaddr_set_data (sa, &ip, 0);
192
0
      return should_bind;
193
0
    }
194
0
  called = true;
195
196
0
  al = lookup_host (opt.bind_address, LH_BIND | LH_SILENT);
197
0
  if (!al)
198
0
    {
199
      /* #### We should be able to print the error message here. */
200
0
      logprintf (LOG_NOTQUIET,
201
0
                 _("%s: unable to resolve bind address %s; disabling bind.\n"),
202
0
                 exec_name, quote (opt.bind_address));
203
0
      should_bind = false;
204
0
      return false;
205
0
    }
206
207
  /* Pick the first address in the list and use it as bind address.
208
     Perhaps we should try multiple addresses in succession, but I
209
     don't think that's necessary in practice.  */
210
0
  ip = *address_list_address_at (al, 0);
211
0
  address_list_release (al);
212
213
0
  sockaddr_set_data (sa, &ip, 0);
214
0
  should_bind = true;
215
0
  return true;
216
0
}
217
218
/* Arguments and result of a connect() call, bundled so they can be
   passed through the single void pointer that run_with_timeout hands
   to its callback.  */
struct cwt_context {
  int fd;                       /* socket to connect */
  const struct sockaddr *addr;  /* remote address */
  socklen_t addrlen;            /* size of *ADDR */
  int result;                   /* return value of connect() */
};
224
225
static void
226
connect_with_timeout_callback (void *arg)
227
0
{
228
0
  struct cwt_context *ctx = (struct cwt_context *)arg;
229
0
  ctx->result = connect (ctx->fd, ctx->addr, ctx->addrlen);
230
0
}
231
232
/* Like connect, but specifies a timeout.  If connecting takes longer
233
   than TIMEOUT seconds, -1 is returned and errno is set to
234
   ETIMEDOUT.  */
235
236
static int
237
connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen,
238
                      double timeout)
239
0
{
240
0
  struct cwt_context ctx;
241
0
  ctx.fd = fd;
242
0
  ctx.addr = addr;
243
0
  ctx.addrlen = addrlen;
244
245
0
  if (run_with_timeout (timeout, connect_with_timeout_callback, &ctx))
246
0
    {
247
0
      errno = ETIMEDOUT;
248
0
      return -1;
249
0
    }
250
0
  if (ctx.result == -1 && errno == EINTR)
251
0
    errno = ETIMEDOUT;
252
0
  return ctx.result;
253
0
}
254
255
/* Connect via TCP to the specified address and port.
256
257
   If PRINT is non-NULL, it is the host name to print that we're
258
   connecting to.  */
259
260
int
261
connect_to_ip (const ip_address *ip, int port, const char *print)
262
0
{
263
0
  struct sockaddr_storage ss;
264
0
  struct sockaddr *sa = (struct sockaddr *)&ss;
265
0
  int sock;
266
267
  /* If PRINT is non-NULL, print the "Connecting to..." line, with
268
     PRINT being the host name we're connecting to.  */
269
0
  if (print)
270
0
    {
271
0
      const char *txt_addr = print_address (ip);
272
0
      if (0 != strcmp (print, txt_addr))
273
0
        {
274
0
          char *str = NULL, *name;
275
276
0
          if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
277
0
            {
278
0
              str = aprintf ("%s (%s)", name, print);
279
0
              xfree (name);
280
0
            }
281
282
0
          logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
283
0
                     str ? str : escnonprint_uri (print), txt_addr, port);
284
285
0
          xfree (str);
286
0
        }
287
0
      else
288
0
        {
289
0
           if (ip->family == AF_INET)
290
0
               logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
291
0
#ifdef ENABLE_IPV6
292
0
           else if (ip->family == AF_INET6)
293
0
               logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port);
294
0
#endif
295
0
        }
296
0
    }
297
298
  /* Store the sockaddr info to SA.  */
299
0
  sockaddr_set_data (sa, ip, port);
300
301
  /* Create the socket of the family appropriate for the address.  */
302
0
  sock = socket (sa->sa_family, SOCK_STREAM, 0);
303
0
  if (sock < 0)
304
0
    goto err;
305
306
0
#if defined(ENABLE_IPV6) && defined(IPV6_V6ONLY)
307
0
  if (opt.ipv6_only) {
308
0
    int on = 1;
309
    /* In case of error, we will go on anyway... */
310
0
    int err = setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof (on));
311
0
    IF_DEBUG
312
0
      if (err < 0)
313
0
        DEBUGP (("Failed setting IPV6_V6ONLY: %s", strerror (errno)));
314
0
  }
315
0
#endif
316
317
  /* For very small rate limits, set the buffer size (and hence,
318
     hopefully, the kernel's TCP window size) to the per-second limit.
319
     That way we should never have to sleep for more than 1s between
320
     network reads.  */
321
0
  if (opt.limit_rate && opt.limit_rate < 8192)
322
0
    {
323
0
      int bufsize = opt.limit_rate;
324
0
      if (bufsize < 512)
325
0
        bufsize = 512;          /* avoid pathologically small values */
326
0
#ifdef SO_RCVBUF
327
0
      if (setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
328
0
                  (void *) &bufsize, (socklen_t) sizeof (bufsize)))
329
0
        logprintf (LOG_NOTQUIET, _("setsockopt SO_RCVBUF failed: %s\n"),
330
0
                   strerror (errno));
331
0
#endif
332
      /* When we add limit_rate support for writing, which is useful
333
         for POST, we should also set SO_SNDBUF here.  */
334
0
    }
335
336
0
  if (opt.bind_address)
337
0
    {
338
      /* Bind the client side of the socket to the requested
339
         address.  */
340
0
      struct sockaddr_storage bind_ss;
341
0
      struct sockaddr *bind_sa = (struct sockaddr *)&bind_ss;
342
0
      if (resolve_bind_address (bind_sa))
343
0
        {
344
0
          if (bind (sock, bind_sa, sockaddr_size (bind_sa)) < 0)
345
0
            goto err;
346
0
        }
347
0
    }
348
349
  /* Connect the socket to the remote endpoint.  */
350
0
  if (connect_with_timeout (sock, sa, sockaddr_size (sa),
351
0
                            opt.connect_timeout) < 0)
352
0
    goto err;
353
354
  /* Success. */
355
0
  assert (sock >= 0);
356
0
  if (print)
357
0
    logprintf (LOG_VERBOSE, _("connected.\n"));
358
0
  DEBUGP (("Created socket %d.\n", sock));
359
0
  return sock;
360
361
0
 err:
362
0
  {
363
    /* Protect errno from possible modifications by close and
364
       logprintf.  */
365
0
    int save_errno = errno;
366
0
    if (sock >= 0)
367
0
      {
368
#ifdef WIN32
369
  /* If the connection timed out, fd_close will hang in Gnulib's
370
     close_fd_maybe_socket, inside the call to WSAEnumNetworkEvents.  */
371
  if (errno != ETIMEDOUT)
372
#endif
373
0
    fd_close (sock);
374
0
      }
375
0
    if (print)
376
0
      logprintf (LOG_NOTQUIET, _("failed: %s.\n"), strerror (errno));
377
0
    errno = save_errno;
378
0
    return -1;
379
0
  }
380
0
}
381
382
/* Connect via TCP to a remote host on the specified port.
383
384
   HOST is resolved as an Internet host name.  If HOST resolves to
385
   more than one IP address, they are tried in the order returned by
386
   DNS until connecting to one of them succeeds.  */
387
388
int
389
connect_to_host (const char *host, int port)
390
0
{
391
0
  int i, start, end;
392
0
  int sock;
393
394
0
  struct address_list *al = lookup_host (host, 0);
395
396
0
 retry:
397
0
  if (!al)
398
0
    {
399
0
      logprintf (LOG_NOTQUIET,
400
0
                 _("%s: unable to resolve host address %s\n"),
401
0
                 exec_name, quote (host));
402
0
      return E_HOST;
403
0
    }
404
405
0
  address_list_get_bounds (al, &start, &end);
406
0
  for (i = start; i < end; i++)
407
0
    {
408
0
      const ip_address *ip = address_list_address_at (al, i);
409
0
      sock = connect_to_ip (ip, port, host);
410
0
      if (sock >= 0)
411
0
        {
412
          /* Success. */
413
0
          address_list_set_connected (al);
414
0
          address_list_release (al);
415
0
          return sock;
416
0
        }
417
418
      /* The attempt to connect has failed.  Continue with the loop
419
         and try next address. */
420
421
0
      address_list_set_faulty (al, i);
422
0
    }
423
424
  /* Failed to connect to any of the addresses in AL. */
425
426
0
  if (address_list_connected_p (al))
427
0
    {
428
      /* We connected to AL before, but cannot do so now.  That might
429
         indicate that our DNS cache entry for HOST has expired.  */
430
0
      address_list_release (al);
431
0
      al = lookup_host (host, LH_REFRESH);
432
0
      goto retry;
433
0
    }
434
0
  address_list_release (al);
435
436
0
  return -1;
437
0
}
438
439
/* Create a socket, bind it to local interface BIND_ADDRESS on port
440
   *PORT, set up a listen backlog, and return the resulting socket, or
441
   -1 in case of error.
442
443
   BIND_ADDRESS is the address of the interface to bind to.  If it is
444
   NULL, the socket is bound to the default address.  PORT should
445
   point to the port number that will be used for the binding.  If
446
   that number is 0, the system will choose a suitable port, and the
447
   chosen value will be written to *PORT.
448
449
   Calling accept() on such a socket waits for and accepts incoming
450
   TCP connections.  */
451
452
int
453
bind_local (const ip_address *bind_address, int *port)
454
0
{
455
0
  int sock;
456
0
  struct sockaddr_storage ss;
457
0
  struct sockaddr *sa = (struct sockaddr *)&ss;
458
459
  /* For setting options with setsockopt. */
460
0
  int setopt_val = 1;
461
0
  void *setopt_ptr = (void *)&setopt_val;
462
0
  socklen_t setopt_size = sizeof (setopt_val);
463
464
0
  sock = socket (bind_address->family, SOCK_STREAM, 0);
465
0
  if (sock < 0)
466
0
    return -1;
467
468
0
#ifdef SO_REUSEADDR
469
0
  if (setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size))
470
0
    logprintf (LOG_NOTQUIET, _("setsockopt SO_REUSEADDR failed: %s\n"),
471
0
               strerror (errno));
472
0
#endif
473
474
0
  xzero (ss);
475
0
  sockaddr_set_data (sa, bind_address, *port);
476
0
  if (bind (sock, sa, sockaddr_size (sa)) < 0)
477
0
    {
478
0
      fd_close (sock);
479
0
      return -1;
480
0
    }
481
0
  DEBUGP (("Local socket fd %d bound.\n", sock));
482
483
  /* If *PORT is 0, find out which port we've bound to.  */
484
0
  if (*port == 0)
485
0
    {
486
0
      socklen_t addrlen = sockaddr_size (sa);
487
0
      if (getsockname (sock, sa, &addrlen) < 0)
488
0
        {
489
          /* If we can't find out the socket's local address ("name"),
490
             something is seriously wrong with the socket, and it's
491
             unusable for us anyway because we must know the chosen
492
             port.  */
493
0
          fd_close (sock);
494
0
          return -1;
495
0
        }
496
0
      sockaddr_get_data (sa, NULL, port);
497
0
      DEBUGP (("binding to address %s using port %i.\n",
498
0
               print_address (bind_address), *port));
499
0
    }
500
0
  if (listen (sock, 1) < 0)
501
0
    {
502
0
      fd_close (sock);
503
0
      return -1;
504
0
    }
505
0
  return sock;
506
0
}
507
508
/* Like a call to accept(), but with the added check for timeout.
509
510
   In other words, accept a client connection on LOCAL_SOCK, and
511
   return the new socket used for communication with the client.
512
   LOCAL_SOCK should have been bound, e.g. using bind_local().
513
514
   The caller is blocked until a connection is established.  If no
515
   connection is established for opt.connect_timeout seconds, the
516
   function exits with an error status.  */
517
518
int
519
accept_connection (int local_sock)
520
0
{
521
0
  int sock;
522
523
  /* We don't need the values provided by accept, but accept
524
     apparently requires them to be present.  */
525
0
  struct sockaddr_storage ss;
526
0
  struct sockaddr *sa = (struct sockaddr *)&ss;
527
0
  socklen_t addrlen = sizeof (ss);
528
529
0
  if (opt.connect_timeout)
530
0
    {
531
0
      int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ);
532
0
      if (test == 0)
533
0
        errno = ETIMEDOUT;
534
0
      if (test <= 0)
535
0
        return -1;
536
0
    }
537
0
  sock = accept (local_sock, sa, &addrlen);
538
0
  DEBUGP (("Accepted client at socket %d.\n", sock));
539
0
  return sock;
540
0
}
541
542
/* Get the IP address associated with the connection on FD and store
543
   it to IP.  Return true on success, false otherwise.
544
545
   If ENDPOINT is ENDPOINT_LOCAL, it returns the address of the local
546
   (client) side of the socket.  Else if ENDPOINT is ENDPOINT_PEER, it
547
   returns the address of the remote (peer's) side of the socket.  */
548
549
bool
550
socket_ip_address (int sock, ip_address *ip, int endpoint)
551
0
{
552
0
  struct sockaddr_storage storage;
553
0
  struct sockaddr *sockaddr = (struct sockaddr *) &storage;
554
0
  socklen_t addrlen = sizeof (storage);
555
0
  int ret;
556
557
0
  memset (sockaddr, 0, addrlen);
558
0
  if (endpoint == ENDPOINT_LOCAL)
559
0
    ret = getsockname (sock, sockaddr, &addrlen);
560
0
  else if (endpoint == ENDPOINT_PEER)
561
0
    ret = getpeername (sock, sockaddr, &addrlen);
562
0
  else
563
0
    abort ();
564
0
  if (ret < 0)
565
0
    return false;
566
567
0
  memset(ip, 0, sizeof(ip_address));
568
0
  ip->family = sockaddr->sa_family;
569
0
  switch (sockaddr->sa_family)
570
0
    {
571
0
#ifdef ENABLE_IPV6
572
0
    case AF_INET6:
573
0
      {
574
0
        struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&storage;
575
0
        ip->data.d6 = sa6->sin6_addr;
576
0
#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
577
0
        ip->ipv6_scope = sa6->sin6_scope_id;
578
0
#endif
579
0
        DEBUGP (("conaddr is: %s\n", print_address (ip)));
580
0
        return true;
581
0
      }
582
0
#endif
583
0
    case AF_INET:
584
0
      {
585
0
        struct sockaddr_in *sa = (struct sockaddr_in *)&storage;
586
0
        ip->data.d4 = sa->sin_addr;
587
0
        DEBUGP (("conaddr is: %s\n", print_address (ip)));
588
0
        return true;
589
0
      }
590
0
    default:
591
0
      abort ();
592
0
    }
593
0
}
594
595
/* Get the socket family of connection on FD and store
596
   Return family type on success, -1 otherwise.
597
598
   If ENDPOINT is ENDPOINT_LOCAL, it returns the sock family of the local
599
   (client) side of the socket.  Else if ENDPOINT is ENDPOINT_PEER, it
600
   returns the sock family of the remote (peer's) side of the socket.  */
601
602
int
603
socket_family (int sock, int endpoint)
604
0
{
605
0
  struct sockaddr_storage storage;
606
0
  struct sockaddr *sockaddr = (struct sockaddr *) &storage;
607
0
  socklen_t addrlen = sizeof (storage);
608
0
  int ret;
609
610
0
  memset (sockaddr, 0, addrlen);
611
612
0
  if (endpoint == ENDPOINT_LOCAL)
613
0
    ret = getsockname (sock, sockaddr, &addrlen);
614
0
  else if (endpoint == ENDPOINT_PEER)
615
0
    ret = getpeername (sock, sockaddr, &addrlen);
616
0
  else
617
0
    abort ();
618
619
0
  if (ret < 0)
620
0
    return -1;
621
622
0
  return sockaddr->sa_family;
623
0
}
624
625
/* Return true if the error from the connect code can be considered
626
   retryable.  Wget normally retries after errors, but the exception
627
   are the "unsupported protocol" type errors (possible on IPv4/IPv6
628
   dual family systems) and "connection refused".  */
629
630
bool
631
retryable_socket_connect_error (int err)
632
0
{
633
  /* Have to guard against some of these values not being defined.
634
     Cannot use a switch statement because some of the values might be
635
     equal.  */
636
0
  if (false
637
0
#ifdef EAFNOSUPPORT
638
0
      || err == EAFNOSUPPORT
639
0
#endif
640
0
#ifdef EPFNOSUPPORT
641
0
      || err == EPFNOSUPPORT
642
0
#endif
643
0
#ifdef ESOCKTNOSUPPORT          /* no, "sockt" is not a typo! */
644
0
      || err == ESOCKTNOSUPPORT
645
0
#endif
646
0
#ifdef EPROTONOSUPPORT
647
0
      || err == EPROTONOSUPPORT
648
0
#endif
649
0
#ifdef ENOPROTOOPT
650
0
      || err == ENOPROTOOPT
651
0
#endif
652
      /* Apparently, older versions of Linux and BSD used EINVAL
653
         instead of EAFNOSUPPORT and such.  */
654
0
      || err == EINVAL
655
0
      )
656
0
    return false;
657
658
0
  if (!opt.retry_connrefused)
659
0
    if (err == ECONNREFUSED
660
0
#ifdef ENETUNREACH
661
0
        || err == ENETUNREACH   /* network is unreachable */
662
0
#endif
663
0
#ifdef EHOSTUNREACH
664
0
        || err == EHOSTUNREACH  /* host is unreachable */
665
0
#endif
666
0
        )
667
0
      return false;
668
669
0
  return true;
670
0
}
671
672
/* Wait for a single descriptor to become available, timing out after
673
   MAXTIME seconds.  Returns 1 if FD is available, 0 for timeout and
674
   -1 for error.  The argument WAIT_FOR can be a combination of
675
   WAIT_FOR_READ and WAIT_FOR_WRITE.
676
677
   This is a mere convenience wrapper around the select call, and
678
   should be taken as such (for example, it doesn't implement Wget's
679
   0-timeout-means-no-timeout semantics.)  */
680
681
static int
682
select_fd_internal (int fd, double maxtime, int wait_for, bool convert_back _GL_UNUSED)
683
0
{
684
0
  fd_set fdset;
685
0
  fd_set *rd = NULL, *wr = NULL;
686
0
  struct timeval tmout;
687
0
  int result;
688
689
0
  if (fd < 0)
690
0
    return -1;
691
692
0
  if (fd >= FD_SETSIZE)
693
0
    {
694
0
      logprintf (LOG_NOTQUIET, _("Too many fds open.  Cannot use select on a fd >= %d\n"), FD_SETSIZE);
695
0
      exit (WGET_EXIT_GENERIC_ERROR);
696
0
    }
697
0
  FD_ZERO (&fdset);
698
0
  FD_SET (fd, &fdset);
699
0
  if (wait_for & WAIT_FOR_READ)
700
0
    rd = &fdset;
701
0
  if (wait_for & WAIT_FOR_WRITE)
702
0
    wr = &fdset;
703
704
0
  tmout.tv_sec = (long) maxtime;
705
0
  tmout.tv_usec = 1000000 * (maxtime - (long) maxtime);
706
707
0
  do
708
0
  {
709
0
    result = select (fd + 1, rd, wr, NULL, &tmout);
710
#ifdef WINDOWS
711
    /* gnulib select() converts blocking sockets to nonblocking in windows.
712
       wget uses blocking sockets so we must convert them back to blocking.  */
713
    if (convert_back)
714
      set_windows_fd_as_blocking_socket (fd);
715
#endif
716
0
  }
717
0
  while (result < 0 && errno == EINTR);
718
719
0
  return result;
720
0
}
721
722
/* Wait up to MAXTIME seconds for FD to become ready for WAIT_FOR.
   Forwards to select_fd_internal with CONVERT_BACK set to true and
   returns its result.  */
int
select_fd (int fd, double maxtime, int wait_for)
{
  const bool convert_back = true;
  return select_fd_internal (fd, maxtime, wait_for, convert_back);
}
727
728
#ifdef WINDOWS
/* Windows-only variant of select_fd: forwards to select_fd_internal
   with CONVERT_BACK set to false, so FD is not switched back to
   blocking mode after the select call.  */
int
select_fd_nb (int fd, double maxtime, int wait_for)
{
  const bool convert_back = false;
  return select_fd_internal (fd, maxtime, wait_for, convert_back);
}
#endif
735
736
/* Return true if the connection to the remote site established
737
   through SOCK is still open.
738
739
   Specifically, this function returns true if SOCK is not ready for
740
   reading.  This is because, when the connection closes, the socket
741
   is ready for reading because EOF is about to be delivered.  A side
742
   effect of this method is that sockets that have pending data are
743
   considered non-open.  This is actually a good thing for callers of
744
   this function, where such pending data can only be unwanted
745
   leftover from a previous request.  */
746
747
bool
748
test_socket_open (int sock)
749
0
{
750
0
  fd_set check_set;
751
0
  struct timeval to;
752
0
  int ret = 0;
753
754
0
  if (sock >= FD_SETSIZE)
755
0
    {
756
0
      logprintf (LOG_NOTQUIET, _("Too many fds open.  Cannot use select on a fd >= %d\n"), FD_SETSIZE);
757
0
      exit (WGET_EXIT_GENERIC_ERROR);
758
0
    }
759
  /* Check if we still have a valid (non-EOF) connection.  From Andrew
760
   * Maholski's code in the Unix Socket FAQ.  */
761
762
0
  FD_ZERO (&check_set);
763
0
  FD_SET (sock, &check_set);
764
765
  /* Wait one microsecond */
766
0
  to.tv_sec = 0;
767
0
  to.tv_usec = 1;
768
769
0
  ret = select (sock + 1, &check_set, NULL, NULL, &to);
770
#ifdef WINDOWS
771
/* gnulib select() converts blocking sockets to nonblocking in windows.
772
wget uses blocking sockets so we must convert them back to blocking
773
*/
774
  set_windows_fd_as_blocking_socket ( sock );
775
#endif
776
777
0
  if ( !ret )
778
    /* We got a timeout, it means we're still connected. */
779
0
    return true;
780
0
  else
781
    /* Read now would not wait, it means we have either pending data
782
       or EOF/error. */
783
0
    return false;
784
0
}
785
786
/* Basic socket operations, mostly EINTR wrappers.  */
787
788
/* read() from FD into BUF (at most BUFSIZE bytes), transparently
   restarting the call whenever it fails with EINTR.  Returns what
   read returned.  */
static int
sock_read (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int nread = read (fd, buf, bufsize);
      if (nread != -1 || errno != EINTR)
        return nread;
    }
}
797
798
/* write() BUFSIZE bytes from BUF to FD, transparently restarting the
   call whenever it fails with EINTR.  Returns what write returned,
   which may be less than BUFSIZE.  */
static int
sock_write (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int nwritten = write (fd, buf, bufsize);
      if (nwritten != -1 || errno != EINTR)
        return nwritten;
    }
}
807
808
/* Poll a plain socket: wait up to TIMEOUT seconds for FD to become
   ready for WAIT_FOR by delegating to select_fd, and return its
   result.  */
static int
sock_poll (int fd, double timeout, int wait_for)
{
  int ready = select_fd (fd, timeout, wait_for);
  return ready;
}
813
814
/* Copy up to BUFSIZE bytes of pending data on FD into BUF without
   consuming them (recv with MSG_PEEK), transparently restarting the
   call whenever it fails with EINTR.  Returns what recv returned.  */
static int
sock_peek (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int npeeked = recv (fd, buf, bufsize, MSG_PEEK);
      if (npeeked != -1 || errno != EINTR)
        return npeeked;
    }
}
823
824
/* Close the plain descriptor FD and note it in the debug log.  The
   return value of close is not examined.  */
static void
sock_close (int fd)
{
  (void) close (fd);
  DEBUGP (("Closed fd %d\n", fd));
}
830
#undef read
831
#undef write
832
#undef close
833
834
/* Reading and writing from the network.

   The public interface is expressed in terms of socket file
   descriptors, but a descriptor may have "extended" operations
   attached to it for transports that are more than a bare
   descriptor underneath, such as SSL sockets.

   User code therefore just calls fd_read(fd, ...) and the right
   thing (read, SSL_read, or whatever is necessary) is run.  */

/* Maps a descriptor to its struct transport_info.  Created on the
   first registration; NULL until then.  */
static struct hash_table *transport_map;

/* Incremented when a transport is registered, so cached lookups can
   tell that the map has changed.  */
static unsigned int transport_map_modified_tick;

/* What is stored in transport_map for each registered descriptor.  */
struct transport_info {
  struct transport_implementation *imp; /* operations for the fd */
  void *ctx;                            /* opaque argument for IMP */
};
849
850
/* Register the transport layer operations that will be used when
851
   reading, writing, and polling FD.
852
853
   This should be used for transport layers like SSL that piggyback on
854
   sockets.  FD should otherwise be a real socket, on which you can
855
   call getpeername, etc.  */
856
857
void
858
fd_register_transport (int fd, struct transport_implementation *imp, void *ctx)
859
349
{
860
349
  struct transport_info *info;
861
862
  /* The file descriptor must be non-negative to be registered.
863
     Negative values are ignored by fd_close(), and -1 cannot be used as
864
     hash key.  */
865
349
  assert (fd >= 0);
866
867
349
  info = xnew (struct transport_info);
868
349
  info->imp = imp;
869
349
  info->ctx = ctx;
870
349
  if (!transport_map)
871
349
    transport_map = hash_table_new (0, NULL, NULL);
872
349
  hash_table_put (transport_map, (void *)(intptr_t) fd, info);
873
349
  ++transport_map_modified_tick;
874
349
}
875
876
/* Return context of the transport registered with
877
   fd_register_transport.  This assumes fd_register_transport was
878
   previously called on FD.  */
879
880
void *
881
fd_transport_context (int fd)
882
0
{
883
0
  struct transport_info *info = hash_table_get (transport_map, (void *)(intptr_t) fd);
884
0
  return info ? info->ctx : NULL;
885
0
}
886
887
/* Set INFO to the transport_info registered for the variable `fd' of
   the enclosing function, or to NULL if there is none.

   fd_read/fd_write tend to be called many times in a row for the
   same descriptor, so the last lookup is cached.  Comparing FD alone
   is not enough, because a descriptor can be closed and reopened;
   the cached entry is therefore also tied to
   transport_map_modified_tick and discarded when the tick differs.

   This is a macro so that each function using it gets its own
   static cache variables.  */

#define LAZY_RETRIEVE_INFO(info) do {                                   \
  static struct transport_info *cached_info;                            \
  static int cached_fd = -1;                                            \
  static unsigned int cached_tick;                                      \
  if (transport_map == NULL)                                            \
    info = NULL;                                                        \
  else                                                                  \
    {                                                                   \
      if (cached_fd != fd                                               \
          || cached_tick != transport_map_modified_tick)                \
        {                                                               \
          cached_info                                                   \
            = hash_table_get (transport_map, (void *)(intptr_t) fd);    \
          cached_fd = fd;                                               \
          cached_tick = transport_map_modified_tick;                    \
        }                                                               \
      info = cached_info;                                               \
    }                                                                   \
} while (0)
912
913
static bool
914
poll_internal (int fd, struct transport_info *info, int wf, double timeout)
915
0
{
916
0
  if (timeout == -1)
917
0
    timeout = opt.read_timeout;
918
0
  if (timeout)
919
0
    {
920
0
      int test;
921
0
      if (info && info->imp->poller)
922
0
        test = info->imp->poller (fd, timeout, wf, info->ctx);
923
0
      else
924
0
        test = sock_poll (fd, timeout, wf);
925
0
      if (test == 0)
926
0
        errno = ETIMEDOUT;
927
0
      if (test <= 0)
928
0
        return false;
929
0
    }
930
0
  return true;
931
0
}
932
933
/* Read no more than BUFSIZE bytes of data from FD, storing them to
934
   BUF.  If TIMEOUT is non-zero, the operation aborts if no data is
935
   received after that many seconds.  If TIMEOUT is -1, the value of
936
   opt.timeout is used for TIMEOUT.  */
937
938
int
939
fd_read (int fd, char *buf, int bufsize, double timeout)
940
1.62k
{
941
1.62k
  struct transport_info *info;
942
1.62k
  LAZY_RETRIEVE_INFO (info);
943
944
  /* let imp->reader take care about timeout.
945
     (or in worst case timeout can be 2*timeout) */
946
1.62k
  if (info && info->imp->reader)
947
1.62k
    return info->imp->reader (fd, buf, bufsize, info->ctx, timeout);
948
949
0
  if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
950
0
    return -1;
951
0
  return sock_read (fd, buf, bufsize);
952
0
}
953
954
/* Like fd_read, except it provides a "preview" of the data that will
955
   be read by subsequent calls to fd_read.  Specifically, it copies no
956
   more than BUFSIZE bytes of the currently available data to BUF and
957
   returns the number of bytes copied.  Return values and timeout
958
   semantics are the same as those of fd_read.
959
960
   CAVEAT: Do not assume that the first subsequent call to fd_read
961
   will retrieve the same amount of data.  Reading can return more or
962
   less data, depending on the TCP implementation and other
963
   circumstances.  However, barring an error, it can be expected that
964
   all the peeked data will eventually be read by fd_read.  */
965
966
int
967
fd_peek (int fd, char *buf, int bufsize, double timeout)
968
2.14k
{
969
2.14k
  struct transport_info *info;
970
2.14k
  LAZY_RETRIEVE_INFO (info);
971
972
2.14k
  if (info && info->imp->peeker)
973
2.14k
    return info->imp->peeker (fd, buf, bufsize, info->ctx, timeout);
974
975
0
  if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
976
0
    return -1;
977
0
  return sock_peek (fd, buf, bufsize);
978
0
}
979
980
/* Write the entire contents of BUF to FD.  If TIMEOUT is non-zero,
981
   the operation aborts if no data is received after that many
982
   seconds.  If TIMEOUT is -1, the value of opt.timeout is used for
983
   TIMEOUT.  */
984
985
int
986
fd_write (int fd, char *buf, int bufsize, double timeout)
987
0
{
988
0
  int res;
989
0
  struct transport_info *info;
990
0
  LAZY_RETRIEVE_INFO (info);
991
992
  /* `write' may write less than LEN bytes, thus the loop keeps trying
993
     it until all was written, or an error occurred.  */
994
0
  res = 0;
995
0
  while (bufsize > 0)
996
0
    {
997
0
      if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout))
998
0
        return -1;
999
0
      if (info && info->imp->writer)
1000
0
        res = info->imp->writer (fd, buf, bufsize, info->ctx);
1001
0
      else
1002
0
        res = sock_write (fd, buf, bufsize);
1003
0
      if (res <= 0)
1004
0
        break;
1005
0
      buf += res;
1006
0
      bufsize -= res;
1007
0
    }
1008
0
  return res;
1009
0
}
1010
1011
/* Report the most recent error(s) on FD.  This should only be called
1012
   after fd_* functions, such as fd_read and fd_write, and only if
1013
   they return a negative result.  For errors coming from other calls
1014
   such as setsockopt or fopen, strerror should continue to be
1015
   used.
1016
1017
   If the transport doesn't support error messages or doesn't supply
1018
   one, strerror(errno) is returned.  The returned error message
1019
   should not be used after fd_close has been called.  */
1020
1021
const char *
1022
fd_errstr (int fd)
1023
0
{
1024
  /* Don't bother with LAZY_RETRIEVE_INFO, as this will only be called
1025
     in case of error, never in a tight loop.  */
1026
0
  struct transport_info *info = NULL;
1027
1028
0
  if (transport_map)
1029
0
    info = hash_table_get (transport_map, (void *)(intptr_t) fd);
1030
1031
0
  if (info && info->imp->errstr)
1032
0
    {
1033
0
      const char *err = info->imp->errstr (fd, info->ctx);
1034
0
      if (err)
1035
0
        return err;
1036
      /* else, fall through and print the system error. */
1037
0
    }
1038
0
  return strerror (errno);
1039
0
}
1040
1041
/* Close the file descriptor FD.  */
1042
1043
void
1044
fd_close (int fd)
1045
0
{
1046
0
  struct transport_info *info;
1047
0
  if (fd < 0)
1048
0
    return;
1049
1050
  /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once
1051
     per socket, so that particular optimization wouldn't work.  */
1052
0
  info = NULL;
1053
0
  if (transport_map)
1054
0
    info = hash_table_get (transport_map, (void *)(intptr_t) fd);
1055
1056
0
  if (info && info->imp->closer)
1057
0
    info->imp->closer (fd, info->ctx);
1058
0
  else
1059
0
    sock_close (fd);
1060
1061
0
  if (info)
1062
0
    {
1063
0
      hash_table_remove (transport_map, (void *)(intptr_t) fd);
1064
0
      xfree (info);
1065
0
      ++transport_map_modified_tick;
1066
0
    }
1067
0
}
1068
1069
#if defined DEBUG_MALLOC || defined TESTING
/* Release everything held by transport_map: free each stored value,
   destroy the table and reset transport_map to NULL.  Does nothing
   when there is no map.  Only built for DEBUG_MALLOC or TESTING.  */
void
connect_cleanup (void)
{
  hash_table_iterator iter;

  if (!transport_map)
    return;

  hash_table_iterate (transport_map, &iter);
  while (hash_table_iter_next (&iter))
    {
      xfree (iter.value);
    }

  hash_table_destroy (transport_map);
  transport_map = NULL;

  /* Every registration has just been freed.  Advance the tick, as
     fd_close does when it removes an entry, so that a pointer cached
     by LAZY_RETRIEVE_INFO cannot be mistaken for a live one if a new
     map is created later.  */
  ++transport_map_modified_tick;
}
#endif