Coverage Report

Created: 2026-02-14 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wget/src/cookies.c
Line
Count
Source
1
/* Support for cookies.
2
   Copyright (C) 2001-2011, 2015, 2018-2024, 2026 Free Software
3
   Foundation, Inc.
4
5
This file is part of GNU Wget.
6
7
GNU Wget is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3 of the License, or (at
10
your option) any later version.
11
12
GNU Wget is distributed in the hope that it will be useful, but
13
WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
General Public License for more details.
16
17
You should have received a copy of the GNU General Public License
18
along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20
Additional permission under GNU GPL version 3 section 7
21
22
If you modify this program, or any covered work, by linking or
23
combining it with the OpenSSL project's OpenSSL library (or a
24
modified version of that library), containing parts covered by the
25
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26
grants you additional permission to convey the resulting work.
27
Corresponding Source for a non-source form of such a combination
28
shall include the source code for the parts of OpenSSL used as well
29
as that of the covered work.  */
30
31
/* Written by Hrvoje Niksic.  Parts are loosely inspired by the
32
   cookie patch submitted by Tomasz Wegrzanowski.
33
34
   This implements the client-side cookie support, as specified
35
   (loosely) by Netscape's "preliminary specification", currently
36
   available at:
37
38
       http://wp.netscape.com/newsref/std/cookie_spec.html
39
40
   rfc2109 is not supported because of its incompatibilities with the
41
   above widely-used specification.  rfc2965 is entirely ignored,
42
   since popular client software doesn't implement it, and even the
43
   sites that do send Set-Cookie2 also emit Set-Cookie for
44
   compatibility.  */
45
46
#include "wget.h"
47
48
#include <stdint.h>
49
#include <stdio.h>
50
#include <string.h>
51
#include <stdlib.h>
52
#include <assert.h>
53
#include <errno.h>
54
#include <time.h>
55
#ifdef HAVE_LIBPSL
56
# include <libpsl.h>
57
#endif
58
#include "utils.h"
59
#include "hash.h"
60
#include "cookies.h"
61
#include "http.h"               /* for http_atotm */
62
#include "c-strcase.h"
63
64
65
/* Declarations of `struct cookie' and the most basic functions. */
66
67
/* The cookie jar is both the storage for cookies and the index that
   makes retrieving them cheap.  Cookies sharing a domain are linked
   into a list (a "chain"); the head of every chain is kept in the
   jar's `chains' hash table, keyed by the domain name.

   To reach the cookies stored under google.com, for instance, call
   hash_table_get (jar->chains, "google.com").  When a request goes to
   `www.google.com', the chains for both `www.google.com' and
   `google.com' have to be consulted -- but never the whole jar.  */

struct cookie_jar
{
  /* Maps a domain name to the head of that domain's cookie chain.  */
  struct hash_table *chains;

  /* How many cookies the jar currently holds.  */
  int cookie_count;
};
85
86
/* Timestamp cached by the entry point functions; the low-level
   routines read it instead of calling time() over and over.  */
static time_t cookies_now;
89
90
struct cookie_jar *
91
cookie_jar_new (void)
92
1.88k
{
93
1.88k
  struct cookie_jar *jar = xnew (struct cookie_jar);
94
1.88k
  jar->chains = make_nocase_string_hash_table (0);
95
1.88k
  jar->cookie_count = 0;
96
1.88k
  return jar;
97
1.88k
}
98
99
/* A single cookie, as stored in a chain of the cookie jar.  */
struct cookie
{
  char *domain;                 /* domain the cookie belongs to */
  int port;                     /* port number, or PORT_ANY */
  char *path;                   /* path prefix the cookie applies to */

  unsigned discard_requested :1;/* set when this cookie only exists
                                   to ask for the removal of another
                                   (matching) cookie */

  unsigned secure :1;           /* set when the Set-Cookie header
                                   carried the `secure' attribute */
  unsigned domain_exact :1;     /* set when DOMAIN has to match as a
                                   whole */

  unsigned permanent :1;        /* set when the cookie is meant to
                                   outlive the session */
  time_t expiry_time;           /* moment the cookie expires; 0 means
                                   undetermined */

  char *attr;                   /* name of the cookie attribute */
  char *value;                  /* value of the cookie attribute */

  struct cookie *next;          /* next cookie in the chain of the
                                   same domain */
};

/* Port value meaning "the cookie is not tied to a specific port".  */
#define PORT_ANY (-1)
127
128
/* Allocate and return a new, empty cookie structure. */
129
130
static struct cookie *
131
cookie_new (void)
132
5.66k
{
133
5.66k
  struct cookie *cookie = xnew0 (struct cookie);
134
135
  /* Both cookie->permanent and cookie->expiry_time are now 0.  This
136
     means that the cookie doesn't expire, but is only valid for this
137
     session (i.e. not written out to disk).  */
138
139
5.66k
  cookie->port = PORT_ANY;
140
5.66k
  return cookie;
141
5.66k
}
142
143
/* Non-zero if the cookie has expired.  Assumes cookies_now has been
144
   set by one of the entry point functions.  */
145
146
static bool
147
cookie_expired_p (const struct cookie *c)
148
0
{
149
0
  return c->expiry_time != 0 && c->expiry_time < cookies_now;
150
0
}
151
152
/* Deallocate COOKIE and its components. */
153
154
static void
155
delete_cookie (struct cookie *cookie)
156
5.66k
{
157
5.66k
  xfree (cookie->domain);
158
5.66k
  xfree (cookie->path);
159
5.66k
  xfree (cookie->attr);
160
5.66k
  xfree (cookie->value);
161
5.66k
  xfree (cookie);
162
5.66k
}
163
164
/* Functions for storing cookies.
165
166
   All cookies can be reached beginning with jar->chains.  The key in
167
   that table is the domain name, and the value is a linked list of
168
   all cookies from that domain.  Every new cookie is placed on the
169
   head of the list.  */
170
171
/* Find and return a cookie in JAR whose domain, path, and attribute
172
   name correspond to COOKIE.  If found, PREVPTR will point to the
173
   location of the cookie previous in chain, or NULL if the found
174
   cookie is the head of a chain.
175
176
   If no matching cookie is found, return NULL. */
177
178
static struct cookie *
179
find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180
                      struct cookie **prevptr)
181
1.12k
{
182
1.12k
  struct cookie *chain, *prev;
183
184
1.12k
  chain = hash_table_get (jar->chains, cookie->domain);
185
1.12k
  if (!chain)
186
0
    goto nomatch;
187
188
1.12k
  prev = NULL;
189
1.69k
  for (; chain; prev = chain, chain = chain->next)
190
1.12k
    if (0 == strcmp (cookie->path, chain->path)
191
568
        && 0 == strcmp (cookie->attr, chain->attr)
192
568
        && cookie->port == chain->port)
193
565
      {
194
565
        *prevptr = prev;
195
565
        return chain;
196
565
      }
197
198
563
 nomatch:
199
563
  *prevptr = NULL;
200
563
  return NULL;
201
1.12k
}
202
203
/* Store COOKIE to the jar.
204
205
   This is done by placing COOKIE at the head of its chain.  However,
206
   if COOKIE matches a cookie already in memory, as determined by
207
   find_matching_cookie, the old cookie is unlinked and destroyed.
208
209
   The key of each chain's hash table entry is allocated only the
210
   first time; next hash_table_put's reuse the same key.  */
211
212
static void
213
store_cookie (struct cookie_jar *jar, struct cookie *cookie)
214
1.69k
{
215
1.69k
  struct cookie *chain_head;
216
1.69k
  char *chain_key;
217
218
1.69k
  if (hash_table_get_pair (jar->chains, cookie->domain,
219
1.69k
                           &chain_key, &chain_head))
220
1.12k
    {
221
      /* A chain of cookies in this domain already exists.  Check for
222
         duplicates -- if an extant cookie exactly matches our domain,
223
         port, path, and name, replace it.  */
224
1.12k
      struct cookie *prev;
225
1.12k
      struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226
227
1.12k
      if (victim)
228
565
        {
229
          /* Remove VICTIM from the chain.  COOKIE will be placed at
230
             the head. */
231
565
          if (prev)
232
0
            {
233
0
              prev->next = victim->next;
234
0
              cookie->next = chain_head;
235
0
            }
236
565
          else
237
565
            {
238
              /* prev is NULL; apparently VICTIM was at the head of
239
                 the chain.  This place will be taken by COOKIE, so
240
                 all we need to do is:  */
241
565
              cookie->next = victim->next;
242
565
            }
243
565
          delete_cookie (victim);
244
565
          --jar->cookie_count;
245
565
          DEBUGP (("Deleted old cookie (to be replaced.)\n"));
246
565
        }
247
563
      else
248
563
        cookie->next = chain_head;
249
1.12k
    }
250
566
  else
251
566
    {
252
      /* We are now creating the chain.  Use a copy of cookie->domain
253
         as the key for the life-time of the chain.  Using
254
         cookie->domain would be unsafe because the life-time of the
255
         chain may exceed the life-time of the cookie.  (Cookies may
256
         be deleted from the chain by this very function.)  */
257
566
      cookie->next = NULL;
258
566
      chain_key = xstrdup (cookie->domain);
259
566
    }
260
261
1.69k
  hash_table_put (jar->chains, chain_key, cookie);
262
1.69k
  ++jar->cookie_count;
263
264
1.69k
  IF_DEBUG
265
0
    {
266
0
      time_t exptime = cookie->expiry_time;
267
0
      DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
268
0
               cookie->domain, cookie->port,
269
0
               cookie->port == PORT_ANY ? " (ANY)" : "",
270
0
               cookie->path,
271
0
               cookie->permanent ? "permanent" : "session",
272
0
               cookie->secure ? "secure" : "insecure",
273
0
               cookie->expiry_time ? datetime_str (exptime) : "none",
274
0
               cookie->attr, cookie->value));
275
0
    }
276
1.69k
}
277
278
/* Discard a cookie matching COOKIE's domain, port, path, and
279
   attribute name.  This gets called when we encounter a cookie whose
280
   expiry date is in the past, or whose max-age is set to 0.  The
281
   former corresponds to netscape cookie spec, while the latter is
282
   specified by rfc2109.  */
283
284
static void
285
discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
286
571
{
287
571
  struct cookie *prev, *victim;
288
289
571
  if (!hash_table_count (jar->chains))
290
    /* No elements == nothing to discard. */
291
571
    return;
292
293
0
  victim = find_matching_cookie (jar, cookie, &prev);
294
0
  if (victim)
295
0
    {
296
0
      if (prev)
297
        /* Simply unchain the victim. */
298
0
        prev->next = victim->next;
299
0
      else
300
0
        {
301
          /* VICTIM was head of its chain.  We need to place a new
302
             cookie at the head.  */
303
0
          char *chain_key = NULL;
304
0
          int res;
305
306
0
          res = hash_table_get_pair (jar->chains, victim->domain,
307
0
                                     &chain_key, NULL);
308
309
0
          if (res == 0)
310
0
            {
311
0
              logprintf (LOG_VERBOSE, _("Unable to get cookie for %s\n"),
312
0
                         victim->domain);
313
0
            }
314
0
          if (!victim->next)
315
0
            {
316
              /* VICTIM was the only cookie in the chain.  Destroy the
317
                 chain and deallocate the chain key.  */
318
0
              hash_table_remove (jar->chains, victim->domain);
319
0
              xfree (chain_key);
320
0
            }
321
0
          else
322
0
            hash_table_put (jar->chains, chain_key, victim->next);
323
0
        }
324
0
      delete_cookie (victim);
325
0
      DEBUGP (("Discarded old cookie.\n"));
326
0
    }
327
0
}
328
329
/* Functions for parsing the `Set-Cookie' header, and creating new
330
   cookies from the wire.  */
331
332
#define TOKEN_IS(token, string_literal)                         \
333
134k
  BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
334
335
10.9k
#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
336
337
/* Parse the contents of the `Set-Cookie' header.  The header looks
338
   like this:
339
340
   name1=value1; name2=value2; ...
341
342
   Trailing semicolon is optional; spaces are allowed between all
343
   tokens.  Additionally, values may be quoted.
344
345
   A new cookie is returned upon success, NULL otherwise.
346
347
   The first name-value pair will be used to set the cookie's
348
   attribute name and value.  Subsequent parameters will be checked
349
   against field names such as `domain', `path', etc.  Recognized
350
   fields will be parsed and the corresponding members of COOKIE
351
   filled.  */
352
353
static struct cookie *
354
parse_set_cookie (const char *set_cookie, bool silent)
355
5.66k
{
356
5.66k
  const char *ptr = set_cookie;
357
5.66k
  struct cookie *cookie = cookie_new ();
358
5.66k
  param_token name, value;
359
360
5.66k
  if (!extract_param (&ptr, &name, &value, ';', NULL))
361
426
    goto error;
362
5.24k
  if (!value.b)
363
2.65k
    goto error;
364
365
  /* If the value is quoted, do not modify it.  */
366
2.58k
  if (*(value.b - 1) == '"')
367
153
    value.b--;
368
2.58k
  if (*value.e == '"')
369
153
    value.e++;
370
371
2.58k
  cookie->attr = strdupdelim (name.b, name.e);
372
2.58k
  cookie->value = strdupdelim (value.b, value.e);
373
374
32.5k
  while (extract_param (&ptr, &name, &value, ';', NULL))
375
30.0k
    {
376
30.0k
      if (TOKEN_IS (name, "domain"))
377
1.07k
        {
378
1.07k
          if (!TOKEN_NON_EMPTY (value))
379
6
            goto error;
380
1.07k
          xfree (cookie->domain);
381
          /* Strictly speaking, we should set cookie->domain_exact if the
382
             domain doesn't begin with a dot.  But many sites set the
383
             domain to "foo.com" and expect "subhost.foo.com" to get the
384
             cookie, and it apparently works in browsers.  */
385
1.07k
          if (*value.b == '.')
386
456
            ++value.b;
387
1.07k
          cookie->domain = strdupdelim (value.b, value.e);
388
1.07k
        }
389
29.0k
      else if (TOKEN_IS (name, "path"))
390
1.40k
        {
391
1.40k
          if (!TOKEN_NON_EMPTY (value))
392
9
            goto error;
393
1.39k
          xfree (cookie->path);
394
1.39k
          cookie->path = strdupdelim (value.b, value.e);
395
1.39k
        }
396
27.6k
      else if (TOKEN_IS (name, "expires"))
397
2.95k
        {
398
2.95k
          char value_copy[128];
399
2.95k
          size_t value_len = value.e - value.b;
400
2.95k
          time_t expires;
401
402
2.95k
          if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy))
403
27
            goto error;
404
405
2.92k
          memcpy (value_copy, value.b, value_len);
406
2.92k
          value_copy[value_len] = 0;
407
408
          /* Check if expiration spec is valid.
409
             If not, assume default (cookie doesn't expire, but valid only for
410
             this session.) */
411
2.92k
          expires = http_atotm (value_copy);
412
2.92k
          if (expires != (time_t) -1)
413
930
            {
414
930
              cookie->permanent = 1;
415
930
              cookie->expiry_time = expires;
416
              /* According to netscape's specification, expiry time in
417
                 the past means that discarding of a matching cookie
418
                 is requested.  */
419
930
              if (cookie->expiry_time < cookies_now)
420
732
                cookie->discard_requested = 1;
421
930
            }
422
2.92k
        }
423
24.6k
      else if (TOKEN_IS (name, "max-age"))
424
1.30k
        {
425
1.30k
          double maxage = -1;
426
1.30k
          char value_copy[32];
427
1.30k
          size_t value_len = value.e - value.b;
428
429
1.30k
          if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy))
430
27
            goto error;
431
432
1.27k
          memcpy (value_copy, value.b, value_len);
433
1.27k
          value_copy[value_len] = 0;
434
435
1.27k
          sscanf (value_copy, "%lf", &maxage);
436
1.27k
          if (maxage == -1)
437
            /* something went wrong. */
438
21
            goto error;
439
1.25k
          cookie->permanent = 1;
440
1.25k
          cookie->expiry_time = cookies_now + (time_t) maxage;
441
442
          /* According to rfc2109, a cookie with max-age of 0 means that
443
             discarding of a matching cookie is requested.  */
444
1.25k
          if (maxage == 0)
445
795
            cookie->discard_requested = 1;
446
1.25k
        }
447
23.3k
      else if (TOKEN_IS (name, "secure"))
448
504
        {
449
          /* ignore value completely */
450
504
          cookie->secure = 1;
451
504
        }
452
      /* else: Ignore unrecognized attribute. */
453
30.0k
    }
454
2.49k
  if (*ptr)
455
    /* extract_param has encountered a syntax error */
456
69
    goto error;
457
458
  /* The cookie has been successfully constructed; return it. */
459
2.43k
  return cookie;
460
461
3.23k
 error:
462
3.23k
  if (!silent)
463
3.23k
    logprintf (LOG_NOTQUIET,
464
3.23k
               _("Syntax error in Set-Cookie: %s at position %d.\n"),
465
3.23k
               quotearg_style (escape_quoting_style, set_cookie),
466
3.23k
               (int) (ptr - set_cookie));
467
3.23k
  delete_cookie (cookie);
468
3.23k
  return NULL;
469
2.49k
}
470
471
#undef TOKEN_IS
472
#undef TOKEN_NON_EMPTY
473
474
/* Sanity checks.  These are important, otherwise it is possible for
475
   malicious attackers to destroy important cookie information and/or
476
   violate your privacy.  */
477
478
479
0
/* Advance P over a run of one or more digits; make the enclosing
   function return false if P does not point at a digit.  */
#define REQUIRE_DIGITS(p) do {                  \
  if (!c_isdigit (*p))                          \
    return false;                               \
  for (++p; c_isdigit (*p); p++)                \
    ;                                           \
} while (0)

/* Advance P over a single '.'; make the enclosing function return
   false if P does not point at one.  */
#define REQUIRE_DOT(p) do {                     \
  if (*p++ != '.')                              \
    return false;                               \
} while (0)

/* Return true if ADDR has the shape
   <digits>.<digits>.<digits>.<digits> and nothing else.  Only the
   shape is checked; the numbers are not range-checked.

   Network functions such as inet_addr() are deliberately avoided:
   all that is needed is a check, preferably one that is small, fast,
   and well-defined.  */

static bool
numeric_address_p (const char *addr)
{
  const char *cursor = addr;
  int group;

  for (group = 0; group < 4; group++)
    {
      /* Every group but the first must be introduced by a dot.  */
      if (group > 0 && *cursor++ != '.')
        return false;

      /* A group consists of at least one digit.  */
      if (!c_isdigit (*cursor))
        return false;
      while (c_isdigit (*cursor))
        cursor++;
    }

  /* Anything following the fourth group disqualifies ADDR.  */
  return *cursor == '\0';
}
514
515
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST,
   i.e. whether a cookie sent by HOST may claim COOKIE_DOMAIN.
   Returns true if the domain is acceptable.

   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   Wget now uses libpsl to check domain names against a public suffix
   list to see if they are valid.  However, since we don't provide a
   psl on our own, if no public suffix list can be obtained we fall
   back to the original "tail matching" heuristic.  Likewise, if
   libpsl is unable to convert the domain to lowercase, which means
   that it doesn't have any runtime conversion support, we again fall
   back to "tail matching" since libpsl states the results are
   unpredictable with upper case strings.

   The public suffix list is loaded once, on the first call, and kept
   in the file-scope variable `psl'.  */

#ifdef HAVE_LIBPSL
static psl_ctx_t *psl;
#endif

static bool
check_domain_match (const char *cookie_domain, const char *host)
{
#ifdef HAVE_LIBPSL
  static int init_psl;
  char *cookie_domain_lower = NULL;
  char *host_lower = NULL;
  int is_acceptable;

  DEBUGP (("cdm: 1\n"));
  if (!init_psl)
    {
      init_psl = 1;

#ifdef HAVE_PSL_LATEST
      if ((psl = psl_latest (NULL)))
        goto have_psl;

      DEBUGP (("\nPSL: Failed to load any PSL data. "
               "Falling back to insecure heuristics.\n"));
      /* Actually fall back, as the message says and as every later
         call does through the `!psl' test below; continuing to
         have_psl would consult libpsl with a NULL context.  */
      goto no_psl;
#else
      if ((psl = psl_builtin ()) && !psl_builtin_outdated ())
        goto have_psl;

      DEBUGP (("\nPSL: built-in data outdated. "
               "Trying to load data from %s.\n",
              quote (psl_builtin_filename ())));

      if ((psl = psl_load_file (psl_builtin_filename ())))
        goto have_psl;

      DEBUGP (("\nPSL: %s not found or not readable. "
               "Falling back to built-in data.\n",
              quote (psl_builtin_filename ())));

      if (!(psl = psl_builtin ()))
        {
          DEBUGP (("\nPSL: libpsl not built with a public suffix list. "
                   "Falling back to insecure heuristics.\n"));
          goto no_psl;
        }
#endif
    }
  else if (!psl)
    goto no_psl;

have_psl:
  if (psl_str_to_utf8lower (cookie_domain, NULL, NULL, &cookie_domain_lower) == PSL_SUCCESS &&
      psl_str_to_utf8lower (host, NULL, NULL, &host_lower) == PSL_SUCCESS)
    {
      is_acceptable = psl_is_cookie_domain_acceptable (psl, host_lower, cookie_domain_lower);
    }
  else
    {
        DEBUGP (("libpsl unable to parse domain name. "
                 "Falling back to simple heuristics.\n"));
        goto no_psl;
    }

  xfree (cookie_domain_lower);
  xfree (host_lower);

  return is_acceptable == 1;

no_psl:
  /* Cleanup the PSL pointers first */
  xfree (cookie_domain_lower);
  xfree (host_lower);
#endif

  /* For efficiency make some elementary checks first */
  DEBUGP (("cdm: 2\n"));

  /* For the sake of efficiency, check for exact match first. */
  if (0 == strcasecmp (cookie_domain, host))
    return true;

  DEBUGP (("cdm: 3\n"));

  /* COOKIE_DOMAIN must match the tail of HOST. */
  if (!match_tail (host, cookie_domain, true))
    return false;

  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but when the top-level domain is unknown), with the assumption
     that those of three or less characters could be reserved.  For
     example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1;            /* number of domain components */
    int ldcl  = 0;              /* last domain component length */
    int nldcl = 0;              /* next to last domain component length */

    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    DEBUGP (("cdm: 4\n"));
    for (; *p != '\0'; p++)
      {
        if (*p != '.')
          {
            ++ldcl;
            continue;
          }
        if (ldcl == 0)
          /* Empty domain component found -- the domain is invalid. */
          return false;
        if (*(p + 1) == '\0')
          /* Tolerate trailing '.' by not treating the domain as
             one ending with an empty domain component.  */
          break;
        nldcl = ldcl;
        ldcl  = 0;
        ++dccount;
      }

    DEBUGP (("cdm: 5\n"));

    if (dccount < 2)
      return false;

    DEBUGP (("cdm: 6\n"));

    if (dccount == 2)
      {
        size_t i;
        int known_toplevel = false;
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        for (i = 0; i < countof (known_toplevel_domains); i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], true))
            {
              known_toplevel = true;
              break;
            }
        if (!known_toplevel && nldcl <= 3)
          return false;
      }
  }

  DEBUGP (("cdm: 7\n"));

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      size_t dlen = strlen (cookie_domain);
      size_t hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return false;
    }

  DEBUGP (("cdm: 8\n"));

  return true;
}
721
722
static int path_matches (const char *, const char *);

/* Return true if PATH begins with COOKIE_PATH, as decided by
   path_matches.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  int goodness = path_matches (path, cookie_path);

  return goodness != 0;
}
731
732
/* Process the HTTP `Set-Cookie' header.  This results in storing the
733
   cookie or discarding a matching one, or ignoring it completely, all
734
   depending on the contents.  */
735
736
void
737
cookie_handle_set_cookie (struct cookie_jar *jar,
738
                          const char *host, int port,
739
                          const char *path, const char *set_cookie)
740
5.66k
{
741
5.66k
  struct cookie *cookie;
742
5.66k
  cookies_now = time (NULL);
743
5.66k
  char buf[1024], *tmp;
744
5.66k
  size_t pathlen = strlen(path);
745
746
  /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
747
     usage assumes /-prefixed paths.  Until the rest of Wget is fixed,
748
     simply prepend slash to PATH.  */
749
5.66k
  if (pathlen < sizeof (buf) - 1)
750
5.66k
    tmp = buf;
751
0
  else
752
0
    tmp = xmalloc (pathlen + 2);
753
754
5.66k
  *tmp = '/';
755
5.66k
  memcpy (tmp + 1, path, pathlen + 1);
756
5.66k
  path = tmp;
757
758
5.66k
  cookie = parse_set_cookie (set_cookie, false);
759
5.66k
  if (!cookie)
760
3.23k
    goto out;
761
762
  /* Sanitize parts of cookie. */
763
764
2.43k
  if (!cookie->domain)
765
2.23k
    {
766
2.23k
      cookie->domain = xstrdup (host);
767
2.23k
      cookie->domain_exact = 1;
768
      /* Set the port, but only if it's non-default. */
769
2.23k
      if (port != 80 && port != 443)
770
1.49k
        cookie->port = port;
771
2.23k
    }
772
192
  else
773
192
    {
774
192
      if (!check_domain_match (cookie->domain, host))
775
168
        {
776
168
          logprintf (LOG_NOTQUIET,
777
168
                     _("Cookie coming from %s attempted to set domain to "),
778
168
                     quotearg_style (escape_quoting_style, host));
779
168
          logprintf (LOG_NOTQUIET,
780
168
                     _("%s\n"),
781
168
                     quotearg_style (escape_quoting_style, cookie->domain));
782
168
          cookie->discard_requested = true;
783
168
        }
784
192
    }
785
786
2.43k
  if (!cookie->path)
787
2.25k
    {
788
      /* The cookie doesn't set path: set it to the URL path, sans the
789
         file part ("/dir/file" truncated to "/dir/").  */
790
2.25k
      char *trailing_slash = strrchr (path, '/');
791
2.25k
      if (trailing_slash)
792
2.25k
        cookie->path = strdupdelim (path, trailing_slash + 1);
793
0
      else
794
        /* no slash in the string -- can this even happen? */
795
0
        cookie->path = xstrdup (path);
796
2.25k
    }
797
180
  else
798
180
    {
799
      /* The cookie sets its own path; verify that it is legal. */
800
180
      if (!check_path_match (cookie->path, path))
801
165
        {
802
165
          DEBUGP (("Attempt to fake the path: %s, %s\n",
803
165
                   cookie->path, path));
804
165
          goto out;
805
165
        }
806
180
    }
807
808
  /* Now store the cookie, or discard an existing cookie, if
809
     discarding was requested.  */
810
811
2.26k
  if (cookie->discard_requested)
812
571
    {
813
571
      discard_matching_cookie (jar, cookie);
814
571
      goto out;
815
571
    }
816
817
1.69k
  store_cookie (jar, cookie);
818
1.69k
  if (tmp != buf)
819
0
    xfree (tmp);
820
1.69k
  return;
821
822
3.97k
 out:
823
3.97k
  if (cookie)
824
736
    delete_cookie (cookie);
825
3.97k
  if (tmp != buf)
826
0
    xfree (tmp);
827
3.97k
}
828
829
/* Support for sending out cookies in HTTP requests, based on
830
   previously stored cookies.  Entry point is
831
   `build_cookies_request'.  */
832
833
/* Count the occurrences of CHR in STRING.  The terminating NUL is
   never counted.  */

static int
count_char (const char *string, char chr)
{
  int hits = 0;

  while (*string != '\0')
    {
      if (*string == chr)
        hits++;
      string++;
    }
  return hits;
}
845
846
/* Find the cookie chains whose domains match HOST and store them to
847
   DEST.
848
849
   A cookie chain is the head of a list of cookies that belong to a
850
   host/domain.  Given HOST "img.search.xemacs.org", this function
851
   will return the chains for "img.search.xemacs.org",
852
   "search.xemacs.org", and "xemacs.org" -- those of them that exist
853
   (if any), that is.
854
855
   DEST should be large enough to accept (in the worst case) as many
856
   elements as there are domain components of HOST.  */
857
858
static int
859
find_chains_of_host (struct cookie_jar *jar, const char *host,
860
                     struct cookie *dest[])
861
0
{
862
0
  int dest_count = 0;
863
0
  int passes, passcnt;
864
865
  /* Bail out quickly if there are no cookies in the jar.  */
866
0
  if (!hash_table_count (jar->chains))
867
0
    return 0;
868
869
0
  if (numeric_address_p (host))
870
    /* If host is an IP address, only check for the exact match. */
871
0
    passes = 1;
872
0
  else
873
    /* Otherwise, check all the subdomains except the top-level (last)
874
       one.  As a domain with N components has N-1 dots, the number of
875
       passes equals the number of dots.  */
876
0
    passes = count_char (host, '.');
877
878
0
  passcnt = 0;
879
880
  /* Find chains that match HOST, starting with exact match and
881
     progressing to less specific domains.  For instance, given HOST
882
     fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
883
     srk.fer.hr's, then fer.hr's.  */
884
0
  while (1)
885
0
    {
886
0
      struct cookie *chain = hash_table_get (jar->chains, host);
887
0
      if (chain)
888
0
        dest[dest_count++] = chain;
889
0
      if (++passcnt >= passes)
890
0
        break;
891
0
      host = strchr (host, '.') + 1;
892
0
    }
893
894
0
  return dest_count;
895
0
}
896
897
/* Test whether FULL_PATH begins with PREFIX.  Return zero if it does
   not.  Otherwise return the length of PREFIX plus one: a value that
   is nonzero even for an empty PREFIX and that grows with the length
   of the matched prefix, so it can serve as a match quality.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int prefix_len = strlen (prefix);

  if (strncmp (full_path, prefix, prefix_len) == 0)
    /* The longer the PREFIX, the better the match. */
    return prefix_len + 1;

  /* FULL_PATH doesn't begin with PREFIX. */
  return 0;
}
912
913
/* Return true if COOKIE matches the provided parameters of the URL
914
   being downloaded: HOST, PORT, PATH, and SECFLAG.
915
916
   If PATH_GOODNESS is non-NULL, store the "path goodness" value
917
   there.  That value is a measure of how closely COOKIE matches PATH,
918
   used for ordering cookies.  */
919
920
static bool
921
cookie_matches_url (const struct cookie *cookie,
922
                    const char *host, int port, const char *path,
923
                    bool secflag, int *path_goodness)
924
0
{
925
0
  int pg;
926
927
0
  if (cookie_expired_p (cookie))
928
    /* Ignore stale cookies.  Don't bother unchaining the cookie at
929
       this point -- Wget is a relatively short-lived application, and
930
       stale cookies will not be saved by `save_cookies'.  On the
931
       other hand, this function should be as efficient as
932
       possible.  */
933
0
    return false;
934
935
0
  if (cookie->secure && !secflag)
936
    /* Don't transmit secure cookies over insecure connections.  */
937
0
    return false;
938
0
  if (cookie->port != PORT_ANY && cookie->port != port)
939
0
    return false;
940
941
  /* If exact domain match is required, verify that cookie's domain is
942
     equal to HOST.  If not, assume success on the grounds of the
943
     cookie's chain having been found by find_chains_of_host.  */
944
0
  if (cookie->domain_exact
945
0
      && 0 != strcasecmp (host, cookie->domain))
946
0
    return false;
947
948
0
  pg = path_matches (path, cookie->path);
949
0
  if (pg == 0)
950
0
    return false;
951
952
0
  if (path_goodness)
953
    /* If the caller requested path_goodness, we return it.  This is
954
       an optimization, so that the caller doesn't need to call
955
       path_matches() again.  */
956
0
    *path_goodness = pg;
957
0
  return true;
958
0
}
959
960
/* A cookie paired with the "goodness" scores computed for it while
   building a request.  The scores let us sort the cookies before
   returning them to the server, as required by the spec.  */

struct weighed_cookie {
  struct cookie *cookie;        /* the cookie being weighed */
  int domain_goodness;          /* larger sorts earlier */
  int path_goodness;            /* tie-breaker; larger sorts earlier */
};
970
971
/* Comparator used for uniquifying the list. */
972
973
static int
974
equality_comparator (const void *p1, const void *p2)
975
0
{
976
0
  struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
977
0
  struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
978
979
0
  int namecmp  = strcmp (wc1->cookie->attr, wc2->cookie->attr);
980
0
  int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
981
982
  /* We only really care whether both name and value are equal.  We
983
     return them in this order only for consistency...  */
984
0
  return namecmp ? namecmp : valuecmp;
985
0
}
986
987
/* Eliminate duplicate cookies.  "Duplicate cookies" are any two
988
   cookies with the same attr name and value.  Whenever a duplicate
989
   pair is found, one of the cookies is removed.  */
990
991
static int
992
eliminate_dups (struct weighed_cookie *outgoing, int count)
993
0
{
994
0
  struct weighed_cookie *h;     /* hare */
995
0
  struct weighed_cookie *t;     /* tortoise */
996
0
  struct weighed_cookie *end = outgoing + count;
997
998
  /* We deploy a simple uniquify algorithm: first sort the array
999
     according to our sort criteria, then copy it to itself, comparing
1000
     each cookie to its neighbor and ignoring the duplicates.  */
1001
1002
0
  qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1003
1004
  /* "Hare" runs through all the entries in the array, followed by
1005
     "tortoise".  If a duplicate is found, the hare skips it.
1006
     Non-duplicate entries are copied to the tortoise ptr.  */
1007
1008
0
  for (h = t = outgoing; h < end; h++)
1009
0
    {
1010
0
      if (h != end - 1)
1011
0
        {
1012
0
          struct cookie *c0 = h[0].cookie;
1013
0
          struct cookie *c1 = h[1].cookie;
1014
0
          if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1015
0
            continue;           /* ignore the duplicate */
1016
0
        }
1017
1018
      /* If the hare has advanced past the tortoise (because of
1019
         previous dups), make sure the values get copied.  Otherwise,
1020
         no copying is necessary.  */
1021
0
      if (h != t)
1022
0
        *t++ = *h;
1023
0
      else
1024
0
        t++;
1025
0
    }
1026
0
  return t - outgoing;
1027
0
}
1028
1029
/* Comparator used for sorting by quality. */
1030
1031
static int
1032
goodness_comparator (const void *p1, const void *p2)
1033
0
{
1034
0
  struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1035
0
  struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1036
1037
  /* Subtractions take `wc2' as the first argument becauase we want a
1038
     sort in *decreasing* order of goodness.  */
1039
0
  int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1040
0
  int pgdiff = wc2->path_goodness - wc1->path_goodness;
1041
1042
  /* Sort by domain goodness; if these are the same, sort by path
1043
     goodness.  (The sorting order isn't really specified; maybe it
1044
     should be the other way around.)  */
1045
0
  return dgdiff ? dgdiff : pgdiff;
1046
0
}
1047
1048
/* Generate a `Cookie' header for a request that goes to HOST:PORT and
1049
   requests PATH from the server.  The resulting string is allocated
1050
   with `malloc', and the caller is responsible for freeing it.  If no
1051
   cookies pertain to this request, i.e. no cookie header should be
1052
   generated, NULL is returned.  */
1053
1054
char *
1055
cookie_header (struct cookie_jar *jar, const char *host,
1056
               int port, const char *path, bool secflag)
1057
0
{
1058
0
  struct cookie *chains[32];
1059
0
  int chain_count;
1060
1061
0
  struct cookie *cookie;
1062
0
  struct weighed_cookie *outgoing;
1063
0
  size_t count, i, ocnt;
1064
0
  char *result = NULL;
1065
0
  int result_size, pos;
1066
0
  char pathbuf[1024];
1067
1068
  /* First, find the cookie chains whose domains match HOST. */
1069
1070
  /* Allocate room for find_chains_of_host to write to.  The number of
1071
     chains can at most equal the number of subdomains, hence
1072
     1+<number of dots>.  We ignore cookies with more than 32 labels. */
1073
0
  chain_count = 1 + count_char (host, '.');
1074
0
  if (chain_count > (int) countof (chains))
1075
0
    return NULL;
1076
0
  chain_count = find_chains_of_host (jar, host, chains);
1077
1078
  /* No cookies for this host. */
1079
0
  if (chain_count <= 0)
1080
0
    return NULL;
1081
1082
  /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
1083
     usage assumes /-prefixed paths.  Until the rest of Wget is fixed,
1084
     simply prepend slash to PATH.  */
1085
0
  {
1086
0
    char *tmp;
1087
0
    size_t pathlen = strlen(path);
1088
1089
0
    if (pathlen < sizeof (pathbuf) - 1)
1090
0
      tmp = pathbuf;
1091
0
    else
1092
0
      tmp = xmalloc (pathlen + 2);
1093
1094
0
    *tmp = '/';
1095
0
    memcpy (tmp + 1, path, pathlen + 1);
1096
0
    path = tmp;
1097
0
  }
1098
1099
0
  cookies_now = time (NULL);
1100
1101
  /* Now extract from the chains those cookies that match our host
1102
     (for domain_exact cookies), port (for cookies with port other
1103
     than PORT_ANY), etc.  See matching_cookie for details.  */
1104
1105
  /* Count the number of matching cookies. */
1106
0
  count = 0;
1107
0
  for (i = 0; i < (unsigned) chain_count; i++)
1108
0
    for (cookie = chains[i]; cookie; cookie = cookie->next)
1109
0
      if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1110
0
        ++count;
1111
0
  if (!count)
1112
0
    goto out;                /* no cookies matched */
1113
1114
  /* Allocate the array. */
1115
0
  if (count > SIZE_MAX / sizeof (struct weighed_cookie))
1116
0
    goto out;                /* unable to process so many cookies */
1117
0
  outgoing = xmalloc (count * sizeof (struct weighed_cookie));
1118
1119
  /* Fill the array with all the matching cookies from the chains that
1120
     match HOST. */
1121
0
  ocnt = 0;
1122
0
  for (i = 0; i < (unsigned) chain_count; i++)
1123
0
    for (cookie = chains[i]; cookie; cookie = cookie->next)
1124
0
      {
1125
0
        int pg;
1126
0
        if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1127
0
          continue;
1128
0
        outgoing[ocnt].cookie = cookie;
1129
0
        outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1130
0
        outgoing[ocnt].path_goodness   = pg;
1131
0
        ++ocnt;
1132
0
      }
1133
0
  assert (ocnt == count);
1134
1135
  /* Eliminate duplicate cookies; that is, those whose name and value
1136
     are the same.  */
1137
0
  count = eliminate_dups (outgoing, count);
1138
1139
  /* Sort the array so that best-matching domains come first, and
1140
     that, within one domain, best-matching paths come first. */
1141
0
  qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1142
1143
  /* Count the space the name=value pairs will take. */
1144
0
  result_size = 0;
1145
0
  for (i = 0; i < count; i++)
1146
0
    {
1147
0
      struct cookie *c = outgoing[i].cookie;
1148
      /* name=value */
1149
0
      result_size += strlen (c->attr) + 1 + strlen (c->value);
1150
0
    }
1151
1152
  /* Allocate output buffer:
1153
     name=value pairs -- result_size
1154
     "; " separators  -- (count - 1) * 2
1155
     \0 terminator    -- 1 */
1156
0
  result_size = result_size + (count - 1) * 2 + 1;
1157
0
  result = xmalloc (result_size);
1158
0
  pos = 0;
1159
0
  for (i = 0; i < count; i++)
1160
0
    {
1161
0
      struct cookie *c = outgoing[i].cookie;
1162
0
      int namlen = strlen (c->attr);
1163
0
      int vallen = strlen (c->value);
1164
1165
0
      memcpy (result + pos, c->attr, namlen);
1166
0
      pos += namlen;
1167
0
      result[pos++] = '=';
1168
0
      memcpy (result + pos, c->value, vallen);
1169
0
      pos += vallen;
1170
0
      if (i < count - 1)
1171
0
        {
1172
0
          result[pos++] = ';';
1173
0
          result[pos++] = ' ';
1174
0
        }
1175
0
    }
1176
0
  result[pos++] = '\0';
1177
0
  xfree (outgoing);
1178
0
  assert (pos == result_size);
1179
1180
0
out:
1181
0
  if (path != pathbuf)
1182
0
    xfree (path);
1183
1184
0
return result;
1185
0
}
1186
1187
/* Support for loading and saving cookies.  The format used for
1188
   loading and saving should be the format of the `cookies.txt' file
1189
   used by Netscape and Mozilla, at least the Unix versions.
1190
   (Apparently IE can export cookies in that format as well.)  The
1191
   format goes like this:
1192
1193
       DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1194
1195
     DOMAIN      -- cookie domain, optionally followed by :PORT
1196
     DOMAIN-FLAG -- whether all hosts in the domain match
1197
     PATH        -- cookie path
1198
     SECURE-FLAG -- whether cookie requires secure connection
1199
     TIMESTAMP   -- expiry timestamp, number of seconds since epoch
1200
     ATTR-NAME   -- name of the cookie attribute
1201
     ATTR-VALUE  -- value of the cookie attribute (empty if absent)
1202
1203
   The fields are separated by TABs.  All fields are mandatory, except
1204
   for ATTR-VALUE.  The `-FLAG' fields are boolean, their legal values
1205
   being "TRUE" and "FALSE".  Empty lines, lines consisting of
1206
   whitespace only, and comment lines (beginning with # optionally
1207
   preceded by whitespace) are ignored.
1208
1209
   Example line from cookies.txt (split in two lines for readability):
1210
1211
       .google.com      TRUE    /       FALSE   2147368447      \
1212
       PREF     ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1213
1214
*/
1215
1216
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store the new boundary (location of the
   `:') to *DOMAIN_E_PTR.

   Return 0, leaving *DOMAIN_E_PTR untouched, if there is no colon, if
   anything other than digits follows the first colon, or if the
   number exceeds 65535 (the largest valid port; rejecting larger
   values also keeps the accumulation from overflowing `int' on
   arbitrarily long digit strings).

   A region ending in a bare `:' also yields 0, but in that case
   *DOMAIN_E_PTR is still moved to the colon.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);
  if (!colon)
    return 0;
  /* Only ASCII digits count, independent of the current locale.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Out of range for a port number. */
        return 0;
    }
  if (p < domain_e)
    /* Garbage following port number. */
    return 0;
  *domain_e_ptr = colon;
  return port;
}
1237
1238
0
/* Scan one TAB-terminated field starting at P.  B is set to the
   start of the field and E to the TAB that ends it; P is left just
   past that TAB.  If the field is empty or the string ends before a
   TAB is seen, jump to the enclosing `next' label instead.  */
#define GET_WORD(p, b, e) do {                  \
  for (b = p; *p && *p != '\t'; ++p)            \
    ;                                           \
  e = p;                                        \
  if (e == b || *p == '\0')                     \
    goto next;                                  \
  ++p;                                          \
} while (0)
1247
1248
/* Load cookies from FILE.  */
1249
1250
void
1251
cookie_jar_load (struct cookie_jar *jar, const char *file)
1252
0
{
1253
0
  char *line = NULL;
1254
0
  size_t bufsize = 0;
1255
1256
0
  FILE *fp = fopen (file, "r");
1257
0
  if (!fp)
1258
0
    {
1259
0
      logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1260
0
                 quote (file), strerror (errno));
1261
0
      return;
1262
0
    }
1263
1264
0
  cookies_now = time (NULL);
1265
1266
0
  while (getline (&line, &bufsize, fp) > 0)
1267
0
    {
1268
0
      struct cookie *cookie;
1269
0
      char *p = line;
1270
1271
0
      double expiry;
1272
0
      int port;
1273
1274
0
      char *domain_b  = NULL, *domain_e  = NULL;
1275
0
      char *domflag_b = NULL, *domflag_e = NULL;
1276
0
      char *path_b    = NULL, *path_e    = NULL;
1277
0
      char *secure_b  = NULL, *secure_e  = NULL;
1278
0
      char *expires_b = NULL, *expires_e = NULL;
1279
0
      char *name_b    = NULL, *name_e    = NULL;
1280
0
      char *value_b   = NULL, *value_e   = NULL;
1281
1282
      /* Skip leading white-space. */
1283
0
      while (*p && c_isspace (*p))
1284
0
        ++p;
1285
      /* Ignore empty lines.  */
1286
0
      if (!*p || *p == '#')
1287
0
        continue;
1288
1289
0
      GET_WORD (p, domain_b,  domain_e);
1290
0
      GET_WORD (p, domflag_b, domflag_e);
1291
0
      GET_WORD (p, path_b,    path_e);
1292
0
      GET_WORD (p, secure_b,  secure_e);
1293
0
      GET_WORD (p, expires_b, expires_e);
1294
0
      GET_WORD (p, name_b,    name_e);
1295
1296
      /* Don't use GET_WORD for value because it ends with newline,
1297
         not TAB.  */
1298
0
      value_b = p;
1299
0
      value_e = p + strlen (p);
1300
0
      if (value_e > value_b && value_e[-1] == '\n')
1301
0
        --value_e;
1302
0
      if (value_e > value_b && value_e[-1] == '\r')
1303
0
        --value_e;
1304
      /* Empty values are legal (I think), so don't bother checking. */
1305
1306
0
      cookie = cookie_new ();
1307
1308
0
      cookie->attr    = strdupdelim (name_b, name_e);
1309
0
      cookie->value   = strdupdelim (value_b, value_e);
1310
0
      cookie->path    = strdupdelim (path_b, path_e);
1311
0
      cookie->secure  = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1312
1313
      /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1314
         value indicating if all machines within a given domain can
1315
         access the variable.  This value is set automatically by the
1316
         browser, depending on the value set for the domain."  */
1317
0
      cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1318
1319
      /* DOMAIN needs special treatment because we might need to
1320
         extract the port.  */
1321
0
      port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1322
0
      if (port)
1323
0
        cookie->port = port;
1324
1325
0
      if (*domain_b == '.')
1326
0
        ++domain_b;             /* remove leading dot internally */
1327
0
      cookie->domain  = strdupdelim (domain_b, domain_e);
1328
1329
      /* safe default in case EXPIRES field is garbled. */
1330
0
      expiry = (double)cookies_now - 1;
1331
1332
      /* I don't like changing the line, but it's safe here.  (line is
1333
         malloced.)  */
1334
0
      *expires_e = '\0';
1335
0
      sscanf (expires_b, "%lf", &expiry);
1336
1337
0
      if (expiry == 0)
1338
0
        {
1339
          /* EXPIRY can be 0 for session cookies saved because the
1340
             user specified `--keep-session-cookies' in the past.
1341
             They remain session cookies, and will be saved only if
1342
             the user has specified `keep-session-cookies' again.  */
1343
0
        }
1344
0
      else
1345
0
        {
1346
0
          if (expiry < cookies_now)
1347
0
            goto abort_cookie;  /* ignore stale cookie. */
1348
0
          cookie->expiry_time = (time_t) expiry;
1349
0
          cookie->permanent = 1;
1350
0
        }
1351
1352
0
      store_cookie (jar, cookie);
1353
1354
0
    next:
1355
0
      continue;
1356
1357
0
    abort_cookie:
1358
0
      delete_cookie (cookie);
1359
0
    }
1360
1361
0
  xfree(line);
1362
0
  fclose (fp);
1363
0
}
1364
1365
/* Save cookies, in format described above, to FILE. */
1366
1367
void
1368
cookie_jar_save (struct cookie_jar *jar, const char *file)
1369
0
{
1370
0
  FILE *fp;
1371
0
  hash_table_iterator iter;
1372
1373
0
  DEBUGP (("Saving cookies to %s.\n", file));
1374
1375
0
  cookies_now = time (NULL);
1376
1377
0
  fp = fopen (file, "w");
1378
0
  if (!fp)
1379
0
    {
1380
0
      logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1381
0
                 quote (file), strerror (errno));
1382
0
      return;
1383
0
    }
1384
1385
0
  fputs ("# HTTP Cookie File\n", fp);
1386
0
  fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1387
0
  fputs ("# Edit at your own risk.\n\n", fp);
1388
1389
0
  for (hash_table_iterate (jar->chains, &iter);
1390
0
       hash_table_iter_next (&iter);
1391
0
       )
1392
0
    {
1393
0
      const char *domain = iter.key;
1394
0
      struct cookie *cookie = iter.value;
1395
0
      for (; cookie; cookie = cookie->next)
1396
0
        {
1397
0
          if (!cookie->permanent && !opt.keep_session_cookies)
1398
0
            continue;
1399
0
          if (cookie_expired_p (cookie))
1400
0
            continue;
1401
0
          if (!cookie->domain_exact)
1402
0
            fputc ('.', fp);
1403
0
          fputs (domain, fp);
1404
0
          if (cookie->port != PORT_ANY)
1405
0
            fprintf (fp, ":%d", cookie->port);
1406
0
          fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1407
0
                   cookie->domain_exact ? "FALSE" : "TRUE",
1408
0
                   cookie->path, cookie->secure ? "TRUE" : "FALSE",
1409
0
                   (double)cookie->expiry_time,
1410
0
                   cookie->attr, cookie->value);
1411
0
          if (ferror (fp))
1412
0
            goto out;
1413
0
        }
1414
0
    }
1415
0
 out:
1416
0
  if (ferror (fp))
1417
0
    logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1418
0
               quote (file), strerror (errno));
1419
0
  if (fclose (fp) < 0)
1420
0
    logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1421
0
               quote (file), strerror (errno));
1422
1423
0
  DEBUGP (("Done saving cookies.\n"));
1424
0
}
1425
1426
/* Clean up cookie-related data. */
1427
1428
void
1429
cookie_jar_delete (struct cookie_jar *jar)
1430
1.88k
{
1431
  /* Iterate over chains (indexed by domain) and free them. */
1432
1.88k
  hash_table_iterator iter;
1433
2.45k
  for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1434
566
    {
1435
566
      struct cookie *chain = iter.value;
1436
566
      xfree (iter.key);
1437
      /* Then all cookies in this chain. */
1438
1.69k
      while (chain)
1439
1.12k
        {
1440
1.12k
          struct cookie *next = chain->next;
1441
1.12k
          delete_cookie (chain);
1442
1.12k
          chain = next;
1443
1.12k
        }
1444
566
    }
1445
1.88k
  hash_table_destroy (jar->chains);
1446
1.88k
  xfree (jar);
1447
1448
1.88k
#ifdef HAVE_LIBPSL
1449
1.88k
  psl_free (psl);
1450
  psl = NULL;
1451
1.88k
#endif
1452
1.88k
}
1453
1454
/* Test cases.  Currently this only tests parse_set_cookie and
   extract_param.  To use, recompile Wget with -DTEST_COOKIES and call
   test_cookies() from main.  Failures are reported on stdout.  */

#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed: */
  static struct {
    const char *data;
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;  ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  int i;

  for (i = 0; i < (int) countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }

      /* Test whether extract_param handles these cases correctly. */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;
        while (extract_param (&ptr, &name, &value, ';', NULL))
          {
            char *n = strdupdelim (name.b, name.e);
            char *v = strdupdelim (value.b, value.e);
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                /* Don't leak the copies when bailing out early.  */
                xfree (n);
                xfree (v);
                break;
              }
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            xfree (n);
            xfree (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }

      /* The parsed cookie was only needed to prove that parsing
         succeeded; release it.  */
      delete_cookie (c);
    }

  for (i = 0; i < (int) countof (tests_fail); i++)
    {
      struct cookie *c;
      char *data = tests_fail[i];
      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */