Coverage Report

Created: 2026-05-16 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wget2/libwget/cookie_parse.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2026 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * Cookie parsing routines
22
 *
23
 * Changelog
24
 * 23.10.2012  Tim Ruehsen  created
25
 * 14.08.2019  Tim Ruehsen  split out from cookie.c
26
 *
27
 * see https://tools.ietf.org/html/rfc6265
28
 *
29
 */
30
31
#include <config.h>
32
33
#include <stdio.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <limits.h>
37
#include <ctype.h>
38
#include <time.h>
39
40
#include <c-ctype.h>
41
42
#include <wget.h>
43
#include "private.h"
44
#include "cookie.h"
45
46
/*
 * Test whether `host` is covered by the cookie domain `domain`.
 *
 * A match is either an exact (case-sensitive) string match, or `domain`
 * being a proper suffix of `host` that is immediately preceded by a '.'
 * (e.g. domain "example.com" matches host "www.example.com").
 *
 * Both arguments are passed to strcmp()/strlen() unconditionally and
 * therefore must not be NULL.
 *
 * Returns true on match, false otherwise.
 */
bool cookie_domain_match(const char *domain, const char *host)
{
  debug_printf("domain_match(%s,%s)", domain, host);

  if (strcmp(domain, host) == 0)
    return true; // an exact match

  const size_t domain_len = strlen(domain);
  const size_t host_len = strlen(host);

  if (host_len <= domain_len)
    return false; // host is too short to carry domain as a proper suffix

  // host_len > domain_len guarantees that suffix[-1] is inside host
  const char *suffix = host + (host_len - domain_len);

  return suffix[-1] == '.' && strcmp(suffix, domain) == 0;
}
68
69
/*
 * Test whether a cookie path matches a request path, following the
 * algorithm of RFC 6265 5.1.4.
 *
 * A single leading '/' is stripped from both paths before comparing.
 * Of the request path only the part up to (not including) its last '/'
 * counts towards its length; a request path that is NULL or has no '/'
 * left after stripping is treated as empty.
 *
 * cookie_path  - path attribute of the cookie, must not be NULL
 * request_path - path of the request, may be NULL
 *
 * Returns true if the cookie path matches, false otherwise.
 */
bool cookie_path_match(const char *cookie_path, const char *request_path)
{
  const char *last_slash = NULL;
  size_t cookie_path_length, iri_path_length;
  bool cookie_path_slash = false;

  if (*cookie_path == '/') {
    cookie_path++;
    cookie_path_slash = true;
  }

  if (request_path && *request_path == '/')
    request_path++;

  debug_printf("path_match(/%s,/%s)\n", cookie_path, request_path ? request_path : "");

  if (request_path)
    last_slash = strrchr(request_path, '/');

  if (!last_slash) {
    request_path = "";
    iri_path_length = 0;
  } else {
    iri_path_length = (size_t) (last_slash - request_path);
  }

  cookie_path_length = strlen(cookie_path);

  if (iri_path_length < cookie_path_length)
    // cookie-path is not a prefix of request-path
    return false;

  if (iri_path_length == 0 && cookie_path_length == 0)
    // slash matches slash
    return true;

  if (strncmp(cookie_path, request_path, cookie_path_length) != 0)
    return false;

  if (!request_path[cookie_path_length])
    // the cookie-path and the request-path are identical
    return true;

  // The cookie-path is a prefix of the request-path, and the last
  // character of the cookie-path is %x2F ("/").
  // The stripped leading slash only counts as that last character when
  // nothing else is left, i.e. the cookie-path was exactly "/". Treating
  // every cookie-path with a leading slash as slash-terminated would let
  // "/foo" match "/foobar/x", which RFC 6265 5.1.4 does not allow.
  if (cookie_path_length > 0 ? cookie_path[cookie_path_length - 1] == '/' : cookie_path_slash)
    return true;

  // the cookie-path is a prefix of the request-path, and the first
  // character of the request-path that is not included in the cookie-
  // path is a %x2F ("/") character.
  return request_path[cookie_path_length] == '/';
}
126
127
/*
 * Initialize a cookie structure.
 *
 * If `cookie` is NULL a new zeroed structure is obtained from
 * wget_calloc(); otherwise the given structure is cleared with memset().
 * In both cases the creation and last-access times are set to the
 * current time.
 *
 * Returns the initialized cookie, or NULL if the allocation failed.
 */
wget_cookie *wget_cookie_init(wget_cookie *cookie)
{
  if (cookie) {
    memset(cookie, 0, sizeof(*cookie));
  } else {
    cookie = wget_calloc(1, sizeof(wget_cookie));
    if (!cookie)
      return NULL;
  }

  cookie->creation = time(NULL);
  cookie->last_access = cookie->creation;

  return cookie;
}
140
141
/*
 * Release the strings owned by `cookie` (name, value, domain, path)
 * via xfree(). The cookie structure itself is not freed.
 * A NULL `cookie` is ignored.
 */
void wget_cookie_deinit(wget_cookie *cookie)
{
  if (!cookie)
    return;

  xfree(cookie->name);
  xfree(cookie->value);
  xfree(cookie->domain);
  xfree(cookie->path);
}
150
151
/*
 * Deinitialize the cookie that `*cookie` points to and release the
 * structure itself with xfree(). A NULL `cookie` is ignored.
 */
void wget_cookie_free(wget_cookie **cookie)
{
  if (!cookie)
    return;

  wget_cookie_deinit(*cookie);
  xfree(*cookie);
}
158
159
// for vector destruction
160
/*
 * Destructor with a generic `void *` signature: deinitializes the
 * cookie and then releases the structure. A NULL pointer is ignored.
 */
void cookie_free(void *cookie)
{
  if (!cookie)
    return;

  wget_cookie_deinit(cookie);
  xfree(cookie);
}
167
168
/*
169
int wget_cookie_equals(wget_cookie *cookie1, wget_cookie *cookie2)
170
{
171
  if (!cookie1)
172
    return !cookie2;
173
174
  if (!cookie2)
175
    return 0;
176
177
  if (wget_strcmp(cookie1->name, cookie2->name) ||
178
    wget_strcmp(cookie1->value, cookie2->value) ||
179
    wget_strcmp(cookie1->domain, cookie2->domain) ||
180
    wget_strcmp(cookie1->path, cookie2->path) ||
181
    cookie1->domain_dot != cookie2->domain_dot ||
182
    cookie1->normalized != cookie2->normalized ||
183
    cookie1->persistent != cookie2->persistent ||
184
    cookie1->host_only != cookie2->host_only ||
185
    cookie1->secure_only != cookie2->secure_only ||
186
    cookie1->http_only != cookie2->http_only)
187
  {
188
    return 0;
189
  }
190
191
  return 1;
192
}
193
*/
194
195
/*
 * Build a Set-Cookie style string from `cookie`:
 *
 *   name=value[; expires=DATE][; path=PATH]; domain=[.]DOMAIN[; HttpOnly][; Secure]
 *
 * The expires part is only emitted for a non-zero expiry time, the path
 * part only if a path is set, and the domain is prefixed with '.' unless
 * the cookie is flagged host-only.
 *
 * Returns the string produced by wget_aprintf(), or the result of
 * wget_strdup("(null)") when `cookie` is NULL.
 */
char *wget_cookie_to_setcookie(wget_cookie *cookie)
{
  if (!cookie)
    return wget_strdup("(null)");

  char date[32] = "";

  if (cookie->expires)
    wget_http_print_date(cookie->expires, date, sizeof(date)); // date format from RFC 6265

  const bool has_date = *date != '\0';
  const char *expires_key = has_date ? "; expires=" : "";
  const char *expires_val = has_date ? date : "";
  const char *path_key = cookie->path ? "; path=" : "";
  const char *path_val = cookie->path ? cookie->path : "";
  const char *domain_prefix = cookie->host_only ? "" : ".";
  const char *http_only_flag = cookie->http_only ? "; HttpOnly" : "";
  const char *secure_flag = cookie->secure_only ? "; Secure" : "";

  return wget_aprintf("%s=%s%s%s%s%s; domain=%s%s%s%s",
    cookie->name, cookie->value,
    expires_key, expires_val,
    path_key, path_val,
    domain_prefix, cookie->domain,
    http_only_flag,
    secure_flag);
}
213
214
/*
215
 RFC 6265
216
217
 set-cookie-header = "Set-Cookie:" SP set-cookie-string
218
 set-cookie-string = cookie-pair *( ";" SP cookie-av )
219
 cookie-pair       = cookie-name "=" cookie-value
220
 cookie-name       = token
221
 cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
222
 cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
223
                       ; US-ASCII characters excluding CTLs,
224
                       ; whitespace DQUOTE, comma, semicolon,
225
                       ; and backslash
226
 token             = <token, defined in [RFC2616], Section 2.2>
227
228
 cookie-av         = expires-av / max-age-av / domain-av /
229
                     path-av / secure-av / httponly-av /
230
                     extension-av
231
 expires-av        = "Expires=" sane-cookie-date
232
 sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
233
 max-age-av        = "Max-Age=" non-zero-digit *DIGIT
234
                       ; In practice, both expires-av and max-age-av
235
                       ; are limited to dates representable by the
236
                       ; user agent.
237
 non-zero-digit    = %x31-39
238
                       ; digits 1 through 9
239
 domain-av         = "Domain=" domain-value
240
 domain-value      = <subdomain>
241
                       ; defined in [RFC1034], Section 3.5, as
242
                       ; enhanced by [RFC1123], Section 2.1
243
 path-av           = "Path=" path-value
244
 path-value        = <any CHAR except CTLs or ";">
245
 secure-av         = "Secure"
246
 httponly-av       = "HttpOnly"
247
 extension-av      = <any CHAR except CTLs or ";">
248
*/
249
// Returns true if `c` is accepted inside a cookie name, cookie value or
// attribute value: printable ASCII (0x20-0x7E) or any non-ASCII byte
// (0x80-0xFF, e.g. part of a UTF-8 sequence).
// The test goes through unsigned char so that the result does not depend
// on whether plain char is signed or unsigned on the target platform.
static bool cookie_is_octet(char c)
{
  const unsigned char uc = (unsigned char) c;

  return (uc >= 32 && uc <= 126) || uc >= 128;
}

/*
 * Parse the value of a Set-Cookie header (see RFC 6265) into a newly
 * created cookie.
 *
 * Parsing is deliberately more lenient than the RFC grammar: printable
 * ASCII and non-ASCII bytes are accepted in names and values, and
 * whitespace around names and values is trimmed.
 *
 * Recognized attributes are Expires, Max-Age, Domain, Path, Secure and
 * HttpOnly (matched case-insensitively); others are logged and skipped.
 *
 * s       - string to parse, must not be NULL
 * _cookie - if not NULL, receives the parsed cookie, or NULL if the input
 *           has no cookie name / no '=' or if the cookie could not be
 *           allocated. If NULL, the parsed cookie is freed again.
 *
 * Returns a pointer to the position in `s` where parsing stopped.
 */
const char *wget_cookie_parse_setcookie(const char *s, wget_cookie **_cookie)
{
  const char *name, *p;
  wget_cookie *cookie = wget_cookie_init(NULL);

  if (!cookie) {
    // allocation failed: report "no cookie" instead of dereferencing NULL
    if (_cookie)
      *_cookie = NULL;
    return s;
  }

  // remove leading whitespace from cookie name
  while (c_isspace(*s)) s++;

  // also accept UTF-8 (NON-ASCII) characters in cookie name
  for (p = s; cookie_is_octet(*s) && *s != '=' && *s != ';'; s++);

  // remove trailing whitespace from cookie name
  while (s > p && c_isspace(s[-1])) s--;
  cookie->name = wget_strmemdup(p, s - p);

  // advance to next delimiter
  while (c_isspace(*s)) s++;

  if (cookie->name && *cookie->name && *s == '=') {
    // skip over delimiter and remove leading whitespace from cookie value
    for (s++; c_isspace(*s);) s++;

    // Strict RFC 6265 cookie-octet checking is too strict in practice,
    // so also accept UTF-8 (NON-ASCII) characters in cookie value.
    for (p = s; cookie_is_octet(*s) && *s != ';'; s++);

    // remove trailing whitespace from cookie value
    while (s > p && c_isspace(s[-1])) s--;

    cookie->value = wget_strmemdup(p, s - p);

    do {
      // find next delimiter
      while (*s && *s != ';') s++;
      if (!*s) break;

      // skip delimiter and remove leading spaces from attribute name
      for (s++; c_isspace(*s);) s++;
      if (!*s) break;

      s = wget_http_parse_token(s, &name);

      if (name) {
        // find next delimiter
        while (*s && *s != '=' && *s != ';') s++;

        if (*s == '=') {
          // find end of value
          for (s++; c_isspace(*s);) s++;
          for (p = s; cookie_is_octet(*s) && *s != ';'; s++);

          if (!wget_strcasecmp_ascii(name, "expires")) {
            cookie->expires = wget_http_parse_full_date(p);
          } else if (!wget_strcasecmp_ascii(name, "max-age")) {
            // strtol() saturates on out-of-range input, whereas the
            // behavior of atol() is undefined in that case
            long offset = strtol(p, NULL, 10);

            if (offset > 0) {
              // limit offset to avoid integer overflow
              if (offset > INT_MAX)
                offset = INT_MAX;
              cookie->maxage = time(NULL) + offset;
            } else
              cookie->maxage = 0;
          } else if (!wget_strcasecmp_ascii(name, "domain")) {
            if (p != s) {
              if (*p == '.') { // RFC 6265 5.2.3
                do { p++; } while (*p == '.');
                cookie->domain_dot = 1;
              } else
                cookie->domain_dot = 0;

              // remove trailing whitespace from attribute value
              while (s > p && c_isspace(s[-1])) s--;

              xfree(cookie->domain);
              cookie->domain = wget_strmemdup(p, s - p);
            }
          } else if (!wget_strcasecmp_ascii(name, "path")) {
            // remove trailing whitespace from attribute value
            while (s > p && c_isspace(s[-1])) s--;

            xfree(cookie->path);
            cookie->path = wget_strmemdup(p, s - p);
          } else if (!wget_strcasecmp_ascii(name, "secure")) {
            // here we ignore the value
            cookie->secure_only = 1;
          } else if (!wget_strcasecmp_ascii(name, "httponly")) {
            // here we ignore the value
            cookie->http_only = 1;
          } else {
            debug_printf("Unsupported cookie-av '%s'\n", name);
          }
        } else if (!wget_strcasecmp_ascii(name, "secure")) {
          cookie->secure_only = 1;
        } else if (!wget_strcasecmp_ascii(name, "httponly")) {
          cookie->http_only = 1;
        } else {
          debug_printf("Unsupported cookie-av '%s'\n", name);
        }

        xfree(name);
      }
    } while (*s);

  } else {
    wget_cookie_free(&cookie);
    error_printf(_("Cookie without name or assignment ignored\n"));
  }

  if (_cookie)
    *_cookie = cookie;
  else
    wget_cookie_free(&cookie);

  return s;
}
375
376
// normalize/sanitize and store cookies
377
/*
 * Normalize and sanity-check a parsed cookie.
 *
 * Always: Max-Age (if set) overrides Expires, the persistent flag is
 * derived from the expiry time and the domain is lowercased.
 *
 * If `iri` is not NULL the cookie is additionally validated against the
 * IRI it was received from:
 *  - a Secure cookie is only accepted from an https origin,
 *  - the "__Secure-" and "__Host-" name prefixes are enforced,
 *  - the cookie domain has to equal or domain-match the IRI host; a
 *    missing domain defaults to the IRI host (host-only cookie),
 *  - a missing path, or one that does not start with '/', is replaced
 *    by a default derived from the IRI path.
 *
 * iri    - origin of the cookie, or NULL to skip the origin checks
 * cookie - cookie to normalize
 *
 * Returns 0 on success (the normalized flag is then set), -1 if `cookie`
 * is NULL or has to be ignored.
 */
static int cookie_normalize_cookie(const wget_iri *iri, wget_cookie *cookie)
{
  if (!cookie)
    return -1;

  cookie->normalized = 0;

  if (cookie->maxage)
    cookie->expires = cookie->maxage;

  cookie->persistent = cookie->expires != 0;

  // convert domain to lowercase
  wget_strtolower((char *)cookie->domain);

  if (iri) {
    // cookies comes from a HTTP header and needs checking

    // RFC 6265 4.1.2.5. The Secure Attribute
    // If the secure-only-flag is true, then the user agent MUST NOT save the
    // cookie unless the request-uri's scheme is https.
    if (cookie->secure_only && iri->scheme != WGET_IRI_SCHEME_HTTPS) {
      debug_printf("Secure cookie requires secure origin: %s %s\n", cookie->name, iri->host);
      return -1; // ignore cookie
    }

    // check prefixes as proposed in https://tools.ietf.org/html/draft-ietf-httpbis-cookie-prefixes-00
    if (!wget_strncmp(cookie->name, "__Secure-", 9)) {
      if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
        debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
        return -1; // ignore cookie
      }
    }
    else if (!wget_strncmp(cookie->name, "__Host-", 7)) {
      if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
        debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
        return -1; // ignore cookie
      }
      // host_only is only derived from the domain further down, so for a
      // cookie that has not been normalized before the flag is still unset
      // here. Checking the flag alone would reject such "__Host-" cookies
      // unconditionally; a cookie without Domain attribute is host-only
      // by definition, so only reject when a domain was given.
      if (!cookie->host_only && cookie->domain && *cookie->domain) {
        debug_printf("Cookie prefix requires hostonly flag: %s %s\n", cookie->name, iri->host);
        return -1; // ignore cookie
      }
      if (wget_strcmp(cookie->path, "/")) {
        debug_printf("Cookie prefix requires path \"/\": %s %s\n", cookie->name, iri->host);
        return -1; // ignore cookie
      }
    }

    if (cookie->domain && *cookie->domain) {
      if (!strcmp(cookie->domain, iri->host)) {
        cookie->host_only = 1;
      } else if (cookie_domain_match(cookie->domain, iri->host)) {
        cookie->host_only = 0;
      } else {
        debug_printf("Domain mismatch: %s %s\n", cookie->domain, iri->host);
        return -1; // ignore cookie
      }
    } else {
      // no (or empty) domain given: bind the cookie to the origin host
      xfree(cookie->domain);
      cookie->domain = wget_strdup(iri->host);
      cookie->host_only = 1;
    }

    if (!cookie->path || *cookie->path != '/') {
      // default path: the IRI path up to, but not including, its last '/'
      const char *p = iri->path ? strrchr(iri->path, '/') : NULL;

      xfree(cookie->path);

      if (p && p != iri->path) {
        cookie->path = wget_strmemdup(iri->path, p - iri->path);
      } else {
        cookie->path = wget_strdup("/");
      }
    }
  }

  cookie->normalized = 1;

  return 0;
}
477
478
/*
 * Normalize and sanity-check a single cookie against the IRI it came from.
 *
 * Thin public wrapper around cookie_normalize_cookie(); no locking is
 * performed here.
 *
 * Returns the result of cookie_normalize_cookie().
 */
int wget_cookie_normalize(const wget_iri *iri, wget_cookie *cookie)
{
  return cookie_normalize_cookie(iri, cookie);
}
488
489
/*
 * Run cookie_normalize_cookie() on every element of the vector `cookies`,
 * using `iri` as the origin. The individual results are discarded.
 * No locking is performed here.
 */
void wget_cookie_normalize_cookies(const wget_iri *iri, const wget_vector *cookies)
{
  int pos = 0;

  while (pos < wget_vector_size(cookies)) {
    cookie_normalize_cookie(iri, wget_vector_get(cookies, pos));
    pos++;
  }
}