Coverage Report

Created: 2025-03-18 06:55

/src/wget2/libwget/cookie_parse.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2024 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * Cookie parsing routines
22
 *
23
 * Changelog
24
 * 23.10.2012  Tim Ruehsen  created
25
 * 14.08.2019  Tim Ruehsen  split out from cookie.c
26
 *
27
 * see https://tools.ietf.org/html/rfc6265
28
 *
29
 */
30
31
#include <config.h>
32
33
#include <stdio.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <limits.h>
37
#include <ctype.h>
38
#include <time.h>
39
40
#include <c-ctype.h>
41
42
#include <wget.h>
43
#include "private.h"
44
#include "cookie.h"
45
46
bool cookie_domain_match(const char *domain, const char *host)
47
604
{
48
604
  size_t domain_length, host_length;
49
604
  const char *p;
50
51
604
  debug_printf("domain_match(%s,%s)", domain, host);
52
53
604
  if (!strcmp(domain, host))
54
0
    return true; // an exact match
55
56
604
  domain_length = strlen(domain);
57
604
  host_length = strlen(host);
58
59
604
  if (domain_length >= host_length)
60
426
    return false; // host is too short
61
62
178
  p = host + host_length - domain_length;
63
178
  if (!strcmp(p, domain) && p[-1] == '.')
64
4
    return true;
65
66
174
  return false;
67
178
}
68
69
bool cookie_path_match(const char *cookie_path, const char *request_path)
70
1.21k
{
71
1.21k
  const char *last_slash;
72
1.21k
  size_t cookie_path_length, iri_path_length;
73
1.21k
  bool cookie_path_slash = false;
74
75
1.21k
  if (*cookie_path == '/') {
76
1.21k
    cookie_path++;
77
1.21k
    cookie_path_slash = true;
78
1.21k
  }
79
80
1.21k
  if (request_path && *request_path == '/')
81
0
    request_path++;
82
83
1.21k
  debug_printf("path_match(/%s,/%s)\n", cookie_path, request_path ? request_path : "");
84
85
  // algorithm as described in RFC 6265 5.1.4
86
87
//  if (!request_path || *request_path != '/' || !(last_slash = strrchr(request_path + 1, '/'))) {
88
//    request_path = "/";
89
//    iri_path_length = 1;
90
1.21k
  if (!request_path || !(last_slash = strrchr(request_path, '/'))) {
91
1.21k
    request_path = "";
92
1.21k
    iri_path_length = 0;
93
1.21k
  } else {
94
0
    iri_path_length = last_slash - request_path;
95
0
  }
96
97
1.21k
  cookie_path_length = strlen(cookie_path);
98
99
1.21k
  if (iri_path_length < cookie_path_length)
100
    // cookie-path is not a prefix of request-path
101
28
    return false;
102
103
1.19k
  if (iri_path_length == 0 && cookie_path_length == 0)
104
    // slash matches slash
105
1.19k
    return true;
106
107
0
  if (!strncmp(cookie_path, request_path, cookie_path_length)) {
108
0
    if (!request_path[cookie_path_length])
109
      // the cookie-path and the request-path are identical
110
0
      return true;
111
112
0
    if ((cookie_path_length > 0 && cookie_path[cookie_path_length - 1] == '/') || cookie_path_slash)
113
      // the cookie-path is a prefix of the request-path, and the last
114
      // character of the cookie-path is %x2F ("/").
115
0
      return true;
116
117
0
    if (request_path[cookie_path_length] == '/')
118
      // the cookie-path is a prefix of the request-path, and the first
119
      // character of the request-path that is not included in the cookie-
120
      // path is a %x2F ("/") character.
121
0
      return true;
122
0
  }
123
124
0
  return false;
125
0
}
126
127
wget_cookie *wget_cookie_init(wget_cookie *cookie)
128
10.2k
{
129
10.2k
  if (!cookie) {
130
6.90k
    cookie = wget_calloc(1, sizeof(wget_cookie));
131
6.90k
    if (!cookie)
132
0
      return NULL;
133
6.90k
  } else
134
3.34k
    memset(cookie, 0, sizeof(*cookie));
135
136
10.2k
  cookie->last_access = cookie->creation = time(NULL);
137
138
10.2k
  return cookie;
139
10.2k
}
140
141
void wget_cookie_deinit(wget_cookie *cookie)
142
7.63k
{
143
7.63k
  if (cookie) {
144
7.63k
    xfree(cookie->name);
145
7.63k
    xfree(cookie->value);
146
7.63k
    xfree(cookie->domain);
147
7.63k
    xfree(cookie->path);
148
7.63k
  }
149
7.63k
}
150
151
void wget_cookie_free(wget_cookie **cookie)
152
4.29k
{
153
4.29k
  if (cookie) {
154
4.29k
    wget_cookie_deinit(*cookie);
155
4.29k
    xfree(*cookie);
156
4.29k
  }
157
4.29k
}
158
159
// for vector destruction
160
void cookie_free(void *cookie)
161
2.84k
{
162
2.84k
  if (cookie) {
163
2.84k
    wget_cookie_deinit(cookie);
164
2.84k
    xfree(cookie);
165
2.84k
  }
166
2.84k
}
167
168
/*
169
int wget_cookie_equals(wget_cookie *cookie1, wget_cookie *cookie2)
170
{
171
  if (!cookie1)
172
    return !cookie2;
173
174
  if (!cookie2)
175
    return 0;
176
177
  if (wget_strcmp(cookie1->name, cookie2->name) ||
178
    wget_strcmp(cookie1->value, cookie2->value) ||
179
    wget_strcmp(cookie1->domain, cookie2->domain) ||
180
    wget_strcmp(cookie1->path, cookie2->path) ||
181
    cookie1->domain_dot != cookie2->domain_dot ||
182
    cookie1->normalized != cookie2->normalized ||
183
    cookie1->persistent != cookie2->persistent ||
184
    cookie1->host_only != cookie2->host_only ||
185
    cookie1->secure_only != cookie2->secure_only ||
186
    cookie1->http_only != cookie2->http_only)
187
  {
188
    return 0;
189
  }
190
191
  return 1;
192
}
193
*/
194
195
char *wget_cookie_to_setcookie(wget_cookie *cookie)
196
3.36k
{
197
3.36k
  char expires[32] = "";
198
199
3.36k
  if (!cookie)
200
1.74k
    return wget_strdup("(null)");
201
202
1.61k
  if (cookie->expires)
203
111
    wget_http_print_date(cookie->expires, expires, sizeof(expires)); // date format from RFC 6265
204
205
1.61k
  return wget_aprintf("%s=%s%s%s%s%s; domain=%s%s%s%s",
206
1.61k
    cookie->name, cookie->value,
207
1.61k
    *expires ? "; expires=" : "", *expires ? expires : "",
208
1.61k
    cookie->path ? "; path=" : "", cookie->path ? cookie->path : "",
209
1.61k
    cookie->host_only ? "" : ".", cookie->domain,
210
1.61k
    cookie->http_only ? "; HttpOnly" : "",
211
1.61k
    cookie->secure_only ? "; Secure" : "");
212
3.36k
}
213
214
/*
215
 RFC 6265
216
217
 set-cookie-header = "Set-Cookie:" SP set-cookie-string
218
 set-cookie-string = cookie-pair *( ";" SP cookie-av )
219
 cookie-pair       = cookie-name "=" cookie-value
220
 cookie-name       = token
221
 cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
222
 cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
223
                       ; US-ASCII characters excluding CTLs,
224
                       ; whitespace DQUOTE, comma, semicolon,
225
                       ; and backslash
226
 token             = <token, defined in [RFC2616], Section 2.2>
227
228
 cookie-av         = expires-av / max-age-av / domain-av /
229
                     path-av / secure-av / httponly-av /
230
                     extension-av
231
 expires-av        = "Expires=" sane-cookie-date
232
 sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
233
 max-age-av        = "Max-Age=" non-zero-digit *DIGIT
234
                       ; In practice, both expires-av and max-age-av
235
                       ; are limited to dates representable by the
236
                       ; user agent.
237
 non-zero-digit    = %x31-39
238
                       ; digits 1 through 9
239
 domain-av         = "Domain=" domain-value
240
 domain-value      = <subdomain>
241
                       ; defined in [RFC1034], Section 3.5, as
242
                       ; enhanced by [RFC1123], Section 2.1
243
 path-av           = "Path=" path-value
244
 path-value        = <any CHAR except CTLs or ";">
245
 secure-av         = "Secure"
246
 httponly-av       = "HttpOnly"
247
 extension-av      = <any CHAR except CTLs or ";">
248
*/
249
const char *wget_cookie_parse_setcookie(const char *s, wget_cookie **_cookie)
250
6.90k
{
251
6.90k
  const char *name, *p;
252
6.90k
  wget_cookie *cookie = wget_cookie_init(NULL);
253
254
  // remove leading whitespace from cookie name
255
7.61k
  while (c_isspace(*s)) s++;
256
257
  // s = wget_http_parse_token(s, &cookie->name);
258
  // also accept UTF-8 (NON-ASCII) characters in cookie name
259
25.2k
  for (p = s; (*s >= 32 && *s <= 126 && *s != '=' && *s != ';') || *s < 0; s++);
260
261
  // remove trailing whitespace from cookie name
262
7.48k
  while (s > p && c_isspace(s[-1])) s--;
263
6.90k
  cookie->name = wget_strmemdup(p, s - p);
264
265
  // advance to next delimiter
266
8.06k
  while (c_isspace(*s)) s++;
267
268
6.90k
  if (cookie->name && *cookie->name && *s == '=') {
269
    // *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
270
271
    // skip over delimiter and remove leading whitespace from cookie value
272
8.24k
    for (s++; c_isspace(*s);) s++;
273
274
/* RFC compliance is too strict
275
    if (*s == '\"')
276
      s++;
277
    // cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
278
    for (p = s; *s > 32 && *s <= 126 && *s != '\\' && *s != ',' && *s != ';' && *s != '\"'; s++);
279
*/
280
281
    // also accept UTF-8 (NON-ASCII) characters in cookie value
282
22.0k
    for (p = s; (*s >= 32 && *s <= 126 && *s != ';') || *s < 0; s++);
283
284
    // remove trailing whitespace from cookie value
285
6.27k
    while (s > p && c_isspace(s[-1])) s--;
286
287
5.69k
    cookie->value = wget_strmemdup(p, s - p);
288
289
23.2k
    do {
290
      // find next delimiter
291
30.2k
      while (*s && *s != ';') s++;
292
23.2k
      if (!*s) break;
293
294
      // skip delimiter and remove leading spaces from attribute name
295
22.5k
      for (s++; c_isspace(*s);) s++;
296
21.4k
      if (!*s) break;
297
298
21.1k
      s = wget_http_parse_token(s, &name);
299
300
21.1k
      if (name) {
301
        // find next delimiter
302
24.6k
        while (*s && *s != '=' && *s != ';') s++;
303
        // if (!*s) break;
304
305
21.1k
        if (*s == '=') {
306
          // find end of value
307
17.1k
          for (s++; c_isspace(*s);) s++;
308
77.5k
          for (p = s; (*s >= 32 && *s <= 126 && *s != ';') || *s < 0; s++);
309
310
15.7k
          if (!wget_strcasecmp_ascii(name, "expires")) {
311
7.23k
            cookie->expires = wget_http_parse_full_date(p);
312
8.55k
          } else if (!wget_strcasecmp_ascii(name, "max-age")) {
313
1.79k
            long offset = atol(p);
314
315
1.79k
            if (offset > 0) {
316
              // limit offset to avoid integer overflow
317
1.15k
              if (offset > INT_MAX)
318
366
                offset = INT_MAX;
319
1.15k
              cookie->maxage = time(NULL) + offset;
320
1.15k
            } else
321
639
              cookie->maxage = 0;
322
6.76k
          } else if (!wget_strcasecmp_ascii(name, "domain")) {
323
2.82k
            if (p != s) {
324
2.39k
              if (*p == '.') { // RFC 6265 5.2.3
325
1.27k
                do { p++; } while (*p == '.');
326
612
                cookie->domain_dot = 1;
327
612
              } else
328
1.78k
                cookie->domain_dot = 0;
329
330
              // remove trailing whitespace from attribute value
331
2.84k
              while (s > p && c_isspace(s[-1])) s--;
332
333
2.39k
              xfree(cookie->domain);
334
2.39k
              cookie->domain = wget_strmemdup(p, s - p);
335
2.39k
            }
336
3.94k
          } else if (!wget_strcasecmp_ascii(name, "path")) {
337
            // remove trailing whitespace from attribute value
338
2.26k
            while (s > p && c_isspace(s[-1])) s--;
339
340
1.51k
            xfree(cookie->path);
341
1.51k
            cookie->path = wget_strmemdup(p, s - p);
342
2.42k
          } else if (!wget_strcasecmp_ascii(name, "secure")) {
343
            // here we ignore the value
344
384
            cookie->secure_only = 1;
345
2.04k
          } else if (!wget_strcasecmp_ascii(name, "httponly")) {
346
            // here we ignore the value
347
384
            cookie->http_only = 1;
348
1.65k
          } else {
349
1.65k
            debug_printf("Unsupported cookie-av '%s'\n", name);
350
1.65k
          }
351
15.7k
        } else if (!wget_strcasecmp_ascii(name, "secure")) {
352
416
          cookie->secure_only = 1;
353
4.96k
        } else if (!wget_strcasecmp_ascii(name, "httponly")) {
354
396
          cookie->http_only = 1;
355
4.56k
        } else {
356
4.56k
          debug_printf("Unsupported cookie-av '%s'\n", name);
357
4.56k
        }
358
359
21.1k
        xfree(name);
360
21.1k
      }
361
21.1k
    } while (*s);
362
363
5.69k
  } else {
364
1.20k
    wget_cookie_free(&cookie);
365
1.20k
    error_printf(_("Cookie without name or assignment ignored\n"));
366
1.20k
  }
367
368
6.90k
  if (_cookie)
369
6.90k
    *_cookie = cookie;
370
0
  else
371
0
    wget_cookie_free(&cookie);
372
373
6.90k
  return s;
374
6.90k
}
375
376
// normalize/sanitize and store cookies
377
static int cookie_normalize_cookie(const wget_iri *iri, wget_cookie *cookie)
378
3.45k
{
379
/*
380
  debug_printf("normalize cookie %s=%s\n", cookie->name, cookie->value);
381
  debug_printf("<  %s=%s\n", cookie->name, cookie->value);
382
  debug_printf("<  expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage);
383
  debug_printf("<  domain=%s\n", cookie->domain);
384
  debug_printf("<  path=%s\n", cookie->path);
385
  debug_printf("<  normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n",
386
    cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only);
387
*/
388
3.45k
  if (!cookie)
389
0
    return -1;
390
391
3.45k
  cookie->normalized = 0;
392
393
3.45k
  if (cookie->maxage)
394
384
    cookie->expires = cookie->maxage;
395
396
3.45k
  cookie->persistent = cookie->expires != 0;
397
398
  // convert domain to lowercase
399
3.45k
  wget_strtolower((char *)cookie->domain);
400
401
3.45k
  if (iri) {
402
    // cookies comes from a HTTP header and needs checking
403
404
    // check prefixes as proposed in https://tools.ietf.org/html/draft-ietf-httpbis-cookie-prefixes-00
405
3.22k
    if (!wget_strncmp(cookie->name, "__Secure-", 9)) {
406
4
      if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
407
4
        debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
408
4
        return -1; // ignore cookie
409
4
      }
410
4
    }
411
3.21k
    else if (!wget_strncmp(cookie->name, "__Host-", 7)) {
412
4
      if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
413
4
        debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
414
4
        return -1; // ignore cookie
415
4
      }
416
0
      if (!cookie->host_only) {
417
0
        debug_printf("Cookie prefix requires hostonly flag: %s %s\n", cookie->name, iri->host);
418
0
        return -1; // ignore cookie
419
0
      }
420
0
      if (wget_strcmp(cookie->path, "/")) {
421
0
        debug_printf("Cookie prefix requires path \"/\": %s %s\n", cookie->name, iri->host);
422
0
        return -1; // ignore cookie
423
0
      }
424
0
    }
425
426
3.21k
    if (cookie->domain && *cookie->domain) {
427
610
      if (!strcmp(cookie->domain, iri->host)) {
428
6
        cookie->host_only = 1;
429
604
      } else if (cookie_domain_match(cookie->domain, iri->host)) {
430
4
        cookie->host_only = 0;
431
600
      } else {
432
600
        debug_printf("Domain mismatch: %s %s\n", cookie->domain, iri->host);
433
600
        return -1; // ignore cookie
434
600
      }
435
2.60k
    } else {
436
2.60k
      xfree(cookie->domain);
437
2.60k
      cookie->domain = wget_strdup(iri->host);
438
2.60k
      cookie->host_only = 1;
439
2.60k
    }
440
441
2.61k
    if (!cookie->path || *cookie->path != '/') {
442
2.55k
      const char *p = iri->path ? strrchr(iri->path, '/') : NULL;
443
444
2.55k
      xfree(cookie->path);
445
446
2.55k
      if (p && p != iri->path) {
447
0
        cookie->path = wget_strmemdup(iri->path, p - iri->path);
448
2.55k
      } else {
449
2.55k
        cookie->path = wget_strdup("/");
450
        // err_printf(_("Unexpected URI without '/': %s\n"), iri->path);
451
        // return -1; // ignore cookie
452
2.55k
      }
453
2.55k
    }
454
2.61k
  }
455
456
2.84k
  cookie->normalized = 1;
457
458
/*
459
  debug_printf(">  %s=%s\n", cookie->name, cookie->value);
460
  debug_printf(">  expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage);
461
  debug_printf(">  domain=%s\n", cookie->domain);
462
  debug_printf(">  path=%s\n", cookie->path);
463
  debug_printf(">  normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n",
464
    cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only);
465
*/
466
467
2.84k
  return 0;
468
3.45k
}
469
470
int wget_cookie_normalize(const wget_iri *iri, wget_cookie *cookie)
471
1.84k
{
472
//  wget_thread_mutex_lock(&_cookies_mutex);
473
474
1.84k
  int ret = cookie_normalize_cookie(iri, cookie);
475
476
//  wget_thread_mutex_unlock(&_cookies_mutex);
477
478
1.84k
  return ret;
479
1.84k
}
480
481
void wget_cookie_normalize_cookies(const wget_iri *iri, const wget_vector *cookies)
482
1.61k
{
483
//  wget_thread_mutex_lock(&_cookies_mutex);
484
485
3.22k
  for (int it = 0; it < wget_vector_size(cookies); it++)
486
1.61k
    cookie_normalize_cookie(iri, wget_vector_get(cookies, it));
487
488
//  wget_thread_mutex_unlock(&_cookies_mutex);
489
1.61k
}