Coverage Report

Created: 2024-03-08 06:32

/src/wget2/libwget/cookie_parse.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2012 Tim Ruehsen
3
 * Copyright (c) 2015-2024 Free Software Foundation, Inc.
4
 *
5
 * This file is part of libwget.
6
 *
7
 * Libwget is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * Libwget is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19
 *
20
 *
21
 * Cookie parsing routines
22
 *
23
 * Changelog
24
 * 23.10.2012  Tim Ruehsen  created
25
 * 14.08.2019  Tim Ruehsen  split out from cookie.c
26
 *
27
 * see https://tools.ietf.org/html/rfc6265
28
 *
29
 */
30
31
#include <config.h>
32
33
#include <stdio.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <limits.h>
37
#include <ctype.h>
38
#include <time.h>
39
40
#include <c-ctype.h>
41
42
#include <wget.h>
43
#include "private.h"
44
#include "cookie.h"
45
46
/*
 * Check whether a request host falls under a cookie domain.
 *
 * domain: cookie domain to test against
 * host:   host name of the request
 *
 * Both arguments are handed to strcmp()/strlen() unchecked, so neither
 * may be NULL.
 *
 * Returns true if host is identical to domain, or if host ends with
 * domain and the character right before that suffix is a '.'.
 * Returns false otherwise.
 */
bool cookie_domain_match(const char *domain, const char *host)
{
  size_t domain_length, host_length;
  const char *p;

  // terminate the message like every other debug line in this file
  debug_printf("domain_match(%s,%s)\n", domain, host);

  if (!strcmp(domain, host))
    return true; // an exact match

  domain_length = strlen(domain);
  host_length = strlen(host);

  if (domain_length >= host_length)
    return false; // host is too short

  // host is strictly longer than domain, so p[-1] is still inside host
  p = host + host_length - domain_length;
  if (!strcmp(p, domain) && p[-1] == '.')
    return true;

  return false;
}
68
69
/*
 * Check whether a cookie path applies to a request path, following the
 * path-match rules of RFC 6265 5.1.4.
 *
 * cookie_path:  path stored with the cookie (must not be NULL), with or
 *               without a leading '/'
 * request_path: path of the request, may be NULL, with or without a
 *               leading '/'
 *
 * Returns true if the cookie path matches, false otherwise.
 *
 * NOTE(review): the length of the request path is taken only up to its
 * last '/', so e.g. cookie path "/foo" is not matched by the request
 * path "/foo" itself - confirm that this is intended.
 */
bool cookie_path_match(const char *cookie_path, const char *request_path)
{
  const char *last_slash;
  size_t cookie_path_length, iri_path_length;
  bool cookie_path_slash = false;

  // both paths are compared without their leading slash
  if (*cookie_path == '/') {
    cookie_path++;
    cookie_path_slash = true;
  }

  if (request_path && *request_path == '/')
    request_path++;

  debug_printf("path_match(/%s,/%s)\n", cookie_path, request_path ? request_path : "");

  // algorithm as described in RFC 6265 5.1.4

  if (!request_path || !(last_slash = strrchr(request_path, '/'))) {
    // no directory part in the request path
    request_path = "";
    iri_path_length = 0;
  } else {
    iri_path_length = last_slash - request_path;
  }

  cookie_path_length = strlen(cookie_path);

  if (iri_path_length < cookie_path_length)
    // cookie-path is not a prefix of request-path
    return false;

  if (iri_path_length == 0 && cookie_path_length == 0)
    // slash matches slash
    return true;

  if (!strncmp(cookie_path, request_path, cookie_path_length)) {
    if (!request_path[cookie_path_length])
      // the cookie-path and the request-path are identical
      return true;

    // The cookie-path is a prefix of the request-path and its last
    // character is %x2F ("/"). The stripped leading slash only counts
    // when nothing follows it, i.e. the cookie-path was exactly "/".
    // Otherwise "/foo" would wrongly match "/foobar/...".
    if ((cookie_path_length > 0 && cookie_path[cookie_path_length - 1] == '/')
      || (cookie_path_length == 0 && cookie_path_slash))
      return true;

    if (request_path[cookie_path_length] == '/')
      // the cookie-path is a prefix of the request-path, and the first
      // character of the request-path that is not included in the cookie-
      // path is a %x2F ("/") character.
      return true;
  }

  return false;
}
126
127
/*
 * Prepare a cookie structure for use.
 *
 * cookie: structure to reset, or NULL to have a new one allocated
 *
 * The structure is zeroed and its creation and last-access times are
 * set to the current time.
 *
 * Returns the initialized cookie, or NULL if a new structure was
 * requested and the allocation failed.
 */
wget_cookie *wget_cookie_init(wget_cookie *cookie)
{
  if (cookie) {
    // caller-provided storage: just wipe it
    memset(cookie, 0, sizeof(*cookie));
  } else {
    cookie = wget_calloc(1, sizeof(wget_cookie));
    if (!cookie)
      return NULL;
  }

  cookie->creation = time(NULL);
  cookie->last_access = cookie->creation;

  return cookie;
}
140
141
/*
 * Release the strings held by a cookie (name, value, domain, path).
 * The cookie structure itself is not freed. A NULL cookie is ignored.
 */
void wget_cookie_deinit(wget_cookie *cookie)
{
  if (!cookie)
    return;

  xfree(cookie->name);
  xfree(cookie->value);
  xfree(cookie->domain);
  xfree(cookie->path);
}
150
151
/*
 * Release a cookie's strings and then the cookie structure itself.
 *
 * cookie: address of the cookie pointer; a NULL address is ignored
 */
void wget_cookie_free(wget_cookie **cookie)
{
  if (!cookie)
    return;

  wget_cookie_deinit(*cookie);
  xfree(*cookie);
}
158
159
// Destructor with a generic pointer argument, for vector destruction.
// Releases the cookie's strings and the cookie itself; NULL is ignored.
void cookie_free(void *cookie)
{
  if (!cookie)
    return;

  wget_cookie_deinit(cookie);
  xfree(cookie);
}
167
168
/*
169
int wget_cookie_equals(wget_cookie *cookie1, wget_cookie *cookie2)
170
{
171
  if (!cookie1)
172
    return !cookie2;
173
174
  if (!cookie2)
175
    return 0;
176
177
  if (wget_strcmp(cookie1->name, cookie2->name) ||
178
    wget_strcmp(cookie1->value, cookie2->value) ||
179
    wget_strcmp(cookie1->domain, cookie2->domain) ||
180
    wget_strcmp(cookie1->path, cookie2->path) ||
181
    cookie1->domain_dot != cookie2->domain_dot ||
182
    cookie1->normalized != cookie2->normalized ||
183
    cookie1->persistent != cookie2->persistent ||
184
    cookie1->host_only != cookie2->host_only ||
185
    cookie1->secure_only != cookie2->secure_only ||
186
    cookie1->http_only != cookie2->http_only)
187
  {
188
    return 0;
189
  }
190
191
  return 1;
192
}
193
*/
194
195
/*
 * Render a cookie as a Set-Cookie style string.
 *
 * cookie: cookie to print, may be NULL
 *
 * Returns the string built by wget_aprintf(), or a copy of "(null)"
 * if cookie is NULL.
 */
char *wget_cookie_to_setcookie(wget_cookie *cookie)
{
  char date[32] = "";
  const char *expires_key = "";
  const char *path_key = "", *path_value = "";

  if (!cookie)
    return wget_strdup("(null)");

  // date format from RFC 6265
  if (cookie->expires)
    wget_http_print_date(cookie->expires, date, sizeof(date));

  // the expires attribute is only emitted if a date was printed
  if (*date)
    expires_key = "; expires=";

  if (cookie->path) {
    path_key = "; path=";
    path_value = cookie->path;
  }

  return wget_aprintf("%s=%s%s%s%s%s; domain=%s%s%s%s",
    cookie->name, cookie->value,
    expires_key, date,
    path_key, path_value,
    cookie->host_only ? "" : ".", cookie->domain,
    cookie->http_only ? "; HttpOnly" : "",
    cookie->secure_only ? "; Secure" : "");
}
213
214
/*
215
 RFC 6265
216
217
 set-cookie-header = "Set-Cookie:" SP set-cookie-string
218
 set-cookie-string = cookie-pair *( ";" SP cookie-av )
219
 cookie-pair       = cookie-name "=" cookie-value
220
 cookie-name       = token
221
 cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
222
 cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
223
                       ; US-ASCII characters excluding CTLs,
224
                       ; whitespace DQUOTE, comma, semicolon,
225
                       ; and backslash
226
 token             = <token, defined in [RFC2616], Section 2.2>
227
228
 cookie-av         = expires-av / max-age-av / domain-av /
229
                     path-av / secure-av / httponly-av /
230
                     extension-av
231
 expires-av        = "Expires=" sane-cookie-date
232
 sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
233
 max-age-av        = "Max-Age=" non-zero-digit *DIGIT
234
                       ; In practice, both expires-av and max-age-av
235
                       ; are limited to dates representable by the
236
                       ; user agent.
237
 non-zero-digit    = %x31-39
238
                       ; digits 1 through 9
239
 domain-av         = "Domain=" domain-value
240
 domain-value      = <subdomain>
241
                       ; defined in [RFC1034], Section 3.5, as
242
                       ; enhanced by [RFC1123], Section 2.1
243
 path-av           = "Path=" path-value
244
 path-value        = <any CHAR except CTLs or ";">
245
 secure-av         = "Secure"
246
 httponly-av       = "HttpOnly"
247
 extension-av      = <any CHAR except CTLs or ";">
248
*/
249
// Tell whether a byte may appear in a cookie name, value or attribute value:
// printable ASCII (32..126) or any non-ASCII byte (e.g. part of UTF-8).
// The byte is inspected as unsigned char so that the result does not depend
// on whether plain char is signed on the target platform.
static bool cookie_is_text_char(char ch)
{
  const unsigned char c = (unsigned char) ch;

  return (c >= 32 && c <= 126) || c >= 128;
}

/*
 * Parse the value of a Set-Cookie header into a newly allocated cookie.
 *
 * s:       string to parse (must not be NULL)
 * _cookie: where to store the parsed cookie, may be NULL
 *
 * On success *_cookie receives the new cookie. If the cookie has no name,
 * is not followed by '=', or cannot be allocated, *_cookie is set to NULL.
 * If _cookie is NULL, the parsed cookie is freed again.
 *
 * Returns a pointer into s at the position where parsing stopped.
 */
const char *wget_cookie_parse_setcookie(const char *s, wget_cookie **_cookie)
{
  const char *name, *p;
  wget_cookie *cookie = wget_cookie_init(NULL);

  if (!cookie) {
    // out of memory: report "no cookie" instead of dereferencing NULL
    if (_cookie)
      *_cookie = NULL;
    return s;
  }

  // remove leading whitespace from cookie name
  while (c_isspace(*s)) s++;

  // also accept UTF-8 (NON-ASCII) characters in cookie name
  for (p = s; cookie_is_text_char(*s) && *s != '=' && *s != ';'; s++);

  // remove trailing whitespace from cookie name
  while (s > p && c_isspace(s[-1])) s--;
  cookie->name = wget_strmemdup(p, s - p);

  // advance to next delimiter
  while (c_isspace(*s)) s++;

  if (cookie->name && *cookie->name && *s == '=') {
    // *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )

    // skip over delimiter and remove leading whitespace from cookie value
    for (s++; c_isspace(*s);) s++;

    // Strict RFC compliance is too strict in practice:
    // also accept UTF-8 (NON-ASCII) characters in cookie value
    for (p = s; cookie_is_text_char(*s) && *s != ';'; s++);

    // remove trailing whitespace from cookie value
    while (s > p && c_isspace(s[-1])) s--;

    cookie->value = wget_strmemdup(p, s - p);

    do {
      // find next delimiter
      while (*s && *s != ';') s++;
      if (!*s) break;

      // skip delimiter and remove leading spaces from attribute name
      for (s++; c_isspace(*s);) s++;
      if (!*s) break;

      s = wget_http_parse_token(s, &name);

      if (name) {
        // find next delimiter
        while (*s && *s != '=' && *s != ';') s++;

        if (*s == '=') {
          // find end of value
          for (s++; c_isspace(*s);) s++;
          for (p = s; cookie_is_text_char(*s) && *s != ';'; s++);

          if (!wget_strcasecmp_ascii(name, "expires")) {
            cookie->expires = wget_http_parse_full_date(p);
          } else if (!wget_strcasecmp_ascii(name, "max-age")) {
            // strtol() has defined behavior for out-of-range input, atol() has not
            long offset = strtol(p, NULL, 10);

            if (offset > 0) {
              // limit offset to avoid integer overflow
              if (offset > INT_MAX)
                offset = INT_MAX;
              cookie->maxage = time(NULL) + offset;
            } else
              cookie->maxage = 0;
          } else if (!wget_strcasecmp_ascii(name, "domain")) {
            if (p != s) {
              if (*p == '.') { // RFC 6265 5.2.3
                do { p++; } while (*p == '.');
                cookie->domain_dot = 1;
              } else
                cookie->domain_dot = 0;

              // remove trailing whitespace from attribute value
              while (s > p && c_isspace(s[-1])) s--;

              // a later domain attribute replaces an earlier one
              xfree(cookie->domain);
              cookie->domain = wget_strmemdup(p, s - p);
            }
          } else if (!wget_strcasecmp_ascii(name, "path")) {
            // remove trailing whitespace from attribute value
            while (s > p && c_isspace(s[-1])) s--;

            // a later path attribute replaces an earlier one
            xfree(cookie->path);
            cookie->path = wget_strmemdup(p, s - p);
          } else if (!wget_strcasecmp_ascii(name, "secure")) {
            // here we ignore the value
            cookie->secure_only = 1;
          } else if (!wget_strcasecmp_ascii(name, "httponly")) {
            // here we ignore the value
            cookie->http_only = 1;
          } else {
            debug_printf("Unsupported cookie-av '%s'\n", name);
          }
        } else if (!wget_strcasecmp_ascii(name, "secure")) {
          cookie->secure_only = 1;
        } else if (!wget_strcasecmp_ascii(name, "httponly")) {
          cookie->http_only = 1;
        } else {
          debug_printf("Unsupported cookie-av '%s'\n", name);
        }

        xfree(name);
      }
    } while (*s);

  } else {
    wget_cookie_free(&cookie);
    error_printf(_("Cookie without name or assignment ignored\n"));
  }

  if (_cookie)
    *_cookie = cookie;
  else
    wget_cookie_free(&cookie);

  return s;
}
375
376
// normalize/sanitize and store cookies
377
static int cookie_normalize_cookie(const wget_iri *iri, wget_cookie *cookie)
378
0
{
379
/*
380
  debug_printf("normalize cookie %s=%s\n", cookie->name, cookie->value);
381
  debug_printf("<  %s=%s\n", cookie->name, cookie->value);
382
  debug_printf("<  expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage);
383
  debug_printf("<  domain=%s\n", cookie->domain);
384
  debug_printf("<  path=%s\n", cookie->path);
385
  debug_printf("<  normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n",
386
    cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only);
387
*/
388
0
  if (!cookie)
389
0
    return -1;
390
391
0
  cookie->normalized = 0;
392
393
0
  if (cookie->maxage)
394
0
    cookie->expires = cookie->maxage;
395
396
0
  cookie->persistent = cookie->expires != 0;
397
398
  // convert domain to lowercase
399
0
  wget_strtolower((char *)cookie->domain);
400
401
0
  if (iri) {
402
    // cookies comes from a HTTP header and needs checking
403
404
    // check prefixes as proposed in https://tools.ietf.org/html/draft-ietf-httpbis-cookie-prefixes-00
405
0
    if (!wget_strncmp(cookie->name, "__Secure-", 9)) {
406
0
      if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
407
0
        debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
408
0
        return -1; // ignore cookie
409
0
      }
410
0
    }
411
0
    else if (!wget_strncmp(cookie->name, "__Host-", 7)) {
412
0
      if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
413
0
        debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
414
0
        return -1; // ignore cookie
415
0
      }
416
0
      if (!cookie->host_only) {
417
0
        debug_printf("Cookie prefix requires hostonly flag: %s %s\n", cookie->name, iri->host);
418
0
        return -1; // ignore cookie
419
0
      }
420
0
      if (wget_strcmp(cookie->path, "/")) {
421
0
        debug_printf("Cookie prefix requires path \"/\": %s %s\n", cookie->name, iri->host);
422
0
        return -1; // ignore cookie
423
0
      }
424
0
    }
425
426
0
    if (cookie->domain && *cookie->domain) {
427
0
      if (!strcmp(cookie->domain, iri->host)) {
428
0
        cookie->host_only = 1;
429
0
      } else if (cookie_domain_match(cookie->domain, iri->host)) {
430
0
        cookie->host_only = 0;
431
0
      } else {
432
0
        debug_printf("Domain mismatch: %s %s\n", cookie->domain, iri->host);
433
0
        return -1; // ignore cookie
434
0
      }
435
0
    } else {
436
0
      xfree(cookie->domain);
437
0
      cookie->domain = wget_strdup(iri->host);
438
0
      cookie->host_only = 1;
439
0
    }
440
441
0
    if (!cookie->path || *cookie->path != '/') {
442
0
      const char *p = iri->path ? strrchr(iri->path, '/') : NULL;
443
444
0
      xfree(cookie->path);
445
446
0
      if (p && p != iri->path) {
447
0
        cookie->path = wget_strmemdup(iri->path, p - iri->path);
448
0
      } else {
449
0
        cookie->path = wget_strdup("/");
450
        // err_printf(_("Unexpected URI without '/': %s\n"), iri->path);
451
        // return -1; // ignore cookie
452
0
      }
453
0
    }
454
0
  }
455
456
0
  cookie->normalized = 1;
457
458
/*
459
  debug_printf(">  %s=%s\n", cookie->name, cookie->value);
460
  debug_printf(">  expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage);
461
  debug_printf(">  domain=%s\n", cookie->domain);
462
  debug_printf(">  path=%s\n", cookie->path);
463
  debug_printf(">  normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n",
464
    cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only);
465
*/
466
467
0
  return 0;
468
0
}
469
470
/*
 * Normalize a single cookie.
 *
 * iri:    origin of the cookie, or NULL
 * cookie: cookie to normalize
 *
 * Forwards to cookie_normalize_cookie() and returns its result.
 * No locking is done here.
 */
int wget_cookie_normalize(const wget_iri *iri, wget_cookie *cookie)
{
  return cookie_normalize_cookie(iri, cookie);
}
480
481
/*
 * Normalize every cookie stored in a vector.
 *
 * iri:     origin of the cookies, or NULL
 * cookies: vector of cookies
 *
 * The result of normalizing the individual cookies is not evaluated.
 * No locking is done here.
 */
void wget_cookie_normalize_cookies(const wget_iri *iri, const wget_vector *cookies)
{
  int idx = 0;

  while (idx < wget_vector_size(cookies)) {
    cookie_normalize_cookie(iri, wget_vector_get(cookies, idx));
    idx++;
  }
}