/src/wget2/libwget/cookie_parse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2012 Tim Ruehsen |
3 | | * Copyright (c) 2015-2024 Free Software Foundation, Inc. |
4 | | * |
5 | | * This file is part of libwget. |
6 | | * |
7 | | * Libwget is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as published by |
9 | | * the Free Software Foundation, either version 3 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * Libwget is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libwget. If not, see <https://www.gnu.org/licenses/>. |
19 | | * |
20 | | * |
21 | | * Cookie parsing routines |
22 | | * |
23 | | * Changelog |
24 | | * 23.10.2012 Tim Ruehsen created |
25 | | * 14.08.2019 Tim Ruehsen split out from cookie.c |
26 | | * |
27 | | * see https://tools.ietf.org/html/rfc6265 |
28 | | * |
29 | | */ |
30 | | |
31 | | #include <config.h> |
32 | | |
33 | | #include <stdio.h> |
34 | | #include <stdlib.h> |
35 | | #include <string.h> |
36 | | #include <limits.h> |
37 | | #include <ctype.h> |
38 | | #include <time.h> |
39 | | |
40 | | #include <c-ctype.h> |
41 | | |
42 | | #include <wget.h> |
43 | | #include "private.h" |
44 | | #include "cookie.h" |
45 | | |
/*
 * Check whether `host` is covered by the cookie domain `domain`.
 *
 * Returns true if both strings are identical, or if `host` ends with
 * `domain` and the character directly in front of that suffix is a '.'
 * (e.g. domain "example.com" covers host "www.example.com").
 * The comparison is byte-wise, so callers have to pass both strings in the
 * same letter case. Both arguments must be non-NULL.
 */
bool cookie_domain_match(const char *domain, const char *host)
{
	debug_printf("domain_match(%s,%s)", domain, host);

	if (strcmp(domain, host) == 0)
		return true; // both names are the same

	const size_t dlen = strlen(domain);
	const size_t hlen = strlen(host);

	// a sub-domain needs at least one more character than the domain itself
	if (hlen <= dlen)
		return false;

	// hlen > dlen guarantees that suffix[-1] is still inside `host`
	const char *suffix = host + (hlen - dlen);

	return strcmp(suffix, domain) == 0 && suffix[-1] == '.';
}
68 | | |
/*
 * Path matching, following RFC 6265 5.1.4.
 *
 * cookie_path:  path stored with the cookie (must be non-NULL); a single
 *               leading '/' is ignored for the comparison.
 * request_path: path of the request, may be NULL; a single leading '/' is
 *               ignored as well. Only the part in front of its last '/'
 *               (the "directory") is taken as the length to compare against.
 *
 * Returns true if the cookie may be used for the request path.
 */
bool cookie_path_match(const char *cookie_path, const char *request_path)
{
	const char *last_slash;
	size_t cookie_path_length, iri_path_length;
	bool cookie_path_slash = false;

	// compare without the leading slash, but remember that there was one
	if (*cookie_path == '/') {
		cookie_path++;
		cookie_path_slash = true;
	}

	if (request_path && *request_path == '/')
		request_path++;

	debug_printf("path_match(/%s,/%s)\n", cookie_path, request_path ? request_path : "");

	// algorithm as described in RFC 6265 5.1.4

	if (!request_path || !(last_slash = strrchr(request_path, '/'))) {
		// no directory part: treat the request path as "/"
		request_path = "";
		iri_path_length = 0;
	} else {
		iri_path_length = last_slash - request_path;
	}

	cookie_path_length = strlen(cookie_path);

	if (iri_path_length < cookie_path_length)
		// cookie-path is not a prefix of request-path
		return false;

	if (iri_path_length == 0 && cookie_path_length == 0)
		// slash matches slash
		return true;

	if (!strncmp(cookie_path, request_path, cookie_path_length)) {
		if (!request_path[cookie_path_length])
			// the cookie-path and the request-path are identical
			return true;

		// The cookie-path is a prefix of the request-path, and the last
		// character of the cookie-path is %x2F ("/").
		// The stripped leading slash only counts as that "last character"
		// when nothing else is left, i.e. the cookie-path was exactly "/".
		// Otherwise "/foo" would wrongly match "/foobar/...".
		if (cookie_path_length > 0
			? cookie_path[cookie_path_length - 1] == '/'
			: cookie_path_slash)
			return true;

		if (request_path[cookie_path_length] == '/')
			// the cookie-path is a prefix of the request-path, and the first
			// character of the request-path that is not included in the cookie-
			// path is a %x2F ("/") character.
			return true;
	}

	return false;
}
126 | | |
127 | | wget_cookie *wget_cookie_init(wget_cookie *cookie) |
128 | 10.2k | { |
129 | 10.2k | if (!cookie) { |
130 | 6.90k | cookie = wget_calloc(1, sizeof(wget_cookie)); |
131 | 6.90k | if (!cookie) |
132 | 0 | return NULL; |
133 | 6.90k | } else |
134 | 3.34k | memset(cookie, 0, sizeof(*cookie)); |
135 | | |
136 | 10.2k | cookie->last_access = cookie->creation = time(NULL); |
137 | | |
138 | 10.2k | return cookie; |
139 | 10.2k | } |
140 | | |
141 | | void wget_cookie_deinit(wget_cookie *cookie) |
142 | 7.63k | { |
143 | 7.63k | if (cookie) { |
144 | 7.63k | xfree(cookie->name); |
145 | 7.63k | xfree(cookie->value); |
146 | 7.63k | xfree(cookie->domain); |
147 | 7.63k | xfree(cookie->path); |
148 | 7.63k | } |
149 | 7.63k | } |
150 | | |
151 | | void wget_cookie_free(wget_cookie **cookie) |
152 | 4.29k | { |
153 | 4.29k | if (cookie) { |
154 | 4.29k | wget_cookie_deinit(*cookie); |
155 | 4.29k | xfree(*cookie); |
156 | 4.29k | } |
157 | 4.29k | } |
158 | | |
// Element destructor for vectors of cookies: takes the cookie as an untyped
// pointer, releases its members and then the cookie itself. NULL is ignored.
void cookie_free(void *cookie)
{
	if (!cookie)
		return;

	wget_cookie_deinit(cookie);
	xfree(cookie);
}
167 | | |
168 | | /* |
169 | | int wget_cookie_equals(wget_cookie *cookie1, wget_cookie *cookie2) |
170 | | { |
171 | | if (!cookie1) |
172 | | return !cookie2; |
173 | | |
174 | | if (!cookie2) |
175 | | return 0; |
176 | | |
177 | | if (wget_strcmp(cookie1->name, cookie2->name) || |
178 | | wget_strcmp(cookie1->value, cookie2->value) || |
179 | | wget_strcmp(cookie1->domain, cookie2->domain) || |
180 | | wget_strcmp(cookie1->path, cookie2->path) || |
181 | | cookie1->domain_dot != cookie2->domain_dot || |
182 | | cookie1->normalized != cookie2->normalized || |
183 | | cookie1->persistent != cookie2->persistent || |
184 | | cookie1->host_only != cookie2->host_only || |
185 | | cookie1->secure_only != cookie2->secure_only || |
186 | | cookie1->http_only != cookie2->http_only) |
187 | | { |
188 | | return 0; |
189 | | } |
190 | | |
191 | | return 1; |
192 | | } |
193 | | */ |
194 | | |
195 | | char *wget_cookie_to_setcookie(wget_cookie *cookie) |
196 | 3.36k | { |
197 | 3.36k | char expires[32] = ""; |
198 | | |
199 | 3.36k | if (!cookie) |
200 | 1.74k | return wget_strdup("(null)"); |
201 | | |
202 | 1.61k | if (cookie->expires) |
203 | 111 | wget_http_print_date(cookie->expires, expires, sizeof(expires)); // date format from RFC 6265 |
204 | | |
205 | 1.61k | return wget_aprintf("%s=%s%s%s%s%s; domain=%s%s%s%s", |
206 | 1.61k | cookie->name, cookie->value, |
207 | 1.61k | *expires ? "; expires=" : "", *expires ? expires : "", |
208 | 1.61k | cookie->path ? "; path=" : "", cookie->path ? cookie->path : "", |
209 | 1.61k | cookie->host_only ? "" : ".", cookie->domain, |
210 | 1.61k | cookie->http_only ? "; HttpOnly" : "", |
211 | 1.61k | cookie->secure_only ? "; Secure" : ""); |
212 | 3.36k | } |
213 | | |
214 | | /* |
215 | | RFC 6265 |
216 | | |
217 | | set-cookie-header = "Set-Cookie:" SP set-cookie-string |
218 | | set-cookie-string = cookie-pair *( ";" SP cookie-av ) |
219 | | cookie-pair = cookie-name "=" cookie-value |
220 | | cookie-name = token |
221 | | cookie-value = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE ) |
222 | | cookie-octet = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E |
223 | | ; US-ASCII characters excluding CTLs, |
224 | | ; whitespace DQUOTE, comma, semicolon, |
225 | | ; and backslash |
226 | | token = <token, defined in [RFC2616], Section 2.2> |
227 | | |
228 | | cookie-av = expires-av / max-age-av / domain-av / |
229 | | path-av / secure-av / httponly-av / |
230 | | extension-av |
231 | | expires-av = "Expires=" sane-cookie-date |
232 | | sane-cookie-date = <rfc1123-date, defined in [RFC2616], Section 3.3.1> |
233 | | max-age-av = "Max-Age=" non-zero-digit *DIGIT |
234 | | ; In practice, both expires-av and max-age-av |
235 | | ; are limited to dates representable by the |
236 | | ; user agent. |
237 | | non-zero-digit = %x31-39 |
238 | | ; digits 1 through 9 |
239 | | domain-av = "Domain=" domain-value |
240 | | domain-value = <subdomain> |
241 | | ; defined in [RFC1034], Section 3.5, as |
242 | | ; enhanced by [RFC1123], Section 2.1 |
243 | | path-av = "Path=" path-value |
244 | | path-value = <any CHAR except CTLs or ";"> |
245 | | secure-av = "Secure" |
246 | | httponly-av = "HttpOnly" |
247 | | extension-av = <any CHAR except CTLs or ";"> |
248 | | */ |
249 | | const char *wget_cookie_parse_setcookie(const char *s, wget_cookie **_cookie) |
250 | 6.90k | { |
251 | 6.90k | const char *name, *p; |
252 | 6.90k | wget_cookie *cookie = wget_cookie_init(NULL); |
253 | | |
254 | | // remove leading whitespace from cookie name |
255 | 7.61k | while (c_isspace(*s)) s++; |
256 | | |
257 | | // s = wget_http_parse_token(s, &cookie->name); |
258 | | // also accept UTF-8 (NON-ASCII) characters in cookie name |
259 | 25.2k | for (p = s; (*s >= 32 && *s <= 126 && *s != '=' && *s != ';') || *s < 0; s++); |
260 | | |
261 | | // remove trailing whitespace from cookie name |
262 | 7.48k | while (s > p && c_isspace(s[-1])) s--; |
263 | 6.90k | cookie->name = wget_strmemdup(p, s - p); |
264 | | |
265 | | // advance to next delimiter |
266 | 8.06k | while (c_isspace(*s)) s++; |
267 | | |
268 | 6.90k | if (cookie->name && *cookie->name && *s == '=') { |
269 | | // *cookie-octet / ( DQUOTE *cookie-octet DQUOTE ) |
270 | | |
271 | | // skip over delimiter and remove leading whitespace from cookie value |
272 | 8.24k | for (s++; c_isspace(*s);) s++; |
273 | | |
274 | | /* RFC compliance is too strict |
275 | | if (*s == '\"') |
276 | | s++; |
277 | | // cookie-octet = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E |
278 | | for (p = s; *s > 32 && *s <= 126 && *s != '\\' && *s != ',' && *s != ';' && *s != '\"'; s++); |
279 | | */ |
280 | | |
281 | | // also accept UTF-8 (NON-ASCII) characters in cookie value |
282 | 22.0k | for (p = s; (*s >= 32 && *s <= 126 && *s != ';') || *s < 0; s++); |
283 | | |
284 | | // remove trailing whitespace from cookie value |
285 | 6.27k | while (s > p && c_isspace(s[-1])) s--; |
286 | | |
287 | 5.69k | cookie->value = wget_strmemdup(p, s - p); |
288 | | |
289 | 23.2k | do { |
290 | | // find next delimiter |
291 | 30.2k | while (*s && *s != ';') s++; |
292 | 23.2k | if (!*s) break; |
293 | | |
294 | | // skip delimiter and remove leading spaces from attribute name |
295 | 22.5k | for (s++; c_isspace(*s);) s++; |
296 | 21.4k | if (!*s) break; |
297 | | |
298 | 21.1k | s = wget_http_parse_token(s, &name); |
299 | | |
300 | 21.1k | if (name) { |
301 | | // find next delimiter |
302 | 24.6k | while (*s && *s != '=' && *s != ';') s++; |
303 | | // if (!*s) break; |
304 | | |
305 | 21.1k | if (*s == '=') { |
306 | | // find end of value |
307 | 17.1k | for (s++; c_isspace(*s);) s++; |
308 | 77.5k | for (p = s; (*s >= 32 && *s <= 126 && *s != ';') || *s < 0; s++); |
309 | | |
310 | 15.7k | if (!wget_strcasecmp_ascii(name, "expires")) { |
311 | 7.23k | cookie->expires = wget_http_parse_full_date(p); |
312 | 8.55k | } else if (!wget_strcasecmp_ascii(name, "max-age")) { |
313 | 1.79k | long offset = atol(p); |
314 | | |
315 | 1.79k | if (offset > 0) { |
316 | | // limit offset to avoid integer overflow |
317 | 1.15k | if (offset > INT_MAX) |
318 | 366 | offset = INT_MAX; |
319 | 1.15k | cookie->maxage = time(NULL) + offset; |
320 | 1.15k | } else |
321 | 639 | cookie->maxage = 0; |
322 | 6.76k | } else if (!wget_strcasecmp_ascii(name, "domain")) { |
323 | 2.82k | if (p != s) { |
324 | 2.39k | if (*p == '.') { // RFC 6265 5.2.3 |
325 | 1.27k | do { p++; } while (*p == '.'); |
326 | 612 | cookie->domain_dot = 1; |
327 | 612 | } else |
328 | 1.78k | cookie->domain_dot = 0; |
329 | | |
330 | | // remove trailing whitespace from attribute value |
331 | 2.84k | while (s > p && c_isspace(s[-1])) s--; |
332 | | |
333 | 2.39k | xfree(cookie->domain); |
334 | 2.39k | cookie->domain = wget_strmemdup(p, s - p); |
335 | 2.39k | } |
336 | 3.94k | } else if (!wget_strcasecmp_ascii(name, "path")) { |
337 | | // remove trailing whitespace from attribute value |
338 | 2.26k | while (s > p && c_isspace(s[-1])) s--; |
339 | | |
340 | 1.51k | xfree(cookie->path); |
341 | 1.51k | cookie->path = wget_strmemdup(p, s - p); |
342 | 2.42k | } else if (!wget_strcasecmp_ascii(name, "secure")) { |
343 | | // here we ignore the value |
344 | 384 | cookie->secure_only = 1; |
345 | 2.04k | } else if (!wget_strcasecmp_ascii(name, "httponly")) { |
346 | | // here we ignore the value |
347 | 384 | cookie->http_only = 1; |
348 | 1.65k | } else { |
349 | 1.65k | debug_printf("Unsupported cookie-av '%s'\n", name); |
350 | 1.65k | } |
351 | 15.7k | } else if (!wget_strcasecmp_ascii(name, "secure")) { |
352 | 416 | cookie->secure_only = 1; |
353 | 4.96k | } else if (!wget_strcasecmp_ascii(name, "httponly")) { |
354 | 396 | cookie->http_only = 1; |
355 | 4.56k | } else { |
356 | 4.56k | debug_printf("Unsupported cookie-av '%s'\n", name); |
357 | 4.56k | } |
358 | | |
359 | 21.1k | xfree(name); |
360 | 21.1k | } |
361 | 21.1k | } while (*s); |
362 | | |
363 | 5.69k | } else { |
364 | 1.20k | wget_cookie_free(&cookie); |
365 | 1.20k | error_printf(_("Cookie without name or assignment ignored\n")); |
366 | 1.20k | } |
367 | | |
368 | 6.90k | if (_cookie) |
369 | 6.90k | *_cookie = cookie; |
370 | 0 | else |
371 | 0 | wget_cookie_free(&cookie); |
372 | | |
373 | 6.90k | return s; |
374 | 6.90k | } |
375 | | |
376 | | // normalize/sanitize and store cookies |
377 | | static int cookie_normalize_cookie(const wget_iri *iri, wget_cookie *cookie) |
378 | 3.45k | { |
379 | | /* |
380 | | debug_printf("normalize cookie %s=%s\n", cookie->name, cookie->value); |
381 | | debug_printf("< %s=%s\n", cookie->name, cookie->value); |
382 | | debug_printf("< expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage); |
383 | | debug_printf("< domain=%s\n", cookie->domain); |
384 | | debug_printf("< path=%s\n", cookie->path); |
385 | | debug_printf("< normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n", |
386 | | cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only); |
387 | | */ |
388 | 3.45k | if (!cookie) |
389 | 0 | return -1; |
390 | | |
391 | 3.45k | cookie->normalized = 0; |
392 | | |
393 | 3.45k | if (cookie->maxage) |
394 | 384 | cookie->expires = cookie->maxage; |
395 | | |
396 | 3.45k | cookie->persistent = cookie->expires != 0; |
397 | | |
398 | | // convert domain to lowercase |
399 | 3.45k | wget_strtolower((char *)cookie->domain); |
400 | | |
401 | 3.45k | if (iri) { |
402 | | // cookies comes from a HTTP header and needs checking |
403 | | |
404 | | // check prefixes as proposed in https://tools.ietf.org/html/draft-ietf-httpbis-cookie-prefixes-00 |
405 | 3.22k | if (!wget_strncmp(cookie->name, "__Secure-", 9)) { |
406 | 4 | if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) { |
407 | 4 | debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host); |
408 | 4 | return -1; // ignore cookie |
409 | 4 | } |
410 | 4 | } |
411 | 3.21k | else if (!wget_strncmp(cookie->name, "__Host-", 7)) { |
412 | 4 | if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) { |
413 | 4 | debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host); |
414 | 4 | return -1; // ignore cookie |
415 | 4 | } |
416 | 0 | if (!cookie->host_only) { |
417 | 0 | debug_printf("Cookie prefix requires hostonly flag: %s %s\n", cookie->name, iri->host); |
418 | 0 | return -1; // ignore cookie |
419 | 0 | } |
420 | 0 | if (wget_strcmp(cookie->path, "/")) { |
421 | 0 | debug_printf("Cookie prefix requires path \"/\": %s %s\n", cookie->name, iri->host); |
422 | 0 | return -1; // ignore cookie |
423 | 0 | } |
424 | 0 | } |
425 | | |
426 | 3.21k | if (cookie->domain && *cookie->domain) { |
427 | 610 | if (!strcmp(cookie->domain, iri->host)) { |
428 | 6 | cookie->host_only = 1; |
429 | 604 | } else if (cookie_domain_match(cookie->domain, iri->host)) { |
430 | 4 | cookie->host_only = 0; |
431 | 600 | } else { |
432 | 600 | debug_printf("Domain mismatch: %s %s\n", cookie->domain, iri->host); |
433 | 600 | return -1; // ignore cookie |
434 | 600 | } |
435 | 2.60k | } else { |
436 | 2.60k | xfree(cookie->domain); |
437 | 2.60k | cookie->domain = wget_strdup(iri->host); |
438 | 2.60k | cookie->host_only = 1; |
439 | 2.60k | } |
440 | | |
441 | 2.61k | if (!cookie->path || *cookie->path != '/') { |
442 | 2.55k | const char *p = iri->path ? strrchr(iri->path, '/') : NULL; |
443 | | |
444 | 2.55k | xfree(cookie->path); |
445 | | |
446 | 2.55k | if (p && p != iri->path) { |
447 | 0 | cookie->path = wget_strmemdup(iri->path, p - iri->path); |
448 | 2.55k | } else { |
449 | 2.55k | cookie->path = wget_strdup("/"); |
450 | | // err_printf(_("Unexpected URI without '/': %s\n"), iri->path); |
451 | | // return -1; // ignore cookie |
452 | 2.55k | } |
453 | 2.55k | } |
454 | 2.61k | } |
455 | | |
456 | 2.84k | cookie->normalized = 1; |
457 | | |
458 | | /* |
459 | | debug_printf("> %s=%s\n", cookie->name, cookie->value); |
460 | | debug_printf("> expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage); |
461 | | debug_printf("> domain=%s\n", cookie->domain); |
462 | | debug_printf("> path=%s\n", cookie->path); |
463 | | debug_printf("> normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n", |
464 | | cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only); |
465 | | */ |
466 | | |
467 | 2.84k | return 0; |
468 | 3.45k | } |
469 | | |
470 | | int wget_cookie_normalize(const wget_iri *iri, wget_cookie *cookie) |
471 | 1.84k | { |
472 | | // wget_thread_mutex_lock(&_cookies_mutex); |
473 | | |
474 | 1.84k | int ret = cookie_normalize_cookie(iri, cookie); |
475 | | |
476 | | // wget_thread_mutex_unlock(&_cookies_mutex); |
477 | | |
478 | 1.84k | return ret; |
479 | 1.84k | } |
480 | | |
481 | | void wget_cookie_normalize_cookies(const wget_iri *iri, const wget_vector *cookies) |
482 | 1.61k | { |
483 | | // wget_thread_mutex_lock(&_cookies_mutex); |
484 | | |
485 | 3.22k | for (int it = 0; it < wget_vector_size(cookies); it++) |
486 | 1.61k | cookie_normalize_cookie(iri, wget_vector_get(cookies, it)); |
487 | | |
488 | | // wget_thread_mutex_unlock(&_cookies_mutex); |
489 | 1.61k | } |