Line | Count | Source |
1 | | /* Support for cookies. |
2 | | Copyright (C) 2001-2011, 2015, 2018-2024, 2026 Free Software |
3 | | Foundation, Inc. |
4 | | |
5 | | This file is part of GNU Wget. |
6 | | |
7 | | GNU Wget is free software; you can redistribute it and/or modify |
8 | | it under the terms of the GNU General Public License as published by |
9 | | the Free Software Foundation; either version 3 of the License, or (at |
10 | | your option) any later version. |
11 | | |
12 | | GNU Wget is distributed in the hope that it will be useful, but |
13 | | WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | General Public License for more details. |
16 | | |
17 | | You should have received a copy of the GNU General Public License |
18 | | along with Wget. If not, see <http://www.gnu.org/licenses/>. |
19 | | |
20 | | Additional permission under GNU GPL version 3 section 7 |
21 | | |
22 | | If you modify this program, or any covered work, by linking or |
23 | | combining it with the OpenSSL project's OpenSSL library (or a |
24 | | modified version of that library), containing parts covered by the |
25 | | terms of the OpenSSL or SSLeay licenses, the Free Software Foundation |
26 | | grants you additional permission to convey the resulting work. |
27 | | Corresponding Source for a non-source form of such a combination |
28 | | shall include the source code for the parts of OpenSSL used as well |
29 | | as that of the covered work. */ |
30 | | |
31 | | /* Written by Hrvoje Niksic. Parts are loosely inspired by the |
32 | | cookie patch submitted by Tomasz Wegrzanowski. |
33 | | |
34 | | This implements the client-side cookie support, as specified |
35 | | (loosely) by Netscape's "preliminary specification", currently |
36 | | available at: |
37 | | |
38 | | http://wp.netscape.com/newsref/std/cookie_spec.html |
39 | | |
40 | | rfc2109 is not supported because of its incompatibilities with the |
41 | | above widely-used specification. rfc2965 is entirely ignored, |
42 | | since popular client software doesn't implement it, and even the |
43 | | sites that do send Set-Cookie2 also emit Set-Cookie for |
44 | | compatibility. */ |
45 | | |
46 | | #include "wget.h" |
47 | | |
48 | | #include <stdint.h> |
49 | | #include <stdio.h> |
50 | | #include <string.h> |
51 | | #include <stdlib.h> |
52 | | #include <assert.h> |
53 | | #include <errno.h> |
54 | | #include <time.h> |
55 | | #ifdef HAVE_LIBPSL |
56 | | # include <libpsl.h> |
57 | | #endif |
58 | | #include "utils.h" |
59 | | #include "hash.h" |
60 | | #include "cookies.h" |
61 | | #include "http.h" /* for http_atotm */ |
62 | | #include "c-strcase.h" |
63 | | |
64 | | |
65 | | /* Declarations of `struct cookie' and the most basic functions. */ |
66 | | |
/* Cookie jar serves as cookie storage and a means of retrieving
   cookies efficiently.  All cookies with the same domain are stored
   in a linked list called "chain".  A cookie chain can be reached by
   looking up the domain in the cookie jar's `chains' table.

   For example, to reach all the cookies under google.com, one must
   execute hash_table_get(jar->chains, "google.com").  Of
   course, when sending a cookie to `www.google.com', one must search
   for cookies that belong to either `www.google.com' or `google.com'
   -- but the point is that the code doesn't need to go through *all*
   the cookies.  */

struct cookie_jar {
  /* Cookie chains indexed by domain.  The table is created by
     cookie_jar_new with make_nocase_string_hash_table; each value is
     the head of a singly linked list of `struct cookie'.  */
  struct hash_table *chains;

  int cookie_count;             /* number of cookies in the jar. */
};
85 | | |
/* Value set by entry point functions, so that the low-level
   routines don't need to call time() all the time.  Among the
   functions nearby, cookie_handle_set_cookie assigns it, while
   cookie_expired_p and parse_set_cookie read it.  */
static time_t cookies_now;
89 | | |
90 | | struct cookie_jar * |
91 | | cookie_jar_new (void) |
92 | 1.88k | { |
93 | 1.88k | struct cookie_jar *jar = xnew (struct cookie_jar); |
94 | 1.88k | jar->chains = make_nocase_string_hash_table (0); |
95 | 1.88k | jar->cookie_count = 0; |
96 | 1.88k | return jar; |
97 | 1.88k | } |
98 | | |
/* A single cookie.  Cookies of one domain are linked through NEXT
   into a chain whose head is stored in the jar's `chains' table.  */
struct cookie {
  char *domain;                 /* domain of the cookie */
  int port;                     /* port number, or PORT_ANY */
  char *path;                   /* path prefix of the cookie */

  unsigned discard_requested :1;/* whether cookie was created to
                                   request discarding another
                                   cookie. */

  unsigned secure :1;           /* set when the Set-Cookie header
                                   carries the `secure' attribute;
                                   presumably restricts the cookie
                                   to https connections -- confirm
                                   against the sending code. */
  unsigned domain_exact :1;     /* whether DOMAIN must match as a
                                   whole. */

  unsigned permanent :1;        /* whether the cookie should outlive
                                   the session. */
  time_t expiry_time;           /* time when the cookie expires, 0
                                   means undetermined. */

  char *attr;                   /* cookie attribute name */
  char *value;                  /* cookie attribute value */

  struct cookie *next;          /* used for chaining of cookies in the
                                   same domain. */
};
125 | | |
126 | 5.66k | #define PORT_ANY (-1) |
127 | | |
128 | | /* Allocate and return a new, empty cookie structure. */ |
129 | | |
130 | | static struct cookie * |
131 | | cookie_new (void) |
132 | 5.66k | { |
133 | 5.66k | struct cookie *cookie = xnew0 (struct cookie); |
134 | | |
135 | | /* Both cookie->permanent and cookie->expiry_time are now 0. This |
136 | | means that the cookie doesn't expire, but is only valid for this |
137 | | session (i.e. not written out to disk). */ |
138 | | |
139 | 5.66k | cookie->port = PORT_ANY; |
140 | 5.66k | return cookie; |
141 | 5.66k | } |
142 | | |
143 | | /* Non-zero if the cookie has expired. Assumes cookies_now has been |
144 | | set by one of the entry point functions. */ |
145 | | |
146 | | static bool |
147 | | cookie_expired_p (const struct cookie *c) |
148 | 0 | { |
149 | 0 | return c->expiry_time != 0 && c->expiry_time < cookies_now; |
150 | 0 | } |
151 | | |
152 | | /* Deallocate COOKIE and its components. */ |
153 | | |
154 | | static void |
155 | | delete_cookie (struct cookie *cookie) |
156 | 5.66k | { |
157 | 5.66k | xfree (cookie->domain); |
158 | 5.66k | xfree (cookie->path); |
159 | 5.66k | xfree (cookie->attr); |
160 | 5.66k | xfree (cookie->value); |
161 | 5.66k | xfree (cookie); |
162 | 5.66k | } |
163 | | |
164 | | /* Functions for storing cookies. |
165 | | |
166 | | All cookies can be reached beginning with jar->chains. The key in |
167 | | that table is the domain name, and the value is a linked list of |
168 | | all cookies from that domain. Every new cookie is placed on the |
169 | | head of the list. */ |
170 | | |
171 | | /* Find and return a cookie in JAR whose domain, path, and attribute |
172 | | name correspond to COOKIE. If found, PREVPTR will point to the |
173 | | location of the cookie previous in chain, or NULL if the found |
174 | | cookie is the head of a chain. |
175 | | |
176 | | If no matching cookie is found, return NULL. */ |
177 | | |
178 | | static struct cookie * |
179 | | find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie, |
180 | | struct cookie **prevptr) |
181 | 1.12k | { |
182 | 1.12k | struct cookie *chain, *prev; |
183 | | |
184 | 1.12k | chain = hash_table_get (jar->chains, cookie->domain); |
185 | 1.12k | if (!chain) |
186 | 0 | goto nomatch; |
187 | | |
188 | 1.12k | prev = NULL; |
189 | 1.69k | for (; chain; prev = chain, chain = chain->next) |
190 | 1.12k | if (0 == strcmp (cookie->path, chain->path) |
191 | 568 | && 0 == strcmp (cookie->attr, chain->attr) |
192 | 568 | && cookie->port == chain->port) |
193 | 565 | { |
194 | 565 | *prevptr = prev; |
195 | 565 | return chain; |
196 | 565 | } |
197 | | |
198 | 563 | nomatch: |
199 | 563 | *prevptr = NULL; |
200 | 563 | return NULL; |
201 | 1.12k | } |
202 | | |
203 | | /* Store COOKIE to the jar. |
204 | | |
205 | | This is done by placing COOKIE at the head of its chain. However, |
206 | | if COOKIE matches a cookie already in memory, as determined by |
207 | | find_matching_cookie, the old cookie is unlinked and destroyed. |
208 | | |
209 | | The key of each chain's hash table entry is allocated only the |
210 | | first time; next hash_table_put's reuse the same key. */ |
211 | | |
212 | | static void |
213 | | store_cookie (struct cookie_jar *jar, struct cookie *cookie) |
214 | 1.69k | { |
215 | 1.69k | struct cookie *chain_head; |
216 | 1.69k | char *chain_key; |
217 | | |
218 | 1.69k | if (hash_table_get_pair (jar->chains, cookie->domain, |
219 | 1.69k | &chain_key, &chain_head)) |
220 | 1.12k | { |
221 | | /* A chain of cookies in this domain already exists. Check for |
222 | | duplicates -- if an extant cookie exactly matches our domain, |
223 | | port, path, and name, replace it. */ |
224 | 1.12k | struct cookie *prev; |
225 | 1.12k | struct cookie *victim = find_matching_cookie (jar, cookie, &prev); |
226 | | |
227 | 1.12k | if (victim) |
228 | 565 | { |
229 | | /* Remove VICTIM from the chain. COOKIE will be placed at |
230 | | the head. */ |
231 | 565 | if (prev) |
232 | 0 | { |
233 | 0 | prev->next = victim->next; |
234 | 0 | cookie->next = chain_head; |
235 | 0 | } |
236 | 565 | else |
237 | 565 | { |
238 | | /* prev is NULL; apparently VICTIM was at the head of |
239 | | the chain. This place will be taken by COOKIE, so |
240 | | all we need to do is: */ |
241 | 565 | cookie->next = victim->next; |
242 | 565 | } |
243 | 565 | delete_cookie (victim); |
244 | 565 | --jar->cookie_count; |
245 | 565 | DEBUGP (("Deleted old cookie (to be replaced.)\n")); |
246 | 565 | } |
247 | 563 | else |
248 | 563 | cookie->next = chain_head; |
249 | 1.12k | } |
250 | 566 | else |
251 | 566 | { |
252 | | /* We are now creating the chain. Use a copy of cookie->domain |
253 | | as the key for the life-time of the chain. Using |
254 | | cookie->domain would be unsafe because the life-time of the |
255 | | chain may exceed the life-time of the cookie. (Cookies may |
256 | | be deleted from the chain by this very function.) */ |
257 | 566 | cookie->next = NULL; |
258 | 566 | chain_key = xstrdup (cookie->domain); |
259 | 566 | } |
260 | | |
261 | 1.69k | hash_table_put (jar->chains, chain_key, cookie); |
262 | 1.69k | ++jar->cookie_count; |
263 | | |
264 | 1.69k | IF_DEBUG |
265 | 0 | { |
266 | 0 | time_t exptime = cookie->expiry_time; |
267 | 0 | DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n", |
268 | 0 | cookie->domain, cookie->port, |
269 | 0 | cookie->port == PORT_ANY ? " (ANY)" : "", |
270 | 0 | cookie->path, |
271 | 0 | cookie->permanent ? "permanent" : "session", |
272 | 0 | cookie->secure ? "secure" : "insecure", |
273 | 0 | cookie->expiry_time ? datetime_str (exptime) : "none", |
274 | 0 | cookie->attr, cookie->value)); |
275 | 0 | } |
276 | 1.69k | } |
277 | | |
278 | | /* Discard a cookie matching COOKIE's domain, port, path, and |
279 | | attribute name. This gets called when we encounter a cookie whose |
280 | | expiry date is in the past, or whose max-age is set to 0. The |
281 | | former corresponds to netscape cookie spec, while the latter is |
282 | | specified by rfc2109. */ |
283 | | |
284 | | static void |
285 | | discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) |
286 | 571 | { |
287 | 571 | struct cookie *prev, *victim; |
288 | | |
289 | 571 | if (!hash_table_count (jar->chains)) |
290 | | /* No elements == nothing to discard. */ |
291 | 571 | return; |
292 | | |
293 | 0 | victim = find_matching_cookie (jar, cookie, &prev); |
294 | 0 | if (victim) |
295 | 0 | { |
296 | 0 | if (prev) |
297 | | /* Simply unchain the victim. */ |
298 | 0 | prev->next = victim->next; |
299 | 0 | else |
300 | 0 | { |
301 | | /* VICTIM was head of its chain. We need to place a new |
302 | | cookie at the head. */ |
303 | 0 | char *chain_key = NULL; |
304 | 0 | int res; |
305 | |
|
306 | 0 | res = hash_table_get_pair (jar->chains, victim->domain, |
307 | 0 | &chain_key, NULL); |
308 | |
|
309 | 0 | if (res == 0) |
310 | 0 | { |
311 | 0 | logprintf (LOG_VERBOSE, _("Unable to get cookie for %s\n"), |
312 | 0 | victim->domain); |
313 | 0 | } |
314 | 0 | if (!victim->next) |
315 | 0 | { |
316 | | /* VICTIM was the only cookie in the chain. Destroy the |
317 | | chain and deallocate the chain key. */ |
318 | 0 | hash_table_remove (jar->chains, victim->domain); |
319 | 0 | xfree (chain_key); |
320 | 0 | } |
321 | 0 | else |
322 | 0 | hash_table_put (jar->chains, chain_key, victim->next); |
323 | 0 | } |
324 | 0 | delete_cookie (victim); |
325 | 0 | DEBUGP (("Discarded old cookie.\n")); |
326 | 0 | } |
327 | 0 | } |
328 | | |
329 | | /* Functions for parsing the `Set-Cookie' header, and creating new |
330 | | cookies from the wire. */ |
331 | | |
332 | | #define TOKEN_IS(token, string_literal) \ |
333 | 134k | BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal) |
334 | | |
335 | 10.9k | #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e) |
336 | | |
337 | | /* Parse the contents of the `Set-Cookie' header. The header looks |
338 | | like this: |
339 | | |
340 | | name1=value1; name2=value2; ... |
341 | | |
342 | | Trailing semicolon is optional; spaces are allowed between all |
343 | | tokens. Additionally, values may be quoted. |
344 | | |
345 | | A new cookie is returned upon success, NULL otherwise. |
346 | | |
347 | | The first name-value pair will be used to set the cookie's |
348 | | attribute name and value. Subsequent parameters will be checked |
349 | | against field names such as `domain', `path', etc. Recognized |
350 | | fields will be parsed and the corresponding members of COOKIE |
351 | | filled. */ |
352 | | |
353 | | static struct cookie * |
354 | | parse_set_cookie (const char *set_cookie, bool silent) |
355 | 5.66k | { |
356 | 5.66k | const char *ptr = set_cookie; |
357 | 5.66k | struct cookie *cookie = cookie_new (); |
358 | 5.66k | param_token name, value; |
359 | | |
360 | 5.66k | if (!extract_param (&ptr, &name, &value, ';', NULL)) |
361 | 426 | goto error; |
362 | 5.24k | if (!value.b) |
363 | 2.65k | goto error; |
364 | | |
365 | | /* If the value is quoted, do not modify it. */ |
366 | 2.58k | if (*(value.b - 1) == '"') |
367 | 153 | value.b--; |
368 | 2.58k | if (*value.e == '"') |
369 | 153 | value.e++; |
370 | | |
371 | 2.58k | cookie->attr = strdupdelim (name.b, name.e); |
372 | 2.58k | cookie->value = strdupdelim (value.b, value.e); |
373 | | |
374 | 32.5k | while (extract_param (&ptr, &name, &value, ';', NULL)) |
375 | 30.0k | { |
376 | 30.0k | if (TOKEN_IS (name, "domain")) |
377 | 1.07k | { |
378 | 1.07k | if (!TOKEN_NON_EMPTY (value)) |
379 | 6 | goto error; |
380 | 1.07k | xfree (cookie->domain); |
381 | | /* Strictly speaking, we should set cookie->domain_exact if the |
382 | | domain doesn't begin with a dot. But many sites set the |
383 | | domain to "foo.com" and expect "subhost.foo.com" to get the |
384 | | cookie, and it apparently works in browsers. */ |
385 | 1.07k | if (*value.b == '.') |
386 | 456 | ++value.b; |
387 | 1.07k | cookie->domain = strdupdelim (value.b, value.e); |
388 | 1.07k | } |
389 | 29.0k | else if (TOKEN_IS (name, "path")) |
390 | 1.40k | { |
391 | 1.40k | if (!TOKEN_NON_EMPTY (value)) |
392 | 9 | goto error; |
393 | 1.39k | xfree (cookie->path); |
394 | 1.39k | cookie->path = strdupdelim (value.b, value.e); |
395 | 1.39k | } |
396 | 27.6k | else if (TOKEN_IS (name, "expires")) |
397 | 2.95k | { |
398 | 2.95k | char value_copy[128]; |
399 | 2.95k | size_t value_len = value.e - value.b; |
400 | 2.95k | time_t expires; |
401 | | |
402 | 2.95k | if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy)) |
403 | 27 | goto error; |
404 | | |
405 | 2.92k | memcpy (value_copy, value.b, value_len); |
406 | 2.92k | value_copy[value_len] = 0; |
407 | | |
408 | | /* Check if expiration spec is valid. |
409 | | If not, assume default (cookie doesn't expire, but valid only for |
410 | | this session.) */ |
411 | 2.92k | expires = http_atotm (value_copy); |
412 | 2.92k | if (expires != (time_t) -1) |
413 | 930 | { |
414 | 930 | cookie->permanent = 1; |
415 | 930 | cookie->expiry_time = expires; |
416 | | /* According to netscape's specification, expiry time in |
417 | | the past means that discarding of a matching cookie |
418 | | is requested. */ |
419 | 930 | if (cookie->expiry_time < cookies_now) |
420 | 732 | cookie->discard_requested = 1; |
421 | 930 | } |
422 | 2.92k | } |
423 | 24.6k | else if (TOKEN_IS (name, "max-age")) |
424 | 1.30k | { |
425 | 1.30k | double maxage = -1; |
426 | 1.30k | char value_copy[32]; |
427 | 1.30k | size_t value_len = value.e - value.b; |
428 | | |
429 | 1.30k | if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy)) |
430 | 27 | goto error; |
431 | | |
432 | 1.27k | memcpy (value_copy, value.b, value_len); |
433 | 1.27k | value_copy[value_len] = 0; |
434 | | |
435 | 1.27k | sscanf (value_copy, "%lf", &maxage); |
436 | 1.27k | if (maxage == -1) |
437 | | /* something went wrong. */ |
438 | 21 | goto error; |
439 | 1.25k | cookie->permanent = 1; |
440 | 1.25k | cookie->expiry_time = cookies_now + (time_t) maxage; |
441 | | |
442 | | /* According to rfc2109, a cookie with max-age of 0 means that |
443 | | discarding of a matching cookie is requested. */ |
444 | 1.25k | if (maxage == 0) |
445 | 795 | cookie->discard_requested = 1; |
446 | 1.25k | } |
447 | 23.3k | else if (TOKEN_IS (name, "secure")) |
448 | 504 | { |
449 | | /* ignore value completely */ |
450 | 504 | cookie->secure = 1; |
451 | 504 | } |
452 | | /* else: Ignore unrecognized attribute. */ |
453 | 30.0k | } |
454 | 2.49k | if (*ptr) |
455 | | /* extract_param has encountered a syntax error */ |
456 | 69 | goto error; |
457 | | |
458 | | /* The cookie has been successfully constructed; return it. */ |
459 | 2.43k | return cookie; |
460 | | |
461 | 3.23k | error: |
462 | 3.23k | if (!silent) |
463 | 3.23k | logprintf (LOG_NOTQUIET, |
464 | 3.23k | _("Syntax error in Set-Cookie: %s at position %d.\n"), |
465 | 3.23k | quotearg_style (escape_quoting_style, set_cookie), |
466 | 3.23k | (int) (ptr - set_cookie)); |
467 | 3.23k | delete_cookie (cookie); |
468 | 3.23k | return NULL; |
469 | 2.49k | } |
470 | | |
471 | | #undef TOKEN_IS |
472 | | #undef TOKEN_NON_EMPTY |
473 | | |
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
477 | | |
478 | | |
/* Advance P over a run of one or more digits; make the enclosing
   function return false if P does not point to a digit.  */
#define REQUIRE_DIGITS(p) do {                  \
  if (!c_isdigit (*p))                          \
    return false;                               \
  for (++p; c_isdigit (*p); p++)                \
    ;                                           \
} while (0)

/* Advance P over a single '.'; make the enclosing function return
   false if P does not point to one.  */
#define REQUIRE_DOT(p) do {                     \
  if (*p++ != '.')                              \
    return false;                               \
} while (0)

/* Return true if ADDR has the shape
   <digits>.<digits>.<digits>.<digits> and nothing else.  The numeric
   range of the components is not checked.

   Network functions like inet_addr() are deliberately avoided: only
   a syntactic check is needed, preferably one that is small, fast,
   and well-defined.  */

static bool
numeric_address_p (const char *addr)
{
  const char *p = addr;
  int component;

  /* Four digit groups, each but the first preceded by a dot.  */
  for (component = 0; component < 4; component++)
    {
      if (component > 0)
        REQUIRE_DOT (p);
      REQUIRE_DIGITS (p);
    }

  /* Anything left over disqualifies the string.  */
  return *p == '\0';
}
514 | | |
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   Wget now uses libpsl to check domain names against a public suffix
   list to see if they are valid.  However, since we don't provide a
   psl on our own, if libpsl is compiled without a public suffix list,
   fall back to using the original "tail matching" heuristic.  Also if
   libpsl is unable to convert the domain to lowercase, which means that
   it doesn't have any runtime conversion support, we again fall back to
   "tail matching" since libpsl states the results are unpredictable with
   upper case strings.

   Returns true if HOST may set a cookie for COOKIE_DOMAIN, false
   otherwise.  */

#ifdef HAVE_LIBPSL
static psl_ctx_t *psl;
#endif

static bool
check_domain_match (const char *cookie_domain, const char *host)
{
#ifdef HAVE_LIBPSL
  static int init_psl;
  char *cookie_domain_lower = NULL;
  char *host_lower = NULL;
  int is_acceptable;

  DEBUGP (("cdm: 1\n"));
  if (!init_psl)
    {
      init_psl = 1;

#ifdef HAVE_PSL_LATEST
      if ((psl = psl_latest (NULL)))
        goto have_psl;

      DEBUGP (("\nPSL: Failed to load any PSL data. "
               "Falling back to insecure heuristics.\n"));
      /* PSL is NULL here.  Actually take the fallback announced
         above instead of running the libpsl check without a list,
         which would decide this call differently from all later
         ones.  */
      goto no_psl;
#else
      if ((psl = psl_builtin ()) && !psl_builtin_outdated ())
        goto have_psl;

      DEBUGP (("\nPSL: built-in data outdated. "
               "Trying to load data from %s.\n",
               quote (psl_builtin_filename ())));

      if ((psl = psl_load_file (psl_builtin_filename ())))
        goto have_psl;

      DEBUGP (("\nPSL: %s not found or not readable. "
               "Falling back to built-in data.\n",
               quote (psl_builtin_filename ())));

      if (!(psl = psl_builtin ()))
        {
          DEBUGP (("\nPSL: libpsl not built with a public suffix list. "
                   "Falling back to insecure heuristics.\n"));
          goto no_psl;
        }
#endif
    }
  else if (!psl)
    goto no_psl;

have_psl:
  if (psl_str_to_utf8lower (cookie_domain, NULL, NULL, &cookie_domain_lower) == PSL_SUCCESS &&
      psl_str_to_utf8lower (host, NULL, NULL, &host_lower) == PSL_SUCCESS)
    {
      is_acceptable = psl_is_cookie_domain_acceptable (psl, host_lower, cookie_domain_lower);
    }
  else
    {
      DEBUGP (("libpsl unable to parse domain name. "
               "Falling back to simple heuristics.\n"));
      goto no_psl;
    }

  xfree (cookie_domain_lower);
  xfree (host_lower);

  return is_acceptable == 1;

no_psl:
  /* Cleanup the PSL pointers first */
  xfree (cookie_domain_lower);
  xfree (host_lower);
#endif

  /* For efficiency make some elementary checks first */
  DEBUGP (("cdm: 2\n"));

  /* For the sake of efficiency, check for exact match first. */
  if (0 == strcasecmp (cookie_domain, host))
    return true;

  DEBUGP (("cdm: 3\n"));

  /* COOKIE_DOMAIN must match the tail of HOST. */
  if (!match_tail (host, cookie_domain, true))
    return false;

  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but when the top-level domain is unknown), with the assumption
     that those of three or less characters could be reserved.  For
     example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1;            /* number of domain components */
    int ldcl  = 0;              /* last domain component length */
    int nldcl = 0;              /* next to last domain component length */
    int out;
    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    DEBUGP (("cdm: 4\n"));
    for (out = 0; !out; p++)
      switch (*p)
        {
        case '\0':
          out = 1;
          break;
        case '.':
          if (ldcl == 0)
            /* Empty domain component found -- the domain is invalid. */
            return false;
          if (*(p + 1) == '\0')
            {
              /* Tolerate trailing '.' by not treating the domain as
                 one ending with an empty domain component.  */
              out = 1;
              break;
            }
          nldcl = ldcl;
          ldcl  = 0;
          ++dccount;
          break;
        default:
          ++ldcl;
        }

    DEBUGP (("cdm: 5\n"));

    if (dccount < 2)
      return false;

    DEBUGP (("cdm: 6\n"));

    if (dccount == 2)
      {
        size_t i;
        int known_toplevel = false;
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        for (i = 0; i < countof (known_toplevel_domains); i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], true))
            {
              known_toplevel = true;
              break;
            }
        if (!known_toplevel && nldcl <= 3)
          return false;
      }
  }

  DEBUGP (("cdm: 7\n"));

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      size_t dlen = strlen (cookie_domain);
      size_t hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return false;
    }

  DEBUGP (("cdm: 8\n"));

  return true;
}
721 | | |
static int path_matches (const char *, const char *);

/* Return true if PATH begins with COOKIE_PATH, i.e. if path_matches
   reports a non-zero result for that pair.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  int matched_len = path_matches (path, cookie_path);

  return matched_len != 0;
}
731 | | |
732 | | /* Process the HTTP `Set-Cookie' header. This results in storing the |
733 | | cookie or discarding a matching one, or ignoring it completely, all |
734 | | depending on the contents. */ |
735 | | |
736 | | void |
737 | | cookie_handle_set_cookie (struct cookie_jar *jar, |
738 | | const char *host, int port, |
739 | | const char *path, const char *set_cookie) |
740 | 5.66k | { |
741 | 5.66k | struct cookie *cookie; |
742 | 5.66k | cookies_now = time (NULL); |
743 | 5.66k | char buf[1024], *tmp; |
744 | 5.66k | size_t pathlen = strlen(path); |
745 | | |
746 | | /* Wget's paths don't begin with '/' (blame rfc1808), but cookie |
747 | | usage assumes /-prefixed paths. Until the rest of Wget is fixed, |
748 | | simply prepend slash to PATH. */ |
749 | 5.66k | if (pathlen < sizeof (buf) - 1) |
750 | 5.66k | tmp = buf; |
751 | 0 | else |
752 | 0 | tmp = xmalloc (pathlen + 2); |
753 | | |
754 | 5.66k | *tmp = '/'; |
755 | 5.66k | memcpy (tmp + 1, path, pathlen + 1); |
756 | 5.66k | path = tmp; |
757 | | |
758 | 5.66k | cookie = parse_set_cookie (set_cookie, false); |
759 | 5.66k | if (!cookie) |
760 | 3.23k | goto out; |
761 | | |
762 | | /* Sanitize parts of cookie. */ |
763 | | |
764 | 2.43k | if (!cookie->domain) |
765 | 2.23k | { |
766 | 2.23k | cookie->domain = xstrdup (host); |
767 | 2.23k | cookie->domain_exact = 1; |
768 | | /* Set the port, but only if it's non-default. */ |
769 | 2.23k | if (port != 80 && port != 443) |
770 | 1.49k | cookie->port = port; |
771 | 2.23k | } |
772 | 192 | else |
773 | 192 | { |
774 | 192 | if (!check_domain_match (cookie->domain, host)) |
775 | 168 | { |
776 | 168 | logprintf (LOG_NOTQUIET, |
777 | 168 | _("Cookie coming from %s attempted to set domain to "), |
778 | 168 | quotearg_style (escape_quoting_style, host)); |
779 | 168 | logprintf (LOG_NOTQUIET, |
780 | 168 | _("%s\n"), |
781 | 168 | quotearg_style (escape_quoting_style, cookie->domain)); |
782 | 168 | cookie->discard_requested = true; |
783 | 168 | } |
784 | 192 | } |
785 | | |
786 | 2.43k | if (!cookie->path) |
787 | 2.25k | { |
788 | | /* The cookie doesn't set path: set it to the URL path, sans the |
789 | | file part ("/dir/file" truncated to "/dir/"). */ |
790 | 2.25k | char *trailing_slash = strrchr (path, '/'); |
791 | 2.25k | if (trailing_slash) |
792 | 2.25k | cookie->path = strdupdelim (path, trailing_slash + 1); |
793 | 0 | else |
794 | | /* no slash in the string -- can this even happen? */ |
795 | 0 | cookie->path = xstrdup (path); |
796 | 2.25k | } |
797 | 180 | else |
798 | 180 | { |
799 | | /* The cookie sets its own path; verify that it is legal. */ |
800 | 180 | if (!check_path_match (cookie->path, path)) |
801 | 165 | { |
802 | 165 | DEBUGP (("Attempt to fake the path: %s, %s\n", |
803 | 165 | cookie->path, path)); |
804 | 165 | goto out; |
805 | 165 | } |
806 | 180 | } |
807 | | |
808 | | /* Now store the cookie, or discard an existing cookie, if |
809 | | discarding was requested. */ |
810 | | |
811 | 2.26k | if (cookie->discard_requested) |
812 | 571 | { |
813 | 571 | discard_matching_cookie (jar, cookie); |
814 | 571 | goto out; |
815 | 571 | } |
816 | | |
817 | 1.69k | store_cookie (jar, cookie); |
818 | 1.69k | if (tmp != buf) |
819 | 0 | xfree (tmp); |
820 | 1.69k | return; |
821 | | |
822 | 3.97k | out: |
823 | 3.97k | if (cookie) |
824 | 736 | delete_cookie (cookie); |
825 | 3.97k | if (tmp != buf) |
826 | 0 | xfree (tmp); |
827 | 3.97k | } |
828 | | |
829 | | /* Support for sending out cookies in HTTP requests, based on |
830 | | previously stored cookies. Entry point is |
831 | | `build_cookies_request'. */ |
832 | | |
/* Count the occurrences of CHR in the NUL-terminated STRING and
   return that number.  The terminating NUL is never counted.  */

static int
count_char (const char *string, char chr)
{
  int hits = 0;

  while (*string != '\0')
    {
      if (*string == chr)
        hits++;
      string++;
    }

  return hits;
}
845 | | |
846 | | /* Find the cookie chains whose domains match HOST and store them to |
847 | | DEST. |
848 | | |
849 | | A cookie chain is the head of a list of cookies that belong to a |
850 | | host/domain. Given HOST "img.search.xemacs.org", this function |
851 | | will return the chains for "img.search.xemacs.org", |
852 | | "search.xemacs.org", and "xemacs.org" -- those of them that exist |
853 | | (if any), that is. |
854 | | |
855 | | DEST should be large enough to accept (in the worst case) as many |
856 | | elements as there are domain components of HOST. */ |
857 | | |
858 | | static int |
859 | | find_chains_of_host (struct cookie_jar *jar, const char *host, |
860 | | struct cookie *dest[]) |
861 | 0 | { |
862 | 0 | int dest_count = 0; |
863 | 0 | int passes, passcnt; |
864 | | |
865 | | /* Bail out quickly if there are no cookies in the jar. */ |
866 | 0 | if (!hash_table_count (jar->chains)) |
867 | 0 | return 0; |
868 | | |
869 | 0 | if (numeric_address_p (host)) |
870 | | /* If host is an IP address, only check for the exact match. */ |
871 | 0 | passes = 1; |
872 | 0 | else |
873 | | /* Otherwise, check all the subdomains except the top-level (last) |
874 | | one. As a domain with N components has N-1 dots, the number of |
875 | | passes equals the number of dots. */ |
876 | 0 | passes = count_char (host, '.'); |
877 | |
|
878 | 0 | passcnt = 0; |
879 | | |
880 | | /* Find chains that match HOST, starting with exact match and |
881 | | progressing to less specific domains. For instance, given HOST |
882 | | fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then |
883 | | srk.fer.hr's, then fer.hr's. */ |
884 | 0 | while (1) |
885 | 0 | { |
886 | 0 | struct cookie *chain = hash_table_get (jar->chains, host); |
887 | 0 | if (chain) |
888 | 0 | dest[dest_count++] = chain; |
889 | 0 | if (++passcnt >= passes) |
890 | 0 | break; |
891 | 0 | host = strchr (host, '.') + 1; |
892 | 0 | } |
893 | |
|
894 | 0 | return dest_count; |
895 | 0 | } |
896 | | |
/* Check whether FULL_PATH starts with PREFIX.  Return zero if it does
   not; otherwise return the length of PREFIX plus one, so that even an
   empty PREFIX yields a nonzero (successful) result.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int len = strlen (prefix);

  /* A longer PREFIX means a better match, hence the length-based
     result.  */
  return strncmp (full_path, prefix, len) == 0 ? len + 1 : 0;
}
912 | | |
913 | | /* Return true if COOKIE matches the provided parameters of the URL |
914 | | being downloaded: HOST, PORT, PATH, and SECFLAG. |
915 | | |
916 | | If PATH_GOODNESS is non-NULL, store the "path goodness" value |
917 | | there. That value is a measure of how closely COOKIE matches PATH, |
918 | | used for ordering cookies. */ |
919 | | |
920 | | static bool |
921 | | cookie_matches_url (const struct cookie *cookie, |
922 | | const char *host, int port, const char *path, |
923 | | bool secflag, int *path_goodness) |
924 | 0 | { |
925 | 0 | int pg; |
926 | |
|
927 | 0 | if (cookie_expired_p (cookie)) |
928 | | /* Ignore stale cookies. Don't bother unchaining the cookie at |
929 | | this point -- Wget is a relatively short-lived application, and |
930 | | stale cookies will not be saved by `save_cookies'. On the |
931 | | other hand, this function should be as efficient as |
932 | | possible. */ |
933 | 0 | return false; |
934 | | |
935 | 0 | if (cookie->secure && !secflag) |
936 | | /* Don't transmit secure cookies over insecure connections. */ |
937 | 0 | return false; |
938 | 0 | if (cookie->port != PORT_ANY && cookie->port != port) |
939 | 0 | return false; |
940 | | |
941 | | /* If exact domain match is required, verify that cookie's domain is |
942 | | equal to HOST. If not, assume success on the grounds of the |
943 | | cookie's chain having been found by find_chains_of_host. */ |
944 | 0 | if (cookie->domain_exact |
945 | 0 | && 0 != strcasecmp (host, cookie->domain)) |
946 | 0 | return false; |
947 | | |
948 | 0 | pg = path_matches (path, cookie->path); |
949 | 0 | if (pg == 0) |
950 | 0 | return false; |
951 | | |
952 | 0 | if (path_goodness) |
953 | | /* If the caller requested path_goodness, we return it. This is |
954 | | an optimization, so that the caller doesn't need to call |
955 | | path_matches() again. */ |
956 | 0 | *path_goodness = pg; |
957 | 0 | return true; |
958 | 0 | } |
959 | | |
/* A cookie paired with the "goodness" figures computed for it while a
   request is being built.  These figures let the outgoing cookies be
   sorted before they are returned to the server, as required by the
   spec.  */

struct weighed_cookie {
  /* The cookie being ranked; it still belongs to its chain.  */
  struct cookie *cookie;
  /* Rank of the domain match; cookie_header stores the length of the
     cookie's domain here.  */
  int domain_goodness;
  /* Rank of the path match, as reported by cookie_matches_url.  */
  int path_goodness;
};
970 | | |
971 | | /* Comparator used for uniquifying the list. */ |
972 | | |
973 | | static int |
974 | | equality_comparator (const void *p1, const void *p2) |
975 | 0 | { |
976 | 0 | struct weighed_cookie *wc1 = (struct weighed_cookie *)p1; |
977 | 0 | struct weighed_cookie *wc2 = (struct weighed_cookie *)p2; |
978 | |
|
979 | 0 | int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr); |
980 | 0 | int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value); |
981 | | |
982 | | /* We only really care whether both name and value are equal. We |
983 | | return them in this order only for consistency... */ |
984 | 0 | return namecmp ? namecmp : valuecmp; |
985 | 0 | } |
986 | | |
987 | | /* Eliminate duplicate cookies. "Duplicate cookies" are any two |
988 | | cookies with the same attr name and value. Whenever a duplicate |
989 | | pair is found, one of the cookies is removed. */ |
990 | | |
991 | | static int |
992 | | eliminate_dups (struct weighed_cookie *outgoing, int count) |
993 | 0 | { |
994 | 0 | struct weighed_cookie *h; /* hare */ |
995 | 0 | struct weighed_cookie *t; /* tortoise */ |
996 | 0 | struct weighed_cookie *end = outgoing + count; |
997 | | |
998 | | /* We deploy a simple uniquify algorithm: first sort the array |
999 | | according to our sort criteria, then copy it to itself, comparing |
1000 | | each cookie to its neighbor and ignoring the duplicates. */ |
1001 | |
|
1002 | 0 | qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); |
1003 | | |
1004 | | /* "Hare" runs through all the entries in the array, followed by |
1005 | | "tortoise". If a duplicate is found, the hare skips it. |
1006 | | Non-duplicate entries are copied to the tortoise ptr. */ |
1007 | |
|
1008 | 0 | for (h = t = outgoing; h < end; h++) |
1009 | 0 | { |
1010 | 0 | if (h != end - 1) |
1011 | 0 | { |
1012 | 0 | struct cookie *c0 = h[0].cookie; |
1013 | 0 | struct cookie *c1 = h[1].cookie; |
1014 | 0 | if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value)) |
1015 | 0 | continue; /* ignore the duplicate */ |
1016 | 0 | } |
1017 | | |
1018 | | /* If the hare has advanced past the tortoise (because of |
1019 | | previous dups), make sure the values get copied. Otherwise, |
1020 | | no copying is necessary. */ |
1021 | 0 | if (h != t) |
1022 | 0 | *t++ = *h; |
1023 | 0 | else |
1024 | 0 | t++; |
1025 | 0 | } |
1026 | 0 | return t - outgoing; |
1027 | 0 | } |
1028 | | |
1029 | | /* Comparator used for sorting by quality. */ |
1030 | | |
1031 | | static int |
1032 | | goodness_comparator (const void *p1, const void *p2) |
1033 | 0 | { |
1034 | 0 | struct weighed_cookie *wc1 = (struct weighed_cookie *)p1; |
1035 | 0 | struct weighed_cookie *wc2 = (struct weighed_cookie *)p2; |
1036 | | |
1037 | | /* Subtractions take `wc2' as the first argument becauase we want a |
1038 | | sort in *decreasing* order of goodness. */ |
1039 | 0 | int dgdiff = wc2->domain_goodness - wc1->domain_goodness; |
1040 | 0 | int pgdiff = wc2->path_goodness - wc1->path_goodness; |
1041 | | |
1042 | | /* Sort by domain goodness; if these are the same, sort by path |
1043 | | goodness. (The sorting order isn't really specified; maybe it |
1044 | | should be the other way around.) */ |
1045 | 0 | return dgdiff ? dgdiff : pgdiff; |
1046 | 0 | } |
1047 | | |
1048 | | /* Generate a `Cookie' header for a request that goes to HOST:PORT and |
1049 | | requests PATH from the server. The resulting string is allocated |
1050 | | with `malloc', and the caller is responsible for freeing it. If no |
1051 | | cookies pertain to this request, i.e. no cookie header should be |
1052 | | generated, NULL is returned. */ |
1053 | | |
1054 | | char * |
1055 | | cookie_header (struct cookie_jar *jar, const char *host, |
1056 | | int port, const char *path, bool secflag) |
1057 | 0 | { |
1058 | 0 | struct cookie *chains[32]; |
1059 | 0 | int chain_count; |
1060 | |
|
1061 | 0 | struct cookie *cookie; |
1062 | 0 | struct weighed_cookie *outgoing; |
1063 | 0 | size_t count, i, ocnt; |
1064 | 0 | char *result = NULL; |
1065 | 0 | int result_size, pos; |
1066 | 0 | char pathbuf[1024]; |
1067 | | |
1068 | | /* First, find the cookie chains whose domains match HOST. */ |
1069 | | |
1070 | | /* Allocate room for find_chains_of_host to write to. The number of |
1071 | | chains can at most equal the number of subdomains, hence |
1072 | | 1+<number of dots>. We ignore cookies with more than 32 labels. */ |
1073 | 0 | chain_count = 1 + count_char (host, '.'); |
1074 | 0 | if (chain_count > (int) countof (chains)) |
1075 | 0 | return NULL; |
1076 | 0 | chain_count = find_chains_of_host (jar, host, chains); |
1077 | | |
1078 | | /* No cookies for this host. */ |
1079 | 0 | if (chain_count <= 0) |
1080 | 0 | return NULL; |
1081 | | |
1082 | | /* Wget's paths don't begin with '/' (blame rfc1808), but cookie |
1083 | | usage assumes /-prefixed paths. Until the rest of Wget is fixed, |
1084 | | simply prepend slash to PATH. */ |
1085 | 0 | { |
1086 | 0 | char *tmp; |
1087 | 0 | size_t pathlen = strlen(path); |
1088 | |
|
1089 | 0 | if (pathlen < sizeof (pathbuf) - 1) |
1090 | 0 | tmp = pathbuf; |
1091 | 0 | else |
1092 | 0 | tmp = xmalloc (pathlen + 2); |
1093 | |
|
1094 | 0 | *tmp = '/'; |
1095 | 0 | memcpy (tmp + 1, path, pathlen + 1); |
1096 | 0 | path = tmp; |
1097 | 0 | } |
1098 | |
|
1099 | 0 | cookies_now = time (NULL); |
1100 | | |
1101 | | /* Now extract from the chains those cookies that match our host |
1102 | | (for domain_exact cookies), port (for cookies with port other |
1103 | | than PORT_ANY), etc. See matching_cookie for details. */ |
1104 | | |
1105 | | /* Count the number of matching cookies. */ |
1106 | 0 | count = 0; |
1107 | 0 | for (i = 0; i < (unsigned) chain_count; i++) |
1108 | 0 | for (cookie = chains[i]; cookie; cookie = cookie->next) |
1109 | 0 | if (cookie_matches_url (cookie, host, port, path, secflag, NULL)) |
1110 | 0 | ++count; |
1111 | 0 | if (!count) |
1112 | 0 | goto out; /* no cookies matched */ |
1113 | | |
1114 | | /* Allocate the array. */ |
1115 | 0 | if (count > SIZE_MAX / sizeof (struct weighed_cookie)) |
1116 | 0 | goto out; /* unable to process so many cookies */ |
1117 | 0 | outgoing = xmalloc (count * sizeof (struct weighed_cookie)); |
1118 | | |
1119 | | /* Fill the array with all the matching cookies from the chains that |
1120 | | match HOST. */ |
1121 | 0 | ocnt = 0; |
1122 | 0 | for (i = 0; i < (unsigned) chain_count; i++) |
1123 | 0 | for (cookie = chains[i]; cookie; cookie = cookie->next) |
1124 | 0 | { |
1125 | 0 | int pg; |
1126 | 0 | if (!cookie_matches_url (cookie, host, port, path, secflag, &pg)) |
1127 | 0 | continue; |
1128 | 0 | outgoing[ocnt].cookie = cookie; |
1129 | 0 | outgoing[ocnt].domain_goodness = strlen (cookie->domain); |
1130 | 0 | outgoing[ocnt].path_goodness = pg; |
1131 | 0 | ++ocnt; |
1132 | 0 | } |
1133 | 0 | assert (ocnt == count); |
1134 | | |
1135 | | /* Eliminate duplicate cookies; that is, those whose name and value |
1136 | | are the same. */ |
1137 | 0 | count = eliminate_dups (outgoing, count); |
1138 | | |
1139 | | /* Sort the array so that best-matching domains come first, and |
1140 | | that, within one domain, best-matching paths come first. */ |
1141 | 0 | qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator); |
1142 | | |
1143 | | /* Count the space the name=value pairs will take. */ |
1144 | 0 | result_size = 0; |
1145 | 0 | for (i = 0; i < count; i++) |
1146 | 0 | { |
1147 | 0 | struct cookie *c = outgoing[i].cookie; |
1148 | | /* name=value */ |
1149 | 0 | result_size += strlen (c->attr) + 1 + strlen (c->value); |
1150 | 0 | } |
1151 | | |
1152 | | /* Allocate output buffer: |
1153 | | name=value pairs -- result_size |
1154 | | "; " separators -- (count - 1) * 2 |
1155 | | \0 terminator -- 1 */ |
1156 | 0 | result_size = result_size + (count - 1) * 2 + 1; |
1157 | 0 | result = xmalloc (result_size); |
1158 | 0 | pos = 0; |
1159 | 0 | for (i = 0; i < count; i++) |
1160 | 0 | { |
1161 | 0 | struct cookie *c = outgoing[i].cookie; |
1162 | 0 | int namlen = strlen (c->attr); |
1163 | 0 | int vallen = strlen (c->value); |
1164 | |
|
1165 | 0 | memcpy (result + pos, c->attr, namlen); |
1166 | 0 | pos += namlen; |
1167 | 0 | result[pos++] = '='; |
1168 | 0 | memcpy (result + pos, c->value, vallen); |
1169 | 0 | pos += vallen; |
1170 | 0 | if (i < count - 1) |
1171 | 0 | { |
1172 | 0 | result[pos++] = ';'; |
1173 | 0 | result[pos++] = ' '; |
1174 | 0 | } |
1175 | 0 | } |
1176 | 0 | result[pos++] = '\0'; |
1177 | 0 | xfree (outgoing); |
1178 | 0 | assert (pos == result_size); |
1179 | |
|
1180 | 0 | out: |
1181 | 0 | if (path != pathbuf) |
1182 | 0 | xfree (path); |
1183 | |
|
1184 | 0 | return result; |
1185 | 0 | } |
1186 | | |
1187 | | /* Support for loading and saving cookies. The format used for |
1188 | | loading and saving should be the format of the `cookies.txt' file |
1189 | | used by Netscape and Mozilla, at least the Unix versions. |
1190 | | (Apparently IE can export cookies in that format as well.) The |
1191 | | format goes like this: |
1192 | | |
1193 | | DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE |
1194 | | |
1195 | | DOMAIN -- cookie domain, optionally followed by :PORT |
1196 | | DOMAIN-FLAG -- whether all hosts in the domain match |
1197 | | PATH -- cookie path |
1198 | | SECURE-FLAG -- whether cookie requires secure connection |
1199 | | TIMESTAMP -- expiry timestamp, number of seconds since epoch |
1200 | | ATTR-NAME -- name of the cookie attribute |
1201 | | ATTR-VALUE -- value of the cookie attribute (empty if absent) |
1202 | | |
1203 | | The fields are separated by TABs. All fields are mandatory, except |
1204 | | for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values |
1205 | | being "TRUE" and "FALSE". Empty lines, lines consisting of |
1206 | | whitespace only, and comment lines (beginning with # optionally |
1207 | | preceded by whitespace) are ignored. |
1208 | | |
1209 | | Example line from cookies.txt (split in two lines for readability): |
1210 | | |
1211 | | .google.com TRUE / FALSE 2147368447 \ |
1212 | | PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012 |
1213 | | |
1214 | | */ |
1215 | | |
/* If the region [DOMAIN_B, DOMAIN_E) contains a `:' and everything
   after the first `:' up to DOMAIN_E consists of digits, parse those
   digits as a port number, store the location of the `:' (the new end
   of the domain) to *DOMAIN_E_PTR, and return the number.

   Return 0 and leave *DOMAIN_E_PTR alone if there is no `:', if the
   `:' is followed by anything other than digits, or if the number
   exceeds 65535, the largest valid port.  The range check also keeps
   an arbitrarily long run of digits from overflowing the accumulator.

   If nothing at all follows the `:', 0 is returned as well, but
   *DOMAIN_E_PTR is still set to the `:', which strips it from the
   domain.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);
  if (!colon)
    return 0;
  /* Only ASCII digits count, independently of the locale.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Not a valid port number; treat it as garbage.  */
        return 0;
    }
  if (p < domain_e)
    /* Garbage following port number. */
    return 0;
  *domain_e_ptr = colon;
  return port;
}
1237 | | |
/* Extract the TAB-terminated word that starts at P: set B to its
   first character and E to the TAB that ends it, then advance P past
   that TAB.  If the word is empty, or the string ends before a TAB is
   found, control jumps to the label `next', which the enclosing
   function must provide.  P, B and E are evaluated several times and
   must be plain lvalues.  */
#define GET_WORD(p, b, e) do {                          \
    (b) = (p);                                          \
    for (; *(p) != '\0' && *(p) != '\t'; ++(p))         \
      ;                                                 \
    (e) = (p);                                          \
    if ((b) == (e) || *(p) == '\0')                     \
      goto next;                                        \
    ++(p);                                              \
  } while (0)
1247 | | |
1248 | | /* Load cookies from FILE. */ |
1249 | | |
1250 | | void |
1251 | | cookie_jar_load (struct cookie_jar *jar, const char *file) |
1252 | 0 | { |
1253 | 0 | char *line = NULL; |
1254 | 0 | size_t bufsize = 0; |
1255 | |
|
1256 | 0 | FILE *fp = fopen (file, "r"); |
1257 | 0 | if (!fp) |
1258 | 0 | { |
1259 | 0 | logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"), |
1260 | 0 | quote (file), strerror (errno)); |
1261 | 0 | return; |
1262 | 0 | } |
1263 | | |
1264 | 0 | cookies_now = time (NULL); |
1265 | |
|
1266 | 0 | while (getline (&line, &bufsize, fp) > 0) |
1267 | 0 | { |
1268 | 0 | struct cookie *cookie; |
1269 | 0 | char *p = line; |
1270 | |
|
1271 | 0 | double expiry; |
1272 | 0 | int port; |
1273 | |
|
1274 | 0 | char *domain_b = NULL, *domain_e = NULL; |
1275 | 0 | char *domflag_b = NULL, *domflag_e = NULL; |
1276 | 0 | char *path_b = NULL, *path_e = NULL; |
1277 | 0 | char *secure_b = NULL, *secure_e = NULL; |
1278 | 0 | char *expires_b = NULL, *expires_e = NULL; |
1279 | 0 | char *name_b = NULL, *name_e = NULL; |
1280 | 0 | char *value_b = NULL, *value_e = NULL; |
1281 | | |
1282 | | /* Skip leading white-space. */ |
1283 | 0 | while (*p && c_isspace (*p)) |
1284 | 0 | ++p; |
1285 | | /* Ignore empty lines. */ |
1286 | 0 | if (!*p || *p == '#') |
1287 | 0 | continue; |
1288 | | |
1289 | 0 | GET_WORD (p, domain_b, domain_e); |
1290 | 0 | GET_WORD (p, domflag_b, domflag_e); |
1291 | 0 | GET_WORD (p, path_b, path_e); |
1292 | 0 | GET_WORD (p, secure_b, secure_e); |
1293 | 0 | GET_WORD (p, expires_b, expires_e); |
1294 | 0 | GET_WORD (p, name_b, name_e); |
1295 | | |
1296 | | /* Don't use GET_WORD for value because it ends with newline, |
1297 | | not TAB. */ |
1298 | 0 | value_b = p; |
1299 | 0 | value_e = p + strlen (p); |
1300 | 0 | if (value_e > value_b && value_e[-1] == '\n') |
1301 | 0 | --value_e; |
1302 | 0 | if (value_e > value_b && value_e[-1] == '\r') |
1303 | 0 | --value_e; |
1304 | | /* Empty values are legal (I think), so don't bother checking. */ |
1305 | |
|
1306 | 0 | cookie = cookie_new (); |
1307 | |
|
1308 | 0 | cookie->attr = strdupdelim (name_b, name_e); |
1309 | 0 | cookie->value = strdupdelim (value_b, value_e); |
1310 | 0 | cookie->path = strdupdelim (path_b, path_e); |
1311 | 0 | cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE"); |
1312 | | |
1313 | | /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE |
1314 | | value indicating if all machines within a given domain can |
1315 | | access the variable. This value is set automatically by the |
1316 | | browser, depending on the value set for the domain." */ |
1317 | 0 | cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE"); |
1318 | | |
1319 | | /* DOMAIN needs special treatment because we might need to |
1320 | | extract the port. */ |
1321 | 0 | port = domain_port (domain_b, domain_e, (const char **)&domain_e); |
1322 | 0 | if (port) |
1323 | 0 | cookie->port = port; |
1324 | |
|
1325 | 0 | if (*domain_b == '.') |
1326 | 0 | ++domain_b; /* remove leading dot internally */ |
1327 | 0 | cookie->domain = strdupdelim (domain_b, domain_e); |
1328 | | |
1329 | | /* safe default in case EXPIRES field is garbled. */ |
1330 | 0 | expiry = (double)cookies_now - 1; |
1331 | | |
1332 | | /* I don't like changing the line, but it's safe here. (line is |
1333 | | malloced.) */ |
1334 | 0 | *expires_e = '\0'; |
1335 | 0 | sscanf (expires_b, "%lf", &expiry); |
1336 | |
|
1337 | 0 | if (expiry == 0) |
1338 | 0 | { |
1339 | | /* EXPIRY can be 0 for session cookies saved because the |
1340 | | user specified `--keep-session-cookies' in the past. |
1341 | | They remain session cookies, and will be saved only if |
1342 | | the user has specified `keep-session-cookies' again. */ |
1343 | 0 | } |
1344 | 0 | else |
1345 | 0 | { |
1346 | 0 | if (expiry < cookies_now) |
1347 | 0 | goto abort_cookie; /* ignore stale cookie. */ |
1348 | 0 | cookie->expiry_time = (time_t) expiry; |
1349 | 0 | cookie->permanent = 1; |
1350 | 0 | } |
1351 | | |
1352 | 0 | store_cookie (jar, cookie); |
1353 | |
|
1354 | 0 | next: |
1355 | 0 | continue; |
1356 | | |
1357 | 0 | abort_cookie: |
1358 | 0 | delete_cookie (cookie); |
1359 | 0 | } |
1360 | | |
1361 | 0 | xfree(line); |
1362 | 0 | fclose (fp); |
1363 | 0 | } |
1364 | | |
1365 | | /* Save cookies, in format described above, to FILE. */ |
1366 | | |
1367 | | void |
1368 | | cookie_jar_save (struct cookie_jar *jar, const char *file) |
1369 | 0 | { |
1370 | 0 | FILE *fp; |
1371 | 0 | hash_table_iterator iter; |
1372 | |
|
1373 | 0 | DEBUGP (("Saving cookies to %s.\n", file)); |
1374 | |
|
1375 | 0 | cookies_now = time (NULL); |
1376 | |
|
1377 | 0 | fp = fopen (file, "w"); |
1378 | 0 | if (!fp) |
1379 | 0 | { |
1380 | 0 | logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"), |
1381 | 0 | quote (file), strerror (errno)); |
1382 | 0 | return; |
1383 | 0 | } |
1384 | | |
1385 | 0 | fputs ("# HTTP Cookie File\n", fp); |
1386 | 0 | fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now)); |
1387 | 0 | fputs ("# Edit at your own risk.\n\n", fp); |
1388 | |
|
1389 | 0 | for (hash_table_iterate (jar->chains, &iter); |
1390 | 0 | hash_table_iter_next (&iter); |
1391 | 0 | ) |
1392 | 0 | { |
1393 | 0 | const char *domain = iter.key; |
1394 | 0 | struct cookie *cookie = iter.value; |
1395 | 0 | for (; cookie; cookie = cookie->next) |
1396 | 0 | { |
1397 | 0 | if (!cookie->permanent && !opt.keep_session_cookies) |
1398 | 0 | continue; |
1399 | 0 | if (cookie_expired_p (cookie)) |
1400 | 0 | continue; |
1401 | 0 | if (!cookie->domain_exact) |
1402 | 0 | fputc ('.', fp); |
1403 | 0 | fputs (domain, fp); |
1404 | 0 | if (cookie->port != PORT_ANY) |
1405 | 0 | fprintf (fp, ":%d", cookie->port); |
1406 | 0 | fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n", |
1407 | 0 | cookie->domain_exact ? "FALSE" : "TRUE", |
1408 | 0 | cookie->path, cookie->secure ? "TRUE" : "FALSE", |
1409 | 0 | (double)cookie->expiry_time, |
1410 | 0 | cookie->attr, cookie->value); |
1411 | 0 | if (ferror (fp)) |
1412 | 0 | goto out; |
1413 | 0 | } |
1414 | 0 | } |
1415 | 0 | out: |
1416 | 0 | if (ferror (fp)) |
1417 | 0 | logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"), |
1418 | 0 | quote (file), strerror (errno)); |
1419 | 0 | if (fclose (fp) < 0) |
1420 | 0 | logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"), |
1421 | 0 | quote (file), strerror (errno)); |
1422 | |
|
1423 | 0 | DEBUGP (("Done saving cookies.\n")); |
1424 | 0 | } |
1425 | | |
1426 | | /* Clean up cookie-related data. */ |
1427 | | |
1428 | | void |
1429 | | cookie_jar_delete (struct cookie_jar *jar) |
1430 | 1.88k | { |
1431 | | /* Iterate over chains (indexed by domain) and free them. */ |
1432 | 1.88k | hash_table_iterator iter; |
1433 | 2.45k | for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); ) |
1434 | 566 | { |
1435 | 566 | struct cookie *chain = iter.value; |
1436 | 566 | xfree (iter.key); |
1437 | | /* Then all cookies in this chain. */ |
1438 | 1.69k | while (chain) |
1439 | 1.12k | { |
1440 | 1.12k | struct cookie *next = chain->next; |
1441 | 1.12k | delete_cookie (chain); |
1442 | 1.12k | chain = next; |
1443 | 1.12k | } |
1444 | 566 | } |
1445 | 1.88k | hash_table_destroy (jar->chains); |
1446 | 1.88k | xfree (jar); |
1447 | | |
1448 | 1.88k | #ifdef HAVE_LIBPSL |
1449 | 1.88k | psl_free (psl); |
1450 | | psl = NULL; |
1451 | 1.88k | #endif |
1452 | 1.88k | } |
1453 | | |
1454 | | /* Test cases. Currently these only test parse_set_cookie. To |
1455 | | use, recompile Wget with -DTEST_COOKIES and call test_cookies() |
1456 | | from main. */ |
1457 | | |
1458 | | #ifdef TEST_COOKIES |
1459 | | void |
1460 | | test_cookies (void) |
1461 | | { |
1462 | | /* Tests expected to succeed: */ |
1463 | | static struct { |
1464 | | const char *data; |
1465 | | const char *results[10]; |
1466 | | } tests_succ[] = { |
1467 | | { "arg=value", {"arg", "value", NULL} }, |
1468 | | { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, |
1469 | | { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, |
1470 | | { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} }, |
1471 | | { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} }, |
1472 | | { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} }, |
1473 | | { "arg=", {"arg", "", NULL} }, |
1474 | | { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} }, |
1475 | | { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} }, |
1476 | | }; |
1477 | | |
1478 | | /* Tests expected to fail: */ |
1479 | | static char *tests_fail[] = { |
1480 | | ";", |
1481 | | "arg=\"unterminated", |
1482 | | "=empty-name", |
1483 | | "arg1=;=another-empty-name", |
1484 | | }; |
1485 | | int i; |
1486 | | |
1487 | | for (i = 0; i < countof (tests_succ); i++) |
1488 | | { |
1489 | | int ind; |
1490 | | const char *data = tests_succ[i].data; |
1491 | | const char **expected = tests_succ[i].results; |
1492 | | struct cookie *c; |
1493 | | |
1494 | | c = parse_set_cookie (data, true); |
1495 | | if (!c) |
1496 | | { |
1497 | | printf ("NULL cookie returned for valid data: %s\n", data); |
1498 | | continue; |
1499 | | } |
1500 | | |
1501 | | /* Test whether extract_param handles these cases correctly. */ |
1502 | | { |
1503 | | param_token name, value; |
1504 | | const char *ptr = data; |
1505 | | int j = 0; |
1506 | | while (extract_param (&ptr, &name, &value, ';', NULL)) |
1507 | | { |
1508 | | char *n = strdupdelim (name.b, name.e); |
1509 | | char *v = strdupdelim (value.b, value.e); |
1510 | | if (!expected[j]) |
1511 | | { |
1512 | | printf ("Too many parameters for '%s'\n", data); |
1513 | | break; |
1514 | | } |
1515 | | if (0 != strcmp (expected[j], n)) |
1516 | | printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n", |
1517 | | j / 2 + 1, data, expected[j], n); |
1518 | | if (0 != strcmp (expected[j + 1], v)) |
1519 | | printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n", |
1520 | | j / 2 + 1, data, expected[j + 1], v); |
1521 | | j += 2; |
1522 | | xfree (n); |
1523 | | xfree (v); |
1524 | | } |
1525 | | if (expected[j]) |
1526 | | printf ("Too few parameters for '%s'\n", data); |
1527 | | } |
1528 | | } |
1529 | | |
1530 | | for (i = 0; i < countof (tests_fail); i++) |
1531 | | { |
1532 | | struct cookie *c; |
1533 | | char *data = tests_fail[i]; |
1534 | | c = parse_set_cookie (data, true); |
1535 | | if (c) |
1536 | | printf ("Failed to report error on invalid data: %s\n", data); |
1537 | | } |
1538 | | } |
1539 | | #endif /* TEST_COOKIES */ |