/src/wget2/libwget/http_parse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2017-2024 Free Software Foundation, Inc. |
3 | | * |
4 | | * This file is part of libwget. |
5 | | * |
6 | | * Libwget is free software: you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser General Public License as published by |
8 | | * the Free Software Foundation, either version 3 of the License, or |
9 | | * (at your option) any later version. |
10 | | * |
11 | | * Libwget is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public License |
17 | | * along with libwget. If not, see <https://www.gnu.org/licenses/>. |
18 | | * |
19 | | * |
20 | | * HTTP parsing routines |
21 | | * |
22 | | * Resources: |
23 | | * RFC 2616 |
24 | | * RFC 6265 |
25 | | * |
26 | | */ |
27 | | |
28 | | #include <config.h> |
29 | | |
30 | | #include <stdio.h> |
31 | | #include <stdlib.h> |
32 | | #include <string.h> |
33 | | #include <c-ctype.h> |
34 | | #include <time.h> |
35 | | #include <errno.h> |
36 | | #include <stdint.h> |
37 | | |
38 | | #include <wget.h> |
39 | | #include "private.h" |
40 | | #include "http.h" |
41 | | |
// Bit flag used in http_ctype[]: the octet is an HTTP separator character.
#define HTTP_CTYPE_SEPARATOR (1<<0)

// Classification table indexed by octet value.
// Every octet that is not listed is zero-initialized, i.e. carries no flag.
static const unsigned char
	http_ctype[256] = {
		// white space
		[' '] = HTTP_CTYPE_SEPARATOR,
		['\t'] = HTTP_CTYPE_SEPARATOR,
		// brackets
		['('] = HTTP_CTYPE_SEPARATOR,
		[')'] = HTTP_CTYPE_SEPARATOR,
		['<'] = HTTP_CTYPE_SEPARATOR,
		['>'] = HTTP_CTYPE_SEPARATOR,
		['['] = HTTP_CTYPE_SEPARATOR,
		[']'] = HTTP_CTYPE_SEPARATOR,
		['{'] = HTTP_CTYPE_SEPARATOR,
		['}'] = HTTP_CTYPE_SEPARATOR,
		// punctuation
		['@'] = HTTP_CTYPE_SEPARATOR,
		[','] = HTTP_CTYPE_SEPARATOR,
		[';'] = HTTP_CTYPE_SEPARATOR,
		[':'] = HTTP_CTYPE_SEPARATOR,
		['\\'] = HTTP_CTYPE_SEPARATOR,
		['\"'] = HTTP_CTYPE_SEPARATOR,
		['/'] = HTTP_CTYPE_SEPARATOR,
		['?'] = HTTP_CTYPE_SEPARATOR,
		['='] = HTTP_CTYPE_SEPARATOR
	};

// Returns true if 'c' is flagged as a separator in http_ctype[].
static inline bool http_isseparator(char c)
{
	// go through unsigned char so that a negative 'char' cannot index before the table
	const unsigned char idx = (unsigned char) c;

	return (http_ctype[idx] & HTTP_CTYPE_SEPARATOR) != 0;
}
71 | | |
72 | | /**Gets the hostname of the remote endpoint. |
73 | | * \param conn a wget_http_connection |
74 | | * \return A string containing hostname. Returned memory is owned by |
75 | | * _conn_ and should not be modified or freed. |
76 | | */ |
77 | | const char *wget_http_get_host(const wget_http_connection *conn) |
78 | 0 | { |
79 | 0 | return conn->esc_host; |
80 | 0 | } |
81 | | |
82 | | /**Gets the port number of the remote endpoint. |
83 | | * \param conn a wget_http_connection |
84 | | * \return A string containing port number. Returned memory is owned by |
85 | | * _conn_ and should not be modified or freed. |
86 | | */ |
87 | | uint16_t wget_http_get_port(const wget_http_connection *conn) |
88 | 0 | { |
89 | 0 | return conn->port; |
90 | 0 | } |
91 | | |
92 | | /**Get the scheme used by the connection. |
93 | | * \param conn a wget_http_connection |
94 | | * \return A WGET_IRI_SCHEM_* value. |
95 | | */ |
96 | | wget_iri_scheme wget_http_get_scheme(const wget_http_connection *conn) |
97 | 0 | { |
98 | 0 | return conn->scheme; |
99 | 0 | } |
100 | | |
101 | | /**Gets the protocol used by the connection |
102 | | * \param conn a wget_http_connection |
103 | | * \return Either WGET_PROTOCOL_HTTP_1_1 or WGET_PROTOCOL_HTTP_2_0 |
104 | | */ |
105 | | int wget_http_get_protocol(const wget_http_connection *conn) |
106 | 0 | { |
107 | 0 | return conn->protocol; |
108 | 0 | } |
109 | | |
/**
 * Exported wrapper around the file-local separator check.
 *
 * \param c character to classify
 * \return true if _c_ is a separator character, false otherwise
 */
bool wget_http_isseparator(char c)
{
	const bool separator = http_isseparator(c);

	return separator;
}
114 | | |
115 | | // TEXT = <any OCTET except CTLs, but including LWS> |
116 | | //int http_istext(char c) |
117 | | //{ |
118 | | // return (c>=32 && c<=126) || c=='\r' || c=='\n' || c=='\t'; |
119 | | //} |
120 | | |
// token = 1*<any CHAR except CTLs or separators>

/**
 * Checks whether a character may be part of a token.
 *
 * \param c character to classify
 * \return true if _c_ is in the range 33..126 and is not a separator
 */
bool wget_http_istoken(char c)
{
	// control characters, space, DEL and everything outside 7-bit ASCII are rejected
	if (c <= 32 || c > 126)
		return false;

	return !http_isseparator(c);
}
127 | | |
/**
 * Parses a token from the start of a string.
 *
 * \param s input string
 * \param token receives a copy of the (possibly empty) run of token characters
 * at the start of _s_, created with wget_strmemdup()
 * \return Pointer to the first character of _s_ that is not a token character
 */
const char *wget_http_parse_token(const char *s, const char **token)
{
	const char *start = s;

	while (wget_http_istoken(*s))
		s++;

	*token = wget_strmemdup(start, s - start);

	return s;
}
138 | | |
139 | | // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) |
140 | | // qdtext = <any TEXT except <">> |
141 | | // quoted-pair = "\" CHAR |
142 | | // TEXT = <any OCTET except CTLs, but including LWS> |
143 | | // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)> |
144 | | // LWS = [CRLF] 1*( SP | HT ) |
145 | | |
146 | | const char *wget_http_parse_quoted_string(const char *s, const char **qstring) |
147 | 3.56k | { |
148 | 3.56k | if (*s == '\"') { |
149 | 3.56k | const char *p = ++s; |
150 | | |
151 | | // relaxed scanning |
152 | 21.0k | while (*s) { |
153 | 19.0k | if (*s == '\"') break; |
154 | 17.4k | else if (*s == '\\' && s[1]) { |
155 | 603 | s += 2; |
156 | 603 | } else |
157 | 16.8k | s++; |
158 | 19.0k | } |
159 | | |
160 | 3.56k | *qstring = wget_strmemdup(p, s - p); |
161 | 3.56k | if (*s == '\"') s++; |
162 | 3.56k | } else |
163 | 0 | *qstring = NULL; |
164 | | |
165 | 3.56k | return s; |
166 | 3.56k | } |
167 | | |
168 | | // generic-param = token [ EQUAL gen-value ] |
169 | | // gen-value = token / host / quoted-string |
170 | | |
171 | | const char *wget_http_parse_param(const char *s, const char **param, const char **value) |
172 | 38.1k | { |
173 | 38.1k | const char *p; |
174 | | |
175 | 38.1k | *param = *value = NULL; |
176 | | |
177 | 38.8k | while (c_isblank(*s)) s++; |
178 | | |
179 | 38.1k | if (*s == ';') { |
180 | 3.35k | s++; |
181 | 4.28k | while (c_isblank(*s)) s++; |
182 | 3.35k | } |
183 | 38.1k | if (!*s) return s; |
184 | | |
185 | 108k | for (p = s; wget_http_istoken(*s); s++); |
186 | 37.0k | *param = wget_strmemdup(p, s - p); |
187 | | |
188 | 37.6k | while (c_isblank(*s)) s++; |
189 | | |
190 | 37.0k | if (*s && *s++ == '=') { |
191 | 20.5k | while (c_isblank(*s)) s++; |
192 | 20.1k | if (*s == '\"') { |
193 | 2.93k | s = wget_http_parse_quoted_string(s, value); |
194 | 17.1k | } else { |
195 | 17.1k | s = wget_http_parse_token(s, value); |
196 | 17.1k | } |
197 | 20.1k | } |
198 | | |
199 | 37.0k | return s; |
200 | 38.1k | } |
201 | | |
202 | | // message-header = field-name ":" [ field-value ] |
203 | | // field-name = token |
204 | | // field-value = *( field-content | LWS ) |
205 | | // field-content = <the OCTETs making up the field-value |
206 | | // and consisting of either *TEXT or combinations |
207 | | // of token, separators, and quoted-string> |
208 | | |
/**
 * Parses the field name of a header line.
 *
 * \param s input string
 * \param name receives a copy of the token found after leading blanks
 * \return Pointer behind the first ':' following the name, or pointer to the
 * end of the input if there is no ':'
 */
const char *wget_http_parse_name(const char *s, const char **name)
{
	for (; c_isblank(*s); s++)
		;

	s = wget_http_parse_token(s, name);

	// everything between the name and the colon is skipped
	for (; *s; s++) {
		if (*s == ':')
			return s + 1;
	}

	return s;
}
219 | | |
/**
 * Parses the field name of a header line without allocating memory.
 *
 * \param s input string
 * \param name receives a pointer into _s_ where the name starts
 * (after leading blanks); the name is not NUL-terminated
 * \param namelen receives the length of the name in bytes
 * \return Pointer behind the first ':' following the name, or pointer to the
 * end of the input if there is no ':'
 */
const char *wget_parse_name_fixed(const char *s, const char **name, size_t *namelen)
{
	for (; c_isblank(*s); s++)
		;

	const char *start = s;

	for (; wget_http_istoken(*s); s++)
		;

	*name = start;
	*namelen = s - start;

	// everything between the name and the colon is skipped
	for (; *s; s++) {
		if (*s == ':')
			return s + 1;
	}

	return s;
}
235 | | |
236 | | static int WGET_GCC_NONNULL_ALL compare_param(wget_http_header_param *p1, wget_http_header_param *p2) |
237 | 0 | { |
238 | 0 | return wget_strcasecmp_ascii(p1->name, p2->name); |
239 | 0 | } |
240 | | |
241 | | void wget_http_add_param(wget_vector **params, wget_http_header_param *param) |
242 | 0 | { |
243 | 0 | if (!*params) *params = wget_vector_create(4, (wget_vector_compare_fn *) compare_param); |
244 | 0 | wget_vector_add_memdup(*params, param, sizeof(*param)); |
245 | 0 | } |
246 | | |
247 | | /* |
248 | | Link = "Link" ":" #link-value |
249 | | link-value = "<" URI-Reference ">" *( ";" link-param ) |
250 | | link-param = ( ( "rel" "=" relation-types ) |
251 | | | ( "anchor" "=" <"> URI-Reference <"> ) |
252 | | | ( "rev" "=" relation-types ) |
253 | | | ( "hreflang" "=" Language-Tag ) |
254 | | | ( "media" "=" ( MediaDesc | ( <"> MediaDesc <"> ) ) ) |
255 | | | ( "title" "=" quoted-string ) |
256 | | | ( "title*" "=" ext-value ) |
257 | | | ( "type" "=" ( media-type | quoted-mt ) ) |
258 | | | ( link-extension ) ) |
259 | | link-extension = ( parmname [ "=" ( ptoken | quoted-string ) ] ) |
260 | | | ( ext-name-star "=" ext-value ) |
261 | | ext-name-star = parmname "*" ; reserved for RFC2231-profiled |
262 | | ; extensions. Whitespace NOT |
263 | | ; allowed in between. |
264 | | ptoken = 1*ptokenchar |
265 | | ptokenchar = "!" | "#" | "$" | "%" | "&" | "'" | "(" |
266 | | | ")" | "*" | "+" | "-" | "." | "/" | DIGIT |
267 | | | ":" | "<" | "=" | ">" | "?" | "@" | ALPHA |
268 | | | "[" | "]" | "^" | "_" | "`" | "{" | "|" |
269 | | | "}" | "~" |
270 | | media-type = type-name "/" subtype-name |
271 | | quoted-mt = <"> media-type <"> |
272 | | relation-types = relation-type |
273 | | | <"> relation-type *( 1*SP relation-type ) <"> |
274 | | relation-type = reg-rel-type | ext-rel-type |
275 | | reg-rel-type = LOALPHA *( LOALPHA | DIGIT | "." | "-" ) |
276 | | ext-rel-type = URI |
277 | | */ |
278 | | const char *wget_http_parse_link(const char *s, wget_http_link *link) |
279 | 1.68k | { |
280 | 1.68k | memset(link, 0, sizeof(*link)); |
281 | | |
282 | 2.06k | while (c_isblank(*s)) s++; |
283 | | |
284 | 1.68k | if (*s == '<') { |
285 | | // URI reference as of RFC 3987 (if relative, resolve as of RFC 3986) |
286 | 1.19k | const char *p = s + 1; |
287 | 1.19k | if ((s = strchr(p, '>')) != NULL) { |
288 | 932 | const char *name = NULL, *value = NULL; |
289 | | |
290 | 932 | link->uri = wget_strmemdup(p, s - p); |
291 | 932 | s++; |
292 | | |
293 | 1.31k | while (c_isblank(*s)) s++; |
294 | | |
295 | 2.83k | while (*s == ';') { |
296 | 1.90k | s = wget_http_parse_param(s, &name, &value); |
297 | 1.90k | if (name && value) { |
298 | 1.12k | if (!wget_strcasecmp_ascii(name, "rel")) { |
299 | 194 | if (!wget_strcasecmp_ascii(value, "describedby")) |
300 | 0 | link->rel = link_rel_describedby; |
301 | 194 | else if (!wget_strcasecmp_ascii(value, "duplicate")) |
302 | 0 | link->rel = link_rel_duplicate; |
303 | 930 | } else if (!wget_strcasecmp_ascii(name, "pri")) { |
304 | 280 | link->pri = atoi(value); |
305 | 650 | } else if (!wget_strcasecmp_ascii(name, "type")) { |
306 | 400 | if (!link->type) { |
307 | 206 | link->type = value; |
308 | 206 | value = NULL; |
309 | 206 | } |
310 | 400 | } |
311 | | // http_add_param(&link->params,¶m); |
312 | 1.51k | while (c_isblank(*s)) s++; |
313 | 1.12k | } |
314 | | |
315 | 1.90k | xfree(name); |
316 | 1.90k | xfree(value); |
317 | 1.90k | } |
318 | | |
319 | | // if (!msg->contacts) msg->contacts=vec_create(1,1,NULL); |
320 | | // vec_add(msg->contacts,&contact,sizeof(contact)); |
321 | | |
322 | 1.45k | while (*s && !c_isblank(*s)) s++; |
323 | 932 | } |
324 | 1.19k | } |
325 | | |
326 | 1.68k | return s; |
327 | 1.68k | } |
328 | | |
329 | | // from RFC 3230: |
330 | | // Digest = "Digest" ":" #(instance-digest) |
331 | | // instance-digest = digest-algorithm "=" <encoded digest output> |
332 | | // digest-algorithm = token |
333 | | |
334 | | const char *wget_http_parse_digest(const char *s, wget_http_digest *digest) |
335 | 7.20k | { |
336 | 7.20k | memset(digest, 0, sizeof(*digest)); |
337 | | |
338 | 7.58k | while (c_isblank(*s)) s++; |
339 | 7.20k | s = wget_http_parse_token(s, &digest->algorithm); |
340 | | |
341 | 7.62k | while (c_isblank(*s)) s++; |
342 | | |
343 | 7.20k | if (*s == '=') { |
344 | 1.32k | s++; |
345 | 1.86k | while (c_isblank(*s)) s++; |
346 | 1.32k | if (*s == '\"') { |
347 | 638 | s = wget_http_parse_quoted_string(s, &digest->encoded_digest); |
348 | 690 | } else { |
349 | 690 | const char *p; |
350 | | |
351 | 1.29k | for (p = s; *s && !c_isblank(*s) && *s != ',' && *s != ';'; s++); |
352 | 690 | digest->encoded_digest = wget_strmemdup(p, s - p); |
353 | 690 | } |
354 | 1.32k | } |
355 | | |
356 | 8.17k | while (*s && !c_isblank(*s)) s++; |
357 | | |
358 | 7.20k | return s; |
359 | 7.20k | } |
360 | | |
361 | | // RFC 2617: |
362 | | // challenge = auth-scheme 1*SP 1#auth-param |
363 | | // auth-scheme = token |
364 | | // auth-param = token "=" ( token | quoted-string ) |
365 | | |
366 | | const char *wget_http_parse_challenge(const char *s, wget_http_challenge *challenge) |
367 | 3.05k | { |
368 | 3.05k | memset(challenge, 0, sizeof(*challenge)); |
369 | | |
370 | 3.61k | while (c_isblank(*s)) s++; |
371 | 3.05k | s = wget_http_parse_token(s, &challenge->auth_scheme); |
372 | | |
373 | 3.05k | if (*s == ' ') |
374 | 1.67k | s++; // Auth scheme must have a space at the end of the token |
375 | 1.38k | else { |
376 | | // parse/syntax error |
377 | 1.38k | xfree(challenge->auth_scheme); |
378 | 1.38k | return s; |
379 | 1.38k | } |
380 | | |
381 | 1.67k | wget_http_header_param param; |
382 | 7.07k | do { |
383 | 7.07k | const char *old = s; |
384 | 7.07k | s = wget_http_parse_param(s, ¶m.name, ¶m.value); |
385 | 7.07k | if (param.name) { |
386 | 6.88k | if (*param.name && !param.value) { |
387 | 202 | xfree(param.name); |
388 | 202 | return old; // a new scheme detected |
389 | 202 | } |
390 | | |
391 | 6.67k | if (!param.value) { |
392 | 585 | xfree(param.name); |
393 | 585 | continue; |
394 | 585 | } |
395 | | |
396 | 6.09k | if (!challenge->params) |
397 | 1.07k | challenge->params = wget_stringmap_create_nocase(8); |
398 | 6.09k | wget_stringmap_put(challenge->params, param.name, param.value); |
399 | 6.09k | } |
400 | | |
401 | 6.67k | while (c_isblank(*s)) s++; |
402 | | |
403 | 6.28k | if (*s != ',') break; |
404 | 5.02k | else if (*s) s++; |
405 | 6.28k | } while (*s); |
406 | | |
407 | 1.47k | return s; |
408 | 1.67k | } |
409 | | |
410 | | const char *wget_http_parse_challenges(const char *s, wget_vector *challenges) |
411 | 0 | { |
412 | 0 | wget_http_challenge challenge; |
413 | |
|
414 | 0 | while (*s) { |
415 | 0 | s = wget_http_parse_challenge(s, &challenge); |
416 | 0 | if (challenge.auth_scheme) { |
417 | 0 | wget_vector_add_memdup(challenges, &challenge, sizeof(challenge)); |
418 | 0 | } |
419 | 0 | } |
420 | |
|
421 | 0 | return s; |
422 | 0 | } |
423 | | |
/**
 * Parses the value of a Location header.
 *
 * The value extends up to the end of the line (CR, LF or end of input).
 * Blanks inside the value are kept, leading and trailing blanks are removed.
 *
 * \param s input string
 * \param location receives a copy of the value
 * \return Pointer behind the last character of the copied value
 */
const char *wget_http_parse_location(const char *s, const char **location)
{
	while (c_isblank(*s))
		s++;

	/*
	 * The correct (and still lenient) variant was:
	 *   for (p = s; *s && !c_isblank(*s); s++);
	 *
	 * And then there were spaces in the URI, see
	 *   https://gitlab.com/gnuwget/wget2/issues/420
	 */
	const char *start = s;

	while (*s && *s != '\r' && *s != '\n')
		s++;

	// remove trailing spaces (OWS - optional white space)
	while (s > start && c_isblank(s[-1]))
		s--;

	*location = wget_strmemdup(start, s - start);

	return s;
}
445 | | |
446 | | // Transfer-Encoding = "Transfer-Encoding" ":" 1#transfer-coding |
447 | | // transfer-coding = "chunked" | transfer-extension |
448 | | // transfer-extension = token *( ";" parameter ) |
449 | | // parameter = attribute "=" value |
450 | | // attribute = token |
451 | | // value = token | quoted-string |
452 | | |
453 | | const char *wget_http_parse_transfer_encoding(const char *s, wget_transfer_encoding *transfer_encoding) |
454 | 1.28k | { |
455 | 2.56k | while (c_isblank(*s)) s++; |
456 | | |
457 | 1.28k | if (!wget_strcasecmp_ascii(s, "identity")) |
458 | 194 | *transfer_encoding = wget_transfer_encoding_identity; |
459 | 1.09k | else |
460 | 1.09k | *transfer_encoding = wget_transfer_encoding_chunked; |
461 | | |
462 | 7.57k | while (wget_http_istoken(*s)) s++; |
463 | | |
464 | 1.28k | return s; |
465 | 1.28k | } |
466 | | |
467 | | // Content-Type = "Content-Type" ":" media-type |
468 | | // media-type = type "/" subtype *( ";" parameter ) |
469 | | // type = token |
470 | | // subtype = token |
471 | | // example: Content-Type: text/html; charset=ISO-8859-4 |
472 | | |
473 | | const char *wget_http_parse_content_type(const char *s, const char **content_type, const char **charset) |
474 | 2.03k | { |
475 | 2.03k | wget_http_header_param param; |
476 | 2.03k | const char *p; |
477 | | |
478 | 2.93k | while (c_isblank(*s)) s++; |
479 | | |
480 | 3.15k | for (p = s; *s && (wget_http_istoken(*s) || *s == '/'); s++); |
481 | 2.03k | if (content_type) |
482 | 100 | *content_type = wget_strmemdup(p, s - p); |
483 | | |
484 | 2.03k | if (charset) { |
485 | 2.03k | *charset = NULL; |
486 | | |
487 | 12.7k | while (*s) { |
488 | 10.9k | s=wget_http_parse_param(s, ¶m.name, ¶m.value); |
489 | 10.9k | if (!wget_strcasecmp_ascii("charset", param.name)) { |
490 | 204 | xfree(param.name); |
491 | 204 | *charset = param.value; |
492 | 204 | break; |
493 | 204 | } |
494 | 10.7k | xfree(param.name); |
495 | 10.7k | xfree(param.value); |
496 | 10.7k | } |
497 | 2.03k | } |
498 | | |
499 | 2.03k | return s; |
500 | 2.03k | } |
501 | | |
502 | | // RFC 6266 - Use of the Content-Disposition Header Field in the Hypertext Transfer Protocol (HTTP) |
503 | | // content-disposition = "Content-Disposition" ":" disposition-type *( ";" disposition-parm ) |
504 | | // disposition-type = "inline" | "attachment" | disp-ext-type ; case-insensitive |
505 | | // disp-ext-type = token |
506 | | // disposition-parm = filename-parm | disp-ext-parm |
507 | | // filename-parm = "filename" "=" value | "filename*" "=" ext-value |
508 | | // disp-ext-parm = token "=" value | ext-token "=" ext-value |
509 | | // ext-token = <the characters in token, followed by "*"> |
510 | | // |
511 | | // Defined in [RFC2616]: |
512 | | // |
513 | | // token = <token, defined in [RFC2616], Section 2.2> |
514 | | // quoted-string = <quoted-string, defined in [RFC2616], Section 2.2> |
515 | | // value = <value, defined in [RFC2616], Section 3.6> ; token | quoted-string |
516 | | // |
517 | | // Defined in [RFC5987]: |
518 | | // |
519 | | // ext-value = <ext-value, defined in [RFC5987], Section 3.2> |
520 | | |
521 | | const char *wget_http_parse_content_disposition(const char *s, const char **filename) |
522 | 1.36k | { |
523 | 1.36k | wget_http_header_param param; |
524 | 1.36k | char *p; |
525 | | |
526 | 1.36k | if (filename) { |
527 | 1.36k | *filename = NULL; |
528 | | |
529 | 3.22k | while (*s && !*filename) { |
530 | 2.59k | s = wget_http_parse_param(s, ¶m.name, ¶m.value); |
531 | 2.59k | if (param.value && !wget_strcasecmp_ascii("filename", param.name)) { |
532 | | // just take the last path part as filename |
533 | 179 | if (!*filename) { |
534 | 179 | if ((p = strpbrk(param.value,"/\\"))) { |
535 | 2 | p = wget_strdup(p + 1); |
536 | 177 | } else { |
537 | 177 | p = (char *) param.value; |
538 | 177 | param.value = NULL; |
539 | 177 | } |
540 | | |
541 | 179 | wget_percent_unescape(p); |
542 | 179 | if (!wget_str_is_valid_utf8(p)) { |
543 | | // if it is not UTF-8, assume ISO-8859-1 |
544 | | // see https://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http |
545 | 80 | *filename = wget_str_to_utf8(p, "iso-8859-1"); |
546 | 80 | xfree(p); |
547 | 99 | } else { |
548 | 99 | *filename = p; |
549 | 99 | p = NULL; |
550 | 99 | } |
551 | 179 | } |
552 | 2.41k | } else if (param.value && !wget_strcasecmp_ascii("filename*", param.name)) { |
553 | | // RFC5987 |
554 | | // ext-value = charset "'" [ language ] "'" value-chars |
555 | | // ; like RFC 2231's <extended-initial-value> |
556 | | // ; (see [RFC2231], Section 7) |
557 | | |
558 | | // charset = "UTF-8" / "ISO-8859-1" / mime-charset |
559 | | |
560 | | // mime-charset = 1*mime-charsetc |
561 | | // mime-charsetc = ALPHA / DIGIT |
562 | | // / "!" / "#" / "$" / "%" / "&" |
563 | | // / "+" / "-" / "^" / "_" / "`" |
564 | | // / "{" / "}" / "~" |
565 | | // ; as <mime-charset> in Section 2.3 of [RFC2978] |
566 | | // ; except that the single quote is not included |
567 | | // ; SHOULD be registered in the IANA charset registry |
568 | | |
569 | | // language = <Language-Tag, defined in [RFC5646], Section 2.1> |
570 | | |
571 | | // value-chars = *( pct-encoded / attr-char ) |
572 | | |
573 | | // pct-encoded = "%" HEXDIG HEXDIG |
574 | | // ; see [RFC3986], Section 2.1 |
575 | | |
576 | | // attr-char = ALPHA / DIGIT |
577 | | // / "!" / "#" / "$" / "&" / "+" / "-" / "." |
578 | | // / "^" / "_" / "`" / "|" / "~" |
579 | | // ; token except ( "*" / "'" / "%" ) |
580 | | |
581 | 1.32k | if ((p = strchr(param.value, '\''))) { |
582 | 1.12k | const char *charset = param.value; |
583 | 1.12k | const char *language = p + 1; |
584 | 1.12k | *p = 0; |
585 | 1.12k | if ((p = strchr(language, '\''))) { |
586 | 930 | *p++ = 0; |
587 | 930 | if (*p) { |
588 | 736 | wget_percent_unescape(p); |
589 | 736 | if (wget_str_needs_encoding(p)) |
590 | 719 | *filename = wget_str_to_utf8(p, charset); |
591 | 17 | else |
592 | 17 | *filename = wget_strdup(p); |
593 | | |
594 | | // just take the last path part as filename |
595 | 736 | if (*filename && (p = strpbrk(*filename, "/\\"))) { |
596 | 1 | p = wget_strdup(p + 1); |
597 | 1 | xfree(*filename); |
598 | 1 | *filename = p; |
599 | 1 | } |
600 | | |
601 | 736 | xfree(param.name); |
602 | 736 | xfree(param.value); |
603 | 736 | break; // stop looping, we found the final filename |
604 | 736 | } |
605 | 930 | } |
606 | 1.12k | } |
607 | 1.32k | } |
608 | 1.85k | xfree(param.name); |
609 | 1.85k | xfree(param.value); |
610 | 1.85k | } |
611 | 1.36k | } |
612 | | |
613 | 1.36k | return s; |
614 | 1.36k | } |
615 | | |
616 | | // RFC 7469 |
617 | | // Example: |
618 | | // Public-Key-Pins: |
619 | | // pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM="; |
620 | | // pin-sha256="E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g="; |
621 | | // pin-sha256="LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ="; |
622 | | // max-age=10000; includeSubDomains |
623 | | const char *wget_http_parse_public_key_pins(const char *s, wget_hpkp *hpkp) |
624 | 1.02k | { |
625 | 1.02k | wget_http_header_param param; |
626 | | |
627 | 1.02k | wget_hpkp_set_include_subdomains(hpkp, false); |
628 | | |
629 | 14.4k | while (*s) { |
630 | 13.4k | s = wget_http_parse_param(s, ¶m.name, ¶m.value); |
631 | | |
632 | 13.4k | if (param.value) { |
633 | 6.41k | if (!wget_strcasecmp_ascii(param.name, "max-age")) { |
634 | 929 | wget_hpkp_set_maxage(hpkp, (int64_t) atoll(param.value)); |
635 | 5.48k | } else if (!wget_strncasecmp_ascii(param.name, "pin-", 4)) { |
636 | 3.93k | wget_hpkp_pin_add(hpkp, param.name + 4, param.value); |
637 | 3.93k | } |
638 | 7.02k | } else { |
639 | 7.02k | if (!wget_strcasecmp_ascii(param.name, "includeSubDomains")) |
640 | 383 | wget_hpkp_set_include_subdomains(hpkp, true); |
641 | 7.02k | } |
642 | | |
643 | 13.4k | xfree(param.name); |
644 | 13.4k | xfree(param.value); |
645 | 13.4k | } |
646 | | |
647 | 1.02k | return s; |
648 | 1.02k | } |
649 | | |
650 | | // RFC 6797 |
651 | | // |
652 | | // Strict-Transport-Security = "Strict-Transport-Security" ":" [ directive ] *( ";" [ directive ] ) |
653 | | // directive = directive-name [ "=" directive-value ] |
654 | | // directive-name = token |
655 | | // directive-value = token | quoted-string |
656 | | |
657 | | const char *wget_http_parse_strict_transport_security(const char *s, int64_t *maxage, bool *include_subdomains) |
658 | 1.11k | { |
659 | 1.11k | wget_http_header_param param; |
660 | | |
661 | 1.11k | *maxage = 0; |
662 | 1.11k | *include_subdomains = 0; |
663 | | |
664 | 3.34k | while (*s) { |
665 | 2.22k | s = wget_http_parse_param(s, ¶m.name, ¶m.value); |
666 | | |
667 | 2.22k | if (param.value) { |
668 | 938 | if (!wget_strcasecmp_ascii(param.name, "max-age")) { |
669 | 248 | *maxage = (int64_t) atoll(param.value); |
670 | 248 | } |
671 | 1.29k | } else { |
672 | 1.29k | if (!wget_strcasecmp_ascii(param.name, "includeSubDomains")) { |
673 | 194 | *include_subdomains = 1; |
674 | 194 | } |
675 | 1.29k | } |
676 | | |
677 | 2.22k | xfree(param.name); |
678 | 2.22k | xfree(param.value); |
679 | 2.22k | } |
680 | | |
681 | 1.11k | return s; |
682 | 1.11k | } |
683 | | |
684 | | // Content-Encoding = "Content-Encoding" ":" 1#content-coding |
685 | | |
686 | | const char *wget_http_parse_content_encoding(const char *s, char *content_encoding) |
687 | 2.74k | { |
688 | 3.26k | while (c_isblank(*s)) s++; |
689 | | |
690 | 2.74k | if (!wget_strcasecmp_ascii(s, "gzip") || !wget_strcasecmp_ascii(s, "x-gzip")) |
691 | 388 | *content_encoding = wget_content_encoding_gzip; |
692 | 2.35k | else if (!wget_strcasecmp_ascii(s, "deflate")) |
693 | 194 | *content_encoding = wget_content_encoding_deflate; |
694 | 2.15k | else if (!wget_strcasecmp_ascii(s, "bzip2")) |
695 | 194 | *content_encoding = wget_content_encoding_bzip2; |
696 | 1.96k | else if (!wget_strcasecmp_ascii(s, "xz") || !wget_strcasecmp_ascii(s, "lzma") || !wget_strcasecmp_ascii(s, "x-lzma")) |
697 | | // 'xz' is the tag currently understood by Firefox (2.1.2014) |
698 | | // 'lzma' / 'x-lzma' are the tags currently understood by ELinks |
699 | 585 | *content_encoding = wget_content_encoding_lzma; |
700 | 1.38k | else if (!wget_strcasecmp_ascii(s, "br")) |
701 | 324 | *content_encoding = wget_content_encoding_brotli; |
702 | 1.05k | else if (!wget_strcasecmp_ascii(s, "zstd")) |
703 | 194 | *content_encoding = wget_content_encoding_zstd; |
704 | 862 | else if (!wget_strcasecmp_ascii(s, "lzip")) |
705 | 194 | *content_encoding = wget_content_encoding_lzip; |
706 | 668 | else |
707 | 668 | *content_encoding = wget_content_encoding_identity; |
708 | | |
709 | 11.8k | while (wget_http_istoken(*s)) s++; |
710 | | |
711 | 2.74k | return s; |
712 | 2.74k | } |
713 | | |
/**
 * Parses the value of a Connection header (a comma-separated list).
 *
 * \param s input string
 * \param keep_alive receives true if any list element starts (after leading
 * blanks) with "keep-alive", compared case-insensitively; else false
 * \return Pointer to the terminating NUL byte of the input
 */
const char *wget_http_parse_connection(const char *s, bool *keep_alive)
{
	*keep_alive = false;

	for (;;) {
		// 'e' points to the ',' that ends the current element or to the end of the input
		const char *e = strchrnul(s, ',');

		if (e != s) {
			while (c_isblank(*s)) s++;

			if (!wget_strncasecmp_ascii(s, "keep-alive", 10))
				*keep_alive = true;
		}

		// Do not step over the terminating NUL byte,
		// the caller must get back a pointer into the string.
		if (!*e)
			return e;

		s = e + 1;
	}
}
731 | | |
/**
 * Parses the value of an ETag header.
 *
 * \param s input string
 * \param etag receives a copy of the value, which extends from the first
 * non-blank character up to the next blank or the end of the input
 * \return Pointer behind the copied value
 */
const char *wget_http_parse_etag(const char *s, const char **etag)
{
	while (c_isblank(*s))
		s++;

	const char *start = s;

	while (*s && !c_isblank(*s))
		s++;

	*etag = wget_strmemdup(start, s - start);

	return s;
}
743 | | |
744 | | /* |
745 | | // returns GMT/UTC time as an integer of format YYYYMMDDHHMMSS |
746 | | // this makes us independent from size of time_t - work around possible year 2038 problems |
747 | | static long long NONNULL_ALL parse_rfc1123_date(const char *s) |
748 | | { |
749 | | // we simply can't use strptime() since it requires us to setlocale() |
750 | | // which is not thread-safe !!! |
751 | | static const char *mnames[12] = { |
752 | | "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" |
753 | | }; |
754 | | static int days_per_month[12] = { |
755 | | 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 |
756 | | }; |
757 | | int day, mon = 0, year, hour, min, sec, leap, it; |
758 | | char mname[4] = ""; |
759 | | |
760 | | if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) { |
761 | | // RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT |
762 | | } |
763 | | else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) { |
764 | | // RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14 |
765 | | } |
766 | | else if (sscanf(s, " %*[a-zA-Z], %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) { |
767 | | // ANSI C's asctime(): Wed Jun 09 10:18:14 2021 |
768 | | } else { |
769 | | error_printf(_("Failed to parse date '%s'\n"), s); |
770 | | return 0; // return as session cookie |
771 | | } |
772 | | |
773 | | if (*mname) { |
774 | | for (it = 0; it < countof(mnames); it++) { |
775 | | if (!wget_strcasecmp_ascii(mname, mnames[it])) { |
776 | | mon = it + 1; |
777 | | break; |
778 | | } |
779 | | } |
780 | | } |
781 | | |
782 | | if (year < 70 && year >= 0) year += 2000; |
783 | | else if (year >= 70 && year <= 99) year += 1900; |
784 | | |
785 | | if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)) |
786 | | leap = 1; |
787 | | else |
788 | | leap = 0; |
789 | | |
790 | | // we don't handle leap seconds |
791 | | |
792 | | if (year < 1601 || mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap) || |
793 | | hour < 0 || hour > 23 || min < 0 || min > 60 || sec < 0 || sec > 60) |
794 | | { |
795 | | error_printf(_("Failed to parse date '%s'\n"), s); |
796 | | return 0; // return as session cookie |
797 | | } |
798 | | |
799 | | return(((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec; |
800 | | } |
801 | | */ |
802 | | |
803 | | // copied this routine from |
804 | | // https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/pkgtools/libnbcompat/files/timegm.c |
805 | | |
// Returns the number of leap years in the range [y1, y2),
// i.e. the number of leap days between Jan 1st of y1 and Jan 1st of y2.
static int leap_days(int y1, int y2)
{
	const int first = y1 - 1;
	const int last = y2 - 1;

	const int every_4 = last / 4 - first / 4;       // each 4th year is a leap year ...
	const int every_100 = last / 100 - first / 100; // ... except each 100th year ...
	const int every_400 = last / 400 - first / 400; // ... unless it is a 400th year

	return every_4 - every_100 + every_400;
}
812 | | |
813 | | /* |
814 | | RFC 2616, 3.3.1 Full Date |
815 | | HTTP-date = rfc1123-date | rfc850-date | asctime-date |
816 | | rfc1123-date = wkday "," SP date1 SP time SP "GMT" |
817 | | rfc850-date = weekday "," SP date2 SP time SP "GMT" |
818 | | asctime-date = wkday SP date3 SP time SP 4DIGIT |
819 | | date1 = 2DIGIT SP month SP 4DIGIT |
820 | | ; day month year (e.g., 02 Jun 1982) |
821 | | date2 = 2DIGIT "-" month "-" 2DIGIT |
822 | | ; day-month-year (e.g., 02-Jun-82) |
823 | | date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) |
824 | | ; month day (e.g., Jun 2) |
825 | | time = 2DIGIT ":" 2DIGIT ":" 2DIGIT |
826 | | ; 00:00:00 - 23:59:59 |
827 | | wkday = "Mon" | "Tue" | "Wed" |
828 | | | "Thu" | "Fri" | "Sat" | "Sun" |
829 | | weekday = "Monday" | "Tuesday" | "Wednesday" |
830 | | | "Thursday" | "Friday" | "Saturday" | "Sunday" |
831 | | month = "Jan" | "Feb" | "Mar" | "Apr" |
832 | | | "May" | "Jun" | "Jul" | "Aug" |
833 | | | "Sep" | "Oct" | "Nov" | "Dec" |
834 | | */ |
835 | | |
/**
 * Parse a date string as found in HTTP headers and cookies.
 *
 * Accepted layouts (leading whitespace is skipped, anything following the
 * parsed fields - e.g. a time zone name - is ignored):
 *   - RFC 822 / 1123:       Wed, 09 Jun 2021 10:18:14 GMT
 *   - RFC 850 / Netscape:   Wednesday, 09-Jun-21 10:18:14
 *   - asctime():            Wed Jun 09 10:18:14 2021
 *   - non-standard:         1 Mar 2027 09:23:12 GMT
 *   - non-standard:         Sun Nov 26 2023 21:24:47
 *
 * Two-digit years 0..69 are mapped to 2000..2069 and 70..99 to 1970..1999.
 * Years before 1970 are clamped to 1970.
 *
 * \param[in] s 0-terminated date string
 * \return Seconds since 1970-01-01 00:00:00 (the input is taken as GMT/UTC),
 *   or 0 if the string could not be parsed or contains out-of-range fields
 */
int64_t wget_http_parse_full_date(const char *s)
{
	// we simply can't use strptime() since it requires us to setlocale()
	// which is not thread-safe !!!
	static const char *const mnames[12] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	static const int days_per_month[12] = {
		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
	};
	// cumulated number of days until beginning of month for non-leap years
	static const int sum_of_days[12] = {
		0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
	};

	int day, mon = 0, year, hour, min, sec, leap_month, leap_year, days;
	char mname[4] = "";

	// Try the known layouts in turn; the first one that yields all six fields wins.
	if (sscanf(s, " %*[a-zA-Z], %2d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
		// RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
	} else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
		// RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
	} else if (sscanf(s, " %*[a-zA-Z] %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) == 6) {
		// ANSI C's asctime(): Wed Jun 09 10:18:14 2021
	} else if (sscanf(s, " %d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
		// non-standard: 1 Mar 2027 09:23:12 GMT
	} else if (sscanf(s, " %*s %3s %2d %4d %2d:%2d:%2d", mname, &day, &year, &hour, &min, &sec) == 6) {
		// non-standard: Sun Nov 26 2023 21:24:47
	} else {
		error_printf(_("Failed to parse date '%s'\n"), s);
		return 0; // return as session cookie
	}

	// Translate the month name into 1..12; mon stays 0 if the name is unknown.
	if (*mname) {
		for (unsigned it = 0; it < countof(mnames); it++) {
			if (!wget_strcasecmp_ascii(mname, mnames[it])) {
				mon = it + 1;
				break;
			}
		}
	}

	// Expand two-digit years and clamp everything before the epoch.
	if (year < 70 && year >= 0) year += 2000;
	else if (year >= 70 && year <= 99) year += 1900;
	if (year < 1970) year = 1970;

	// we don't handle leap seconds

	leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
	leap_month = (mon == 2 && leap_year);

	// Minutes are limited to 0..59. A seconds value of 60 is still tolerated
	// so that a leap second time stamp is not rejected; in the arithmetic
	// below it counts as the first second of the following minute.
	if (mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap_month) ||
		hour < 0 || hour > 23 || min < 0 || min > 59 || sec < 0 || sec > 60)
	{
		error_printf(_("Failed to parse date '%s'\n"), s);
		return 0; // return as session cookie
	}

	// calculate time_t (represented as int64_t) from GMT/UTC time values

	days = 365 * (year - 1970) + leap_days(1970, year);
	days += sum_of_days[mon - 1] + (mon > 2 && leap_year);
	days += day - 1;

	return (((int64_t)days * 24 + hour) * 60 + min) * 60 + sec;
}
902 | | |
/**
 * Format a point in time as an RFC 1123 date, e.g. "Wed, 09 Jun 2021 10:18:14 GMT".
 *
 * \param[in] t Seconds since 1970-01-01 00:00:00 GMT/UTC
 * \param[out] buf Buffer that receives the 0-terminated date string
 * \param[in] bufsize Size of \p buf in bytes
 * \return \p buf; it is left untouched if \p bufsize is 0 and set to the
 *   empty string if the time value cannot be converted
 */
char *wget_http_print_date(int64_t t, char *buf, size_t bufsize)
{
	static const char *dnames[7] = {
		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
	};
	static const char *mnames[12] = {
		"Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};
	struct tm tm;
	time_t tt;

	if (!bufsize)
		return buf;

	// Decide by the real width of time_t instead of by the data model macro:
	// LLP64 platforms and 32-bit ABIs with a 64-bit time_t can hold the full
	// value, only a narrow time_t needs saturation.
	if (sizeof(time_t) < sizeof(t)) {
		// narrow time_t (assumed to be a signed 32-bit type): saturate at both ends
		if (t > INT32_MAX)
			tt = (time_t) INT32_MAX;
		else if (t < INT32_MIN)
			tt = (time_t) INT32_MIN;
		else
			tt = (time_t) t;
	} else {
		tt = (time_t) t; // time_t is wide enough
	}

	if (gmtime_r(&tt, &tm)) {
		wget_snprintf(buf, bufsize, "%s, %02d %s %d %02d:%02d:%02d GMT",
			dnames[tm.tm_wday],tm.tm_mday,mnames[tm.tm_mon],tm.tm_year+1900,
			tm.tm_hour, tm.tm_min, tm.tm_sec);
	} else
		*buf = 0;

	return buf;
}
936 | | |
937 | | // adjust time (t) by number of seconds (n) |
938 | | /* |
939 | | static long long adjust_time(long long t, int n) |
940 | | { |
941 | | static int days_per_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; |
942 | | int day, mon, year, hour, min, sec, leap; |
943 | | |
944 | | sec = t % 100; |
945 | | min = (t /= 100) % 100; |
946 | | hour = (t /= 100) % 100; |
947 | | day = (t /= 100) % 100; |
948 | | mon = (t /= 100) % 100; |
949 | | year = t / 100; |
950 | | |
951 | | sec += n; |
952 | | |
953 | | if (n >= 0) { |
954 | | if (sec >= 60) { |
955 | | min += sec / 60; |
956 | | sec %= 60; |
957 | | } |
958 | | if (min >= 60) { |
959 | | hour += min / 60; |
960 | | min %= 60; |
961 | | } |
962 | | if (hour >= 24) { |
963 | | day += hour / 24; |
964 | | hour %= 24; |
965 | | } |
966 | | while (1) { |
967 | | if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)) |
968 | | leap = 1; |
969 | | else |
970 | | leap = 0; |
971 | | if (day > days_per_month[mon - 1] + leap) { |
972 | | day -= (days_per_month[mon - 1] + leap); |
973 | | mon++; |
974 | | if (mon > 12) { |
975 | | mon = 1; |
976 | | year++; |
977 | | } |
978 | | } else break; |
979 | | } |
980 | | } else { // n<0 |
981 | | if (sec < 0) { |
982 | | min += (sec - 59) / 60; |
983 | | sec = 59 + (sec + 1) % 60; |
984 | | } |
985 | | if (min < 0) { |
986 | | hour += (min - 59) / 60; |
987 | | min = 59 + (min + 1) % 60; |
988 | | } |
989 | | if (hour < 0) { |
990 | | day += (hour - 23) / 24; |
991 | | hour = 23 + (hour + 1) % 24; |
992 | | } |
993 | | for (;;) { |
994 | | if (day <= 0) { |
995 | | if (--mon < 1) { |
996 | | mon = 12; |
997 | | year--; |
998 | | } |
999 | | if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)) |
1000 | | leap = 1; |
1001 | | else |
1002 | | leap = 0; |
1003 | | day += (days_per_month[mon - 1] + leap); |
1004 | | } else break; |
1005 | | } |
1006 | | } |
1007 | | |
1008 | | return (((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec; |
1009 | | } |
1010 | | |
1011 | | // return current GMT/UTC |
1012 | | |
1013 | | static int64_t get_current_time(void) |
1014 | | { |
1015 | | int64_t t = time(NULL); |
1016 | | struct tm tm; |
1017 | | |
1018 | | gmtime_r(&t, &tm); |
1019 | | |
1020 | | return (((((int64_t)(tm.tm_year + 1900)*100 + tm.tm_mon + 1)*100 + tm.tm_mday)*100 + tm.tm_hour)*100 + tm.tm_min)*100 + tm.tm_sec; |
1021 | | } |
1022 | | */ |
1023 | | |
1024 | | /* |
1025 | | RFC 6265 |
1026 | | |
1027 | | set-cookie-header = "Set-Cookie:" SP set-cookie-string |
1028 | | set-cookie-string = cookie-pair *( ";" SP cookie-av ) |
1029 | | cookie-pair = cookie-name "=" cookie-value |
1030 | | cookie-name = token |
1031 | | cookie-value = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE ) |
1032 | | cookie-octet = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E |
1033 | | ; US-ASCII characters excluding CTLs, |
1034 | | ; whitespace DQUOTE, comma, semicolon, |
1035 | | ; and backslash |
1036 | | token = <token, defined in [RFC2616], Section 2.2> |
1037 | | |
1038 | | cookie-av = expires-av / max-age-av / domain-av / |
1039 | | path-av / secure-av / httponly-av / |
1040 | | extension-av |
1041 | | expires-av = "Expires=" sane-cookie-date |
1042 | | sane-cookie-date = <rfc1123-date, defined in [RFC2616], Section 3.3.1> |
1043 | | max-age-av = "Max-Age=" non-zero-digit *DIGIT |
1044 | | ; In practice, both expires-av and max-age-av |
1045 | | ; are limited to dates representable by the |
1046 | | ; user agent. |
1047 | | non-zero-digit = %x31-39 |
1048 | | ; digits 1 through 9 |
1049 | | domain-av = "Domain=" domain-value |
1050 | | domain-value = <subdomain> |
1051 | | ; defined in [RFC1034], Section 3.5, as |
1052 | | ; enhanced by [RFC1123], Section 2.1 |
1053 | | path-av = "Path=" path-value |
1054 | | path-value = <any CHAR except CTLs or ";"> |
1055 | | secure-av = "Secure" |
1056 | | httponly-av = "HttpOnly" |
1057 | | extension-av = <any CHAR except CTLs or ";"> |
1058 | | */ |
1059 | | const char *wget_http_parse_setcookie(const char *s, wget_cookie **cookie) |
1060 | 3.60k | { |
1061 | 3.60k | return wget_cookie_parse_setcookie(s, cookie); |
1062 | 3.60k | } |
1063 | | |
1064 | | static void cookie_free(void *cookie) |
1065 | 2.47k | { |
1066 | 2.47k | if (cookie) |
1067 | 2.47k | wget_cookie_free((wget_cookie **) &cookie); |
1068 | 2.47k | } |
1069 | | |
1070 | | int wget_http_parse_header_line(wget_http_response *resp, const char *name, size_t namelen, const char *value, size_t valuelen) |
1071 | 34.6k | { |
1072 | 34.6k | if (!name || !value) |
1073 | 0 | return WGET_E_INVALID; |
1074 | | |
1075 | 34.6k | char valuebuf[256]; |
1076 | 34.6k | char *value0; |
1077 | 34.6k | int ret = WGET_E_SUCCESS; |
1078 | | |
1079 | 34.6k | value0 = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), value, valuelen); |
1080 | 34.6k | if (!value0) |
1081 | 0 | return WGET_E_MEMORY; |
1082 | | |
1083 | 34.6k | switch (*name | 0x20) { |
1084 | 611 | case ':': |
1085 | 611 | if (!memcmp(name, ":status", namelen) && valuelen == 3) { |
1086 | 205 | resp->code = ((value[0] - '0') * 10 + (value[1] - '0')) * 10 + (value[2] - '0'); |
1087 | 205 | } else |
1088 | 406 | ret = WGET_E_UNKNOWN; |
1089 | 611 | break; |
1090 | 7.67k | case 'c': |
1091 | 7.67k | if (!wget_strncasecmp_ascii(name, "content-encoding", namelen)) { |
1092 | 2.74k | wget_http_parse_content_encoding(value0, &resp->content_encoding); |
1093 | 4.93k | } else if (!wget_strncasecmp_ascii(name, "content-type", namelen)) { |
1094 | 294 | if (!resp->content_type && !resp->content_type_encoding) |
1095 | 100 | wget_http_parse_content_type(value0, &resp->content_type, &resp->content_type_encoding); |
1096 | 4.63k | } else if (!wget_strncasecmp_ascii(name, "content-length", namelen)) { |
1097 | 834 | resp->content_length = (size_t)atoll(value0); |
1098 | 834 | resp->content_length_valid = 1; |
1099 | 3.80k | } else if (!wget_strncasecmp_ascii(name, "content-disposition", namelen)) { |
1100 | 1.56k | if (!resp->content_filename) |
1101 | 1.36k | wget_http_parse_content_disposition(value0, &resp->content_filename); |
1102 | 2.24k | } else if (!wget_strncasecmp_ascii(name, "connection", namelen)) { |
1103 | 1.73k | wget_http_parse_connection(value0, &resp->keep_alive); |
1104 | 1.73k | } else if (!wget_strncasecmp_ascii(name, "Content-Security-Policy", namelen)) { |
1105 | 195 | resp->csp = 1; |
1106 | 195 | } else |
1107 | 309 | ret = WGET_E_UNKNOWN; |
1108 | 7.67k | break; |
1109 | 7.42k | case 'd': |
1110 | 7.42k | if (!wget_strncasecmp_ascii(name, "digest", namelen)) { |
1111 | | // https://tools.ietf.org/html/rfc3230 |
1112 | 7.20k | wget_http_digest digest; |
1113 | 7.20k | wget_http_parse_digest(value0, &digest); |
1114 | | // debug_printf("%s: %s\n",digest.algorithm,digest.encoded_digest); |
1115 | 7.20k | if (!resp->digests) { |
1116 | 281 | resp->digests = wget_vector_create(4, NULL); |
1117 | 281 | wget_vector_set_destructor(resp->digests, (wget_vector_destructor *) wget_http_free_digest); |
1118 | 281 | } |
1119 | 7.20k | wget_vector_add_memdup(resp->digests, &digest, sizeof(digest)); |
1120 | 7.20k | } else |
1121 | 218 | ret = WGET_E_UNKNOWN; |
1122 | 7.42k | break; |
1123 | 453 | case 'e': |
1124 | 453 | if (!wget_strncasecmp_ascii(name, "etag", namelen)) { |
1125 | 242 | if (!resp->etag) |
1126 | 48 | wget_http_parse_etag(value0, &resp->etag); |
1127 | 242 | } else |
1128 | 211 | ret = WGET_E_UNKNOWN; |
1129 | 453 | break; |
1130 | 444 | case 'i': |
1131 | 444 | if (!wget_strncasecmp_ascii(name, "icy-metaint", namelen)) { |
1132 | 200 | resp->icy_metaint = atoi(value0); |
1133 | 200 | } else |
1134 | 244 | ret = WGET_E_UNKNOWN; |
1135 | 444 | break; |
1136 | 2.74k | case 'l': |
1137 | 2.74k | if (!wget_strncasecmp_ascii(name, "last-modified", namelen)) { |
1138 | | // Last-Modified: Thu, 07 Feb 2008 15:03:24 GMT |
1139 | 307 | resp->last_modified = wget_http_parse_full_date(value0); |
1140 | 2.44k | } else if (resp->code / 100 == 3 && !wget_strncasecmp_ascii(name, "location", namelen)) { |
1141 | 296 | if (!resp->location) |
1142 | 67 | wget_http_parse_location(value0, &resp->location); |
1143 | 2.14k | } else if (resp->code / 100 == 3 && !wget_strncasecmp_ascii(name, "link", namelen)) { |
1144 | | // debug_printf("s=%.31s\n",s); |
1145 | 1.68k | wget_http_link link; |
1146 | 1.68k | wget_http_parse_link(value0, &link); |
1147 | | // debug_printf("link->uri=%s\n",link.uri); |
1148 | 1.68k | if (!resp->links) { |
1149 | 239 | resp->links = wget_vector_create(8, NULL); |
1150 | 239 | wget_vector_set_destructor(resp->links, (wget_vector_destructor *) wget_http_free_link); |
1151 | 239 | } |
1152 | 1.68k | wget_vector_add_memdup(resp->links, &link, sizeof(link)); |
1153 | 1.68k | } else |
1154 | 455 | ret = WGET_E_UNKNOWN; |
1155 | 2.74k | break; |
1156 | 1.71k | case 'p': |
1157 | 1.71k | if (!wget_strncasecmp_ascii(name, "public-key-pins", namelen)) { |
1158 | 1.22k | if (!resp->hpkp) { |
1159 | 1.02k | resp->hpkp = wget_hpkp_new(); |
1160 | 1.02k | wget_http_parse_public_key_pins(value0, resp->hpkp); |
1161 | 1.02k | debug_printf("new host pubkey pinnings added to hpkp db\n"); |
1162 | 1.02k | } |
1163 | 1.22k | } |
1164 | 484 | else if (!wget_strncasecmp_ascii(name, "proxy-authenticate", namelen)) { |
1165 | 267 | wget_http_challenge *challenge = wget_malloc(sizeof(wget_http_challenge)); |
1166 | | |
1167 | 267 | if (!challenge) { |
1168 | 0 | ret = WGET_E_MEMORY; |
1169 | 0 | goto out; |
1170 | 0 | } |
1171 | | |
1172 | 267 | wget_http_parse_challenge(value0, challenge); |
1173 | | |
1174 | 267 | if (!resp->challenges) { |
1175 | 8 | resp->challenges = wget_vector_create(2, NULL); |
1176 | 8 | wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge); |
1177 | 8 | } |
1178 | 267 | wget_vector_add(resp->challenges, challenge); |
1179 | 267 | } else |
1180 | 217 | ret = WGET_E_UNKNOWN; |
1181 | 1.71k | break; |
1182 | 4.96k | case 's': |
1183 | 4.96k | if (!wget_strncasecmp_ascii(name, "set-cookie", namelen)) { |
1184 | | // this is a parser. content validation must be done by higher level functions. |
1185 | 3.60k | wget_cookie *cookie; |
1186 | 3.60k | wget_http_parse_setcookie(value0, &cookie); |
1187 | | |
1188 | 3.60k | if (cookie) { |
1189 | 2.47k | if (!resp->cookies) { |
1190 | 774 | resp->cookies = wget_vector_create(4, NULL); |
1191 | 774 | wget_vector_set_destructor(resp->cookies, cookie_free); |
1192 | 774 | } |
1193 | 2.47k | wget_vector_add(resp->cookies, cookie); |
1194 | 2.47k | } |
1195 | 3.60k | } |
1196 | 1.35k | else if (!wget_strncasecmp_ascii(name, "strict-transport-security", namelen)) { |
1197 | 1.11k | resp->hsts = 1; |
1198 | 1.11k | wget_http_parse_strict_transport_security(value0, &resp->hsts_maxage, &resp->hsts_include_subdomains); |
1199 | 1.11k | } else |
1200 | 235 | ret = WGET_E_UNKNOWN; |
1201 | 4.96k | break; |
1202 | 1.49k | case 't': |
1203 | 1.49k | if (!wget_strncasecmp_ascii(name, "transfer-encoding", namelen)) { |
1204 | 1.28k | wget_http_parse_transfer_encoding(value0, &resp->transfer_encoding); |
1205 | 1.28k | } else |
1206 | 207 | ret = WGET_E_UNKNOWN; |
1207 | 1.49k | break; |
1208 | 2.99k | case 'w': |
1209 | 2.99k | if (!wget_strncasecmp_ascii(name, "www-authenticate", namelen)) { |
1210 | 2.79k | wget_http_challenge *challenge = wget_malloc(sizeof(wget_http_challenge)); |
1211 | | |
1212 | 2.79k | if (!challenge) { |
1213 | 0 | ret = WGET_E_MEMORY; |
1214 | 0 | goto out; |
1215 | 0 | } |
1216 | | |
1217 | 2.79k | wget_http_parse_challenge(value0, challenge); |
1218 | | |
1219 | 2.79k | if (!resp->challenges) { |
1220 | 476 | resp->challenges = wget_vector_create(2, NULL); |
1221 | 476 | wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge); |
1222 | 476 | } |
1223 | 2.79k | wget_vector_add(resp->challenges, challenge); |
1224 | 2.79k | } else |
1225 | 208 | ret = WGET_E_UNKNOWN; |
1226 | 2.99k | break; |
1227 | 2.99k | case 'x': |
1228 | 1.51k | if (!wget_strncasecmp_ascii(name, "x-archive-orig-last-modified", namelen)) { |
1229 | 1.30k | resp->last_modified = wget_http_parse_full_date(value0); |
1230 | 1.30k | } else |
1231 | 215 | ret = WGET_E_UNKNOWN; |
1232 | 1.51k | break; |
1233 | 2.59k | default: |
1234 | 2.59k | ret = WGET_E_UNKNOWN; |
1235 | 2.59k | break; |
1236 | 34.6k | } |
1237 | | |
1238 | 34.6k | out: |
1239 | 34.6k | if (value0 != valuebuf) |
1240 | 218 | xfree(value0); |
1241 | | |
1242 | 34.6k | return ret; |
1243 | 34.6k | } |
1244 | | |
1245 | | /* content of <buf> will be destroyed */ |
1246 | | /* buf must be 0-terminated */ |
1247 | | wget_http_response *wget_http_parse_response_header(char *buf) |
1248 | 6.62k | { |
1249 | 6.62k | char *eol; |
1250 | | |
1251 | 6.62k | wget_http_response *resp = wget_calloc(1, sizeof(wget_http_response)); |
1252 | 6.62k | if (!resp) |
1253 | 0 | return NULL; |
1254 | | |
1255 | 6.62k | if (sscanf(buf, " HTTP/%3hd.%3hd %3hd %31[^\r\n] ", |
1256 | 6.62k | &resp->major, &resp->minor, &resp->code, resp->reason) >= 3) { |
1257 | 1.98k | if ((eol = strchr(buf + 10, '\n'))) { |
1258 | | // eol[-1]=0; |
1259 | | // debug_printf("# %s\n",buf); |
1260 | 1.97k | } else { |
1261 | | // empty HTTP header |
1262 | 1 | return resp; |
1263 | 1 | } |
1264 | 4.64k | } else if (sscanf(buf, " ICY %3hd %31[^\r\n] ", &resp->code, resp->reason) >= 1) { |
1265 | 4.63k | if ((eol = strchr(buf + 4, '\n'))) { |
1266 | | // eol[-1]=0; |
1267 | | // debug_printf("# %s\n",buf); |
1268 | 4.63k | } else { |
1269 | | // empty HTTP header |
1270 | 1 | return resp; |
1271 | 1 | } |
1272 | 4.63k | } else { |
1273 | 6 | error_printf(_("HTTP response header not found\n")); |
1274 | 6 | xfree(resp); |
1275 | 6 | return NULL; |
1276 | 6 | } |
1277 | | |
1278 | | // 'close' is default on HTTP/1.0, else 'keep_alive' is default |
1279 | 6.61k | if ((resp->major == 1 && resp->minor >= 1) || resp->major > 1) |
1280 | 1.94k | resp->keep_alive = 1; |
1281 | | |
1282 | 41.2k | for (char *line = eol + 1; eol && *line && *line != '\r' && *line != '\n'; line = eol ? eol + 1 : NULL) { |
1283 | 34.6k | eol = strchr(line, '\n'); |
1284 | 34.9k | while (eol && c_isblank(eol[1])) { // handle split lines |
1285 | 351 | *eol = eol[-1] = ' '; |
1286 | 351 | eol = strchr(eol, '\n'); |
1287 | 351 | } |
1288 | | |
1289 | 34.6k | if (eol) { |
1290 | 28.0k | if (eol[-1] == '\r') |
1291 | 2.75k | eol[-1] = 0; |
1292 | 25.3k | else |
1293 | 25.3k | *eol = 0; |
1294 | 28.0k | } |
1295 | | |
1296 | 34.6k | size_t namelen, valuelen; |
1297 | 34.6k | const char *name; |
1298 | 34.6k | const char *value = wget_parse_name_fixed(line, &name, &namelen); |
1299 | | // value now points directly after : |
1300 | | |
1301 | 34.6k | if (eol) |
1302 | 28.0k | valuelen = eol - value - (eol[-1] == 0); |
1303 | 6.56k | else |
1304 | 6.56k | valuelen = strlen(value); |
1305 | | |
1306 | 34.6k | wget_http_parse_header_line(resp, name, namelen, value, valuelen); |
1307 | 34.6k | } |
1308 | | |
1309 | 6.61k | return resp; |
1310 | 6.62k | } |
1311 | | |
1312 | | void wget_http_free_param(wget_http_header_param *param) |
1313 | 6.47k | { |
1314 | 6.47k | xfree(param->name); |
1315 | 6.47k | xfree(param->value); |
1316 | 6.47k | xfree(param); |
1317 | 6.47k | } |
1318 | | |
1319 | | void wget_http_free_link(wget_http_link *link) |
1320 | 1.68k | { |
1321 | 1.68k | xfree(link->uri); |
1322 | 1.68k | xfree(link->type); |
1323 | 1.68k | xfree(link); |
1324 | 1.68k | } |
1325 | | |
1326 | | void wget_http_free_links(wget_vector **links) |
1327 | 6.61k | { |
1328 | 6.61k | wget_vector_free(links); |
1329 | 6.61k | } |
1330 | | |
1331 | | void wget_http_free_digest(wget_http_digest *digest) |
1332 | 7.20k | { |
1333 | 7.20k | xfree(digest->algorithm); |
1334 | 7.20k | xfree(digest->encoded_digest); |
1335 | 7.20k | xfree(digest); |
1336 | 7.20k | } |
1337 | | |
1338 | | void wget_http_free_digests(wget_vector **digests) |
1339 | 6.61k | { |
1340 | 6.61k | wget_vector_free(digests); |
1341 | 6.61k | } |
1342 | | |
1343 | | void wget_http_free_challenge(wget_http_challenge *challenge) |
1344 | 8.45k | { |
1345 | 8.45k | xfree(challenge->auth_scheme); |
1346 | 8.45k | wget_stringmap_free(&challenge->params); |
1347 | 8.45k | xfree(challenge); |
1348 | 8.45k | } |
1349 | | |
1350 | | void wget_http_free_challenges(wget_vector **challenges) |
1351 | 6.61k | { |
1352 | 6.61k | wget_vector_free(challenges); |
1353 | 6.61k | } |
1354 | | |
1355 | | void wget_http_free_cookies(wget_vector **cookies) |
1356 | 8.23k | { |
1357 | 8.23k | wget_vector_free(cookies); |
1358 | 8.23k | } |
1359 | | |
1360 | | void wget_http_free_hpkp_entries(wget_hpkp **hpkp) |
1361 | 6.61k | { |
1362 | 6.61k | if (hpkp) { |
1363 | 6.61k | wget_hpkp_free(*hpkp); |
1364 | 6.61k | *hpkp = NULL; |
1365 | 6.61k | } |
1366 | 6.61k | } |
1367 | | |
1368 | | void wget_http_free_response(wget_http_response **resp) |
1369 | 8.54k | { |
1370 | 8.54k | if (resp && *resp) { |
1371 | 6.61k | wget_http_free_links(&(*resp)->links); |
1372 | 6.61k | wget_http_free_digests(&(*resp)->digests); |
1373 | 6.61k | wget_http_free_challenges(&(*resp)->challenges); |
1374 | 6.61k | wget_http_free_cookies(&(*resp)->cookies); |
1375 | 6.61k | wget_http_free_hpkp_entries(&(*resp)->hpkp); |
1376 | 6.61k | xfree((*resp)->content_type); |
1377 | 6.61k | xfree((*resp)->content_type_encoding); |
1378 | 6.61k | xfree((*resp)->content_filename); |
1379 | 6.61k | xfree((*resp)->location); |
1380 | 6.61k | xfree((*resp)->etag); |
1381 | | // xfree((*resp)->reason); |
1382 | 6.61k | wget_buffer_free(&(*resp)->header); |
1383 | 6.61k | wget_buffer_free(&(*resp)->body); |
1384 | 6.61k | xfree(*resp); |
1385 | 6.61k | } |
1386 | 8.54k | } |
1387 | | |
1388 | | /* for security reasons: set all freed pointers to NULL */ |
1389 | | void wget_http_free_request(wget_http_request **req) |
1390 | 3.84k | { |
1391 | 3.84k | if (req && *req) { |
1392 | 3.84k | wget_buffer_deinit(&(*req)->esc_resource); |
1393 | 3.84k | wget_buffer_deinit(&(*req)->esc_host); |
1394 | 3.84k | wget_vector_free(&(*req)->headers); |
1395 | 3.84k | xfree((*req)->body); |
1396 | 3.84k | xfree(*req); |
1397 | 3.84k | } |
1398 | 3.84k | } |