Coverage Report

Created: 2025-03-18 06:55

/src/wget2/libwget/http_parse.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2017-2024 Free Software Foundation, Inc.
3
 *
4
 * This file is part of libwget.
5
 *
6
 * Libwget is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU Lesser General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * Libwget is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public License
17
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
18
 *
19
 *
20
 * HTTP parsing routines
21
 *
22
 * Resources:
23
 * RFC 2616
24
 * RFC 6265
25
 *
26
 */
27
28
#include <config.h>
29
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include <c-ctype.h>
34
#include <time.h>
35
#include <errno.h>
36
#include <stdint.h>
37
38
#include <wget.h>
39
#include "private.h"
40
#include "http.h"
41
42
417k
// Flag bit set in http_ctype[] for every octet that is an HTTP 'separator'.
#define HTTP_CTYPE_SEPARATOR (1<<0)

// Classification table indexed by octet value (entries listed in ASCII order).
// Octets that are not listed are implicitly zero, i.e. not a separator.
static const unsigned char
  http_ctype[256] = {
    ['\t'] = HTTP_CTYPE_SEPARATOR,
    [' '] = HTTP_CTYPE_SEPARATOR,
    ['\"'] = HTTP_CTYPE_SEPARATOR,
    ['('] = HTTP_CTYPE_SEPARATOR,
    [')'] = HTTP_CTYPE_SEPARATOR,
    [','] = HTTP_CTYPE_SEPARATOR,
    ['/'] = HTTP_CTYPE_SEPARATOR,
    [':'] = HTTP_CTYPE_SEPARATOR,
    [';'] = HTTP_CTYPE_SEPARATOR,
    ['<'] = HTTP_CTYPE_SEPARATOR,
    ['='] = HTTP_CTYPE_SEPARATOR,
    ['>'] = HTTP_CTYPE_SEPARATOR,
    ['?'] = HTTP_CTYPE_SEPARATOR,
    ['@'] = HTTP_CTYPE_SEPARATOR,
    ['['] = HTTP_CTYPE_SEPARATOR,
    ['\\'] = HTTP_CTYPE_SEPARATOR,
    [']'] = HTTP_CTYPE_SEPARATOR,
    ['{'] = HTTP_CTYPE_SEPARATOR,
    ['}'] = HTTP_CTYPE_SEPARATOR
  };
66
67
static inline bool http_isseparator(char c)
68
417k
{
69
417k
  return (http_ctype[(unsigned char)(c)]&HTTP_CTYPE_SEPARATOR) != 0;
70
417k
}
71
72
/**Gets the hostname of the remote endpoint.
73
 * \param conn a wget_http_connection
74
 * \return A string containing hostname. Returned memory is owned by
75
 *         _conn_ and should not be modified or freed.
76
 */
77
const char *wget_http_get_host(const wget_http_connection *conn)
78
0
{
79
0
  return conn->esc_host;
80
0
}
81
82
/**Gets the port number of the remote endpoint.
83
 * \param conn a wget_http_connection
84
 * \return A string containing port number. Returned memory is owned by
85
 *         _conn_ and should not be modified or freed.
86
 */
87
uint16_t wget_http_get_port(const wget_http_connection *conn)
88
0
{
89
0
  return conn->port;
90
0
}
91
92
/**Get the scheme used by the connection.
93
 * \param conn a wget_http_connection
94
 * \return A WGET_IRI_SCHEM_* value.
95
 */
96
wget_iri_scheme wget_http_get_scheme(const wget_http_connection *conn)
97
0
{
98
0
  return conn->scheme;
99
0
}
100
101
/**Gets the protocol used by the connection
102
 * \param conn a wget_http_connection
103
 * \return Either WGET_PROTOCOL_HTTP_1_1 or WGET_PROTOCOL_HTTP_2_0
104
 */
105
int wget_http_get_protocol(const wget_http_connection *conn)
106
0
{
107
0
  return conn->protocol;
108
0
}
109
110
// Exported wrapper around the file-local http_isseparator().
// Returns true if c is an HTTP separator character.
bool wget_http_isseparator(char c)
{
  const bool is_separator = http_isseparator(c);

  return is_separator;
}
114
115
// TEXT           = <any OCTET except CTLs, but including LWS>
116
//int http_istext(char c)
117
//{
118
//  return (c>=32 && c<=126) || c=='\r' || c=='\n' || c=='\t';
119
//}
120
121
// token          = 1*<any CHAR except CTLs or separators>
122
123
// Returns true if c may be part of a token: a character in the range
// 33..126 that is not a separator.
bool wget_http_istoken(char c)
{
  if (c <= 32 || c > 126)
    return false; // control character, space, DEL or non-ASCII

  return !http_isseparator(c);
}
127
128
// Scans the token starting at s and stores a copy of it (made with
// wget_strmemdup()) in *token. Returns a pointer to the first character
// that is not part of the token.
const char *wget_http_parse_token(const char *s, const char **token)
{
  const char *start = s;

  while (wget_http_istoken(*s))
    s++;

  *token = wget_strmemdup(start, s - start);

  return s;
}
138
139
// quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
140
// qdtext         = <any TEXT except <">>
141
// quoted-pair    = "\" CHAR
142
// TEXT           = <any OCTET except CTLs, but including LWS>
143
// CTL            = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
144
// LWS            = [CRLF] 1*( SP | HT )
145
146
const char *wget_http_parse_quoted_string(const char *s, const char **qstring)
147
3.56k
{
148
3.56k
  if (*s == '\"') {
149
3.56k
    const char *p = ++s;
150
151
    // relaxed scanning
152
21.0k
    while (*s) {
153
19.0k
      if (*s == '\"') break;
154
17.4k
      else if (*s == '\\' && s[1]) {
155
603
        s += 2;
156
603
      } else
157
16.8k
        s++;
158
19.0k
    }
159
160
3.56k
    *qstring = wget_strmemdup(p, s - p);
161
3.56k
    if (*s == '\"') s++;
162
3.56k
  } else
163
0
    *qstring = NULL;
164
165
3.56k
  return s;
166
3.56k
}
167
168
// generic-param  =  token [ EQUAL gen-value ]
169
// gen-value      =  token / host / quoted-string
170
171
const char *wget_http_parse_param(const char *s, const char **param, const char **value)
172
38.1k
{
173
38.1k
  const char *p;
174
175
38.1k
  *param = *value = NULL;
176
177
38.8k
  while (c_isblank(*s)) s++;
178
179
38.1k
  if (*s == ';') {
180
3.35k
    s++;
181
4.28k
    while (c_isblank(*s)) s++;
182
3.35k
  }
183
38.1k
  if (!*s) return s;
184
185
108k
  for (p = s; wget_http_istoken(*s); s++);
186
37.0k
  *param = wget_strmemdup(p, s - p);
187
188
37.6k
  while (c_isblank(*s)) s++;
189
190
37.0k
  if (*s && *s++ == '=') {
191
20.5k
    while (c_isblank(*s)) s++;
192
20.1k
    if (*s == '\"') {
193
2.93k
      s = wget_http_parse_quoted_string(s, value);
194
17.1k
    } else {
195
17.1k
      s = wget_http_parse_token(s, value);
196
17.1k
    }
197
20.1k
  }
198
199
37.0k
  return s;
200
38.1k
}
201
202
// message-header = field-name ":" [ field-value ]
203
// field-name     = token
204
// field-value    = *( field-content | LWS )
205
// field-content  = <the OCTETs making up the field-value
206
//                  and consisting of either *TEXT or combinations
207
//                  of token, separators, and quoted-string>
208
209
// Parses a header field name: skips leading blanks, stores a copy of the
// token in *name and returns a pointer behind the next ':' (or to the end
// of the string if there is no ':').
const char *wget_http_parse_name(const char *s, const char **name)
{
  for (; c_isblank(*s); s++)
    ;

  s = wget_http_parse_token(s, name);

  // skip anything between the token and the colon
  for (; *s && *s != ':'; s++)
    ;

  if (*s == ':')
    s++;

  return s;
}
219
220
// Like wget_http_parse_name(), but without allocating: *name is set to
// point into s at the start of the token and *namelen to the token length.
// Returns a pointer behind the next ':' (or to the end of the string if
// there is no ':').
const char *wget_parse_name_fixed(const char *s, const char **name, size_t *namelen)
{
  for (; c_isblank(*s); s++)
    ;

  const char *start = s;

  for (; wget_http_istoken(*s); s++)
    ;

  *name = start;
  *namelen = s - start;

  // skip anything between the token and the colon
  for (; *s && *s != ':'; s++)
    ;

  if (*s == ':')
    s++;

  return s;
}
235
236
// Vector compare function: orders header params by name, ignoring ASCII case.
static int WGET_GCC_NONNULL_ALL compare_param(wget_http_header_param *p1, wget_http_header_param *p2)
{
  const int order = wget_strcasecmp_ascii(p1->name, p2->name);

  return order;
}
240
241
// Appends a copy of *param to the vector *params.
// The vector is created on first use (when *params is NULL).
void wget_http_add_param(wget_vector **params, wget_http_header_param *param)
{
  if (*params == NULL) {
    *params = wget_vector_create(4, (wget_vector_compare_fn *) compare_param);
  }

  wget_vector_add_memdup(*params, param, sizeof(*param));
}
246
247
/*
248
  Link           = "Link" ":" #link-value
249
  link-value     = "<" URI-Reference ">" *( ";" link-param )
250
  link-param     = ( ( "rel" "=" relation-types )
251
            | ( "anchor" "=" <"> URI-Reference <"> )
252
            | ( "rev" "=" relation-types )
253
            | ( "hreflang" "=" Language-Tag )
254
            | ( "media" "=" ( MediaDesc | ( <"> MediaDesc <"> ) ) )
255
            | ( "title" "=" quoted-string )
256
            | ( "title*" "=" ext-value )
257
            | ( "type" "=" ( media-type | quoted-mt ) )
258
            | ( link-extension ) )
259
  link-extension = ( parmname [ "=" ( ptoken | quoted-string ) ] )
260
            | ( ext-name-star "=" ext-value )
261
  ext-name-star  = parmname "*" ; reserved for RFC2231-profiled
262
                      ; extensions.  Whitespace NOT
263
                      ; allowed in between.
264
  ptoken         = 1*ptokenchar
265
  ptokenchar     = "!" | "#" | "$" | "%" | "&" | "'" | "("
266
            | ")" | "*" | "+" | "-" | "." | "/" | DIGIT
267
            | ":" | "<" | "=" | ">" | "?" | "@" | ALPHA
268
            | "[" | "]" | "^" | "_" | "`" | "{" | "|"
269
            | "}" | "~"
270
  media-type     = type-name "/" subtype-name
271
  quoted-mt      = <"> media-type <">
272
  relation-types = relation-type
273
            | <"> relation-type *( 1*SP relation-type ) <">
274
  relation-type  = reg-rel-type | ext-rel-type
275
  reg-rel-type   = LOALPHA *( LOALPHA | DIGIT | "." | "-" )
276
  ext-rel-type   = URI
277
*/
278
// Parses one link-value of a Link header into *link.
// *link is zeroed first. If the value starts with '<' and has a closing '>',
// link->uri receives a copy of the text in between and the 'rel', 'pri' and
// 'type' parameters are evaluated.
// Returns a pointer behind the parsed part. The result is never NULL: if the
// closing '>' is missing, nothing is parsed and the returned pointer is left
// at the opening '<'.
const char *wget_http_parse_link(const char *s, wget_http_link *link)
{
  memset(link, 0, sizeof(*link));

  while (c_isblank(*s)) s++;

  if (*s == '<') {
    // URI reference as of RFC 3987 (if relative, resolve as of RFC 3986)
    const char *p = s + 1;
    // use a separate variable for the lookup, so that s stays valid if '>' is missing
    const char *end = strchr(p, '>');

    if (end != NULL) {
      const char *name = NULL, *value = NULL;

      link->uri = wget_strmemdup(p, end - p);
      s = end + 1;

      while (c_isblank(*s)) s++;

      while (*s == ';') {
        s = wget_http_parse_param(s, &name, &value);
        if (name && value) {
          if (!wget_strcasecmp_ascii(name, "rel")) {
            if (!wget_strcasecmp_ascii(value, "describedby"))
              link->rel = link_rel_describedby;
            else if (!wget_strcasecmp_ascii(value, "duplicate"))
              link->rel = link_rel_duplicate;
          } else if (!wget_strcasecmp_ascii(name, "pri")) {
            link->pri = atoi(value);
          } else if (!wget_strcasecmp_ascii(name, "type")) {
            if (!link->type) {
              // hand the string over to link, so it is not freed below
              link->type = value;
              value = NULL;
            }
          }
          while (c_isblank(*s)) s++;
        }

        xfree(name);
        xfree(value);
      }

      // skip the rest up to the next blank
      while (*s && !c_isblank(*s)) s++;
    }
  }

  return s;
}
328
329
// from RFC 3230:
330
// Digest = "Digest" ":" #(instance-digest)
331
// instance-digest = digest-algorithm "=" <encoded digest output>
332
// digest-algorithm = token
333
334
// Parses one instance-digest ( algorithm "=" encoded-digest ) into *digest.
// *digest is zeroed first. The encoded digest may be a quoted-string or a
// run of characters up to the next blank, ',' or ';'.
// Returns a pointer to the next blank or to the end of the string.
const char *wget_http_parse_digest(const char *s, wget_http_digest *digest)
{
  memset(digest, 0, sizeof(*digest));

  for (; c_isblank(*s); s++)
    ;
  s = wget_http_parse_token(s, &digest->algorithm);

  for (; c_isblank(*s); s++)
    ;

  if (*s == '=') {
    for (s++; c_isblank(*s); s++)
      ;

    if (*s == '\"') {
      s = wget_http_parse_quoted_string(s, &digest->encoded_digest);
    } else {
      const char *start = s;

      while (*s && *s != ',' && *s != ';' && !c_isblank(*s))
        s++;

      digest->encoded_digest = wget_strmemdup(start, s - start);
    }
  }

  // skip whatever is left up to the next blank
  for (; *s && !c_isblank(*s); s++)
    ;

  return s;
}
360
361
// RFC 2617:
362
// challenge   = auth-scheme 1*SP 1#auth-param
363
// auth-scheme = token
364
// auth-param  = token "=" ( token | quoted-string )
365
366
// Parses one challenge ( auth-scheme SP auth-param *( "," auth-param ) ).
// *challenge is zeroed first. On a syntax error behind the scheme (no space),
// challenge->auth_scheme is released and the position of the error returned.
// Params with a value are put into challenge->params (created on demand).
// A non-empty name without value is taken as the start of a new scheme; in
// this case the position in front of that name is returned.
const char *wget_http_parse_challenge(const char *s, wget_http_challenge *challenge)
{
  memset(challenge, 0, sizeof(*challenge));

  while (c_isblank(*s)) s++;
  s = wget_http_parse_token(s, &challenge->auth_scheme);

  if (*s != ' ') {
    // parse/syntax error
    xfree(challenge->auth_scheme);
    return s;
  }

  s++; // Auth scheme must have a space at the end of the token

  wget_http_header_param param;
  do {
    const char *start = s;

    s = wget_http_parse_param(s, &param.name, &param.value);

    if (param.name && !param.value) {
      const bool new_scheme = *param.name != '\0';

      xfree(param.name);
      if (new_scheme)
        return start; // a new scheme detected

      continue; // empty name without value: try again if input is left
    }

    if (param.name) {
      if (!challenge->params)
        challenge->params = wget_stringmap_create_nocase(8);
      wget_stringmap_put(challenge->params, param.name, param.value);
    }

    while (c_isblank(*s)) s++;

    if (*s != ',')
      break;

    s++; // step over the comma
  } while (*s);

  return s;
}
409
410
// Parses a list of challenges and appends a copy of each challenge that has
// an auth scheme to the vector 'challenges'.
// Parsing stops at the end of the string or as soon as
// wget_http_parse_challenge() does not consume any input (e.g. if the input
// starts with a separator like ','); without that check such input would
// make the loop spin forever.
// Returns a pointer to where parsing stopped.
const char *wget_http_parse_challenges(const char *s, wget_vector *challenges)
{
  wget_http_challenge challenge;

  while (*s) {
    const char *start = s;

    s = wget_http_parse_challenge(s, &challenge);
    if (challenge.auth_scheme) {
      wget_vector_add_memdup(challenges, &challenge, sizeof(challenge));
    }

    if (s == start)
      break; // no progress, give up instead of looping endlessly
  }

  return s;
}
423
424
// Parses the value of a Location header: skips leading blanks and stores a
// copy of everything up to the end of the line, without trailing blanks, in
// *location. Returns a pointer to the end of the copied part.
const char *wget_http_parse_location(const char *s, const char **location)
{
  while (c_isblank(*s)) s++;

  const char *start = s;

  /*
   * The correct (and still lenient) variant was to stop at the first blank.
   *
   * And then there were spaces in the URI, see
   *   https://gitlab.com/gnuwget/wget2/issues/420
   *
   * So we take everything up to CR, LF or the end of the string.
   */
  s += strcspn(s, "\r\n");

  // remove trailing spaces (OWS - optional white space)
  while (s > start && c_isblank(s[-1]))
    s--;

  *location = wget_strmemdup(start, s - start);

  return s;
}
445
446
// Transfer-Encoding       = "Transfer-Encoding" ":" 1#transfer-coding
447
// transfer-coding         = "chunked" | transfer-extension
448
// transfer-extension      = token *( ";" parameter )
449
// parameter               = attribute "=" value
450
// attribute               = token
451
// value                   = token | quoted-string
452
453
// Parses the value of a Transfer-Encoding header.
// *transfer_encoding is set to 'identity' if the value (after leading blanks)
// is exactly "identity" (ASCII case-insensitive), and to 'chunked' otherwise.
// Returns a pointer behind the first token.
const char *wget_http_parse_transfer_encoding(const char *s, wget_transfer_encoding *transfer_encoding)
{
  for (; c_isblank(*s); s++)
    ;

  if (wget_strcasecmp_ascii(s, "identity") != 0)
    *transfer_encoding = wget_transfer_encoding_chunked;
  else
    *transfer_encoding = wget_transfer_encoding_identity;

  for (; wget_http_istoken(*s); s++)
    ;

  return s;
}
466
467
// Content-Type   = "Content-Type" ":" media-type
468
// media-type     = type "/" subtype *( ";" parameter )
469
// type           = token
470
// subtype        = token
471
// example: Content-Type: text/html; charset=ISO-8859-4
472
473
const char *wget_http_parse_content_type(const char *s, const char **content_type, const char **charset)
474
2.03k
{
475
2.03k
  wget_http_header_param param;
476
2.03k
  const char *p;
477
478
2.93k
  while (c_isblank(*s)) s++;
479
480
3.15k
  for (p = s; *s && (wget_http_istoken(*s) || *s == '/'); s++);
481
2.03k
  if (content_type)
482
100
    *content_type = wget_strmemdup(p, s - p);
483
484
2.03k
  if (charset) {
485
2.03k
    *charset = NULL;
486
487
12.7k
    while (*s) {
488
10.9k
      s=wget_http_parse_param(s, &param.name, &param.value);
489
10.9k
      if (!wget_strcasecmp_ascii("charset", param.name)) {
490
204
        xfree(param.name);
491
204
        *charset = param.value;
492
204
        break;
493
204
      }
494
10.7k
      xfree(param.name);
495
10.7k
      xfree(param.value);
496
10.7k
    }
497
2.03k
  }
498
499
2.03k
  return s;
500
2.03k
}
501
502
// RFC 6266 - Use of the Content-Disposition Header Field in the Hypertext Transfer Protocol (HTTP)
503
// content-disposition = "Content-Disposition" ":" disposition-type *( ";" disposition-parm )
504
// disposition-type    = "inline" | "attachment" | disp-ext-type ; case-insensitive
505
// disp-ext-type       = token
506
// disposition-parm    = filename-parm | disp-ext-parm
507
// filename-parm       = "filename" "=" value | "filename*" "=" ext-value
508
// disp-ext-parm       = token "=" value | ext-token "=" ext-value
509
// ext-token           = <the characters in token, followed by "*">
510
//
511
// Defined in [RFC2616]:
512
//
513
// token         = <token, defined in [RFC2616], Section 2.2>
514
// quoted-string = <quoted-string, defined in [RFC2616], Section 2.2>
515
// value         = <value, defined in [RFC2616], Section 3.6> ; token | quoted-string
516
//
517
// Defined in [RFC5987]:
518
//
519
// ext-value   = <ext-value, defined in [RFC5987], Section 3.2>
520
521
const char *wget_http_parse_content_disposition(const char *s, const char **filename)
522
1.36k
{
523
1.36k
  wget_http_header_param param;
524
1.36k
  char *p;
525
526
1.36k
  if (filename) {
527
1.36k
    *filename = NULL;
528
529
3.22k
    while (*s && !*filename) {
530
2.59k
      s = wget_http_parse_param(s, &param.name, &param.value);
531
2.59k
      if (param.value && !wget_strcasecmp_ascii("filename", param.name)) {
532
        // just take the last path part as filename
533
179
        if (!*filename) {
534
179
          if ((p = strpbrk(param.value,"/\\"))) {
535
2
            p = wget_strdup(p + 1);
536
177
          } else {
537
177
            p = (char *) param.value;
538
177
            param.value = NULL;
539
177
          }
540
541
179
          wget_percent_unescape(p);
542
179
          if (!wget_str_is_valid_utf8(p)) {
543
            // if it is not UTF-8, assume ISO-8859-1
544
            // see https://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http
545
80
            *filename = wget_str_to_utf8(p, "iso-8859-1");
546
80
            xfree(p);
547
99
          } else {
548
99
            *filename = p;
549
99
            p = NULL;
550
99
          }
551
179
        }
552
2.41k
      } else if (param.value && !wget_strcasecmp_ascii("filename*", param.name)) {
553
        // RFC5987
554
        // ext-value     = charset  "'" [ language ] "'" value-chars
555
        // ; like RFC 2231's <extended-initial-value>
556
        // ; (see [RFC2231], Section 7)
557
558
        // charset       = "UTF-8" / "ISO-8859-1" / mime-charset
559
560
        // mime-charset  = 1*mime-charsetc
561
        // mime-charsetc = ALPHA / DIGIT
562
        //    / "!" / "#" / "$" / "%" / "&"
563
        //    / "+" / "-" / "^" / "_" / "`"
564
        //    / "{" / "}" / "~"
565
        //    ; as <mime-charset> in Section 2.3 of [RFC2978]
566
        //    ; except that the single quote is not included
567
        //    ; SHOULD be registered in the IANA charset registry
568
569
        // language      = <Language-Tag, defined in [RFC5646], Section 2.1>
570
571
        // value-chars   = *( pct-encoded / attr-char )
572
573
        // pct-encoded   = "%" HEXDIG HEXDIG
574
        //    ; see [RFC3986], Section 2.1
575
576
        // attr-char     = ALPHA / DIGIT
577
        //    / "!" / "#" / "$" / "&" / "+" / "-" / "."
578
        //    / "^" / "_" / "`" / "|" / "~"
579
        //    ; token except ( "*" / "'" / "%" )
580
581
1.32k
        if ((p = strchr(param.value, '\''))) {
582
1.12k
          const char *charset = param.value;
583
1.12k
          const char *language = p + 1;
584
1.12k
          *p = 0;
585
1.12k
          if ((p = strchr(language, '\''))) {
586
930
            *p++ = 0;
587
930
            if (*p) {
588
736
              wget_percent_unescape(p);
589
736
              if (wget_str_needs_encoding(p))
590
719
                *filename = wget_str_to_utf8(p, charset);
591
17
              else
592
17
                *filename = wget_strdup(p);
593
594
              // just take the last path part as filename
595
736
              if (*filename && (p = strpbrk(*filename, "/\\"))) {
596
1
                p = wget_strdup(p + 1);
597
1
                xfree(*filename);
598
1
                *filename = p;
599
1
              }
600
601
736
              xfree(param.name);
602
736
              xfree(param.value);
603
736
              break; // stop looping, we found the final filename
604
736
            }
605
930
          }
606
1.12k
        }
607
1.32k
      }
608
1.85k
      xfree(param.name);
609
1.85k
      xfree(param.value);
610
1.85k
    }
611
1.36k
  }
612
613
1.36k
  return s;
614
1.36k
}
615
616
// RFC 7469
617
// Example:
618
//   Public-Key-Pins:
619
//        pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=";
620
//         pin-sha256="E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g=";
621
//         pin-sha256="LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=";
622
//         max-age=10000; includeSubDomains
623
// Parses the value of a Public-Key-Pins header (RFC 7469) into *hpkp.
// 'include subdomains' is reset to false first. Then 'max-age',
// 'pin-<type>' and 'includeSubDomains' directives are applied to hpkp,
// anything else is ignored. Returns a pointer to the end of the string.
const char *wget_http_parse_public_key_pins(const char *s, wget_hpkp *hpkp)
{
  wget_http_header_param param;

  wget_hpkp_set_include_subdomains(hpkp, false);

  for (; *s; xfree(param.name), xfree(param.value)) {
    s = wget_http_parse_param(s, &param.name, &param.value);

    if (!param.value) {
      // directive without value
      if (!wget_strcasecmp_ascii(param.name, "includeSubDomains"))
        wget_hpkp_set_include_subdomains(hpkp, true);
      continue;
    }

    if (!wget_strcasecmp_ascii(param.name, "max-age")) {
      wget_hpkp_set_maxage(hpkp, (int64_t) atoll(param.value));
    } else if (!wget_strncasecmp_ascii(param.name, "pin-", 4)) {
      // the part behind "pin-" is the pin type
      wget_hpkp_pin_add(hpkp, param.name + 4, param.value);
    }
  }

  return s;
}
649
650
// RFC 6797
651
//
652
// Strict-Transport-Security = "Strict-Transport-Security" ":" [ directive ]  *( ";" [ directive ] )
653
// directive                 = directive-name [ "=" directive-value ]
654
// directive-name            = token
655
// directive-value           = token | quoted-string
656
657
const char *wget_http_parse_strict_transport_security(const char *s, int64_t *maxage, bool *include_subdomains)
658
1.11k
{
659
1.11k
  wget_http_header_param param;
660
661
1.11k
  *maxage = 0;
662
1.11k
  *include_subdomains = 0;
663
664
3.34k
  while (*s) {
665
2.22k
    s = wget_http_parse_param(s, &param.name, &param.value);
666
667
2.22k
    if (param.value) {
668
938
      if (!wget_strcasecmp_ascii(param.name, "max-age")) {
669
248
        *maxage = (int64_t) atoll(param.value);
670
248
      }
671
1.29k
    } else {
672
1.29k
      if (!wget_strcasecmp_ascii(param.name, "includeSubDomains")) {
673
194
        *include_subdomains = 1;
674
194
      }
675
1.29k
    }
676
677
2.22k
    xfree(param.name);
678
2.22k
    xfree(param.value);
679
2.22k
  }
680
681
1.11k
  return s;
682
1.11k
}
683
684
// Content-Encoding  = "Content-Encoding" ":" 1#content-coding
685
686
const char *wget_http_parse_content_encoding(const char *s, char *content_encoding)
687
2.74k
{
688
3.26k
  while (c_isblank(*s)) s++;
689
690
2.74k
  if (!wget_strcasecmp_ascii(s, "gzip") || !wget_strcasecmp_ascii(s, "x-gzip"))
691
388
    *content_encoding = wget_content_encoding_gzip;
692
2.35k
  else if (!wget_strcasecmp_ascii(s, "deflate"))
693
194
    *content_encoding = wget_content_encoding_deflate;
694
2.15k
  else if (!wget_strcasecmp_ascii(s, "bzip2"))
695
194
    *content_encoding = wget_content_encoding_bzip2;
696
1.96k
  else if (!wget_strcasecmp_ascii(s, "xz") || !wget_strcasecmp_ascii(s, "lzma") || !wget_strcasecmp_ascii(s, "x-lzma"))
697
    // 'xz' is the tag currently understood by Firefox (2.1.2014)
698
    // 'lzma' / 'x-lzma' are the tags currently understood by ELinks
699
585
    *content_encoding = wget_content_encoding_lzma;
700
1.38k
  else if (!wget_strcasecmp_ascii(s, "br"))
701
324
    *content_encoding = wget_content_encoding_brotli;
702
1.05k
  else if (!wget_strcasecmp_ascii(s, "zstd"))
703
194
    *content_encoding = wget_content_encoding_zstd;
704
862
  else if (!wget_strcasecmp_ascii(s, "lzip"))
705
194
    *content_encoding = wget_content_encoding_lzip;
706
668
  else
707
668
    *content_encoding = wget_content_encoding_identity;
708
709
11.8k
  while (wget_http_istoken(*s)) s++;
710
711
2.74k
  return s;
712
2.74k
}
713
714
// Parses the comma separated value of a Connection header.
// *keep_alive is set to true if one of the list elements starts with
// "keep-alive" (ASCII case-insensitive), else to false.
// Returns a pointer to the terminating NUL of the string. The pointer is
// never moved past the terminator, so it is safe to dereference.
const char *wget_http_parse_connection(const char *s, bool *keep_alive)
{
  *keep_alive = false;

  while (*s) {
    // e points to the next comma or to the terminating NUL
    const char *e = strchrnul(s, ',');

    if (e != s) {
      while (c_isblank(*s)) s++;

      if (!wget_strncasecmp_ascii(s, "keep-alive", 10))
        *keep_alive = true;
    }

    if (!*e)
      return e; // last element, don't step over the terminator

    s = e + 1;
  }

  return s;
}
731
732
// Parses the value of an ETag header: skips leading blanks and stores a
// copy of everything up to the next blank (or the end of the string) in
// *etag. Returns a pointer behind the copied part.
const char *wget_http_parse_etag(const char *s, const char **etag)
{
  while (c_isblank(*s))
    s++;

  const char *start = s;

  while (*s && !c_isblank(*s))
    s++;

  *etag = wget_strmemdup(start, s - start);

  return s;
}
743
744
/*
745
// returns GMT/UTC time as an integer of format YYYYMMDDHHMMSS
746
// this makes us independent from size of time_t - work around possible year 2038 problems
747
static long long NONNULL_ALL parse_rfc1123_date(const char *s)
748
{
749
  // we simply can't use strptime() since it requires us to setlocale()
750
  // which is not thread-safe !!!
751
  static const char *mnames[12] = {
752
    "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
753
  };
754
  static int days_per_month[12] = {
755
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
756
  };
757
  int day, mon = 0, year, hour, min, sec, leap, it;
758
  char mname[4] = "";
759
760
  if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
761
    // RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
762
  }
763
  else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
764
    // RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
765
  }
766
  else if (sscanf(s, " %*[a-zA-Z], %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) {
767
    // ANSI C's asctime(): Wed Jun 09 10:18:14 2021
768
  } else {
769
    error_printf(_("Failed to parse date '%s'\n"), s);
770
    return 0; // return as session cookie
771
  }
772
773
  if (*mname) {
774
    for (it = 0; it < countof(mnames); it++) {
775
      if (!wget_strcasecmp_ascii(mname, mnames[it])) {
776
        mon = it + 1;
777
        break;
778
      }
779
    }
780
  }
781
782
  if (year < 70 && year >= 0) year += 2000;
783
  else if (year >= 70 && year <= 99) year += 1900;
784
785
  if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
786
    leap = 1;
787
  else
788
    leap = 0;
789
790
  // we don't handle leap seconds
791
792
  if (year < 1601 || mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap) ||
793
    hour < 0 || hour > 23 || min < 0 || min > 60 || sec < 0 || sec > 60)
794
  {
795
    error_printf(_("Failed to parse date '%s'\n"), s);
796
    return 0; // return as session cookie
797
  }
798
799
  return(((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
800
}
801
*/
802
803
// copied this routine from
804
// https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/pkgtools/libnbcompat/files/timegm.c
805
806
// Returns the number of leap days between January 1st of year y1 and
// January 1st of year y2 (Gregorian rules: every 4th year, except every
// 100th, but again every 400th).
static int leap_days(int y1, int y2)
{
  const int from = y1 - 1;
  const int to = y2 - 1;

  const int every_4 = to / 4 - from / 4;
  const int every_100 = to / 100 - from / 100;
  const int every_400 = to / 400 - from / 400;

  return every_4 - every_100 + every_400;
}
812
813
/*
814
RFC 2616, 3.3.1 Full Date
815
HTTP-date    = rfc1123-date | rfc850-date | asctime-date
816
rfc1123-date = wkday "," SP date1 SP time SP "GMT"
817
rfc850-date  = weekday "," SP date2 SP time SP "GMT"
818
asctime-date = wkday SP date3 SP time SP 4DIGIT
819
date1        = 2DIGIT SP month SP 4DIGIT
820
          ; day month year (e.g., 02 Jun 1982)
821
date2        = 2DIGIT "-" month "-" 2DIGIT
822
          ; day-month-year (e.g., 02-Jun-82)
823
date3        = month SP ( 2DIGIT | ( SP 1DIGIT ))
824
          ; month day (e.g., Jun  2)
825
time         = 2DIGIT ":" 2DIGIT ":" 2DIGIT
826
          ; 00:00:00 - 23:59:59
827
wkday        = "Mon" | "Tue" | "Wed"
828
         | "Thu" | "Fri" | "Sat" | "Sun"
829
weekday      = "Monday" | "Tuesday" | "Wednesday"
830
         | "Thursday" | "Friday" | "Saturday" | "Sunday"
831
month        = "Jan" | "Feb" | "Mar" | "Apr"
832
         | "May" | "Jun" | "Jul" | "Aug"
833
         | "Sep" | "Oct" | "Nov" | "Dec"
834
*/
835
836
// Parses an HTTP date (RFC 1123, RFC 850, asctime() and two non-standard
// formats) and returns it as number of seconds since 1970-01-01 00:00:00 UTC.
// Two-digit years are mapped to 2000..2069 resp. 1970..1999, years before
// 1970 are clamped to 1970. Returns 0 if the date can not be parsed or is
// out of range (minutes must be 0..59; seconds up to 60 are tolerated).
int64_t wget_http_parse_full_date(const char *s)
{
  // we simply can't use strptime() since it requires us to setlocale()
  // which is not thread-safe !!!
  static const char *mnames[12] = {
    "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  static const int days_per_month[12] = {
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
  };
  // cumulated number of days until beginning of month for non-leap years
  static const int sum_of_days[12] = {
    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
  };

  int day, mon = 0, year, hour, min, sec, leap_month, leap_year, days;
  char mname[4] = "";

  if (sscanf(s, " %*[a-zA-Z], %2d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
    // RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
  } else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
    // RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
  } else if (sscanf(s, " %*[a-zA-Z] %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) == 6) {
    // ANSI C's asctime(): Wed Jun 09 10:18:14 2021
  } else if (sscanf(s, " %d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
    // non-standard: 1 Mar 2027 09:23:12 GMT
  } else if (sscanf(s, " %*s %3s %2d %4d %2d:%2d:%2d", mname, &day, &year, &hour, &min, &sec) == 6) {
    // non-standard: Sun Nov 26 2023 21:24:47
  } else {
    error_printf(_("Failed to parse date '%s'\n"), s);
    return 0; // return as session cookie
  }

  // translate the month name into 1..12, mon stays 0 if the name is unknown
  if (*mname) {
    for (unsigned it = 0; it < countof(mnames); it++) {
      if (!wget_strcasecmp_ascii(mname, mnames[it])) {
        mon = it + 1;
        break;
      }
    }
  }

  if (year < 70 && year >= 0) year += 2000;
  else if (year >= 70 && year <= 99) year += 1900;
  if (year < 1970) year = 1970;

  // we don't handle leap seconds

  leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
  leap_month = (mon == 2 && leap_year);

  // the range check of mon must come first, it guards the access to days_per_month[]
  if (mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap_month) ||
    hour < 0 || hour > 23 || min < 0 || min > 59 || sec < 0 || sec > 60)
  {
    error_printf(_("Failed to parse date '%s'\n"), s);
    return 0; // return as session cookie
  }

  // calculate time_t (represented as int64_t) from GMT/UTC time values

  days = 365 * (year - 1970) + leap_days(1970, year);
  days += sum_of_days[mon - 1] + (mon > 2 && leap_year);
  days += day - 1;

  return (((int64_t)days * 24 + hour) * 60 + min) * 60 + sec;
}
902
903
/*
 * Format the timestamp <t> (seconds since 1970-01-01 00:00:00 UTC) as an
 * HTTP date string of the form "Sun, 06 Nov 1994 08:49:37 GMT".
 *
 * The string is written into <buf>, which provides <bufsize> bytes.
 * If <bufsize> is 0, <buf> is left untouched. If the timestamp cannot be
 * converted into broken-down time, <buf> is set to the empty string.
 *
 * Returns <buf>.
 */
char *wget_http_print_date(int64_t t, char *buf, size_t bufsize)
{
  static const char *dnames[7] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
  };
  static const char *mnames[12] = {
    "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  struct tm tm;
  time_t tt;

  if (!bufsize)
    return buf;

  // Decide by the real width of time_t, not by the data model: __LP64__ is
  // not defined on LLP64 systems nor on 32bit systems with a 64bit time_t,
  // although both can represent the full value of <t>.
  if (sizeof(time_t) < sizeof(t) && t > INT32_MAX)
    tt = (time_t) INT32_MAX; // clamp to the latest representable time
  else if (sizeof(time_t) < sizeof(t) && t < INT32_MIN)
    tt = (time_t) INT32_MIN; // clamp to the earliest representable time
  else
    tt = (time_t) t;

  if (gmtime_r(&tt, &tm)) {
    wget_snprintf(buf, bufsize, "%s, %02d %s %d %02d:%02d:%02d GMT",
      dnames[tm.tm_wday],tm.tm_mday,mnames[tm.tm_mon],tm.tm_year+1900,
      tm.tm_hour, tm.tm_min, tm.tm_sec);
  } else
    *buf = 0;

  return buf;
}
936
937
// adjust time (t) by number of seconds (n)
938
/*
939
static long long adjust_time(long long t, int n)
940
{
941
  static int days_per_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
942
  int day, mon, year, hour, min, sec, leap;
943
944
  sec = t % 100;
945
  min = (t /= 100) % 100;
946
  hour = (t /= 100) % 100;
947
  day = (t /= 100) % 100;
948
  mon = (t /= 100) % 100;
949
  year = t / 100;
950
951
  sec += n;
952
953
  if (n >= 0) {
954
    if (sec >= 60) {
955
      min += sec / 60;
956
      sec %= 60;
957
    }
958
    if (min >= 60) {
959
      hour += min / 60;
960
      min %= 60;
961
    }
962
    if (hour >= 24) {
963
      day += hour / 24;
964
      hour %= 24;
965
    }
966
    while (1) {
967
      if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
968
        leap = 1;
969
      else
970
        leap = 0;
971
      if (day > days_per_month[mon - 1] + leap) {
972
        day -= (days_per_month[mon - 1] + leap);
973
        mon++;
974
        if (mon > 12) {
975
          mon = 1;
976
          year++;
977
        }
978
      } else break;
979
    }
980
  } else { // n<0
981
    if (sec < 0) {
982
      min += (sec - 59) / 60;
983
      sec = 59 + (sec + 1) % 60;
984
    }
985
    if (min < 0) {
986
      hour += (min - 59) / 60;
987
      min = 59 + (min + 1) % 60;
988
    }
989
    if (hour < 0) {
990
      day += (hour - 23) / 24;
991
      hour = 23 + (hour + 1) % 24;
992
    }
993
    for (;;) {
994
      if (day <= 0) {
995
        if (--mon < 1) {
996
          mon = 12;
997
          year--;
998
        }
999
        if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
1000
          leap = 1;
1001
        else
1002
          leap = 0;
1003
        day += (days_per_month[mon - 1] + leap);
1004
      } else break;
1005
    }
1006
  }
1007
1008
  return (((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
1009
}
1010
1011
// return current GMT/UTC
1012
1013
static int64_t get_current_time(void)
1014
{
1015
  int64_t t = time(NULL);
1016
  struct tm tm;
1017
1018
  gmtime_r(&t, &tm);
1019
1020
  return (((((int64_t)(tm.tm_year + 1900)*100 + tm.tm_mon + 1)*100 + tm.tm_mday)*100 + tm.tm_hour)*100 + tm.tm_min)*100 + tm.tm_sec;
1021
}
1022
*/
1023
1024
/*
1025
 RFC 6265
1026
1027
 set-cookie-header = "Set-Cookie:" SP set-cookie-string
1028
 set-cookie-string = cookie-pair *( ";" SP cookie-av )
1029
 cookie-pair       = cookie-name "=" cookie-value
1030
 cookie-name       = token
1031
 cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
1032
 cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
1033
                       ; US-ASCII characters excluding CTLs,
1034
                       ; whitespace DQUOTE, comma, semicolon,
1035
                       ; and backslash
1036
 token             = <token, defined in [RFC2616], Section 2.2>
1037
1038
 cookie-av         = expires-av / max-age-av / domain-av /
1039
                     path-av / secure-av / httponly-av /
1040
                     extension-av
1041
 expires-av        = "Expires=" sane-cookie-date
1042
 sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
1043
 max-age-av        = "Max-Age=" non-zero-digit *DIGIT
1044
                       ; In practice, both expires-av and max-age-av
1045
                       ; are limited to dates representable by the
1046
                       ; user agent.
1047
 non-zero-digit    = %x31-39
1048
                       ; digits 1 through 9
1049
 domain-av         = "Domain=" domain-value
1050
 domain-value      = <subdomain>
1051
                       ; defined in [RFC1034], Section 3.5, as
1052
                       ; enhanced by [RFC1123], Section 2.1
1053
 path-av           = "Path=" path-value
1054
 path-value        = <any CHAR except CTLs or ";">
1055
 secure-av         = "Secure"
1056
 httponly-av       = "HttpOnly"
1057
 extension-av      = <any CHAR except CTLs or ";">
1058
*/
1059
/*
 * Parse the Set-Cookie header value <s> into <cookie>.
 *
 * This is a thin wrapper: all work is delegated to
 * wget_cookie_parse_setcookie() and its return value is passed through.
 */
const char *wget_http_parse_setcookie(const char *s, wget_cookie **cookie)
{
  const char *rest = wget_cookie_parse_setcookie(s, cookie);

  return rest;
}
1063
1064
static void cookie_free(void *cookie)
1065
2.47k
{
1066
2.47k
  if (cookie)
1067
2.47k
    wget_cookie_free((wget_cookie **) &cookie);
1068
2.47k
}
1069
1070
/*
 * Return non-zero if the <namelen> bytes at <name> are exactly <literal>,
 * compared without regard to ASCII case.
 *
 * The length check is essential: comparing only <namelen> bytes would accept
 * every prefix of <literal> (including the empty name) as a match.
 */
static int http_header_name_equals(const char *name, size_t namelen, const char *literal)
{
  return namelen == strlen(literal) && !wget_strncasecmp_ascii(name, literal, namelen);
}

/*
 * Parse a single header field and store the result in <resp>.
 *
 * <name>/<namelen>   header field name (need not be 0-terminated at namelen)
 * <value>/<valuelen> header field value (need not be 0-terminated at valuelen)
 *
 * Returns
 *   WGET_E_SUCCESS  the header was recognized and processed
 *   WGET_E_UNKNOWN  the header is not handled here (or not applicable)
 *   WGET_E_INVALID  <name> or <value> is NULL
 *   WGET_E_MEMORY   an allocation failed
 */
int wget_http_parse_header_line(wget_http_response *resp, const char *name, size_t namelen, const char *value, size_t valuelen)
{
  if (!name || !value)
    return WGET_E_INVALID;

  char valuebuf[256];
  char *value0;
  int ret = WGET_E_SUCCESS;

  // work on a copy of the value; it is released below if it is not valuebuf
  value0 = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), value, valuelen);
  if (!value0)
    return WGET_E_MEMORY;

  // '| 0x20' folds ASCII letters to lowercase; ':' already has that bit set
  switch (*name | 0x20) {
  case ':':
    // HTTP/2 pseudo header, the value must be a three digit status code
    if (namelen == 7 && !memcmp(name, ":status", 7) && valuelen == 3
      && c_isdigit(value[0]) && c_isdigit(value[1]) && c_isdigit(value[2]))
    {
      resp->code = ((value[0] - '0') * 10 + (value[1] - '0')) * 10 + (value[2] - '0');
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'c':
    if (http_header_name_equals(name, namelen, "content-encoding")) {
      wget_http_parse_content_encoding(value0, &resp->content_encoding);
    } else if (http_header_name_equals(name, namelen, "content-type")) {
      // only the first Content-Type header is taken into account
      if (!resp->content_type && !resp->content_type_encoding)
        wget_http_parse_content_type(value0, &resp->content_type, &resp->content_type_encoding);
    } else if (http_header_name_equals(name, namelen, "content-length")) {
      resp->content_length = (size_t)atoll(value0);
      resp->content_length_valid = 1;
    } else if (http_header_name_equals(name, namelen, "content-disposition")) {
      // only the first filename is taken into account
      if (!resp->content_filename)
        wget_http_parse_content_disposition(value0, &resp->content_filename);
    } else if (http_header_name_equals(name, namelen, "connection")) {
      wget_http_parse_connection(value0, &resp->keep_alive);
    } else if (http_header_name_equals(name, namelen, "Content-Security-Policy")) {
      resp->csp = 1;
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'd':
    if (http_header_name_equals(name, namelen, "digest")) {
      // https://tools.ietf.org/html/rfc3230
      wget_http_digest digest;
      wget_http_parse_digest(value0, &digest);
      // debug_printf("%s: %s\n",digest.algorithm,digest.encoded_digest);
      if (!resp->digests) {
        resp->digests = wget_vector_create(4, NULL);
        wget_vector_set_destructor(resp->digests, (wget_vector_destructor *) wget_http_free_digest);
      }
      wget_vector_add_memdup(resp->digests, &digest, sizeof(digest));
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'e':
    if (http_header_name_equals(name, namelen, "etag")) {
      // only the first ETag is taken into account
      if (!resp->etag)
        wget_http_parse_etag(value0, &resp->etag);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'i':
    if (http_header_name_equals(name, namelen, "icy-metaint")) {
      resp->icy_metaint = atoi(value0);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'l':
    if (http_header_name_equals(name, namelen, "last-modified")) {
      // Last-Modified: Thu, 07 Feb 2008 15:03:24 GMT
      resp->last_modified = wget_http_parse_full_date(value0);
    } else if (resp->code / 100 == 3 && http_header_name_equals(name, namelen, "location")) {
      // only the first Location of a 3xx response is taken into account
      if (!resp->location)
        wget_http_parse_location(value0, &resp->location);
    } else if (resp->code / 100 == 3 && http_header_name_equals(name, namelen, "link")) {
      // debug_printf("s=%.31s\n",s);
      wget_http_link link;
      wget_http_parse_link(value0, &link);
      // debug_printf("link->uri=%s\n",link.uri);
      if (!resp->links) {
        resp->links = wget_vector_create(8, NULL);
        wget_vector_set_destructor(resp->links, (wget_vector_destructor *) wget_http_free_link);
      }
      wget_vector_add_memdup(resp->links, &link, sizeof(link));
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'p':
    if (http_header_name_equals(name, namelen, "public-key-pins")) {
      // only the first Public-Key-Pins header is taken into account
      if (!resp->hpkp) {
        resp->hpkp = wget_hpkp_new();
        wget_http_parse_public_key_pins(value0, resp->hpkp);
        debug_printf("new host pubkey pinnings added to hpkp db\n");
      }
    }
    else if (http_header_name_equals(name, namelen, "proxy-authenticate")) {
      wget_http_challenge *challenge = wget_malloc(sizeof(wget_http_challenge));

      if (!challenge) {
        ret = WGET_E_MEMORY;
        goto out;
      }

      wget_http_parse_challenge(value0, challenge);

      if (!resp->challenges) {
        resp->challenges = wget_vector_create(2, NULL);
        wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge);
      }
      wget_vector_add(resp->challenges, challenge);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 's':
    if (http_header_name_equals(name, namelen, "set-cookie")) {
      // this is a parser. content validation must be done by higher level functions.
      wget_cookie *cookie = NULL; // stays NULL if the parser does not hand out a cookie
      wget_http_parse_setcookie(value0, &cookie);

      if (cookie) {
        if (!resp->cookies) {
          resp->cookies = wget_vector_create(4, NULL);
          wget_vector_set_destructor(resp->cookies, cookie_free);
        }
        wget_vector_add(resp->cookies, cookie);
      }
    }
    else if (http_header_name_equals(name, namelen, "strict-transport-security")) {
      resp->hsts = 1;
      wget_http_parse_strict_transport_security(value0, &resp->hsts_maxage, &resp->hsts_include_subdomains);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 't':
    if (http_header_name_equals(name, namelen, "transfer-encoding")) {
      wget_http_parse_transfer_encoding(value0, &resp->transfer_encoding);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'w':
    if (http_header_name_equals(name, namelen, "www-authenticate")) {
      wget_http_challenge *challenge = wget_malloc(sizeof(wget_http_challenge));

      if (!challenge) {
        ret = WGET_E_MEMORY;
        goto out;
      }

      wget_http_parse_challenge(value0, challenge);

      if (!resp->challenges) {
        resp->challenges = wget_vector_create(2, NULL);
        wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge);
      }
      wget_vector_add(resp->challenges, challenge);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'x':
    if (http_header_name_equals(name, namelen, "x-archive-orig-last-modified")) {
      resp->last_modified = wget_http_parse_full_date(value0);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  default:
    ret = WGET_E_UNKNOWN;
    break;
  }

out:
  if (value0 != valuebuf)
    xfree(value0);

  return ret;
}
1244
1245
/* content of <buf> will be destroyed */
1246
/* buf must be 0-terminated */
1247
wget_http_response *wget_http_parse_response_header(char *buf)
1248
6.62k
{
1249
6.62k
  char *eol;
1250
1251
6.62k
  wget_http_response *resp = wget_calloc(1, sizeof(wget_http_response));
1252
6.62k
  if (!resp)
1253
0
    return NULL;
1254
1255
6.62k
  if (sscanf(buf, " HTTP/%3hd.%3hd %3hd %31[^\r\n] ",
1256
6.62k
    &resp->major, &resp->minor, &resp->code, resp->reason) >= 3) {
1257
1.98k
    if ((eol = strchr(buf + 10, '\n'))) {
1258
      // eol[-1]=0;
1259
      // debug_printf("# %s\n",buf);
1260
1.97k
    } else {
1261
      // empty HTTP header
1262
1
      return resp;
1263
1
    }
1264
4.64k
  } else if (sscanf(buf, " ICY %3hd %31[^\r\n] ", &resp->code, resp->reason) >= 1) {
1265
4.63k
    if ((eol = strchr(buf + 4, '\n'))) {
1266
      // eol[-1]=0;
1267
      // debug_printf("# %s\n",buf);
1268
4.63k
    } else {
1269
      // empty HTTP header
1270
1
      return resp;
1271
1
    }
1272
4.63k
  } else {
1273
6
    error_printf(_("HTTP response header not found\n"));
1274
6
    xfree(resp);
1275
6
    return NULL;
1276
6
  }
1277
1278
  // 'close' is default on HTTP/1.0, else 'keep_alive' is default
1279
6.61k
  if ((resp->major == 1 && resp->minor >= 1) || resp->major > 1)
1280
1.94k
    resp->keep_alive = 1;
1281
1282
41.2k
  for (char *line = eol + 1; eol && *line && *line != '\r' && *line != '\n'; line = eol ? eol + 1 : NULL) {
1283
34.6k
    eol = strchr(line, '\n');
1284
34.9k
    while (eol && c_isblank(eol[1])) { // handle split lines
1285
351
      *eol = eol[-1] = ' ';
1286
351
      eol = strchr(eol, '\n');
1287
351
    }
1288
1289
34.6k
    if (eol) {
1290
28.0k
      if (eol[-1] == '\r')
1291
2.75k
        eol[-1] = 0;
1292
25.3k
      else
1293
25.3k
        *eol = 0;
1294
28.0k
    }
1295
1296
34.6k
    size_t namelen, valuelen;
1297
34.6k
    const char *name;
1298
34.6k
    const char *value = wget_parse_name_fixed(line, &name, &namelen);
1299
    // value now points directly after :
1300
1301
34.6k
    if (eol)
1302
28.0k
      valuelen = eol - value - (eol[-1] == 0);
1303
6.56k
    else
1304
6.56k
      valuelen = strlen(value);
1305
1306
34.6k
    wget_http_parse_header_line(resp, name, namelen, value, valuelen);
1307
34.6k
  }
1308
1309
6.61k
  return resp;
1310
6.62k
}
1311
1312
/*
 * Release a header parameter: its value, its name and
 * finally the structure itself.
 */
void wget_http_free_param(wget_http_header_param *param)
{
  xfree(param->value);
  xfree(param->name);
  xfree(param);
}
1318
1319
/*
 * Release a link entry: its type, its URI and
 * finally the structure itself.
 */
void wget_http_free_link(wget_http_link *link)
{
  xfree(link->type);
  xfree(link->uri);
  xfree(link);
}
1325
1326
/* Release the vector of links referenced by <links>. */
void wget_http_free_links(wget_vector **links)
{
  wget_vector_free(links);
}
1330
1331
/*
 * Release a digest entry: its encoded digest, its algorithm name and
 * finally the structure itself.
 */
void wget_http_free_digest(wget_http_digest *digest)
{
  xfree(digest->encoded_digest);
  xfree(digest->algorithm);
  xfree(digest);
}
1337
1338
/* Release the vector of digests referenced by <digests>. */
void wget_http_free_digests(wget_vector **digests)
{
  wget_vector_free(digests);
}
1342
1343
/*
 * Release an authentication challenge: its parameter map, its
 * scheme name and finally the structure itself.
 */
void wget_http_free_challenge(wget_http_challenge *challenge)
{
  wget_stringmap_free(&challenge->params);
  xfree(challenge->auth_scheme);
  xfree(challenge);
}
1349
1350
/* Release the vector of challenges referenced by <challenges>. */
void wget_http_free_challenges(wget_vector **challenges)
{
  wget_vector_free(challenges);
}
1354
1355
/* Release the vector of cookies referenced by <cookies>. */
void wget_http_free_cookies(wget_vector **cookies)
{
  wget_vector_free(cookies);
}
1359
1360
/*
 * Release the HPKP entry referenced by <hpkp> and reset the
 * caller's pointer to NULL. A NULL <hpkp> is ignored.
 */
void wget_http_free_hpkp_entries(wget_hpkp **hpkp)
{
  if (!hpkp)
    return;

  wget_hpkp_free(*hpkp);
  *hpkp = NULL;
}
1367
1368
/*
 * Release a response together with everything it owns: the link, digest,
 * challenge and cookie vectors, the HPKP entry, the parsed header strings
 * and the header and body buffers.
 *
 * A NULL <resp> or a NULL <*resp> is ignored.
 */
void wget_http_free_response(wget_http_response **resp)
{
  if (!resp || !*resp)
    return;

  wget_http_response *r = *resp;

  // collections first
  wget_http_free_links(&r->links);
  wget_http_free_digests(&r->digests);
  wget_http_free_challenges(&r->challenges);
  wget_http_free_cookies(&r->cookies);
  wget_http_free_hpkp_entries(&r->hpkp);

  // then the single strings; the reason field is not freed here
  xfree(r->content_type);
  xfree(r->content_type_encoding);
  xfree(r->content_filename);
  xfree(r->location);
  xfree(r->etag);

  wget_buffer_free(&r->header);
  wget_buffer_free(&r->body);

  xfree(*resp);
}
1387
1388
/* for security reasons: set all freed pointers to NULL */
1389
void wget_http_free_request(wget_http_request **req)
1390
3.84k
{
1391
3.84k
  if (req && *req) {
1392
3.84k
    wget_buffer_deinit(&(*req)->esc_resource);
1393
3.84k
    wget_buffer_deinit(&(*req)->esc_host);
1394
3.84k
    wget_vector_free(&(*req)->headers);
1395
3.84k
    xfree((*req)->body);
1396
3.84k
    xfree(*req);
1397
3.84k
  }
1398
3.84k
}