Coverage Report

Created: 2024-03-08 06:32

/src/wget2/libwget/http_parse.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2017-2024 Free Software Foundation, Inc.
3
 *
4
 * This file is part of libwget.
5
 *
6
 * Libwget is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU Lesser General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * Libwget is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public License
17
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
18
 *
19
 *
20
 * HTTP parsing routines
21
 *
22
 * Resources:
23
 * RFC 2616
24
 * RFC 6265
25
 *
26
 */
27
28
#include <config.h>
29
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include <c-ctype.h>
34
#include <time.h>
35
#include <errno.h>
36
#include <stdint.h>
37
38
#include <wget.h>
39
#include "private.h"
40
#include "http.h"
41
42
0
#define HTTP_CTYPE_SEPARATOR (1<<0)

// Classification table indexed by octet value. An entry has the
// HTTP_CTYPE_SEPARATOR bit set when the octet is one of the 'separators'
// characters; all other entries are implicitly zero.
static const unsigned char
  http_ctype[256] = {
    ['\t'] = HTTP_CTYPE_SEPARATOR,
    [' '] = HTTP_CTYPE_SEPARATOR,
    ['"'] = HTTP_CTYPE_SEPARATOR,
    ['('] = HTTP_CTYPE_SEPARATOR,
    [')'] = HTTP_CTYPE_SEPARATOR,
    [','] = HTTP_CTYPE_SEPARATOR,
    ['/'] = HTTP_CTYPE_SEPARATOR,
    [':'] = HTTP_CTYPE_SEPARATOR,
    [';'] = HTTP_CTYPE_SEPARATOR,
    ['<'] = HTTP_CTYPE_SEPARATOR,
    ['='] = HTTP_CTYPE_SEPARATOR,
    ['>'] = HTTP_CTYPE_SEPARATOR,
    ['?'] = HTTP_CTYPE_SEPARATOR,
    ['@'] = HTTP_CTYPE_SEPARATOR,
    ['['] = HTTP_CTYPE_SEPARATOR,
    ['\\'] = HTTP_CTYPE_SEPARATOR,
    [']'] = HTTP_CTYPE_SEPARATOR,
    ['{'] = HTTP_CTYPE_SEPARATOR,
    ['}'] = HTTP_CTYPE_SEPARATOR
  };

// Returns true if 'c' is flagged as a separator in http_ctype.
// The cast to unsigned char keeps the table index non-negative even when
// plain char is signed.
static inline bool http_isseparator(char c)
{
  const unsigned char octet = (unsigned char) c;

  return (http_ctype[octet] & HTTP_CTYPE_SEPARATOR) != 0;
}
71
72
/**Gets the hostname of the remote endpoint.
73
 * \param conn a wget_http_connection
74
 * \return A string containing hostname. Returned memory is owned by
75
 *         _conn_ and should not be modified or freed.
76
 */
77
const char *wget_http_get_host(const wget_http_connection *conn)
78
0
{
79
0
  return conn->esc_host;
80
0
}
81
82
/**Gets the port number of the remote endpoint.
83
 * \param conn a wget_http_connection
84
 * \return A string containing port number. Returned memory is owned by
85
 *         _conn_ and should not be modified or freed.
86
 */
87
uint16_t wget_http_get_port(const wget_http_connection *conn)
88
0
{
89
0
  return conn->port;
90
0
}
91
92
/**Get the scheme used by the connection.
93
 * \param conn a wget_http_connection
94
 * \return A WGET_IRI_SCHEM_* value.
95
 */
96
wget_iri_scheme wget_http_get_scheme(const wget_http_connection *conn)
97
0
{
98
0
  return conn->scheme;
99
0
}
100
101
/**Gets the protocol used by the connection
102
 * \param conn a wget_http_connection
103
 * \return Either WGET_PROTOCOL_HTTP_1_1 or WGET_PROTOCOL_HTTP_2_0
104
 */
105
int wget_http_get_protocol(const wget_http_connection *conn)
106
0
{
107
0
  return conn->protocol;
108
0
}
109
110
// Exported wrapper around the file-local http_isseparator():
// returns true if 'c' is an HTTP separator character.
bool wget_http_isseparator(char c)
{
  const bool is_separator = http_isseparator(c);

  return is_separator;
}
114
115
// TEXT           = <any OCTET except CTLs, but including LWS>
116
//int http_istext(char c)
117
//{
118
//  return (c>=32 && c<=126) || c=='\r' || c=='\n' || c=='\t';
119
//}
120
121
// token          = 1*<any CHAR except CTLs or separators>
122
123
// Returns true if 'c' may appear in a token: its value must lie in 33..126
// (so no control characters, no space, no DEL, nothing negative / above 126)
// and it must not be a separator.
bool wget_http_istoken(char c)
{
  if (c <= 32 || c > 126)
    return false;

  return !http_isseparator(c);
}
127
128
// Scans the run of token characters starting at 's'.
// '*token' receives a copy (made by wget_strmemdup) of that run, which may be
// empty. Returns a pointer to the first character that is not part of the token.
const char *wget_http_parse_token(const char *s, const char **token)
{
  const char *start = s;

  while (wget_http_istoken(*s))
    s++;

  *token = wget_strmemdup(start, s - start);

  return s;
}
138
139
// quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
140
// qdtext         = <any TEXT except <">>
141
// quoted-pair    = "\" CHAR
142
// TEXT           = <any OCTET except CTLs, but including LWS>
143
// CTL            = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
144
// LWS            = [CRLF] 1*( SP | HT )
145
146
// Parses a quoted-string that begins at 's'.
// If 's' does not start with a double quote, '*qstring' is set to NULL and
// 's' is returned unchanged. Otherwise '*qstring' receives a copy of the raw
// text between the quotes (backslash escapes are kept as-is) and the return
// value points behind the closing quote, or at the terminating NUL if the
// closing quote is missing.
const char *wget_http_parse_quoted_string(const char *s, const char **qstring)
{
  if (*s != '\"') {
    *qstring = NULL;
    return s;
  }

  const char *begin = ++s;

  // relaxed scanning: stop at the closing quote or at the end of the string
  for (; *s && *s != '\"'; s++) {
    // a backslash followed by any character is a quoted-pair: step over both
    if (*s == '\\' && s[1])
      s++;
  }

  *qstring = wget_strmemdup(begin, s - begin);

  if (*s == '\"')
    s++;

  return s;
}
167
168
// generic-param  =  token [ EQUAL gen-value ]
169
// gen-value      =  token / host / quoted-string
170
171
// Parses one parameter of the form  [;] name [ = ( token | quoted-string ) ].
// '*param' and '*value' are reset to NULL first. '*param' receives a copy of
// the (possibly empty) name unless the input ends before a name starts;
// '*value' receives a copy of the value only if an '=' follows the name.
// Returns a pointer behind the consumed input.
const char *wget_http_parse_param(const char *s, const char **param, const char **value)
{
  *param = NULL;
  *value = NULL;

  while (c_isblank(*s))
    s++;

  // optional leading ';' followed by optional blanks
  if (*s == ';') {
    do {
      s++;
    } while (c_isblank(*s));
  }

  if (*s == '\0')
    return s;

  const char *name = s;
  while (wget_http_istoken(*s))
    s++;
  *param = wget_strmemdup(name, s - name);

  while (c_isblank(*s))
    s++;

  if (*s == '\0')
    return s;

  // The character after the name is consumed whether or not it is '='.
  // Loops of the form 'while (*s) s = wget_http_parse_param(...)' in this
  // file depend on that to make progress on unexpected characters.
  if (*s++ != '=')
    return s;

  while (c_isblank(*s))
    s++;

  if (*s == '\"')
    return wget_http_parse_quoted_string(s, value);

  return wget_http_parse_token(s, value);
}
201
202
// message-header = field-name ":" [ field-value ]
203
// field-name     = token
204
// field-value    = *( field-content | LWS )
205
// field-content  = <the OCTETs making up the field-value
206
//                  and consisting of either *TEXT or combinations
207
//                  of token, separators, and quoted-string>
208
209
// Parses a header field name: skips leading blanks, copies the token into
// '*name' (via wget_http_parse_token) and then advances to the next ':'.
// Returns a pointer behind that ':' or, if there is none, to the
// terminating NUL.
const char *wget_http_parse_name(const char *s, const char **name)
{
  while (c_isblank(*s))
    s++;

  s = wget_http_parse_token(s, name);

  // ignore anything between the token and the colon
  while (*s && *s != ':')
    s++;

  if (*s == ':')
    s++;

  return s;
}
219
220
// Non-allocating variant of wget_http_parse_name(): '*name' is set to point
// into 's' at the start of the field name and '*namelen' to the length of the
// token found there. Returns a pointer behind the next ':' or, if there is
// none, to the terminating NUL.
const char *wget_parse_name_fixed(const char *s, const char **name, size_t *namelen)
{
  while (c_isblank(*s))
    s++;

  const char *start = s;

  while (wget_http_istoken(*s))
    s++;

  *name = start;
  *namelen = s - start;

  // ignore anything between the token and the colon
  while (*s && *s != ':')
    s++;

  if (*s == ':')
    s++;

  return s;
}
235
236
// Ordering callback for vectors of wget_http_header_param:
// compares the parameter names with wget_strcasecmp_ascii().
static int WGET_GCC_NONNULL_ALL compare_param(wget_http_header_param *p1, wget_http_header_param *p2)
{
  const int order = wget_strcasecmp_ascii(p1->name, p2->name);

  return order;
}
240
241
// Appends a copy of '*param' (sizeof(*param) bytes, via
// wget_vector_add_memdup) to the vector '*params'. If '*params' is NULL, the
// vector is created first with compare_param as its comparison function.
void wget_http_add_param(wget_vector **params, wget_http_header_param *param)
{
  if (*params == NULL)
    *params = wget_vector_create(4, (wget_vector_compare_fn *) compare_param);

  wget_vector_add_memdup(*params, param, sizeof(*param));
}
246
247
/*
248
  Link           = "Link" ":" #link-value
249
  link-value     = "<" URI-Reference ">" *( ";" link-param )
250
  link-param     = ( ( "rel" "=" relation-types )
251
            | ( "anchor" "=" <"> URI-Reference <"> )
252
            | ( "rev" "=" relation-types )
253
            | ( "hreflang" "=" Language-Tag )
254
            | ( "media" "=" ( MediaDesc | ( <"> MediaDesc <"> ) ) )
255
            | ( "title" "=" quoted-string )
256
            | ( "title*" "=" ext-value )
257
            | ( "type" "=" ( media-type | quoted-mt ) )
258
            | ( link-extension ) )
259
  link-extension = ( parmname [ "=" ( ptoken | quoted-string ) ] )
260
            | ( ext-name-star "=" ext-value )
261
  ext-name-star  = parmname "*" ; reserved for RFC2231-profiled
262
                      ; extensions.  Whitespace NOT
263
                      ; allowed in between.
264
  ptoken         = 1*ptokenchar
265
  ptokenchar     = "!" | "#" | "$" | "%" | "&" | "'" | "("
266
            | ")" | "*" | "+" | "-" | "." | "/" | DIGIT
267
            | ":" | "<" | "=" | ">" | "?" | "@" | ALPHA
268
            | "[" | "]" | "^" | "_" | "`" | "{" | "|"
269
            | "}" | "~"
270
  media-type     = type-name "/" subtype-name
271
  quoted-mt      = <"> media-type <">
272
  relation-types = relation-type
273
            | <"> relation-type *( 1*SP relation-type ) <">
274
  relation-type  = reg-rel-type | ext-rel-type
275
  reg-rel-type   = LOALPHA *( LOALPHA | DIGIT | "." | "-" )
276
  ext-rel-type   = URI
277
*/
278
// Parses one link-value ( "<" URI-Reference ">" *( ";" link-param ) ) into
// '*link', which is zeroed first. Recognized parameters are 'rel'
// (describedby / duplicate), 'pri' and 'type' (first occurrence wins; the
// string is handed over to link->type).
//
// Returns a pointer behind the parsed text. If the input does not start with
// '<' the position after the leading blanks is returned.
// NOTE(review): when '<' is present but the closing '>' is missing, the
// result of strchr() is returned, i.e. NULL - callers must be prepared for that.
const char *wget_http_parse_link(const char *s, wget_http_link *link)
{
  memset(link, 0, sizeof(*link));

  while (c_isblank(*s))
    s++;

  if (*s != '<')
    return s;

  // URI reference as of RFC 3987 (if relative, resolve as of RFC 3986)
  const char *uri = s + 1;
  const char *uri_end = strchr(uri, '>');

  if (uri_end == NULL)
    return NULL;

  link->uri = wget_strmemdup(uri, uri_end - uri);
  s = uri_end + 1;

  while (c_isblank(*s))
    s++;

  while (*s == ';') {
    const char *name = NULL, *value = NULL;

    s = wget_http_parse_param(s, &name, &value);

    if (name && value) {
      if (!wget_strcasecmp_ascii(name, "rel")) {
        if (!wget_strcasecmp_ascii(value, "describedby"))
          link->rel = link_rel_describedby;
        else if (!wget_strcasecmp_ascii(value, "duplicate"))
          link->rel = link_rel_duplicate;
      } else if (!wget_strcasecmp_ascii(name, "pri")) {
        link->pri = atoi(value);
      } else if (!wget_strcasecmp_ascii(name, "type")) {
        if (!link->type) {
          // hand the string over to the link; prevents the xfree() below
          link->type = value;
          value = NULL;
        }
      }

      while (c_isblank(*s))
        s++;
    }

    xfree(name);
    xfree(value);
  }

  // skip whatever is left up to the next blank
  while (*s && !c_isblank(*s))
    s++;

  return s;
}
328
329
// from RFC 3230:
330
// Digest = "Digest" ":" #(instance-digest)
331
// instance-digest = digest-algorithm "=" <encoded digest output>
332
// digest-algorithm = token
333
334
// Parses one instance-digest ( digest-algorithm "=" <encoded digest> ) into
// '*digest', which is zeroed first. The encoded digest may be a quoted-string
// or an unquoted run of characters ending at a blank, ',' or ';'.
// Returns a pointer to the next blank or to the terminating NUL.
const char *wget_http_parse_digest(const char *s, wget_http_digest *digest)
{
  memset(digest, 0, sizeof(*digest));

  while (c_isblank(*s))
    s++;

  s = wget_http_parse_token(s, &digest->algorithm);

  while (c_isblank(*s))
    s++;

  if (*s == '=') {
    s++;

    while (c_isblank(*s))
      s++;

    if (*s != '\"') {
      const char *start = s;

      while (*s && !c_isblank(*s) && *s != ',' && *s != ';')
        s++;

      digest->encoded_digest = wget_strmemdup(start, s - start);
    } else {
      s = wget_http_parse_quoted_string(s, &digest->encoded_digest);
    }
  }

  // skip whatever is left up to the next blank
  while (*s && !c_isblank(*s))
    s++;

  return s;
}
360
361
// RFC 2617:
362
// challenge   = auth-scheme 1*SP 1#auth-param
363
// auth-scheme = token
364
// auth-param  = token "=" ( token | quoted-string )
365
366
// Parses one challenge ( auth-scheme SP 1#auth-param ) into '*challenge',
// which is zeroed first.
//
// On a syntax error (no single space directly behind the scheme token)
// challenge->auth_scheme is released again (left NULL) and the position
// behind the token is returned - which may be the position the scan
// started from if no token was found.
//
// Parameters with a value are stored in challenge->params (created on demand).
// A non-empty name without a value is taken as the auth-scheme of the next
// challenge: parsing stops and the position in front of it is returned.
const char *wget_http_parse_challenge(const char *s, wget_http_challenge *challenge)
{
  memset(challenge, 0, sizeof(*challenge));

  while (c_isblank(*s))
    s++;

  s = wget_http_parse_token(s, &challenge->auth_scheme);

  if (*s != ' ') {
    // parse/syntax error: auth scheme must have a space at the end of the token
    xfree(challenge->auth_scheme);
    return s;
  }
  s++;

  wget_http_header_param param;
  do {
    const char *param_start = s;

    s = wget_http_parse_param(s, &param.name, &param.value);

    if (param.name) {
      if (!param.value) {
        const bool has_name = *param.name != 0;

        xfree(param.name);

        if (has_name)
          return param_start; // a new scheme detected

        continue; // empty name, no value: try again from the new position
      }

      if (!challenge->params)
        challenge->params = wget_stringmap_create_nocase(8);

      wget_stringmap_put(challenge->params, param.name, param.value);
    }

    while (c_isblank(*s))
      s++;

    if (*s != ',')
      break;

    s++; // step over the ',' that separates auth-params
  } while (*s);

  return s;
}
409
410
// Parses a list of challenges from 's' and appends a copy of each challenge
// that has an auth-scheme to the vector 'challenges'.
//
// Parsing stops at the end of the string, or as soon as
// wget_http_parse_challenge() fails to consume any input. The latter happens
// on malformed input (e.g. a stray ',' where a scheme is expected) and would
// otherwise make this loop spin forever on the same position.
//
// Returns a pointer to where parsing stopped.
const char *wget_http_parse_challenges(const char *s, wget_vector *challenges)
{
  wget_http_challenge challenge;

  while (*s) {
    const char *next = wget_http_parse_challenge(s, &challenge);

    if (challenge.auth_scheme) {
      wget_vector_add_memdup(challenges, &challenge, sizeof(challenge));
    }

    if (next == s)
      break; // no progress - bail out instead of looping endlessly

    s = next;
  }

  return s;
}
423
424
// Extracts the value of a Location header: everything from the first
// non-blank character up to CR, LF or the end of the string, with trailing
// blanks removed. '*location' receives a copy made by wget_strmemdup().
// Returns a pointer behind the copied text.
const char *wget_http_parse_location(const char *s, const char **location)
{
  while (c_isblank(*s))
    s++;

  const char *start = s;

  /*
   * The correct (and still lenient) variant was:
   * for (p = s; *s && !c_isblank(*s); s++);
   *
   * And then there were spaces in the URI, see
   *   https://gitlab.com/gnuwget/wget2/issues/420
   */
  s += strcspn(s, "\r\n");

  // remove trailing spaces (OWS - optional white space)
  while (s > start && c_isblank(s[-1]))
    s--;

  *location = wget_strmemdup(start, s - start);

  return s;
}
445
446
// Transfer-Encoding       = "Transfer-Encoding" ":" 1#transfer-coding
447
// transfer-coding         = "chunked" | transfer-extension
448
// transfer-extension      = token *( ";" parameter )
449
// parameter               = attribute "=" value
450
// attribute               = token
451
// value                   = token | quoted-string
452
453
// Sets '*transfer_encoding' to wget_transfer_encoding_identity if the text
// after leading blanks equals "identity" (ASCII case-insensitive, compared
// against the whole remaining string); anything else is treated as
// wget_transfer_encoding_chunked.
// Returns a pointer behind the leading token.
const char *wget_http_parse_transfer_encoding(const char *s, wget_transfer_encoding *transfer_encoding)
{
  while (c_isblank(*s))
    s++;

  *transfer_encoding = wget_strcasecmp_ascii(s, "identity")
    ? wget_transfer_encoding_chunked
    : wget_transfer_encoding_identity;

  while (wget_http_istoken(*s))
    s++;

  return s;
}
466
467
// Content-Type   = "Content-Type" ":" media-type
468
// media-type     = type "/" subtype *( ";" parameter )
469
// type           = token
470
// subtype        = token
471
// example: Content-Type: text/html; charset=ISO-8859-4
472
473
// Parses a Content-Type value such as "text/html; charset=ISO-8859-4".
// If 'content_type' is not NULL, '*content_type' receives a copy of the
// media type (token characters and '/').
// If 'charset' is not NULL, the parameters are scanned and '*charset'
// receives the value of the first 'charset' parameter (NULL if there is
// none or if it has no value); scanning stops there.
// Returns a pointer behind the consumed input.
const char *wget_http_parse_content_type(const char *s, const char **content_type, const char **charset)
{
  while (c_isblank(*s))
    s++;

  const char *start = s;

  while (*s && (wget_http_istoken(*s) || *s == '/'))
    s++;

  if (content_type)
    *content_type = wget_strmemdup(start, s - start);

  if (!charset)
    return s;

  *charset = NULL;

  while (*s) {
    const char *name, *value;

    s = wget_http_parse_param(s, &name, &value);

    const bool is_charset = !wget_strcasecmp_ascii("charset", name);

    xfree(name);

    if (is_charset) {
      *charset = value; // ownership of the value goes to the caller
      break;
    }

    xfree(value);
  }

  return s;
}
501
502
// RFC 6266 - Use of the Content-Disposition Header Field in the Hypertext Transfer Protocol (HTTP)
503
// content-disposition = "Content-Disposition" ":" disposition-type *( ";" disposition-parm )
504
// disposition-type    = "inline" | "attachment" | disp-ext-type ; case-insensitive
505
// disp-ext-type       = token
506
// disposition-parm    = filename-parm | disp-ext-parm
507
// filename-parm       = "filename" "=" value | "filename*" "=" ext-value
508
// disp-ext-parm       = token "=" value | ext-token "=" ext-value
509
// ext-token           = <the characters in token, followed by "*">
510
//
511
// Defined in [RFC2616]:
512
//
513
// token         = <token, defined in [RFC2616], Section 2.2>
514
// quoted-string = <quoted-string, defined in [RFC2616], Section 2.2>
515
// value         = <value, defined in [RFC2616], Section 3.6> ; token | quoted-string
516
//
517
// Defined in [RFC5987]:
518
//
519
// ext-value   = <ext-value, defined in [RFC5987], Section 3.2>
520
521
const char *wget_http_parse_content_disposition(const char *s, const char **filename)
522
0
{
523
0
  wget_http_header_param param;
524
0
  char *p;
525
526
0
  if (filename) {
527
0
    *filename = NULL;
528
529
0
    while (*s && !*filename) {
530
0
      s = wget_http_parse_param(s, &param.name, &param.value);
531
0
      if (param.value && !wget_strcasecmp_ascii("filename", param.name)) {
532
        // just take the last path part as filename
533
0
        if (!*filename) {
534
0
          if ((p = strpbrk(param.value,"/\\"))) {
535
0
            p = wget_strdup(p + 1);
536
0
          } else {
537
0
            p = (char *) param.value;
538
0
            param.value = NULL;
539
0
          }
540
541
0
          wget_percent_unescape(p);
542
0
          if (!wget_str_is_valid_utf8(p)) {
543
            // if it is not UTF-8, assume ISO-8859-1
544
            // see https://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http
545
0
            *filename = wget_str_to_utf8(p, "iso-8859-1");
546
0
            xfree(p);
547
0
          } else {
548
0
            *filename = p;
549
0
            p = NULL;
550
0
          }
551
0
        }
552
0
      } else if (param.value && !wget_strcasecmp_ascii("filename*", param.name)) {
553
        // RFC5987
554
        // ext-value     = charset  "'" [ language ] "'" value-chars
555
        // ; like RFC 2231's <extended-initial-value>
556
        // ; (see [RFC2231], Section 7)
557
558
        // charset       = "UTF-8" / "ISO-8859-1" / mime-charset
559
560
        // mime-charset  = 1*mime-charsetc
561
        // mime-charsetc = ALPHA / DIGIT
562
        //    / "!" / "#" / "$" / "%" / "&"
563
        //    / "+" / "-" / "^" / "_" / "`"
564
        //    / "{" / "}" / "~"
565
        //    ; as <mime-charset> in Section 2.3 of [RFC2978]
566
        //    ; except that the single quote is not included
567
        //    ; SHOULD be registered in the IANA charset registry
568
569
        // language      = <Language-Tag, defined in [RFC5646], Section 2.1>
570
571
        // value-chars   = *( pct-encoded / attr-char )
572
573
        // pct-encoded   = "%" HEXDIG HEXDIG
574
        //    ; see [RFC3986], Section 2.1
575
576
        // attr-char     = ALPHA / DIGIT
577
        //    / "!" / "#" / "$" / "&" / "+" / "-" / "."
578
        //    / "^" / "_" / "`" / "|" / "~"
579
        //    ; token except ( "*" / "'" / "%" )
580
581
0
        if ((p = strchr(param.value, '\''))) {
582
0
          const char *charset = param.value;
583
0
          const char *language = p + 1;
584
0
          *p = 0;
585
0
          if ((p = strchr(language, '\''))) {
586
0
            *p++ = 0;
587
0
            if (*p) {
588
0
              wget_percent_unescape(p);
589
0
              if (wget_str_needs_encoding(p))
590
0
                *filename = wget_str_to_utf8(p, charset);
591
0
              else
592
0
                *filename = wget_strdup(p);
593
594
              // just take the last path part as filename
595
0
              if (*filename && (p = strpbrk(*filename, "/\\"))) {
596
0
                p = wget_strdup(p + 1);
597
0
                xfree(*filename);
598
0
                *filename = p;
599
0
              }
600
601
0
              xfree(param.name);
602
0
              xfree(param.value);
603
0
              break; // stop looping, we found the final filename
604
0
            }
605
0
          }
606
0
        }
607
0
      }
608
0
      xfree(param.name);
609
0
      xfree(param.value);
610
0
    }
611
0
  }
612
613
0
  return s;
614
0
}
615
616
// RFC 7469
617
// Example:
618
//   Public-Key-Pins:
619
//        pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=";
620
//         pin-sha256="E9CZ9INDbd+2eRQozYqqbQ2yXLVKB9+xcprMF+44U1g=";
621
//         pin-sha256="LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=";
622
//         max-age=10000; includeSubDomains
623
// Parses a Public-Key-Pins value into 'hpkp'.
// 'include subdomains' is reset to false first. Then, for every parameter:
//  - 'max-age=<n>'     sets the max age (converted with atoll)
//  - 'pin-<type>=<v>'  adds a pin; the text behind "pin-" is passed as type
//  - 'includeSubDomains' (without a value) sets include subdomains to true
// Returns a pointer to the end of the parsed input.
const char *wget_http_parse_public_key_pins(const char *s, wget_hpkp *hpkp)
{
  wget_http_header_param param;

  wget_hpkp_set_include_subdomains(hpkp, false);

  while (*s) {
    s = wget_http_parse_param(s, &param.name, &param.value);

    if (!param.value) {
      if (!wget_strcasecmp_ascii(param.name, "includeSubDomains"))
        wget_hpkp_set_include_subdomains(hpkp, true);
    } else if (!wget_strcasecmp_ascii(param.name, "max-age")) {
      wget_hpkp_set_maxage(hpkp, (int64_t) atoll(param.value));
    } else if (!wget_strncasecmp_ascii(param.name, "pin-", 4)) {
      wget_hpkp_pin_add(hpkp, param.name + 4, param.value);
    }

    xfree(param.name);
    xfree(param.value);
  }

  return s;
}
649
650
// RFC 6797
651
//
652
// Strict-Transport-Security = "Strict-Transport-Security" ":" [ directive ]  *( ";" [ directive ] )
653
// directive                 = directive-name [ "=" directive-value ]
654
// directive-name            = token
655
// directive-value           = token | quoted-string
656
657
const char *wget_http_parse_strict_transport_security(const char *s, int64_t *maxage, bool *include_subdomains)
658
0
{
659
0
  wget_http_header_param param;
660
661
0
  *maxage = 0;
662
0
  *include_subdomains = 0;
663
664
0
  while (*s) {
665
0
    s = wget_http_parse_param(s, &param.name, &param.value);
666
667
0
    if (param.value) {
668
0
      if (!wget_strcasecmp_ascii(param.name, "max-age")) {
669
0
        *maxage = (int64_t) atoll(param.value);
670
0
      }
671
0
    } else {
672
0
      if (!wget_strcasecmp_ascii(param.name, "includeSubDomains")) {
673
0
        *include_subdomains = 1;
674
0
      }
675
0
    }
676
677
0
    xfree(param.name);
678
0
    xfree(param.value);
679
0
  }
680
681
0
  return s;
682
0
}
683
684
// Content-Encoding  = "Content-Encoding" ":" 1#content-coding
685
686
const char *wget_http_parse_content_encoding(const char *s, char *content_encoding)
687
0
{
688
0
  while (c_isblank(*s)) s++;
689
690
0
  if (!wget_strcasecmp_ascii(s, "gzip") || !wget_strcasecmp_ascii(s, "x-gzip"))
691
0
    *content_encoding = wget_content_encoding_gzip;
692
0
  else if (!wget_strcasecmp_ascii(s, "deflate"))
693
0
    *content_encoding = wget_content_encoding_deflate;
694
0
  else if (!wget_strcasecmp_ascii(s, "bzip2"))
695
0
    *content_encoding = wget_content_encoding_bzip2;
696
0
  else if (!wget_strcasecmp_ascii(s, "xz") || !wget_strcasecmp_ascii(s, "lzma") || !wget_strcasecmp_ascii(s, "x-lzma"))
697
    // 'xz' is the tag currently understood by Firefox (2.1.2014)
698
    // 'lzma' / 'x-lzma' are the tags currently understood by ELinks
699
0
    *content_encoding = wget_content_encoding_lzma;
700
0
  else if (!wget_strcasecmp_ascii(s, "br"))
701
0
    *content_encoding = wget_content_encoding_brotli;
702
0
  else if (!wget_strcasecmp_ascii(s, "zstd"))
703
0
    *content_encoding = wget_content_encoding_zstd;
704
0
  else if (!wget_strcasecmp_ascii(s, "lzip"))
705
0
    *content_encoding = wget_content_encoding_lzip;
706
0
  else
707
0
    *content_encoding = wget_content_encoding_identity;
708
709
0
  while (wget_http_istoken(*s)) s++;
710
711
0
  return s;
712
0
}
713
714
// Parses a comma-separated Connection header value.
// '*keep_alive' is set to true if any non-empty list element starts (after
// leading blanks) with "keep-alive" (ASCII case-insensitive), else to false.
//
// Returns a pointer to the terminating NUL of 's'. The scan never steps
// beyond the terminator, so the returned pointer can safely be dereferenced.
const char *wget_http_parse_connection(const char *s, bool *keep_alive)
{
  *keep_alive = false;

  while (*s) {
    // end of the current list element: next ',' or the terminating NUL
    const char *e = strchrnul(s, ',');

    if (e != s) {
      while (c_isblank(*s))
        s++;

      if (!wget_strncasecmp_ascii(s, "keep-alive", 10))
        *keep_alive = true;
    }

    // step over the ',' - but stay on the terminating NUL
    s = *e ? e + 1 : e;
  }

  return s;
}
731
732
// Extracts an ETag value: the run of non-blank characters that follows the
// leading blanks. '*etag' receives a copy made by wget_strmemdup().
// Returns a pointer to the first blank behind the value or to the
// terminating NUL.
const char *wget_http_parse_etag(const char *s, const char **etag)
{
  while (c_isblank(*s))
    s++;

  const char *start = s;

  while (*s && !c_isblank(*s))
    s++;

  *etag = wget_strmemdup(start, s - start);

  return s;
}
743
744
/*
745
// returns GMT/UTC time as an integer of format YYYYMMDDHHMMSS
746
// this makes us independent from size of time_t - work around possible year 2038 problems
747
static long long NONNULL_ALL parse_rfc1123_date(const char *s)
748
{
749
  // we simply can't use strptime() since it requires us to setlocale()
750
  // which is not thread-safe !!!
751
  static const char *mnames[12] = {
752
    "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
753
  };
754
  static int days_per_month[12] = {
755
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
756
  };
757
  int day, mon = 0, year, hour, min, sec, leap, it;
758
  char mname[4] = "";
759
760
  if (sscanf(s, " %*[a-zA-Z], %02d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
761
    // RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
762
  }
763
  else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) >= 6) {
764
    // RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
765
  }
766
  else if (sscanf(s, " %*[a-zA-Z], %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) >= 6) {
767
    // ANSI C's asctime(): Wed Jun 09 10:18:14 2021
768
  } else {
769
    error_printf(_("Failed to parse date '%s'\n"), s);
770
    return 0; // return as session cookie
771
  }
772
773
  if (*mname) {
774
    for (it = 0; it < countof(mnames); it++) {
775
      if (!wget_strcasecmp_ascii(mname, mnames[it])) {
776
        mon = it + 1;
777
        break;
778
      }
779
    }
780
  }
781
782
  if (year < 70 && year >= 0) year += 2000;
783
  else if (year >= 70 && year <= 99) year += 1900;
784
785
  if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
786
    leap = 1;
787
  else
788
    leap = 0;
789
790
  // we don't handle leap seconds
791
792
  if (year < 1601 || mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap) ||
793
    hour < 0 || hour > 23 || min < 0 || min > 60 || sec < 0 || sec > 60)
794
  {
795
    error_printf(_("Failed to parse date '%s'\n"), s);
796
    return 0; // return as session cookie
797
  }
798
799
  return(((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
800
}
801
*/
802
803
// copied this routine from
804
// https://ftp.netbsd.org/pub/pkgsrc/current/pkgsrc/pkgtools/libnbcompat/files/timegm.c
805
806
// Counts the leap days between the start of year 'y1' and the start of year
// 'y2' (Gregorian rules: every 4th year, except every 100th, but every 400th).
static int leap_days(int y1, int y2)
{
  const int from = y1 - 1;
  const int to = y2 - 1;

  const int every_4th = to / 4 - from / 4;
  const int every_100th = to / 100 - from / 100;
  const int every_400th = to / 400 - from / 400;

  return every_4th - every_100th + every_400th;
}
812
813
/*
814
RFC 2616, 3.3.1 Full Date
815
HTTP-date    = rfc1123-date | rfc850-date | asctime-date
816
rfc1123-date = wkday "," SP date1 SP time SP "GMT"
817
rfc850-date  = weekday "," SP date2 SP time SP "GMT"
818
asctime-date = wkday SP date3 SP time SP 4DIGIT
819
date1        = 2DIGIT SP month SP 4DIGIT
820
          ; day month year (e.g., 02 Jun 1982)
821
date2        = 2DIGIT "-" month "-" 2DIGIT
822
          ; day-month-year (e.g., 02-Jun-82)
823
date3        = month SP ( 2DIGIT | ( SP 1DIGIT ))
824
          ; month day (e.g., Jun  2)
825
time         = 2DIGIT ":" 2DIGIT ":" 2DIGIT
826
          ; 00:00:00 - 23:59:59
827
wkday        = "Mon" | "Tue" | "Wed"
828
         | "Thu" | "Fri" | "Sat" | "Sun"
829
weekday      = "Monday" | "Tuesday" | "Wednesday"
830
         | "Thursday" | "Friday" | "Saturday" | "Sunday"
831
month        = "Jan" | "Feb" | "Mar" | "Apr"
832
         | "May" | "Jun" | "Jul" | "Aug"
833
         | "Sep" | "Oct" | "Nov" | "Dec"
834
*/
835
836
// Parses an HTTP date string and returns it as seconds since
// 1970-01-01 00:00:00 UTC.
//
// Accepted layouts (tried in this order):
//   RFC 822 / 1123:      Wed, 09 Jun 2021 10:18:14 GMT
//   RFC 850 / Netscape:  Wednesday, 09-Jun-21 10:18:14 / Wed, 09-Jun-2021 10:18:14
//   asctime():           Wed Jun 09 10:18:14 2021
//   non-standard:        1 Mar 2027 09:23:12 GMT
//   non-standard:        Sun Nov 26 2023 21:24:47
//
// Two-digit years are mapped to 2000..2069 / 1970..1999; years before 1970
// are clamped to 1970. Leap seconds are not handled, but a seconds value of
// 60 is accepted.
//
// Returns 0 (and logs an error) if the string cannot be parsed or contains
// out-of-range values. Callers treat 0 as 'session cookie'.
int64_t wget_http_parse_full_date(const char *s)
{
  // we simply can't use strptime() since it requires us to setlocale()
  // which is not thread-safe !!!
  static const char *mnames[12] = {
    "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  static const int days_per_month[12] = {
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
  };
  // cumulated number of days until beginning of month for non-leap years
  static const int sum_of_days[12] = {
    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
  };

  int day, mon = 0, year, hour, min, sec, leap_month, leap_year, days;
  char mname[4] = "";

  if (sscanf(s, " %*[a-zA-Z], %2d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
    // RFC 822 / 1123: Wed, 09 Jun 2021 10:18:14 GMT
  } else if (sscanf(s, " %*[a-zA-Z], %2d-%3s-%4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
    // RFC 850 / 1036 or Netscape: Wednesday, 09-Jun-21 10:18:14 or Wed, 09-Jun-2021 10:18:14
  } else if (sscanf(s, " %*[a-zA-Z] %3s %2d %2d:%2d:%2d %4d", mname, &day, &hour, &min, &sec, &year) == 6) {
    // ANSI C's asctime(): Wed Jun 09 10:18:14 2021
  } else if (sscanf(s, " %d %3s %4d %2d:%2d:%2d", &day, mname, &year, &hour, &min, &sec) == 6) {
    // non-standard: 1 Mar 2027 09:23:12 GMT
  } else if (sscanf(s, " %*s %3s %2d %4d %2d:%2d:%2d", mname, &day, &year, &hour, &min, &sec) == 6) {
    // non-standard: Sun Nov 26 2023 21:24:47
  } else {
    error_printf(_("Failed to parse date '%s'\n"), s);
    return 0; // return as session cookie
  }

  // translate the month name into 1..12; 'mon' stays 0 if the name is unknown
  if (*mname) {
    for (unsigned it = 0; it < countof(mnames); it++) {
      if (!wget_strcasecmp_ascii(mname, mnames[it])) {
        mon = it + 1;
        break;
      }
    }
  }

  if (year < 70 && year >= 0) year += 2000;
  else if (year >= 70 && year <= 99) year += 1900;
  if (year < 1970) year = 1970;

  // we don't handle leap seconds

  leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
  leap_month = (mon == 2 && leap_year);

  // 'mon' is range-checked before it is used as an index (short-circuit).
  // Minutes are valid from 0 to 59; only seconds may reach 60 (leap second).
  if (mon < 1 || mon > 12 || day < 1 || (day > days_per_month[mon - 1] + leap_month) ||
    hour < 0 || hour > 23 || min < 0 || min > 59 || sec < 0 || sec > 60)
  {
    error_printf(_("Failed to parse date '%s'\n"), s);
    return 0; // return as session cookie
  }

  // calculate time_t (represented as int64_t) from GMT/UTC time values

  days = 365 * (year - 1970) + leap_days(1970, year);
  days += sum_of_days[mon - 1] + (mon > 2 && leap_year);
  days += day - 1;

  return (((int64_t)days * 24 + hour) * 60 + min) * 60 + sec;
}
902
903
// Formats the time 't' (seconds since the epoch) into 'buf' in the form
// "Wed, 09 Jun 2021 10:18:14 GMT".
// If 'bufsize' is 0 nothing is written. If gmtime_r() fails, 'buf' is set to
// the empty string. Where __LP64__ is not 1, values above 2147483647 are
// clamped to 2147483647 before the conversion.
// Returns 'buf'.
char *wget_http_print_date(int64_t t, char *buf, size_t bufsize)
{
  static const char *dnames[7] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
  };
  static const char *mnames[12] = {
    "Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  struct tm tm;
  time_t tt;

  if (bufsize == 0)
    return buf;

#if __LP64__ == 1
  tt = (time_t) t; // 64bit time_t
#else
  // 32bit time_t
  tt = t > 2147483647 ? 2147483647 : (time_t) t;
#endif

  if (!gmtime_r(&tt, &tm)) {
    *buf = 0;
    return buf;
  }

  wget_snprintf(buf, bufsize, "%s, %02d %s %d %02d:%02d:%02d GMT",
    dnames[tm.tm_wday], tm.tm_mday, mnames[tm.tm_mon], tm.tm_year + 1900,
    tm.tm_hour, tm.tm_min, tm.tm_sec);

  return buf;
}
936
937
// adjust time (t) by number of seconds (n)
938
/*
939
static long long adjust_time(long long t, int n)
940
{
941
  static int days_per_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
942
  int day, mon, year, hour, min, sec, leap;
943
944
  sec = t % 100;
945
  min = (t /= 100) % 100;
946
  hour = (t /= 100) % 100;
947
  day = (t /= 100) % 100;
948
  mon = (t /= 100) % 100;
949
  year = t / 100;
950
951
  sec += n;
952
953
  if (n >= 0) {
954
    if (sec >= 60) {
955
      min += sec / 60;
956
      sec %= 60;
957
    }
958
    if (min >= 60) {
959
      hour += min / 60;
960
      min %= 60;
961
    }
962
    if (hour >= 24) {
963
      day += hour / 24;
964
      hour %= 24;
965
    }
966
    while (1) {
967
      if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
968
        leap = 1;
969
      else
970
        leap = 0;
971
      if (day > days_per_month[mon - 1] + leap) {
972
        day -= (days_per_month[mon - 1] + leap);
973
        mon++;
974
        if (mon > 12) {
975
          mon = 1;
976
          year++;
977
        }
978
      } else break;
979
    }
980
  } else { // n<0
981
    if (sec < 0) {
982
      min += (sec - 59) / 60;
983
      sec = 59 + (sec + 1) % 60;
984
    }
985
    if (min < 0) {
986
      hour += (min - 59) / 60;
987
      min = 59 + (min + 1) % 60;
988
    }
989
    if (hour < 0) {
990
      day += (hour - 23) / 24;
991
      hour = 23 + (hour + 1) % 24;
992
    }
993
    for (;;) {
994
      if (day <= 0) {
995
        if (--mon < 1) {
996
          mon = 12;
997
          year--;
998
        }
999
        if (mon == 2 && year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))
1000
          leap = 1;
1001
        else
1002
          leap = 0;
1003
        day += (days_per_month[mon - 1] + leap);
1004
      } else break;
1005
    }
1006
  }
1007
1008
  return (((((long long)year*100 + mon)*100 + day)*100 + hour)*100 + min)*100 + sec;
1009
}
1010
1011
// return current GMT/UTC
1012
1013
static int64_t get_current_time(void)
1014
{
1015
  int64_t t = time(NULL);
1016
  struct tm tm;
1017
1018
  gmtime_r(&t, &tm);
1019
1020
  return (((((int64_t)(tm.tm_year + 1900)*100 + tm.tm_mon + 1)*100 + tm.tm_mday)*100 + tm.tm_hour)*100 + tm.tm_min)*100 + tm.tm_sec;
1021
}
1022
*/
1023
1024
/*
1025
 RFC 6265
1026
1027
 set-cookie-header = "Set-Cookie:" SP set-cookie-string
1028
 set-cookie-string = cookie-pair *( ";" SP cookie-av )
1029
 cookie-pair       = cookie-name "=" cookie-value
1030
 cookie-name       = token
1031
 cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
1032
 cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
1033
                       ; US-ASCII characters excluding CTLs,
1034
                       ; whitespace DQUOTE, comma, semicolon,
1035
                       ; and backslash
1036
 token             = <token, defined in [RFC2616], Section 2.2>
1037
1038
 cookie-av         = expires-av / max-age-av / domain-av /
1039
                     path-av / secure-av / httponly-av /
1040
                     extension-av
1041
 expires-av        = "Expires=" sane-cookie-date
1042
 sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
1043
 max-age-av        = "Max-Age=" non-zero-digit *DIGIT
1044
                       ; In practice, both expires-av and max-age-av
1045
                       ; are limited to dates representable by the
1046
                       ; user agent.
1047
 non-zero-digit    = %x31-39
1048
                       ; digits 1 through 9
1049
 domain-av         = "Domain=" domain-value
1050
 domain-value      = <subdomain>
1051
                       ; defined in [RFC1034], Section 3.5, as
1052
                       ; enhanced by [RFC1123], Section 2.1
1053
 path-av           = "Path=" path-value
1054
 path-value        = <any CHAR except CTLs or ";">
1055
 secure-av         = "Secure"
1056
 httponly-av       = "HttpOnly"
1057
 extension-av      = <any CHAR except CTLs or ";">
1058
*/
1059
/*
 * Parse a Set-Cookie header value.
 *
 * Thin wrapper: both arguments are forwarded unchanged to
 * wget_cookie_parse_setcookie() and its return value is passed back as is.
 */
const char *wget_http_parse_setcookie(const char *s, wget_cookie **cookie)
{
  const char *result = wget_cookie_parse_setcookie(s, cookie);

  return result;
}
1063
1064
static void cookie_free(void *cookie)
1065
0
{
1066
0
  if (cookie)
1067
0
    wget_cookie_free((wget_cookie **) &cookie);
1068
0
}
1069
1070
/*
 * Return non-zero if the header name <name> of length <namelen> equals <literal>
 * as a whole, ignoring ASCII case. <name> does not need to be 0-terminated at <namelen>.
 *
 * Comparing only <namelen> bytes is not enough: a shorter (or empty) name would
 * match every literal it is a prefix of, hence the explicit length check.
 */
static int http_header_name_is(const char *name, size_t namelen, const char *literal)
{
  return namelen == strlen(literal) && !wget_strncasecmp_ascii(name, literal, namelen);
}

/*
 * Parse a Proxy-Authenticate / WWW-Authenticate value and append the resulting
 * challenge to resp->challenges (the vector is created on first use).
 *
 * Returns WGET_E_SUCCESS, or WGET_E_MEMORY if the challenge could not be
 * allocated or stored. Nothing is leaked on failure.
 */
static int http_response_add_challenge(wget_http_response *resp, const char *value)
{
  wget_http_challenge *challenge = wget_malloc(sizeof(*challenge));

  if (!challenge)
    return WGET_E_MEMORY;

  wget_http_parse_challenge(value, challenge);

  if (!resp->challenges) {
    resp->challenges = wget_vector_create(2, NULL);
    wget_vector_set_destructor(resp->challenges, (wget_vector_destructor *) wget_http_free_challenge);
  }

  if (wget_vector_add(resp->challenges, challenge) < 0) {
    // the vector did not take ownership, so release the challenge here
    wget_http_free_challenge(challenge);
    return WGET_E_MEMORY;
  }

  return WGET_E_SUCCESS;
}

/*
 * Interpret a single response header (name/value pair) and store the result in <resp>.
 *
 * resp      Response object to update
 * name      Header name, <namelen> bytes, need not be 0-terminated
 * value     Header value, <valuelen> bytes, need not be 0-terminated
 *
 * Header names are matched as a whole and case-insensitively. The HTTP/2
 * pseudo header ":status" is matched case-sensitively.
 * "Location" and "Link" are only evaluated while resp->code is 3xx.
 *
 * Returns
 *   WGET_E_SUCCESS  the header was recognized and processed
 *   WGET_E_UNKNOWN  the header is not one this parser handles
 *   WGET_E_INVALID  a required argument is NULL, the name is empty, or
 *                   ":status" does not carry exactly three digits
 *   WGET_E_MEMORY   an allocation failed
 */
int wget_http_parse_header_line(wget_http_response *resp, const char *name, size_t namelen, const char *value, size_t valuelen)
{
  if (!resp || !name || !namelen || !value)
    return WGET_E_INVALID;

  char valuebuf[256];
  char *value0;
  int ret = WGET_E_SUCCESS;

  // The sub-parsers take a C string, so work on a copy of the value.
  // The copy is either valuebuf itself or a separately allocated buffer (released below).
  value0 = wget_strmemcpy_a(valuebuf, sizeof(valuebuf), value, valuelen);
  if (!value0)
    return WGET_E_MEMORY;

  // dispatch on the first character; '| 0x20' folds ASCII letters to lowercase
  switch (*name | 0x20) {
  case ':':
    if (namelen != 7 || memcmp(name, ":status", 7)) {
      ret = WGET_E_UNKNOWN;
    } else if (valuelen == 3 && c_isdigit(value[0]) && c_isdigit(value[1]) && c_isdigit(value[2])) {
      resp->code = ((value[0] - '0') * 10 + (value[1] - '0')) * 10 + (value[2] - '0');
    } else {
      ret = WGET_E_INVALID;
    }
    break;
  case 'c':
    if (http_header_name_is(name, namelen, "content-encoding")) {
      wget_http_parse_content_encoding(value0, &resp->content_encoding);
    } else if (http_header_name_is(name, namelen, "content-type")) {
      // first Content-Type wins
      if (!resp->content_type && !resp->content_type_encoding)
        wget_http_parse_content_type(value0, &resp->content_type, &resp->content_type_encoding);
    } else if (http_header_name_is(name, namelen, "content-length")) {
      long long length = atoll(value0);

      // a negative length is nonsense; don't let it wrap into a huge size_t
      if (length >= 0) {
        resp->content_length = (size_t) length;
        resp->content_length_valid = 1;
      } else {
        ret = WGET_E_INVALID;
      }
    } else if (http_header_name_is(name, namelen, "content-disposition")) {
      if (!resp->content_filename)
        wget_http_parse_content_disposition(value0, &resp->content_filename);
    } else if (http_header_name_is(name, namelen, "connection")) {
      wget_http_parse_connection(value0, &resp->keep_alive);
    } else if (http_header_name_is(name, namelen, "Content-Security-Policy")) {
      resp->csp = 1;
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'd':
    if (http_header_name_is(name, namelen, "digest")) {
      // https://tools.ietf.org/html/rfc3230
      wget_http_digest digest;

      wget_http_parse_digest(value0, &digest);
      // debug_printf("%s: %s\n",digest.algorithm,digest.encoded_digest);
      if (!resp->digests) {
        resp->digests = wget_vector_create(4, NULL);
        wget_vector_set_destructor(resp->digests, (wget_vector_destructor *) wget_http_free_digest);
      }
      if (wget_vector_add_memdup(resp->digests, &digest, sizeof(digest)) < 0) {
        // not stored: release what the parser allocated
        xfree(digest.algorithm);
        xfree(digest.encoded_digest);
        ret = WGET_E_MEMORY;
      }
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'e':
    if (http_header_name_is(name, namelen, "etag")) {
      if (!resp->etag)
        wget_http_parse_etag(value0, &resp->etag);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'i':
    if (http_header_name_is(name, namelen, "icy-metaint")) {
      resp->icy_metaint = atoi(value0);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'l':
    if (http_header_name_is(name, namelen, "last-modified")) {
      // Last-Modified: Thu, 07 Feb 2008 15:03:24 GMT
      resp->last_modified = wget_http_parse_full_date(value0);
    } else if (resp->code / 100 == 3 && http_header_name_is(name, namelen, "location")) {
      if (!resp->location)
        wget_http_parse_location(value0, &resp->location);
    } else if (resp->code / 100 == 3 && http_header_name_is(name, namelen, "link")) {
      // debug_printf("s=%.31s\n",s);
      wget_http_link link;

      wget_http_parse_link(value0, &link);
      // debug_printf("link->uri=%s\n",link.uri);
      if (!resp->links) {
        resp->links = wget_vector_create(8, NULL);
        wget_vector_set_destructor(resp->links, (wget_vector_destructor *) wget_http_free_link);
      }
      if (wget_vector_add_memdup(resp->links, &link, sizeof(link)) < 0) {
        // not stored: release what the parser allocated
        xfree(link.uri);
        xfree(link.type);
        ret = WGET_E_MEMORY;
      }
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'p':
    if (http_header_name_is(name, namelen, "public-key-pins")) {
      // only the first Public-Key-Pins header is evaluated
      if (!resp->hpkp) {
        resp->hpkp = wget_hpkp_new();
        if (resp->hpkp) {
          wget_http_parse_public_key_pins(value0, resp->hpkp);
          debug_printf("new host pubkey pinnings added to hpkp db\n");
        } else {
          ret = WGET_E_MEMORY;
        }
      }
    } else if (http_header_name_is(name, namelen, "proxy-authenticate")) {
      ret = http_response_add_challenge(resp, value0);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 's':
    if (http_header_name_is(name, namelen, "set-cookie")) {
      // this is a parser. content validation must be done by higher level functions.
      wget_cookie *cookie = NULL;

      wget_http_parse_setcookie(value0, &cookie);

      if (cookie) {
        if (!resp->cookies) {
          resp->cookies = wget_vector_create(4, NULL);
          wget_vector_set_destructor(resp->cookies, cookie_free);
        }
        if (wget_vector_add(resp->cookies, cookie) < 0) {
          // the vector did not take ownership
          wget_cookie_free(&cookie);
          ret = WGET_E_MEMORY;
        }
      }
    } else if (http_header_name_is(name, namelen, "strict-transport-security")) {
      resp->hsts = 1;
      wget_http_parse_strict_transport_security(value0, &resp->hsts_maxage, &resp->hsts_include_subdomains);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 't':
    if (http_header_name_is(name, namelen, "transfer-encoding")) {
      wget_http_parse_transfer_encoding(value0, &resp->transfer_encoding);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'w':
    if (http_header_name_is(name, namelen, "www-authenticate")) {
      ret = http_response_add_challenge(resp, value0);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  case 'x':
    if (http_header_name_is(name, namelen, "x-archive-orig-last-modified")) {
      resp->last_modified = wget_http_parse_full_date(value0);
    } else
      ret = WGET_E_UNKNOWN;
    break;
  default:
    ret = WGET_E_UNKNOWN;
    break;
  }

  if (value0 != valuebuf)
    xfree(value0);

  return ret;
}
1244
1245
/* content of <buf> will be destroyed */
1246
/* buf must be 0-terminated */
1247
wget_http_response *wget_http_parse_response_header(char *buf)
1248
0
{
1249
0
  char *eol;
1250
1251
0
  wget_http_response *resp = wget_calloc(1, sizeof(wget_http_response));
1252
0
  if (!resp)
1253
0
    return NULL;
1254
1255
0
  if (sscanf(buf, " HTTP/%3hd.%3hd %3hd %31[^\r\n] ",
1256
0
    &resp->major, &resp->minor, &resp->code, resp->reason) >= 3) {
1257
0
    if ((eol = strchr(buf + 10, '\n'))) {
1258
      // eol[-1]=0;
1259
      // debug_printf("# %s\n",buf);
1260
0
    } else {
1261
      // empty HTTP header
1262
0
      return resp;
1263
0
    }
1264
0
  } else if (sscanf(buf, " ICY %3hd %31[^\r\n] ", &resp->code, resp->reason) >= 1) {
1265
0
    if ((eol = strchr(buf + 4, '\n'))) {
1266
      // eol[-1]=0;
1267
      // debug_printf("# %s\n",buf);
1268
0
    } else {
1269
      // empty HTTP header
1270
0
      return resp;
1271
0
    }
1272
0
  } else {
1273
0
    error_printf(_("HTTP response header not found\n"));
1274
0
    xfree(resp);
1275
0
    return NULL;
1276
0
  }
1277
1278
  // 'close' is default on HTTP/1.0, else 'keep_alive' is default
1279
0
  if ((resp->major == 1 && resp->minor >= 1) || resp->major > 1)
1280
0
    resp->keep_alive = 1;
1281
1282
0
  for (char *line = eol + 1; eol && *line && *line != '\r' && *line != '\n'; line = eol ? eol + 1 : NULL) {
1283
0
    eol = strchr(line, '\n');
1284
0
    while (eol && c_isblank(eol[1])) { // handle split lines
1285
0
      *eol = eol[-1] = ' ';
1286
0
      eol = strchr(eol, '\n');
1287
0
    }
1288
1289
0
    if (eol) {
1290
0
      if (eol[-1] == '\r')
1291
0
        eol[-1] = 0;
1292
0
      else
1293
0
        *eol = 0;
1294
0
    }
1295
1296
0
    size_t namelen, valuelen;
1297
0
    const char *name;
1298
0
    const char *value = wget_parse_name_fixed(line, &name, &namelen);
1299
    // value now points directly after :
1300
1301
0
    if (eol)
1302
0
      valuelen = eol - value - (eol[-1] == 0);
1303
0
    else
1304
0
      valuelen = strlen(value);
1305
1306
0
    wget_http_parse_header_line(resp, name, namelen, value, valuelen);
1307
0
  }
1308
1309
0
  return resp;
1310
0
}
1311
1312
/*
 * Release a header parameter: its name, its value and the structure itself.
 * A NULL argument is ignored.
 */
void wget_http_free_param(wget_http_header_param *param)
{
  if (!param)
    return;

  xfree(param->name);
  xfree(param->value);
  xfree(param);
}
1318
1319
/*
 * Release a link entry: its uri, its type and the structure itself.
 * A NULL argument is ignored.
 */
void wget_http_free_link(wget_http_link *link)
{
  if (!link)
    return;

  xfree(link->uri);
  xfree(link->type);
  xfree(link);
}
1325
1326
/*
 * Release a vector of link entries.
 * Simply hands <links> over to wget_vector_free().
 */
void wget_http_free_links(wget_vector **links)
{
  wget_vector_free(links);
}
1330
1331
/*
 * Release a digest entry: its algorithm, its encoded digest and the structure itself.
 * A NULL argument is ignored.
 */
void wget_http_free_digest(wget_http_digest *digest)
{
  if (!digest)
    return;

  xfree(digest->algorithm);
  xfree(digest->encoded_digest);
  xfree(digest);
}
1337
1338
/*
 * Release a vector of digest entries.
 * Simply hands <digests> over to wget_vector_free().
 */
void wget_http_free_digests(wget_vector **digests)
{
  wget_vector_free(digests);
}
1342
1343
/*
 * Release an authentication challenge: its scheme name, its parameter map
 * and the structure itself.
 * A NULL argument is ignored.
 */
void wget_http_free_challenge(wget_http_challenge *challenge)
{
  if (!challenge)
    return;

  xfree(challenge->auth_scheme);
  wget_stringmap_free(&challenge->params);
  xfree(challenge);
}
1349
1350
/*
 * Release a vector of authentication challenges.
 * Simply hands <challenges> over to wget_vector_free().
 */
void wget_http_free_challenges(wget_vector **challenges)
{
  wget_vector_free(challenges);
}
1354
1355
/*
 * Release a vector of cookies.
 * Simply hands <cookies> over to wget_vector_free().
 */
void wget_http_free_cookies(wget_vector **cookies)
{
  wget_vector_free(cookies);
}
1359
1360
/*
 * Release the HPKP object that <hpkp> points to and reset the caller's pointer to NULL.
 * Nothing happens if <hpkp> itself is NULL.
 */
void wget_http_free_hpkp_entries(wget_hpkp **hpkp)
{
  if (!hpkp)
    return;

  wget_hpkp_free(*hpkp);
  *hpkp = NULL;
}
1367
1368
/*
 * Release a response object together with everything it owns and
 * reset the caller's pointer.
 * Nothing happens if <resp> or <*resp> is NULL.
 */
void wget_http_free_response(wget_http_response **resp)
{
  if (!resp || !*resp)
    return;

  wget_http_response *r = *resp;

  // collections
  wget_http_free_links(&r->links);
  wget_http_free_digests(&r->digests);
  wget_http_free_challenges(&r->challenges);
  wget_http_free_cookies(&r->cookies);
  wget_http_free_hpkp_entries(&r->hpkp);

  // plain strings
  xfree(r->content_type);
  xfree(r->content_type_encoding);
  xfree(r->content_filename);
  xfree(r->location);
  xfree(r->etag);
  // xfree(r->reason);

  // raw header and body data
  wget_buffer_free(&r->header);
  wget_buffer_free(&r->body);

  // finally the object itself
  xfree(*resp);
}
1387
1388
/* for security reasons: set all freed pointers to NULL */
1389
void wget_http_free_request(wget_http_request **req)
1390
0
{
1391
0
  if (req && *req) {
1392
0
    wget_buffer_deinit(&(*req)->esc_resource);
1393
0
    wget_buffer_deinit(&(*req)->esc_host);
1394
0
    wget_vector_free(&(*req)->headers);
1395
0
    xfree((*req)->body);
1396
0
    xfree(*req);
1397
0
  }
1398
0
}