Coverage Report

Created: 2025-08-26 06:59

/src/bind9/lib/isc/url.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0 and MIT
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
/*
15
 * Copyright Joyent, Inc. and other Node contributors. All rights reserved.
16
 *
17
 * Permission is hereby granted, free of charge, to any person obtaining a copy
18
 * of this software and associated documentation files (the "Software"), to
19
 * deal in the Software without restriction, including without limitation the
20
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
21
 * sell copies of the Software, and to permit persons to whom the Software is
22
 * furnished to do so, subject to the following conditions:
23
 *
24
 * The above copyright notice and this permission notice shall be included in
25
 * all copies or substantial portions of the Software.
26
 *
27
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
32
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
33
 * IN THE SOFTWARE.
34
 */
35
36
#include <ctype.h>
37
#include <limits.h>
38
#include <stddef.h>
39
#include <string.h>
40
41
#include <isc/url.h>
42
#include <isc/util.h>
43
44
#ifndef BIT_AT
/*
 * Test bit 'i' of the byte array 'a'.  Bit 0 is the least significant bit
 * of a[0], bit 8 the least significant bit of a[1], and so on.  Evaluates
 * to 1 when the bit is set and to 0 otherwise.
 *
 * NOTE: 'i' is evaluated twice; pass side-effect-free expressions only.
 */
#define BIT_AT(a, i)                                  \
  ((((unsigned int)(a)[(unsigned int)(i) >> 3] >>     \
     ((unsigned int)(i) & 7)) &                       \
    1U) != 0)
#endif
49
50
/*
 * T(v) wraps table bits that are only accepted when HTTP_PARSER_STRICT is
 * off: it expands to 'v' in lenient builds and to 0 in strict builds.
 */
#if HTTP_PARSER_STRICT
#define T(v) 0
#else
#define T(v) v
#endif

/*
 * Bitmap of the 7-bit characters accepted by IS_URL_CHAR().
 *
 * Character 'c' is represented by bit (c & 7) of byte (c >> 3), so each
 * row below covers eight consecutive character codes and can be queried
 * with BIT_AT(normal_url_char, c).
 */
static const uint8_t normal_url_char[32] = {
  /*   0 -   7: nul soh stx etx eot enq ack bel -- none accepted */
  0x00,
  /*   8 -  15: only ht (9) and np (12), and only in lenient mode */
  T(0x02) | T(0x10),
  /*  16 -  23: dle dc1 dc2 dc3 dc4 nak syn etb -- none accepted */
  0x00,
  /*  24 -  31: can em sub esc fs gs rs us -- none accepted */
  0x00,
  /*  32 -  39: ! " $ % & ' accepted; space (32) and '#' (35) are not */
  0xF6,
  /*  40 -  47: ( ) * + , - . / -- all accepted */
  0xFF,
  /*  48 -  55: 0 1 2 3 4 5 6 7 -- all accepted */
  0xFF,
  /*  56 -  63: 8 9 : ; < = > accepted; '?' (63) is not */
  0x7F,
  /*  64 -  71: @ A B C D E F G -- all accepted */
  0xFF,
  /*  72 -  79: H I J K L M N O -- all accepted */
  0xFF,
  /*  80 -  87: P Q R S T U V W -- all accepted */
  0xFF,
  /*  88 -  95: X Y Z [ \ ] ^ _ -- all accepted */
  0xFF,
  /*  96 - 103: ` a b c d e f g -- all accepted */
  0xFF,
  /* 104 - 111: h i j k l m n o -- all accepted */
  0xFF,
  /* 112 - 119: p q r s t u v w -- all accepted */
  0xFF,
  /* 120 - 127: x y z { | } ~ accepted; del (127) is not */
  0x7F,
};

#undef T
92
93
/*
 * States of the request/response state machine.
 *
 * The URL parsing code in this file uses s_dead and the URL-related
 * s_req_* states (s_req_spaces_before_url through s_req_fragment).
 * The order of the enumerators is significant; do not reorder them.
 */
typedef enum {
  /* Error state; important that this is > 0 */
  s_dead = 1,

  /* s_start_req_or_res and the s_res_* states */
  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_http_major,
  s_res_http_dot,
  s_res_http_minor,
  s_res_http_end,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  /* s_req_* states; the URL states are driven by parse_url_char() */
  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_http_I,
  s_req_http_IC,
  s_req_http_major,
  s_req_http_dot,
  s_req_http_minor,
  s_req_http_end,
  s_req_line_almost_done,

  /* s_header_* states */
  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  /* s_chunk_size* / s_chunk_parameters states */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /*
   * Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
} state_t;
175
176
/*
 * States of the authority ("userinfo@host:port") state machine that is
 * driven by http_parse_host_char() and http_parse_host().
 */
typedef enum {
  /* Error state: the character is not valid in the current state */
  s_http_host_dead = 1,

  /* Optional "userinfo@" prefix */
  s_http_userinfo_start,
  s_http_userinfo,

  /* Host: either a plain name/address or a bracketed "[...]" literal */
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,

  /* "%zone" suffix inside a bracketed literal */
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,

  /* ":port" suffix */
  s_http_host_port_start,
  s_http_host_port
} host_state_t;
190
191
/*
 * Macros for character classes; IS_URL_CHAR() and IS_HOST_CHAR() depend on
 * strict mode (HTTP_PARSER_STRICT).
 *
 * Every use of the macro parameter is parenthesized, including inside the
 * (unsigned char) casts, so that an expression argument such as
 * IS_HOST_CHAR(x >> 8) is classified as a whole instead of having the cast
 * bind to its first operand only.
 *
 * NOTE: the argument is evaluated more than once; pass side-effect-free
 * expressions only.
 */

/* True for the "mark" punctuation characters: - _ . ! ~ * ' ( ) */
#define IS_MARK(c)                                                       \
  ((c) == '-' || (c) == '_' || (c) == '.' || (c) == '!' || (c) == '~' || \
   (c) == '*' || (c) == '\'' || (c) == '(' || (c) == ')')

/* True for characters accepted in the userinfo part of an authority. */
#define IS_USERINFO_CHAR(c)                                              \
  (isalnum((unsigned char)(c)) || IS_MARK(c) || (c) == '%' ||            \
   (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
   (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
/* Strict mode: only characters flagged in normal_url_char are URL chars. */
#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)(c)))
/* Strict mode: host names consist of alphanumerics, '.' and '-'. */
#define IS_HOST_CHAR(c) \
  (isalnum((unsigned char)(c)) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: additionally accept any byte with the high bit set. */
#define IS_URL_CHAR(c) \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
/* Lenient mode: additionally accept '_' in host names. */
#define IS_HOST_CHAR(c) \
  (isalnum((unsigned char)(c)) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
209
210
/*
211
 * Our URL parser.
212
 *
213
 * This is designed to be shared by http_parser_execute() for URL validation,
214
 * hence it has a state transition + byte-for-byte interface. In addition, it
215
 * is meant to be embedded in http_parser_parse_url(), which does the dirty
216
 * work of turning state transitions URL components for its API.
217
 *
218
 * This function should only be invoked with non-space characters. It is
219
 * assumed that the caller cares about (and can detect) the transition between
220
 * URL and non-URL states by looking for these.
221
 */
222
static state_t
223
0
parse_url_char(state_t s, const char ch) {
224
0
  if (ch == ' ' || ch == '\r' || ch == '\n') {
225
0
    return s_dead;
226
0
  }
227
228
0
#if HTTP_PARSER_STRICT
229
0
  if (ch == '\t' || ch == '\f') {
230
0
    return s_dead;
231
0
  }
232
0
#endif
233
234
0
  switch (s) {
235
0
  case s_req_spaces_before_url:
236
    /* Proxied requests are followed by scheme of an absolute URI
237
     * (alpha). All methods except CONNECT are followed by '/' or
238
     * '*'.
239
     */
240
241
0
    if (ch == '/' || ch == '*') {
242
0
      return s_req_path;
243
0
    }
244
245
0
    if (isalpha((unsigned char)ch)) {
246
0
      return s_req_schema;
247
0
    }
248
249
0
    break;
250
251
0
  case s_req_schema:
252
0
    if (isalpha((unsigned char)ch)) {
253
0
      return s;
254
0
    }
255
256
0
    if (ch == ':') {
257
0
      return s_req_schema_slash;
258
0
    }
259
260
0
    break;
261
262
0
  case s_req_schema_slash:
263
0
    if (ch == '/') {
264
0
      return s_req_schema_slash_slash;
265
0
    }
266
267
0
    break;
268
269
0
  case s_req_schema_slash_slash:
270
0
    if (ch == '/') {
271
0
      return s_req_server_start;
272
0
    }
273
274
0
    break;
275
276
0
  case s_req_server_with_at:
277
0
    if (ch == '@') {
278
0
      return s_dead;
279
0
    }
280
281
0
    FALLTHROUGH;
282
0
  case s_req_server_start:
283
0
  case s_req_server:
284
0
    if (ch == '/') {
285
0
      return s_req_path;
286
0
    }
287
288
0
    if (ch == '?') {
289
0
      return s_req_query_string_start;
290
0
    }
291
292
0
    if (ch == '@') {
293
0
      return s_req_server_with_at;
294
0
    }
295
296
0
    if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
297
0
      return s_req_server;
298
0
    }
299
300
0
    break;
301
302
0
  case s_req_path:
303
0
    if (IS_URL_CHAR(ch)) {
304
0
      return s;
305
0
    }
306
307
0
    switch (ch) {
308
0
    case '?':
309
0
      return s_req_query_string_start;
310
311
0
    case '#':
312
0
      return s_req_fragment_start;
313
0
    }
314
315
0
    break;
316
317
0
  case s_req_query_string_start:
318
0
  case s_req_query_string:
319
0
    if (IS_URL_CHAR(ch)) {
320
0
      return s_req_query_string;
321
0
    }
322
323
0
    switch (ch) {
324
0
    case '?':
325
      /* allow extra '?' in query string */
326
0
      return s_req_query_string;
327
328
0
    case '#':
329
0
      return s_req_fragment_start;
330
0
    }
331
332
0
    break;
333
334
0
  case s_req_fragment_start:
335
0
    if (IS_URL_CHAR(ch)) {
336
0
      return s_req_fragment;
337
0
    }
338
339
0
    switch (ch) {
340
0
    case '?':
341
0
      return s_req_fragment;
342
343
0
    case '#':
344
0
      return s;
345
0
    }
346
347
0
    break;
348
349
0
  case s_req_fragment:
350
0
    if (IS_URL_CHAR(ch)) {
351
0
      return s;
352
0
    }
353
354
0
    switch (ch) {
355
0
    case '?':
356
0
    case '#':
357
0
      return s;
358
0
    }
359
360
0
    break;
361
362
0
  default:
363
0
    break;
364
0
  }
365
366
  /*
367
   * We should never fall out of the switch above unless there's an
368
   * error.
369
   */
370
0
  return s_dead;
371
0
}
372
373
static host_state_t
374
0
http_parse_host_char(host_state_t s, const char ch) {
375
0
  switch (s) {
376
0
  case s_http_userinfo:
377
0
  case s_http_userinfo_start:
378
0
    if (ch == '@') {
379
0
      return s_http_host_start;
380
0
    }
381
382
0
    if (IS_USERINFO_CHAR(ch)) {
383
0
      return s_http_userinfo;
384
0
    }
385
0
    break;
386
387
0
  case s_http_host_start:
388
0
    if (ch == '[') {
389
0
      return s_http_host_v6_start;
390
0
    }
391
392
0
    if (IS_HOST_CHAR(ch)) {
393
0
      return s_http_host;
394
0
    }
395
396
0
    break;
397
398
0
  case s_http_host:
399
0
    if (IS_HOST_CHAR(ch)) {
400
0
      return s_http_host;
401
0
    }
402
403
0
    FALLTHROUGH;
404
0
  case s_http_host_v6_end:
405
0
    if (ch == ':') {
406
0
      return s_http_host_port_start;
407
0
    }
408
409
0
    break;
410
411
0
  case s_http_host_v6:
412
0
    if (ch == ']') {
413
0
      return s_http_host_v6_end;
414
0
    }
415
416
0
    FALLTHROUGH;
417
0
  case s_http_host_v6_start:
418
0
    if (isxdigit((unsigned char)ch) || ch == ':' || ch == '.') {
419
0
      return s_http_host_v6;
420
0
    }
421
422
0
    if (s == s_http_host_v6 && ch == '%') {
423
0
      return s_http_host_v6_zone_start;
424
0
    }
425
0
    break;
426
427
0
  case s_http_host_v6_zone:
428
0
    if (ch == ']') {
429
0
      return s_http_host_v6_end;
430
0
    }
431
432
0
    FALLTHROUGH;
433
0
  case s_http_host_v6_zone_start:
434
    /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
435
0
    if (isalnum((unsigned char)ch) || ch == '%' || ch == '.' ||
436
0
        ch == '-' || ch == '_' || ch == '~')
437
0
    {
438
0
      return s_http_host_v6_zone;
439
0
    }
440
0
    break;
441
442
0
  case s_http_host_port:
443
0
  case s_http_host_port_start:
444
0
    if (isdigit((unsigned char)ch)) {
445
0
      return s_http_host_port;
446
0
    }
447
448
0
    break;
449
450
0
  default:
451
0
    break;
452
0
  }
453
454
0
  return s_http_host_dead;
455
0
}
456
457
static isc_result_t
458
0
http_parse_host(const char *buf, isc_url_parser_t *up, int found_at) {
459
0
  host_state_t s;
460
0
  const char *p = NULL;
461
0
  size_t buflen = up->field_data[ISC_UF_HOST].off +
462
0
      up->field_data[ISC_UF_HOST].len;
463
464
0
  REQUIRE((up->field_set & (1 << ISC_UF_HOST)) != 0);
465
466
0
  up->field_data[ISC_UF_HOST].len = 0;
467
468
0
  s = found_at ? s_http_userinfo_start : s_http_host_start;
469
470
0
  for (p = buf + up->field_data[ISC_UF_HOST].off; p < buf + buflen; p++) {
471
0
    host_state_t new_s = http_parse_host_char(s, *p);
472
473
0
    if (new_s == s_http_host_dead) {
474
0
      return ISC_R_FAILURE;
475
0
    }
476
477
0
    switch (new_s) {
478
0
    case s_http_host:
479
0
      if (s != s_http_host) {
480
0
        up->field_data[ISC_UF_HOST].off =
481
0
          (uint16_t)(p - buf);
482
0
      }
483
0
      up->field_data[ISC_UF_HOST].len++;
484
0
      break;
485
486
0
    case s_http_host_v6:
487
0
      if (s != s_http_host_v6) {
488
0
        up->field_data[ISC_UF_HOST].off =
489
0
          (uint16_t)(p - buf);
490
0
      }
491
0
      up->field_data[ISC_UF_HOST].len++;
492
0
      break;
493
494
0
    case s_http_host_v6_zone_start:
495
0
    case s_http_host_v6_zone:
496
0
      up->field_data[ISC_UF_HOST].len++;
497
0
      break;
498
499
0
    case s_http_host_port:
500
0
      if (s != s_http_host_port) {
501
0
        up->field_data[ISC_UF_PORT].off =
502
0
          (uint16_t)(p - buf);
503
0
        up->field_data[ISC_UF_PORT].len = 0;
504
0
        up->field_set |= (1 << ISC_UF_PORT);
505
0
      }
506
0
      up->field_data[ISC_UF_PORT].len++;
507
0
      break;
508
509
0
    case s_http_userinfo:
510
0
      if (s != s_http_userinfo) {
511
0
        up->field_data[ISC_UF_USERINFO].off =
512
0
          (uint16_t)(p - buf);
513
0
        up->field_data[ISC_UF_USERINFO].len = 0;
514
0
        up->field_set |= (1 << ISC_UF_USERINFO);
515
0
      }
516
0
      up->field_data[ISC_UF_USERINFO].len++;
517
0
      break;
518
519
0
    default:
520
0
      break;
521
0
    }
522
523
0
    s = new_s;
524
0
  }
525
526
  /* Make sure we don't end somewhere unexpected */
527
0
  switch (s) {
528
0
  case s_http_host_start:
529
0
  case s_http_host_v6_start:
530
0
  case s_http_host_v6:
531
0
  case s_http_host_v6_zone_start:
532
0
  case s_http_host_v6_zone:
533
0
  case s_http_host_port_start:
534
0
  case s_http_userinfo:
535
0
  case s_http_userinfo_start:
536
0
    return ISC_R_FAILURE;
537
0
  default:
538
0
    break;
539
0
  }
540
541
0
  return ISC_R_SUCCESS;
542
0
}
543
544
isc_result_t
545
isc_url_parse(const char *buf, size_t buflen, bool is_connect,
546
0
        isc_url_parser_t *up) {
547
0
  state_t s;
548
0
  isc_url_field_t uf, old_uf;
549
0
  int found_at = 0;
550
0
  const char *p = NULL;
551
552
0
  if (buflen == 0) {
553
0
    return ISC_R_FAILURE;
554
0
  }
555
556
0
  up->port = up->field_set = 0;
557
0
  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
558
0
  old_uf = ISC_UF_MAX;
559
560
0
  for (p = buf; p < buf + buflen; p++) {
561
0
    s = parse_url_char(s, *p);
562
563
    /* Figure out the next field that we're operating on */
564
0
    switch (s) {
565
0
    case s_dead:
566
0
      return ISC_R_FAILURE;
567
568
    /* Skip delimiters */
569
0
    case s_req_schema_slash:
570
0
    case s_req_schema_slash_slash:
571
0
    case s_req_server_start:
572
0
    case s_req_query_string_start:
573
0
    case s_req_fragment_start:
574
0
      continue;
575
576
0
    case s_req_schema:
577
0
      uf = ISC_UF_SCHEMA;
578
0
      break;
579
580
0
    case s_req_server_with_at:
581
0
      found_at = 1;
582
0
      FALLTHROUGH;
583
0
    case s_req_server:
584
0
      uf = ISC_UF_HOST;
585
0
      break;
586
587
0
    case s_req_path:
588
0
      uf = ISC_UF_PATH;
589
0
      break;
590
591
0
    case s_req_query_string:
592
0
      uf = ISC_UF_QUERY;
593
0
      break;
594
595
0
    case s_req_fragment:
596
0
      uf = ISC_UF_FRAGMENT;
597
0
      break;
598
599
0
    default:
600
0
      UNREACHABLE();
601
0
    }
602
603
    /* Nothing's changed; soldier on */
604
0
    if (uf == old_uf) {
605
0
      up->field_data[uf].len++;
606
0
      continue;
607
0
    }
608
609
0
    up->field_data[uf].off = (uint16_t)(p - buf);
610
0
    up->field_data[uf].len = 1;
611
612
0
    up->field_set |= (1 << uf);
613
0
    old_uf = uf;
614
0
  }
615
616
  /* host must be present if there is a schema */
617
  /* parsing http:///toto will fail */
618
0
  if ((up->field_set & (1 << ISC_UF_SCHEMA)) &&
619
0
      (up->field_set & (1 << ISC_UF_HOST)) == 0)
620
0
  {
621
0
    return ISC_R_FAILURE;
622
0
  }
623
624
0
  if (up->field_set & (1 << ISC_UF_HOST)) {
625
0
    isc_result_t result;
626
627
0
    result = http_parse_host(buf, up, found_at);
628
0
    if (result != ISC_R_SUCCESS) {
629
0
      return result;
630
0
    }
631
0
  }
632
633
  /* CONNECT requests can only contain "hostname:port" */
634
0
  if (is_connect &&
635
0
      up->field_set != ((1 << ISC_UF_HOST) | (1 << ISC_UF_PORT)))
636
0
  {
637
0
    return ISC_R_FAILURE;
638
0
  }
639
640
0
  if (up->field_set & (1 << ISC_UF_PORT)) {
641
0
    uint16_t off;
642
0
    uint16_t len;
643
0
    const char *pp = NULL;
644
0
    const char *end = NULL;
645
0
    unsigned long v;
646
647
0
    off = up->field_data[ISC_UF_PORT].off;
648
0
    len = up->field_data[ISC_UF_PORT].len;
649
0
    end = buf + off + len;
650
651
    /*
652
     * NOTE: The characters are already validated and are in the
653
     * [0-9] range
654
     */
655
0
    INSIST(off + len <= buflen);
656
657
0
    v = 0;
658
0
    for (pp = buf + off; pp < end; pp++) {
659
0
      v *= 10;
660
0
      v += *pp - '0';
661
662
      /* Ports have a max value of 2^16 */
663
0
      if (v > 0xffff) {
664
0
        return ISC_R_RANGE;
665
0
      }
666
0
    }
667
668
0
    up->port = (uint16_t)v;
669
0
  }
670
671
0
  return ISC_R_SUCCESS;
672
0
}