Coverage Report

Created: 2026-01-10 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/curl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
#include "curl_setup.h"
25
26
#include "urldata.h"
27
#include "urlapi-int.h"
28
#include "strcase.h"
29
#include "url.h"
30
#include "escape.h"
31
#include "curlx/inet_pton.h"
32
#include "curlx/inet_ntop.h"
33
#include "strdup.h"
34
#include "idn.h"
35
#include "curlx/strparse.h"
36
#include "curl_memrchr.h"
37
38
#ifdef _WIN32
39
/* MS-DOS/Windows style drive prefix, eg c: in c:foo */
40
/* True when 'str' begins with an ASCII letter followed by a colon. Every use
   of the argument is parenthesized so that a pointer expression such as
   'ptr + 1' is indexed as a whole. The argument is evaluated several times,
   so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str)        \
  ((('a' <= (str)[0] && (str)[0] <= 'z') ||  \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
44
#endif
45
46
/* MS-DOS/Windows style drive prefix, optionally with
47
 * a '|' instead of ':', followed by a slash or NUL */
48
/* helper: is 'c' an ASCII letter? */
#define URL_DRIVE_LETTER_(c) \
  ((((c) >= 'a') && ((c) <= 'z')) || (((c) >= 'A') && ((c) <= 'Z')))

/* The three bytes are tested strictly left to right, so a byte is only read
   once the previous one is known not to be the terminating NUL. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str)              \
  (URL_DRIVE_LETTER_((str)[0]) &&                      \
   (((str)[1] == ':') || ((str)[1] == '|')) &&         \
   (((str)[2] == '/') || ((str)[2] == '\\') || !(str)[2]))
53
54
/* scheme is not URL encoded, the longest libcurl supported ones are... */
55
1.45M
#define MAX_SCHEME_LEN 40
56
57
/*
58
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
59
 * sure we have _some_ value for AF_INET6 without polluting our fake value
60
 * everywhere.
61
 */
62
#if !defined(USE_IPV6) && !defined(AF_INET6)
63
#define AF_INET6 (AF_INET + 1)
64
#endif
65
66
/* Internal representation of CURLU. Point to URL-encoded strings. */
67
struct Curl_URL {
68
  char *scheme;
69
  char *user;
70
  char *password;
71
  char *options; /* IMAP only? */
72
  char *host;
73
  char *zoneid; /* for numerical IPv6 addresses */
74
  char *port;
75
  char *path;
76
  char *query;
77
  char *fragment;
78
  unsigned short portnum; /* the numerical version (if 'port' is set) */
79
  BIT(query_present);    /* to support blank */
80
  BIT(fragment_present); /* to support blank */
81
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
82
};
83
84
0
#define DEFAULT_SCHEME "https"
85
86
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
87
                                      unsigned int flags);
88
89
static void free_urlhandle(struct Curl_URL *u)
90
468k
{
91
468k
  curlx_free(u->scheme);
92
468k
  curlx_free(u->user);
93
468k
  curlx_free(u->password);
94
468k
  curlx_free(u->options);
95
468k
  curlx_free(u->host);
96
468k
  curlx_free(u->zoneid);
97
468k
  curlx_free(u->port);
98
468k
  curlx_free(u->path);
99
468k
  curlx_free(u->query);
100
468k
  curlx_free(u->fragment);
101
468k
}
102
103
/*
104
 * Find the separator at the end of the hostname, or the '?' in cases like
105
 * http://www.example.com?id=2380
106
 */
107
static const char *find_host_sep(const char *url)
{
  /* the hostname starts right after the first "//", or at the very
     beginning when there is no such marker */
  const char *dslash = strstr(url, "//");
  const char *hoststart = dslash ? dslash + 2 : url;

  /* stop on the first '/' or '?', or on the terminating NUL */
  return hoststart + strcspn(hoststart, "/?");
}
122
123
/* convert CURLcode to CURLUcode */
124
0
#define cc2cu(x)                                        \
  (((x) != CURLE_TOO_LARGE) ? CURLUE_OUT_OF_MEMORY :    \
   CURLUE_TOO_LARGE)
126
127
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
128
 * spaces in the source URL accordingly.
129
 *
130
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
131
 * will fail.
132
 */
133
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
                               size_t len, bool relative,
                               bool query)
{
  /* TRUE while still left of the '?': spaces become %20 there and '+' once
     in the query part */
  bool before_query = !query;
  const unsigned char *src = (const unsigned char *)url;
  CURLcode rc = CURLE_OK;

  if(!relative) {
    /* everything up to the end of the hostname is copied verbatim */
    const char *sep = find_host_sep(url);
    size_t asis = sep - url;

    rc = curlx_dyn_addn(o, url, asis);
    len -= asis;
    src = (const unsigned char *)sep;
  }

  while(len && !rc) {
    const unsigned char ch = *src;

    if(ch == ' ') {
      if(before_query)
        rc = curlx_dyn_addn(o, "%20", 3);
      else
        rc = curlx_dyn_addn(o, "+", 1);
    }
    else if((ch < ' ') || (ch >= 0x7f)) {
      /* control codes and everything from 0x7f and up get %-encoded */
      unsigned char enc[3];
      enc[0] = '%';
      Curl_hexbyte(&enc[1], ch);
      rc = curlx_dyn_addn(o, enc, 3);
    }
    else {
      rc = curlx_dyn_addn(o, src, 1);
      if(ch == '?')
        before_query = FALSE;
    }
    src++;
    len--;
  }

  return rc ? cc2cu(rc) : CURLUE_OK;
}
176
177
/*
178
 * Returns the length of the scheme if the given URL is absolute (as opposed
179
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
180
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
181
 *
182
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
183
 * scheme.
184
 */
185
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
186
                            bool guess_scheme)
187
520k
{
188
520k
  size_t i = 0;
189
520k
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
190
520k
  (void)buflen; /* only used in debug-builds */
191
520k
  if(buf)
192
242k
    buf[0] = 0; /* always leave a defined value in buf */
193
#ifdef _WIN32
194
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
195
    return 0;
196
#endif
197
520k
  if(ISALPHA(url[0]))
198
1.45M
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
199
1.44M
      char s = url[i];
200
1.44M
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.'))) {
201
        /* RFC 3986 3.1 explains:
202
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
203
        */
204
1.09M
      }
205
357k
      else {
206
357k
        break;
207
357k
      }
208
1.44M
    }
209
520k
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
210
    /* If this does not guess scheme, the scheme always ends with the colon so
211
       that this also detects data: URLs etc. In guessing mode, data: could
212
       be the hostname "data" with a specified port number. */
213
214
    /* the length of the scheme is the name part only */
215
255k
    size_t len = i;
216
255k
    if(buf) {
217
130k
      Curl_strntolower(buf, url, i);
218
130k
      buf[i] = 0;
219
130k
    }
220
255k
    return len;
221
255k
  }
222
264k
  return 0;
223
520k
}
224
225
/*
226
 * Concatenate a relative URL onto a base URL making it absolute.
227
 */
228
static CURLUcode redirect_url(const char *base, const char *relurl,
229
                              CURLU *u, unsigned int flags)
230
15.2k
{
231
15.2k
  struct dynbuf urlbuf;
232
15.2k
  bool host_changed = FALSE;
233
15.2k
  const char *useurl = relurl;
234
15.2k
  const char *cutoff = NULL;
235
15.2k
  size_t prelen;
236
15.2k
  CURLUcode uc;
237
238
  /* protsep points to the start of the hostname, after [scheme]:// */
239
15.2k
  const char *protsep = base + strlen(u->scheme) + 3;
240
15.2k
  DEBUGASSERT(base && relurl && u); /* all set here */
241
15.2k
  if(!base)
242
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
243
244
  /* handle different relative URL types */
245
15.2k
  switch(relurl[0]) {
246
1.19k
  case '/':
247
1.19k
    if(relurl[1] == '/') {
248
      /* protocol-relative URL: //example.com/path */
249
139
      cutoff = protsep;
250
139
      useurl = &relurl[2];
251
139
      host_changed = TRUE;
252
139
    }
253
1.05k
    else
254
      /* absolute /path */
255
1.05k
      cutoff = strchr(protsep, '/');
256
1.19k
    break;
257
258
577
  case '#':
259
    /* fragment-only change */
260
577
    if(u->fragment)
261
402
      cutoff = strchr(protsep, '#');
262
577
    break;
263
264
13.5k
  default:
265
    /* path or query-only change */
266
13.5k
    if(u->query && u->query[0])
267
      /* remove existing query */
268
6.85k
      cutoff = strchr(protsep, '?');
269
6.64k
    else if(u->fragment && u->fragment[0])
270
      /* Remove existing fragment */
271
825
      cutoff = strchr(protsep, '#');
272
273
13.5k
    if(relurl[0] != '?') {
274
      /* append a relative path after the last slash */
275
11.0k
      cutoff = memrchr(protsep, '/',
276
11.0k
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
277
11.0k
      if(cutoff)
278
11.0k
        cutoff++; /* truncate after last slash */
279
11.0k
    }
280
13.5k
    break;
281
15.2k
  }
282
283
15.2k
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
284
285
  /* build new URL */
286
15.2k
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
287
288
15.2k
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
289
15.2k
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
290
15.2k
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
291
15.2k
                              flags & ~CURLU_PATH_AS_IS);
292
15.2k
  }
293
0
  else
294
0
    uc = CURLUE_OUT_OF_MEMORY;
295
296
15.2k
  curlx_dyn_free(&urlbuf);
297
15.2k
  return uc;
298
15.2k
}
299
300
/* scan for byte values <= 31, 127 and sometimes space */
301
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
{
  const unsigned char *cur = (const unsigned char *)url;
  const unsigned char *end;
  /* highest byte value that is rejected; space is 0x20 */
  const unsigned char reject_upto = allowspace ? 0x1f : 0x20;
  const size_t total = strlen(url);

  if(total > CURL_MAX_INPUT_LENGTH)
    return CURLUE_MALFORMED_INPUT;

  for(end = cur + total; cur < end; cur++) {
    if((*cur <= reject_upto) || (*cur == 127))
      return CURLUE_MALFORMED_INPUT;
  }

  /* only stored when the whole string passed the scan */
  *urllen = total;
  return CURLUE_OK;
}
318
319
/*
320
 * parse_hostname_login()
321
 *
322
 * Parse the login details (username, password and options) from the URL and
323
 * strip them out of the hostname
324
 *
325
 */
326
static CURLUcode parse_hostname_login(struct Curl_URL *u,
327
                                      const char *login,
328
                                      size_t len,
329
                                      unsigned int flags,
330
                                      size_t *offset) /* to the hostname */
331
238k
{
332
238k
  CURLUcode result = CURLUE_OK;
333
238k
  CURLcode ccode;
334
238k
  char *userp = NULL;
335
238k
  char *passwdp = NULL;
336
238k
  char *optionsp = NULL;
337
238k
  const struct Curl_handler *h = NULL;
338
339
  /* At this point, we assume all the other special cases have been taken
340
   * care of, so the host is at most
341
   *
342
   *   [user[:password][;options]]@]hostname
343
   *
344
   * We need somewhere to put the embedded details, so do that first.
345
   */
346
238k
  char *ptr;
347
348
238k
  DEBUGASSERT(login);
349
350
238k
  *offset = 0;
351
238k
  ptr = memchr(login, '@', len);
352
238k
  if(!ptr)
353
182k
    goto out;
354
355
  /* We will now try to extract the
356
   * possible login information in a string like:
357
   * ftp://user:password@ftp.site.example:8021/README */
358
56.6k
  ptr++;
359
360
  /* if this is a known scheme, get some details */
361
56.6k
  if(u->scheme)
362
28.9k
    h = Curl_get_scheme_handler(u->scheme);
363
364
  /* We could use the login information in the URL so extract it. Only parse
365
     options if the handler says we should. Note that 'h' might be NULL! */
366
56.6k
  ccode = Curl_parse_login_details(login, ptr - login - 1,
367
56.6k
                                   &userp, &passwdp,
368
56.6k
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
369
56.6k
                                   &optionsp : NULL);
370
56.6k
  if(ccode) {
371
    /* the only possible error from Curl_parse_login_details is out of
372
       memory: */
373
0
    result = CURLUE_OUT_OF_MEMORY;
374
0
    goto out;
375
0
  }
376
377
56.6k
  if(userp) {
378
56.6k
    if(flags & CURLU_DISALLOW_USER) {
379
      /* Option DISALLOW_USER is set and URL contains username. */
380
14
      result = CURLUE_USER_NOT_ALLOWED;
381
14
      goto out;
382
14
    }
383
56.6k
    curlx_free(u->user);
384
56.6k
    u->user = userp;
385
56.6k
  }
386
387
56.6k
  if(passwdp) {
388
18.8k
    curlx_free(u->password);
389
18.8k
    u->password = passwdp;
390
18.8k
  }
391
392
56.6k
  if(optionsp) {
393
221
    curlx_free(u->options);
394
221
    u->options = optionsp;
395
221
  }
396
397
  /* the hostname starts at this offset */
398
56.6k
  *offset = ptr - login;
399
56.6k
  return CURLUE_OK;
400
401
182k
out:
402
403
182k
  curlx_free(userp);
404
182k
  curlx_free(passwdp);
405
182k
  curlx_free(optionsp);
406
182k
  u->user = NULL;
407
182k
  u->password = NULL;
408
182k
  u->options = NULL;
409
410
182k
  return result;
411
56.6k
}
412
413
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *hostname = curlx_dyn_ptr(host);
  const char *colon;  /* the colon in front of the port, if any */
  const char *digits;
  curl_off_t port;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /* this is a RFC2732-style specified IP-address: the port can only
       follow the ']' ending bracket */
    const char *after = strchr(hostname, ']');
    if(!after)
      return CURLUE_BAD_IPV6;
    after++;
    if(!*after)
      colon = NULL;
    else if(*after == ':')
      colon = after;
    else
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    /* no port part */
    return CURLUE_OK;

  /* cut off the hostname at the colon */
  curlx_dyn_setlen(host, (size_t)(colon - hostname));
  digits = colon + 1;

  /* Browser behavior adaptation. If there is a colon with no digits after,
     ignore the colon and just use the default port. Firefox, Chrome and
     Safari all do that.

     Do not do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  if(!*digits)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&digits, &port, 0xffff) || *digits)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short)port;
  /* generate a new port number string to get rid of leading zeroes etc */
  curlx_free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
466
467
/* this assumes 'hostname' now starts with [ */
468
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
469
                            size_t hlen) /* length of hostname */
470
2.55k
{
471
2.55k
  size_t len;
472
2.55k
  DEBUGASSERT(*hostname == '[');
473
2.55k
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
474
75
    return CURLUE_BAD_IPV6;
475
2.47k
  hostname++;
476
2.47k
  hlen -= 2;
477
478
  /* only valid IPv6 letters are ok */
479
2.47k
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
480
481
2.47k
  if(hlen != len) {
482
2.07k
    hlen = len;
483
2.07k
    if(hostname[len] == '%') {
484
      /* this could now be '%[zone id]' */
485
1.92k
      char zoneid[16];
486
1.92k
      int i = 0;
487
1.92k
      char *h = &hostname[len + 1];
488
      /* pass '25' if present and is a URL encoded percent sign */
489
1.92k
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
490
410
        h += 2;
491
13.6k
      while(*h && (*h != ']') && (i < 15))
492
11.7k
        zoneid[i++] = *h++;
493
1.92k
      if(!i || (']' != *h))
494
467
        return CURLUE_BAD_IPV6;
495
1.45k
      zoneid[i] = 0;
496
1.45k
      u->zoneid = curlx_strdup(zoneid);
497
1.45k
      if(!u->zoneid)
498
0
        return CURLUE_OUT_OF_MEMORY;
499
1.45k
      hostname[len] = ']'; /* insert end bracket */
500
1.45k
      hostname[len + 1] = 0; /* terminate the hostname */
501
1.45k
    }
502
153
    else
503
153
      return CURLUE_BAD_IPV6;
504
    /* hostname is fine */
505
2.07k
  }
506
507
  /* Normalize the IPv6 address */
508
1.85k
  {
509
1.85k
    char dest[16]; /* fits a binary IPv6 address */
510
1.85k
    hostname[hlen] = 0; /* end the address there */
511
1.85k
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
512
385
      return CURLUE_BAD_IPV6;
513
1.47k
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen + 1)) {
514
1.41k
      hlen = strlen(hostname); /* might be shorter now */
515
1.41k
      hostname[hlen + 1] = 0;
516
1.41k
    }
517
1.47k
    hostname[hlen] = ']'; /* restore ending bracket */
518
1.47k
  }
519
0
  return CURLUE_OK;
520
1.85k
}
521
522
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
523
                                size_t hlen) /* length of hostname */
524
138k
{
525
138k
  size_t len;
526
138k
  DEBUGASSERT(hostname);
527
528
138k
  if(!hlen)
529
0
    return CURLUE_NO_HOST;
530
138k
  else if(hostname[0] == '[')
531
518
    return ipv6_parse(u, hostname, hlen);
532
137k
  else {
533
    /* letters from the second string are not ok */
534
137k
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
535
137k
    if(hlen != len)
536
      /* hostname with bad content */
537
1.62k
      return CURLUE_BAD_HOSTNAME;
538
137k
  }
539
136k
  return CURLUE_OK;
540
138k
}
541
542
/*
543
 * Handle partial IPv4 numerical addresses and different bases, like
544
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
545
 *
546
 * If the given input string is syntactically wrong IPv4 or any part for
547
 * example is too big, this function returns HOST_NAME.
548
 *
549
 * Output the "normalized" version of that input string in plain quad decimal
550
 * integers.
551
 *
552
 * Returns the host type.
553
 */
554
555
0
/* the negative value is parenthesized so that it stays a single operand
   wherever the macro is expanded */
#define HOST_ERROR   (-1) /* out of memory */

#define HOST_NAME    1
#define HOST_IPV4    2
#define HOST_IPV6    3
560
561
static int ipv4_normalize(struct dynbuf *host)
562
236k
{
563
236k
  bool done = FALSE;
564
236k
  int n = 0;
565
236k
  const char *c = curlx_dyn_ptr(host);
566
236k
  unsigned int parts[4] = { 0, 0, 0, 0 };
567
236k
  CURLcode result = CURLE_OK;
568
569
236k
  if(*c == '[')
570
2.03k
    return HOST_IPV6;
571
572
402k
  while(!done) {
573
301k
    int rc;
574
301k
    curl_off_t l;
575
301k
    if(*c == '0') {
576
70.7k
      if(c[1] == 'x') {
577
936
        c += 2; /* skip the prefix */
578
936
        rc = curlx_str_hex(&c, &l, UINT_MAX);
579
936
      }
580
69.8k
      else
581
69.8k
        rc = curlx_str_octal(&c, &l, UINT_MAX);
582
70.7k
    }
583
230k
    else
584
230k
      rc = curlx_str_number(&c, &l, UINT_MAX);
585
586
301k
    if(rc)
587
122k
      return HOST_NAME;
588
589
178k
    parts[n] = (unsigned int)l;
590
591
178k
    switch(*c) {
592
66.7k
    case '.':
593
66.7k
      if(n == 3)
594
169
        return HOST_NAME;
595
66.5k
      n++;
596
66.5k
      c++;
597
66.5k
      break;
598
599
101k
    case '\0':
600
101k
      done = TRUE;
601
101k
      break;
602
603
10.5k
    default:
604
10.5k
      return HOST_NAME;
605
178k
    }
606
178k
  }
607
608
101k
  switch(n) {
609
82.4k
  case 0: /* a -- 32 bits */
610
82.4k
    curlx_dyn_reset(host);
611
612
82.4k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
613
82.4k
                            (parts[0] >> 24),
614
82.4k
                            ((parts[0] >> 16) & 0xff),
615
82.4k
                            ((parts[0] >> 8) & 0xff),
616
82.4k
                            (parts[0] & 0xff));
617
82.4k
    break;
618
1.96k
  case 1: /* a.b -- 8.24 bits */
619
1.96k
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
620
848
      return HOST_NAME;
621
1.12k
    curlx_dyn_reset(host);
622
1.12k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
623
1.12k
                            (parts[0]),
624
1.12k
                            ((parts[1] >> 16) & 0xff),
625
1.12k
                            ((parts[1] >> 8) & 0xff),
626
1.12k
                            (parts[1] & 0xff));
627
1.12k
    break;
628
2.05k
  case 2: /* a.b.c -- 8.8.16 bits */
629
2.05k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
630
1.57k
      return HOST_NAME;
631
485
    curlx_dyn_reset(host);
632
485
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
633
485
                            (parts[0]),
634
485
                            (parts[1]),
635
485
                            ((parts[2] >> 8) & 0xff),
636
485
                            (parts[2] & 0xff));
637
485
    break;
638
14.6k
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
639
14.6k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
640
12.7k
       (parts[3] > 0xff))
641
2.51k
      return HOST_NAME;
642
12.1k
    curlx_dyn_reset(host);
643
12.1k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
644
12.1k
                            (parts[0]),
645
12.1k
                            (parts[1]),
646
12.1k
                            (parts[2]),
647
12.1k
                            (parts[3]));
648
12.1k
    break;
649
101k
  }
650
96.2k
  if(result)
651
0
    return HOST_ERROR;
652
96.2k
  return HOST_IPV4;
653
96.2k
}
654
655
/* if necessary, replace the host content with a URL decoded version */
656
static CURLUcode urldecode_host(struct dynbuf *host)
657
138k
{
658
138k
  char *per = NULL;
659
138k
  const char *hostname = curlx_dyn_ptr(host);
660
138k
  per = strchr(hostname, '%');
661
138k
  if(!per)
662
    /* nothing to decode */
663
135k
    return CURLUE_OK;
664
2.60k
  else {
665
    /* encoded */
666
2.60k
    size_t dlen;
667
2.60k
    char *decoded;
668
2.60k
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
669
2.60k
                                     REJECT_CTRL);
670
2.60k
    if(result)
671
109
      return CURLUE_BAD_HOSTNAME;
672
2.49k
    curlx_dyn_reset(host);
673
2.49k
    result = curlx_dyn_addn(host, decoded, dlen);
674
2.49k
    curlx_free(decoded);
675
2.49k
    if(result)
676
0
      return cc2cu(result);
677
2.49k
  }
678
679
2.49k
  return CURLUE_OK;
680
138k
}
681
682
static CURLUcode parse_authority(struct Curl_URL *u,
683
                                 const char *auth, size_t authlen,
684
                                 unsigned int flags,
685
                                 struct dynbuf *host,
686
                                 bool has_scheme)
687
238k
{
688
238k
  size_t offset;
689
238k
  CURLUcode uc;
690
238k
  CURLcode result;
691
692
  /*
693
   * Parse the login details and strip them out of the hostname.
694
   */
695
238k
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
696
238k
  if(uc)
697
14
    goto out;
698
699
238k
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
700
238k
  if(result) {
701
0
    uc = cc2cu(result);
702
0
    goto out;
703
0
  }
704
705
238k
  uc = Curl_parse_port(u, host, has_scheme);
706
238k
  if(uc)
707
1.51k
    goto out;
708
709
237k
  if(!curlx_dyn_len(host))
710
486
    return CURLUE_NO_HOST;
711
712
236k
  switch(ipv4_normalize(host)) {
713
96.2k
  case HOST_IPV4:
714
96.2k
    break;
715
2.03k
  case HOST_IPV6:
716
2.03k
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
717
2.03k
    break;
718
138k
  case HOST_NAME:
719
138k
    uc = urldecode_host(host);
720
138k
    if(!uc)
721
138k
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
722
138k
    break;
723
0
  case HOST_ERROR:
724
0
    uc = CURLUE_OUT_OF_MEMORY;
725
0
    break;
726
0
  default:
727
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
728
0
    break;
729
236k
  }
730
731
238k
out:
732
238k
  return uc;
733
236k
}
734
735
/* used for HTTP/2 server push */
736
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
{
  struct dynbuf hostbuf;
  CURLUcode uc;

  DEBUGASSERT(authority);
  curlx_dyn_init(&hostbuf, CURL_MAX_INPUT_LENGTH);

  /* user names are not accepted in an authority set this way */
  uc = parse_authority(u, authority, strlen(authority),
                       CURLU_DISALLOW_USER, &hostbuf, !!u->scheme);
  if(uc) {
    curlx_dyn_free(&hostbuf);
    return uc;
  }

  /* the handle takes over the buffer as its new host */
  curlx_free(u->host);
  u->host = curlx_dyn_ptr(&hostbuf);
  return uc;
}
754
755
/*
756
 * "Remove Dot Segments"
757
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
758
 */
759
760
static bool is_dot(const char **str, size_t *clen)
761
25.9M
{
762
25.9M
  const char *p = *str;
763
25.9M
  if(*p == '.') {
764
683k
    (*str)++;
765
683k
    (*clen)--;
766
683k
    return TRUE;
767
683k
  }
768
25.2M
  else if((*clen >= 3) &&
769
25.1M
          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e')) {
770
60.9k
    *str += 3;
771
60.9k
    *clen -= 3;
772
60.9k
    return TRUE;
773
60.9k
  }
774
25.1M
  return FALSE;
775
25.9M
}
776
777
186M
#define ISSLASH(x) ('/' == (x))
778
779
/*
780
 * dedotdotify()
781
 * @unittest: 1395
782
 *
783
 * This function gets a null-terminated path with dot and dotdot sequences
784
 * passed in and strips them off according to the rules in RFC 3986 section
785
 * 5.2.4.
786
 *
787
 * The function handles a path. It should not contain the query nor fragment.
788
 *
789
 * RETURNS
790
 *
791
 * Zero for success and 'out' set to an allocated dedotdotified string.
792
 */
793
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
794
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
795
54.8k
{
796
54.8k
  struct dynbuf out;
797
54.8k
  CURLcode result = CURLE_OK;
798
799
54.8k
  *outp = NULL;
800
  /* the path always starts with a slash, and a slash has not dot */
801
54.8k
  if(clen < 2)
802
0
    return 0;
803
804
54.8k
  curlx_dyn_init(&out, clen + 1);
805
806
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
807
      remove that prefix from the input buffer; otherwise, */
808
54.8k
  if(is_dot(&input, &clen)) {
809
36
    const char *p = input;
810
36
    size_t blen = clen;
811
812
36
    if(!clen)
813
      /* . [end] */
814
2
      goto end;
815
34
    else if(ISSLASH(*p)) {
816
      /* one dot followed by a slash */
817
4
      input = p + 1;
818
4
      clen--;
819
4
    }
820
821
    /*  D. if the input buffer consists only of "." or "..", then remove
822
        that from the input buffer; otherwise, */
823
30
    else if(is_dot(&p, &blen)) {
824
12
      if(!blen)
825
        /* .. [end] */
826
4
        goto end;
827
8
      else if(ISSLASH(*p)) {
828
        /* ../ */
829
3
        input = p + 1;
830
3
        clen = blen - 1;
831
3
      }
832
12
    }
833
36
  }
834
835
185M
  while(clen && !result) { /* until end of path content */
836
185M
    if(ISSLASH(*input)) {
837
25.4M
      const char *p = &input[1];
838
25.4M
      size_t blen = clen - 1;
839
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
840
          "."  is a complete path segment, then replace that prefix with "/" in
841
          the input buffer; otherwise, */
842
25.4M
      if(is_dot(&p, &blen)) {
843
525k
        if(!blen) { /* /. */
844
713
          result = curlx_dyn_addn(&out, "/", 1);
845
713
          break;
846
713
        }
847
524k
        else if(ISSLASH(*p)) { /* /./ */
848
136k
          input = p;
849
136k
          clen = blen;
850
136k
          continue;
851
136k
        }
852
853
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
854
            where ".." is a complete path segment, then replace that prefix
855
            with "/" in the input buffer and remove the last segment and its
856
            preceding "/" (if any) from the output buffer; otherwise, */
857
387k
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
858
          /* remove the last segment from the output buffer */
859
153k
          size_t len = curlx_dyn_len(&out);
860
153k
          if(len) {
861
133k
            char *ptr = curlx_dyn_ptr(&out);
862
133k
            char *last = memrchr(ptr, '/', len);
863
133k
            if(last)
864
              /* trim the output at the slash */
865
133k
              curlx_dyn_setlen(&out, last - ptr);
866
133k
          }
867
868
153k
          if(blen) { /* /../ */
869
153k
            input = p;
870
153k
            clen = blen;
871
153k
            continue;
872
153k
          }
873
420
          result = curlx_dyn_addn(&out, "/", 1);
874
420
          break;
875
153k
        }
876
525k
      }
877
25.4M
    }
878
879
    /*  E. move the first path segment in the input buffer to the end of
880
        the output buffer, including the initial "/" character (if any) and
881
        any subsequent characters up to, but not including, the next "/"
882
        character or the end of the input buffer. */
883
884
185M
    result = curlx_dyn_addn(&out, input, 1);
885
185M
    input++;
886
185M
    clen--;
887
185M
  }
888
54.8k
end:
889
54.8k
  if(!result) {
890
54.8k
    if(curlx_dyn_len(&out))
891
54.8k
      *outp = curlx_dyn_ptr(&out);
892
11
    else {
893
11
      *outp = curlx_strdup("");
894
11
      if(!*outp)
895
0
        return 1;
896
11
    }
897
54.8k
  }
898
54.8k
  return result ? 1 : 0; /* success */
899
54.8k
}
900
901
static CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
                            struct dynbuf *host, const char **pathp,
                            size_t *pathlenp)
{
  const char *path;
  size_t pathlen;
  bool uncpath = FALSE;

  /* file:/ is not enough to actually be a complete file: URL */
  if(urllen <= 6)
    return CURLUE_BAD_FILE_URL;

  /* the path starts right after the five bytes "file:" */
  path = url + 5;
  pathlen = urllen - 5;

  u->scheme = curlx_strdup("file");
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  /* Extra handling URLs with an authority component (i.e. that start with
   * "file://")
   *
   * We allow omitted hostname (e.g. file:/<path>) -- valid according to
   * RFC 8089, but not the (current) WHAT-WG URL spec.
   */
  if((path[0] == '/') && (path[1] == '/')) {
    /* swallow the two slashes */
    const char *ptr = path + 2;

    /*
     * According to RFC 8089, a file: URL can be reliably dereferenced if:
     *
     *  o it has no/blank hostname, or
     *
     *  o the hostname matches "localhost" (case-insensitively), or
     *
     *  o the hostname is a FQDN that resolves to this machine, or
     *
     *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
     *    Appendix E.3).
     *
     * For brevity, we only consider URLs with empty, "localhost", or
     * "127.0.0.1" hostnames as local, otherwise as an UNC String.
     *
     * Additionally, there is an exception for URLs with a Windows drive
     * letter in the authority (which was accidentally omitted from RFC 8089
     * Appendix E, but believe me, it was meant to be there. --MK)
     */
    if((ptr[0] != '/') && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
      /* the URL includes a hostname, it must match "localhost" or
         "127.0.0.1" to be valid */
      if(checkprefix("localhost/", ptr) ||
         checkprefix("127.0.0.1/", ptr)) {
        /* both names are nine bytes long */
        ptr += 9; /* now points to the slash after the host */
      }
      else {
#ifdef _WIN32
        size_t len;

        /* the hostname, NetBIOS computer name, can not contain disallowed
           chars, and the delimiting slash character must be appended to the
           hostname */
        path = strpbrk(ptr, "/\\:*?\"<>|");
        if(!path || *path != '/')
          return CURLUE_BAD_FILE_URL;

        len = path - ptr;
        if(len) {
          CURLcode code = curlx_dyn_addn(host, ptr, len);
          if(code)
            return cc2cu(code);
          uncpath = TRUE;
        }

        ptr -= 2; /* now points to the // before the host in UNC */
#else
        /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
           none */
        return CURLUE_BAD_FILE_URL;
#endif
      }
    }

    path = ptr;
    pathlen = urllen - (ptr - url);
  }

  if(!uncpath)
    /* no host for file: URLs by default */
    curlx_dyn_reset(host);

#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
  /* Do not allow Windows drive letters when not in Windows.
   * This catches both "file:/c:" and "file:c:" */
  if((('/' == path[0]) && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
     STARTS_WITH_URL_DRIVE_PREFIX(path)) {
    /* File drive letters are only accepted in MS-DOS/Windows */
    return CURLUE_BAD_FILE_URL;
  }
#else
  /* If the path starts with a slash and a drive letter, ditch the slash */
  if(('/' == path[0]) && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
    /* only the pointer moves forward, no data is copied */
    path++;
    pathlen--;
  }
#endif

  /* hand the path back to the caller */
  *pathp = path;
  *pathlenp = pathlen;
  return CURLUE_OK;
}
1012
1013
/*
 * parse_scheme() decides which scheme (if any) to store in 'u' and reports
 * in '*hostpp' where the rest of the URL, starting with the hostname, begins.
 *
 * 'schemelen' is the length of the scheme already extracted into
 * 'schemebuf'; zero means the URL carried no scheme.
 *
 * Returns CURLUE_OK, or CURLUE_UNSUPPORTED_SCHEME, CURLUE_BAD_SLASHES,
 * CURLUE_BAD_SCHEME or CURLUE_OUT_OF_MEMORY. '*hostpp' is only written on
 * the paths that get past the validation checks.
 */
static CURLUcode parse_scheme(const char *url, CURLU *u, char *schemebuf,
                              size_t schemelen, unsigned int flags,
                              const char **hostpp)
{
  const char *chosen;

  if(!schemelen) {
    /* no scheme in the URL: only acceptable when the caller allows a
       default or a guessed scheme */
    if(!(flags & (CURLU_DEFAULT_SCHEME | CURLU_GUESS_SCHEME)))
      return CURLUE_BAD_SCHEME;

    /* without a scheme, the hostname starts at the very beginning */
    *hostpp = url;

    if(!(flags & CURLU_DEFAULT_SCHEME))
      /* nothing to store now, the scheme is left unset */
      return CURLUE_OK;

    chosen = DEFAULT_SCHEME;
  }
  else {
    /* step past the scheme and the single separator byte following it */
    const char *after = url + schemelen + 1;
    int slashes = 0;

    /* count leading slashes, but never look at more than four */
    while((slashes < 4) && (after[slashes] == '/'))
      slashes++;

    if(!Curl_get_scheme_handler(schemebuf) &&
       !(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;

    if((slashes == 0) || (slashes == 4))
      /* less than one or more than three slashes */
      return CURLUE_BAD_SLASHES;

    *hostpp = after + slashes; /* hostname starts here */
    chosen = schemebuf;
  }

  u->scheme = curlx_strdup(chosen);
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_OK;
}
1061
1062
/*
 * guess_scheme() picks a scheme from the leading label of the hostname held
 * in 'host' (legacy curl-style guessing), stores a copy in u->scheme and
 * marks the scheme as guessed. Any hostname without a known prefix gets
 * "http".
 *
 * Returns CURLUE_OK or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode guess_scheme(CURLU *u, struct dynbuf *host)
{
  /* checked in this order, first match wins */
  static const struct {
    const char *prefix;
    const char *scheme;
  } guesses[] = {
    { "ftp.", "ftp" },
    { "dict.", "dict" },
    { "ldap.", "ldap" },
    { "imap.", "imap" },
    { "smtp.", "smtp" },
    { "pop3.", "pop3" }
  };
  const char *name = curlx_dyn_ptr(host);
  const char *picked = "http";
  size_t i;

  for(i = 0; i < sizeof(guesses) / sizeof(guesses[0]); i++) {
    if(checkprefix(guesses[i].prefix, name)) {
      picked = guesses[i].scheme;
      break;
    }
  }

  u->scheme = curlx_strdup(picked);
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  u->guessed_scheme = TRUE;
  return CURLUE_OK;
}
1089
1090
/*
 * handle_fragment() records that the URL has a fragment and, when there is
 * anything after the leading '#', stores it in u->fragment.
 *
 * 'fragment' points at the '#' and 'fraglen' counts it. With
 * CURLU_URLENCODE set the stored copy is run through urlencode_str(),
 * otherwise it is a plain null-terminated copy.
 *
 * Returns CURLUE_OK, the error from urlencode_str(), or
 * CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode handle_fragment(CURLU *u, const char *fragment,
                                 size_t fraglen, unsigned int flags)
{
  const char *body = fragment + 1; /* skip the leading '#' */

  u->fragment_present = TRUE;

  if(fraglen <= 1)
    /* a lone '#': present, but nothing to store */
    return CURLUE_OK;

  if(!(flags & CURLU_URLENCODE)) {
    u->fragment = Curl_memdup0(body, fraglen - 1);
    return u->fragment ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
  }
  else {
    struct dynbuf enc;
    CURLUcode rc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    rc = urlencode_str(&enc, body, fraglen - 1, TRUE, FALSE);
    if(rc)
      return rc;
    /* the handle takes over the buffer built in the dynbuf */
    u->fragment = curlx_dyn_ptr(&enc);
  }
  return CURLUE_OK;
}
1113
1114
/*
 * handle_query() records that the URL has a query and stores the part after
 * the leading '?' in u->query. A query consisting of only the '?' is stored
 * as an empty string.
 *
 * 'query' points at the '?' and 'qlen' counts it. With CURLU_URLENCODE set
 * the stored copy is run through urlencode_str() in query mode.
 *
 * Returns CURLUE_OK, the error from urlencode_str(), or
 * CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode handle_query(CURLU *u, const char *query,
                              size_t qlen, unsigned int flags)
{
  u->query_present = TRUE;

  if(qlen <= 1) {
    /* single byte query */
    u->query = curlx_strdup("");
  }
  else if(flags & CURLU_URLENCODE) {
    struct dynbuf enc;
    CURLUcode rc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    /* skip the leading question mark */
    rc = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
    if(rc)
      return rc;
    u->query = curlx_dyn_ptr(&enc);
    return CURLUE_OK;
  }
  else
    u->query = Curl_memdup0(query + 1, qlen - 1);

  /* both remaining branches made a plain allocation */
  if(!u->query)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_OK;
}
1143
1144
/*
 * handle_path() stores the path component in u->path.
 *
 * With CURLU_URLENCODE set, a non-empty path is first run through
 * urlencode_str(). A path of at most one byte (nothing, or just the slash)
 * is not copied into the handle. Unless CURLU_PATH_AS_IS is set, the stored
 * path is passed through dedotdotify() and replaced by its output when one
 * is produced.
 *
 * Returns CURLUE_OK, the error from urlencode_str(), or
 * CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode handle_path(CURLU *u, const char *path,
                             size_t pathlen, unsigned int flags)
{
  if(pathlen && (flags & CURLU_URLENCODE)) {
    struct dynbuf enc;
    CURLUcode rc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    rc = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
    if(rc)
      return rc;
    pathlen = curlx_dyn_len(&enc);
    u->path = curlx_dyn_ptr(&enc);
    path = u->path;
  }

  if(pathlen <= 1)
    /* there is no path left or just the slash, nothing more to do */
    return CURLUE_OK;

  if(u->path) {
    if(flags & CURLU_URLENCODE)
      /* it might have encoded more than just the path so cut it */
      u->path[pathlen] = 0;
  }
  else {
    u->path = Curl_memdup0(path, pathlen);
    if(!u->path)
      return CURLUE_OUT_OF_MEMORY;
    path = u->path;
  }

  if(!(flags & CURLU_PATH_AS_IS)) {
    /* remove ../ and ./ sequences according to RFC3986 */
    char *cleaned;
    if(dedotdotify(path, pathlen, &cleaned))
      return CURLUE_OUT_OF_MEMORY;
    if(cleaned) {
      curlx_free(u->path);
      u->path = cleaned;
    }
  }
  return CURLUE_OK;
}
1187
1188
/*
 * parseurl() splits 'url' into its components and stores them in 'u'.
 *
 * The steps run in this order: junk scan, scheme detection, then either the
 * file: specific parser or the generic scheme + authority parser, followed
 * by fragment, query and finally path. The first step that fails ends the
 * parsing; everything stored in 'u' so far is then released with
 * free_urlhandle() and the error is returned.
 *
 * On success the hostname collected in the local dynbuf is handed over to
 * u->host.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  const char *path;
  const char *fragment;
  const char *query;
  size_t pathlen;
  char schemebuf[MAX_SCHEME_LEN + 1];
  size_t schemelen = 0;
  size_t urllen;
  CURLUcode result = CURLUE_OK;
  struct dynbuf host;

  DEBUGASSERT(url);

  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
  if(result)
    goto fail;

  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME |
                                            CURLU_DEFAULT_SCHEME));

  if(schemelen && !strcmp(schemebuf, "file")) {
    /* the file: scheme has its own parser */
    result = parse_file(url, urllen, u, &host, &path, &pathlen);
    if(result)
      goto fail;
  }
  else {
    const char *authority = NULL;
    size_t authlen;

    result = parse_scheme(url, u, schemebuf, schemelen, flags, &authority);
    if(result)
      goto fail;

    /* find the end of the hostname + port number */
    authlen = strcspn(authority, "/?#");
    path = &authority[authlen];

    /* this pathlen also contains the query and the fragment */
    pathlen = urllen - (path - url);

    if(authlen) {
      result = parse_authority(u, authority, authlen, flags, &host,
                               u->scheme != NULL);
      if(!result && (flags & CURLU_GUESS_SCHEME) && !u->scheme)
        result = guess_scheme(u, &host);
    }
    else if(flags & CURLU_NO_AUTHORITY) {
      /* allowed to be empty. */
      if(curlx_dyn_add(&host, ""))
        result = CURLUE_OUT_OF_MEMORY;
    }
    else
      result = CURLUE_NO_HOST;

    if(result)
      goto fail;
  }

  /* The path might at this point contain a fragment and/or a query to
     handle. The fragment goes first since it sits last in the string. */
  fragment = strchr(path, '#');
  if(fragment) {
    size_t fraglen = pathlen - (fragment - path);
    result = handle_fragment(u, fragment, fraglen, flags);
    if(result)
      goto fail;
    /* after this, pathlen still contains the query */
    pathlen -= fraglen;
  }

  query = memchr(path, '?', pathlen);
  if(query) {
    size_t qlen = pathlen - (query - path);
    result = handle_query(u, query, qlen, flags);
    if(result)
      goto fail;
    pathlen -= qlen;
  }

  /* the fragment and query parts are trimmed off from the path */
  result = handle_path(u, path, pathlen, flags);
  if(result)
    goto fail;

  u->host = curlx_dyn_ptr(&host);
  return CURLUE_OK;

fail:
  curlx_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
1271
1272
/*
1273
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1274
 */
1275
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1276
                                      unsigned int flags)
1277
243k
{
1278
243k
  CURLUcode result;
1279
243k
  CURLU tmpurl;
1280
243k
  memset(&tmpurl, 0, sizeof(tmpurl));
1281
243k
  result = parseurl(url, &tmpurl, flags);
1282
243k
  if(!result) {
1283
236k
    free_urlhandle(u);
1284
236k
    *u = tmpurl;
1285
236k
  }
1286
243k
  return result;
1287
243k
}
1288
1289
/*
1290
 */
1291
CURLU *curl_url(void)
1292
223k
{
1293
223k
  return curlx_calloc(1, sizeof(struct Curl_URL));
1294
223k
}
1295
1296
/*
 * Release everything held by the URL handle and then the handle itself.
 * Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;
  free_urlhandle(u);
  curlx_free(u);
}
1303
1304
#define DUP(dest, src, name)                    \
1305
11.1k
  do {                                          \
1306
11.1k
    if(src->name) {                             \
1307
3.44k
      dest->name = curlx_strdup(src->name);     \
1308
3.44k
      if(!dest->name)                           \
1309
3.44k
        goto fail;                              \
1310
3.44k
    }                                           \
1311
11.1k
  } while(0)
1312
1313
CURLU *curl_url_dup(const CURLU *in)
1314
1.11k
{
1315
1.11k
  struct Curl_URL *u = curlx_calloc(1, sizeof(struct Curl_URL));
1316
1.11k
  if(u) {
1317
1.11k
    DUP(u, in, scheme);
1318
1.11k
    DUP(u, in, user);
1319
1.11k
    DUP(u, in, password);
1320
1.11k
    DUP(u, in, options);
1321
1.11k
    DUP(u, in, host);
1322
1.11k
    DUP(u, in, port);
1323
1.11k
    DUP(u, in, path);
1324
1.11k
    DUP(u, in, query);
1325
1.11k
    DUP(u, in, fragment);
1326
1.11k
    DUP(u, in, zoneid);
1327
1.11k
    u->portnum = in->portnum;
1328
1.11k
    u->fragment_present = in->fragment_present;
1329
1.11k
    u->query_present = in->query_present;
1330
1.11k
  }
1331
1.11k
  return u;
1332
0
fail:
1333
0
  curl_url_cleanup(u);
1334
0
  return NULL;
1335
1.11k
}
1336
1337
#ifndef USE_IDN
/* built without IDN support: both conversions always fail */
#define host_decode(x, y) CURLUE_LACKS_IDN
#define host_encode(x, y) CURLUE_LACKS_IDN
#else
/* Translate the CURLcode from an IDN conversion into a CURLUcode: success
   stays success, out of memory is kept, anything else means the hostname
   was not acceptable. */
static CURLUcode idn_result(CURLcode rc)
{
  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/* Run 'host' through Curl_idn_decode(); the converted name is returned in
   '*allochost'. */
static CURLUcode host_decode(const char *host, char **allochost)
{
  return idn_result(Curl_idn_decode(host, allochost));
}

/* Run 'host' through Curl_idn_encode(); the converted name is returned in
   '*allochost'. */
static CURLUcode host_encode(const char *host, char **allochost)
{
  return idn_result(Curl_idn_encode(host, allochost));
}
#endif
1359
1360
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
1361
                               const char *ptr, char **partp,
1362
                               bool plusdecode, unsigned int flags)
1363
818k
{
1364
818k
  CURLUcode uc = CURLUE_OK;
1365
818k
  size_t partlen = strlen(ptr);
1366
818k
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1367
818k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1368
818k
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
1369
818k
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
1370
818k
  char *part = Curl_memdup0(ptr, partlen);
1371
818k
  *partp = NULL;
1372
818k
  if(!part)
1373
0
    return CURLUE_OUT_OF_MEMORY;
1374
818k
  if(plusdecode) {
1375
    /* convert + to space */
1376
222
    char *plus = part;
1377
222
    size_t i = 0;
1378
2.16k
    for(i = 0; i < partlen; ++plus, i++) {
1379
1.93k
      if(*plus == '+')
1380
218
        *plus = ' ';
1381
1.93k
    }
1382
222
  }
1383
818k
  if(urldecode) {
1384
80.8k
    char *decoded;
1385
80.8k
    size_t dlen;
1386
    /* this unconditional rejection of control bytes is documented
1387
       API behavior */
1388
80.8k
    CURLcode res = Curl_urldecode(part, partlen, &decoded, &dlen, REJECT_CTRL);
1389
80.8k
    curlx_free(part);
1390
80.8k
    if(res)
1391
36
      return CURLUE_URLDECODE;
1392
80.8k
    part = decoded;
1393
80.8k
    partlen = dlen;
1394
80.8k
  }
1395
818k
  if(urlencode) {
1396
145k
    struct dynbuf enc;
1397
145k
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1398
145k
    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
1399
145k
    curlx_free(part);
1400
145k
    if(uc)
1401
0
      return uc;
1402
145k
    part = curlx_dyn_ptr(&enc);
1403
145k
  }
1404
673k
  else if(punycode) {
1405
0
    if(!Curl_is_ASCII_name(u->host)) {
1406
0
      char *punyversion = NULL;
1407
0
      uc = host_decode(part, &punyversion);
1408
0
      curlx_free(part);
1409
0
      if(uc)
1410
0
        return uc;
1411
0
      part = punyversion;
1412
0
    }
1413
0
  }
1414
673k
  else if(depunyfy) {
1415
0
    if(Curl_is_ASCII_name(u->host)) {
1416
0
      char *unpunified = NULL;
1417
0
      uc = host_encode(part, &unpunified);
1418
0
      curlx_free(part);
1419
0
      if(uc)
1420
0
        return uc;
1421
0
      part = unpunified;
1422
0
    }
1423
0
  }
1424
818k
  *partp = part;
1425
818k
  return CURLUE_OK;
1426
818k
}
1427
1428
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1429
313k
{
1430
313k
  char *url;
1431
313k
  const char *scheme;
1432
313k
  char *options = u->options;
1433
313k
  char *port = u->port;
1434
313k
  char *allochost = NULL;
1435
313k
  bool show_fragment =
1436
313k
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1437
313k
  bool show_query = (u->query && u->query[0]) ||
1438
277k
    (u->query_present && flags & CURLU_GET_EMPTY);
1439
313k
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1440
313k
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1441
313k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1442
313k
  char portbuf[7];
1443
313k
  if(u->scheme && curl_strequal("file", u->scheme)) {
1444
2.08k
    url = curl_maprintf("file://%s%s%s%s%s",
1445
2.08k
                        u->path,
1446
2.08k
                        show_query ? "?": "",
1447
2.08k
                        u->query ? u->query : "",
1448
2.08k
                        show_fragment ? "#": "",
1449
2.08k
                        u->fragment ? u->fragment : "");
1450
2.08k
  }
1451
311k
  else if(!u->host)
1452
116k
    return CURLUE_NO_HOST;
1453
195k
  else {
1454
195k
    const struct Curl_handler *h = NULL;
1455
195k
    char schemebuf[MAX_SCHEME_LEN + 5];
1456
195k
    if(u->scheme)
1457
195k
      scheme = u->scheme;
1458
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1459
0
      scheme = DEFAULT_SCHEME;
1460
0
    else
1461
0
      return CURLUE_NO_SCHEME;
1462
1463
195k
    h = Curl_get_scheme_handler(scheme);
1464
195k
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1465
      /* there is no stored port number, but asked to deliver
1466
         a default one for the scheme */
1467
0
      if(h) {
1468
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1469
0
        port = portbuf;
1470
0
      }
1471
0
    }
1472
195k
    else if(port) {
1473
      /* there is a stored port number, but asked to inhibit if it matches
1474
         the default one for the scheme */
1475
4.50k
      if(h && (h->defport == u->portnum) &&
1476
277
         (flags & CURLU_NO_DEFAULT_PORT))
1477
6
        port = NULL;
1478
4.50k
    }
1479
1480
195k
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1481
173k
      options = NULL;
1482
1483
195k
    if(u->host[0] == '[') {
1484
1.09k
      if(u->zoneid) {
1485
        /* make it '[ host %25 zoneid ]' */
1486
848
        struct dynbuf enc;
1487
848
        size_t hostlen = strlen(u->host);
1488
848
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1489
848
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1490
848
                          u->zoneid))
1491
0
          return CURLUE_OUT_OF_MEMORY;
1492
848
        allochost = curlx_dyn_ptr(&enc);
1493
848
      }
1494
1.09k
    }
1495
194k
    else if(urlencode) {
1496
14.9k
      allochost = curl_easy_escape(NULL, u->host, 0);
1497
14.9k
      if(!allochost)
1498
0
        return CURLUE_OUT_OF_MEMORY;
1499
14.9k
    }
1500
179k
    else if(punycode) {
1501
0
      if(!Curl_is_ASCII_name(u->host)) {
1502
0
        CURLUcode ret = host_decode(u->host, &allochost);
1503
0
        if(ret)
1504
0
          return ret;
1505
0
      }
1506
0
    }
1507
179k
    else if(depunyfy) {
1508
0
      if(Curl_is_ASCII_name(u->host)) {
1509
0
        CURLUcode ret = host_encode(u->host, &allochost);
1510
0
        if(ret)
1511
0
          return ret;
1512
0
      }
1513
0
    }
1514
1515
195k
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1516
195k
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1517
0
    else
1518
0
      schemebuf[0] = 0;
1519
1520
195k
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1521
195k
                        schemebuf,
1522
195k
                        u->user ? u->user : "",
1523
195k
                        u->password ? ":": "",
1524
195k
                        u->password ? u->password : "",
1525
195k
                        options ? ";" : "",
1526
195k
                        options ? options : "",
1527
195k
                        (u->user || u->password || options) ? "@": "",
1528
195k
                        allochost ? allochost : u->host,
1529
195k
                        port ? ":": "",
1530
195k
                        port ? port : "",
1531
195k
                        u->path ? u->path : "/",
1532
195k
                        show_query ? "?": "",
1533
195k
                        u->query ? u->query : "",
1534
195k
                        show_fragment ? "#": "",
1535
195k
                        u->fragment ? u->fragment : "");
1536
195k
    curlx_free(allochost);
1537
195k
  }
1538
197k
  if(!url)
1539
0
    return CURLUE_OUT_OF_MEMORY;
1540
197k
  *part = url;
1541
197k
  return CURLUE_OK;
1542
197k
}
1543
1544
/*
 * curl_url_get() extracts the component selected by 'what' from 'u' and
 * returns it as a newly allocated string in '*part'.
 *
 * CURLUPART_URL is assembled by urlget_url(). Every other component is
 * looked up in the handle and, when present, copied and converted by
 * urlget_format(). A missing component gives the matching CURLUE_NO_* code
 * and an unrecognized 'what' gives CURLUE_UNKNOWN_PART. '*part' is set to
 * NULL before anything else is done.
 *
 * Returns CURLUE_BAD_HANDLE for a NULL 'u' and CURLUE_BAD_PARTPOINTER for a
 * NULL 'part'.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART;
  char portbuf[7];
  bool plus2space = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  if(what == CURLUPART_URL)
    return urlget_url(u, part, flags);

  switch(what) {
  case CURLUPART_SCHEME:
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    break;
  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;
  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;
  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~CURLU_URLDECODE; /* never for port */
    if(u->scheme) {
      if(value) {
        /* there is a stored port number, but ask to inhibit if
           it matches the default one for the scheme */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* there is no stored port number, but asked to deliver
           a default one for the scheme */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h) {
          curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          value = portbuf;
        }
      }
    }
    break;
  case CURLUPART_PATH:
    /* a path that is not stored reads as the root */
    value = u->path ? u->path : "/";
    break;
  case CURLUPART_QUERY:
    value = u->query;
    missing = CURLUE_NO_QUERY;
    plus2space = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* there was a blank query and the user do not ask for it */
      value = NULL;
    break;
  case CURLUPART_FRAGMENT:
    value = u->fragment;
    missing = CURLUE_NO_FRAGMENT;
    if(!value && u->fragment_present && flags & CURLU_GET_EMPTY)
      /* there was a blank fragment and the user asks for it */
      value = "";
    break;
  default:
    break;
  }

  if(!value)
    return missing;
  return urlget_format(u, what, value, part, plus2space, flags);
}
1638
1639
/*
 * set_url_scheme() checks that 'scheme' may be stored in the handle and
 * clears the guessed-scheme mark. It does not store the string itself.
 *
 * A scheme with a handler is accepted as is. One without a handler is only
 * accepted with CURLU_NON_SUPPORT_SCHEME set, and then every byte of it
 * must follow the RFC 3986 syntax:
 *
 *   ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Returns CURLUE_OK, CURLUE_BAD_SCHEME (empty, longer than MAX_SCHEME_LEN
 * or containing a disallowed byte) or CURLUE_UNSUPPORTED_SCHEME.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_handler *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme_handler(scheme);
  if(!h) {
    size_t i;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;
    if(!ISALPHA(scheme[0]))
      return CURLUE_BAD_SCHEME;
    /* Check every remaining byte, the last one included. The previous loop
       started over at the first byte and ran one round short, so a scheme
       such as "ab!" slipped through. */
    for(i = 1; i < plen; i++) {
      const char c = scheme[i];
      if(!ISALNUM(c) && (c != '+') && (c != '-') && (c != '.'))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1669
/*
 * set_url_port() parses 'provided_port' as a decimal number no larger than
 * 0xffff and stores it in the handle, both as a freshly formatted string
 * and as a number.
 *
 * The string must start with a digit and must have nothing left after the
 * number, otherwise CURLUE_BAD_PORT_NUMBER is returned. The old port string
 * is only freed once the new one has been allocated.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  const char *cursor = provided_port;
  curl_off_t value;
  char *text;

  /* not a number, or a weirdly provided number: not good! */
  if(!ISDIGIT(cursor[0]) ||
     curlx_str_number(&cursor, &value, 0xffff) ||
     *cursor)
    return CURLUE_BAD_PORT_NUMBER;

  text = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, value);
  if(!text)
    return CURLUE_OUT_OF_MEMORY;

  curlx_free(u->port);
  u->port = text;
  u->portnum = (unsigned short)value;
  return CURLUE_OK;
}
1687
1688
/*
 * set_url() lets a new URL replace the existing (if any) contents of 'u'.
 *
 *  - A blank URL (part_size is zero) changes nothing and is only accepted
 *    when the handle already holds enough to produce a full URL; otherwise
 *    it is CURLUE_MALFORMED_INPUT.
 *  - An absolute URL replaces whatever the handle holds.
 *  - Any other URL is parsed on its own when no full URL can be extracted
 *    from the handle, and is otherwise applied as a relative reference to
 *    the current URL with redirect_url().
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *current = NULL;
  CURLUcode rc;

  if(!part_size) {
    /* a blank URL is not a valid URL unless we already have a complete one
       and this is a redirect */
    rc = curl_url_get(u, CURLUPART_URL, &current, flags);
    if(rc)
      return (rc == CURLUE_OUT_OF_MEMORY) ? rc : CURLUE_MALFORMED_INPUT;
    /* success, meaning the "" is a fine relative URL, but nothing
       changes */
    curlx_free(current);
    return CURLUE_OK;
  }

  /* if the new URL is absolute replace the existing with the new. */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME | CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  /* if the old URL is incomplete (we cannot get an absolute URL out of the
     handle), replace the existing with the new */
  rc = curl_url_get(u, CURLUPART_URL, &current, flags);
  if(rc)
    return (rc == CURLUE_OUT_OF_MEMORY) ?
      rc : parseurl_and_replace(url, u, flags);

  DEBUGASSERT(current); /* it is set here */
  /* apply the relative part to create a new URL */
  rc = redirect_url(current, url, u, flags);
  curlx_free(current);
  return rc;
}
1734
1735
/*
 * urlset_clear() removes the component selected by 'what' from the handle.
 *
 * CURLUPART_URL releases everything and zeroes the whole struct. For the
 * individual components the stored string is freed and its pointer set to
 * NULL, together with the state that belongs to it: the guessed-scheme
 * mark, the numeric port, and the query/fragment presence marks.
 *
 * Returns CURLUE_OK, or CURLUE_UNKNOWN_PART for an unrecognized 'what'.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **field = NULL;

  switch(what) {
  case CURLUPART_URL:
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    field = &u->scheme;
    break;
  case CURLUPART_USER:
    field = &u->user;
    break;
  case CURLUPART_PASSWORD:
    field = &u->password;
    break;
  case CURLUPART_OPTIONS:
    field = &u->options;
    break;
  case CURLUPART_HOST:
    field = &u->host;
    break;
  case CURLUPART_ZONEID:
    field = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    field = &u->port;
    break;
  case CURLUPART_PATH:
    field = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    field = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    field = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  /* free the selected string and leave NULL behind in the handle */
  Curl_safefree(*field);
  return CURLUE_OK;
}
1781
1782
/*
 * allowed_in_path() tells whether the byte 'x' is one of the punctuation
 * characters that curl_url_set() passes through as-is, instead of percent
 * encoding, when it URL encodes a path.
 *
 * Returns TRUE for any of  ! $ & ' ( ) { } [ ] * + , ; = : @ /  and FALSE
 * for every other byte value.
 */
static bool allowed_in_path(unsigned char x)
{
  static const char passthrough[] = "!$&'(){}[]*+,;=:@/";

  /* strchr() would match the terminating NUL of the list as well, so a zero
     byte must be ruled out before the lookup */
  if(x && strchr(passthrough, x))
    return TRUE;
  return FALSE;
}
1807
1808
/*
 * curl_url_set() sets, replaces or clears one part of the URL handle 'u'.
 *
 * u      the URL handle to update
 * what   which part to operate on
 * part   the new content as a null-terminated string, or NULL to clear the
 *        part (handled by urlset_clear())
 * flags  CURLU_* bits. The ones acted on directly in this function are
 *        CURLU_URLENCODE (URL encode the input), CURLU_APPENDQUERY (append
 *        to an existing query instead of replacing it) and
 *        CURLU_NO_AUTHORITY (accept an empty hostname). The flags are also
 *        handed on to set_url_scheme() and set_url().
 *
 * Per-part handling:
 * - CURLUPART_URL and CURLUPART_PORT are delegated to set_url() and
 *   set_url_port() respectively.
 * - CURLUPART_SCHEME is checked by set_url_scheme() and never URL encoded.
 * - CURLUPART_HOST drops any stored zone id first and the resulting name
 *   must pass hostname_check().
 * - CURLUPART_PATH always gets a leading slash, and when URL encoding the
 *   characters accepted by allowed_in_path() are kept verbatim.
 * - CURLUPART_QUERY encodes space as '+' when URL encoding. When appending,
 *   the first '=' in the input is left unencoded.
 *
 * When the input is not URL encoded here, the hex digits of any %XX
 * sequences it already contains are converted to lower case.
 *
 * Returns CURLUE_OK on success. Errors produced directly here are
 * CURLUE_BAD_HANDLE (NULL handle), CURLUE_MALFORMED_INPUT (input longer than
 * CURL_MAX_INPUT_LENGTH), CURLUE_UNKNOWN_PART, CURLUE_BAD_HOSTNAME and
 * whatever cc2cu() maps a failed buffer operation to. Errors from the
 * delegated helpers are returned unchanged.
 *
 * NOTE(review): the error returns that directly follow a failed
 * curlx_dyn_add*() call do not free that buffer, which presumes a dynbuf
 * releases its own storage when an append fails - confirm in curlx/dynbuf.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool plusencode = FALSE;
  bool pathmode = FALSE;
  bool leadingslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;
  size_t nalloc;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    /* setting a part to NULL clears it */
    return urlset_clear(u, what);

  nalloc = strlen(part);
  if(nalloc > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  /* pick the destination member and the encoding rules for this part */
  switch(what) {
  case CURLUPART_SCHEME: {
    CURLUcode status = set_url_scheme(u, part, flags);
    if(status)
      return status;
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
    return set_url_port(u, part);
  case CURLUPART_PATH:
    pathmode = TRUE;
    leadingslash = TRUE; /* enforce */
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
    equalsencode = appendquery;
    storep = &u->query;
    u->query_present = TRUE;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    u->fragment_present = TRUE;
    break;
  case CURLUPART_URL:
    return set_url(u, part, nalloc, flags);
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp;
    struct dynbuf enc;
    /* worst case every input byte becomes a three byte %XX sequence, plus
       the optional leading slash */
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);

    if(leadingslash && (part[0] != '/')) {
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
      if(result)
        return cc2cu(result);
    }
    if(urlencode) {
      const unsigned char *i;

      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = curlx_dyn_addn(&enc, "+", 1);
          if(result)
            /* map the failure like every other append in this function */
            return cc2cu(result);
        }
        else if(ISUNRESERVED(*i) ||
                (pathmode && allowed_in_path(*i)) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = curlx_dyn_addn(&enc, i, 1);
          if(result)
            return cc2cu(result);
        }
        else {
          unsigned char out[3] = { '%' };
          Curl_hexbyte(&out[1], *i);
          result = curlx_dyn_addn(&enc, out, 3);
          if(result)
            return cc2cu(result);
        }
      }
    }
    else {
      char *p;
      CURLcode result = curlx_dyn_add(&enc, part);
      if(result)
        return cc2cu(result);
      p = curlx_dyn_ptr(&enc);
      while(*p) {
        /* make sure percent encoded are lower case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }
    newp = curlx_dyn_ptr(&enc);

    if(appendquery && newp) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen - 1] != '&');
      if(querylen) {
        struct dynbuf qbuf;
        CURLcode result;
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);

        result = curlx_dyn_addn(&qbuf, u->query, querylen); /* old query */
        if(!result && addamperand)
          result = curlx_dyn_addn(&qbuf, "&", 1);
        if(!result)
          result = curlx_dyn_add(&qbuf, newp);

        /* the encoded piece is not needed any longer, whatever the
           outcome */
        curlx_dyn_free(&enc);
        if(result)
          /* report the actual cause: the combined query can exceed the
             size limit of 'qbuf' without memory having run out */
          return cc2cu(result);
        curlx_free(*storep);
        *storep = curlx_dyn_ptr(&qbuf);
        return CURLUE_OK;
      }
      /* no previous query to append to: store 'newp' as the new query
         below */
    }

    else if(what == CURLUPART_HOST) {
      size_t n = curlx_dyn_len(&enc);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it is allowed to be empty. */
      }
      else {
        bool bad = FALSE;
        if(!n)
          bad = TRUE; /* empty hostname is not okay */
        else if(!urlencode) {
          /* if the hostname part was not URL encoded here, it was set ready
             URL encoded so we need to decode it to check */
          size_t dlen;
          char *decoded = NULL;
          CURLcode result =
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
          if(result || hostname_check(u, decoded, dlen))
            bad = TRUE;
          curlx_free(decoded);
        }
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
          bad = TRUE;
        if(bad) {
          curlx_dyn_free(&enc);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    /* replace the old content; the handle takes over the buffer held by
       'enc' */
    curlx_free(*storep);
    *storep = (char *)CURL_UNCONST(newp);
  }
  return CURLUE_OK;
}