Coverage Report

Created: 2026-02-03 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/curl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
#include "curl_setup.h"
25
26
#include "urldata.h"
27
#include "urlapi-int.h"
28
#include "strcase.h"
29
#include "url.h"
30
#include "escape.h"
31
#include "curlx/inet_pton.h"
32
#include "curlx/inet_ntop.h"
33
#include "strdup.h"
34
#include "idn.h"
35
#include "curlx/strparse.h"
36
#include "curl_memrchr.h"
37
38
#ifdef _WIN32
/* MS-DOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter immediately
 * followed by a colon. The argument is parenthesized at every use so that
 * expressions such as 'p + 1' expand correctly. Note that 'str' is evaluated
 * more than once, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str)            \
  ((('a' <= (str)[0] && (str)[0] <= 'z') ||      \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) &&     \
   ((str)[1] == ':'))
#endif
45
46
/* URL flavor of the MS-DOS/Windows drive prefix check: an ASCII letter,
 * then ':' or '|', then a slash, a backslash or the end of the string.
 * The bytes are inspected strictly left to right, so nothing beyond a
 * terminating NUL is read. 'str' is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str)                        \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) ||                  \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) &&                 \
   (((str)[1] == ':') || ((str)[1] == '|')) &&                   \
   (((str)[2] == '/') || ((str)[2] == '\\') || !(str)[2]))
53
54
/* Upper limit for the length of a URL scheme name. The scheme is not URL
   encoded. Presumably every scheme libcurl supports is shorter than this -
   TODO confirm against the list of supported protocols. */
#define MAX_SCHEME_LEN 40
56
57
/*
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 *
 * In this file the value is only handed to curlx_inet_pton() and
 * curlx_inet_ntop(); presumably it just has to differ from AF_INET.
 */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
65
66
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every string member is owned by the handle and is released with
 * curlx_free() by free_urlhandle(). A NULL member means the part is unset.
 */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;   /* decimal string, regenerated from the parsed number */
  char *path;
  char *query;
  char *fragment;
  unsigned short portnum; /* the numerical version (if 'port' is set) */
  BIT(query_present);    /* to support blank */
  BIT(fragment_present); /* to support blank */
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
};
83
84
0
/* the scheme used for a URL without scheme when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
85
86
static void free_urlhandle(struct Curl_URL *u)
87
475k
{
88
475k
  curlx_free(u->scheme);
89
475k
  curlx_free(u->user);
90
475k
  curlx_free(u->password);
91
475k
  curlx_free(u->options);
92
475k
  curlx_free(u->host);
93
475k
  curlx_free(u->zoneid);
94
475k
  curlx_free(u->port);
95
475k
  curlx_free(u->path);
96
475k
  curlx_free(u->query);
97
475k
  curlx_free(u->fragment);
98
475k
}
99
100
/*
 * Return a pointer to the first '/' or '?' that follows the hostname, as in
 * http://www.example.com?id=2380, or to the terminating NUL if there is no
 * such separator.
 *
 * The hostname is taken to start right after the first "//" in the string,
 * or at the beginning of the string when it holds no "//".
 */
static const char *find_host_sep(const char *url)
{
  const char *dslash = strstr(url, "//");
  const char *host = dslash ? dslash + 2 : url;

  /* skip everything up to the first slash, question mark or NUL */
  return host + strcspn(host, "/?");
}
119
120
/* convert CURLcode to CURLUcode: CURLE_TOO_LARGE becomes CURLUE_TOO_LARGE
   and every other value becomes CURLUE_OUT_OF_MEMORY, so only use this on a
   CURLcode that is known to be an error */
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
                  CURLUE_OUT_OF_MEMORY)
123
124
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
125
 * spaces in the source URL accordingly.
126
 *
127
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
128
 * will fail.
129
 */
130
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
131
                               size_t len, bool relative,
132
                               bool query)
133
192k
{
134
  /* we must add this with whitespace-replacing */
135
192k
  bool left = !query;
136
192k
  const unsigned char *iptr;
137
192k
  const unsigned char *host_sep = (const unsigned char *)url;
138
192k
  CURLcode result = CURLE_OK;
139
140
192k
  if(!relative) {
141
126
    size_t n;
142
126
    host_sep = (const unsigned char *)find_host_sep(url);
143
144
    /* output the first piece as-is */
145
126
    n = (const char *)host_sep - url;
146
126
    result = curlx_dyn_addn(o, url, n);
147
126
    len -= n;
148
126
  }
149
150
69.4M
  for(iptr = host_sep; len && !result; iptr++, len--) {
151
69.2M
    if(*iptr == ' ') {
152
33.1k
      if(left)
153
18.7k
        result = curlx_dyn_addn(o, "%20", 3);
154
14.3k
      else
155
14.3k
        result = curlx_dyn_addn(o, "+", 1);
156
33.1k
    }
157
69.1M
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
158
37.7M
      unsigned char out[3] = { '%' };
159
37.7M
      Curl_hexbyte(&out[1], *iptr);
160
37.7M
      result = curlx_dyn_addn(o, out, 3);
161
37.7M
    }
162
31.4M
    else {
163
31.4M
      result = curlx_dyn_addn(o, iptr, 1);
164
31.4M
      if(*iptr == '?')
165
20.2k
        left = FALSE;
166
31.4M
    }
167
69.2M
  }
168
169
192k
  if(result)
170
0
    return cc2cu(result);
171
192k
  return CURLUE_OK;
172
192k
}
173
174
/*
175
 * Returns the length of the scheme if the given URL is absolute (as opposed
176
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
177
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
178
 *
179
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
180
 * scheme.
181
 */
182
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
183
                            bool guess_scheme)
184
526k
{
185
526k
  size_t i = 0;
186
526k
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
187
526k
  (void)buflen; /* only used in debug-builds */
188
526k
  if(buf)
189
246k
    buf[0] = 0; /* always leave a defined value in buf */
190
#ifdef _WIN32
191
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
192
    return 0;
193
#endif
194
526k
  if(ISALPHA(url[0]))
195
1.45M
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
196
1.45M
      char s = url[i];
197
1.45M
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.'))) {
198
        /* RFC 3986 3.1 explains:
199
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
200
        */
201
1.09M
      }
202
358k
      else {
203
358k
        break;
204
358k
      }
205
1.45M
    }
206
526k
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
207
    /* If this does not guess scheme, the scheme always ends with the colon so
208
       that this also detects data: URLs etc. In guessing mode, data: could
209
       be the hostname "data" with a specified port number. */
210
211
    /* the length of the scheme is the name part only */
212
256k
    size_t len = i;
213
256k
    if(buf) {
214
131k
      Curl_strntolower(buf, url, i);
215
131k
      buf[i] = 0;
216
131k
    }
217
256k
    return len;
218
256k
  }
219
269k
  return 0;
220
526k
}
221
222
/* scan for byte values <= 31, 127 and sometimes space */
223
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
224
246k
{
225
246k
  size_t n = strlen(url);
226
246k
  size_t i;
227
246k
  unsigned char control;
228
246k
  const unsigned char *p = (const unsigned char *)url;
229
246k
  if(n > CURL_MAX_INPUT_LENGTH)
230
0
    return CURLUE_MALFORMED_INPUT;
231
232
246k
  control = allowspace ? 0x1f : 0x20;
233
385M
  for(i = 0; i < n; i++) {
234
385M
    if(p[i] <= control || p[i] == 127)
235
500
      return CURLUE_MALFORMED_INPUT;
236
385M
  }
237
246k
  *urllen = n;
238
246k
  return CURLUE_OK;
239
246k
}
240
241
/*
242
 * parse_hostname_login()
243
 *
244
 * Parse the login details (username, password and options) from the URL and
245
 * strip them out of the hostname
246
 *
247
 */
248
static CURLUcode parse_hostname_login(struct Curl_URL *u,
249
                                      const char *login,
250
                                      size_t len,
251
                                      unsigned int flags,
252
                                      size_t *offset) /* to the hostname */
253
241k
{
254
241k
  CURLUcode result = CURLUE_OK;
255
241k
  CURLcode ccode;
256
241k
  char *userp = NULL;
257
241k
  char *passwdp = NULL;
258
241k
  char *optionsp = NULL;
259
241k
  const struct Curl_scheme *h = NULL;
260
261
  /* At this point, we assume all the other special cases have been taken
262
   * care of, so the host is at most
263
   *
264
   *   [user[:password][;options]]@]hostname
265
   *
266
   * We need somewhere to put the embedded details, so do that first.
267
   */
268
241k
  const char *ptr;
269
270
241k
  DEBUGASSERT(login);
271
272
241k
  *offset = 0;
273
241k
  ptr = memchr(login, '@', len);
274
241k
  if(!ptr)
275
184k
    goto out;
276
277
  /* We will now try to extract the
278
   * possible login information in a string like:
279
   * ftp://user:password@ftp.site.example:8021/README */
280
57.3k
  ptr++;
281
282
  /* if this is a known scheme, get some details */
283
57.3k
  if(u->scheme)
284
29.3k
    h = Curl_get_scheme(u->scheme);
285
286
  /* We could use the login information in the URL so extract it. Only parse
287
     options if the handler says we should. Note that 'h' might be NULL! */
288
57.3k
  ccode = Curl_parse_login_details(login, ptr - login - 1,
289
57.3k
                                   &userp, &passwdp,
290
57.3k
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
291
57.3k
                                   &optionsp : NULL);
292
57.3k
  if(ccode) {
293
    /* the only possible error from Curl_parse_login_details is out of
294
       memory: */
295
0
    result = CURLUE_OUT_OF_MEMORY;
296
0
    goto out;
297
0
  }
298
299
57.3k
  if(userp) {
300
57.3k
    if(flags & CURLU_DISALLOW_USER) {
301
      /* Option DISALLOW_USER is set and URL contains username. */
302
14
      result = CURLUE_USER_NOT_ALLOWED;
303
14
      goto out;
304
14
    }
305
57.3k
    curlx_free(u->user);
306
57.3k
    u->user = userp;
307
57.3k
  }
308
309
57.3k
  if(passwdp) {
310
18.3k
    curlx_free(u->password);
311
18.3k
    u->password = passwdp;
312
18.3k
  }
313
314
57.3k
  if(optionsp) {
315
227
    curlx_free(u->options);
316
227
    u->options = optionsp;
317
227
  }
318
319
  /* the hostname starts at this offset */
320
57.3k
  *offset = ptr - login;
321
57.3k
  return CURLUE_OK;
322
323
184k
out:
324
325
184k
  curlx_free(userp);
326
184k
  curlx_free(passwdp);
327
184k
  curlx_free(optionsp);
328
184k
  u->user = NULL;
329
184k
  u->password = NULL;
330
184k
  u->options = NULL;
331
332
184k
  return result;
333
57.3k
}
334
335
/*
 * Look for a port number at the end of the hostname held in 'host'. If a
 * colon separator is found, 'host' is cut off at that colon and the number,
 * when present, is stored in 'u' both as 'portnum' and as a newly generated
 * decimal string in 'port'.
 *
 * Returns CURLUE_BAD_IPV6 for a '[' without a matching ']',
 * CURLUE_BAD_PORT_NUMBER for an unusable port, CURLUE_OUT_OF_MEMORY or
 * CURLUE_OK.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  const char *hostname = curlx_dyn_ptr(host);
  const char *p;
  size_t keep;
  curl_off_t port;

  if(hostname[0] != '[')
    p = strchr(hostname, ':');
  else {
    /* this is a RFC2732-style specified IP-address, the port can only
       follow the ']' ending bracket */
    p = strchr(hostname, ']');
    if(!p)
      return CURLUE_BAD_IPV6;
    p++;
    if(!*p)
      p = NULL; /* nothing after the bracket */
    else if(*p != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!p)
    /* no port separator */
    return CURLUE_OK;

  /* Browser behavior adaptation. If there is a colon with no digits after,
     just cut off the name there which makes us ignore the colon and just
     use the default port. Firefox, Chrome and Safari all do that.

     Do not do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  keep = p - hostname;
  curlx_dyn_setlen(host, keep);
  p++;
  if(!*p)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  /* only digits up to 65535, and nothing may follow the number */
  if(curlx_str_number(&p, &port, 0xffff) || *p)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short)port;
  /* generate a new port number string to get rid of leading zeroes etc */
  curlx_free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
388
389
/* this assumes 'hostname' now starts with [ */
390
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
391
                            size_t hlen) /* length of hostname */
392
2.75k
{
393
2.75k
  size_t len;
394
2.75k
  DEBUGASSERT(*hostname == '[');
395
2.75k
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
396
83
    return CURLUE_BAD_IPV6;
397
2.67k
  hostname++;
398
2.67k
  hlen -= 2;
399
400
  /* only valid IPv6 letters are ok */
401
2.67k
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
402
403
2.67k
  if(hlen != len) {
404
2.26k
    hlen = len;
405
2.26k
    if(hostname[len] == '%') {
406
      /* this could now be '%[zone id]' */
407
2.08k
      char zoneid[16];
408
2.08k
      int i = 0;
409
2.08k
      char *h = &hostname[len + 1];
410
      /* pass '25' if present and is a URL encoded percent sign */
411
2.08k
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
412
409
        h += 2;
413
14.9k
      while(*h && (*h != ']') && (i < 15))
414
12.8k
        zoneid[i++] = *h++;
415
2.08k
      if(!i || (']' != *h))
416
469
        return CURLUE_BAD_IPV6;
417
1.61k
      zoneid[i] = 0;
418
1.61k
      u->zoneid = curlx_strdup(zoneid);
419
1.61k
      if(!u->zoneid)
420
0
        return CURLUE_OUT_OF_MEMORY;
421
1.61k
      hostname[len] = ']'; /* insert end bracket */
422
1.61k
      hostname[len + 1] = 0; /* terminate the hostname */
423
1.61k
    }
424
181
    else
425
181
      return CURLUE_BAD_IPV6;
426
    /* hostname is fine */
427
2.26k
  }
428
429
  /* Normalize the IPv6 address */
430
2.02k
  {
431
2.02k
    char dest[16]; /* fits a binary IPv6 address */
432
2.02k
    hostname[hlen] = 0; /* end the address there */
433
2.02k
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
434
349
      return CURLUE_BAD_IPV6;
435
1.67k
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen + 1)) {
436
1.59k
      hlen = strlen(hostname); /* might be shorter now */
437
1.59k
      hostname[hlen + 1] = 0;
438
1.59k
    }
439
1.67k
    hostname[hlen] = ']'; /* restore ending bracket */
440
1.67k
  }
441
0
  return CURLUE_OK;
442
2.02k
}
443
444
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
445
                                size_t hlen) /* length of hostname */
446
138k
{
447
138k
  size_t len;
448
138k
  DEBUGASSERT(hostname);
449
450
138k
  if(!hlen)
451
0
    return CURLUE_NO_HOST;
452
138k
  else if(hostname[0] == '[')
453
531
    return ipv6_parse(u, hostname, hlen);
454
137k
  else {
455
    /* letters from the second string are not ok */
456
137k
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
457
137k
    if(hlen != len)
458
      /* hostname with bad content */
459
1.69k
      return CURLUE_BAD_HOSTNAME;
460
137k
  }
461
136k
  return CURLUE_OK;
462
138k
}
463
464
/*
465
 * Handle partial IPv4 numerical addresses and different bases, like
466
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
467
 *
468
 * If the given input string is syntactically wrong IPv4 or any part for
469
 * example is too big, this function returns HOST_NAME.
470
 *
471
 * Output the "normalized" version of that input string in plain quad decimal
472
 * integers.
473
 *
474
 * Returns the host type.
475
 */
476
477
0
#define HOST_ERROR   -1 /* out of memory */
478
479
277k
#define HOST_NAME    1
480
197k
#define HOST_IPV4    2
481
4.45k
#define HOST_IPV6    3
482
483
static int ipv4_normalize(struct dynbuf *host)
484
239k
{
485
239k
  bool done = FALSE;
486
239k
  int n = 0;
487
239k
  const char *c = curlx_dyn_ptr(host);
488
239k
  unsigned int parts[4] = { 0, 0, 0, 0 };
489
239k
  CURLcode result = CURLE_OK;
490
491
239k
  if(*c == '[')
492
2.22k
    return HOST_IPV6;
493
494
409k
  while(!done) {
495
305k
    int rc;
496
305k
    curl_off_t l;
497
305k
    if(*c == '0') {
498
74.4k
      if(c[1] == 'x') {
499
1.14k
        c += 2; /* skip the prefix */
500
1.14k
        rc = curlx_str_hex(&c, &l, UINT_MAX);
501
1.14k
      }
502
73.2k
      else
503
73.2k
        rc = curlx_str_octal(&c, &l, UINT_MAX);
504
74.4k
    }
505
230k
    else
506
230k
      rc = curlx_str_number(&c, &l, UINT_MAX);
507
508
305k
    if(rc)
509
123k
      return HOST_NAME;
510
511
181k
    parts[n] = (unsigned int)l;
512
513
181k
    switch(*c) {
514
67.7k
    case '.':
515
67.7k
      if(n == 3)
516
162
        return HOST_NAME;
517
67.6k
      n++;
518
67.6k
      c++;
519
67.6k
      break;
520
521
104k
    case '\0':
522
104k
      done = TRUE;
523
104k
      break;
524
525
9.87k
    default:
526
9.87k
      return HOST_NAME;
527
181k
    }
528
181k
  }
529
530
104k
  switch(n) {
531
84.3k
  case 0: /* a -- 32 bits */
532
84.3k
    curlx_dyn_reset(host);
533
534
84.3k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
535
84.3k
                            (parts[0] >> 24),
536
84.3k
                            ((parts[0] >> 16) & 0xff),
537
84.3k
                            ((parts[0] >> 8) & 0xff),
538
84.3k
                            (parts[0] & 0xff));
539
84.3k
    break;
540
2.19k
  case 1: /* a.b -- 8.24 bits */
541
2.19k
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
542
940
      return HOST_NAME;
543
1.25k
    curlx_dyn_reset(host);
544
1.25k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
545
1.25k
                            (parts[0]),
546
1.25k
                            ((parts[1] >> 16) & 0xff),
547
1.25k
                            ((parts[1] >> 8) & 0xff),
548
1.25k
                            (parts[1] & 0xff));
549
1.25k
    break;
550
2.37k
  case 2: /* a.b.c -- 8.8.16 bits */
551
2.37k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
552
1.86k
      return HOST_NAME;
553
513
    curlx_dyn_reset(host);
554
513
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
555
513
                            (parts[0]),
556
513
                            (parts[1]),
557
513
                            ((parts[2] >> 8) & 0xff),
558
513
                            (parts[2] & 0xff));
559
513
    break;
560
15.4k
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
561
15.4k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
562
13.5k
       (parts[3] > 0xff))
563
2.59k
      return HOST_NAME;
564
12.8k
    curlx_dyn_reset(host);
565
12.8k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
566
12.8k
                            (parts[0]),
567
12.8k
                            (parts[1]),
568
12.8k
                            (parts[2]),
569
12.8k
                            (parts[3]));
570
12.8k
    break;
571
104k
  }
572
98.9k
  if(result)
573
0
    return HOST_ERROR;
574
98.9k
  return HOST_IPV4;
575
98.9k
}
576
577
/* if necessary, replace the host content with a URL decoded version */
578
static CURLUcode urldecode_host(struct dynbuf *host)
579
138k
{
580
138k
  const char *per;
581
138k
  const char *hostname = curlx_dyn_ptr(host);
582
138k
  per = strchr(hostname, '%');
583
138k
  if(!per)
584
    /* nothing to decode */
585
135k
    return CURLUE_OK;
586
2.79k
  else {
587
    /* encoded */
588
2.79k
    size_t dlen;
589
2.79k
    char *decoded;
590
2.79k
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
591
2.79k
                                     REJECT_CTRL);
592
2.79k
    if(result)
593
115
      return CURLUE_BAD_HOSTNAME;
594
2.67k
    curlx_dyn_reset(host);
595
2.67k
    result = curlx_dyn_addn(host, decoded, dlen);
596
2.67k
    curlx_free(decoded);
597
2.67k
    if(result)
598
0
      return cc2cu(result);
599
2.67k
  }
600
601
2.67k
  return CURLUE_OK;
602
138k
}
603
604
static CURLUcode parse_authority(struct Curl_URL *u,
605
                                 const char *auth, size_t authlen,
606
                                 unsigned int flags,
607
                                 struct dynbuf *host,
608
                                 bool has_scheme)
609
241k
{
610
241k
  size_t offset;
611
241k
  CURLUcode uc;
612
241k
  CURLcode result;
613
614
  /*
615
   * Parse the login details and strip them out of the hostname.
616
   */
617
241k
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
618
241k
  if(uc)
619
14
    goto out;
620
621
241k
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
622
241k
  if(result) {
623
0
    uc = cc2cu(result);
624
0
    goto out;
625
0
  }
626
627
241k
  uc = Curl_parse_port(u, host, has_scheme);
628
241k
  if(uc)
629
1.51k
    goto out;
630
631
240k
  if(!curlx_dyn_len(host))
632
456
    return CURLUE_NO_HOST;
633
634
239k
  switch(ipv4_normalize(host)) {
635
98.9k
  case HOST_IPV4:
636
98.9k
    break;
637
2.22k
  case HOST_IPV6:
638
2.22k
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
639
2.22k
    break;
640
138k
  case HOST_NAME:
641
138k
    uc = urldecode_host(host);
642
138k
    if(!uc)
643
138k
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
644
138k
    break;
645
0
  case HOST_ERROR:
646
0
    uc = CURLUE_OUT_OF_MEMORY;
647
0
    break;
648
0
  default:
649
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
650
0
    break;
651
239k
  }
652
653
241k
out:
654
241k
  return uc;
655
239k
}
656
657
/* used for HTTP/2 server push */
658
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
659
0
{
660
0
  CURLUcode result;
661
0
  struct dynbuf host;
662
663
0
  DEBUGASSERT(authority);
664
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
665
666
0
  result = parse_authority(u, authority, strlen(authority),
667
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
668
0
  if(result)
669
0
    curlx_dyn_free(&host);
670
0
  else {
671
0
    curlx_free(u->host);
672
0
    u->host = curlx_dyn_ptr(&host);
673
0
  }
674
0
  return result;
675
0
}
676
677
/*
 * "Remove Dot Segments"
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 */

/*
 * Check if the string at '*str' starts with a dot, written either as a
 * plain '.' or URL encoded as "%2e" / "%2E". The encoded form is only
 * looked for when '*clen' says at least three bytes remain.
 *
 * If it does, '*str' is advanced past the dot, '*clen' is reduced by the
 * same number of bytes and TRUE is returned. Otherwise both are left
 * unchanged and FALSE is returned.
 */
static bool is_dot(const char **str, size_t *clen)
{
  const char *s = *str;
  size_t skip = 0; /* number of bytes the dot occupies, zero if no dot */

  if(s[0] == '.')
    skip = 1;
  else if((*clen >= 3) &&
          (s[0] == '%') && (s[1] == '2') && ((s[2] | 0x20) == 'e'))
    skip = 3;

  *str += skip;
  *clen -= skip;
  return skip != 0;
}
698
699
158M
#define ISSLASH(x) ((x) == '/')
700
701
/*
702
 * dedotdotify()
703
 * @unittest: 1395
704
 *
705
 * This function gets a null-terminated path with dot and dotdot sequences
706
 * passed in and strips them off according to the rules in RFC 3986 section
707
 * 5.2.4.
708
 *
709
 * The function handles a path. It should not contain the query nor fragment.
710
 *
711
 * RETURNS
712
 *
713
 * Zero for success and 'out' set to an allocated dedotdotified string.
714
 */
715
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
716
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
717
55.5k
{
718
55.5k
  struct dynbuf out;
719
55.5k
  CURLcode result = CURLE_OK;
720
721
55.5k
  *outp = NULL;
722
  /* the path always starts with a slash, and a slash has not dot */
723
55.5k
  if(clen < 2)
724
0
    return 0;
725
726
55.5k
  curlx_dyn_init(&out, clen + 1);
727
728
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
729
      remove that prefix from the input buffer; otherwise, */
730
55.5k
  if(is_dot(&input, &clen)) {
731
41
    const char *p = input;
732
41
    size_t blen = clen;
733
734
41
    if(!clen)
735
      /* . [end] */
736
3
      goto end;
737
38
    else if(ISSLASH(*p)) {
738
      /* one dot followed by a slash */
739
5
      input = p + 1;
740
5
      clen--;
741
5
    }
742
743
    /*  D. if the input buffer consists only of "." or "..", then remove
744
        that from the input buffer; otherwise, */
745
33
    else if(is_dot(&p, &blen)) {
746
12
      if(!blen)
747
        /* .. [end] */
748
4
        goto end;
749
8
      else if(ISSLASH(*p)) {
750
        /* ../ */
751
4
        input = p + 1;
752
4
        clen = blen - 1;
753
4
      }
754
12
    }
755
41
  }
756
757
157M
  while(clen && !result) { /* until end of path content */
758
157M
    if(ISSLASH(*input)) {
759
24.2M
      const char *p = &input[1];
760
24.2M
      size_t blen = clen - 1;
761
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
762
          "."  is a complete path segment, then replace that prefix with "/" in
763
          the input buffer; otherwise, */
764
24.2M
      if(is_dot(&p, &blen)) {
765
452k
        if(!blen) { /* /. */
766
839
          result = curlx_dyn_addn(&out, "/", 1);
767
839
          break;
768
839
        }
769
451k
        else if(ISSLASH(*p)) { /* /./ */
770
100k
          input = p;
771
100k
          clen = blen;
772
100k
          continue;
773
100k
        }
774
775
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
776
            where ".." is a complete path segment, then replace that prefix
777
            with "/" in the input buffer and remove the last segment and its
778
            preceding "/" (if any) from the output buffer; otherwise, */
779
350k
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
780
          /* remove the last segment from the output buffer */
781
151k
          size_t len = curlx_dyn_len(&out);
782
151k
          if(len) {
783
132k
            const char *ptr = curlx_dyn_ptr(&out);
784
132k
            const char *last = memrchr(ptr, '/', len);
785
132k
            if(last)
786
              /* trim the output at the slash */
787
132k
              curlx_dyn_setlen(&out, last - ptr);
788
132k
          }
789
790
151k
          if(blen) { /* /../ */
791
150k
            input = p;
792
150k
            clen = blen;
793
150k
            continue;
794
150k
          }
795
429
          result = curlx_dyn_addn(&out, "/", 1);
796
429
          break;
797
151k
        }
798
452k
      }
799
24.2M
    }
800
801
    /*  E. move the first path segment in the input buffer to the end of
802
        the output buffer, including the initial "/" character (if any) and
803
        any subsequent characters up to, but not including, the next "/"
804
        character or the end of the input buffer. */
805
806
157M
    result = curlx_dyn_addn(&out, input, 1);
807
157M
    input++;
808
157M
    clen--;
809
157M
  }
810
55.5k
end:
811
55.5k
  if(!result) {
812
55.5k
    if(curlx_dyn_len(&out))
813
55.5k
      *outp = curlx_dyn_ptr(&out);
814
12
    else {
815
12
      *outp = curlx_strdup("");
816
12
      if(!*outp)
817
0
        return 1;
818
12
    }
819
55.5k
  }
820
55.5k
  return result ? 1 : 0; /* success */
821
55.5k
}
822
823
/*
 * Parse the part of a file: URL that follows the scheme.
 *
 * 'url' is the complete URL of 'urllen' bytes and is assumed to start with
 * the five bytes "file:". Sets u->scheme to "file" and returns the path, as
 * a pointer into 'url' plus a length, in 'pathp' and 'pathlenp'.
 *
 * 'host' is reset, except on Windows when the URL holds a UNC hostname, in
 * which case that name is appended to 'host' instead.
 *
 * Returns CURLUE_OK, CURLUE_BAD_FILE_URL, CURLUE_OUT_OF_MEMORY or a
 * converted dynbuf error. The outputs are only set on CURLUE_OK.
 */
static CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
                            struct dynbuf *host, const char **pathp,
                            size_t *pathlenp)
{
  const char *path;
  size_t pathlen;
  bool uncpath = FALSE;

  if(urllen <= 6)
    /* file:/ is not enough to actually be a complete file: URL */
    return CURLUE_BAD_FILE_URL;

  /* the path starts right after the five byte "file:" prefix */
  path = url + 5;
  pathlen = urllen - 5;

  u->scheme = curlx_strdup("file");
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  /* Extra handling URLs with an authority component (i.e. that start with
   * "file://")
   *
   * We allow omitted hostname (e.g. file:/<path>) -- valid according to
   * RFC 8089, but not the (current) WHAT-WG URL spec.
   */
  if((path[0] == '/') && (path[1] == '/')) {
    /* swallow the two slashes */
    const char *rest = path + 2;

    /*
     * According to RFC 8089, a file: URL can be reliably dereferenced if:
     *
     *  o it has no/blank hostname, or
     *
     *  o the hostname matches "localhost" (case-insensitively), or
     *
     *  o the hostname is a FQDN that resolves to this machine, or
     *
     *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
     *    Appendix E.3).
     *
     * For brevity, we only consider URLs with empty, "localhost", or
     * "127.0.0.1" hostnames as local, otherwise as an UNC String.
     *
     * Additionally, there is an exception for URLs with a Windows drive
     * letter in the authority (which was accidentally omitted from RFC 8089
     * Appendix E, but believe me, it was meant to be there. --MK)
     */
    if((rest[0] != '/') && !STARTS_WITH_URL_DRIVE_PREFIX(rest)) {
      /* the URL includes a hostname, it must match "localhost" or
         "127.0.0.1" to be valid */
      if(!checkprefix("localhost/", rest) &&
         !checkprefix("127.0.0.1/", rest)) {
#ifdef _WIN32
        size_t namelen;

        /* the hostname, NetBIOS computer name, can not contain disallowed
           chars, and the delimiting slash character must be appended to the
           hostname */
        path = strpbrk(rest, "/\\:*?\"<>|");
        if(!path || *path != '/')
          return CURLUE_BAD_FILE_URL;

        namelen = path - rest;
        if(namelen) {
          CURLcode code = curlx_dyn_addn(host, rest, namelen);
          if(code)
            return cc2cu(code);
          uncpath = TRUE;
        }

        rest -= 2; /* now points to the // before the host in UNC */
#else
        /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
           none */
        return CURLUE_BAD_FILE_URL;
#endif
      }
      else
        /* both accepted names are nine bytes long */
        rest += 9; /* now points to the slash after the host */
    }

    path = rest;
    pathlen = urllen - (rest - url);
  }

  if(!uncpath)
    /* no host for file: URLs by default */
    curlx_dyn_reset(host);

#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
  /* Do not allow Windows drive letters when not in Windows.
   * This catches both "file:/c:" and "file:c:" */
  if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
     STARTS_WITH_URL_DRIVE_PREFIX(path)) {
    /* File drive letters are only accepted in MS-DOS/Windows */
    return CURLUE_BAD_FILE_URL;
  }
#else
  /* If the path starts with a slash and a drive letter, ditch the slash by
     moving the start of the path one byte forward */
  if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
    path++;
    pathlen--;
  }
#endif

  *pathp = path;
  *pathlenp = pathlen;
  return CURLUE_OK;
}
934
935
static CURLUcode parse_scheme(const char *url, CURLU *u, char *schemebuf,
936
                              size_t schemelen, unsigned int flags,
937
                              const char **hostpp)
938
243k
{
939
  /* clear path */
940
243k
  const char *schemep = NULL;
941
942
243k
  if(schemelen) {
943
128k
    int i = 0;
944
128k
    const char *p = &url[schemelen + 1];
945
319k
    while((*p == '/') && (i < 4)) {
946
190k
      p++;
947
190k
      i++;
948
190k
    }
949
950
128k
    schemep = schemebuf;
951
128k
    if(!Curl_get_scheme(schemep) &&
952
1.27k
       !(flags & CURLU_NON_SUPPORT_SCHEME))
953
61
      return CURLUE_UNSUPPORTED_SCHEME;
954
955
128k
    if((i < 1) || (i > 3))
956
      /* less than one or more than three slashes */
957
61
      return CURLUE_BAD_SLASHES;
958
959
128k
    *hostpp = p; /* hostname starts here */
960
128k
  }
961
114k
  else {
962
    /* no scheme! */
963
964
114k
    if(!(flags & (CURLU_DEFAULT_SCHEME | CURLU_GUESS_SCHEME)))
965
0
      return CURLUE_BAD_SCHEME;
966
967
114k
    if(flags & CURLU_DEFAULT_SCHEME)
968
0
      schemep = DEFAULT_SCHEME;
969
970
    /*
971
     * The URL was badly formatted, let's try without scheme specified.
972
     */
973
114k
    *hostpp = url;
974
114k
  }
975
976
242k
  if(schemep) {
977
128k
    u->scheme = curlx_strdup(schemep);
978
128k
    if(!u->scheme)
979
0
      return CURLUE_OUT_OF_MEMORY;
980
128k
  }
981
242k
  return CURLUE_OK;
982
242k
}
983
984
/*
 * guess_scheme() selects a scheme for a URL that was given without one,
 * based on how the hostname stored in 'host' begins. Anything that does not
 * start with one of the known prefixes is treated as "http".
 *
 * An allocated copy of the scheme is stored in u->scheme and the handle is
 * marked as having a guessed scheme. Returns CURLUE_OUT_OF_MEMORY if the
 * copy cannot be allocated.
 */
static CURLUcode guess_scheme(CURLU *u, struct dynbuf *host)
{
  const char *name = curlx_dyn_ptr(host);
  const char *guess = "http"; /* when no known prefix matches */

  /* legacy curl-style guess based on hostname */
  if(checkprefix("ftp.", name))
    guess = "ftp";
  else if(checkprefix("dict.", name))
    guess = "dict";
  else if(checkprefix("ldap.", name))
    guess = "ldap";
  else if(checkprefix("imap.", name))
    guess = "imap";
  else if(checkprefix("smtp.", name))
    guess = "smtp";
  else if(checkprefix("pop3.", name))
    guess = "pop3";

  u->scheme = curlx_strdup(guess);
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  u->guessed_scheme = TRUE;
  return CURLUE_OK;
}
1011
1012
static CURLUcode handle_fragment(CURLU *u, const char *fragment,
1013
                                 size_t fraglen, unsigned int flags)
1014
15.1k
{
1015
15.1k
  CURLUcode result;
1016
15.1k
  u->fragment_present = TRUE;
1017
15.1k
  if(fraglen > 1) {
1018
    /* skip the leading '#' in the copy but include the terminating null */
1019
10.8k
    if(flags & CURLU_URLENCODE) {
1020
2.08k
      struct dynbuf enc;
1021
2.08k
      curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1022
2.08k
      result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1023
2.08k
      if(result)
1024
0
        return result;
1025
2.08k
      u->fragment = curlx_dyn_ptr(&enc);
1026
2.08k
    }
1027
8.78k
    else {
1028
8.78k
      u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1029
8.78k
      if(!u->fragment)
1030
0
        return CURLUE_OUT_OF_MEMORY;
1031
8.78k
    }
1032
10.8k
  }
1033
15.1k
  return CURLUE_OK;
1034
15.1k
}
1035
1036
static CURLUcode handle_query(CURLU *u, const char *query,
1037
                              size_t qlen, unsigned int flags)
1038
36.1k
{
1039
36.1k
  u->query_present = TRUE;
1040
36.1k
  if(qlen > 1) {
1041
29.7k
    if(flags & CURLU_URLENCODE) {
1042
7.79k
      struct dynbuf enc;
1043
7.79k
      CURLUcode result;
1044
7.79k
      curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1045
      /* skip the leading question mark */
1046
7.79k
      result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1047
7.79k
      if(result)
1048
0
        return result;
1049
7.79k
      u->query = curlx_dyn_ptr(&enc);
1050
7.79k
    }
1051
21.9k
    else {
1052
21.9k
      u->query = Curl_memdup0(query + 1, qlen - 1);
1053
21.9k
      if(!u->query)
1054
0
        return CURLUE_OUT_OF_MEMORY;
1055
21.9k
    }
1056
29.7k
  }
1057
6.42k
  else {
1058
    /* single byte query */
1059
6.42k
    u->query = curlx_strdup("");
1060
6.42k
    if(!u->query)
1061
0
      return CURLUE_OUT_OF_MEMORY;
1062
6.42k
  }
1063
36.1k
  return CURLUE_OK;
1064
36.1k
}
1065
1066
static CURLUcode handle_path(CURLU *u, const char *path,
1067
                             size_t pathlen, unsigned int flags)
1068
239k
{
1069
239k
  CURLUcode result;
1070
239k
  if(pathlen && (flags & CURLU_URLENCODE)) {
1071
18.9k
    struct dynbuf enc;
1072
18.9k
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1073
18.9k
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1074
18.9k
    if(result)
1075
0
      return result;
1076
18.9k
    pathlen = curlx_dyn_len(&enc);
1077
18.9k
    path = u->path = curlx_dyn_ptr(&enc);
1078
18.9k
  }
1079
1080
239k
  if(pathlen <= 1) {
1081
    /* there is no path left or just the slash, unset */
1082
178k
    path = NULL;
1083
178k
  }
1084
60.8k
  else {
1085
60.8k
    if(!u->path) {
1086
45.9k
      u->path = Curl_memdup0(path, pathlen);
1087
45.9k
      if(!u->path)
1088
0
        return CURLUE_OUT_OF_MEMORY;
1089
45.9k
      path = u->path;
1090
45.9k
    }
1091
14.9k
    else if(flags & CURLU_URLENCODE)
1092
      /* it might have encoded more than just the path so cut it */
1093
14.9k
      u->path[pathlen] = 0;
1094
1095
60.8k
    if(!(flags & CURLU_PATH_AS_IS)) {
1096
      /* remove ../ and ./ sequences according to RFC3986 */
1097
55.5k
      char *dedot;
1098
55.5k
      int err = dedotdotify(path, pathlen, &dedot);
1099
55.5k
      if(err)
1100
0
        return CURLUE_OUT_OF_MEMORY;
1101
55.5k
      if(dedot) {
1102
55.5k
        curlx_free(u->path);
1103
55.5k
        u->path = dedot;
1104
55.5k
      }
1105
55.5k
    }
1106
60.8k
  }
1107
239k
  return CURLUE_OK;
1108
239k
}
1109
1110
/*
 * parseurl() splits 'url' into its components and stores them in 'u'.
 *
 * "file" URLs are handed to parse_file(). For everything else the scheme
 * and the authority are parsed here, and a scheme is guessed from the
 * hostname when CURLU_GUESS_SCHEME is set and none was given. After that
 * the fragment, the query and finally the path are stored, in that order.
 *
 * On any failure the host buffer is freed, free_urlhandle() is called on
 * 'u' and the error code is returned.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  const char *path;
  const char *hash;
  const char *qmark;
  size_t pathlen;
  size_t urllen;
  size_t schemelen = 0;
  char schemebuf[MAX_SCHEME_LEN + 1];
  struct dynbuf host;
  CURLUcode result = CURLUE_OK;

  DEBUGASSERT(url);

  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
  if(result)
    goto fail;

  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME |
                                            CURLU_DEFAULT_SCHEME));

  if(schemelen && !strcmp(schemebuf, "file")) {
    /* handle the file: scheme */
    result = parse_file(url, urllen, u, &host, &path, &pathlen);
    if(result)
      goto fail;
  }
  else {
    const char *hostp = NULL;
    size_t hostlen;
    result = parse_scheme(url, u, schemebuf, schemelen, flags, &hostp);
    if(result)
      goto fail;

    /* find the end of the hostname + port number */
    hostlen = strcspn(hostp, "/?#");
    path = &hostp[hostlen];

    /* this pathlen also contains the query and the fragment */
    pathlen = urllen - (path - url);

    if(hostlen) {
      result = parse_authority(u, hostp, hostlen, flags, &host,
                               u->scheme != NULL);
      if(!result && (flags & CURLU_GUESS_SCHEME) && !u->scheme)
        result = guess_scheme(u, &host);
    }
    else if(flags & CURLU_NO_AUTHORITY) {
      /* allowed to be empty. */
      if(curlx_dyn_add(&host, ""))
        result = CURLUE_OUT_OF_MEMORY;
    }
    else
      result = CURLUE_NO_HOST;

    if(result)
      goto fail;
  }

  /* The path might at this point contain a fragment and/or a query to
     handle. The fragment goes first since it is last in the URL. */
  hash = strchr(path, '#');
  if(hash) {
    size_t fraglen = pathlen - (hash - path);
    result = handle_fragment(u, hash, fraglen, flags);
    if(result)
      goto fail;
    /* after this, pathlen still contains the query */
    pathlen -= fraglen;
  }

  qmark = memchr(path, '?', pathlen);
  if(qmark) {
    size_t qlen = pathlen - (qmark - path);
    result = handle_query(u, qmark, qlen, flags);
    if(result)
      goto fail;
    pathlen -= qlen;
  }

  /* the fragment and query parts are trimmed off from the path */
  result = handle_path(u, path, pathlen, flags);
  if(result)
    goto fail;

  /* the handle takes over the host buffer */
  u->host = curlx_dyn_ptr(&host);
  return CURLUE_OK;

fail:
  curlx_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
1193
1194
/*
1195
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1196
 */
1197
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1198
                                      unsigned int flags)
1199
246k
{
1200
246k
  CURLUcode result;
1201
246k
  CURLU tmpurl;
1202
246k
  memset(&tmpurl, 0, sizeof(tmpurl));
1203
246k
  result = parseurl(url, &tmpurl, flags);
1204
246k
  if(!result) {
1205
239k
    free_urlhandle(u);
1206
239k
    *u = tmpurl;
1207
239k
  }
1208
246k
  return result;
1209
246k
}
1210
1211
/*
1212
 * Concatenate a relative URL onto a base URL making it absolute.
1213
 */
1214
static CURLUcode redirect_url(const char *base, const char *relurl,
1215
                              CURLU *u, unsigned int flags)
1216
15.4k
{
1217
15.4k
  struct dynbuf urlbuf;
1218
15.4k
  bool host_changed = FALSE;
1219
15.4k
  const char *useurl = relurl;
1220
15.4k
  const char *cutoff = NULL;
1221
15.4k
  size_t prelen;
1222
15.4k
  CURLUcode uc;
1223
1224
  /* protsep points to the start of the hostname, after [scheme]:// */
1225
15.4k
  const char *protsep = base + strlen(u->scheme) + 3;
1226
15.4k
  DEBUGASSERT(base && relurl && u); /* all set here */
1227
15.4k
  if(!base)
1228
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
1229
1230
  /* handle different relative URL types */
1231
15.4k
  switch(relurl[0]) {
1232
1.14k
  case '/':
1233
1.14k
    if(relurl[1] == '/') {
1234
      /* protocol-relative URL: //example.com/path */
1235
126
      cutoff = protsep;
1236
126
      useurl = &relurl[2];
1237
126
      host_changed = TRUE;
1238
126
    }
1239
1.01k
    else
1240
      /* absolute /path */
1241
1.01k
      cutoff = strchr(protsep, '/');
1242
1.14k
    break;
1243
1244
571
  case '#':
1245
    /* fragment-only change */
1246
571
    if(u->fragment)
1247
392
      cutoff = strchr(protsep, '#');
1248
571
    break;
1249
1250
13.7k
  default:
1251
    /* path or query-only change */
1252
13.7k
    if(u->query && u->query[0])
1253
      /* remove existing query */
1254
6.69k
      cutoff = strchr(protsep, '?');
1255
7.01k
    else if(u->fragment && u->fragment[0])
1256
      /* Remove existing fragment */
1257
708
      cutoff = strchr(protsep, '#');
1258
1259
13.7k
    if(relurl[0] != '?') {
1260
      /* append a relative path after the last slash */
1261
11.2k
      cutoff = memrchr(protsep, '/',
1262
11.2k
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
1263
11.2k
      if(cutoff)
1264
11.2k
        cutoff++; /* truncate after last slash */
1265
11.2k
    }
1266
13.7k
    break;
1267
15.4k
  }
1268
1269
15.4k
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
1270
1271
  /* build new URL */
1272
15.4k
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
1273
1274
15.4k
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
1275
15.4k
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
1276
15.4k
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
1277
15.4k
                              flags & ~CURLU_PATH_AS_IS);
1278
15.4k
  }
1279
0
  else
1280
0
    uc = CURLUE_OUT_OF_MEMORY;
1281
1282
15.4k
  curlx_dyn_free(&urlbuf);
1283
15.4k
  return uc;
1284
15.4k
}
1285
1286
/*
1287
 */
1288
CURLU *curl_url(void)
1289
227k
{
1290
227k
  return curlx_calloc(1, sizeof(struct Curl_URL));
1291
227k
}
1292
1293
/*
 * curl_url_cleanup() frees everything stored in the handle and then the
 * handle itself. Passing NULL is fine and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  curlx_free(u);
}
1300
1301
#define DUP(dest, src, name)                    \
1302
10.4k
  do {                                          \
1303
10.4k
    if(src->name) {                             \
1304
3.20k
      dest->name = curlx_strdup(src->name);     \
1305
3.20k
      if(!dest->name)                           \
1306
3.20k
        goto fail;                              \
1307
3.20k
    }                                           \
1308
10.4k
  } while(0)
1309
1310
CURLU *curl_url_dup(const CURLU *in)
1311
1.04k
{
1312
1.04k
  struct Curl_URL *u = curlx_calloc(1, sizeof(struct Curl_URL));
1313
1.04k
  if(u) {
1314
1.04k
    DUP(u, in, scheme);
1315
1.04k
    DUP(u, in, user);
1316
1.04k
    DUP(u, in, password);
1317
1.04k
    DUP(u, in, options);
1318
1.04k
    DUP(u, in, host);
1319
1.04k
    DUP(u, in, port);
1320
1.04k
    DUP(u, in, path);
1321
1.04k
    DUP(u, in, query);
1322
1.04k
    DUP(u, in, fragment);
1323
1.04k
    DUP(u, in, zoneid);
1324
1.04k
    u->portnum = in->portnum;
1325
1.04k
    u->fragment_present = in->fragment_present;
1326
1.04k
    u->query_present = in->query_present;
1327
1.04k
  }
1328
1.04k
  return u;
1329
0
fail:
1330
0
  curl_url_cleanup(u);
1331
0
  return NULL;
1332
1.04k
}
1333
1334
#ifndef USE_IDN
/* without IDN support both conversions always fail with this error */
#define host_decode(x, y) CURLUE_LACKS_IDN
#define host_encode(x, y) CURLUE_LACKS_IDN
#else
/*
 * host_decode() runs Curl_idn_decode() on 'host' and returns the converted
 * name in *allochost. Out of memory is passed on as such, any other
 * failure is reported as a bad hostname.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_decode(host, allochost);
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  if(rc)
    return CURLUE_BAD_HOSTNAME;
  return CURLUE_OK;
}

/*
 * host_encode() runs Curl_idn_encode() on 'host' and returns the converted
 * name in *allochost. Out of memory is passed on as such, any other
 * failure is reported as a bad hostname.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_encode(host, allochost);
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  if(rc)
    return CURLUE_BAD_HOSTNAME;
  return CURLUE_OK;
}
#endif
1356
1357
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
1358
                               const char *ptr, char **partp,
1359
                               bool plusdecode, unsigned int flags)
1360
832k
{
1361
832k
  CURLUcode uc = CURLUE_OK;
1362
832k
  size_t partlen = strlen(ptr);
1363
832k
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1364
832k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1365
832k
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
1366
832k
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
1367
832k
  char *part = Curl_memdup0(ptr, partlen);
1368
832k
  *partp = NULL;
1369
832k
  if(!part)
1370
0
    return CURLUE_OUT_OF_MEMORY;
1371
832k
  if(plusdecode) {
1372
    /* convert + to space */
1373
214
    char *plus = part;
1374
214
    size_t i = 0;
1375
5.93k
    for(i = 0; i < partlen; ++plus, i++) {
1376
5.72k
      if(*plus == '+')
1377
218
        *plus = ' ';
1378
5.72k
    }
1379
214
  }
1380
832k
  if(urldecode) {
1381
82.0k
    char *decoded;
1382
82.0k
    size_t dlen;
1383
    /* this unconditional rejection of control bytes is documented
1384
       API behavior */
1385
82.0k
    CURLcode res = Curl_urldecode(part, partlen, &decoded, &dlen, REJECT_CTRL);
1386
82.0k
    curlx_free(part);
1387
82.0k
    if(res)
1388
36
      return CURLUE_URLDECODE;
1389
81.9k
    part = decoded;
1390
81.9k
    partlen = dlen;
1391
81.9k
  }
1392
832k
  if(urlencode) {
1393
148k
    struct dynbuf enc;
1394
148k
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1395
148k
    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
1396
148k
    curlx_free(part);
1397
148k
    if(uc)
1398
0
      return uc;
1399
148k
    part = curlx_dyn_ptr(&enc);
1400
148k
  }
1401
684k
  else if(punycode) {
1402
0
    if(!Curl_is_ASCII_name(u->host)) {
1403
0
      char *punyversion = NULL;
1404
0
      uc = host_decode(part, &punyversion);
1405
0
      curlx_free(part);
1406
0
      if(uc)
1407
0
        return uc;
1408
0
      part = punyversion;
1409
0
    }
1410
0
  }
1411
684k
  else if(depunyfy) {
1412
0
    if(Curl_is_ASCII_name(u->host)) {
1413
0
      char *unpunified = NULL;
1414
0
      uc = host_encode(part, &unpunified);
1415
0
      curlx_free(part);
1416
0
      if(uc)
1417
0
        return uc;
1418
0
      part = unpunified;
1419
0
    }
1420
0
  }
1421
832k
  *partp = part;
1422
832k
  return CURLUE_OK;
1423
832k
}
1424
1425
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1426
318k
{
1427
318k
  char *url;
1428
318k
  const char *scheme;
1429
318k
  char *options = u->options;
1430
318k
  char *port = u->port;
1431
318k
  char *allochost = NULL;
1432
318k
  bool show_fragment =
1433
318k
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1434
318k
  bool show_query = (u->query && u->query[0]) ||
1435
282k
    (u->query_present && flags & CURLU_GET_EMPTY);
1436
318k
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1437
318k
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1438
318k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1439
318k
  char portbuf[7];
1440
318k
  if(u->scheme && curl_strequal("file", u->scheme)) {
1441
2.14k
    url = curl_maprintf("file://%s%s%s%s%s",
1442
2.14k
                        u->path,
1443
2.14k
                        show_query ? "?": "",
1444
2.14k
                        u->query ? u->query : "",
1445
2.14k
                        show_fragment ? "#": "",
1446
2.14k
                        u->fragment ? u->fragment : "");
1447
2.14k
  }
1448
315k
  else if(!u->host)
1449
118k
    return CURLUE_NO_HOST;
1450
197k
  else {
1451
197k
    const struct Curl_scheme *h = NULL;
1452
197k
    char schemebuf[MAX_SCHEME_LEN + 5];
1453
197k
    if(u->scheme)
1454
197k
      scheme = u->scheme;
1455
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1456
0
      scheme = DEFAULT_SCHEME;
1457
0
    else
1458
0
      return CURLUE_NO_SCHEME;
1459
1460
197k
    h = Curl_get_scheme(scheme);
1461
197k
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1462
      /* there is no stored port number, but asked to deliver
1463
         a default one for the scheme */
1464
0
      if(h) {
1465
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1466
0
        port = portbuf;
1467
0
      }
1468
0
    }
1469
197k
    else if(port) {
1470
      /* there is a stored port number, but asked to inhibit if it matches
1471
         the default one for the scheme */
1472
4.30k
      if(h && (h->defport == u->portnum) &&
1473
309
         (flags & CURLU_NO_DEFAULT_PORT))
1474
4
        port = NULL;
1475
4.30k
    }
1476
1477
197k
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1478
174k
      options = NULL;
1479
1480
197k
    if(u->host[0] == '[') {
1481
1.31k
      if(u->zoneid) {
1482
        /* make it '[ host %25 zoneid ]' */
1483
1.03k
        struct dynbuf enc;
1484
1.03k
        size_t hostlen = strlen(u->host);
1485
1.03k
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1486
1.03k
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1487
1.03k
                          u->zoneid))
1488
0
          return CURLUE_OUT_OF_MEMORY;
1489
1.03k
        allochost = curlx_dyn_ptr(&enc);
1490
1.03k
      }
1491
1.31k
    }
1492
196k
    else if(urlencode) {
1493
15.0k
      allochost = curl_easy_escape(NULL, u->host, 0);
1494
15.0k
      if(!allochost)
1495
0
        return CURLUE_OUT_OF_MEMORY;
1496
15.0k
    }
1497
181k
    else if(punycode) {
1498
0
      if(!Curl_is_ASCII_name(u->host)) {
1499
0
        CURLUcode ret = host_decode(u->host, &allochost);
1500
0
        if(ret)
1501
0
          return ret;
1502
0
      }
1503
0
    }
1504
181k
    else if(depunyfy) {
1505
0
      if(Curl_is_ASCII_name(u->host)) {
1506
0
        CURLUcode ret = host_encode(u->host, &allochost);
1507
0
        if(ret)
1508
0
          return ret;
1509
0
      }
1510
0
    }
1511
1512
197k
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1513
197k
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1514
0
    else
1515
0
      schemebuf[0] = 0;
1516
1517
197k
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1518
197k
                        schemebuf,
1519
197k
                        u->user ? u->user : "",
1520
197k
                        u->password ? ":": "",
1521
197k
                        u->password ? u->password : "",
1522
197k
                        options ? ";" : "",
1523
197k
                        options ? options : "",
1524
197k
                        (u->user || u->password || options) ? "@": "",
1525
197k
                        allochost ? allochost : u->host,
1526
197k
                        port ? ":": "",
1527
197k
                        port ? port : "",
1528
197k
                        u->path ? u->path : "/",
1529
197k
                        show_query ? "?": "",
1530
197k
                        u->query ? u->query : "",
1531
197k
                        show_fragment ? "#": "",
1532
197k
                        u->fragment ? u->fragment : "");
1533
197k
    curlx_free(allochost);
1534
197k
  }
1535
200k
  if(!url)
1536
0
    return CURLUE_OUT_OF_MEMORY;
1537
200k
  *part = url;
1538
200k
  return CURLUE_OK;
1539
200k
}
1540
1541
/*
 * curl_url_get() extracts the part 'what' from the handle and returns it
 * as an allocated string in *part, formatted according to 'flags'.
 *
 * Returns CURLUE_BAD_HANDLE or CURLUE_BAD_PARTPOINTER for NULL arguments
 * and a part-specific CURLUE_NO_* code when the requested part is not set.
 * A missing path is returned as "/". *part is set to NULL on failure.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART; /* returned if 'value' is unset */
  char portbuf[7];
  bool plusdecode = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_URL:
    return urlget_url(u, part, flags);

  case CURLUPART_SCHEME:
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    break;

  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;

  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;

  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;

  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;

  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;

  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~CURLU_URLDECODE; /* never for port */
    if(u->scheme) {
      if(value) {
        /* there is a stored port number, but ask to inhibit if
           it matches the default one for the scheme */
        const struct Curl_scheme *h = Curl_get_scheme(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* there is no stored port number, but asked to deliver
           a default one for the scheme */
        const struct Curl_scheme *h = Curl_get_scheme(u->scheme);
        if(h) {
          curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          value = portbuf;
        }
      }
    }
    break;

  case CURLUPART_PATH:
    /* a path is never missing */
    value = u->path ? u->path : "/";
    break;

  case CURLUPART_QUERY:
    missing = CURLUE_NO_QUERY;
    plusdecode = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    value = u->query;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* there was a blank query and the user do not ask for it */
      value = NULL;
    break;

  case CURLUPART_FRAGMENT:
    missing = CURLUE_NO_FRAGMENT;
    value = u->fragment;
    if(!value && u->fragment_present && flags & CURLU_GET_EMPTY)
      /* there was a blank fragment and the user asks for it */
      value = "";
    break;

  default:
    value = NULL;
    break;
  }

  if(!value)
    return missing;

  return urlget_format(u, what, value, part, plusdecode, flags);
}
1635
1636
/*
 * set_url_scheme() checks that 'scheme' is fine to store in the handle.
 * It does not store the scheme itself, it only clears the guessed-scheme
 * marker when the scheme is accepted.
 *
 * The scheme must be between 1 and MAX_SCHEME_LEN bytes long. Unless
 * CURLU_NON_SUPPORT_SCHEME is set in 'flags', it must be a scheme that
 * Curl_get_scheme() knows and that has its 'run' member set. A scheme
 * that is not known must follow the RFC 3986 syntax.
 *
 * Returns CURLUE_BAD_SCHEME or CURLUE_UNSUPPORTED_SCHEME when rejected.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_scheme *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme(scheme);
  if(!(flags & CURLU_NON_SUPPORT_SCHEME) && (!h || !h->run))
    return CURLUE_UNSUPPORTED_SCHEME;
  if(!h) {
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    size_t i;
    if(!ISALPHA(scheme[0]))
      return CURLUE_BAD_SCHEME;
    /* check every byte after the first one, the last one included */
    for(i = 1; i < plen; i++) {
      const char c = scheme[i];
      if(!ISALNUM(c) && (c != '+') && (c != '-') && (c != '.'))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1665
1666
/*
 * set_url_port() stores 'provided_port' as the port of the handle, both as
 * a string and as a number.
 *
 * The string must start with a digit, be accepted by curlx_str_number()
 * with 0xffff as the maximum and have nothing after the number, or
 * CURLUE_BAD_PORT_NUMBER is returned. The previous port is only replaced
 * when everything went fine.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  curl_off_t number;
  char *portstr;

  /* not a number, or a weirdly provided number: not good! */
  if(!ISDIGIT(provided_port[0]) ||
     curlx_str_number(&provided_port, &number, 0xffff) ||
     *provided_port)
    return CURLUE_BAD_PORT_NUMBER;

  /* store the port in its normalized string form */
  portstr = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, number);
  if(!portstr)
    return CURLUE_OUT_OF_MEMORY;

  curlx_free(u->port);
  u->port = portstr;
  u->portnum = (unsigned short)number;
  return CURLUE_OK;
}
1684
1685
/*
 * set_url() sets a new URL in the handle, replacing the existing contents
 * (if any).
 *
 * 'part_size' is the length of 'url'. An absolute URL always replaces
 * everything. A relative URL is applied on top of the existing contents
 * if they are enough to make a full URL, otherwise it is parsed on its
 * own. A blank URL is only accepted when the handle already holds a full
 * URL, and then changes nothing.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *oldurl = NULL;
  CURLUcode uc;

  if(!part_size) {
    /* a blank URL is not a valid URL unless we already have a complete one
       and this is a redirect */
    switch(curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
    case CURLUE_OK:
      /* success, meaning the "" is a fine relative URL, but nothing
         changes */
      curlx_free(oldurl);
      return CURLUE_OK;
    case CURLUE_OUT_OF_MEMORY:
      return CURLUE_OUT_OF_MEMORY;
    default:
      return CURLUE_MALFORMED_INPUT;
    }
  }

  /* if the new URL is absolute replace the existing with the new. */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME | CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  uc = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
  if(uc == CURLUE_OUT_OF_MEMORY)
    return uc;
  if(uc)
    /* the old URL is incomplete (we cannot get an absolute URL in
       'oldurl'), replace the existing with the new */
    return parseurl_and_replace(url, u, flags);

  DEBUGASSERT(oldurl); /* it is set here */

  /* apply the relative part to create a new URL */
  uc = redirect_url(oldurl, url, u, flags);
  curlx_free(oldurl);
  return uc;
}
1731
1732
/*
 * urlset_clear() removes the part 'what' from the handle.
 *
 * Clearing CURLUPART_URL frees all parts and zeroes the whole struct. For
 * any other known part its string is freed and set to NULL, and the state
 * that goes with the part is reset. Returns CURLUE_UNKNOWN_PART for a part
 * it does not know.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **slot = NULL; /* the string to free and clear */

  switch(what) {
  case CURLUPART_URL:
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    slot = &u->scheme;
    u->guessed_scheme = FALSE;
    break;
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PORT:
    slot = &u->port;
    u->portnum = 0;
    break;
  case CURLUPART_PATH:
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    slot = &u->query;
    u->query_present = FALSE;
    break;
  case CURLUPART_FRAGMENT:
    slot = &u->fragment;
    u->fragment_present = FALSE;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  Curl_safefree(*slot);
  return CURLUE_OK;
}
1778
1779
/*
 * allowed_in_path() tells whether the byte 'x' is one of the punctuation
 * characters listed in the table below. curl_url_set() uses it when URL
 * encoding a path so that these bytes are copied as-is instead of being
 * percent encoded.
 */
static bool allowed_in_path(unsigned char x)
{
  static const char passthrough[] = "!$&'(){}[]*+,;=:@/";

  /* search only the listed characters, not the terminating null byte, so
     that a zero byte is never reported as allowed */
  return memchr(passthrough, x, sizeof(passthrough) - 1) != NULL;
}
1804
1805
/*
 * curl_url_set() sets, replaces or clears one part of the URL handle 'u'.
 *
 * u      the handle to update. NULL returns CURLUE_BAD_HANDLE.
 * what   which part to update.
 * part   the new content as a null-terminated string. NULL clears the part
 *        via urlset_clear(). Input longer than CURL_MAX_INPUT_LENGTH is
 *        refused with CURLUE_MALFORMED_INPUT.
 * flags  bitmask. This function itself acts on CURLU_URLENCODE (percent
 *        encode the input), CURLU_APPENDQUERY (append to an existing query
 *        instead of replacing it) and CURLU_NO_AUTHORITY (accept an empty
 *        hostname). The full mask is also handed to set_url_scheme() and
 *        set_url().
 *
 * CURLUPART_PORT and CURLUPART_URL are handed over completely to
 * set_url_port() and set_url(). The remaining parts are copied into a dynbuf
 * (percent encoded when asked for, otherwise with existing %XX sequences
 * lowercased) and the result replaces the previously stored string.
 *
 * Returns CURLUE_OK on success, CURLUE_UNKNOWN_PART for an unsupported
 * 'what', CURLUE_BAD_HOSTNAME when a hostname fails hostname_check(), or the
 * cc2cu() translation of a dynbuf error.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool plusencode = FALSE;
  bool pathmode = FALSE;
  bool leadingslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;
  size_t nalloc;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    /* setting a part to NULL clears it */
    return urlset_clear(u, what);

  nalloc = strlen(part);
  if(nalloc > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  switch(what) {
  case CURLUPART_SCHEME: {
    CURLUcode status = set_url_scheme(u, part, flags);
    if(status)
      return status;
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    /* a previously stored zone id is dropped when the host is set */
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
    return set_url_port(u, part);
  case CURLUPART_PATH:
    pathmode = TRUE;
    leadingslash = TRUE; /* enforce */
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    /* spaces become '+' only when URL encoding was asked for */
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
    /* when appending, the first '=' is passed through unencoded */
    equalsencode = appendquery;
    storep = &u->query;
    u->query_present = TRUE;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    u->fragment_present = TRUE;
    break;
  case CURLUPART_URL:
    return set_url(u, part, nalloc, flags);
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp;
    struct dynbuf enc;
    /* worst case: every input byte becomes a three byte %XX sequence, plus
       one more byte, plus the enforced leading slash */
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);

    if(leadingslash && (part[0] != '/')) {
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
      if(result)
        return cc2cu(result);
    }
    if(urlencode) {
      const unsigned char *i;

      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = curlx_dyn_addn(&enc, "+", 1);
          if(result)
            /* translate like every other dynbuf failure in this function */
            return cc2cu(result);
        }
        else if(ISUNRESERVED(*i) ||
                (pathmode && allowed_in_path(*i)) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = curlx_dyn_addn(&enc, i, 1);
          if(result)
            return cc2cu(result);
        }
        else {
          unsigned char out[3] = { '%' };
          Curl_hexbyte(&out[1], *i);
          result = curlx_dyn_addn(&enc, out, 3);
          if(result)
            return cc2cu(result);
        }
      }
    }
    else {
      char *p;
      CURLcode result = curlx_dyn_add(&enc, part);
      if(result)
        return cc2cu(result);
      p = curlx_dyn_ptr(&enc);
      while(*p) {
        /* make sure percent encoded are lower case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }
    newp = curlx_dyn_ptr(&enc);

    if(appendquery && newp) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen - 1] != '&');
      if(querylen) {
        struct dynbuf qbuf;
        CURLcode result;
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);

        result = curlx_dyn_addn(&qbuf, u->query, querylen); /* old query */
        if(!result && addamperand)
          result = curlx_dyn_addn(&qbuf, "&", 1);
        if(!result)
          result = curlx_dyn_add(&qbuf, newp);

        /* 'newp' points into 'enc', which is not needed past this point */
        curlx_dyn_free(&enc);
        if(result)
          /* Report the actual reason: the combined query can exceed the
             size limit of 'qbuf', which is not an out of memory condition.
             NOTE(review): 'qbuf' is not freed here on the assumption that a
             failed curlx_dyn_add*() call releases its own buffer - confirm
             against the dynbuf implementation. */
          return cc2cu(result);

        curlx_free(*storep);
        *storep = curlx_dyn_ptr(&qbuf);
        return CURLUE_OK;
      }
      /* no existing query to append to: store 'newp' as-is below */
    }

    else if(what == CURLUPART_HOST) {
      size_t n = curlx_dyn_len(&enc);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it is allowed to be empty. */
      }
      else {
        bool bad = FALSE;
        if(!n)
          bad = TRUE; /* empty hostname is not okay */
        else if(!urlencode) {
          /* if the hostname part was not URL encoded here, it was set ready
             URL encoded so we need to decode it to check */
          size_t dlen;
          char *decoded = NULL;
          CURLcode result =
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
          if(result || hostname_check(u, decoded, dlen))
            bad = TRUE;
          curlx_free(decoded);
        }
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
          bad = TRUE;
        if(bad) {
          curlx_dyn_free(&enc);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    /* the handle takes over ownership of the buffer built in 'enc' */
    curlx_free(*storep);
    *storep = (char *)CURL_UNCONST(newp);
  }
  return CURLUE_OK;
}