Coverage Report

Created: 2026-01-25 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/curl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
#include "curl_setup.h"
25
26
#include "urldata.h"
27
#include "urlapi-int.h"
28
#include "strcase.h"
29
#include "url.h"
30
#include "escape.h"
31
#include "curlx/inet_pton.h"
32
#include "curlx/inet_ntop.h"
33
#include "strdup.h"
34
#include "idn.h"
35
#include "curlx/strparse.h"
36
#include "curl_memrchr.h"
37
38
#ifdef _WIN32
39
/* MS-DOS/Windows style drive prefix, eg c: in c:foo */
40
#define STARTS_WITH_DRIVE_PREFIX(str)    \
41
  ((('a' <= str[0] && str[0] <= 'z') ||  \
42
    ('A' <= str[0] && str[0] <= 'Z')) && \
43
   (str[1] == ':'))
44
#endif
45
46
/* MS-DOS/Windows style drive prefix, optionally with
47
 * a '|' instead of ':', followed by a slash or NUL */
48
#define STARTS_WITH_URL_DRIVE_PREFIX(str)                  \
49
5.36k
  ((('a' <= (str)[0] && (str)[0] <= 'z') ||                \
50
5.36k
    ('A' <= (str)[0] && (str)[0] <= 'Z')) &&               \
51
5.36k
   ((str)[1] == ':' || (str)[1] == '|') &&                 \
52
5.36k
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
53
54
/* scheme is not URL encoded, the longest libcurl supported ones are... */
55
1.46M
#define MAX_SCHEME_LEN 40
56
57
/*
58
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
59
 * sure we have _some_ value for AF_INET6 without polluting our fake value
60
 * everywhere.
61
 */
62
#if !defined(USE_IPV6) && !defined(AF_INET6)
63
#define AF_INET6 (AF_INET + 1)
64
#endif
65
66
/* Internal representation of CURLU. Point to URL-encoded strings. */
67
struct Curl_URL {
68
  char *scheme;
69
  char *user;
70
  char *password;
71
  char *options; /* IMAP only? */
72
  char *host;
73
  char *zoneid; /* for numerical IPv6 addresses */
74
  char *port;
75
  char *path;
76
  char *query;
77
  char *fragment;
78
  unsigned short portnum; /* the numerical version (if 'port' is set) */
79
  BIT(query_present);    /* to support blank */
80
  BIT(fragment_present); /* to support blank */
81
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
82
};
83
84
0
#define DEFAULT_SCHEME "https"
85
86
static void free_urlhandle(struct Curl_URL *u)
87
474k
{
88
474k
  curlx_free(u->scheme);
89
474k
  curlx_free(u->user);
90
474k
  curlx_free(u->password);
91
474k
  curlx_free(u->options);
92
474k
  curlx_free(u->host);
93
474k
  curlx_free(u->zoneid);
94
474k
  curlx_free(u->port);
95
474k
  curlx_free(u->path);
96
474k
  curlx_free(u->query);
97
474k
  curlx_free(u->fragment);
98
474k
}
99
100
/*
101
 * Find the separator at the end of the hostname, or the '?' in cases like
102
 * http://www.example.com?id=2380
103
 */
104
static const char *find_host_sep(const char *url)
105
133
{
106
  /* Find the start of the hostname */
107
133
  const char *sep = strstr(url, "//");
108
133
  if(!sep)
109
115
    sep = url;
110
18
  else
111
18
    sep += 2;
112
113
  /* Find first / or ? */
114
2.04k
  while(*sep && *sep != '/' && *sep != '?')
115
1.91k
    sep++;
116
117
133
  return sep;
118
133
}
119
120
/* convert CURLcode to CURLUcode */
121
0
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
122
0
                  CURLUE_OUT_OF_MEMORY)
123
124
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
125
 * spaces in the source URL accordingly.
126
 *
127
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
128
 * will fail.
129
 */
130
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
131
                               size_t len, bool relative,
132
                               bool query)
133
194k
{
134
  /* we must add this with whitespace-replacing */
135
194k
  bool left = !query;
136
194k
  const unsigned char *iptr;
137
194k
  const unsigned char *host_sep = (const unsigned char *)url;
138
194k
  CURLcode result = CURLE_OK;
139
140
194k
  if(!relative) {
141
133
    size_t n;
142
133
    host_sep = (const unsigned char *)find_host_sep(url);
143
144
    /* output the first piece as-is */
145
133
    n = (const char *)host_sep - url;
146
133
    result = curlx_dyn_addn(o, url, n);
147
133
    len -= n;
148
133
  }
149
150
74.4M
  for(iptr = host_sep; len && !result; iptr++, len--) {
151
74.2M
    if(*iptr == ' ') {
152
33.3k
      if(left)
153
18.9k
        result = curlx_dyn_addn(o, "%20", 3);
154
14.4k
      else
155
14.4k
        result = curlx_dyn_addn(o, "+", 1);
156
33.3k
    }
157
74.2M
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
158
36.9M
      unsigned char out[3] = { '%' };
159
36.9M
      Curl_hexbyte(&out[1], *iptr);
160
36.9M
      result = curlx_dyn_addn(o, out, 3);
161
36.9M
    }
162
37.2M
    else {
163
37.2M
      result = curlx_dyn_addn(o, iptr, 1);
164
37.2M
      if(*iptr == '?')
165
22.4k
        left = FALSE;
166
37.2M
    }
167
74.2M
  }
168
169
194k
  if(result)
170
0
    return cc2cu(result);
171
194k
  return CURLUE_OK;
172
194k
}
173
174
/*
175
 * Returns the length of the scheme if the given URL is absolute (as opposed
176
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
177
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
178
 *
179
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
180
 * scheme.
181
 */
182
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
183
                            bool guess_scheme)
184
528k
{
185
528k
  size_t i = 0;
186
528k
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
187
528k
  (void)buflen; /* only used in debug-builds */
188
528k
  if(buf)
189
246k
    buf[0] = 0; /* always leave a defined value in buf */
190
#ifdef _WIN32
191
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
192
    return 0;
193
#endif
194
528k
  if(ISALPHA(url[0]))
195
1.46M
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
196
1.46M
      char s = url[i];
197
1.46M
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.'))) {
198
        /* RFC 3986 3.1 explains:
199
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
200
        */
201
1.10M
      }
202
363k
      else {
203
363k
        break;
204
363k
      }
205
1.46M
    }
206
528k
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
207
    /* If this does not guess scheme, the scheme always ends with the colon so
208
       that this also detects data: URLs etc. In guessing mode, data: could
209
       be the hostname "data" with a specified port number. */
210
211
    /* the length of the scheme is the name part only */
212
260k
    size_t len = i;
213
260k
    if(buf) {
214
133k
      Curl_strntolower(buf, url, i);
215
133k
      buf[i] = 0;
216
133k
    }
217
260k
    return len;
218
260k
  }
219
267k
  return 0;
220
528k
}
221
222
/* scan for byte values <= 31, 127 and sometimes space */
223
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
224
247k
{
225
247k
  size_t n = strlen(url);
226
247k
  size_t i;
227
247k
  unsigned char control;
228
247k
  const unsigned char *p = (const unsigned char *)url;
229
247k
  if(n > CURL_MAX_INPUT_LENGTH)
230
0
    return CURLUE_MALFORMED_INPUT;
231
232
247k
  control = allowspace ? 0x1f : 0x20;
233
378M
  for(i = 0; i < n; i++) {
234
377M
    if(p[i] <= control || p[i] == 127)
235
466
      return CURLUE_MALFORMED_INPUT;
236
377M
  }
237
246k
  *urllen = n;
238
246k
  return CURLUE_OK;
239
247k
}
240
241
/*
242
 * parse_hostname_login()
243
 *
244
 * Parse the login details (username, password and options) from the URL and
245
 * strip them out of the hostname
246
 *
247
 */
248
static CURLUcode parse_hostname_login(struct Curl_URL *u,
249
                                      const char *login,
250
                                      size_t len,
251
                                      unsigned int flags,
252
                                      size_t *offset) /* to the hostname */
253
242k
{
254
242k
  CURLUcode result = CURLUE_OK;
255
242k
  CURLcode ccode;
256
242k
  char *userp = NULL;
257
242k
  char *passwdp = NULL;
258
242k
  char *optionsp = NULL;
259
242k
  const struct Curl_scheme *h = NULL;
260
261
  /* At this point, we assume all the other special cases have been taken
262
   * care of, so the host is at most
263
   *
264
   *   [user[:password][;options]]@]hostname
265
   *
266
   * We need somewhere to put the embedded details, so do that first.
267
   */
268
242k
  char *ptr;
269
270
242k
  DEBUGASSERT(login);
271
272
242k
  *offset = 0;
273
242k
  ptr = memchr(login, '@', len);
274
242k
  if(!ptr)
275
184k
    goto out;
276
277
  /* We will now try to extract the
278
   * possible login information in a string like:
279
   * ftp://user:password@ftp.site.example:8021/README */
280
58.0k
  ptr++;
281
282
  /* if this is a known scheme, get some details */
283
58.0k
  if(u->scheme)
284
29.9k
    h = Curl_get_scheme(u->scheme);
285
286
  /* We could use the login information in the URL so extract it. Only parse
287
     options if the handler says we should. Note that 'h' might be NULL! */
288
58.0k
  ccode = Curl_parse_login_details(login, ptr - login - 1,
289
58.0k
                                   &userp, &passwdp,
290
58.0k
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
291
58.0k
                                   &optionsp : NULL);
292
58.0k
  if(ccode) {
293
    /* the only possible error from Curl_parse_login_details is out of
294
       memory: */
295
0
    result = CURLUE_OUT_OF_MEMORY;
296
0
    goto out;
297
0
  }
298
299
58.0k
  if(userp) {
300
58.0k
    if(flags & CURLU_DISALLOW_USER) {
301
      /* Option DISALLOW_USER is set and URL contains username. */
302
14
      result = CURLUE_USER_NOT_ALLOWED;
303
14
      goto out;
304
14
    }
305
58.0k
    curlx_free(u->user);
306
58.0k
    u->user = userp;
307
58.0k
  }
308
309
58.0k
  if(passwdp) {
310
19.0k
    curlx_free(u->password);
311
19.0k
    u->password = passwdp;
312
19.0k
  }
313
314
58.0k
  if(optionsp) {
315
220
    curlx_free(u->options);
316
220
    u->options = optionsp;
317
220
  }
318
319
  /* the hostname starts at this offset */
320
58.0k
  *offset = ptr - login;
321
58.0k
  return CURLUE_OK;
322
323
184k
out:
324
325
184k
  curlx_free(userp);
326
184k
  curlx_free(passwdp);
327
184k
  curlx_free(optionsp);
328
184k
  u->user = NULL;
329
184k
  u->password = NULL;
330
184k
  u->options = NULL;
331
332
184k
  return result;
333
58.0k
}
334
335
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
336
                                   bool has_scheme)
337
242k
{
338
242k
  const char *portptr;
339
242k
  char *hostname = curlx_dyn_ptr(host);
340
  /*
341
   * Find the end of an IPv6 address on the ']' ending bracket.
342
   */
343
242k
  if(hostname[0] == '[') {
344
2.31k
    portptr = strchr(hostname, ']');
345
2.31k
    if(!portptr)
346
61
      return CURLUE_BAD_IPV6;
347
2.25k
    portptr++;
348
    /* this is a RFC2732-style specified IP-address */
349
2.25k
    if(*portptr) {
350
259
      if(*portptr != ':')
351
112
        return CURLUE_BAD_PORT_NUMBER;
352
259
    }
353
1.99k
    else
354
1.99k
      portptr = NULL;
355
2.25k
  }
356
240k
  else
357
240k
    portptr = strchr(hostname, ':');
358
359
242k
  if(portptr) {
360
8.22k
    curl_off_t port;
361
8.22k
    size_t keep = portptr - hostname;
362
363
    /* Browser behavior adaptation. If there is a colon with no digits after,
364
       just cut off the name there which makes us ignore the colon and just
365
       use the default port. Firefox, Chrome and Safari all do that.
366
367
       Do not do it if the URL has no scheme, to make something that looks like
368
       a scheme not work!
369
    */
370
8.22k
    curlx_dyn_setlen(host, keep);
371
8.22k
    portptr++;
372
8.22k
    if(!*portptr)
373
2.56k
      return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
374
375
5.65k
    if(curlx_str_number(&portptr, &port, 0xffff) || *portptr)
376
488
      return CURLUE_BAD_PORT_NUMBER;
377
378
5.17k
    u->portnum = (unsigned short)port;
379
    /* generate a new port number string to get rid of leading zeroes etc */
380
5.17k
    curlx_free(u->port);
381
5.17k
    u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
382
5.17k
    if(!u->port)
383
0
      return CURLUE_OUT_OF_MEMORY;
384
5.17k
  }
385
386
239k
  return CURLUE_OK;
387
242k
}
388
389
/* this assumes 'hostname' now starts with [ */
390
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
391
                            size_t hlen) /* length of hostname */
392
2.66k
{
393
2.66k
  size_t len;
394
2.66k
  DEBUGASSERT(*hostname == '[');
395
2.66k
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
396
75
    return CURLUE_BAD_IPV6;
397
2.59k
  hostname++;
398
2.59k
  hlen -= 2;
399
400
  /* only valid IPv6 letters are ok */
401
2.59k
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
402
403
2.59k
  if(hlen != len) {
404
2.20k
    hlen = len;
405
2.20k
    if(hostname[len] == '%') {
406
      /* this could now be '%[zone id]' */
407
2.01k
      char zoneid[16];
408
2.01k
      int i = 0;
409
2.01k
      char *h = &hostname[len + 1];
410
      /* pass '25' if present and is a URL encoded percent sign */
411
2.01k
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
412
429
        h += 2;
413
14.7k
      while(*h && (*h != ']') && (i < 15))
414
12.6k
        zoneid[i++] = *h++;
415
2.01k
      if(!i || (']' != *h))
416
472
        return CURLUE_BAD_IPV6;
417
1.54k
      zoneid[i] = 0;
418
1.54k
      u->zoneid = curlx_strdup(zoneid);
419
1.54k
      if(!u->zoneid)
420
0
        return CURLUE_OUT_OF_MEMORY;
421
1.54k
      hostname[len] = ']'; /* insert end bracket */
422
1.54k
      hostname[len + 1] = 0; /* terminate the hostname */
423
1.54k
    }
424
183
    else
425
183
      return CURLUE_BAD_IPV6;
426
    /* hostname is fine */
427
2.20k
  }
428
429
  /* Normalize the IPv6 address */
430
1.93k
  {
431
1.93k
    char dest[16]; /* fits a binary IPv6 address */
432
1.93k
    hostname[hlen] = 0; /* end the address there */
433
1.93k
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
434
347
      return CURLUE_BAD_IPV6;
435
1.59k
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen + 1)) {
436
1.52k
      hlen = strlen(hostname); /* might be shorter now */
437
1.52k
      hostname[hlen + 1] = 0;
438
1.52k
    }
439
1.59k
    hostname[hlen] = ']'; /* restore ending bracket */
440
1.59k
  }
441
0
  return CURLUE_OK;
442
1.93k
}
443
444
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
445
                                size_t hlen) /* length of hostname */
446
139k
{
447
139k
  size_t len;
448
139k
  DEBUGASSERT(hostname);
449
450
139k
  if(!hlen)
451
0
    return CURLUE_NO_HOST;
452
139k
  else if(hostname[0] == '[')
453
545
    return ipv6_parse(u, hostname, hlen);
454
139k
  else {
455
    /* letters from the second string are not ok */
456
139k
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
457
139k
    if(hlen != len)
458
      /* hostname with bad content */
459
1.57k
      return CURLUE_BAD_HOSTNAME;
460
139k
  }
461
137k
  return CURLUE_OK;
462
139k
}
463
464
/*
465
 * Handle partial IPv4 numerical addresses and different bases, like
466
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
467
 *
468
 * If the given input string is syntactically wrong IPv4 or any part for
469
 * example is too big, this function returns HOST_NAME.
470
 *
471
 * Output the "normalized" version of that input string in plain quad decimal
472
 * integers.
473
 *
474
 * Returns the host type.
475
 */
476
477
0
#define HOST_ERROR   -1 /* out of memory */
478
479
279k
#define HOST_NAME    1
480
197k
#define HOST_IPV4    2
481
4.24k
#define HOST_IPV6    3
482
483
static int ipv4_normalize(struct dynbuf *host)
484
240k
{
485
240k
  bool done = FALSE;
486
240k
  int n = 0;
487
240k
  const char *c = curlx_dyn_ptr(host);
488
240k
  unsigned int parts[4] = { 0, 0, 0, 0 };
489
240k
  CURLcode result = CURLE_OK;
490
491
240k
  if(*c == '[')
492
2.12k
    return HOST_IPV6;
493
494
410k
  while(!done) {
495
307k
    int rc;
496
307k
    curl_off_t l;
497
307k
    if(*c == '0') {
498
74.7k
      if(c[1] == 'x') {
499
1.03k
        c += 2; /* skip the prefix */
500
1.03k
        rc = curlx_str_hex(&c, &l, UINT_MAX);
501
1.03k
      }
502
73.7k
      else
503
73.7k
        rc = curlx_str_octal(&c, &l, UINT_MAX);
504
74.7k
    }
505
232k
    else
506
232k
      rc = curlx_str_number(&c, &l, UINT_MAX);
507
508
307k
    if(rc)
509
124k
      return HOST_NAME;
510
511
182k
    parts[n] = (unsigned int)l;
512
513
182k
    switch(*c) {
514
68.8k
    case '.':
515
68.8k
      if(n == 3)
516
155
        return HOST_NAME;
517
68.7k
      n++;
518
68.7k
      c++;
519
68.7k
      break;
520
521
103k
    case '\0':
522
103k
      done = TRUE;
523
103k
      break;
524
525
10.6k
    default:
526
10.6k
      return HOST_NAME;
527
182k
    }
528
182k
  }
529
530
103k
  switch(n) {
531
83.9k
  case 0: /* a -- 32 bits */
532
83.9k
    curlx_dyn_reset(host);
533
534
83.9k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
535
83.9k
                            (parts[0] >> 24),
536
83.9k
                            ((parts[0] >> 16) & 0xff),
537
83.9k
                            ((parts[0] >> 8) & 0xff),
538
83.9k
                            (parts[0] & 0xff));
539
83.9k
    break;
540
2.10k
  case 1: /* a.b -- 8.24 bits */
541
2.10k
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
542
909
      return HOST_NAME;
543
1.19k
    curlx_dyn_reset(host);
544
1.19k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
545
1.19k
                            (parts[0]),
546
1.19k
                            ((parts[1] >> 16) & 0xff),
547
1.19k
                            ((parts[1] >> 8) & 0xff),
548
1.19k
                            (parts[1] & 0xff));
549
1.19k
    break;
550
2.07k
  case 2: /* a.b.c -- 8.8.16 bits */
551
2.07k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
552
1.58k
      return HOST_NAME;
553
492
    curlx_dyn_reset(host);
554
492
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
555
492
                            (parts[0]),
556
492
                            (parts[1]),
557
492
                            ((parts[2] >> 8) & 0xff),
558
492
                            (parts[2] & 0xff));
559
492
    break;
560
15.3k
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
561
15.3k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
562
13.5k
       (parts[3] > 0xff))
563
2.44k
      return HOST_NAME;
564
12.9k
    curlx_dyn_reset(host);
565
12.9k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
566
12.9k
                            (parts[0]),
567
12.9k
                            (parts[1]),
568
12.9k
                            (parts[2]),
569
12.9k
                            (parts[3]));
570
12.9k
    break;
571
103k
  }
572
98.5k
  if(result)
573
0
    return HOST_ERROR;
574
98.5k
  return HOST_IPV4;
575
98.5k
}
576
577
/* if necessary, replace the host content with a URL decoded version */
578
static CURLUcode urldecode_host(struct dynbuf *host)
579
139k
{
580
139k
  char *per = NULL;
581
139k
  const char *hostname = curlx_dyn_ptr(host);
582
139k
  per = strchr(hostname, '%');
583
139k
  if(!per)
584
    /* nothing to decode */
585
137k
    return CURLUE_OK;
586
2.65k
  else {
587
    /* encoded */
588
2.65k
    size_t dlen;
589
2.65k
    char *decoded;
590
2.65k
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
591
2.65k
                                     REJECT_CTRL);
592
2.65k
    if(result)
593
105
      return CURLUE_BAD_HOSTNAME;
594
2.54k
    curlx_dyn_reset(host);
595
2.54k
    result = curlx_dyn_addn(host, decoded, dlen);
596
2.54k
    curlx_free(decoded);
597
2.54k
    if(result)
598
0
      return cc2cu(result);
599
2.54k
  }
600
601
2.54k
  return CURLUE_OK;
602
139k
}
603
604
static CURLUcode parse_authority(struct Curl_URL *u,
605
                                 const char *auth, size_t authlen,
606
                                 unsigned int flags,
607
                                 struct dynbuf *host,
608
                                 bool has_scheme)
609
242k
{
610
242k
  size_t offset;
611
242k
  CURLUcode uc;
612
242k
  CURLcode result;
613
614
  /*
615
   * Parse the login details and strip them out of the hostname.
616
   */
617
242k
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
618
242k
  if(uc)
619
14
    goto out;
620
621
242k
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
622
242k
  if(result) {
623
0
    uc = cc2cu(result);
624
0
    goto out;
625
0
  }
626
627
242k
  uc = Curl_parse_port(u, host, has_scheme);
628
242k
  if(uc)
629
1.47k
    goto out;
630
631
241k
  if(!curlx_dyn_len(host))
632
438
    return CURLUE_NO_HOST;
633
634
240k
  switch(ipv4_normalize(host)) {
635
98.5k
  case HOST_IPV4:
636
98.5k
    break;
637
2.12k
  case HOST_IPV6:
638
2.12k
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
639
2.12k
    break;
640
139k
  case HOST_NAME:
641
139k
    uc = urldecode_host(host);
642
139k
    if(!uc)
643
139k
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
644
139k
    break;
645
0
  case HOST_ERROR:
646
0
    uc = CURLUE_OUT_OF_MEMORY;
647
0
    break;
648
0
  default:
649
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
650
0
    break;
651
240k
  }
652
653
242k
out:
654
242k
  return uc;
655
240k
}
656
657
/* used for HTTP/2 server push */
658
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
659
0
{
660
0
  CURLUcode result;
661
0
  struct dynbuf host;
662
663
0
  DEBUGASSERT(authority);
664
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
665
666
0
  result = parse_authority(u, authority, strlen(authority),
667
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
668
0
  if(result)
669
0
    curlx_dyn_free(&host);
670
0
  else {
671
0
    curlx_free(u->host);
672
0
    u->host = curlx_dyn_ptr(&host);
673
0
  }
674
0
  return result;
675
0
}
676
677
/*
678
 * "Remove Dot Segments"
679
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
680
 */
681
682
static bool is_dot(const char **str, size_t *clen)
683
19.8M
{
684
19.8M
  const char *p = *str;
685
19.8M
  if(*p == '.') {
686
657k
    (*str)++;
687
657k
    (*clen)--;
688
657k
    return TRUE;
689
657k
  }
690
19.1M
  else if((*clen >= 3) &&
691
19.1M
          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e')) {
692
54.8k
    *str += 3;
693
54.8k
    *clen -= 3;
694
54.8k
    return TRUE;
695
54.8k
  }
696
19.1M
  return FALSE;
697
19.8M
}
698
699
153M
#define ISSLASH(x) ((x) == '/')
700
701
/*
702
 * dedotdotify()
703
 * @unittest: 1395
704
 *
705
 * This function gets a null-terminated path with dot and dotdot sequences
706
 * passed in and strips them off according to the rules in RFC 3986 section
707
 * 5.2.4.
708
 *
709
 * The function handles a path. It should not contain the query nor fragment.
710
 *
711
 * RETURNS
712
 *
713
 * Zero for success and 'out' set to an allocated dedotdotified string.
714
 */
715
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
716
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
717
56.1k
{
718
56.1k
  struct dynbuf out;
719
56.1k
  CURLcode result = CURLE_OK;
720
721
56.1k
  *outp = NULL;
722
  /* the path always starts with a slash, and a slash has not dot */
723
56.1k
  if(clen < 2)
724
0
    return 0;
725
726
56.1k
  curlx_dyn_init(&out, clen + 1);
727
728
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
729
      remove that prefix from the input buffer; otherwise, */
730
56.1k
  if(is_dot(&input, &clen)) {
731
39
    const char *p = input;
732
39
    size_t blen = clen;
733
734
39
    if(!clen)
735
      /* . [end] */
736
2
      goto end;
737
37
    else if(ISSLASH(*p)) {
738
      /* one dot followed by a slash */
739
4
      input = p + 1;
740
4
      clen--;
741
4
    }
742
743
    /*  D. if the input buffer consists only of "." or "..", then remove
744
        that from the input buffer; otherwise, */
745
33
    else if(is_dot(&p, &blen)) {
746
12
      if(!blen)
747
        /* .. [end] */
748
4
        goto end;
749
8
      else if(ISSLASH(*p)) {
750
        /* ../ */
751
4
        input = p + 1;
752
4
        clen = blen - 1;
753
4
      }
754
12
    }
755
39
  }
756
757
152M
  while(clen && !result) { /* until end of path content */
758
152M
    if(ISSLASH(*input)) {
759
19.3M
      const char *p = &input[1];
760
19.3M
      size_t blen = clen - 1;
761
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
762
          "."  is a complete path segment, then replace that prefix with "/" in
763
          the input buffer; otherwise, */
764
19.3M
      if(is_dot(&p, &blen)) {
765
483k
        if(!blen) { /* /. */
766
866
          result = curlx_dyn_addn(&out, "/", 1);
767
866
          break;
768
866
        }
769
482k
        else if(ISSLASH(*p)) { /* /./ */
770
106k
          input = p;
771
106k
          clen = blen;
772
106k
          continue;
773
106k
        }
774
775
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
776
            where ".." is a complete path segment, then replace that prefix
777
            with "/" in the input buffer and remove the last segment and its
778
            preceding "/" (if any) from the output buffer; otherwise, */
779
375k
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
780
          /* remove the last segment from the output buffer */
781
161k
          size_t len = curlx_dyn_len(&out);
782
161k
          if(len) {
783
141k
            char *ptr = curlx_dyn_ptr(&out);
784
141k
            char *last = memrchr(ptr, '/', len);
785
141k
            if(last)
786
              /* trim the output at the slash */
787
141k
              curlx_dyn_setlen(&out, last - ptr);
788
141k
          }
789
790
161k
          if(blen) { /* /../ */
791
161k
            input = p;
792
161k
            clen = blen;
793
161k
            continue;
794
161k
          }
795
429
          result = curlx_dyn_addn(&out, "/", 1);
796
429
          break;
797
161k
        }
798
483k
      }
799
19.3M
    }
800
801
    /*  E. move the first path segment in the input buffer to the end of
802
        the output buffer, including the initial "/" character (if any) and
803
        any subsequent characters up to, but not including, the next "/"
804
        character or the end of the input buffer. */
805
806
152M
    result = curlx_dyn_addn(&out, input, 1);
807
152M
    input++;
808
152M
    clen--;
809
152M
  }
810
56.1k
end:
811
56.1k
  if(!result) {
812
56.1k
    if(curlx_dyn_len(&out))
813
56.1k
      *outp = curlx_dyn_ptr(&out);
814
11
    else {
815
11
      *outp = curlx_strdup("");
816
11
      if(!*outp)
817
0
        return 1;
818
11
    }
819
56.1k
  }
820
56.1k
  return result ? 1 : 0; /* success */
821
56.1k
}
822
823
static CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
824
                            struct dynbuf *host, const char **pathp,
825
                            size_t *pathlenp)
826
3.02k
{
827
3.02k
  const char *path;
828
3.02k
  size_t pathlen;
829
3.02k
  bool uncpath = FALSE;
830
3.02k
  if(urllen <= 6)
831
    /* file:/ is not enough to actually be a complete file: URL */
832
41
    return CURLUE_BAD_FILE_URL;
833
834
  /* path has been allocated large enough to hold this */
835
2.98k
  path = &url[5];
836
2.98k
  pathlen = urllen - 5;
837
838
2.98k
  u->scheme = curlx_strdup("file");
839
2.98k
  if(!u->scheme)
840
0
    return CURLUE_OUT_OF_MEMORY;
841
842
  /* Extra handling URLs with an authority component (i.e. that start with
843
   * "file://")
844
   *
845
   * We allow omitted hostname (e.g. file:/<path>) -- valid according to
846
   * RFC 8089, but not the (current) WHAT-WG URL spec.
847
   */
848
2.98k
  if(path[0] == '/' && path[1] == '/') {
849
    /* swallow the two slashes */
850
688
    const char *ptr = &path[2];
851
852
    /*
853
     * According to RFC 8089, a file: URL can be reliably dereferenced if:
854
     *
855
     *  o it has no/blank hostname, or
856
     *
857
     *  o the hostname matches "localhost" (case-insensitively), or
858
     *
859
     *  o the hostname is a FQDN that resolves to this machine, or
860
     *
861
     *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
862
     *    Appendix E.3).
863
     *
864
     * For brevity, we only consider URLs with empty, "localhost", or
865
     * "127.0.0.1" hostnames as local, otherwise as an UNC String.
866
     *
867
     * Additionally, there is an exception for URLs with a Windows drive
868
     * letter in the authority (which was accidentally omitted from RFC 8089
869
     * Appendix E, but believe me, it was meant to be there. --MK)
870
     */
871
688
    if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
872
      /* the URL includes a hostname, it must match "localhost" or
873
         "127.0.0.1" to be valid */
874
440
      if(checkprefix("localhost/", ptr) ||
875
421
         checkprefix("127.0.0.1/", ptr)) {
876
38
        ptr += 9; /* now points to the slash after the host */
877
38
      }
878
402
      else {
879
#ifdef _WIN32
880
        size_t len;
881
882
        /* the hostname, NetBIOS computer name, can not contain disallowed
883
           chars, and the delimiting slash character must be appended to the
884
           hostname */
885
        path = strpbrk(ptr, "/\\:*?\"<>|");
886
        if(!path || *path != '/')
887
          return CURLUE_BAD_FILE_URL;
888
889
        len = path - ptr;
890
        if(len) {
891
          CURLcode code = curlx_dyn_addn(host, ptr, len);
892
          if(code)
893
            return cc2cu(code);
894
          uncpath = TRUE;
895
        }
896
897
        ptr -= 2; /* now points to the // before the host in UNC */
898
#else
899
        /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
900
           none */
901
402
        return CURLUE_BAD_FILE_URL;
902
402
#endif
903
402
      }
904
440
    }
905
906
286
    path = ptr;
907
286
    pathlen = urllen - (ptr - url);
908
286
  }
909
910
2.58k
  if(!uncpath)
911
    /* no host for file: URLs by default */
912
2.58k
    curlx_dyn_reset(host);
913
914
2.58k
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
915
  /* Do not allow Windows drive letters when not in Windows.
916
   * This catches both "file:/c:" and "file:c:" */
917
2.58k
  if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
918
2.43k
     STARTS_WITH_URL_DRIVE_PREFIX(path)) {
919
    /* File drive letters are only accepted in MS-DOS/Windows */
920
333
    return CURLUE_BAD_FILE_URL;
921
333
  }
922
#else
923
  /* If the path starts with a slash and a drive letter, ditch the slash */
924
  if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
925
    /* This cannot be done with strcpy, as the memory chunks overlap! */
926
    path++;
927
    pathlen--;
928
  }
929
#endif
930
2.25k
  *pathp = path;
931
2.25k
  *pathlenp = pathlen;
932
2.25k
  return CURLUE_OK;
933
2.58k
}
934
935
/*
 * parse_scheme() handles the scheme part of a non-file URL.
 *
 * 'schemelen' is the length of the scheme stored in 'schemebuf', or zero if
 * the URL had none. On success '*hostpp' points to where the hostname
 * starts within 'url' and, when a scheme is known, a copy of it is stored
 * in u->scheme.
 */
static CURLUcode parse_scheme(const char *url, CURLU *u, char *schemebuf,
                              size_t schemelen, unsigned int flags,
                              const char **hostpp)
{
  const char *schemep = NULL;

  if(!schemelen) {
    /* no scheme! Only acceptable if we may default or guess one */
    if(!(flags & (CURLU_DEFAULT_SCHEME | CURLU_GUESS_SCHEME)))
      return CURLUE_BAD_SCHEME;

    if(flags & CURLU_DEFAULT_SCHEME)
      schemep = DEFAULT_SCHEME;

    /* without a scheme, the hostname starts at the very beginning */
    *hostpp = url;
  }
  else {
    /* start one byte past url[schemelen] and count at most four slashes */
    const char *p = &url[schemelen + 1];
    int slashes;
    for(slashes = 0; (*p == '/') && (slashes < 4); slashes++)
      p++;

    schemep = schemebuf;
    if(!Curl_get_scheme(schemep) && !(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;

    if((slashes < 1) || (slashes > 3))
      /* less than one or more than three slashes */
      return CURLUE_BAD_SLASHES;

    *hostpp = p; /* hostname starts here */
  }

  if(!schemep)
    /* scheme is left unset, to be guessed later */
    return CURLUE_OK;

  u->scheme = curlx_strdup(schemep);
  return u->scheme ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
}
983
984
/*
 * guess_scheme() picks a scheme based on how the hostname in 'host' begins
 * (the legacy curl-style guess), stores a copy in u->scheme and marks the
 * scheme as guessed. Falls back to "http" when no known prefix matches.
 */
static CURLUcode guess_scheme(CURLU *u, struct dynbuf *host)
{
  const char *name = curlx_dyn_ptr(host);
  const char *guess = "http"; /* unless a known prefix says otherwise */

  if(checkprefix("ftp.", name))
    guess = "ftp";
  else if(checkprefix("dict.", name))
    guess = "dict";
  else if(checkprefix("ldap.", name))
    guess = "ldap";
  else if(checkprefix("imap.", name))
    guess = "imap";
  else if(checkprefix("smtp.", name))
    guess = "smtp";
  else if(checkprefix("pop3.", name))
    guess = "pop3";

  u->scheme = curlx_strdup(guess);
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  u->guessed_scheme = TRUE;
  return CURLUE_OK;
}
1011
1012
/*
 * handle_fragment() records that the URL has a fragment and, if there is
 * anything after the leading '#', stores a copy of it in u->fragment.
 *
 * 'fragment' points to the '#' and 'fraglen' counts it. With
 * CURLU_URLENCODE set, the stored copy is URL encoded.
 */
static CURLUcode handle_fragment(CURLU *u, const char *fragment,
                                 size_t fraglen, unsigned int flags)
{
  u->fragment_present = TRUE;
  if(fraglen <= 1)
    /* nothing but the '#' itself, there is no content to store */
    return CURLUE_OK;

  /* step past the leading '#' */
  fragment++;
  fraglen--;

  if(flags & CURLU_URLENCODE) {
    struct dynbuf enc;
    CURLUcode result;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    result = urlencode_str(&enc, fragment, fraglen, TRUE, FALSE);
    if(result)
      return result;
    u->fragment = curlx_dyn_ptr(&enc);
    return CURLUE_OK;
  }

  u->fragment = Curl_memdup0(fragment, fraglen);
  return u->fragment ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
}
1035
1036
/*
 * handle_query() records that the URL has a query and stores a copy of it
 * in u->query. A lone '?' is stored as an empty string.
 *
 * 'query' points to the '?' and 'qlen' counts it. With CURLU_URLENCODE
 * set, the stored copy is URL encoded in query mode.
 */
static CURLUcode handle_query(CURLU *u, const char *query,
                              size_t qlen, unsigned int flags)
{
  u->query_present = TRUE;

  if(qlen <= 1)
    /* single byte query: just the question mark */
    u->query = curlx_strdup("");
  else if(flags & CURLU_URLENCODE) {
    struct dynbuf enc;
    CURLUcode result;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    /* skip the leading question mark */
    result = urlencode_str(&enc, &query[1], qlen - 1, TRUE, TRUE);
    if(result)
      return result;
    u->query = curlx_dyn_ptr(&enc);
    return CURLUE_OK;
  }
  else
    /* skip the leading question mark */
    u->query = Curl_memdup0(&query[1], qlen - 1);

  return u->query ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
}
1065
1066
/*
 * handle_path() stores the path part in u->path.
 *
 * 'path' and 'pathlen' describe the path with query and fragment already
 * cut off. The path is URL encoded first if CURLU_URLENCODE is set. A path
 * of at most one byte is not copied. Unless CURLU_PATH_AS_IS is set, the
 * stored path is passed through dedotdotify().
 */
static CURLUcode handle_path(CURLU *u, const char *path,
                             size_t pathlen, unsigned int flags)
{
  bool encode = (flags & CURLU_URLENCODE) ? TRUE : FALSE;

  if(pathlen && encode) {
    struct dynbuf enc;
    CURLUcode result;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
    if(result)
      return result;
    pathlen = curlx_dyn_len(&enc);
    u->path = curlx_dyn_ptr(&enc);
    path = u->path;
  }

  if(pathlen <= 1)
    /* there is no path left or just the slash, nothing more to do */
    return CURLUE_OK;

  if(!u->path) {
    u->path = Curl_memdup0(path, pathlen);
    if(!u->path)
      return CURLUE_OUT_OF_MEMORY;
    path = u->path;
  }
  else if(encode)
    /* it might have encoded more than just the path so cut it */
    u->path[pathlen] = 0;

  if(flags & CURLU_PATH_AS_IS)
    /* asked to keep the path exactly as given */
    return CURLUE_OK;

  {
    /* remove ../ and ./ sequences according to RFC3986 */
    char *dedot;
    if(dedotdotify(path, pathlen, &dedot))
      return CURLUE_OUT_OF_MEMORY;
    if(dedot) {
      curlx_free(u->path);
      u->path = dedot;
    }
  }
  return CURLUE_OK;
}
1109
1110
/*
 * parseurl() splits 'url' into its components and stores them in 'u'.
 *
 * "file" URLs are handed to parse_file(); every other URL goes through
 * parse_scheme() and parse_authority(). After that, the fragment, the query
 * and finally the path are extracted, in that order.
 *
 * On any failure the temporary host buffer is freed, free_urlhandle() is
 * called on 'u' and the error is returned.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  const char *path;
  size_t pathlen;
  char schemebuf[MAX_SCHEME_LEN + 1];
  size_t schemelen = 0;
  size_t urllen;
  CURLUcode result = CURLUE_OK;
  struct dynbuf host;
  const char *hash;
  const char *qmark;

  DEBUGASSERT(url);

  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
  if(result)
    goto fail;

  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME |
                                            CURLU_DEFAULT_SCHEME));

  if(schemelen && !strcmp(schemebuf, "file")) {
    /* handle the file: scheme */
    result = parse_file(url, urllen, u, &host, &path, &pathlen);
    if(result)
      goto fail;
  }
  else {
    const char *hostp = NULL;
    size_t hostlen;
    result = parse_scheme(url, u, schemebuf, schemelen, flags, &hostp);
    if(result)
      goto fail;

    /* find the end of the hostname + port number */
    hostlen = strcspn(hostp, "/?#");
    path = &hostp[hostlen];

    /* this pathlen also contains the query and the fragment */
    pathlen = urllen - (path - url);

    if(hostlen) {
      result = parse_authority(u, hostp, hostlen, flags, &host,
                               u->scheme != NULL);
      if(!result && (flags & CURLU_GUESS_SCHEME) && !u->scheme)
        result = guess_scheme(u, &host);
    }
    else if(flags & CURLU_NO_AUTHORITY) {
      /* allowed to be empty. */
      if(curlx_dyn_add(&host, ""))
        result = CURLUE_OUT_OF_MEMORY;
    }
    else
      result = CURLUE_NO_HOST;

    if(result)
      goto fail;
  }

  /* The path might at this point contain a fragment and/or a query to
     handle. The fragment goes first since it is last in the string. */
  hash = strchr(path, '#');
  if(hash) {
    size_t fraglen = pathlen - (hash - path);
    result = handle_fragment(u, hash, fraglen, flags);
    if(result)
      goto fail;
    /* after this, pathlen still contains the query */
    pathlen -= fraglen;
  }

  qmark = memchr(path, '?', pathlen);
  if(qmark) {
    size_t qlen = pathlen - (qmark - path);
    result = handle_query(u, qmark, qlen, flags);
    if(result)
      goto fail;
    pathlen -= qlen;
  }

  /* the fragment and query parts are trimmed off from the path */
  result = handle_path(u, path, pathlen, flags);
  if(result)
    goto fail;

  /* the handle takes over the host buffer */
  u->host = curlx_dyn_ptr(&host);
  return CURLUE_OK;

fail:
  curlx_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
1193
1194
/*
1195
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1196
 */
1197
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1198
                                      unsigned int flags)
1199
247k
{
1200
247k
  CURLUcode result;
1201
247k
  CURLU tmpurl;
1202
247k
  memset(&tmpurl, 0, sizeof(tmpurl));
1203
247k
  result = parseurl(url, &tmpurl, flags);
1204
247k
  if(!result) {
1205
240k
    free_urlhandle(u);
1206
240k
    *u = tmpurl;
1207
240k
  }
1208
247k
  return result;
1209
247k
}
1210
1211
/*
1212
 * Concatenate a relative URL onto a base URL making it absolute.
1213
 */
1214
static CURLUcode redirect_url(const char *base, const char *relurl,
1215
                              CURLU *u, unsigned int flags)
1216
15.7k
{
1217
15.7k
  struct dynbuf urlbuf;
1218
15.7k
  bool host_changed = FALSE;
1219
15.7k
  const char *useurl = relurl;
1220
15.7k
  const char *cutoff = NULL;
1221
15.7k
  size_t prelen;
1222
15.7k
  CURLUcode uc;
1223
1224
  /* protsep points to the start of the hostname, after [scheme]:// */
1225
15.7k
  const char *protsep = base + strlen(u->scheme) + 3;
1226
15.7k
  DEBUGASSERT(base && relurl && u); /* all set here */
1227
15.7k
  if(!base)
1228
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
1229
1230
  /* handle different relative URL types */
1231
15.7k
  switch(relurl[0]) {
1232
1.22k
  case '/':
1233
1.22k
    if(relurl[1] == '/') {
1234
      /* protocol-relative URL: //example.com/path */
1235
133
      cutoff = protsep;
1236
133
      useurl = &relurl[2];
1237
133
      host_changed = TRUE;
1238
133
    }
1239
1.08k
    else
1240
      /* absolute /path */
1241
1.08k
      cutoff = strchr(protsep, '/');
1242
1.22k
    break;
1243
1244
569
  case '#':
1245
    /* fragment-only change */
1246
569
    if(u->fragment)
1247
393
      cutoff = strchr(protsep, '#');
1248
569
    break;
1249
1250
13.9k
  default:
1251
    /* path or query-only change */
1252
13.9k
    if(u->query && u->query[0])
1253
      /* remove existing query */
1254
6.87k
      cutoff = strchr(protsep, '?');
1255
7.09k
    else if(u->fragment && u->fragment[0])
1256
      /* Remove existing fragment */
1257
786
      cutoff = strchr(protsep, '#');
1258
1259
13.9k
    if(relurl[0] != '?') {
1260
      /* append a relative path after the last slash */
1261
11.5k
      cutoff = memrchr(protsep, '/',
1262
11.5k
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
1263
11.5k
      if(cutoff)
1264
11.5k
        cutoff++; /* truncate after last slash */
1265
11.5k
    }
1266
13.9k
    break;
1267
15.7k
  }
1268
1269
15.7k
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
1270
1271
  /* build new URL */
1272
15.7k
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
1273
1274
15.7k
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
1275
15.7k
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
1276
15.7k
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
1277
15.7k
                              flags & ~CURLU_PATH_AS_IS);
1278
15.7k
  }
1279
0
  else
1280
0
    uc = CURLUE_OUT_OF_MEMORY;
1281
1282
15.7k
  curlx_dyn_free(&urlbuf);
1283
15.7k
  return uc;
1284
15.7k
}
1285
1286
/*
1287
 */
1288
CURLU *curl_url(void)
1289
226k
{
1290
226k
  return curlx_calloc(1, sizeof(struct Curl_URL));
1291
226k
}
1292
1293
/*
 * curl_url_cleanup() frees everything stored in the handle and then the
 * handle itself. Passing NULL is fine and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  curlx_free(u);
}
1300
1301
#define DUP(dest, src, name)                    \
1302
10.4k
  do {                                          \
1303
10.4k
    if(src->name) {                             \
1304
3.24k
      dest->name = curlx_strdup(src->name);     \
1305
3.24k
      if(!dest->name)                           \
1306
3.24k
        goto fail;                              \
1307
3.24k
    }                                           \
1308
10.4k
  } while(0)
1309
1310
CURLU *curl_url_dup(const CURLU *in)
1311
1.04k
{
1312
1.04k
  struct Curl_URL *u = curlx_calloc(1, sizeof(struct Curl_URL));
1313
1.04k
  if(u) {
1314
1.04k
    DUP(u, in, scheme);
1315
1.04k
    DUP(u, in, user);
1316
1.04k
    DUP(u, in, password);
1317
1.04k
    DUP(u, in, options);
1318
1.04k
    DUP(u, in, host);
1319
1.04k
    DUP(u, in, port);
1320
1.04k
    DUP(u, in, path);
1321
1.04k
    DUP(u, in, query);
1322
1.04k
    DUP(u, in, fragment);
1323
1.04k
    DUP(u, in, zoneid);
1324
1.04k
    u->portnum = in->portnum;
1325
1.04k
    u->fragment_present = in->fragment_present;
1326
1.04k
    u->query_present = in->query_present;
1327
1.04k
  }
1328
1.04k
  return u;
1329
0
fail:
1330
0
  curl_url_cleanup(u);
1331
0
  return NULL;
1332
1.04k
}
1333
1334
#ifdef USE_IDN
/*
 * host_decode() runs 'host' through Curl_idn_decode() and returns the
 * result in '*allochost'. Any failure other than out of memory is reported
 * as a bad hostname.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode result = Curl_idn_decode(host, allochost);
  if(!result)
    return CURLUE_OK;
  if(result == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/*
 * host_encode() runs 'host' through Curl_idn_encode() and returns the
 * result in '*allochost'. Any failure other than out of memory is reported
 * as a bad hostname.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode result = Curl_idn_encode(host, allochost);
  if(!result)
    return CURLUE_OK;
  if(result == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}
#else
/* without IDN support, both conversions always fail */
#define host_decode(x, y) CURLUE_LACKS_IDN
#define host_encode(x, y) CURLUE_LACKS_IDN
#endif
1356
1357
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
1358
                               const char *ptr, char **partp,
1359
                               bool plusdecode, unsigned int flags)
1360
832k
{
1361
832k
  CURLUcode uc = CURLUE_OK;
1362
832k
  size_t partlen = strlen(ptr);
1363
832k
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1364
832k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1365
832k
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
1366
832k
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
1367
832k
  char *part = Curl_memdup0(ptr, partlen);
1368
832k
  *partp = NULL;
1369
832k
  if(!part)
1370
0
    return CURLUE_OUT_OF_MEMORY;
1371
832k
  if(plusdecode) {
1372
    /* convert + to space */
1373
224
    char *plus = part;
1374
224
    size_t i = 0;
1375
2.18k
    for(i = 0; i < partlen; ++plus, i++) {
1376
1.95k
      if(*plus == '+')
1377
214
        *plus = ' ';
1378
1.95k
    }
1379
224
  }
1380
832k
  if(urldecode) {
1381
81.8k
    char *decoded;
1382
81.8k
    size_t dlen;
1383
    /* this unconditional rejection of control bytes is documented
1384
       API behavior */
1385
81.8k
    CURLcode res = Curl_urldecode(part, partlen, &decoded, &dlen, REJECT_CTRL);
1386
81.8k
    curlx_free(part);
1387
81.8k
    if(res)
1388
36
      return CURLUE_URLDECODE;
1389
81.8k
    part = decoded;
1390
81.8k
    partlen = dlen;
1391
81.8k
  }
1392
832k
  if(urlencode) {
1393
148k
    struct dynbuf enc;
1394
148k
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1395
148k
    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
1396
148k
    curlx_free(part);
1397
148k
    if(uc)
1398
0
      return uc;
1399
148k
    part = curlx_dyn_ptr(&enc);
1400
148k
  }
1401
684k
  else if(punycode) {
1402
0
    if(!Curl_is_ASCII_name(u->host)) {
1403
0
      char *punyversion = NULL;
1404
0
      uc = host_decode(part, &punyversion);
1405
0
      curlx_free(part);
1406
0
      if(uc)
1407
0
        return uc;
1408
0
      part = punyversion;
1409
0
    }
1410
0
  }
1411
684k
  else if(depunyfy) {
1412
0
    if(Curl_is_ASCII_name(u->host)) {
1413
0
      char *unpunified = NULL;
1414
0
      uc = host_encode(part, &unpunified);
1415
0
      curlx_free(part);
1416
0
      if(uc)
1417
0
        return uc;
1418
0
      part = unpunified;
1419
0
    }
1420
0
  }
1421
832k
  *partp = part;
1422
832k
  return CURLUE_OK;
1423
832k
}
1424
1425
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1426
318k
{
1427
318k
  char *url;
1428
318k
  const char *scheme;
1429
318k
  char *options = u->options;
1430
318k
  char *port = u->port;
1431
318k
  char *allochost = NULL;
1432
318k
  bool show_fragment =
1433
318k
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1434
318k
  bool show_query = (u->query && u->query[0]) ||
1435
281k
    (u->query_present && flags & CURLU_GET_EMPTY);
1436
318k
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1437
318k
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1438
318k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1439
318k
  char portbuf[7];
1440
318k
  if(u->scheme && curl_strequal("file", u->scheme)) {
1441
2.15k
    url = curl_maprintf("file://%s%s%s%s%s",
1442
2.15k
                        u->path,
1443
2.15k
                        show_query ? "?": "",
1444
2.15k
                        u->query ? u->query : "",
1445
2.15k
                        show_fragment ? "#": "",
1446
2.15k
                        u->fragment ? u->fragment : "");
1447
2.15k
  }
1448
316k
  else if(!u->host)
1449
116k
    return CURLUE_NO_HOST;
1450
199k
  else {
1451
199k
    const struct Curl_scheme *h = NULL;
1452
199k
    char schemebuf[MAX_SCHEME_LEN + 5];
1453
199k
    if(u->scheme)
1454
199k
      scheme = u->scheme;
1455
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1456
0
      scheme = DEFAULT_SCHEME;
1457
0
    else
1458
0
      return CURLUE_NO_SCHEME;
1459
1460
199k
    h = Curl_get_scheme(scheme);
1461
199k
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1462
      /* there is no stored port number, but asked to deliver
1463
         a default one for the scheme */
1464
0
      if(h) {
1465
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1466
0
        port = portbuf;
1467
0
      }
1468
0
    }
1469
199k
    else if(port) {
1470
      /* there is a stored port number, but asked to inhibit if it matches
1471
         the default one for the scheme */
1472
4.43k
      if(h && (h->defport == u->portnum) &&
1473
316
         (flags & CURLU_NO_DEFAULT_PORT))
1474
4
        port = NULL;
1475
4.43k
    }
1476
1477
199k
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1478
176k
      options = NULL;
1479
1480
199k
    if(u->host[0] == '[') {
1481
1.23k
      if(u->zoneid) {
1482
        /* make it '[ host %25 zoneid ]' */
1483
972
        struct dynbuf enc;
1484
972
        size_t hostlen = strlen(u->host);
1485
972
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1486
972
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1487
972
                          u->zoneid))
1488
0
          return CURLUE_OUT_OF_MEMORY;
1489
972
        allochost = curlx_dyn_ptr(&enc);
1490
972
      }
1491
1.23k
    }
1492
198k
    else if(urlencode) {
1493
15.4k
      allochost = curl_easy_escape(NULL, u->host, 0);
1494
15.4k
      if(!allochost)
1495
0
        return CURLUE_OUT_OF_MEMORY;
1496
15.4k
    }
1497
182k
    else if(punycode) {
1498
0
      if(!Curl_is_ASCII_name(u->host)) {
1499
0
        CURLUcode ret = host_decode(u->host, &allochost);
1500
0
        if(ret)
1501
0
          return ret;
1502
0
      }
1503
0
    }
1504
182k
    else if(depunyfy) {
1505
0
      if(Curl_is_ASCII_name(u->host)) {
1506
0
        CURLUcode ret = host_encode(u->host, &allochost);
1507
0
        if(ret)
1508
0
          return ret;
1509
0
      }
1510
0
    }
1511
1512
199k
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1513
199k
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1514
0
    else
1515
0
      schemebuf[0] = 0;
1516
1517
199k
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1518
199k
                        schemebuf,
1519
199k
                        u->user ? u->user : "",
1520
199k
                        u->password ? ":": "",
1521
199k
                        u->password ? u->password : "",
1522
199k
                        options ? ";" : "",
1523
199k
                        options ? options : "",
1524
199k
                        (u->user || u->password || options) ? "@": "",
1525
199k
                        allochost ? allochost : u->host,
1526
199k
                        port ? ":": "",
1527
199k
                        port ? port : "",
1528
199k
                        u->path ? u->path : "/",
1529
199k
                        show_query ? "?": "",
1530
199k
                        u->query ? u->query : "",
1531
199k
                        show_fragment ? "#": "",
1532
199k
                        u->fragment ? u->fragment : "");
1533
199k
    curlx_free(allochost);
1534
199k
  }
1535
201k
  if(!url)
1536
0
    return CURLUE_OUT_OF_MEMORY;
1537
201k
  *part = url;
1538
201k
  return CURLUE_OK;
1539
201k
}
1540
1541
/*
 * curl_url_get() extracts the part 'what' from the handle and returns it
 * in '*part' as an allocated string.
 *
 * Returns CURLUE_BAD_HANDLE or CURLUE_BAD_PARTPOINTER for NULL arguments.
 * When the requested part is not set, the CURLUE_NO_* code for that part
 * is returned and '*part' is left as NULL. CURLUPART_URL is passed on to
 * urlget_url(); all other parts are formatted by urlget_format().
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *ptr = NULL;
  CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
  char portbuf[7];
  bool plusdecode = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_URL:
    return urlget_url(u, part, flags);

  case CURLUPART_SCHEME:
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    ifmissing = CURLUE_NO_SCHEME;
    ptr = u->scheme;
    break;

  case CURLUPART_USER:
    ifmissing = CURLUE_NO_USER;
    ptr = u->user;
    break;

  case CURLUPART_PASSWORD:
    ifmissing = CURLUE_NO_PASSWORD;
    ptr = u->password;
    break;

  case CURLUPART_OPTIONS:
    ifmissing = CURLUE_NO_OPTIONS;
    ptr = u->options;
    break;

  case CURLUPART_HOST:
    ifmissing = CURLUE_NO_HOST;
    ptr = u->host;
    break;

  case CURLUPART_ZONEID:
    ifmissing = CURLUE_NO_ZONEID;
    ptr = u->zoneid;
    break;

  case CURLUPART_PORT:
    flags &= ~CURLU_URLDECODE; /* never for port */
    ifmissing = CURLUE_NO_PORT;
    ptr = u->port;
    if(u->scheme) {
      if(ptr) {
        /* there is a stored port number, but ask to inhibit if
           it matches the default one for the scheme */
        const struct Curl_scheme *h = Curl_get_scheme(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          ptr = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* there is no stored port number, but asked to deliver
           a default one for the scheme */
        const struct Curl_scheme *h = Curl_get_scheme(u->scheme);
        if(h) {
          curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          ptr = portbuf;
        }
      }
    }
    break;

  case CURLUPART_PATH:
    /* a path is never missing: an unset one reads as the root */
    ptr = u->path ? u->path : "/";
    break;

  case CURLUPART_QUERY:
    ifmissing = CURLUE_NO_QUERY;
    plusdecode = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    ptr = u->query;
    if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
      /* there was a blank query and the user do not ask for it */
      ptr = NULL;
    break;

  case CURLUPART_FRAGMENT:
    ifmissing = CURLUE_NO_FRAGMENT;
    ptr = u->fragment;
    if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
      /* there was a blank fragment and the user asks for it */
      ptr = "";
    break;

  default:
    ptr = NULL;
    break;
  }

  if(!ptr)
    return ifmissing;

  return urlget_format(u, what, ptr, part, plusdecode, flags);
}
1635
1636
/*
 * set_url_scheme() verifies that 'scheme' is fine to store in the handle.
 *
 * The scheme must be 1 to MAX_SCHEME_LEN bytes long (CURLUE_BAD_SCHEME
 * otherwise). Unless CURLU_NON_SUPPORT_SCHEME is set, it must be a scheme
 * known to Curl_get_scheme() that has a 'run' entry
 * (CURLUE_UNSUPPORTED_SCHEME otherwise). A scheme that is not known must
 * follow the RFC 3986 syntax, or CURLUE_BAD_SCHEME is returned.
 *
 * On success the guessed-scheme flag is cleared. The scheme string itself
 * is not stored by this function.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_scheme *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme(scheme);
  if(!(flags & CURLU_NON_SUPPORT_SCHEME) && (!h || !h->run))
    return CURLUE_UNSUPPORTED_SCHEME;
  if(!h) {
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    const char *s = scheme;
    if(!ISALPHA(*s))
      return CURLUE_BAD_SCHEME;
    /* check every remaining byte up to the end of the string, so that the
       final character is verified as well */
    for(s++; *s; s++) {
      if(!ISALNUM(*s) && (*s != '+') && (*s != '-') && (*s != '.'))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1665
1666
/*
 * set_url_port() parses 'provided_port' and stores it in the handle, both
 * as a freshly formatted string (u->port) and as a number (u->portnum).
 *
 * The string must start with a digit, be a number no larger than 0xffff
 * and have nothing following the number, or CURLUE_BAD_PORT_NUMBER is
 * returned.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  curl_off_t port;
  char *portstr;

  /* not a number, or a weirdly provided number: not good! */
  if(!ISDIGIT(provided_port[0]) ||
     curlx_str_number(&provided_port, &port, 0xffff) ||
     *provided_port)
    return CURLUE_BAD_PORT_NUMBER;

  portstr = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!portstr)
    return CURLUE_OUT_OF_MEMORY;

  /* replace the previously stored port, if any */
  curlx_free(u->port);
  u->port = portstr;
  u->portnum = (unsigned short)port;
  return CURLUE_OK;
}
1684
1685
/*
 * set_url() lets a new URL replace the existing (if any) contents.
 *
 * - A blank URL ('part_size' is zero) is only accepted when the handle
 *   already holds a complete URL, and then nothing changes.
 * - An absolute URL replaces the existing contents.
 * - If no absolute URL can be extracted from the existing contents, the
 *   new URL replaces them as well.
 * - Otherwise the new URL is treated as relative and is applied on top of
 *   the existing URL.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *oldurl = NULL;
  CURLUcode uc;

  if(!part_size) {
    /* a blank URL is not a valid URL unless we already have a complete one
       and this is a redirect */
    uc = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
    if(uc)
      return (uc == CURLUE_OUT_OF_MEMORY) ? uc : CURLUE_MALFORMED_INPUT;

    /* success, meaning the "" is a fine relative URL, but nothing
       changes */
    curlx_free(oldurl);
    return CURLUE_OK;
  }

  /* if the new URL is absolute replace the existing with the new. */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME | CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  uc = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
  switch(uc) {
  case CURLUE_OK:
    break;
  case CURLUE_OUT_OF_MEMORY:
    return uc;
  default:
    /* the old URL is incomplete (we cannot get an absolute URL in
       'oldurl'), replace the existing with the new */
    return parseurl_and_replace(url, u, flags);
  }

  DEBUGASSERT(oldurl); /* it is set here */
  /* apply the relative part to create a new URL */
  uc = redirect_url(oldurl, url, u, flags);
  curlx_free(oldurl);
  return uc;
}
1731
1732
/*
 * urlset_clear() removes the part 'what' from the handle: the stored
 * string is freed and set to NULL, and the state associated with the part
 * is reset. CURLUPART_URL clears the whole handle. An unknown part gives
 * CURLUE_UNKNOWN_PART.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **storep = NULL;

  switch(what) {
  case CURLUPART_URL:
    /* wipe everything */
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    storep = &u->scheme;
    break;
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    storep = &u->port;
    break;
  case CURLUPART_PATH:
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    storep = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    storep = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  /* free the selected string and leave a NULL behind */
  Curl_safefree(*storep);
  return CURLUE_OK;
}
1778
1779
/*
 * allowed_in_path() returns true if the byte 'x' is one of the punctuation
 * characters that curl_url_set() copies verbatim, instead of
 * percent-encoding, when it URL-encodes a path.
 */
static bool allowed_in_path(unsigned char x)
{
  static const char passthrough[] = "!$&'(){}[]*+,;=:@/";

  /* search the listed characters only: the terminating null byte is left
     out of the length so that a zero byte is never reported as allowed */
  return memchr(passthrough, x, sizeof(passthrough) - 1) != NULL;
}
1804
1805
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1806
                       const char *part, unsigned int flags)
1807
302k
{
1808
302k
  char **storep = NULL;
1809
302k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1810
302k
  bool plusencode = FALSE;
1811
302k
  bool pathmode = FALSE;
1812
302k
  bool leadingslash = FALSE;
1813
302k
  bool appendquery = FALSE;
1814
302k
  bool equalsencode = FALSE;
1815
302k
  size_t nalloc;
1816
1817
302k
  if(!u)
1818
0
    return CURLUE_BAD_HANDLE;
1819
302k
  if(!part)
1820
    /* setting a part to NULL clears it */
1821
12.2k
    return urlset_clear(u, what);
1822
1823
290k
  nalloc = strlen(part);
1824
290k
  if(nalloc > CURL_MAX_INPUT_LENGTH)
1825
    /* excessive input length */
1826
0
    return CURLUE_MALFORMED_INPUT;
1827
1828
290k
  switch(what) {
1829
2
  case CURLUPART_SCHEME: {
1830
2
    CURLUcode status = set_url_scheme(u, part, flags);
1831
2
    if(status)
1832
0
      return status;
1833
2
    storep = &u->scheme;
1834
2
    urlencode = FALSE; /* never */
1835
2
    break;
1836
2
  }
1837
28.0k
  case CURLUPART_USER:
1838
28.0k
    storep = &u->user;
1839
28.0k
    break;
1840
10.9k
  case CURLUPART_PASSWORD:
1841
10.9k
    storep = &u->password;
1842
10.9k
    break;
1843
0
  case CURLUPART_OPTIONS:
1844
0
    storep = &u->options;
1845
0
    break;
1846
0
  case CURLUPART_HOST:
1847
0
    storep = &u->host;
1848
0
    Curl_safefree(u->zoneid);
1849
0
    break;
1850
0
  case CURLUPART_ZONEID:
1851
0
    storep = &u->zoneid;
1852
0
    break;
1853
756
  case CURLUPART_PORT:
1854
756
    return set_url_port(u, part);
1855
0
  case CURLUPART_PATH:
1856
0
    pathmode = TRUE;
1857
0
    leadingslash = TRUE; /* enforce */
1858
0
    storep = &u->path;
1859
0
    break;
1860
0
  case CURLUPART_QUERY:
1861
0
    plusencode = urlencode;
1862
0
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1863
0
    equalsencode = appendquery;
1864
0
    storep = &u->query;
1865
0
    u->query_present = TRUE;
1866
0
    break;
1867
0
  case CURLUPART_FRAGMENT:
1868
0
    storep = &u->fragment;
1869
0
    u->fragment_present = TRUE;
1870
0
    break;
1871
250k
  case CURLUPART_URL:
1872
250k
    return set_url(u, part, nalloc, flags);
1873
0
  default:
1874
0
    return CURLUE_UNKNOWN_PART;
1875
290k
  }
1876
38.9k
  DEBUGASSERT(storep);
1877
38.9k
  {
1878
38.9k
    const char *newp;
1879
38.9k
    struct dynbuf enc;
1880
38.9k
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1881
1882
38.9k
    if(leadingslash && (part[0] != '/')) {
1883
0
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
1884
0
      if(result)
1885
0
        return cc2cu(result);
1886
0
    }
1887
38.9k
    if(urlencode) {
1888
38.9k
      const unsigned char *i;
1889
1890
33.6M
      for(i = (const unsigned char *)part; *i; i++) {
1891
33.5M
        CURLcode result;
1892
33.5M
        if((*i == ' ') && plusencode) {
1893
0
          result = curlx_dyn_addn(&enc, "+", 1);
1894
0
          if(result)
1895
0
            return CURLUE_OUT_OF_MEMORY;
1896
0
        }
1897
33.5M
        else if(ISUNRESERVED(*i) ||
1898
29.3M
                (pathmode && allowed_in_path(*i)) ||
1899
29.3M
                ((*i == '=') && equalsencode)) {
1900
4.24M
          if((*i == '=') && equalsencode)
1901
            /* only skip the first equals sign */
1902
0
            equalsencode = FALSE;
1903
4.24M
          result = curlx_dyn_addn(&enc, i, 1);
1904
4.24M
          if(result)
1905
0
            return cc2cu(result);
1906
4.24M
        }
1907
29.3M
        else {
1908
29.3M
          unsigned char out[3] = { '%' };
1909
29.3M
          Curl_hexbyte(&out[1], *i);
1910
29.3M
          result = curlx_dyn_addn(&enc, out, 3);
1911
29.3M
          if(result)
1912
0
            return cc2cu(result);
1913
29.3M
        }
1914
33.5M
      }
1915
38.9k
    }
1916
2
    else {
1917
2
      char *p;
1918
2
      CURLcode result = curlx_dyn_add(&enc, part);
1919
2
      if(result)
1920
0
        return cc2cu(result);
1921
2
      p = curlx_dyn_ptr(&enc);
1922
12
      while(*p) {
1923
        /* make sure percent encoded are lower case */
1924
10
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1925
0
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1926
0
          p[1] = Curl_raw_tolower(p[1]);
1927
0
          p[2] = Curl_raw_tolower(p[2]);
1928
0
          p += 3;
1929
0
        }
1930
10
        else
1931
10
          p++;
1932
10
      }
1933
2
    }
1934
38.9k
    newp = curlx_dyn_ptr(&enc);
1935
1936
38.9k
    if(appendquery && newp) {
1937
      /* Append the 'newp' string onto the old query. Add a '&' separator if
1938
         none is present at the end of the existing query already */
1939
1940
0
      size_t querylen = u->query ? strlen(u->query) : 0;
1941
0
      bool addamperand = querylen && (u->query[querylen - 1] != '&');
1942
0
      if(querylen) {
1943
0
        struct dynbuf qbuf;
1944
0
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1945
1946
0
        if(curlx_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1947
0
          goto nomem;
1948
1949
0
        if(addamperand) {
1950
0
          if(curlx_dyn_addn(&qbuf, "&", 1))
1951
0
            goto nomem;
1952
0
        }
1953
0
        if(curlx_dyn_add(&qbuf, newp))
1954
0
          goto nomem;
1955
0
        curlx_dyn_free(&enc);
1956
0
        curlx_free(*storep);
1957
0
        *storep = curlx_dyn_ptr(&qbuf);
1958
0
        return CURLUE_OK;
1959
0
nomem:
1960
0
        curlx_dyn_free(&enc);
1961
0
        return CURLUE_OUT_OF_MEMORY;
1962
0
      }
1963
0
    }
1964
1965
38.9k
    else if(what == CURLUPART_HOST) {
1966
0
      size_t n = curlx_dyn_len(&enc);
1967
0
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
1968
        /* Skip hostname check, it is allowed to be empty. */
1969
0
      }
1970
0
      else {
1971
0
        bool bad = FALSE;
1972
0
        if(!n)
1973
0
          bad = TRUE; /* empty hostname is not okay */
1974
0
        else if(!urlencode) {
1975
          /* if the hostname part was not URL encoded here, it was set ready
1976
             URL encoded so we need to decode it to check */
1977
0
          size_t dlen;
1978
0
          char *decoded = NULL;
1979
0
          CURLcode result =
1980
0
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
1981
0
          if(result || hostname_check(u, decoded, dlen))
1982
0
            bad = TRUE;
1983
0
          curlx_free(decoded);
1984
0
        }
1985
0
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
1986
0
          bad = TRUE;
1987
0
        if(bad) {
1988
0
          curlx_dyn_free(&enc);
1989
0
          return CURLUE_BAD_HOSTNAME;
1990
0
        }
1991
0
      }
1992
0
    }
1993
1994
38.9k
    curlx_free(*storep);
1995
38.9k
    *storep = (char *)CURL_UNCONST(newp);
1996
38.9k
  }
1997
0
  return CURLUE_OK;
1998
38.9k
}