Coverage Report

Created: 2025-12-04 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/curl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
25
#include "curl_setup.h"
26
27
#include "urldata.h"
28
#include "urlapi-int.h"
29
#include "strcase.h"
30
#include "url.h"
31
#include "escape.h"
32
#include "curl_ctype.h"
33
#include "curlx/inet_pton.h"
34
#include "curlx/inet_ntop.h"
35
#include "strdup.h"
36
#include "idn.h"
37
#include "curlx/strparse.h"
38
#include "curl_memrchr.h"
39
40
#ifdef _WIN32
/* MS-DOS/Windows style drive prefix, e.g. the "c:" in "c:foo".
 * The argument is parenthesized on every use so that an expression such as
 * (ptr + 1) expands correctly. */
#define STARTS_WITH_DRIVE_PREFIX(str)        \
  ((('a' <= (str)[0] && (str)[0] <= 'z') ||  \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
#endif

/* MS-DOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash, a backslash or NUL */
#define STARTS_WITH_URL_DRIVE_PREFIX(str)                  \
  ((('a' <= (str)[0] && (str)[0] <= 'z') ||                \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) &&               \
   ((str)[1] == ':' || (str)[1] == '|') &&                 \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40

/*
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
67
68
/* Internal representation of CURLU. Point to URL-encoded strings. */
69
struct Curl_URL {
70
  char *scheme;
71
  char *user;
72
  char *password;
73
  char *options; /* IMAP only? */
74
  char *host;
75
  char *zoneid; /* for numerical IPv6 addresses */
76
  char *port;
77
  char *path;
78
  char *query;
79
  char *fragment;
80
  unsigned short portnum; /* the numerical version (if 'port' is set) */
81
  BIT(query_present);    /* to support blank */
82
  BIT(fragment_present); /* to support blank */
83
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
84
};
85
86
0
#define DEFAULT_SCHEME "https"
87
88
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
89
                                      unsigned int flags);
90
91
static void free_urlhandle(struct Curl_URL *u)
92
3.87k
{
93
3.87k
  curlx_free(u->scheme);
94
3.87k
  curlx_free(u->user);
95
3.87k
  curlx_free(u->password);
96
3.87k
  curlx_free(u->options);
97
3.87k
  curlx_free(u->host);
98
3.87k
  curlx_free(u->zoneid);
99
3.87k
  curlx_free(u->port);
100
3.87k
  curlx_free(u->path);
101
3.87k
  curlx_free(u->query);
102
3.87k
  curlx_free(u->fragment);
103
3.87k
}
104
105
/*
 * Return a pointer to the character that ends the hostname: the first '/'
 * or '?' found after the leading "//" (or from the start of the string when
 * there is no "//"), as in http://www.example.com?id=2380
 *
 * If neither character is present, the returned pointer addresses the
 * terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *slashes = strstr(url, "//");
  /* the hostname begins right after the double slash, if there is one */
  const char *host = slashes ? slashes + 2 : url;

  return host + strcspn(host, "/?");
}
124
125
/* Map a CURLcode failure to a CURLUcode: CURLE_TOO_LARGE becomes
   CURLUE_TOO_LARGE, anything else becomes CURLUE_OUT_OF_MEMORY */
#define cc2cu(x) ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY :   \
                  CURLUE_TOO_LARGE)
128
129
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
130
 * spaces in the source URL accordingly.
131
 *
132
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
133
 * will fail.
134
 */
135
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
136
                               size_t len, bool relative,
137
                               bool query)
138
0
{
139
  /* we must add this with whitespace-replacing */
140
0
  bool left = !query;
141
0
  const unsigned char *iptr;
142
0
  const unsigned char *host_sep = (const unsigned char *)url;
143
0
  CURLcode result = CURLE_OK;
144
145
0
  if(!relative) {
146
0
    size_t n;
147
0
    host_sep = (const unsigned char *)find_host_sep(url);
148
149
    /* output the first piece as-is */
150
0
    n = (const char *)host_sep - url;
151
0
    result = curlx_dyn_addn(o, url, n);
152
0
    len -= n;
153
0
  }
154
155
0
  for(iptr = host_sep; len && !result; iptr++, len--) {
156
0
    if(*iptr == ' ') {
157
0
      if(left)
158
0
        result = curlx_dyn_addn(o, "%20", 3);
159
0
      else
160
0
        result = curlx_dyn_addn(o, "+", 1);
161
0
    }
162
0
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
163
0
      unsigned char out[3] = { '%' };
164
0
      Curl_hexbyte(&out[1], *iptr);
165
0
      result = curlx_dyn_addn(o, out, 3);
166
0
    }
167
0
    else {
168
0
      result = curlx_dyn_addn(o, iptr, 1);
169
0
      if(*iptr == '?')
170
0
        left = FALSE;
171
0
    }
172
0
  }
173
174
0
  if(result)
175
0
    return cc2cu(result);
176
0
  return CURLUE_OK;
177
0
}
178
179
/*
180
 * Returns the length of the scheme if the given URL is absolute (as opposed
181
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
182
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
183
 *
184
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
185
 * scheme.
186
 */
187
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
188
                            bool guess_scheme)
189
3.86k
{
190
3.86k
  size_t i = 0;
191
3.86k
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
192
3.86k
  (void)buflen; /* only used in debug-builds */
193
3.86k
  if(buf)
194
1.92k
    buf[0] = 0; /* always leave a defined value in buf */
195
#ifdef _WIN32
196
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
197
    return 0;
198
#endif
199
3.86k
  if(ISALPHA(url[0]))
200
5.51k
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
201
5.47k
      char s = url[i];
202
5.47k
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.'))) {
203
        /* RFC 3986 3.1 explains:
204
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205
        */
206
4.38k
      }
207
1.08k
      else {
208
1.08k
        break;
209
1.08k
      }
210
5.47k
    }
211
3.86k
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
212
    /* If this does not guess scheme, the scheme always ends with the colon so
213
       that this also detects data: URLs etc. In guessing mode, data: could
214
       be the hostname "data" with a specified port number. */
215
216
    /* the length of the scheme is the name part only */
217
494
    size_t len = i;
218
494
    if(buf) {
219
247
      Curl_strntolower(buf, url, i);
220
247
      buf[i] = 0;
221
247
    }
222
494
    return len;
223
494
  }
224
3.36k
  return 0;
225
3.86k
}
226
227
/*
228
 * Concatenate a relative URL onto a base URL making it absolute.
229
 */
230
static CURLUcode redirect_url(const char *base, const char *relurl,
231
                              CURLU *u, unsigned int flags)
232
0
{
233
0
  struct dynbuf urlbuf;
234
0
  bool host_changed = FALSE;
235
0
  const char *useurl = relurl;
236
0
  const char *cutoff = NULL;
237
0
  size_t prelen;
238
0
  CURLUcode uc;
239
240
  /* protsep points to the start of the hostname, after [scheme]:// */
241
0
  const char *protsep = base + strlen(u->scheme) + 3;
242
0
  DEBUGASSERT(base && relurl && u); /* all set here */
243
0
  if(!base)
244
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
245
246
  /* handle different relative URL types */
247
0
  switch(relurl[0]) {
248
0
  case '/':
249
0
    if(relurl[1] == '/') {
250
      /* protocol-relative URL: //example.com/path */
251
0
      cutoff = protsep;
252
0
      useurl = &relurl[2];
253
0
      host_changed = TRUE;
254
0
    }
255
0
    else
256
      /* absolute /path */
257
0
      cutoff = strchr(protsep, '/');
258
0
    break;
259
260
0
  case '#':
261
    /* fragment-only change */
262
0
    if(u->fragment)
263
0
      cutoff = strchr(protsep, '#');
264
0
    break;
265
266
0
  default:
267
    /* path or query-only change */
268
0
    if(u->query && u->query[0])
269
      /* remove existing query */
270
0
      cutoff = strchr(protsep, '?');
271
0
    else if(u->fragment && u->fragment[0])
272
      /* Remove existing fragment */
273
0
      cutoff = strchr(protsep, '#');
274
275
0
    if(relurl[0] != '?') {
276
      /* append a relative path after the last slash */
277
0
      cutoff = memrchr(protsep, '/',
278
0
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
279
0
      if(cutoff)
280
0
        cutoff++; /* truncate after last slash */
281
0
    }
282
0
    break;
283
0
  }
284
285
0
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
286
287
  /* build new URL */
288
0
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
289
290
0
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
291
0
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
292
0
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
293
0
                              flags & ~CURLU_PATH_AS_IS);
294
0
  }
295
0
  else
296
0
    uc = CURLUE_OUT_OF_MEMORY;
297
298
0
  curlx_dyn_free(&urlbuf);
299
0
  return uc;
300
0
}
301
302
/* scan for byte values <= 31, 127 and sometimes space */
303
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
304
1.93k
{
305
1.93k
  size_t n = strlen(url);
306
1.93k
  size_t i;
307
1.93k
  unsigned char control;
308
1.93k
  const unsigned char *p = (const unsigned char *)url;
309
1.93k
  if(n > CURL_MAX_INPUT_LENGTH)
310
0
    return CURLUE_MALFORMED_INPUT;
311
312
1.93k
  control = allowspace ? 0x1f : 0x20;
313
52.1M
  for(i = 0; i < n; i++) {
314
52.1M
    if(p[i] <= control || p[i] == 127)
315
17
      return CURLUE_MALFORMED_INPUT;
316
52.1M
  }
317
1.92k
  *urllen = n;
318
1.92k
  return CURLUE_OK;
319
1.93k
}
320
321
/*
322
 * parse_hostname_login()
323
 *
324
 * Parse the login details (username, password and options) from the URL and
325
 * strip them out of the hostname
326
 *
327
 */
328
static CURLUcode parse_hostname_login(struct Curl_URL *u,
329
                                      const char *login,
330
                                      size_t len,
331
                                      unsigned int flags,
332
                                      size_t *offset) /* to the hostname */
333
1.70k
{
334
1.70k
  CURLUcode result = CURLUE_OK;
335
1.70k
  CURLcode ccode;
336
1.70k
  char *userp = NULL;
337
1.70k
  char *passwdp = NULL;
338
1.70k
  char *optionsp = NULL;
339
1.70k
  const struct Curl_handler *h = NULL;
340
341
  /* At this point, we assume all the other special cases have been taken
342
   * care of, so the host is at most
343
   *
344
   *   [user[:password][;options]]@]hostname
345
   *
346
   * We need somewhere to put the embedded details, so do that first.
347
   */
348
1.70k
  char *ptr;
349
350
1.70k
  DEBUGASSERT(login);
351
352
1.70k
  *offset = 0;
353
1.70k
  ptr = memchr(login, '@', len);
354
1.70k
  if(!ptr)
355
1.66k
    goto out;
356
357
  /* We will now try to extract the
358
   * possible login information in a string like:
359
   * ftp://user:password@ftp.site.example:8021/README */
360
39
  ptr++;
361
362
  /* if this is a known scheme, get some details */
363
39
  if(u->scheme)
364
28
    h = Curl_get_scheme_handler(u->scheme);
365
366
  /* We could use the login information in the URL so extract it. Only parse
367
     options if the handler says we should. Note that 'h' might be NULL! */
368
39
  ccode = Curl_parse_login_details(login, ptr - login - 1,
369
39
                                   &userp, &passwdp,
370
39
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
371
39
                                   &optionsp : NULL);
372
39
  if(ccode) {
373
0
    result = CURLUE_BAD_LOGIN;
374
0
    goto out;
375
0
  }
376
377
39
  if(userp) {
378
39
    if(flags & CURLU_DISALLOW_USER) {
379
      /* Option DISALLOW_USER is set and URL contains username. */
380
0
      result = CURLUE_USER_NOT_ALLOWED;
381
0
      goto out;
382
0
    }
383
39
    curlx_free(u->user);
384
39
    u->user = userp;
385
39
  }
386
387
39
  if(passwdp) {
388
12
    curlx_free(u->password);
389
12
    u->password = passwdp;
390
12
  }
391
392
39
  if(optionsp) {
393
20
    curlx_free(u->options);
394
20
    u->options = optionsp;
395
20
  }
396
397
  /* the hostname starts at this offset */
398
39
  *offset = ptr - login;
399
39
  return CURLUE_OK;
400
401
1.66k
out:
402
403
1.66k
  curlx_free(userp);
404
1.66k
  curlx_free(passwdp);
405
1.66k
  curlx_free(optionsp);
406
1.66k
  u->user = NULL;
407
1.66k
  u->password = NULL;
408
1.66k
  u->options = NULL;
409
410
1.66k
  return result;
411
39
}
412
413
/*
 * Curl_parse_port() looks for a ":port" suffix in the hostname held in
 * 'host'. When a colon is found the buffer is cut at the colon, and a valid
 * port number is stored in 'u' as both a number and a freshly formatted
 * string.
 *
 * A hostname starting with '[' must have its closing ']' followed by either
 * nothing or a colon.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  const char *name = curlx_dyn_ptr(host);
  const char *colon;
  const char *digits;
  curl_off_t num;

  if(name[0] != '[')
    colon = strchr(name, ':');
  else {
    /* this is a RFC2732-style specified IP-address, the port can only
       follow the ending bracket */
    const char *closing = strchr(name, ']');
    if(!closing)
      return CURLUE_BAD_IPV6;
    colon = closing + 1;
    if(!*colon)
      colon = NULL;
    else if(*colon != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    /* no port part present */
    return CURLUE_OK;

  /* Browser behavior adaptation. If there is a colon with no digits after,
     just cut off the name there which makes us ignore the colon and just
     use the default port. Firefox, Chrome and Safari all do that.

     Do not do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  curlx_dyn_setlen(host, colon - name);
  digits = colon + 1;
  if(!*digits)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  /* the number must fit in 16 bits and nothing may follow it */
  if(curlx_str_number(&digits, &num, 0xffff) || *digits)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short)num;
  /* generate a new port number string to get rid of leading zeroes etc */
  curlx_free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, num);

  return u->port ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
}
466
467
/* this assumes 'hostname' now starts with [ */
468
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
469
                            size_t hlen) /* length of hostname */
470
147
{
471
147
  size_t len;
472
147
  DEBUGASSERT(*hostname == '[');
473
147
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
474
4
    return CURLUE_BAD_IPV6;
475
143
  hostname++;
476
143
  hlen -= 2;
477
478
  /* only valid IPv6 letters are ok */
479
143
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
480
481
143
  if(hlen != len) {
482
140
    hlen = len;
483
140
    if(hostname[len] == '%') {
484
      /* this could now be '%[zone id]' */
485
101
      char zoneid[16];
486
101
      int i = 0;
487
101
      char *h = &hostname[len + 1];
488
      /* pass '25' if present and is a URL encoded percent sign */
489
101
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
490
21
        h += 2;
491
840
      while(*h && (*h != ']') && (i < 15))
492
739
        zoneid[i++] = *h++;
493
101
      if(!i || (']' != *h))
494
49
        return CURLUE_BAD_IPV6;
495
52
      zoneid[i] = 0;
496
52
      u->zoneid = curlx_strdup(zoneid);
497
52
      if(!u->zoneid)
498
0
        return CURLUE_OUT_OF_MEMORY;
499
52
      hostname[len] = ']'; /* insert end bracket */
500
52
      hostname[len + 1] = 0; /* terminate the hostname */
501
52
    }
502
39
    else
503
39
      return CURLUE_BAD_IPV6;
504
    /* hostname is fine */
505
140
  }
506
507
  /* Normalize the IPv6 address */
508
55
  {
509
55
    char dest[16]; /* fits a binary IPv6 address */
510
55
    hostname[hlen] = 0; /* end the address there */
511
55
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
512
52
      return CURLUE_BAD_IPV6;
513
3
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen)) {
514
1
      hlen = strlen(hostname); /* might be shorter now */
515
1
      hostname[hlen + 1] = 0;
516
1
    }
517
3
    hostname[hlen] = ']'; /* restore ending bracket */
518
3
  }
519
0
  return CURLUE_OK;
520
55
}
521
522
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
523
                                size_t hlen) /* length of hostname */
524
1.10k
{
525
1.10k
  size_t len;
526
1.10k
  DEBUGASSERT(hostname);
527
528
1.10k
  if(!hlen)
529
0
    return CURLUE_NO_HOST;
530
1.10k
  else if(hostname[0] == '[')
531
30
    return ipv6_parse(u, hostname, hlen);
532
1.07k
  else {
533
    /* letters from the second string are not ok */
534
1.07k
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
535
1.07k
    if(hlen != len)
536
      /* hostname with bad content */
537
232
      return CURLUE_BAD_HOSTNAME;
538
1.07k
  }
539
842
  return CURLUE_OK;
540
1.10k
}
541
542
/*
543
 * Handle partial IPv4 numerical addresses and different bases, like
544
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
545
 *
546
 * If the given input string is syntactically wrong IPv4 or any part for
547
 * example is too big, this function returns HOST_NAME.
548
 *
549
 * Output the "normalized" version of that input string in plain quad decimal
550
 * integers.
551
 *
552
 * Returns the host type.
553
 */
554
555
0
#define HOST_ERROR   -1 /* out of memory */
556
557
2.23k
#define HOST_NAME    1
558
648
#define HOST_IPV4    2
559
234
#define HOST_IPV6    3
560
561
static int ipv4_normalize(struct dynbuf *host)
562
1.55k
{
563
1.55k
  bool done = FALSE;
564
1.55k
  int n = 0;
565
1.55k
  const char *c = curlx_dyn_ptr(host);
566
1.55k
  unsigned int parts[4] = { 0, 0, 0, 0 };
567
1.55k
  CURLcode result = CURLE_OK;
568
569
1.55k
  if(*c == '[')
570
117
    return HOST_IPV6;
571
572
3.29k
  while(!done) {
573
2.61k
    int rc;
574
2.61k
    curl_off_t l;
575
2.61k
    if(*c == '0') {
576
696
      if(c[1] == 'x') {
577
243
        c += 2; /* skip the prefix */
578
243
        rc = curlx_str_hex(&c, &l, UINT_MAX);
579
243
      }
580
453
      else
581
453
        rc = curlx_str_octal(&c, &l, UINT_MAX);
582
696
    }
583
1.92k
    else
584
1.92k
      rc = curlx_str_number(&c, &l, UINT_MAX);
585
586
2.61k
    if(rc)
587
732
      return HOST_NAME;
588
589
1.88k
    parts[n] = (unsigned int)l;
590
591
1.88k
    switch(*c) {
592
1.17k
    case '.':
593
1.17k
      if(n == 3)
594
1
        return HOST_NAME;
595
1.17k
      n++;
596
1.17k
      c++;
597
1.17k
      break;
598
599
679
    case '\0':
600
679
      done = TRUE;
601
679
      break;
602
603
30
    default:
604
30
      return HOST_NAME;
605
1.88k
    }
606
1.88k
  }
607
608
679
  switch(n) {
609
161
  case 0: /* a -- 32 bits */
610
161
    curlx_dyn_reset(host);
611
612
161
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
613
161
                            (parts[0] >> 24),
614
161
                            ((parts[0] >> 16) & 0xff),
615
161
                            ((parts[0] >> 8) & 0xff),
616
161
                            (parts[0] & 0xff));
617
161
    break;
618
116
  case 1: /* a.b -- 8.24 bits */
619
116
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
620
59
      return HOST_NAME;
621
57
    curlx_dyn_reset(host);
622
57
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
623
57
                            (parts[0]),
624
57
                            ((parts[1] >> 16) & 0xff),
625
57
                            ((parts[1] >> 8) & 0xff),
626
57
                            (parts[1] & 0xff));
627
57
    break;
628
173
  case 2: /* a.b.c -- 8.8.16 bits */
629
173
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
630
117
      return HOST_NAME;
631
56
    curlx_dyn_reset(host);
632
56
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
633
56
                            (parts[0]),
634
56
                            (parts[1]),
635
56
                            ((parts[2] >> 8) & 0xff),
636
56
                            (parts[2] & 0xff));
637
56
    break;
638
229
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
639
229
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
640
94
       (parts[3] > 0xff))
641
179
      return HOST_NAME;
642
50
    curlx_dyn_reset(host);
643
50
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
644
50
                            (parts[0]),
645
50
                            (parts[1]),
646
50
                            (parts[2]),
647
50
                            (parts[3]));
648
50
    break;
649
679
  }
650
324
  if(result)
651
0
    return HOST_ERROR;
652
324
  return HOST_IPV4;
653
324
}
654
655
/* if necessary, replace the host content with a URL decoded version */
656
static CURLUcode urldecode_host(struct dynbuf *host)
657
1.11k
{
658
1.11k
  char *per = NULL;
659
1.11k
  const char *hostname = curlx_dyn_ptr(host);
660
1.11k
  per = strchr(hostname, '%');
661
1.11k
  if(!per)
662
    /* nothing to decode */
663
871
    return CURLUE_OK;
664
247
  else {
665
    /* encoded */
666
247
    size_t dlen;
667
247
    char *decoded;
668
247
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
669
247
                                     REJECT_CTRL);
670
247
    if(result)
671
14
      return CURLUE_BAD_HOSTNAME;
672
233
    curlx_dyn_reset(host);
673
233
    result = curlx_dyn_addn(host, decoded, dlen);
674
233
    curlx_free(decoded);
675
233
    if(result)
676
0
      return cc2cu(result);
677
233
  }
678
679
233
  return CURLUE_OK;
680
1.11k
}
681
682
static CURLUcode parse_authority(struct Curl_URL *u,
683
                                 const char *auth, size_t authlen,
684
                                 unsigned int flags,
685
                                 struct dynbuf *host,
686
                                 bool has_scheme)
687
1.70k
{
688
1.70k
  size_t offset;
689
1.70k
  CURLUcode uc;
690
1.70k
  CURLcode result;
691
692
  /*
693
   * Parse the login details and strip them out of the hostname.
694
   */
695
1.70k
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
696
1.70k
  if(uc)
697
0
    goto out;
698
699
1.70k
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
700
1.70k
  if(result) {
701
0
    uc = cc2cu(result);
702
0
    goto out;
703
0
  }
704
705
1.70k
  uc = Curl_parse_port(u, host, has_scheme);
706
1.70k
  if(uc)
707
91
    goto out;
708
709
1.61k
  if(!curlx_dyn_len(host))
710
53
    return CURLUE_NO_HOST;
711
712
1.55k
  switch(ipv4_normalize(host)) {
713
324
  case HOST_IPV4:
714
324
    break;
715
117
  case HOST_IPV6:
716
117
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
717
117
    break;
718
1.11k
  case HOST_NAME:
719
1.11k
    uc = urldecode_host(host);
720
1.11k
    if(!uc)
721
1.10k
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
722
1.11k
    break;
723
0
  case HOST_ERROR:
724
0
    uc = CURLUE_OUT_OF_MEMORY;
725
0
    break;
726
0
  default:
727
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
728
0
    break;
729
1.55k
  }
730
731
1.65k
out:
732
1.65k
  return uc;
733
1.55k
}
734
735
/* used for HTTP/2 server push */
736
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
737
0
{
738
0
  CURLUcode result;
739
0
  struct dynbuf host;
740
741
0
  DEBUGASSERT(authority);
742
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
743
744
0
  result = parse_authority(u, authority, strlen(authority),
745
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
746
0
  if(result)
747
0
    curlx_dyn_free(&host);
748
0
  else {
749
0
    curlx_free(u->host);
750
0
    u->host = curlx_dyn_ptr(&host);
751
0
  }
752
0
  return result;
753
0
}
754
755
/*
756
 * "Remove Dot Segments"
757
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
758
 */
759
760
static bool is_dot(const char **str, size_t *clen)
761
380k
{
762
380k
  const char *p = *str;
763
380k
  if(*p == '.') {
764
322k
    (*str)++;
765
322k
    (*clen)--;
766
322k
    return TRUE;
767
322k
  }
768
57.8k
  else if((*clen >= 3) &&
769
57.6k
          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e')) {
770
4.29k
    *str += 3;
771
4.29k
    *clen -= 3;
772
4.29k
    return TRUE;
773
4.29k
  }
774
53.5k
  return FALSE;
775
380k
}
776
777
21.9M
#define ISSLASH(x) ((x) == '/')
778
779
/*
780
 * dedotdotify()
781
 * @unittest: 1395
782
 *
783
 * This function gets a null-terminated path with dot and dotdot sequences
784
 * passed in and strips them off according to the rules in RFC 3986 section
785
 * 5.2.4.
786
 *
787
 * The function handles a path. It should not contain the query nor fragment.
788
 *
789
 * RETURNS
790
 *
791
 * Zero for success and 'out' set to an allocated dedotdotified string.
792
 */
793
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
794
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
795
473
{
796
473
  struct dynbuf out;
797
473
  CURLcode result = CURLE_OK;
798
799
473
  *outp = NULL;
800
  /* the path always starts with a slash, and a slash has not dot */
801
473
  if(clen < 2)
802
0
    return 0;
803
804
473
  curlx_dyn_init(&out, clen + 1);
805
806
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
807
      remove that prefix from the input buffer; otherwise, */
808
473
  if(is_dot(&input, &clen)) {
809
0
    const char *p = input;
810
0
    size_t blen = clen;
811
812
0
    if(!clen)
813
      /* . [end] */
814
0
      goto end;
815
0
    else if(ISSLASH(*p)) {
816
      /* one dot followed by a slash */
817
0
      input = p + 1;
818
0
      clen--;
819
0
    }
820
821
    /*  D. if the input buffer consists only of "." or "..", then remove
822
        that from the input buffer; otherwise, */
823
0
    else if(is_dot(&p, &blen)) {
824
0
      if(!blen)
825
        /* .. [end] */
826
0
        goto end;
827
0
      else if(ISSLASH(*p)) {
828
        /* ../ */
829
0
        input = p + 1;
830
0
        clen = blen - 1;
831
0
      }
832
0
    }
833
0
  }
834
835
21.4M
  while(clen && !result) { /* until end of path content */
836
21.4M
    if(ISSLASH(*input)) {
837
207k
      const char *p = &input[1];
838
207k
      size_t blen = clen - 1;
839
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
840
          "."  is a complete path segment, then replace that prefix with "/" in
841
          the input buffer; otherwise, */
842
207k
      if(is_dot(&p, &blen)) {
843
177k
        if(!blen) { /* /. */
844
24
          result = curlx_dyn_addn(&out, "/", 1);
845
24
          break;
846
24
        }
847
177k
        else if(ISSLASH(*p)) { /* /./ */
848
5.78k
          input = p;
849
5.78k
          clen = blen;
850
5.78k
          continue;
851
5.78k
        }
852
853
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
854
            where ".." is a complete path segment, then replace that prefix
855
            with "/" in the input buffer and remove the last segment and its
856
            preceding "/" (if any) from the output buffer; otherwise, */
857
172k
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
858
          /* remove the last segment from the output buffer */
859
12.1k
          size_t len = curlx_dyn_len(&out);
860
12.1k
          if(len) {
861
5.58k
            char *ptr = curlx_dyn_ptr(&out);
862
5.58k
            char *last = memrchr(ptr, '/', len);
863
5.58k
            if(last)
864
              /* trim the output at the slash */
865
5.58k
              curlx_dyn_setlen(&out, last - ptr);
866
5.58k
          }
867
868
12.1k
          if(blen) { /* /../ */
869
12.0k
            input = p;
870
12.0k
            clen = blen;
871
12.0k
            continue;
872
12.0k
          }
873
77
          result = curlx_dyn_addn(&out, "/", 1);
874
77
          break;
875
12.1k
        }
876
177k
      }
877
207k
    }
878
879
    /*  E. move the first path segment in the input buffer to the end of
880
        the output buffer, including the initial "/" character (if any) and
881
        any subsequent characters up to, but not including, the next "/"
882
        character or the end of the input buffer. */
883
884
21.4M
    result = curlx_dyn_addn(&out, input, 1);
885
21.4M
    input++;
886
21.4M
    clen--;
887
21.4M
  }
888
473
end:
889
473
  if(!result) {
890
473
    if(curlx_dyn_len(&out))
891
473
      *outp = curlx_dyn_ptr(&out);
892
0
    else {
893
0
      *outp = curlx_strdup("");
894
0
      if(!*outp)
895
0
        return 1;
896
0
    }
897
473
  }
898
473
  return result ? 1 : 0; /* success */
899
473
}
900
901
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
902
1.93k
{
903
1.93k
  const char *path;
904
1.93k
  size_t pathlen;
905
1.93k
  char *query = NULL;
906
1.93k
  char *fragment = NULL;
907
1.93k
  char schemebuf[MAX_SCHEME_LEN + 1];
908
1.93k
  size_t schemelen = 0;
909
1.93k
  size_t urllen;
910
1.93k
  CURLUcode result = CURLUE_OK;
911
1.93k
  size_t fraglen = 0;
912
1.93k
  struct dynbuf host;
913
914
1.93k
  DEBUGASSERT(url);
915
916
1.93k
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
917
918
1.93k
  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
919
1.93k
  if(result)
920
17
    goto fail;
921
922
1.92k
  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
923
1.92k
                                   flags & (CURLU_GUESS_SCHEME |
924
1.92k
                                            CURLU_DEFAULT_SCHEME));
925
926
  /* handle the file: scheme */
927
1.92k
  if(schemelen && !strcmp(schemebuf, "file")) {
928
163
    bool uncpath = FALSE;
929
163
    if(urllen <= 6) {
930
      /* file:/ is not enough to actually be a complete file: URL */
931
1
      result = CURLUE_BAD_FILE_URL;
932
1
      goto fail;
933
1
    }
934
935
    /* path has been allocated large enough to hold this */
936
162
    path = &url[5];
937
162
    pathlen = urllen - 5;
938
939
162
    u->scheme = curlx_strdup("file");
940
162
    if(!u->scheme) {
941
0
      result = CURLUE_OUT_OF_MEMORY;
942
0
      goto fail;
943
0
    }
944
945
    /* Extra handling URLs with an authority component (i.e. that start with
946
     * "file://")
947
     *
948
     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
949
     * RFC 8089, but not the (current) WHAT-WG URL spec.
950
     */
951
162
    if(path[0] == '/' && path[1] == '/') {
952
      /* swallow the two slashes */
953
78
      const char *ptr = &path[2];
954
955
      /*
956
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
957
       *
958
       *  o it has no/blank hostname, or
959
       *
960
       *  o the hostname matches "localhost" (case-insensitively), or
961
       *
962
       *  o the hostname is a FQDN that resolves to this machine, or
963
       *
964
       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
965
       *    Appendix E.3).
966
       *
967
       * For brevity, we only consider URLs with empty, "localhost", or
968
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
969
       *
970
       * Additionally, there is an exception for URLs with a Windows drive
971
       * letter in the authority (which was accidentally omitted from RFC 8089
972
       * Appendix E, but believe me, it was meant to be there. --MK)
973
       */
974
78
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
975
        /* the URL includes a hostname, it must match "localhost" or
976
           "127.0.0.1" to be valid */
977
63
        if(checkprefix("localhost/", ptr) ||
978
62
           checkprefix("127.0.0.1/", ptr)) {
979
2
          ptr += 9; /* now points to the slash after the host */
980
2
        }
981
61
        else {
982
#ifdef _WIN32
983
          size_t len;
984
985
          /* the hostname, NetBIOS computer name, can not contain disallowed
986
             chars, and the delimiting slash character must be appended to the
987
             hostname */
988
          path = strpbrk(ptr, "/\\:*?\"<>|");
989
          if(!path || *path != '/') {
990
            result = CURLUE_BAD_FILE_URL;
991
            goto fail;
992
          }
993
994
          len = path - ptr;
995
          if(len) {
996
            CURLcode code = curlx_dyn_addn(&host, ptr, len);
997
            if(code) {
998
              result = cc2cu(code);
999
              goto fail;
1000
            }
1001
            uncpath = TRUE;
1002
          }
1003
1004
          ptr -= 2; /* now points to the // before the host in UNC */
1005
#else
1006
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1007
             none */
1008
61
          result = CURLUE_BAD_FILE_URL;
1009
61
          goto fail;
1010
61
#endif
1011
61
        }
1012
63
      }
1013
1014
17
      path = ptr;
1015
17
      pathlen = urllen - (ptr - url);
1016
17
    }
1017
1018
101
    if(!uncpath)
1019
      /* no host for file: URLs by default */
1020
101
      curlx_dyn_reset(&host);
1021
1022
101
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1023
    /* Do not allow Windows drive letters when not in Windows.
1024
     * This catches both "file:/c:" and "file:c:" */
1025
101
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1026
97
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1027
      /* File drive letters are only accepted in MS-DOS/Windows */
1028
18
      result = CURLUE_BAD_FILE_URL;
1029
18
      goto fail;
1030
18
    }
1031
#else
1032
    /* If the path starts with a slash and a drive letter, ditch the slash */
1033
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1034
      /* This cannot be done with strcpy, as the memory chunks overlap! */
1035
      path++;
1036
      pathlen--;
1037
    }
1038
#endif
1039
101
  }
1040
1.75k
  else {
1041
    /* clear path */
1042
1.75k
    const char *schemep = NULL;
1043
1.75k
    const char *hostp;
1044
1.75k
    size_t hostlen;
1045
1046
1.75k
    if(schemelen) {
1047
84
      int i = 0;
1048
84
      const char *p = &url[schemelen + 1];
1049
179
      while((*p == '/') && (i < 4)) {
1050
95
        p++;
1051
95
        i++;
1052
95
      }
1053
1054
84
      schemep = schemebuf;
1055
84
      if(!Curl_get_scheme_handler(schemep) &&
1056
52
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1057
52
        result = CURLUE_UNSUPPORTED_SCHEME;
1058
52
        goto fail;
1059
52
      }
1060
1061
32
      if((i < 1) || (i > 3)) {
1062
        /* less than one or more than three slashes */
1063
1
        result = CURLUE_BAD_SLASHES;
1064
1
        goto fail;
1065
1
      }
1066
31
      hostp = p; /* hostname starts here */
1067
31
    }
1068
1.67k
    else {
1069
      /* no scheme! */
1070
1071
1.67k
      if(!(flags & (CURLU_DEFAULT_SCHEME | CURLU_GUESS_SCHEME))) {
1072
0
        result = CURLUE_BAD_SCHEME;
1073
0
        goto fail;
1074
0
      }
1075
1.67k
      if(flags & CURLU_DEFAULT_SCHEME)
1076
0
        schemep = DEFAULT_SCHEME;
1077
1078
      /*
1079
       * The URL was badly formatted, let's try without scheme specified.
1080
       */
1081
1.67k
      hostp = url;
1082
1.67k
    }
1083
1084
1.70k
    if(schemep) {
1085
31
      u->scheme = curlx_strdup(schemep);
1086
31
      if(!u->scheme) {
1087
0
        result = CURLUE_OUT_OF_MEMORY;
1088
0
        goto fail;
1089
0
      }
1090
31
    }
1091
1092
    /* find the end of the hostname + port number */
1093
1.70k
    hostlen = strcspn(hostp, "/?#");
1094
1.70k
    path = &hostp[hostlen];
1095
1096
    /* this pathlen also contains the query and the fragment */
1097
1.70k
    pathlen = urllen - (path - url);
1098
1.70k
    if(hostlen) {
1099
1100
1.70k
      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1101
1.70k
      if(result)
1102
534
        goto fail;
1103
1104
1.16k
      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1105
1.16k
        const char *hostname = curlx_dyn_ptr(&host);
1106
        /* legacy curl-style guess based on hostname */
1107
1.16k
        if(checkprefix("ftp.", hostname))
1108
1
          schemep = "ftp";
1109
1.16k
        else if(checkprefix("dict.", hostname))
1110
1
          schemep = "dict";
1111
1.16k
        else if(checkprefix("ldap.", hostname))
1112
1
          schemep = "ldap";
1113
1.16k
        else if(checkprefix("imap.", hostname))
1114
1
          schemep = "imap";
1115
1.16k
        else if(checkprefix("smtp.", hostname))
1116
2
          schemep = "smtp";
1117
1.16k
        else if(checkprefix("pop3.", hostname))
1118
1
          schemep = "pop3";
1119
1.16k
        else
1120
1.16k
          schemep = "http";
1121
1122
1.16k
        u->scheme = curlx_strdup(schemep);
1123
1.16k
        if(!u->scheme) {
1124
0
          result = CURLUE_OUT_OF_MEMORY;
1125
0
          goto fail;
1126
0
        }
1127
1.16k
        u->guessed_scheme = TRUE;
1128
1.16k
      }
1129
1.16k
    }
1130
3
    else if(flags & CURLU_NO_AUTHORITY) {
1131
      /* allowed to be empty. */
1132
0
      if(curlx_dyn_add(&host, "")) {
1133
0
        result = CURLUE_OUT_OF_MEMORY;
1134
0
        goto fail;
1135
0
      }
1136
0
    }
1137
3
    else {
1138
3
      result = CURLUE_NO_HOST;
1139
3
      goto fail;
1140
3
    }
1141
1.70k
  }
1142
1143
1.25k
  fragment = strchr(path, '#');
1144
1.25k
  if(fragment) {
1145
30
    fraglen = pathlen - (fragment - path);
1146
30
    u->fragment_present = TRUE;
1147
30
    if(fraglen > 1) {
1148
      /* skip the leading '#' in the copy but include the terminating null */
1149
27
      if(flags & CURLU_URLENCODE) {
1150
0
        struct dynbuf enc;
1151
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1152
0
        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1153
0
        if(result)
1154
0
          goto fail;
1155
0
        u->fragment = curlx_dyn_ptr(&enc);
1156
0
      }
1157
27
      else {
1158
27
        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1159
27
        if(!u->fragment) {
1160
0
          result = CURLUE_OUT_OF_MEMORY;
1161
0
          goto fail;
1162
0
        }
1163
27
      }
1164
27
    }
1165
    /* after this, pathlen still contains the query */
1166
30
    pathlen -= fraglen;
1167
30
  }
1168
1169
1.25k
  query = memchr(path, '?', pathlen);
1170
1.25k
  if(query) {
1171
29
    size_t qlen = fragment ? (size_t)(fragment - query) :
1172
29
      pathlen - (query - path);
1173
29
    pathlen -= qlen;
1174
29
    u->query_present = TRUE;
1175
29
    if(qlen > 1) {
1176
22
      if(flags & CURLU_URLENCODE) {
1177
0
        struct dynbuf enc;
1178
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1179
        /* skip the leading question mark */
1180
0
        result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1181
0
        if(result)
1182
0
          goto fail;
1183
0
        u->query = curlx_dyn_ptr(&enc);
1184
0
      }
1185
22
      else {
1186
22
        u->query = Curl_memdup0(query + 1, qlen - 1);
1187
22
        if(!u->query) {
1188
0
          result = CURLUE_OUT_OF_MEMORY;
1189
0
          goto fail;
1190
0
        }
1191
22
      }
1192
22
    }
1193
7
    else {
1194
      /* single byte query */
1195
7
      u->query = curlx_strdup("");
1196
7
      if(!u->query) {
1197
0
        result = CURLUE_OUT_OF_MEMORY;
1198
0
        goto fail;
1199
0
      }
1200
7
    }
1201
29
  }
1202
1203
1.25k
  if(pathlen && (flags & CURLU_URLENCODE)) {
1204
0
    struct dynbuf enc;
1205
0
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1206
0
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1207
0
    if(result)
1208
0
      goto fail;
1209
0
    pathlen = curlx_dyn_len(&enc);
1210
0
    path = u->path = curlx_dyn_ptr(&enc);
1211
0
  }
1212
1213
1.25k
  if(pathlen <= 1) {
1214
    /* there is no path left or just the slash, unset */
1215
779
    path = NULL;
1216
779
  }
1217
473
  else {
1218
473
    if(!u->path) {
1219
473
      u->path = Curl_memdup0(path, pathlen);
1220
473
      if(!u->path) {
1221
0
        result = CURLUE_OUT_OF_MEMORY;
1222
0
        goto fail;
1223
0
      }
1224
473
      path = u->path;
1225
473
    }
1226
0
    else if(flags & CURLU_URLENCODE)
1227
      /* it might have encoded more than just the path so cut it */
1228
0
      u->path[pathlen] = 0;
1229
1230
473
    if(!(flags & CURLU_PATH_AS_IS)) {
1231
      /* remove ../ and ./ sequences according to RFC3986 */
1232
473
      char *dedot;
1233
473
      int err = dedotdotify(path, pathlen, &dedot);
1234
473
      if(err) {
1235
0
        result = CURLUE_OUT_OF_MEMORY;
1236
0
        goto fail;
1237
0
      }
1238
473
      if(dedot) {
1239
473
        curlx_free(u->path);
1240
473
        u->path = dedot;
1241
473
      }
1242
473
    }
1243
473
  }
1244
1245
1.25k
  u->host = curlx_dyn_ptr(&host);
1246
1247
1.25k
  return result;
1248
687
fail:
1249
687
  curlx_dyn_free(&host);
1250
687
  free_urlhandle(u);
1251
687
  return result;
1252
1.25k
}
1253
1254
/*
1255
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1256
 */
1257
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1258
                                      unsigned int flags)
1259
1.93k
{
1260
1.93k
  CURLUcode result;
1261
1.93k
  CURLU tmpurl;
1262
1.93k
  memset(&tmpurl, 0, sizeof(tmpurl));
1263
1.93k
  result = parseurl(url, &tmpurl, flags);
1264
1.93k
  if(!result) {
1265
1.25k
    free_urlhandle(u);
1266
1.25k
    *u = tmpurl;
1267
1.25k
  }
1268
1.93k
  return result;
1269
1.93k
}
1270
1271
/*
1272
 */
1273
CURLU *curl_url(void)
1274
1.94k
{
1275
1.94k
  return curlx_calloc(1, sizeof(struct Curl_URL));
1276
1.94k
}
1277
1278
/*
 * Release a URL handle: first everything it holds, then the handle itself.
 * Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  curlx_free(u);
}
1285
1286
/*
 * Duplicate the string member 'name' from the struct 'src' points to into
 * the struct 'dest' points to. A NULL member in 'src' is skipped. If the
 * duplication fails, control jumps to a 'fail' label that the using function
 * must provide.
 *
 * 'dest' and 'src' are parenthesized so that arbitrary pointer expressions
 * (such as '&object') can be passed.
 */
#define DUP(dest, src, name)                          \
  do {                                                \
    if((src)->name) {                                 \
      (dest)->name = curlx_strdup((src)->name);       \
      if(!(dest)->name)                               \
        goto fail;                                    \
    }                                                 \
  } while(0)
1294
1295
CURLU *curl_url_dup(const CURLU *in)
1296
0
{
1297
0
  struct Curl_URL *u = curlx_calloc(1, sizeof(struct Curl_URL));
1298
0
  if(u) {
1299
0
    DUP(u, in, scheme);
1300
0
    DUP(u, in, user);
1301
0
    DUP(u, in, password);
1302
0
    DUP(u, in, options);
1303
0
    DUP(u, in, host);
1304
0
    DUP(u, in, port);
1305
0
    DUP(u, in, path);
1306
0
    DUP(u, in, query);
1307
0
    DUP(u, in, fragment);
1308
0
    DUP(u, in, zoneid);
1309
0
    u->portnum = in->portnum;
1310
0
    u->fragment_present = in->fragment_present;
1311
0
    u->query_present = in->query_present;
1312
0
  }
1313
0
  return u;
1314
0
fail:
1315
0
  curl_url_cleanup(u);
1316
0
  return NULL;
1317
0
}
1318
1319
#ifndef USE_IDN
/* without USE_IDN both conversions just report the missing support */
#define host_decode(x, y) CURLUE_LACKS_IDN
#define host_encode(x, y) CURLUE_LACKS_IDN
#else
/*
 * Run 'host' through Curl_idn_decode() and store the allocated result in
 * '*allochost'. An out-of-memory failure is passed on as
 * CURLUE_OUT_OF_MEMORY, every other failure becomes CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode result = Curl_idn_decode(host, allochost);
  if(!result)
    return CURLUE_OK;
  if(result == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/*
 * Run 'host' through Curl_idn_encode() and store the allocated result in
 * '*allochost'. An out-of-memory failure is passed on as
 * CURLUE_OUT_OF_MEMORY, every other failure becomes CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode result = Curl_idn_encode(host, allochost);
  if(!result)
    return CURLUE_OK;
  if(result == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}
#endif
1341
1342
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
1343
                               const char *ptr, char **partp,
1344
                               bool plusdecode, unsigned int flags)
1345
0
{
1346
0
  CURLUcode uc = CURLUE_OK;
1347
0
  size_t partlen = strlen(ptr);
1348
0
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1349
0
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1350
0
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
1351
0
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
1352
0
  char *part = Curl_memdup0(ptr, partlen);
1353
0
  *partp = NULL;
1354
0
  if(!part)
1355
0
    return CURLUE_OUT_OF_MEMORY;
1356
0
  if(plusdecode) {
1357
    /* convert + to space */
1358
0
    char *plus = part;
1359
0
    size_t i = 0;
1360
0
    for(i = 0; i < partlen; ++plus, i++) {
1361
0
      if(*plus == '+')
1362
0
        *plus = ' ';
1363
0
    }
1364
0
  }
1365
0
  if(urldecode) {
1366
0
    char *decoded;
1367
0
    size_t dlen;
1368
    /* this unconditional rejection of control bytes is documented
1369
       API behavior */
1370
0
    CURLcode res = Curl_urldecode(part, partlen, &decoded, &dlen, REJECT_CTRL);
1371
0
    curlx_free(part);
1372
0
    if(res)
1373
0
      return CURLUE_URLDECODE;
1374
0
    part = decoded;
1375
0
    partlen = dlen;
1376
0
  }
1377
0
  if(urlencode) {
1378
0
    struct dynbuf enc;
1379
0
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1380
0
    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
1381
0
    curlx_free(part);
1382
0
    if(uc)
1383
0
      return uc;
1384
0
    part = curlx_dyn_ptr(&enc);
1385
0
  }
1386
0
  else if(punycode) {
1387
0
    if(!Curl_is_ASCII_name(u->host)) {
1388
0
      char *punyversion = NULL;
1389
0
      uc = host_decode(part, &punyversion);
1390
0
      curlx_free(part);
1391
0
      if(uc)
1392
0
        return uc;
1393
0
      part = punyversion;
1394
0
    }
1395
0
  }
1396
0
  else if(depunyfy) {
1397
0
    if(Curl_is_ASCII_name(u->host)) {
1398
0
      char *unpunified = NULL;
1399
0
      uc = host_encode(part, &unpunified);
1400
0
      curlx_free(part);
1401
0
      if(uc)
1402
0
        return uc;
1403
0
      part = unpunified;
1404
0
    }
1405
0
  }
1406
0
  *partp = part;
1407
0
  return CURLUE_OK;
1408
0
}
1409
1410
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1411
1.69k
{
1412
1.69k
  char *url;
1413
1.69k
  const char *scheme;
1414
1.69k
  char *options = u->options;
1415
1.69k
  char *port = u->port;
1416
1.69k
  char *allochost = NULL;
1417
1.69k
  bool show_fragment =
1418
1.69k
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1419
1.69k
  bool show_query = (u->query && u->query[0]) ||
1420
1.69k
    (u->query_present && flags & CURLU_GET_EMPTY);
1421
1.69k
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1422
1.69k
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1423
1.69k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1424
1.69k
  char portbuf[7];
1425
1.69k
  if(u->scheme && curl_strequal("file", u->scheme)) {
1426
0
    url = curl_maprintf("file://%s%s%s%s%s",
1427
0
                        u->path,
1428
0
                        show_query ? "?": "",
1429
0
                        u->query ? u->query : "",
1430
0
                        show_fragment ? "#": "",
1431
0
                        u->fragment ? u->fragment : "");
1432
0
  }
1433
1.69k
  else if(!u->host)
1434
1.69k
    return CURLUE_NO_HOST;
1435
0
  else {
1436
0
    const struct Curl_handler *h = NULL;
1437
0
    char schemebuf[MAX_SCHEME_LEN + 5];
1438
0
    if(u->scheme)
1439
0
      scheme = u->scheme;
1440
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1441
0
      scheme = DEFAULT_SCHEME;
1442
0
    else
1443
0
      return CURLUE_NO_SCHEME;
1444
1445
0
    h = Curl_get_scheme_handler(scheme);
1446
0
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1447
      /* there is no stored port number, but asked to deliver
1448
         a default one for the scheme */
1449
0
      if(h) {
1450
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1451
0
        port = portbuf;
1452
0
      }
1453
0
    }
1454
0
    else if(port) {
1455
      /* there is a stored port number, but asked to inhibit if it matches
1456
         the default one for the scheme */
1457
0
      if(h && (h->defport == u->portnum) &&
1458
0
         (flags & CURLU_NO_DEFAULT_PORT))
1459
0
        port = NULL;
1460
0
    }
1461
1462
0
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1463
0
      options = NULL;
1464
1465
0
    if(u->host[0] == '[') {
1466
0
      if(u->zoneid) {
1467
        /* make it '[ host %25 zoneid ]' */
1468
0
        struct dynbuf enc;
1469
0
        size_t hostlen = strlen(u->host);
1470
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1471
0
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1472
0
                          u->zoneid))
1473
0
          return CURLUE_OUT_OF_MEMORY;
1474
0
        allochost = curlx_dyn_ptr(&enc);
1475
0
      }
1476
0
    }
1477
0
    else if(urlencode) {
1478
0
      allochost = curl_easy_escape(NULL, u->host, 0);
1479
0
      if(!allochost)
1480
0
        return CURLUE_OUT_OF_MEMORY;
1481
0
    }
1482
0
    else if(punycode) {
1483
0
      if(!Curl_is_ASCII_name(u->host)) {
1484
0
        CURLUcode ret = host_decode(u->host, &allochost);
1485
0
        if(ret)
1486
0
          return ret;
1487
0
      }
1488
0
    }
1489
0
    else if(depunyfy) {
1490
0
      if(Curl_is_ASCII_name(u->host)) {
1491
0
        CURLUcode ret = host_encode(u->host, &allochost);
1492
0
        if(ret)
1493
0
          return ret;
1494
0
      }
1495
0
    }
1496
1497
0
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1498
0
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1499
0
    else
1500
0
      schemebuf[0] = 0;
1501
1502
0
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1503
0
                        schemebuf,
1504
0
                        u->user ? u->user : "",
1505
0
                        u->password ? ":": "",
1506
0
                        u->password ? u->password : "",
1507
0
                        options ? ";" : "",
1508
0
                        options ? options : "",
1509
0
                        (u->user || u->password || options) ? "@": "",
1510
0
                        allochost ? allochost : u->host,
1511
0
                        port ? ":": "",
1512
0
                        port ? port : "",
1513
0
                        u->path ? u->path : "/",
1514
0
                        show_query ? "?": "",
1515
0
                        u->query ? u->query : "",
1516
0
                        show_fragment ? "#": "",
1517
0
                        u->fragment ? u->fragment : "");
1518
0
    curlx_free(allochost);
1519
0
  }
1520
0
  if(!url)
1521
0
    return CURLUE_OUT_OF_MEMORY;
1522
0
  *part = url;
1523
0
  return CURLUE_OK;
1524
0
}
1525
1526
/*
 * Extract the part 'what' from the URL handle 'u' and return it as a newly
 * allocated string in '*part'. '*part' is set to NULL on entry, so it stays
 * NULL when an error is returned.
 *
 * Returns CURLUE_BAD_HANDLE or CURLUE_BAD_PARTPOINTER for NULL arguments, a
 * part-specific CURLUE_NO_* code when the part is not set, and
 * CURLUE_UNKNOWN_PART for an unrecognized 'what'. CURLUPART_URL is handed
 * over to urlget_url(); all other parts are formatted by urlget_format().
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  char portbuf[7];
  bool plusdecode = FALSE;
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_URL:
    return urlget_url(u, part, flags);

  case CURLUPART_SCHEME:
    /* a guessed scheme is hidden when the caller asks for that */
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    break;

  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;

  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;

  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;

  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;

  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;

  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~CURLU_URLDECODE; /* never for port */
    if(u->scheme) {
      if(value) {
        /* a port number is stored: hide it if it is the default one for
           the scheme and the caller asked for that */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* no port number is stored: deliver the default one for the
           scheme since the caller asked for it */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h) {
          curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          value = portbuf;
        }
      }
    }
    break;

  case CURLUPART_PATH:
    /* an unset path is returned as a single slash */
    value = u->path ? u->path : "/";
    break;

  case CURLUPART_QUERY:
    value = u->query;
    missing = CURLUE_NO_QUERY;
    plusdecode = flags & CURLU_URLDECODE;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* there was a blank query and the user did not ask for it */
      value = NULL;
    break;

  case CURLUPART_FRAGMENT:
    value = u->fragment;
    missing = CURLUE_NO_FRAGMENT;
    if(!value && u->fragment_present && flags & CURLU_GET_EMPTY)
      /* there was a blank fragment and the user asks for it */
      value = "";
    break;

  default:
    value = NULL;
    break;
  }

  if(!value)
    return missing;

  return urlget_format(u, what, value, part, plusdecode, flags);
}
1620
1621
/*
 * Check that 'scheme' is acceptable as a URL scheme. Nothing is stored here;
 * on success the only change to 'u' is that 'guessed_scheme' is cleared.
 *
 * Returns CURLUE_BAD_SCHEME if the name is empty, longer than MAX_SCHEME_LEN
 * or (for schemes without a handler) not of the form
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ). Returns
 * CURLUE_UNSUPPORTED_SCHEME if no handler exists for the scheme and
 * CURLU_NON_SUPPORT_SCHEME is not set in 'flags'.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_handler *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme_handler(scheme);
  if(!h) {
    const char *s = scheme;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;
    if(!ISALPHA(*s))
      return CURLUE_BAD_SCHEME;
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
       The first letter is verified above; check every remaining byte up to
       and including the last one. */
    for(s++; *s; s++) {
      if(!ISALNUM(*s) && (*s != '+') && (*s != '-') && (*s != '.'))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1650
1651
/*
 * Store 'provided_port' as the port of 'u', both as a freshly formatted
 * string and as a number. The input must start with a digit, must be
 * accepted by curlx_str_number() with 0xffff as the maximum and must have no
 * trailing bytes after the number; otherwise CURLUE_BAD_PORT_NUMBER is
 * returned and 'u' is unchanged. The previous port string is freed only
 * once the new one has been allocated.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  const char *cursor = provided_port;
  curl_off_t port;
  char *portstr;

  if(!ISDIGIT(cursor[0]))
    /* not a number */
    return CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&cursor, &port, 0xffff))
    /* weirdly provided number, not good! */
    return CURLUE_BAD_PORT_NUMBER;

  if(*cursor)
    /* something follows the number, not good either */
    return CURLUE_BAD_PORT_NUMBER;

  portstr = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!portstr)
    return CURLUE_OUT_OF_MEMORY;

  curlx_free(u->port);
  u->port = portstr;
  u->portnum = (unsigned short)port;
  return CURLUE_OK;
}
1669
1670
/*
 * Set a complete URL in 'u', replacing what is there or, when 'u' already
 * holds enough for a full URL, applying 'url' as a relative URL on top of
 * it.
 *
 * 'part_size' is the length of 'url'. A blank 'url' is only accepted when a
 * full URL can be extracted from 'u', in which case nothing changes;
 * otherwise CURLUE_MALFORMED_INPUT is returned.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *oldurl = NULL;
  CURLUcode uc;

  if(!part_size) {
    /* a blank URL is not a valid URL unless we already have a complete one
       and this is a redirect */
    uc = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
    if(!uc) {
      /* success, meaning the "" is a fine relative URL, but nothing
         changes */
      curlx_free(oldurl);
      return CURLUE_OK;
    }
    return (uc == CURLUE_OUT_OF_MEMORY) ? uc : CURLUE_MALFORMED_INPUT;
  }

  /* an absolute new URL always replaces the existing one */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME | CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  uc = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
  if(uc) {
    if(uc == CURLUE_OUT_OF_MEMORY)
      return uc;
    /* the old URL is incomplete (no absolute URL could be extracted), so
       there is nothing to be relative to: replace it with the new one */
    return parseurl_and_replace(url, u, flags);
  }

  DEBUGASSERT(oldurl); /* it is set here */

  /* apply the relative part to create a new URL */
  uc = redirect_url(oldurl, url, u, flags);
  curlx_free(oldurl);
  return uc;
}
1716
1717
/*
 * Clear the part 'what' of the URL handle 'u'.
 *
 * CURLUPART_URL releases everything and zeroes the whole handle. For every
 * other known part the stored string is freed and its pointer set to NULL,
 * and the state tied to that part (guessed scheme, port number, query or
 * fragment presence) is reset as well. Returns CURLUE_UNKNOWN_PART, without
 * touching 'u', if 'what' is not a known part.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **slot = NULL; /* the string member to free and NULL */

  switch(what) {
  case CURLUPART_URL:
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    slot = &u->scheme;
    break;
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    slot = &u->port;
    break;
  case CURLUPART_PATH:
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    slot = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    slot = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  Curl_safefree(*slot);
  return CURLUE_OK;
}
1763
1764
static bool allowed_in_path(unsigned char x)
1765
0
{
1766
0
  switch(x) {
1767
0
  case '!':
1768
0
  case '$':
1769
0
  case '&':
1770
0
  case '\'':
1771
0
  case '(':
1772
0
  case ')':
1773
0
  case '{':
1774
0
  case '}':
1775
0
  case '[':
1776
0
  case ']':
1777
0
  case '*':
1778
0
  case '+':
1779
0
  case ',':
1780
0
  case ';':
1781
0
  case '=':
1782
0
  case ':':
1783
0
  case '@':
1784
0
  case '/':
1785
0
    return TRUE;
1786
0
  }
1787
0
  return FALSE;
1788
0
}
1789
1790
/*
 * curl_url_set() stores 'part' as component 'what' of the URL handle 'u'.
 *
 * - A NULL 'u' returns CURLUE_BAD_HANDLE.
 * - A NULL 'part' clears the component via urlset_clear().
 * - A 'part' longer than CURL_MAX_INPUT_LENGTH returns
 *   CURLUE_MALFORMED_INPUT.
 * - CURLUPART_PORT and CURLUPART_URL are delegated to set_url_port() and
 *   set_url() respectively.
 * - An unrecognized 'what' returns CURLUE_UNKNOWN_PART.
 *
 * Flags inspected here:
 *   CURLU_URLENCODE   - percent-encode 'part' before storing it (never
 *                       applied to the scheme). For the query, a space
 *                       becomes '+'.
 *   CURLU_APPENDQUERY - for CURLUPART_QUERY, append to an existing query
 *                       (adding a '&' separator when needed) instead of
 *                       replacing it. When encoding, the first '=' is left
 *                       as-is.
 *   CURLU_NO_AUTHORITY - for CURLUPART_HOST, accept an empty hostname.
 * The full 'flags' value is also passed on to set_url_scheme() and
 * set_url().
 *
 * Without CURLU_URLENCODE the input is stored as given, except that the hex
 * digits of %XX sequences are lowercased.
 *
 * Returns CURLUE_OK on success, CURLUE_BAD_HOSTNAME when a host fails the
 * hostname check, or the cc2cu() translation of a dynbuf failure.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool plusencode = FALSE;
  bool pathmode = FALSE;
  bool leadingslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;
  size_t nalloc;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    /* setting a part to NULL clears it */
    return urlset_clear(u, what);

  nalloc = strlen(part);
  if(nalloc > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  /* pick the destination member and the encoding rules for this part */
  switch(what) {
  case CURLUPART_SCHEME: {
    CURLUcode status = set_url_scheme(u, part, flags);
    if(status)
      return status;
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    /* NOTE(review): the zone id is dropped before the new hostname is
       validated, so it is gone even if the host is rejected below. Kept in
       this position on purpose: hostname_check() may depend on it being
       cleared first - confirm before moving. */
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
    return set_url_port(u, part);
  case CURLUPART_PATH:
    pathmode = TRUE;
    leadingslash = TRUE; /* enforce */
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
    equalsencode = appendquery;
    storep = &u->query;
    /* NOTE(review): set before the value is stored; a failure further down
       leaves the flag set */
    u->query_present = TRUE;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    /* NOTE(review): same early-set behavior as query_present above */
    u->fragment_present = TRUE;
    break;
  case CURLUPART_URL:
    return set_url(u, part, nalloc, flags);
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp;
    struct dynbuf enc;
    /* worst case: every input byte becomes %XX, plus one extra byte, plus
       an optional leading slash */
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);

    if(leadingslash && (part[0] != '/')) {
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
      if(result)
        return cc2cu(result);
    }
    if(urlencode) {
      const unsigned char *i;

      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = curlx_dyn_addn(&enc, "+", 1);
          if(result)
            /* translate like the sibling branches instead of hard-coding
               an out-of-memory code */
            return cc2cu(result);
        }
        else if(ISUNRESERVED(*i) ||
                (pathmode && allowed_in_path(*i)) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = curlx_dyn_addn(&enc, i, 1);
          if(result)
            return cc2cu(result);
        }
        else {
          /* everything else is emitted as a three-byte %XX sequence */
          unsigned char out[3] = { '%' };
          Curl_hexbyte(&out[1], *i);
          result = curlx_dyn_addn(&enc, out, 3);
          if(result)
            return cc2cu(result);
        }
      }
    }
    else {
      char *p;
      CURLcode result = curlx_dyn_add(&enc, part);
      if(result)
        return cc2cu(result);
      p = curlx_dyn_ptr(&enc);
      while(*p) {
        /* make sure the hex digits of percent-encoded triplets are lower
           case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }
    newp = curlx_dyn_ptr(&enc);

    if(appendquery && newp) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen - 1] != '&');
      if(querylen) {
        struct dynbuf qbuf;
        CURLcode result;
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);

        /* original query, optional separator, then the new piece; stop at
           the first failure */
        result = curlx_dyn_addn(&qbuf, u->query, querylen);
        if(!result && addamperand)
          result = curlx_dyn_addn(&qbuf, "&", 1);
        if(!result)
          result = curlx_dyn_add(&qbuf, newp);

        /* 'newp' points into 'enc', so release it only after the copy */
        curlx_dyn_free(&enc);
        if(result)
          /* The combined query can exceed qbuf's CURL_MAX_INPUT_LENGTH cap
             without memory being exhausted, so map the actual dynbuf error
             through cc2cu() like every other failure path in this function.
             'qbuf' is not freed here, as before; presumably the dynbuf add
             functions release it on failure - confirm. */
          return cc2cu(result);

        curlx_free(*storep);
        *storep = curlx_dyn_ptr(&qbuf);
        return CURLUE_OK;
      }
      /* no existing query: fall through and store 'newp' as the query */
    }

    else if(what == CURLUPART_HOST) {
      size_t n = curlx_dyn_len(&enc);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it is allowed to be empty. */
      }
      else {
        bool bad = FALSE;
        if(!n)
          bad = TRUE; /* empty hostname is not okay */
        else if(!urlencode) {
          /* if the hostname part was not URL encoded here, it was set ready
             URL encoded so we need to decode it to check */
          size_t dlen;
          char *decoded = NULL;
          CURLcode result =
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
          if(result || hostname_check(u, decoded, dlen))
            bad = TRUE;
          curlx_free(decoded);
        }
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
          bad = TRUE;
        if(bad) {
          curlx_dyn_free(&enc);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    /* replace the old value; the handle takes ownership of the dynbuf's
       buffer */
    curlx_free(*storep);
    *storep = (char *)CURL_UNCONST(newp);
  }
  return CURLUE_OK;
}