Coverage Report

Created: 2025-07-23 09:13

/src/gdal/curl/lib/urlapi.c
Line
Count
Source (jump to first uncovered line)
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
25
#include "curl_setup.h"
26
27
#include "urldata.h"
28
#include "urlapi-int.h"
29
#include "strcase.h"
30
#include "url.h"
31
#include "escape.h"
32
#include "curl_ctype.h"
33
#include "curlx/inet_pton.h"
34
#include "curlx/inet_ntop.h"
35
#include "strdup.h"
36
#include "idn.h"
37
#include "curlx/strparse.h"
38
#include "curl_memrchr.h"
39
40
/* The last 3 #include files should be in this order */
41
#include "curl_printf.h"
42
#include "curl_memory.h"
43
#include "memdebug.h"
44
45
  /* MS-DOS/Windows style drive prefix, eg c: in c:foo */
46
/* Non-zero when 'str' begins with an ASCII letter followed by ':'. The
   argument is parenthesized so that expression arguments (such as p + 1)
   expand correctly. Note that 'str' is evaluated more than once. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   ((str)[1] == ':'))
50
51
  /* MS-DOS/Windows style drive prefix, optionally with
52
   * a '|' instead of ':', followed by a slash or NUL */
53
/* Non-zero when 'str' is: one ASCII letter, then ':' or '|', then '/',
   a backslash or the terminating NUL. 'str' is evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) || \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
58
59
/* scheme is not URL encoded, the longest libcurl supported ones are... */
60
1.59M
#define MAX_SCHEME_LEN 40
61
62
/*
63
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
64
 * sure we have _some_ value for AF_INET6 without polluting our fake value
65
 * everywhere.
66
 */
67
#if !defined(USE_IPV6) && !defined(AF_INET6)
68
#define AF_INET6 (AF_INET + 1)
69
#endif
70
71
/* Internal representation of CURLU. Point to URL-encoded strings. */
72
struct Curl_URL {
73
  char *scheme;
74
  char *user;
75
  char *password;
76
  char *options; /* IMAP only? */
77
  char *host;
78
  char *zoneid; /* for numerical IPv6 addresses */
79
  char *port;
80
  char *path;
81
  char *query;
82
  char *fragment;
83
  unsigned short portnum; /* the numerical version (if 'port' is set) */
84
  BIT(query_present);    /* to support blank */
85
  BIT(fragment_present); /* to support blank */
86
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
87
};
88
89
0
#define DEFAULT_SCHEME "https"
90
91
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
92
                                      unsigned int flags);
93
94
static void free_urlhandle(struct Curl_URL *u)
95
666k
{
96
666k
  free(u->scheme);
97
666k
  free(u->user);
98
666k
  free(u->password);
99
666k
  free(u->options);
100
666k
  free(u->host);
101
666k
  free(u->zoneid);
102
666k
  free(u->port);
103
666k
  free(u->path);
104
666k
  free(u->query);
105
666k
  free(u->fragment);
106
666k
}
107
108
/*
 * Return a pointer to the separator that ends the hostname: the first '/'
 * or '?' found after the first "//" in 'url' (or from the start of 'url'
 * when there is no "//"), as in http://www.example.com?id=2380
 *
 * If there is no such separator, the returned pointer points at the
 * terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  /* locate where the hostname begins */
  const char *host = strstr(url, "//");
  host = host ? host + 2 : url;

  /* skip everything up to the first '/' or '?' (or the end of string) */
  return host + strcspn(host, "/?");
}
127
128
/* Map a (non-zero) CURLcode to a CURLUcode: CURLE_TOO_LARGE becomes
   CURLUE_TOO_LARGE, every other value becomes CURLUE_OUT_OF_MEMORY */
#define cc2cu(x) (((x) != CURLE_TOO_LARGE) ? CURLUE_OUT_OF_MEMORY : \
                  CURLUE_TOO_LARGE)
131
132
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
133
 * spaces in the source URL accordingly.
134
 *
135
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
136
 * will fail.
137
 */
138
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
139
                               size_t len, bool relative,
140
                               bool query)
141
120k
{
142
  /* we must add this with whitespace-replacing */
143
120k
  bool left = !query;
144
120k
  const unsigned char *iptr;
145
120k
  const unsigned char *host_sep = (const unsigned char *) url;
146
120k
  CURLcode result = CURLE_OK;
147
148
120k
  if(!relative) {
149
0
    size_t n;
150
0
    host_sep = (const unsigned char *) find_host_sep(url);
151
152
    /* output the first piece as-is */
153
0
    n = (const char *)host_sep - url;
154
0
    result = curlx_dyn_addn(o, url, n);
155
0
    len -= n;
156
0
  }
157
158
58.1M
  for(iptr = host_sep; len && !result; iptr++, len--) {
159
58.0M
    if(*iptr == ' ') {
160
0
      if(left)
161
0
        result = curlx_dyn_addn(o, "%20", 3);
162
0
      else
163
0
        result = curlx_dyn_addn(o, "+", 1);
164
0
    }
165
58.0M
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
166
6.85M
      unsigned char out[3]={'%'};
167
6.85M
      Curl_hexbyte(&out[1], *iptr);
168
6.85M
      result = curlx_dyn_addn(o, out, 3);
169
6.85M
    }
170
51.1M
    else {
171
51.1M
      result = curlx_dyn_addn(o, iptr, 1);
172
51.1M
      if(*iptr == '?')
173
0
        left = FALSE;
174
51.1M
    }
175
58.0M
  }
176
177
120k
  if(result)
178
0
    return cc2cu(result);
179
120k
  return CURLUE_OK;
180
120k
}
181
182
/*
183
 * Returns the length of the scheme if the given URL is absolute (as opposed
184
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
185
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
186
 *
187
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
188
 * scheme.
189
 */
190
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
191
                            bool guess_scheme)
192
533k
{
193
533k
  size_t i = 0;
194
533k
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
195
533k
  (void)buflen; /* only used in debug-builds */
196
533k
  if(buf)
197
200k
    buf[0] = 0; /* always leave a defined value in buf */
198
#ifdef _WIN32
199
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
200
    return 0;
201
#endif
202
533k
  if(ISALPHA(url[0]))
203
1.59M
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
204
1.59M
      char s = url[i];
205
1.59M
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
206
        /* RFC 3986 3.1 explains:
207
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
208
        */
209
1.21M
      }
210
377k
      else {
211
377k
        break;
212
377k
      }
213
1.59M
    }
214
533k
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
215
    /* If this does not guess scheme, the scheme always ends with the colon so
216
       that this also detects data: URLs etc. In guessing mode, data: could
217
       be the hostname "data" with a specified port number. */
218
219
    /* the length of the scheme is the name part only */
220
289k
    size_t len = i;
221
289k
    if(buf) {
222
95.0k
      Curl_strntolower(buf, url, i);
223
95.0k
      buf[i] = 0;
224
95.0k
    }
225
289k
    return len;
226
289k
  }
227
244k
  return 0;
228
533k
}
229
230
/*
231
 * Concatenate a relative URL onto a base URL making it absolute.
232
 */
233
static CURLUcode redirect_url(const char *base, const char *relurl,
234
                              CURLU *u, unsigned int flags)
235
0
{
236
0
  struct dynbuf urlbuf;
237
0
  bool host_changed = FALSE;
238
0
  const char *useurl = relurl;
239
0
  const char *cutoff = NULL;
240
0
  size_t prelen;
241
0
  CURLUcode uc;
242
243
  /* protsep points to the start of the hostname, after [scheme]:// */
244
0
  const char *protsep = base + strlen(u->scheme) + 3;
245
0
  DEBUGASSERT(base && relurl && u); /* all set here */
246
0
  if(!base)
247
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
248
249
  /* handle different relative URL types */
250
0
  switch(relurl[0]) {
251
0
  case '/':
252
0
    if(relurl[1] == '/') {
253
      /* protocol-relative URL: //example.com/path */
254
0
      cutoff = protsep;
255
0
      useurl = &relurl[2];
256
0
      host_changed = TRUE;
257
0
    }
258
0
    else
259
      /* absolute /path */
260
0
      cutoff = strchr(protsep, '/');
261
0
    break;
262
263
0
  case '#':
264
    /* fragment-only change */
265
0
    if(u->fragment)
266
0
      cutoff = strchr(protsep, '#');
267
0
    break;
268
269
0
  default:
270
    /* path or query-only change */
271
0
    if(u->query && u->query[0])
272
      /* remove existing query */
273
0
      cutoff = strchr(protsep, '?');
274
0
    else if(u->fragment && u->fragment[0])
275
      /* Remove existing fragment */
276
0
      cutoff = strchr(protsep, '#');
277
278
0
    if(relurl[0] != '?') {
279
      /* append a relative path after the last slash */
280
0
      cutoff = memrchr(protsep, '/',
281
0
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
282
0
      if(cutoff)
283
0
        cutoff++; /* truncate after last slash */
284
0
    }
285
0
    break;
286
0
  }
287
288
0
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
289
290
  /* build new URL */
291
0
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
292
293
0
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
294
0
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
295
0
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
296
0
                              flags & ~CURLU_PATH_AS_IS);
297
0
  }
298
0
  else
299
0
    uc = CURLUE_OUT_OF_MEMORY;
300
301
0
  curlx_dyn_free(&urlbuf);
302
0
  return uc;
303
0
}
304
305
/* scan for byte values <= 31, 127 and sometimes space */
306
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
307
333k
{
308
333k
  size_t n = strlen(url);
309
333k
  size_t i;
310
333k
  unsigned char control;
311
333k
  const unsigned char *p = (const unsigned char *)url;
312
333k
  if(n > CURL_MAX_INPUT_LENGTH)
313
0
    return CURLUE_MALFORMED_INPUT;
314
315
333k
  control = allowspace ? 0x1f : 0x20;
316
316M
  for(i = 0; i < n; i++) {
317
316M
    if(p[i] <= control || p[i] == 127)
318
132k
      return CURLUE_MALFORMED_INPUT;
319
316M
  }
320
200k
  *urllen = n;
321
200k
  return CURLUE_OK;
322
333k
}
323
324
/*
325
 * parse_hostname_login()
326
 *
327
 * Parse the login details (username, password and options) from the URL and
328
 * strip them out of the hostname
329
 *
330
 */
331
static CURLUcode parse_hostname_login(struct Curl_URL *u,
332
                                      const char *login,
333
                                      size_t len,
334
                                      unsigned int flags,
335
                                      size_t *offset) /* to the hostname */
336
175k
{
337
175k
  CURLUcode result = CURLUE_OK;
338
175k
  CURLcode ccode;
339
175k
  char *userp = NULL;
340
175k
  char *passwdp = NULL;
341
175k
  char *optionsp = NULL;
342
175k
  const struct Curl_handler *h = NULL;
343
344
  /* At this point, we assume all the other special cases have been taken
345
   * care of, so the host is at most
346
   *
347
   *   [user[:password][;options]]@]hostname
348
   *
349
   * We need somewhere to put the embedded details, so do that first.
350
   */
351
175k
  char *ptr;
352
353
175k
  DEBUGASSERT(login);
354
355
175k
  *offset = 0;
356
175k
  ptr = memchr(login, '@', len);
357
175k
  if(!ptr)
358
136k
    goto out;
359
360
  /* We will now try to extract the
361
   * possible login information in a string like:
362
   * ftp://user:password@ftp.site.example:8021/README */
363
38.8k
  ptr++;
364
365
  /* if this is a known scheme, get some details */
366
38.8k
  if(u->scheme)
367
17.5k
    h = Curl_get_scheme_handler(u->scheme);
368
369
  /* We could use the login information in the URL so extract it. Only parse
370
     options if the handler says we should. Note that 'h' might be NULL! */
371
38.8k
  ccode = Curl_parse_login_details(login, ptr - login - 1,
372
38.8k
                                   &userp, &passwdp,
373
38.8k
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
374
0
                                   &optionsp : NULL);
375
38.8k
  if(ccode) {
376
0
    result = CURLUE_BAD_LOGIN;
377
0
    goto out;
378
0
  }
379
380
38.8k
  if(userp) {
381
38.8k
    if(flags & CURLU_DISALLOW_USER) {
382
      /* Option DISALLOW_USER is set and URL contains username. */
383
0
      result = CURLUE_USER_NOT_ALLOWED;
384
0
      goto out;
385
0
    }
386
38.8k
    free(u->user);
387
38.8k
    u->user = userp;
388
38.8k
  }
389
390
38.8k
  if(passwdp) {
391
23.0k
    free(u->password);
392
23.0k
    u->password = passwdp;
393
23.0k
  }
394
395
38.8k
  if(optionsp) {
396
0
    free(u->options);
397
0
    u->options = optionsp;
398
0
  }
399
400
  /* the hostname starts at this offset */
401
38.8k
  *offset = ptr - login;
402
38.8k
  return CURLUE_OK;
403
404
136k
out:
405
406
136k
  free(userp);
407
136k
  free(passwdp);
408
136k
  free(optionsp);
409
136k
  u->user = NULL;
410
136k
  u->password = NULL;
411
136k
  u->options = NULL;
412
413
136k
  return result;
414
38.8k
}
415
416
/*
 * Look for a ":port" suffix in the hostname held in 'host'. If there is one,
 * cut it off from 'host' and store the port in 'u' both as a number and as a
 * newly generated string.
 *
 * A colon with nothing after it is accepted (and ignored) only when
 * 'has_scheme' is TRUE.
 *
 * Returns CURLUE_BAD_IPV6 for a '[' without a matching ']',
 * CURLUE_BAD_PORT_NUMBER for a malformed port and CURLUE_OUT_OF_MEMORY if
 * the port string cannot be allocated.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *hostname = curlx_dyn_ptr(host);
  const char *colon;
  curl_off_t port;
  size_t keep;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /* this is a RFC2732-style specified IP-address: only what follows
       the ending bracket can be a port */
    const char *bracket = strchr(hostname, ']');
    if(!bracket)
      return CURLUE_BAD_IPV6;
    colon = bracket + 1;
    if(!*colon)
      colon = NULL;
    else if(*colon != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    /* no port part */
    return CURLUE_OK;

  /* Browser behavior adaptation. If there is a colon with no digits after,
     just cut off the name there which makes us ignore the colon and just
     use the default port. Firefox, Chrome and Safari all do that.

     Do not do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  keep = colon - hostname;
  curlx_dyn_setlen(host, keep);
  colon++;
  if(!*colon)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&colon, &port, 0xffff) || *colon)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short) port;
  /* generate a new port number string to get rid of leading zeroes etc */
  free(u->port);
  u->port = aprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  return u->port ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
}
469
470
/* this assumes 'hostname' now starts with [ */
471
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
472
                            size_t hlen) /* length of hostname */
473
1.10k
{
474
1.10k
  size_t len;
475
1.10k
  DEBUGASSERT(*hostname == '[');
476
1.10k
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
477
22
    return CURLUE_BAD_IPV6;
478
1.07k
  hostname++;
479
1.07k
  hlen -= 2;
480
481
  /* only valid IPv6 letters are ok */
482
1.07k
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
483
484
1.07k
  if(hlen != len) {
485
1.00k
    hlen = len;
486
1.00k
    if(hostname[len] == '%') {
487
      /* this could now be '%[zone id]' */
488
895
      char zoneid[16];
489
895
      int i = 0;
490
895
      char *h = &hostname[len + 1];
491
      /* pass '25' if present and is a URL encoded percent sign */
492
895
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
493
27
        h += 2;
494
7.61k
      while(*h && (*h != ']') && (i < 15))
495
6.72k
        zoneid[i++] = *h++;
496
895
      if(!i || (']' != *h))
497
325
        return CURLUE_BAD_IPV6;
498
570
      zoneid[i] = 0;
499
570
      u->zoneid = strdup(zoneid);
500
570
      if(!u->zoneid)
501
0
        return CURLUE_OUT_OF_MEMORY;
502
570
      hostname[len] = ']'; /* insert end bracket */
503
570
      hostname[len + 1] = 0; /* terminate the hostname */
504
570
    }
505
107
    else
506
107
      return CURLUE_BAD_IPV6;
507
    /* hostname is fine */
508
1.00k
  }
509
510
  /* Normalize the IPv6 address */
511
646
  {
512
646
    char dest[16]; /* fits a binary IPv6 address */
513
646
    hostname[hlen] = 0; /* end the address there */
514
646
    if(1 != curlx_inet_pton(AF_INET6, hostname, dest))
515
592
      return CURLUE_BAD_IPV6;
516
54
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen)) {
517
24
      hlen = strlen(hostname); /* might be shorter now */
518
24
      hostname[hlen + 1] = 0;
519
24
    }
520
54
    hostname[hlen] = ']'; /* restore ending bracket */
521
54
  }
522
0
  return CURLUE_OK;
523
646
}
524
525
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
526
                                size_t hlen) /* length of hostname */
527
148k
{
528
148k
  size_t len;
529
148k
  DEBUGASSERT(hostname);
530
531
148k
  if(!hlen)
532
0
    return CURLUE_NO_HOST;
533
148k
  else if(hostname[0] == '[')
534
1
    return ipv6_parse(u, hostname, hlen);
535
148k
  else {
536
    /* letters from the second string are not ok */
537
148k
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
538
148k
    if(hlen != len)
539
      /* hostname with bad content */
540
44.7k
      return CURLUE_BAD_HOSTNAME;
541
148k
  }
542
103k
  return CURLUE_OK;
543
148k
}
544
545
/*
546
 * Handle partial IPv4 numerical addresses and different bases, like
547
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
548
 *
549
 * If the given input string is syntactically wrong IPv4 or any part for
550
 * example is too big, this function returns HOST_NAME.
551
 *
552
 * Output the "normalized" version of that input string in plain quad decimal
553
 * integers.
554
 *
555
 * Returns the host type.
556
 */
557
558
0
#define HOST_ERROR   -1 /* out of memory */
559
560
297k
#define HOST_NAME    1
561
32.6k
#define HOST_IPV4    2
562
2.19k
#define HOST_IPV6    3
563
564
static int ipv4_normalize(struct dynbuf *host)
565
166k
{
566
166k
  bool done = FALSE;
567
166k
  int n = 0;
568
166k
  const char *c = curlx_dyn_ptr(host);
569
166k
  unsigned int parts[4] = {0, 0, 0, 0};
570
166k
  CURLcode result = CURLE_OK;
571
572
166k
  if(*c == '[')
573
1.09k
    return HOST_IPV6;
574
575
190k
  while(!done) {
576
171k
    int rc;
577
171k
    curl_off_t l;
578
171k
    if(*c == '0') {
579
17.3k
      if(c[1] == 'x') {
580
34
        c += 2; /* skip the prefix */
581
34
        rc = curlx_str_hex(&c, &l, UINT_MAX);
582
34
      }
583
17.3k
      else
584
17.3k
        rc = curlx_str_octal(&c, &l, UINT_MAX);
585
17.3k
    }
586
154k
    else
587
154k
      rc = curlx_str_number(&c, &l, UINT_MAX);
588
589
171k
    if(rc)
590
139k
      return HOST_NAME;
591
592
32.2k
    parts[n] = (unsigned int)l;
593
594
32.2k
    switch(*c) {
595
6.93k
    case '.':
596
6.93k
      if(n == 3)
597
89
        return HOST_NAME;
598
6.84k
      n++;
599
6.84k
      c++;
600
6.84k
      break;
601
602
18.0k
    case '\0':
603
18.0k
      done = TRUE;
604
18.0k
      break;
605
606
7.26k
    default:
607
7.26k
      return HOST_NAME;
608
32.2k
    }
609
32.2k
  }
610
611
18.0k
  switch(n) {
612
15.0k
  case 0: /* a -- 32 bits */
613
15.0k
    curlx_dyn_reset(host);
614
615
15.0k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
616
15.0k
                            (parts[0] >> 24),
617
15.0k
                            ((parts[0] >> 16) & 0xff),
618
15.0k
                            ((parts[0] >> 8) & 0xff),
619
15.0k
                            (parts[0] & 0xff));
620
15.0k
    break;
621
1.19k
  case 1: /* a.b -- 8.24 bits */
622
1.19k
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
623
294
      return HOST_NAME;
624
898
    curlx_dyn_reset(host);
625
898
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
626
898
                            (parts[0]),
627
898
                            ((parts[1] >> 16) & 0xff),
628
898
                            ((parts[1] >> 8) & 0xff),
629
898
                            (parts[1] & 0xff));
630
898
    break;
631
822
  case 2: /* a.b.c -- 8.8.16 bits */
632
822
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
633
613
      return HOST_NAME;
634
209
    curlx_dyn_reset(host);
635
209
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
636
209
                            (parts[0]),
637
209
                            (parts[1]),
638
209
                            ((parts[2] >> 8) & 0xff),
639
209
                            (parts[2] & 0xff));
640
209
    break;
641
947
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
642
947
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
643
947
       (parts[3] > 0xff))
644
823
      return HOST_NAME;
645
124
    curlx_dyn_reset(host);
646
124
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
647
124
                            (parts[0]),
648
124
                            (parts[1]),
649
124
                            (parts[2]),
650
124
                            (parts[3]));
651
124
    break;
652
18.0k
  }
653
16.3k
  if(result)
654
0
    return HOST_ERROR;
655
16.3k
  return HOST_IPV4;
656
16.3k
}
657
658
/* if necessary, replace the host content with a URL decoded version */
659
static CURLUcode urldecode_host(struct dynbuf *host)
660
148k
{
661
148k
  char *per = NULL;
662
148k
  const char *hostname = curlx_dyn_ptr(host);
663
148k
  per = strchr(hostname, '%');
664
148k
  if(!per)
665
    /* nothing to decode */
666
141k
    return CURLUE_OK;
667
7.32k
  else {
668
    /* encoded */
669
7.32k
    size_t dlen;
670
7.32k
    char *decoded;
671
7.32k
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
672
7.32k
                                     REJECT_CTRL);
673
7.32k
    if(result)
674
772
      return CURLUE_BAD_HOSTNAME;
675
6.55k
    curlx_dyn_reset(host);
676
6.55k
    result = curlx_dyn_addn(host, decoded, dlen);
677
6.55k
    free(decoded);
678
6.55k
    if(result)
679
0
      return cc2cu(result);
680
6.55k
  }
681
682
6.55k
  return CURLUE_OK;
683
148k
}
684
685
static CURLUcode parse_authority(struct Curl_URL *u,
686
                                 const char *auth, size_t authlen,
687
                                 unsigned int flags,
688
                                 struct dynbuf *host,
689
                                 bool has_scheme)
690
175k
{
691
175k
  size_t offset;
692
175k
  CURLUcode uc;
693
175k
  CURLcode result;
694
695
  /*
696
   * Parse the login details and strip them out of the hostname.
697
   */
698
175k
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
699
175k
  if(uc)
700
0
    goto out;
701
702
175k
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
703
175k
  if(result) {
704
0
    uc = cc2cu(result);
705
0
    goto out;
706
0
  }
707
708
175k
  uc = Curl_parse_port(u, host, has_scheme);
709
175k
  if(uc)
710
8.95k
    goto out;
711
712
166k
  if(!curlx_dyn_len(host))
713
412
    return CURLUE_NO_HOST;
714
715
166k
  switch(ipv4_normalize(host)) {
716
16.3k
  case HOST_IPV4:
717
16.3k
    break;
718
1.09k
  case HOST_IPV6:
719
1.09k
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
720
1.09k
    break;
721
148k
  case HOST_NAME:
722
148k
    uc = urldecode_host(host);
723
148k
    if(!uc)
724
148k
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
725
148k
    break;
726
0
  case HOST_ERROR:
727
0
    uc = CURLUE_OUT_OF_MEMORY;
728
0
    break;
729
0
  default:
730
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
731
0
    break;
732
166k
  }
733
734
175k
out:
735
175k
  return uc;
736
166k
}
737
738
/* used for HTTP/2 server push */
739
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
740
0
{
741
0
  CURLUcode result;
742
0
  struct dynbuf host;
743
744
0
  DEBUGASSERT(authority);
745
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
746
747
0
  result = parse_authority(u, authority, strlen(authority),
748
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
749
0
  if(result)
750
0
    curlx_dyn_free(&host);
751
0
  else {
752
0
    free(u->host);
753
0
    u->host = curlx_dyn_ptr(&host);
754
0
  }
755
0
  return result;
756
0
}
757
758
/*
759
 * "Remove Dot Segments"
760
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
761
 */
762
763
/*
 * Check if the '*clen' bytes at '*str' start with a dot, either a literal
 * '.' or the URL encoded "%2e" / "%2E". If so, advance '*str' past the dot,
 * reduce '*clen' by the same amount and return TRUE. Otherwise both are
 * left untouched and FALSE is returned.
 *
 * Nothing is matched when '*clen' is zero, so the length can never wrap
 * around and no byte beyond the given range decides the outcome.
 */
static bool is_dot(const char **str, size_t *clen)
{
  const char *p = *str;
  size_t skip = 0; /* number of bytes the dot occupies, 0 for no dot */

  if(*clen && (p[0] == '.'))
    skip = 1;
  else if((*clen >= 3) &&
          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e'))
    /* OR-ing with 0x20 makes the 'E' comparison case insensitive */
    skip = 3;

  *str += skip;
  *clen -= skip;
  return skip != 0;
}
779
780
65.9M
#define ISSLASH(x) ((x) == '/')
781
782
/*
783
 * dedotdotify()
784
 * @unittest: 1395
785
 *
786
 * This function gets a null-terminated path with dot and dotdot sequences
787
 * passed in and strips them off according to the rules in RFC 3986 section
788
 * 5.2.4.
789
 *
790
 * The function handles a path. It should not contain the query nor fragment.
791
 *
792
 * RETURNS
793
 *
794
 * Zero for success and 'out' set to an allocated dedotdotified string.
795
 */
796
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
797
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
798
76.6k
{
799
76.6k
  struct dynbuf out;
800
76.6k
  CURLcode result = CURLE_OK;
801
802
76.6k
  *outp = NULL;
803
  /* the path always starts with a slash, and a slash has not dot */
804
76.6k
  if(clen < 2)
805
0
    return 0;
806
807
76.6k
  curlx_dyn_init(&out, clen + 1);
808
809
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
810
      remove that prefix from the input buffer; otherwise, */
811
76.6k
  if(is_dot(&input, &clen)) {
812
0
    const char *p = input;
813
0
    size_t blen = clen;
814
815
0
    if(!clen)
816
      /* . [end] */
817
0
      goto end;
818
0
    else if(ISSLASH(*p)) {
819
      /* one dot followed by a slash */
820
0
      input = p + 1;
821
0
      clen--;
822
0
    }
823
824
    /*  D. if the input buffer consists only of "." or "..", then remove
825
        that from the input buffer; otherwise, */
826
0
    else if(is_dot(&p, &blen)) {
827
0
      if(!blen)
828
        /* .. [end] */
829
0
        goto end;
830
0
      else if(ISSLASH(*p)) {
831
        /* ../ */
832
0
        input = p + 1;
833
0
        clen = blen - 1;
834
0
      }
835
0
    }
836
0
  }
837
838
65.8M
  while(clen && !result) { /* until end of path content */
839
65.8M
    if(ISSLASH(*input)) {
840
544k
      const char *p = &input[1];
841
544k
      size_t blen = clen - 1;
842
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
843
          "."  is a complete path segment, then replace that prefix with "/" in
844
          the input buffer; otherwise, */
845
544k
      if(is_dot(&p, &blen)) {
846
92.9k
        if(!blen) { /* /. */
847
1.34k
          result = curlx_dyn_addn(&out, "/", 1);
848
1.34k
          break;
849
1.34k
        }
850
91.6k
        else if(ISSLASH(*p)) { /* /./ */
851
34.6k
          input = p;
852
34.6k
          clen = blen;
853
34.6k
          continue;
854
34.6k
        }
855
856
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
857
            where ".." is a complete path segment, then replace that prefix
858
            with "/" in the input buffer and remove the last segment and its
859
            preceding "/" (if any) from the output buffer; otherwise, */
860
56.9k
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
861
          /* remove the last segment from the output buffer */
862
9.38k
          size_t len = curlx_dyn_len(&out);
863
9.38k
          if(len) {
864
8.84k
            char *ptr = curlx_dyn_ptr(&out);
865
8.84k
            char *last = memrchr(ptr, '/', len);
866
8.84k
            if(last)
867
              /* trim the output at the slash */
868
8.84k
              curlx_dyn_setlen(&out, last - ptr);
869
8.84k
          }
870
871
9.38k
          if(blen) { /* /../ */
872
8.95k
            input = p;
873
8.95k
            clen = blen;
874
8.95k
            continue;
875
8.95k
          }
876
429
          result = curlx_dyn_addn(&out, "/", 1);
877
429
          break;
878
9.38k
        }
879
92.9k
      }
880
544k
    }
881
882
    /*  E. move the first path segment in the input buffer to the end of
883
        the output buffer, including the initial "/" character (if any) and
884
        any subsequent characters up to, but not including, the next "/"
885
        character or the end of the input buffer. */
886
887
65.7M
    result = curlx_dyn_addn(&out, input, 1);
888
65.7M
    input++;
889
65.7M
    clen--;
890
65.7M
  }
891
76.6k
end:
892
76.6k
  if(!result) {
893
76.6k
    if(curlx_dyn_len(&out))
894
76.6k
      *outp = curlx_dyn_ptr(&out);
895
0
    else {
896
0
      *outp = strdup("");
897
0
      if(!*outp)
898
0
        return 1;
899
0
    }
900
76.6k
  }
901
76.6k
  return result ? 1 : 0; /* success */
902
76.6k
}
903
904
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
905
333k
{
906
333k
  const char *path;
907
333k
  size_t pathlen;
908
333k
  char *query = NULL;
909
333k
  char *fragment = NULL;
910
333k
  char schemebuf[MAX_SCHEME_LEN + 1];
911
333k
  size_t schemelen = 0;
912
333k
  size_t urllen;
913
333k
  CURLUcode result = CURLUE_OK;
914
333k
  size_t fraglen = 0;
915
333k
  struct dynbuf host;
916
917
333k
  DEBUGASSERT(url);
918
919
333k
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
920
921
333k
  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
922
333k
  if(result)
923
132k
    goto fail;
924
925
200k
  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
926
200k
                                   flags & (CURLU_GUESS_SCHEME|
927
200k
                                            CURLU_DEFAULT_SCHEME));
928
929
  /* handle the file: scheme */
930
200k
  if(schemelen && !strcmp(schemebuf, "file")) {
931
19.7k
    bool uncpath = FALSE;
932
19.7k
    if(urllen <= 6) {
933
      /* file:/ is not enough to actually be a complete file: URL */
934
1
      result = CURLUE_BAD_FILE_URL;
935
1
      goto fail;
936
1
    }
937
938
    /* path has been allocated large enough to hold this */
939
19.7k
    path = &url[5];
940
19.7k
    pathlen = urllen - 5;
941
942
19.7k
    u->scheme = strdup("file");
943
19.7k
    if(!u->scheme) {
944
0
      result = CURLUE_OUT_OF_MEMORY;
945
0
      goto fail;
946
0
    }
947
948
    /* Extra handling URLs with an authority component (i.e. that start with
949
     * "file://")
950
     *
951
     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
952
     * RFC 8089, but not the (current) WHAT-WG URL spec.
953
     */
954
19.7k
    if(path[0] == '/' && path[1] == '/') {
955
      /* swallow the two slashes */
956
576
      const char *ptr = &path[2];
957
958
      /*
959
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
960
       *
961
       *  o it has no/blank hostname, or
962
       *
963
       *  o the hostname matches "localhost" (case-insensitively), or
964
       *
965
       *  o the hostname is a FQDN that resolves to this machine, or
966
       *
967
       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
968
       *    Appendix E.3).
969
       *
970
       * For brevity, we only consider URLs with empty, "localhost", or
971
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
972
       *
973
       * Additionally, there is an exception for URLs with a Windows drive
974
       * letter in the authority (which was accidentally omitted from RFC 8089
975
       * Appendix E, but believe me, it was meant to be there. --MK)
976
       */
977
576
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
978
        /* the URL includes a hostname, it must match "localhost" or
979
           "127.0.0.1" to be valid */
980
267
        if(checkprefix("localhost/", ptr) ||
981
267
           checkprefix("127.0.0.1/", ptr)) {
982
0
          ptr += 9; /* now points to the slash after the host */
983
0
        }
984
267
        else {
985
#ifdef _WIN32
986
          size_t len;
987
988
          /* the hostname, NetBIOS computer name, can not contain disallowed
989
             chars, and the delimiting slash character must be appended to the
990
             hostname */
991
          path = strpbrk(ptr, "/\\:*?\"<>|");
992
          if(!path || *path != '/') {
993
            result = CURLUE_BAD_FILE_URL;
994
            goto fail;
995
          }
996
997
          len = path - ptr;
998
          if(len) {
999
            CURLcode code = curlx_dyn_addn(&host, ptr, len);
1000
            if(code) {
1001
              result = cc2cu(code);
1002
              goto fail;
1003
            }
1004
            uncpath = TRUE;
1005
          }
1006
1007
          ptr -= 2; /* now points to the // before the host in UNC */
1008
#else
1009
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1010
             none */
1011
267
          result = CURLUE_BAD_FILE_URL;
1012
267
          goto fail;
1013
267
#endif
1014
267
        }
1015
267
      }
1016
1017
309
      path = ptr;
1018
309
      pathlen = urllen - (ptr - url);
1019
309
    }
1020
1021
19.4k
    if(!uncpath)
1022
      /* no host for file: URLs by default */
1023
19.4k
      curlx_dyn_reset(&host);
1024
1025
19.4k
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1026
    /* Do not allow Windows drive letters when not in Windows.
1027
     * This catches both "file:/c:" and "file:c:" */
1028
19.4k
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1029
19.4k
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1030
      /* File drive letters are only accepted in MS-DOS/Windows */
1031
72
      result = CURLUE_BAD_FILE_URL;
1032
72
      goto fail;
1033
72
    }
1034
#else
1035
    /* If the path starts with a slash and a drive letter, ditch the slash */
1036
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1037
      /* This cannot be done with strcpy, as the memory chunks overlap! */
1038
      path++;
1039
      pathlen--;
1040
    }
1041
#endif
1042
1043
19.4k
  }
1044
180k
  else {
1045
    /* clear path */
1046
180k
    const char *schemep = NULL;
1047
180k
    const char *hostp;
1048
180k
    size_t hostlen;
1049
1050
180k
    if(schemelen) {
1051
75.2k
      int i = 0;
1052
75.2k
      const char *p = &url[schemelen + 1];
1053
226k
      while((*p == '/') && (i < 4)) {
1054
151k
        p++;
1055
151k
        i++;
1056
151k
      }
1057
1058
75.2k
      schemep = schemebuf;
1059
75.2k
      if(!Curl_get_scheme_handler(schemep) &&
1060
75.2k
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1061
0
        result = CURLUE_UNSUPPORTED_SCHEME;
1062
0
        goto fail;
1063
0
      }
1064
1065
75.2k
      if((i < 1) || (i > 3)) {
1066
        /* less than one or more than three slashes */
1067
438
        result = CURLUE_BAD_SLASHES;
1068
438
        goto fail;
1069
438
      }
1070
74.8k
      hostp = p; /* hostname starts here */
1071
74.8k
    }
1072
105k
    else {
1073
      /* no scheme! */
1074
1075
105k
      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1076
0
        result = CURLUE_BAD_SCHEME;
1077
0
        goto fail;
1078
0
      }
1079
105k
      if(flags & CURLU_DEFAULT_SCHEME)
1080
0
        schemep = DEFAULT_SCHEME;
1081
1082
      /*
1083
       * The URL was badly formatted, let's try without scheme specified.
1084
       */
1085
105k
      hostp = url;
1086
105k
    }
1087
1088
180k
    if(schemep) {
1089
74.8k
      u->scheme = strdup(schemep);
1090
74.8k
      if(!u->scheme) {
1091
0
        result = CURLUE_OUT_OF_MEMORY;
1092
0
        goto fail;
1093
0
      }
1094
74.8k
    }
1095
1096
    /* find the end of the hostname + port number */
1097
180k
    hostlen = strcspn(hostp, "/?#");
1098
180k
    path = &hostp[hostlen];
1099
1100
    /* this pathlen also contains the query and the fragment */
1101
180k
    pathlen = urllen - (path - url);
1102
180k
    if(hostlen) {
1103
1104
175k
      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1105
175k
      if(result)
1106
55.9k
        goto fail;
1107
1108
119k
      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1109
57.6k
        const char *hostname = curlx_dyn_ptr(&host);
1110
        /* legacy curl-style guess based on hostname */
1111
57.6k
        if(checkprefix("ftp.", hostname))
1112
158
          schemep = "ftp";
1113
57.4k
        else if(checkprefix("dict.", hostname))
1114
0
          schemep = "dict";
1115
57.4k
        else if(checkprefix("ldap.", hostname))
1116
0
          schemep = "ldap";
1117
57.4k
        else if(checkprefix("imap.", hostname))
1118
0
          schemep = "imap";
1119
57.4k
        else if(checkprefix("smtp.", hostname))
1120
0
          schemep = "smtp";
1121
57.4k
        else if(checkprefix("pop3.", hostname))
1122
0
          schemep = "pop3";
1123
57.4k
        else
1124
57.4k
          schemep = "http";
1125
1126
57.6k
        u->scheme = strdup(schemep);
1127
57.6k
        if(!u->scheme) {
1128
0
          result = CURLUE_OUT_OF_MEMORY;
1129
0
          goto fail;
1130
0
        }
1131
57.6k
        u->guessed_scheme = TRUE;
1132
57.6k
      }
1133
119k
    }
1134
4.97k
    else if(flags & CURLU_NO_AUTHORITY) {
1135
      /* allowed to be empty. */
1136
0
      if(curlx_dyn_add(&host, "")) {
1137
0
        result = CURLUE_OUT_OF_MEMORY;
1138
0
        goto fail;
1139
0
      }
1140
0
    }
1141
4.97k
    else {
1142
4.97k
      result = CURLUE_NO_HOST;
1143
4.97k
      goto fail;
1144
4.97k
    }
1145
180k
  }
1146
1147
139k
  fragment = strchr(path, '#');
1148
139k
  if(fragment) {
1149
34.6k
    fraglen = pathlen - (fragment - path);
1150
34.6k
    u->fragment_present = TRUE;
1151
34.6k
    if(fraglen > 1) {
1152
      /* skip the leading '#' in the copy but include the terminating null */
1153
33.8k
      if(flags & CURLU_URLENCODE) {
1154
0
        struct dynbuf enc;
1155
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1156
0
        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1157
0
        if(result)
1158
0
          goto fail;
1159
0
        u->fragment = curlx_dyn_ptr(&enc);
1160
0
      }
1161
33.8k
      else {
1162
33.8k
        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1163
33.8k
        if(!u->fragment) {
1164
0
          result = CURLUE_OUT_OF_MEMORY;
1165
0
          goto fail;
1166
0
        }
1167
33.8k
      }
1168
33.8k
    }
1169
    /* after this, pathlen still contains the query */
1170
34.6k
    pathlen -= fraglen;
1171
34.6k
  }
1172
1173
139k
  query = memchr(path, '?', pathlen);
1174
139k
  if(query) {
1175
74.7k
    size_t qlen = fragment ? (size_t)(fragment - query) :
1176
74.7k
      pathlen - (query - path);
1177
74.7k
    pathlen -= qlen;
1178
74.7k
    u->query_present = TRUE;
1179
74.7k
    if(qlen > 1) {
1180
74.2k
      if(flags & CURLU_URLENCODE) {
1181
1
        struct dynbuf enc;
1182
1
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1183
        /* skip the leading question mark */
1184
1
        result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1185
1
        if(result)
1186
0
          goto fail;
1187
1
        u->query = curlx_dyn_ptr(&enc);
1188
1
      }
1189
74.2k
      else {
1190
74.2k
        u->query = Curl_memdup0(query + 1, qlen - 1);
1191
74.2k
        if(!u->query) {
1192
0
          result = CURLUE_OUT_OF_MEMORY;
1193
0
          goto fail;
1194
0
        }
1195
74.2k
      }
1196
74.2k
    }
1197
437
    else {
1198
      /* single byte query */
1199
437
      u->query = strdup("");
1200
437
      if(!u->query) {
1201
0
        result = CURLUE_OUT_OF_MEMORY;
1202
0
        goto fail;
1203
0
      }
1204
437
    }
1205
74.7k
  }
1206
1207
139k
  if(pathlen && (flags & CURLU_URLENCODE)) {
1208
100
    struct dynbuf enc;
1209
100
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1210
100
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1211
100
    if(result)
1212
0
      goto fail;
1213
100
    pathlen = curlx_dyn_len(&enc);
1214
100
    path = u->path = curlx_dyn_ptr(&enc);
1215
100
  }
1216
1217
139k
  if(pathlen <= 1) {
1218
    /* there is no path left or just the slash, unset */
1219
62.3k
    path = NULL;
1220
62.3k
  }
1221
76.6k
  else {
1222
76.6k
    if(!u->path) {
1223
76.5k
      u->path = Curl_memdup0(path, pathlen);
1224
76.5k
      if(!u->path) {
1225
0
        result = CURLUE_OUT_OF_MEMORY;
1226
0
        goto fail;
1227
0
      }
1228
76.5k
      path = u->path;
1229
76.5k
    }
1230
100
    else if(flags & CURLU_URLENCODE)
1231
      /* it might have encoded more than just the path so cut it */
1232
100
      u->path[pathlen] = 0;
1233
1234
76.6k
    if(!(flags & CURLU_PATH_AS_IS)) {
1235
      /* remove ../ and ./ sequences according to RFC3986 */
1236
76.6k
      char *dedot;
1237
76.6k
      int err = dedotdotify(path, pathlen, &dedot);
1238
76.6k
      if(err) {
1239
0
        result = CURLUE_OUT_OF_MEMORY;
1240
0
        goto fail;
1241
0
      }
1242
76.6k
      if(dedot) {
1243
76.6k
        free(u->path);
1244
76.6k
        u->path = dedot;
1245
76.6k
      }
1246
76.6k
    }
1247
76.6k
  }
1248
1249
139k
  u->host = curlx_dyn_ptr(&host);
1250
1251
139k
  return result;
1252
194k
fail:
1253
194k
  curlx_dyn_free(&host);
1254
194k
  free_urlhandle(u);
1255
194k
  return result;
1256
139k
}
1257
1258
/*
1259
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1260
 */
1261
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1262
                                      unsigned int flags)
1263
333k
{
1264
333k
  CURLUcode result;
1265
333k
  CURLU tmpurl;
1266
333k
  memset(&tmpurl, 0, sizeof(tmpurl));
1267
333k
  result = parseurl(url, &tmpurl, flags);
1268
333k
  if(!result) {
1269
139k
    free_urlhandle(u);
1270
139k
    *u = tmpurl;
1271
139k
  }
1272
333k
  return result;
1273
333k
}
1274
1275
/*
1276
 */
1277
CURLU *curl_url(void)
1278
332k
{
1279
332k
  return calloc(1, sizeof(struct Curl_URL));
1280
332k
}
1281
1282
/*
 * Release everything stored in the URL handle and then the handle itself.
 * A NULL handle is accepted and ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1289
1290
#define DUP(dest, src, name)                    \
1291
0
  do {                                          \
1292
0
    if(src->name) {                             \
1293
0
      dest->name = strdup(src->name);           \
1294
0
      if(!dest->name)                           \
1295
0
        goto fail;                              \
1296
0
    }                                           \
1297
0
  } while(0)
1298
1299
CURLU *curl_url_dup(const CURLU *in)
1300
0
{
1301
0
  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1302
0
  if(u) {
1303
0
    DUP(u, in, scheme);
1304
0
    DUP(u, in, user);
1305
0
    DUP(u, in, password);
1306
0
    DUP(u, in, options);
1307
0
    DUP(u, in, host);
1308
0
    DUP(u, in, port);
1309
0
    DUP(u, in, path);
1310
0
    DUP(u, in, query);
1311
0
    DUP(u, in, fragment);
1312
0
    DUP(u, in, zoneid);
1313
0
    u->portnum = in->portnum;
1314
0
    u->fragment_present = in->fragment_present;
1315
0
    u->query_present = in->query_present;
1316
0
  }
1317
0
  return u;
1318
0
fail:
1319
0
  curl_url_cleanup(u);
1320
0
  return NULL;
1321
0
}
1322
1323
#ifndef USE_IDN
/* without IDN support both conversions always report CURLUE_LACKS_IDN */
#define host_decode(x,y) CURLUE_LACKS_IDN
#define host_encode(x,y) CURLUE_LACKS_IDN
#else
/*
 * Run 'host' through Curl_idn_decode(), which stores its output in
 * '*allochost', and translate the CURLcode into a CURLUcode: out of memory
 * is kept as such, any other failure is reported as a bad hostname.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode idnrc = Curl_idn_decode(host, allochost);
  if(!idnrc)
    return CURLUE_OK;
  if(idnrc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/*
 * Run 'host' through Curl_idn_encode(), which stores its output in
 * '*allochost', and translate the CURLcode into a CURLUcode: out of memory
 * is kept as such, any other failure is reported as a bad hostname.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode idnrc = Curl_idn_encode(host, allochost);
  if(!idnrc)
    return CURLUE_OK;
  if(idnrc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}
#endif
1345
1346
/*
 * Produce the caller-visible copy of a single URL part.
 *
 * 'ptr' is the stored part. It is duplicated into '*part' and then, depending
 * on 'flags' and 'plusdecode', post-processed:
 *
 *  - plusdecode: every '+' is turned into a space
 *  - CURLU_URLDECODE: the copy is URL-decoded, control bytes are rejected
 *  - CURLU_URLENCODE: the copy is URL-encoded
 *  - CURLU_PUNYCODE / CURLU_PUNY2IDN: only for CURLUPART_HOST and only when
 *    CURLU_URLENCODE is not set, the host is passed through host_decode() /
 *    host_encode()
 *
 * Returns CURLUE_OK with an allocated string in '*part'. On every error
 * return '*part' is NULL and nothing is left allocated for the caller.
 */
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
                               const char *ptr, char **part,
                               bool plusdecode, unsigned int flags)
{
  size_t partlen = strlen(ptr);
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
  *part = Curl_memdup0(ptr, partlen);
  if(!*part)
    return CURLUE_OUT_OF_MEMORY;
  if(plusdecode) {
    /* convert + to space */
    char *plus = *part;
    size_t i = 0;
    for(i = 0; i < partlen; ++plus, i++) {
      if(*plus == '+')
        *plus = ' ';
    }
  }
  if(urldecode) {
    char *decoded;
    size_t dlen;
    /* this unconditional rejection of control bytes is documented
       API behavior */
    CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
    free(*part);
    if(res) {
      *part = NULL;
      return CURLUE_URLDECODE;
    }
    *part = decoded;
    partlen = dlen;
  }
  if(urlencode) {
    struct dynbuf enc;
    CURLUcode uc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
    if(uc) {
      /* do not hand a half-processed string back together with an error */
      Curl_safefree(*part);
      return uc;
    }
    free(*part);
    *part = curlx_dyn_ptr(&enc);
  }
  else if(punycode) {
    if(!Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      CURLUcode ret = host_decode(*part, &allochost);
      if(ret) {
        Curl_safefree(*part);
        return ret;
      }
      free(*part);
      *part = allochost;
    }
  }
  else if(depunyfy) {
    if(Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      CURLUcode ret = host_encode(*part, &allochost);
      if(ret) {
        Curl_safefree(*part);
        return ret;
      }
      free(*part);
      *part = allochost;
    }
  }

  return CURLUE_OK;
}
1414
1415
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1416
276k
{
1417
276k
  char *url;
1418
276k
  const char *scheme;
1419
276k
  char *options = u->options;
1420
276k
  char *port = u->port;
1421
276k
  char *allochost = NULL;
1422
276k
  bool show_fragment =
1423
276k
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1424
276k
  bool show_query = (u->query && u->query[0]) ||
1425
276k
    (u->query_present && flags & CURLU_GET_EMPTY);
1426
276k
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1427
276k
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1428
276k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1429
276k
  char portbuf[7];
1430
276k
  if(u->scheme && curl_strequal("file", u->scheme)) {
1431
19.3k
    url = aprintf("file://%s%s%s%s%s",
1432
19.3k
                  u->path,
1433
19.3k
                  show_query ? "?": "",
1434
19.3k
                  u->query ? u->query : "",
1435
19.3k
                  show_fragment ? "#": "",
1436
19.3k
                  u->fragment ? u->fragment : "");
1437
19.3k
  }
1438
257k
  else if(!u->host)
1439
138k
    return CURLUE_NO_HOST;
1440
118k
  else {
1441
118k
    const struct Curl_handler *h = NULL;
1442
118k
    char schemebuf[MAX_SCHEME_LEN + 5];
1443
118k
    if(u->scheme)
1444
118k
      scheme = u->scheme;
1445
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1446
0
      scheme = DEFAULT_SCHEME;
1447
0
    else
1448
0
      return CURLUE_NO_SCHEME;
1449
1450
118k
    h = Curl_get_scheme_handler(scheme);
1451
118k
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1452
      /* there is no stored port number, but asked to deliver
1453
         a default one for the scheme */
1454
0
      if(h) {
1455
0
        msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1456
0
        port = portbuf;
1457
0
      }
1458
0
    }
1459
118k
    else if(port) {
1460
      /* there is a stored port number, but asked to inhibit if it matches
1461
         the default one for the scheme */
1462
3.40k
      if(h && (h->defport == u->portnum) &&
1463
3.40k
         (flags & CURLU_NO_DEFAULT_PORT))
1464
0
        port = NULL;
1465
3.40k
    }
1466
1467
118k
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1468
116k
      options = NULL;
1469
1470
118k
    if(u->host[0] == '[') {
1471
54
      if(u->zoneid) {
1472
        /* make it '[ host %25 zoneid ]' */
1473
40
        struct dynbuf enc;
1474
40
        size_t hostlen = strlen(u->host);
1475
40
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1476
40
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1477
40
                          u->zoneid))
1478
0
          return CURLUE_OUT_OF_MEMORY;
1479
40
        allochost = curlx_dyn_ptr(&enc);
1480
40
      }
1481
54
    }
1482
118k
    else if(urlencode) {
1483
0
      allochost = curl_easy_escape(NULL, u->host, 0);
1484
0
      if(!allochost)
1485
0
        return CURLUE_OUT_OF_MEMORY;
1486
0
    }
1487
118k
    else if(punycode) {
1488
0
      if(!Curl_is_ASCII_name(u->host)) {
1489
0
        CURLUcode ret = host_decode(u->host, &allochost);
1490
0
        if(ret)
1491
0
          return ret;
1492
0
      }
1493
0
    }
1494
118k
    else if(depunyfy) {
1495
0
      if(Curl_is_ASCII_name(u->host)) {
1496
0
        CURLUcode ret = host_encode(u->host, &allochost);
1497
0
        if(ret)
1498
0
          return ret;
1499
0
      }
1500
0
    }
1501
1502
118k
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1503
118k
      msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1504
0
    else
1505
0
      schemebuf[0] = 0;
1506
1507
118k
    url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1508
118k
                  schemebuf,
1509
118k
                  u->user ? u->user : "",
1510
118k
                  u->password ? ":": "",
1511
118k
                  u->password ? u->password : "",
1512
118k
                  options ? ";" : "",
1513
118k
                  options ? options : "",
1514
118k
                  (u->user || u->password || options) ? "@": "",
1515
118k
                  allochost ? allochost : u->host,
1516
118k
                  port ? ":": "",
1517
118k
                  port ? port : "",
1518
118k
                  u->path ? u->path : "/",
1519
118k
                  show_query ? "?": "",
1520
118k
                  u->query ? u->query : "",
1521
118k
                  show_fragment ? "#": "",
1522
118k
                  u->fragment ? u->fragment : "");
1523
118k
    free(allochost);
1524
118k
  }
1525
138k
  if(!url)
1526
0
    return CURLUE_OUT_OF_MEMORY;
1527
138k
  *part = url;
1528
138k
  return CURLUE_OK;
1529
138k
}
1530
1531
/*
 * Extract one part, or the full URL, from the handle 'u'.
 *
 * On success '*part' receives a newly allocated string. '*part' is set to
 * NULL before anything else is attempted. When the requested part is not
 * stored, the matching CURLUE_NO_* code is returned (CURLUE_UNKNOWN_PART
 * for an unrecognized 'what'). A missing path is reported as "/".
 *
 * Returns CURLUE_BAD_HANDLE for a NULL 'u' and CURLUE_BAD_PARTPOINTER for a
 * NULL 'part'.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART;
  char defport[7];
  bool plus2space = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  /* the full URL is assembled separately */
  if(what == CURLUPART_URL)
    return urlget_url(u, part, flags);

  switch(what) {
  case CURLUPART_SCHEME:
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    break;
  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;
  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;
  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~CURLU_URLDECODE; /* never for port */
    if(u->scheme) {
      if(value) {
        /* a port is stored: hide it when it equals the scheme's default
           and the caller asked for that */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* no port is stored: deliver the scheme's default on request */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h) {
          msnprintf(defport, sizeof(defport), "%u", h->defport);
          value = defport;
        }
      }
    }
    break;
  case CURLUPART_PATH:
    value = u->path ? u->path : "/";
    break;
  case CURLUPART_QUERY:
    value = u->query;
    missing = CURLUE_NO_QUERY;
    plus2space = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* a blank query is stored but the caller did not ask for it */
      value = NULL;
    break;
  case CURLUPART_FRAGMENT:
    value = u->fragment;
    missing = CURLUE_NO_FRAGMENT;
    if(!value && u->fragment_present && (flags & CURLU_GET_EMPTY))
      /* a blank fragment was present and the caller asks for it */
      value = "";
    break;
  default:
    break;
  }

  if(!value)
    return missing;

  return urlget_format(u, what, value, part, plus2space, flags);
}
1625
1626
/*
 * Validate a scheme that is about to be stored in 'u'.
 *
 * The scheme must be 1 to MAX_SCHEME_LEN bytes long. A scheme with a known
 * handler is accepted as is. An unknown scheme is only accepted when
 * CURLU_NON_SUPPORT_SCHEME is set and every byte of it follows the
 * "ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )" syntax.
 *
 * On success the "guessed scheme" marker of 'u' is cleared and CURLUE_OK is
 * returned. This function does not store the scheme string itself.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_handler *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme_handler(scheme);
  if(!h) {
    const char *s = scheme;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    if(!ISALPHA(*s))
      return CURLUE_BAD_SCHEME;
    /* check every remaining byte, including the very last one */
    for(s++; *s; s++) {
      if(!(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.')))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1655
1656
/*
 * Store a new port in 'u'.
 *
 * 'provided_port' must start with a digit, be accepted by
 * curlx_str_number() with 0xffff as the upper limit, and have nothing
 * following the number; otherwise CURLUE_BAD_PORT_NUMBER is returned. The
 * number is stored both as a freshly formatted string and numerically.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  curl_off_t number;
  char *portstr;

  if(!ISDIGIT(provided_port[0]) ||
     curlx_str_number(&provided_port, &number, 0xffff) ||
     *provided_port)
    /* not a number, out of range, or followed by trailing data */
    return CURLUE_BAD_PORT_NUMBER;

  portstr = aprintf("%" CURL_FORMAT_CURL_OFF_T, number);
  if(!portstr)
    return CURLUE_OUT_OF_MEMORY;

  free(u->port);
  u->port = portstr;
  u->portnum = (unsigned short)number;
  return CURLUE_OK;
}
1674
1675
/*
 * Set a whole URL in 'u', replacing the existing contents (if any).
 *
 * An empty 'url' (part_size is zero) is only accepted when 'u' already holds
 * enough to produce a full URL; nothing changes in that case. An absolute
 * 'url', or any 'url' when no full URL can be extracted from 'u', is parsed
 * and replaces the current contents. Otherwise 'url' is treated as relative
 * and applied to the URL currently stored in 'u'.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *base = NULL;
  CURLUcode uc;

  if(!part_size) {
    /* a blank URL is not a valid URL unless we already have a complete one
       and this is a redirect */
    if(curl_url_get(u, CURLUPART_URL, &base, flags))
      return CURLUE_MALFORMED_INPUT;
    /* "" is a fine relative URL, but nothing changes */
    free(base);
    return CURLUE_OK;
  }

  /* an absolute URL always replaces what is there */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME|CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  /* so does anything else when there is no complete URL to be relative to */
  if(curl_url_get(u, CURLUPART_URL, &base, flags))
    return parseurl_and_replace(url, u, flags);

  DEBUGASSERT(base); /* it is set here */
  /* apply the relative part to create a new URL */
  uc = redirect_url(base, url, u, flags);
  free(base);
  return uc;
}
1712
1713
/*
 * Clear the part 'what' in the handle 'u'.
 *
 * CURLUPART_URL releases every part and zeroes the whole handle. Any other
 * known part has its string freed and set to NULL, together with the state
 * tied to it (guessed-scheme marker, numeric port, query/fragment presence).
 * Returns CURLUE_UNKNOWN_PART for an unrecognized 'what'.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **slot = NULL; /* the string member to release */

  switch(what) {
  case CURLUPART_URL:
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    slot = &u->scheme;
    break;
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    slot = &u->port;
    break;
  case CURLUPART_PATH:
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    slot = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    slot = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  Curl_safefree(*slot);
  return CURLUE_OK;
}
1759
1760
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1761
                       const char *part, unsigned int flags)
1762
356k
{
1763
356k
  char **storep = NULL;
1764
356k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1765
356k
  bool plusencode = FALSE;
1766
356k
  bool urlskipslash = FALSE;
1767
356k
  bool leadingslash = FALSE;
1768
356k
  bool appendquery = FALSE;
1769
356k
  bool equalsencode = FALSE;
1770
356k
  size_t nalloc;
1771
1772
356k
  if(!u)
1773
0
    return CURLUE_BAD_HANDLE;
1774
356k
  if(!part)
1775
    /* setting a part to NULL clears it */
1776
0
    return urlset_clear(u, what);
1777
1778
356k
  nalloc = strlen(part);
1779
356k
  if(nalloc > CURL_MAX_INPUT_LENGTH)
1780
    /* excessive input length */
1781
0
    return CURLUE_MALFORMED_INPUT;
1782
1783
356k
  switch(what) {
1784
0
  case CURLUPART_SCHEME: {
1785
0
    CURLUcode status = set_url_scheme(u, part, flags);
1786
0
    if(status)
1787
0
      return status;
1788
0
    storep = &u->scheme;
1789
0
    urlencode = FALSE; /* never */
1790
0
    break;
1791
0
  }
1792
17.0k
  case CURLUPART_USER:
1793
17.0k
    storep = &u->user;
1794
17.0k
    break;
1795
6.76k
  case CURLUPART_PASSWORD:
1796
6.76k
    storep = &u->password;
1797
6.76k
    break;
1798
0
  case CURLUPART_OPTIONS:
1799
0
    storep = &u->options;
1800
0
    break;
1801
0
  case CURLUPART_HOST:
1802
0
    storep = &u->host;
1803
0
    Curl_safefree(u->zoneid);
1804
0
    break;
1805
0
  case CURLUPART_ZONEID:
1806
0
    storep = &u->zoneid;
1807
0
    break;
1808
0
  case CURLUPART_PORT:
1809
0
    return set_url_port(u, part);
1810
0
  case CURLUPART_PATH:
1811
0
    urlskipslash = TRUE;
1812
0
    leadingslash = TRUE; /* enforce */
1813
0
    storep = &u->path;
1814
0
    break;
1815
0
  case CURLUPART_QUERY:
1816
0
    plusencode = urlencode;
1817
0
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1818
0
    equalsencode = appendquery;
1819
0
    storep = &u->query;
1820
0
    u->query_present = TRUE;
1821
0
    break;
1822
0
  case CURLUPART_FRAGMENT:
1823
0
    storep = &u->fragment;
1824
0
    u->fragment_present = TRUE;
1825
0
    break;
1826
333k
  case CURLUPART_URL:
1827
333k
    return set_url(u, part, nalloc, flags);
1828
0
  default:
1829
0
    return CURLUE_UNKNOWN_PART;
1830
356k
  }
1831
23.7k
  DEBUGASSERT(storep);
1832
23.7k
  {
1833
23.7k
    const char *newp;
1834
23.7k
    struct dynbuf enc;
1835
23.7k
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1836
1837
23.7k
    if(leadingslash && (part[0] != '/')) {
1838
0
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
1839
0
      if(result)
1840
0
        return cc2cu(result);
1841
0
    }
1842
23.7k
    if(urlencode) {
1843
23.7k
      const unsigned char *i;
1844
1845
72.6M
      for(i = (const unsigned char *)part; *i; i++) {
1846
72.6M
        CURLcode result;
1847
72.6M
        if((*i == ' ') && plusencode) {
1848
0
          result = curlx_dyn_addn(&enc, "+", 1);
1849
0
          if(result)
1850
0
            return CURLUE_OUT_OF_MEMORY;
1851
0
        }
1852
72.6M
        else if(ISUNRESERVED(*i) ||
1853
72.6M
                ((*i == '/') && urlskipslash) ||
1854
72.6M
                ((*i == '=') && equalsencode)) {
1855
14.5M
          if((*i == '=') && equalsencode)
1856
            /* only skip the first equals sign */
1857
0
            equalsencode = FALSE;
1858
14.5M
          result = curlx_dyn_addn(&enc, i, 1);
1859
14.5M
          if(result)
1860
0
            return cc2cu(result);
1861
14.5M
        }
1862
58.0M
        else {
1863
58.0M
          unsigned char out[3]={'%'};
1864
58.0M
          Curl_hexbyte(&out[1], *i);
1865
58.0M
          result = curlx_dyn_addn(&enc, out, 3);
1866
58.0M
          if(result)
1867
0
            return cc2cu(result);
1868
58.0M
        }
1869
72.6M
      }
1870
23.7k
    }
1871
0
    else {
1872
0
      char *p;
1873
0
      CURLcode result = curlx_dyn_add(&enc, part);
1874
0
      if(result)
1875
0
        return cc2cu(result);
1876
0
      p = curlx_dyn_ptr(&enc);
1877
0
      while(*p) {
1878
        /* make sure percent encoded are lower case */
1879
0
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1880
0
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1881
0
          p[1] = Curl_raw_tolower(p[1]);
1882
0
          p[2] = Curl_raw_tolower(p[2]);
1883
0
          p += 3;
1884
0
        }
1885
0
        else
1886
0
          p++;
1887
0
      }
1888
0
    }
1889
23.7k
    newp = curlx_dyn_ptr(&enc);
1890
1891
23.7k
    if(appendquery && newp) {
1892
      /* Append the 'newp' string onto the old query. Add a '&' separator if
1893
         none is present at the end of the existing query already */
1894
1895
0
      size_t querylen = u->query ? strlen(u->query) : 0;
1896
0
      bool addamperand = querylen && (u->query[querylen -1] != '&');
1897
0
      if(querylen) {
1898
0
        struct dynbuf qbuf;
1899
0
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1900
1901
0
        if(curlx_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1902
0
          goto nomem;
1903
1904
0
        if(addamperand) {
1905
0
          if(curlx_dyn_addn(&qbuf, "&", 1))
1906
0
            goto nomem;
1907
0
        }
1908
0
        if(curlx_dyn_add(&qbuf, newp))
1909
0
          goto nomem;
1910
0
        curlx_dyn_free(&enc);
1911
0
        free(*storep);
1912
0
        *storep = curlx_dyn_ptr(&qbuf);
1913
0
        return CURLUE_OK;
1914
0
nomem:
1915
0
        curlx_dyn_free(&enc);
1916
0
        return CURLUE_OUT_OF_MEMORY;
1917
0
      }
1918
0
    }
1919
1920
23.7k
    else if(what == CURLUPART_HOST) {
1921
0
      size_t n = curlx_dyn_len(&enc);
1922
0
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
1923
        /* Skip hostname check, it is allowed to be empty. */
1924
0
      }
1925
0
      else {
1926
0
        bool bad = FALSE;
1927
0
        if(!n)
1928
0
          bad = TRUE; /* empty hostname is not okay */
1929
0
        else if(!urlencode) {
1930
          /* if the host name part was not URL encoded here, it was set ready
1931
             URL encoded so we need to decode it to check */
1932
0
          size_t dlen;
1933
0
          char *decoded = NULL;
1934
0
          CURLcode result =
1935
0
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
1936
0
          if(result || hostname_check(u, decoded, dlen))
1937
0
            bad = TRUE;
1938
0
          free(decoded);
1939
0
        }
1940
0
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
1941
0
          bad = TRUE;
1942
0
        if(bad) {
1943
0
          curlx_dyn_free(&enc);
1944
0
          return CURLUE_BAD_HOSTNAME;
1945
0
        }
1946
0
      }
1947
0
    }
1948
1949
23.7k
    free(*storep);
1950
23.7k
    *storep = (char *)CURL_UNCONST(newp);
1951
23.7k
  }
1952
0
  return CURLUE_OK;
1953
23.7k
}