Coverage Report

Created: 2026-03-12 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/CMake/Utilities/cmcurl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
#include "curl_setup.h"
25
26
#include "urldata.h"
27
#include "urlapi-int.h"
28
#include "strcase.h"
29
#include "url.h"
30
#include "escape.h"
31
#include "curlx/inet_pton.h"
32
#include "curlx/inet_ntop.h"
33
#include "strdup.h"
34
#include "idn.h"
35
#include "curlx/strparse.h"
36
#include "curl_memrchr.h"
37
38
#ifdef _WIN32
39
/* MS-DOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter that is
 * immediately followed by a colon. The argument is parenthesized so that
 * expressions such as 'ptr + 1' expand correctly. It is evaluated more than
 * once, so only pass expressions without side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str)          \
  ((('a' <= (str)[0] && (str)[0] <= 'z') ||    \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) &&   \
   ((str)[1] == ':'))
44
#endif
45
46
/* Drive prefix as it may appear inside a URL: a single ASCII letter, then
 * ':' or '|', then a slash, a backslash or the end of the string. The
 * argument is evaluated several times, so it must be free of side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str)                      \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) ||                \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) &&               \
   (((str)[1] == ':') || ((str)[1] == '|')) &&                 \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
53
54
/* Schemes are not URL encoded. This is the upper bound on the number of
   scheme characters that Curl_is_absolute_url() scans and stores. */
#define MAX_SCHEME_LEN 40
56
57
/*
 * IPv6 addresses in URLs are parsed even when USE_IPV6 is disabled. In that
 * configuration, if nothing else has defined AF_INET6, provide a stand-in
 * value here rather than spreading a fake value everywhere.
 */
#ifndef USE_IPV6
#ifndef AF_INET6
#define AF_INET6 (AF_INET + 1)
#endif
#endif
65
66
/* Internal representation of CURLU. Point to URL-encoded strings. */
67
struct Curl_URL {
68
  char *scheme;
69
  char *user;
70
  char *password;
71
  char *options; /* IMAP only? */
72
  char *host;
73
  char *zoneid; /* for numerical IPv6 addresses */
74
  char *port;
75
  char *path;
76
  char *query;
77
  char *fragment;
78
  unsigned short portnum; /* the numerical version (if 'port' is set) */
79
  BIT(query_present);    /* to support blank */
80
  BIT(fragment_present); /* to support blank */
81
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
82
};
83
84
0
/* scheme name used as the default */
#define DEFAULT_SCHEME "https"
85
86
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
87
                                      unsigned int flags);
88
89
static void free_urlhandle(struct Curl_URL *u)
90
0
{
91
0
  curlx_free(u->scheme);
92
0
  curlx_free(u->user);
93
0
  curlx_free(u->password);
94
0
  curlx_free(u->options);
95
0
  curlx_free(u->host);
96
0
  curlx_free(u->zoneid);
97
0
  curlx_free(u->port);
98
0
  curlx_free(u->path);
99
0
  curlx_free(u->query);
100
0
  curlx_free(u->fragment);
101
0
}
102
103
/*
 * Return a pointer to the first '/' or '?' found after the start of the
 * hostname, or to the terminating null byte if there is none. The hostname
 * is considered to start right after the first "//" in the string, or at
 * the very beginning when the string holds no "//". This covers cases like
 * http://www.example.com?id=2380
 */
static const char *find_host_sep(const char *url)
{
  const char *slashes = strstr(url, "//");
  const char *host = slashes ? slashes + 2 : url;

  /* skip ahead to the first '/' or '?', or to the end of the string */
  return host + strcspn(host, "/?");
}
122
123
/* Convert a CURLcode to a CURLUcode: CURLE_TOO_LARGE maps to
   CURLUE_TOO_LARGE, every other value maps to CURLUE_OUT_OF_MEMORY. */
#define cc2cu(x) (((x) != CURLE_TOO_LARGE) ? CURLUE_OUT_OF_MEMORY : \
                  CURLUE_TOO_LARGE)
126
127
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
128
 * spaces in the source URL accordingly.
129
 *
130
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
131
 * will fail.
132
 */
133
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
134
                               size_t len, bool relative,
135
                               bool query)
136
0
{
137
  /* we must add this with whitespace-replacing */
138
0
  bool left = !query;
139
0
  const unsigned char *iptr;
140
0
  const unsigned char *host_sep = (const unsigned char *)url;
141
0
  CURLcode result = CURLE_OK;
142
143
0
  if(!relative) {
144
0
    size_t n;
145
0
    host_sep = (const unsigned char *)find_host_sep(url);
146
147
    /* output the first piece as-is */
148
0
    n = (const char *)host_sep - url;
149
0
    result = curlx_dyn_addn(o, url, n);
150
0
    len -= n;
151
0
  }
152
153
0
  for(iptr = host_sep; len && !result; iptr++, len--) {
154
0
    if(*iptr == ' ') {
155
0
      if(left)
156
0
        result = curlx_dyn_addn(o, "%20", 3);
157
0
      else
158
0
        result = curlx_dyn_addn(o, "+", 1);
159
0
    }
160
0
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
161
0
      unsigned char out[3] = { '%' };
162
0
      Curl_hexbyte(&out[1], *iptr);
163
0
      result = curlx_dyn_addn(o, out, 3);
164
0
    }
165
0
    else {
166
0
      result = curlx_dyn_addn(o, iptr, 1);
167
0
      if(*iptr == '?')
168
0
        left = FALSE;
169
0
    }
170
0
  }
171
172
0
  if(result)
173
0
    return cc2cu(result);
174
0
  return CURLUE_OK;
175
0
}
176
177
/*
178
 * Returns the length of the scheme if the given URL is absolute (as opposed
179
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
180
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
181
 *
182
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
183
 * scheme.
184
 */
185
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
186
                            bool guess_scheme)
187
0
{
188
0
  size_t i = 0;
189
0
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
190
0
  (void)buflen; /* only used in debug-builds */
191
0
  if(buf)
192
0
    buf[0] = 0; /* always leave a defined value in buf */
193
#ifdef _WIN32
194
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
195
    return 0;
196
#endif
197
0
  if(ISALPHA(url[0]))
198
0
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
199
0
      char s = url[i];
200
0
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.'))) {
201
        /* RFC 3986 3.1 explains:
202
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
203
        */
204
0
      }
205
0
      else {
206
0
        break;
207
0
      }
208
0
    }
209
0
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
210
    /* If this does not guess scheme, the scheme always ends with the colon so
211
       that this also detects data: URLs etc. In guessing mode, data: could
212
       be the hostname "data" with a specified port number. */
213
214
    /* the length of the scheme is the name part only */
215
0
    size_t len = i;
216
0
    if(buf) {
217
0
      Curl_strntolower(buf, url, i);
218
0
      buf[i] = 0;
219
0
    }
220
0
    return len;
221
0
  }
222
0
  return 0;
223
0
}
224
225
/*
226
 * Concatenate a relative URL onto a base URL making it absolute.
227
 */
228
static CURLUcode redirect_url(const char *base, const char *relurl,
229
                              CURLU *u, unsigned int flags)
230
0
{
231
0
  struct dynbuf urlbuf;
232
0
  bool host_changed = FALSE;
233
0
  const char *useurl = relurl;
234
0
  const char *cutoff = NULL;
235
0
  size_t prelen;
236
0
  CURLUcode uc;
237
238
  /* protsep points to the start of the hostname, after [scheme]:// */
239
0
  const char *protsep = base + strlen(u->scheme) + 3;
240
0
  DEBUGASSERT(base && relurl && u); /* all set here */
241
0
  if(!base)
242
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
243
244
  /* handle different relative URL types */
245
0
  switch(relurl[0]) {
246
0
  case '/':
247
0
    if(relurl[1] == '/') {
248
      /* protocol-relative URL: //example.com/path */
249
0
      cutoff = protsep;
250
0
      useurl = &relurl[2];
251
0
      host_changed = TRUE;
252
0
    }
253
0
    else
254
      /* absolute /path */
255
0
      cutoff = strchr(protsep, '/');
256
0
    break;
257
258
0
  case '#':
259
    /* fragment-only change */
260
0
    if(u->fragment)
261
0
      cutoff = strchr(protsep, '#');
262
0
    break;
263
264
0
  default:
265
    /* path or query-only change */
266
0
    if(u->query && u->query[0])
267
      /* remove existing query */
268
0
      cutoff = strchr(protsep, '?');
269
0
    else if(u->fragment && u->fragment[0])
270
      /* Remove existing fragment */
271
0
      cutoff = strchr(protsep, '#');
272
273
0
    if(relurl[0] != '?') {
274
      /* append a relative path after the last slash */
275
0
      cutoff = memrchr(protsep, '/',
276
0
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
277
0
      if(cutoff)
278
0
        cutoff++; /* truncate after last slash */
279
0
    }
280
0
    break;
281
0
  }
282
283
0
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
284
285
  /* build new URL */
286
0
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
287
288
0
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
289
0
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
290
0
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
291
0
                              flags & ~CURLU_PATH_AS_IS);
292
0
  }
293
0
  else
294
0
    uc = CURLUE_OUT_OF_MEMORY;
295
296
0
  curlx_dyn_free(&urlbuf);
297
0
  return uc;
298
0
}
299
300
/* scan for byte values <= 31, 127 and sometimes space */
301
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
302
0
{
303
0
  size_t n = strlen(url);
304
0
  size_t i;
305
0
  unsigned char control;
306
0
  const unsigned char *p = (const unsigned char *)url;
307
0
  if(n > CURL_MAX_INPUT_LENGTH)
308
0
    return CURLUE_MALFORMED_INPUT;
309
310
0
  control = allowspace ? 0x1f : 0x20;
311
0
  for(i = 0; i < n; i++) {
312
0
    if(p[i] <= control || p[i] == 127)
313
0
      return CURLUE_MALFORMED_INPUT;
314
0
  }
315
0
  *urllen = n;
316
0
  return CURLUE_OK;
317
0
}
318
319
/*
320
 * parse_hostname_login()
321
 *
322
 * Parse the login details (username, password and options) from the URL and
323
 * strip them out of the hostname
324
 *
325
 */
326
static CURLUcode parse_hostname_login(struct Curl_URL *u,
327
                                      const char *login,
328
                                      size_t len,
329
                                      unsigned int flags,
330
                                      size_t *offset) /* to the hostname */
331
0
{
332
0
  CURLUcode result = CURLUE_OK;
333
0
  CURLcode ccode;
334
0
  char *userp = NULL;
335
0
  char *passwdp = NULL;
336
0
  char *optionsp = NULL;
337
0
  const struct Curl_handler *h = NULL;
338
339
  /* At this point, we assume all the other special cases have been taken
340
   * care of, so the host is at most
341
   *
342
   *   [user[:password][;options]]@]hostname
343
   *
344
   * We need somewhere to put the embedded details, so do that first.
345
   */
346
0
  char *ptr;
347
348
0
  DEBUGASSERT(login);
349
350
0
  *offset = 0;
351
0
  ptr = memchr(login, '@', len);
352
0
  if(!ptr)
353
0
    goto out;
354
355
  /* We will now try to extract the
356
   * possible login information in a string like:
357
   * ftp://user:password@ftp.site.example:8021/README */
358
0
  ptr++;
359
360
  /* if this is a known scheme, get some details */
361
0
  if(u->scheme)
362
0
    h = Curl_get_scheme_handler(u->scheme);
363
364
  /* We could use the login information in the URL so extract it. Only parse
365
     options if the handler says we should. Note that 'h' might be NULL! */
366
0
  ccode = Curl_parse_login_details(login, ptr - login - 1,
367
0
                                   &userp, &passwdp,
368
0
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
369
0
                                   &optionsp : NULL);
370
0
  if(ccode) {
371
    /* the only possible error from Curl_parse_login_details is out of
372
       memory: */
373
0
    result = CURLUE_OUT_OF_MEMORY;
374
0
    goto out;
375
0
  }
376
377
0
  if(userp) {
378
0
    if(flags & CURLU_DISALLOW_USER) {
379
      /* Option DISALLOW_USER is set and URL contains username. */
380
0
      result = CURLUE_USER_NOT_ALLOWED;
381
0
      goto out;
382
0
    }
383
0
    curlx_free(u->user);
384
0
    u->user = userp;
385
0
  }
386
387
0
  if(passwdp) {
388
0
    curlx_free(u->password);
389
0
    u->password = passwdp;
390
0
  }
391
392
0
  if(optionsp) {
393
0
    curlx_free(u->options);
394
0
    u->options = optionsp;
395
0
  }
396
397
  /* the hostname starts at this offset */
398
0
  *offset = ptr - login;
399
0
  return CURLUE_OK;
400
401
0
out:
402
403
0
  curlx_free(userp);
404
0
  curlx_free(passwdp);
405
0
  curlx_free(optionsp);
406
0
  u->user = NULL;
407
0
  u->password = NULL;
408
0
  u->options = NULL;
409
410
0
  return result;
411
0
}
412
413
/*
 * Split an optional ":port" suffix off the hostname held in 'host'.
 *
 * For a hostname starting with '[', the port may only follow the closing
 * ']' (RFC2732-style IP-address); otherwise the first ':' starts the port.
 * If a colon is found, 'host' is shortened to end before it.
 *
 * Browser behavior adaptation. If there is a colon with no digits after,
 * just cut off the name there which makes us ignore the colon and just use
 * the default port. Firefox, Chrome and Safari all do that. Do not do it if
 * the URL has no scheme ('has_scheme' is FALSE), to make something that
 * looks like a scheme not work!
 *
 * A valid port (0 - 65535, nothing trailing) is stored in u->portnum and,
 * as a freshly generated string, in u->port.
 *
 * Returns CURLUE_BAD_IPV6 for a missing ']', CURLUE_BAD_PORT_NUMBER for a
 * bad port, CURLUE_OUT_OF_MEMORY if the port string cannot be allocated.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *hostname = curlx_dyn_ptr(host);
  const char *colon;
  curl_off_t port;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /* Find the end of an IPv6 address on the ']' ending bracket */
    const char *bracket = strchr(hostname, ']');
    if(!bracket)
      return CURLUE_BAD_IPV6;
    colon = bracket + 1;
    if(!*colon)
      /* nothing follows the address */
      colon = NULL;
    else if(*colon != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    /* no port part present */
    return CURLUE_OK;

  /* cut off the hostname at the colon */
  curlx_dyn_setlen(host, (size_t)(colon - hostname));
  colon++;
  if(!*colon)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&colon, &port, 0xffff) || *colon)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short)port;
  /* generate a new port number string to get rid of leading zeroes etc */
  curlx_free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
466
467
/* this assumes 'hostname' now starts with [ */
468
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
469
                            size_t hlen) /* length of hostname */
470
0
{
471
0
  size_t len;
472
0
  DEBUGASSERT(*hostname == '[');
473
0
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
474
0
    return CURLUE_BAD_IPV6;
475
0
  hostname++;
476
0
  hlen -= 2;
477
478
  /* only valid IPv6 letters are ok */
479
0
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
480
481
0
  if(hlen != len) {
482
0
    hlen = len;
483
0
    if(hostname[len] == '%') {
484
      /* this could now be '%[zone id]' */
485
0
      char zoneid[16];
486
0
      int i = 0;
487
0
      char *h = &hostname[len + 1];
488
      /* pass '25' if present and is a URL encoded percent sign */
489
0
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
490
0
        h += 2;
491
0
      while(*h && (*h != ']') && (i < 15))
492
0
        zoneid[i++] = *h++;
493
0
      if(!i || (']' != *h))
494
0
        return CURLUE_BAD_IPV6;
495
0
      zoneid[i] = 0;
496
0
      u->zoneid = curlx_strdup(zoneid);
497
0
      if(!u->zoneid)
498
0
        return CURLUE_OUT_OF_MEMORY;
499
0
      hostname[len] = ']'; /* insert end bracket */
500
0
      hostname[len + 1] = 0; /* terminate the hostname */
501
0
    }
502
0
    else
503
0
      return CURLUE_BAD_IPV6;
504
    /* hostname is fine */
505
0
  }
506
507
  /* Normalize the IPv6 address */
508
0
  {
509
0
    char dest[16]; /* fits a binary IPv6 address */
510
0
    hostname[hlen] = 0; /* end the address there */
511
0
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
512
0
      return CURLUE_BAD_IPV6;
513
0
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen + 1)) {
514
0
      hlen = strlen(hostname); /* might be shorter now */
515
0
      hostname[hlen + 1] = 0;
516
0
    }
517
0
    hostname[hlen] = ']'; /* restore ending bracket */
518
0
  }
519
0
  return CURLUE_OK;
520
0
}
521
522
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
523
                                size_t hlen) /* length of hostname */
524
0
{
525
0
  size_t len;
526
0
  DEBUGASSERT(hostname);
527
528
0
  if(!hlen)
529
0
    return CURLUE_NO_HOST;
530
0
  else if(hostname[0] == '[')
531
0
    return ipv6_parse(u, hostname, hlen);
532
0
  else {
533
    /* letters from the second string are not ok */
534
0
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
535
0
    if(hlen != len)
536
      /* hostname with bad content */
537
0
      return CURLUE_BAD_HOSTNAME;
538
0
  }
539
0
  return CURLUE_OK;
540
0
}
541
542
/*
543
 * Handle partial IPv4 numerical addresses and different bases, like
544
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
545
 *
546
 * If the given input string is syntactically wrong IPv4 or any part for
547
 * example is too big, this function returns HOST_NAME.
548
 *
549
 * Output the "normalized" version of that input string in plain quad decimal
550
 * integers.
551
 *
552
 * Returns the host type.
553
 */
554
555
0
#define HOST_ERROR   -1 /* out of memory */
556
557
0
#define HOST_NAME    1
558
0
#define HOST_IPV4    2
559
0
#define HOST_IPV6    3
560
561
static int ipv4_normalize(struct dynbuf *host)
562
0
{
563
0
  bool done = FALSE;
564
0
  int n = 0;
565
0
  const char *c = curlx_dyn_ptr(host);
566
0
  unsigned int parts[4] = { 0, 0, 0, 0 };
567
0
  CURLcode result = CURLE_OK;
568
569
0
  if(*c == '[')
570
0
    return HOST_IPV6;
571
572
0
  while(!done) {
573
0
    int rc;
574
0
    curl_off_t l;
575
0
    if(*c == '0') {
576
0
      if(c[1] == 'x') {
577
0
        c += 2; /* skip the prefix */
578
0
        rc = curlx_str_hex(&c, &l, UINT_MAX);
579
0
      }
580
0
      else
581
0
        rc = curlx_str_octal(&c, &l, UINT_MAX);
582
0
    }
583
0
    else
584
0
      rc = curlx_str_number(&c, &l, UINT_MAX);
585
586
0
    if(rc)
587
0
      return HOST_NAME;
588
589
0
    parts[n] = (unsigned int)l;
590
591
0
    switch(*c) {
592
0
    case '.':
593
0
      if(n == 3)
594
0
        return HOST_NAME;
595
0
      n++;
596
0
      c++;
597
0
      break;
598
599
0
    case '\0':
600
0
      done = TRUE;
601
0
      break;
602
603
0
    default:
604
0
      return HOST_NAME;
605
0
    }
606
0
  }
607
608
0
  switch(n) {
609
0
  case 0: /* a -- 32 bits */
610
0
    curlx_dyn_reset(host);
611
612
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
613
0
                            (parts[0] >> 24),
614
0
                            ((parts[0] >> 16) & 0xff),
615
0
                            ((parts[0] >> 8) & 0xff),
616
0
                            (parts[0] & 0xff));
617
0
    break;
618
0
  case 1: /* a.b -- 8.24 bits */
619
0
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
620
0
      return HOST_NAME;
621
0
    curlx_dyn_reset(host);
622
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
623
0
                            (parts[0]),
624
0
                            ((parts[1] >> 16) & 0xff),
625
0
                            ((parts[1] >> 8) & 0xff),
626
0
                            (parts[1] & 0xff));
627
0
    break;
628
0
  case 2: /* a.b.c -- 8.8.16 bits */
629
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
630
0
      return HOST_NAME;
631
0
    curlx_dyn_reset(host);
632
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
633
0
                            (parts[0]),
634
0
                            (parts[1]),
635
0
                            ((parts[2] >> 8) & 0xff),
636
0
                            (parts[2] & 0xff));
637
0
    break;
638
0
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
639
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
640
0
       (parts[3] > 0xff))
641
0
      return HOST_NAME;
642
0
    curlx_dyn_reset(host);
643
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
644
0
                            (parts[0]),
645
0
                            (parts[1]),
646
0
                            (parts[2]),
647
0
                            (parts[3]));
648
0
    break;
649
0
  }
650
0
  if(result)
651
0
    return HOST_ERROR;
652
0
  return HOST_IPV4;
653
0
}
654
655
/* if necessary, replace the host content with a URL decoded version */
656
static CURLUcode urldecode_host(struct dynbuf *host)
657
0
{
658
0
  char *per = NULL;
659
0
  const char *hostname = curlx_dyn_ptr(host);
660
0
  per = strchr(hostname, '%');
661
0
  if(!per)
662
    /* nothing to decode */
663
0
    return CURLUE_OK;
664
0
  else {
665
    /* encoded */
666
0
    size_t dlen;
667
0
    char *decoded;
668
0
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
669
0
                                     REJECT_CTRL);
670
0
    if(result)
671
0
      return CURLUE_BAD_HOSTNAME;
672
0
    curlx_dyn_reset(host);
673
0
    result = curlx_dyn_addn(host, decoded, dlen);
674
0
    curlx_free(decoded);
675
0
    if(result)
676
0
      return cc2cu(result);
677
0
  }
678
679
0
  return CURLUE_OK;
680
0
}
681
682
static CURLUcode parse_authority(struct Curl_URL *u,
683
                                 const char *auth, size_t authlen,
684
                                 unsigned int flags,
685
                                 struct dynbuf *host,
686
                                 bool has_scheme)
687
0
{
688
0
  size_t offset;
689
0
  CURLUcode uc;
690
0
  CURLcode result;
691
692
  /*
693
   * Parse the login details and strip them out of the hostname.
694
   */
695
0
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
696
0
  if(uc)
697
0
    goto out;
698
699
0
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
700
0
  if(result) {
701
0
    uc = cc2cu(result);
702
0
    goto out;
703
0
  }
704
705
0
  uc = Curl_parse_port(u, host, has_scheme);
706
0
  if(uc)
707
0
    goto out;
708
709
0
  if(!curlx_dyn_len(host))
710
0
    return CURLUE_NO_HOST;
711
712
0
  switch(ipv4_normalize(host)) {
713
0
  case HOST_IPV4:
714
0
    break;
715
0
  case HOST_IPV6:
716
0
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
717
0
    break;
718
0
  case HOST_NAME:
719
0
    uc = urldecode_host(host);
720
0
    if(!uc)
721
0
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
722
0
    break;
723
0
  case HOST_ERROR:
724
0
    uc = CURLUE_OUT_OF_MEMORY;
725
0
    break;
726
0
  default:
727
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
728
0
    break;
729
0
  }
730
731
0
out:
732
0
  return uc;
733
0
}
734
735
/* used for HTTP/2 server push */
736
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
737
0
{
738
0
  CURLUcode result;
739
0
  struct dynbuf host;
740
741
0
  DEBUGASSERT(authority);
742
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
743
744
0
  result = parse_authority(u, authority, strlen(authority),
745
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
746
0
  if(result)
747
0
    curlx_dyn_free(&host);
748
0
  else {
749
0
    curlx_free(u->host);
750
0
    u->host = curlx_dyn_ptr(&host);
751
0
  }
752
0
  return result;
753
0
}
754
755
/*
756
 * "Remove Dot Segments"
757
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
758
 */
759
760
/*
 * Check if the data at *str, of which *clen bytes remain, starts with a dot,
 * given either as a literal '.' or URL encoded as "%2e" / "%2E".
 *
 * On a match, *str is advanced past the dot, *clen is reduced by the number
 * of bytes consumed (1 or 3) and TRUE is returned. Without a match nothing
 * is changed and FALSE is returned.
 *
 * Both forms honor the remaining length: a zero *clen never matches, so the
 * length cannot wrap around below zero.
 */
static bool is_dot(const char **str, size_t *clen)
{
  const char *p = *str;
  size_t used = 0; /* number of bytes the dot occupies, 0 for no dot */

  if(*clen && (p[0] == '.'))
    used = 1;
  else if((*clen >= 3) &&
          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e'))
    used = 3;

  *str += used;
  *clen -= used;
  return used != 0;
}
776
777
0
/* only the forward slash separates path segments */
#define ISSLASH(x) ('/' == (x))
778
779
/*
780
 * dedotdotify()
781
 * @unittest: 1395
782
 *
783
 * This function gets a null-terminated path with dot and dotdot sequences
784
 * passed in and strips them off according to the rules in RFC 3986 section
785
 * 5.2.4.
786
 *
787
 * The function handles a path. It should not contain the query nor fragment.
788
 *
789
 * RETURNS
790
 *
791
 * Zero for success and 'out' set to an allocated dedotdotified string.
792
 */
793
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
794
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
795
0
{
796
0
  struct dynbuf out;
797
0
  CURLcode result = CURLE_OK;
798
799
0
  *outp = NULL;
800
  /* the path always starts with a slash, and a slash has not dot */
801
0
  if(clen < 2)
802
0
    return 0;
803
804
0
  curlx_dyn_init(&out, clen + 1);
805
806
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
807
      remove that prefix from the input buffer; otherwise, */
808
0
  if(is_dot(&input, &clen)) {
809
0
    const char *p = input;
810
0
    size_t blen = clen;
811
812
0
    if(!clen)
813
      /* . [end] */
814
0
      goto end;
815
0
    else if(ISSLASH(*p)) {
816
      /* one dot followed by a slash */
817
0
      input = p + 1;
818
0
      clen--;
819
0
    }
820
821
    /*  D. if the input buffer consists only of "." or "..", then remove
822
        that from the input buffer; otherwise, */
823
0
    else if(is_dot(&p, &blen)) {
824
0
      if(!blen)
825
        /* .. [end] */
826
0
        goto end;
827
0
      else if(ISSLASH(*p)) {
828
        /* ../ */
829
0
        input = p + 1;
830
0
        clen = blen - 1;
831
0
      }
832
0
    }
833
0
  }
834
835
0
  while(clen && !result) { /* until end of path content */
836
0
    if(ISSLASH(*input)) {
837
0
      const char *p = &input[1];
838
0
      size_t blen = clen - 1;
839
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
840
          "."  is a complete path segment, then replace that prefix with "/" in
841
          the input buffer; otherwise, */
842
0
      if(is_dot(&p, &blen)) {
843
0
        if(!blen) { /* /. */
844
0
          result = curlx_dyn_addn(&out, "/", 1);
845
0
          break;
846
0
        }
847
0
        else if(ISSLASH(*p)) { /* /./ */
848
0
          input = p;
849
0
          clen = blen;
850
0
          continue;
851
0
        }
852
853
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
854
            where ".." is a complete path segment, then replace that prefix
855
            with "/" in the input buffer and remove the last segment and its
856
            preceding "/" (if any) from the output buffer; otherwise, */
857
0
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
858
          /* remove the last segment from the output buffer */
859
0
          size_t len = curlx_dyn_len(&out);
860
0
          if(len) {
861
0
            char *ptr = curlx_dyn_ptr(&out);
862
0
            char *last = memrchr(ptr, '/', len);
863
0
            if(last)
864
              /* trim the output at the slash */
865
0
              curlx_dyn_setlen(&out, last - ptr);
866
0
          }
867
868
0
          if(blen) { /* /../ */
869
0
            input = p;
870
0
            clen = blen;
871
0
            continue;
872
0
          }
873
0
          result = curlx_dyn_addn(&out, "/", 1);
874
0
          break;
875
0
        }
876
0
      }
877
0
    }
878
879
    /*  E. move the first path segment in the input buffer to the end of
880
        the output buffer, including the initial "/" character (if any) and
881
        any subsequent characters up to, but not including, the next "/"
882
        character or the end of the input buffer. */
883
884
0
    result = curlx_dyn_addn(&out, input, 1);
885
0
    input++;
886
0
    clen--;
887
0
  }
888
0
end:
889
0
  if(!result) {
890
0
    if(curlx_dyn_len(&out))
891
0
      *outp = curlx_dyn_ptr(&out);
892
0
    else {
893
0
      *outp = curlx_strdup("");
894
0
      if(!*outp)
895
0
        return 1;
896
0
    }
897
0
  }
898
0
  return result ? 1 : 0; /* success */
899
0
}
900
901
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
902
0
{
903
0
  const char *path;
904
0
  size_t pathlen;
905
0
  char *query = NULL;
906
0
  char *fragment = NULL;
907
0
  char schemebuf[MAX_SCHEME_LEN + 1];
908
0
  size_t schemelen = 0;
909
0
  size_t urllen;
910
0
  CURLUcode result = CURLUE_OK;
911
0
  size_t fraglen = 0;
912
0
  struct dynbuf host;
913
914
0
  DEBUGASSERT(url);
915
916
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
917
918
0
  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
919
0
  if(result)
920
0
    goto fail;
921
922
0
  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
923
0
                                   flags & (CURLU_GUESS_SCHEME |
924
0
                                            CURLU_DEFAULT_SCHEME));
925
926
  /* handle the file: scheme */
927
0
  if(schemelen && !strcmp(schemebuf, "file")) {
928
0
    bool uncpath = FALSE;
929
0
    if(urllen <= 6) {
930
      /* file:/ is not enough to actually be a complete file: URL */
931
0
      result = CURLUE_BAD_FILE_URL;
932
0
      goto fail;
933
0
    }
934
935
    /* path has been allocated large enough to hold this */
936
0
    path = &url[5];
937
0
    pathlen = urllen - 5;
938
939
0
    u->scheme = curlx_strdup("file");
940
0
    if(!u->scheme) {
941
0
      result = CURLUE_OUT_OF_MEMORY;
942
0
      goto fail;
943
0
    }
944
945
    /* Extra handling URLs with an authority component (i.e. that start with
946
     * "file://")
947
     *
948
     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
949
     * RFC 8089, but not the (current) WHAT-WG URL spec.
950
     */
951
0
    if(path[0] == '/' && path[1] == '/') {
952
      /* swallow the two slashes */
953
0
      const char *ptr = &path[2];
954
955
      /*
956
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
957
       *
958
       *  o it has no/blank hostname, or
959
       *
960
       *  o the hostname matches "localhost" (case-insensitively), or
961
       *
962
       *  o the hostname is a FQDN that resolves to this machine, or
963
       *
964
       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
965
       *    Appendix E.3).
966
       *
967
       * For brevity, we only consider URLs with empty, "localhost", or
968
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
969
       *
970
       * Additionally, there is an exception for URLs with a Windows drive
971
       * letter in the authority (which was accidentally omitted from RFC 8089
972
       * Appendix E, but believe me, it was meant to be there. --MK)
973
       */
974
0
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
975
        /* the URL includes a hostname, it must match "localhost" or
976
           "127.0.0.1" to be valid */
977
0
        if(checkprefix("localhost/", ptr) ||
978
0
           checkprefix("127.0.0.1/", ptr)) {
979
0
          ptr += 9; /* now points to the slash after the host */
980
0
        }
981
0
        else {
982
#ifdef _WIN32
983
          size_t len;
984
985
          /* the hostname, NetBIOS computer name, can not contain disallowed
986
             chars, and the delimiting slash character must be appended to the
987
             hostname */
988
          path = strpbrk(ptr, "/\\:*?\"<>|");
989
          if(!path || *path != '/') {
990
            result = CURLUE_BAD_FILE_URL;
991
            goto fail;
992
          }
993
994
          len = path - ptr;
995
          if(len) {
996
            CURLcode code = curlx_dyn_addn(&host, ptr, len);
997
            if(code) {
998
              result = cc2cu(code);
999
              goto fail;
1000
            }
1001
            uncpath = TRUE;
1002
          }
1003
1004
          ptr -= 2; /* now points to the // before the host in UNC */
1005
#else
1006
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1007
             none */
1008
0
          result = CURLUE_BAD_FILE_URL;
1009
0
          goto fail;
1010
0
#endif
1011
0
        }
1012
0
      }
1013
1014
0
      path = ptr;
1015
0
      pathlen = urllen - (ptr - url);
1016
0
    }
1017
1018
0
    if(!uncpath)
1019
      /* no host for file: URLs by default */
1020
0
      curlx_dyn_reset(&host);
1021
1022
0
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1023
    /* Do not allow Windows drive letters when not in Windows.
1024
     * This catches both "file:/c:" and "file:c:" */
1025
0
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1026
0
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1027
      /* File drive letters are only accepted in MS-DOS/Windows */
1028
0
      result = CURLUE_BAD_FILE_URL;
1029
0
      goto fail;
1030
0
    }
1031
#else
1032
    /* If the path starts with a slash and a drive letter, ditch the slash */
1033
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1034
      /* This cannot be done with strcpy, as the memory chunks overlap! */
1035
      path++;
1036
      pathlen--;
1037
    }
1038
#endif
1039
0
  }
1040
0
  else {
1041
    /* clear path */
1042
0
    const char *schemep = NULL;
1043
0
    const char *hostp;
1044
0
    size_t hostlen;
1045
1046
0
    if(schemelen) {
1047
0
      int i = 0;
1048
0
      const char *p = &url[schemelen + 1];
1049
0
      while((*p == '/') && (i < 4)) {
1050
0
        p++;
1051
0
        i++;
1052
0
      }
1053
1054
0
      schemep = schemebuf;
1055
0
      if(!Curl_get_scheme_handler(schemep) &&
1056
0
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1057
0
        result = CURLUE_UNSUPPORTED_SCHEME;
1058
0
        goto fail;
1059
0
      }
1060
1061
0
      if((i < 1) || (i > 3)) {
1062
        /* less than one or more than three slashes */
1063
0
        result = CURLUE_BAD_SLASHES;
1064
0
        goto fail;
1065
0
      }
1066
0
      hostp = p; /* hostname starts here */
1067
0
    }
1068
0
    else {
1069
      /* no scheme! */
1070
1071
0
      if(!(flags & (CURLU_DEFAULT_SCHEME | CURLU_GUESS_SCHEME))) {
1072
0
        result = CURLUE_BAD_SCHEME;
1073
0
        goto fail;
1074
0
      }
1075
0
      if(flags & CURLU_DEFAULT_SCHEME)
1076
0
        schemep = DEFAULT_SCHEME;
1077
1078
      /*
1079
       * The URL was badly formatted, let's try without scheme specified.
1080
       */
1081
0
      hostp = url;
1082
0
    }
1083
1084
0
    if(schemep) {
1085
0
      u->scheme = curlx_strdup(schemep);
1086
0
      if(!u->scheme) {
1087
0
        result = CURLUE_OUT_OF_MEMORY;
1088
0
        goto fail;
1089
0
      }
1090
0
    }
1091
1092
    /* find the end of the hostname + port number */
1093
0
    hostlen = strcspn(hostp, "/?#");
1094
0
    path = &hostp[hostlen];
1095
1096
    /* this pathlen also contains the query and the fragment */
1097
0
    pathlen = urllen - (path - url);
1098
0
    if(hostlen) {
1099
1100
0
      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1101
0
      if(result)
1102
0
        goto fail;
1103
1104
0
      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1105
0
        const char *hostname = curlx_dyn_ptr(&host);
1106
        /* legacy curl-style guess based on hostname */
1107
0
        if(checkprefix("ftp.", hostname))
1108
0
          schemep = "ftp";
1109
0
        else if(checkprefix("dict.", hostname))
1110
0
          schemep = "dict";
1111
0
        else if(checkprefix("ldap.", hostname))
1112
0
          schemep = "ldap";
1113
0
        else if(checkprefix("imap.", hostname))
1114
0
          schemep = "imap";
1115
0
        else if(checkprefix("smtp.", hostname))
1116
0
          schemep = "smtp";
1117
0
        else if(checkprefix("pop3.", hostname))
1118
0
          schemep = "pop3";
1119
0
        else
1120
0
          schemep = "http";
1121
1122
0
        u->scheme = curlx_strdup(schemep);
1123
0
        if(!u->scheme) {
1124
0
          result = CURLUE_OUT_OF_MEMORY;
1125
0
          goto fail;
1126
0
        }
1127
0
        u->guessed_scheme = TRUE;
1128
0
      }
1129
0
    }
1130
0
    else if(flags & CURLU_NO_AUTHORITY) {
1131
      /* allowed to be empty. */
1132
0
      if(curlx_dyn_add(&host, "")) {
1133
0
        result = CURLUE_OUT_OF_MEMORY;
1134
0
        goto fail;
1135
0
      }
1136
0
    }
1137
0
    else {
1138
0
      result = CURLUE_NO_HOST;
1139
0
      goto fail;
1140
0
    }
1141
0
  }
1142
1143
0
  fragment = strchr(path, '#');
1144
0
  if(fragment) {
1145
0
    fraglen = pathlen - (fragment - path);
1146
0
    u->fragment_present = TRUE;
1147
0
    if(fraglen > 1) {
1148
      /* skip the leading '#' in the copy but include the terminating null */
1149
0
      if(flags & CURLU_URLENCODE) {
1150
0
        struct dynbuf enc;
1151
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1152
0
        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1153
0
        if(result)
1154
0
          goto fail;
1155
0
        u->fragment = curlx_dyn_ptr(&enc);
1156
0
      }
1157
0
      else {
1158
0
        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1159
0
        if(!u->fragment) {
1160
0
          result = CURLUE_OUT_OF_MEMORY;
1161
0
          goto fail;
1162
0
        }
1163
0
      }
1164
0
    }
1165
    /* after this, pathlen still contains the query */
1166
0
    pathlen -= fraglen;
1167
0
  }
1168
1169
0
  query = memchr(path, '?', pathlen);
1170
0
  if(query) {
1171
0
    size_t qlen = fragment ? (size_t)(fragment - query) :
1172
0
      pathlen - (query - path);
1173
0
    pathlen -= qlen;
1174
0
    u->query_present = TRUE;
1175
0
    if(qlen > 1) {
1176
0
      if(flags & CURLU_URLENCODE) {
1177
0
        struct dynbuf enc;
1178
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1179
        /* skip the leading question mark */
1180
0
        result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1181
0
        if(result)
1182
0
          goto fail;
1183
0
        u->query = curlx_dyn_ptr(&enc);
1184
0
      }
1185
0
      else {
1186
0
        u->query = Curl_memdup0(query + 1, qlen - 1);
1187
0
        if(!u->query) {
1188
0
          result = CURLUE_OUT_OF_MEMORY;
1189
0
          goto fail;
1190
0
        }
1191
0
      }
1192
0
    }
1193
0
    else {
1194
      /* single byte query */
1195
0
      u->query = curlx_strdup("");
1196
0
      if(!u->query) {
1197
0
        result = CURLUE_OUT_OF_MEMORY;
1198
0
        goto fail;
1199
0
      }
1200
0
    }
1201
0
  }
1202
1203
0
  if(pathlen && (flags & CURLU_URLENCODE)) {
1204
0
    struct dynbuf enc;
1205
0
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1206
0
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1207
0
    if(result)
1208
0
      goto fail;
1209
0
    pathlen = curlx_dyn_len(&enc);
1210
0
    path = u->path = curlx_dyn_ptr(&enc);
1211
0
  }
1212
1213
0
  if(pathlen <= 1) {
1214
    /* there is no path left or just the slash, unset */
1215
0
    path = NULL;
1216
0
  }
1217
0
  else {
1218
0
    if(!u->path) {
1219
0
      u->path = Curl_memdup0(path, pathlen);
1220
0
      if(!u->path) {
1221
0
        result = CURLUE_OUT_OF_MEMORY;
1222
0
        goto fail;
1223
0
      }
1224
0
      path = u->path;
1225
0
    }
1226
0
    else if(flags & CURLU_URLENCODE)
1227
      /* it might have encoded more than just the path so cut it */
1228
0
      u->path[pathlen] = 0;
1229
1230
0
    if(!(flags & CURLU_PATH_AS_IS)) {
1231
      /* remove ../ and ./ sequences according to RFC3986 */
1232
0
      char *dedot;
1233
0
      int err = dedotdotify(path, pathlen, &dedot);
1234
0
      if(err) {
1235
0
        result = CURLUE_OUT_OF_MEMORY;
1236
0
        goto fail;
1237
0
      }
1238
0
      if(dedot) {
1239
0
        curlx_free(u->path);
1240
0
        u->path = dedot;
1241
0
      }
1242
0
    }
1243
0
  }
1244
1245
0
  u->host = curlx_dyn_ptr(&host);
1246
1247
0
  return result;
1248
0
fail:
1249
0
  curlx_dyn_free(&host);
1250
0
  free_urlhandle(u);
1251
0
  return result;
1252
0
}
1253
1254
/*
1255
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1256
 */
1257
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1258
                                      unsigned int flags)
1259
0
{
1260
0
  CURLUcode result;
1261
0
  CURLU tmpurl;
1262
0
  memset(&tmpurl, 0, sizeof(tmpurl));
1263
0
  result = parseurl(url, &tmpurl, flags);
1264
0
  if(!result) {
1265
0
    free_urlhandle(u);
1266
0
    *u = tmpurl;
1267
0
  }
1268
0
  return result;
1269
0
}
1270
1271
/*
1272
 */
1273
CURLU *curl_url(void)
1274
0
{
1275
0
  return curlx_calloc(1, sizeof(struct Curl_URL));
1276
0
}
1277
1278
/*
 * Dispose of a URL handle: free_urlhandle() is run on it first and then the
 * handle struct itself is freed. Passing NULL is a no-op.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  curlx_free(u);
}
1285
1286
#define DUP(dest, src, name)                    \
1287
0
  do {                                          \
1288
0
    if(src->name) {                             \
1289
0
      dest->name = curlx_strdup(src->name);     \
1290
0
      if(!dest->name)                           \
1291
0
        goto fail;                              \
1292
0
    }                                           \
1293
0
  } while(0)
1294
1295
CURLU *curl_url_dup(const CURLU *in)
1296
0
{
1297
0
  struct Curl_URL *u = curlx_calloc(1, sizeof(struct Curl_URL));
1298
0
  if(u) {
1299
0
    DUP(u, in, scheme);
1300
0
    DUP(u, in, user);
1301
0
    DUP(u, in, password);
1302
0
    DUP(u, in, options);
1303
0
    DUP(u, in, host);
1304
0
    DUP(u, in, port);
1305
0
    DUP(u, in, path);
1306
0
    DUP(u, in, query);
1307
0
    DUP(u, in, fragment);
1308
0
    DUP(u, in, zoneid);
1309
0
    u->portnum = in->portnum;
1310
0
    u->fragment_present = in->fragment_present;
1311
0
    u->query_present = in->query_present;
1312
0
  }
1313
0
  return u;
1314
0
fail:
1315
0
  curl_url_cleanup(u);
1316
0
  return NULL;
1317
0
}
1318
1319
#ifndef USE_IDN
/* built without USE_IDN: both conversions evaluate to CURLUE_LACKS_IDN
   without looking at their arguments */
#define host_decode(x, y) CURLUE_LACKS_IDN
#define host_encode(x, y) CURLUE_LACKS_IDN
#else
/*
 * Run Curl_idn_decode() on 'host', storing its output in '*allochost', and
 * translate the CURLcode into a CURLUcode: out of memory is passed on as
 * such, any other failure becomes CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_decode(host, allochost);
  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/*
 * Run Curl_idn_encode() on 'host', storing its output in '*allochost', and
 * translate the CURLcode into a CURLUcode: out of memory is passed on as
 * such, any other failure becomes CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_encode(host, allochost);
  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}
#endif
1341
1342
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
1343
                               const char *ptr, char **partp,
1344
                               bool plusdecode, unsigned int flags)
1345
0
{
1346
0
  CURLUcode uc = CURLUE_OK;
1347
0
  size_t partlen = strlen(ptr);
1348
0
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1349
0
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1350
0
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
1351
0
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
1352
0
  char *part = Curl_memdup0(ptr, partlen);
1353
0
  *partp = NULL;
1354
0
  if(!part)
1355
0
    return CURLUE_OUT_OF_MEMORY;
1356
0
  if(plusdecode) {
1357
    /* convert + to space */
1358
0
    char *plus = part;
1359
0
    size_t i = 0;
1360
0
    for(i = 0; i < partlen; ++plus, i++) {
1361
0
      if(*plus == '+')
1362
0
        *plus = ' ';
1363
0
    }
1364
0
  }
1365
0
  if(urldecode) {
1366
0
    char *decoded;
1367
0
    size_t dlen;
1368
    /* this unconditional rejection of control bytes is documented
1369
       API behavior */
1370
0
    CURLcode res = Curl_urldecode(part, partlen, &decoded, &dlen, REJECT_CTRL);
1371
0
    curlx_free(part);
1372
0
    if(res)
1373
0
      return CURLUE_URLDECODE;
1374
0
    part = decoded;
1375
0
    partlen = dlen;
1376
0
  }
1377
0
  if(urlencode) {
1378
0
    struct dynbuf enc;
1379
0
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1380
0
    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
1381
0
    curlx_free(part);
1382
0
    if(uc)
1383
0
      return uc;
1384
0
    part = curlx_dyn_ptr(&enc);
1385
0
  }
1386
0
  else if(punycode) {
1387
0
    if(!Curl_is_ASCII_name(u->host)) {
1388
0
      char *punyversion = NULL;
1389
0
      uc = host_decode(part, &punyversion);
1390
0
      curlx_free(part);
1391
0
      if(uc)
1392
0
        return uc;
1393
0
      part = punyversion;
1394
0
    }
1395
0
  }
1396
0
  else if(depunyfy) {
1397
0
    if(Curl_is_ASCII_name(u->host)) {
1398
0
      char *unpunified = NULL;
1399
0
      uc = host_encode(part, &unpunified);
1400
0
      curlx_free(part);
1401
0
      if(uc)
1402
0
        return uc;
1403
0
      part = unpunified;
1404
0
    }
1405
0
  }
1406
0
  *partp = part;
1407
0
  return CURLUE_OK;
1408
0
}
1409
1410
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1411
0
{
1412
0
  char *url;
1413
0
  const char *scheme;
1414
0
  char *options = u->options;
1415
0
  char *port = u->port;
1416
0
  char *allochost = NULL;
1417
0
  bool show_fragment =
1418
0
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1419
0
  bool show_query = (u->query && u->query[0]) ||
1420
0
    (u->query_present && flags & CURLU_GET_EMPTY);
1421
0
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1422
0
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1423
0
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1424
0
  char portbuf[7];
1425
0
  if(u->scheme && curl_strequal("file", u->scheme)) {
1426
0
    url = curl_maprintf("file://%s%s%s%s%s",
1427
0
                        u->path,
1428
0
                        show_query ? "?": "",
1429
0
                        u->query ? u->query : "",
1430
0
                        show_fragment ? "#": "",
1431
0
                        u->fragment ? u->fragment : "");
1432
0
  }
1433
0
  else if(!u->host)
1434
0
    return CURLUE_NO_HOST;
1435
0
  else {
1436
0
    const struct Curl_handler *h = NULL;
1437
0
    char schemebuf[MAX_SCHEME_LEN + 5];
1438
0
    if(u->scheme)
1439
0
      scheme = u->scheme;
1440
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1441
0
      scheme = DEFAULT_SCHEME;
1442
0
    else
1443
0
      return CURLUE_NO_SCHEME;
1444
1445
0
    h = Curl_get_scheme_handler(scheme);
1446
0
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1447
      /* there is no stored port number, but asked to deliver
1448
         a default one for the scheme */
1449
0
      if(h) {
1450
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1451
0
        port = portbuf;
1452
0
      }
1453
0
    }
1454
0
    else if(port) {
1455
      /* there is a stored port number, but asked to inhibit if it matches
1456
         the default one for the scheme */
1457
0
      if(h && (h->defport == u->portnum) &&
1458
0
         (flags & CURLU_NO_DEFAULT_PORT))
1459
0
        port = NULL;
1460
0
    }
1461
1462
0
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1463
0
      options = NULL;
1464
1465
0
    if(u->host[0] == '[') {
1466
0
      if(u->zoneid) {
1467
        /* make it '[ host %25 zoneid ]' */
1468
0
        struct dynbuf enc;
1469
0
        size_t hostlen = strlen(u->host);
1470
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1471
0
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1472
0
                          u->zoneid))
1473
0
          return CURLUE_OUT_OF_MEMORY;
1474
0
        allochost = curlx_dyn_ptr(&enc);
1475
0
      }
1476
0
    }
1477
0
    else if(urlencode) {
1478
0
      allochost = curl_easy_escape(NULL, u->host, 0);
1479
0
      if(!allochost)
1480
0
        return CURLUE_OUT_OF_MEMORY;
1481
0
    }
1482
0
    else if(punycode) {
1483
0
      if(!Curl_is_ASCII_name(u->host)) {
1484
0
        CURLUcode ret = host_decode(u->host, &allochost);
1485
0
        if(ret)
1486
0
          return ret;
1487
0
      }
1488
0
    }
1489
0
    else if(depunyfy) {
1490
0
      if(Curl_is_ASCII_name(u->host)) {
1491
0
        CURLUcode ret = host_encode(u->host, &allochost);
1492
0
        if(ret)
1493
0
          return ret;
1494
0
      }
1495
0
    }
1496
1497
0
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1498
0
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1499
0
    else
1500
0
      schemebuf[0] = 0;
1501
1502
0
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1503
0
                        schemebuf,
1504
0
                        u->user ? u->user : "",
1505
0
                        u->password ? ":": "",
1506
0
                        u->password ? u->password : "",
1507
0
                        options ? ";" : "",
1508
0
                        options ? options : "",
1509
0
                        (u->user || u->password || options) ? "@": "",
1510
0
                        allochost ? allochost : u->host,
1511
0
                        port ? ":": "",
1512
0
                        port ? port : "",
1513
0
                        u->path ? u->path : "/",
1514
0
                        show_query ? "?": "",
1515
0
                        u->query ? u->query : "",
1516
0
                        show_fragment ? "#": "",
1517
0
                        u->fragment ? u->fragment : "");
1518
0
    curlx_free(allochost);
1519
0
  }
1520
0
  if(!url)
1521
0
    return CURLUE_OUT_OF_MEMORY;
1522
0
  *part = url;
1523
0
  return CURLUE_OK;
1524
0
}
1525
1526
/*
 * Extract one part of the URL held in 'u'.
 *
 * On success a newly allocated string is stored in '*part' and CURLUE_OK is
 * returned. '*part' is set to NULL before anything else is attempted.
 *
 * Returns CURLUE_BAD_HANDLE for a NULL handle, CURLUE_BAD_PARTPOINTER for a
 * NULL 'part', the part-specific CURLUE_NO_* code when the requested part is
 * not available, CURLUE_UNKNOWN_PART for an unrecognized 'what', or whatever
 * urlget_url() / urlget_format() report.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value;
  CURLUcode missing = CURLUE_UNKNOWN_PART;
  bool plus2space = FALSE;
  char defport[7];

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_URL:
    /* the full URL is assembled separately */
    return urlget_url(u, part, flags);

  case CURLUPART_SCHEME:
    /* a guessed scheme is hidden when the caller asks for that */
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    break;

  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;

  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;

  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;

  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;

  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;

  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~CURLU_URLDECODE; /* never for port */
    if(u->scheme) {
      if(!value) {
        if(flags & CURLU_DEFAULT_PORT) {
          /* nothing stored, but the scheme's default port was asked for */
          const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
          if(h) {
            curl_msnprintf(defport, sizeof(defport), "%u", h->defport);
            value = defport;
          }
        }
      }
      else {
        /* a port is stored; hide it on request when it equals the default
           one for the scheme */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
    }
    break;

  case CURLUPART_PATH:
    /* an unset path reads as the root path */
    value = u->path ? u->path : "/";
    break;

  case CURLUPART_QUERY:
    value = u->query;
    missing = CURLUE_NO_QUERY;
    /* when decoding a query, '+' is turned into space as well */
    plus2space = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* there was a blank query and the user did not ask for it */
      value = NULL;
    break;

  case CURLUPART_FRAGMENT:
    value = u->fragment;
    missing = CURLUE_NO_FRAGMENT;
    if(!value && u->fragment_present && (flags & CURLU_GET_EMPTY))
      /* there was a blank fragment and the user asks for it */
      value = "";
    break;

  default:
    value = NULL;
    break;
  }

  if(!value)
    return missing;

  return urlget_format(u, what, value, part, plus2space, flags);
}
1620
1621
/*
 * Validate 'scheme' before it gets stored in the handle. This function does
 * not store the string itself, it only clears the "guessed scheme" state on
 * success.
 *
 * The length must be between 1 and MAX_SCHEME_LEN. A scheme for which
 * Curl_get_scheme_handler() finds a handler is accepted as is. Any other
 * scheme needs CURLU_NON_SUPPORT_SCHEME in 'flags' and must then follow the
 * RFC 3986 syntax: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Returns CURLUE_OK, CURLUE_BAD_SCHEME or CURLUE_UNSUPPORTED_SCHEME.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_handler *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme_handler(scheme);
  if(!h) {
    size_t i;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;
    if(!ISALPHA(scheme[0]))
      return CURLUE_BAD_SCHEME;
    /* check every byte after the first one, the last one included */
    for(i = 1; i < plen; i++) {
      const char c = scheme[i];
      if(!ISALNUM(c) && (c != '+') && (c != '-') && (c != '.'))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1650
1651
/*
 * Store a new port in the handle, both as a string and as a number.
 *
 * 'provided_port' must start with a digit, must be accepted by
 * curlx_str_number() with 0xffff as its limit argument, and must have nothing
 * following the number. Otherwise CURLUE_BAD_PORT_NUMBER is returned and the
 * handle is left unchanged. The stored string is re-generated from the parsed
 * number rather than copied from the input.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  curl_off_t num;
  char *portstr;

  /* not a number, or a weirdly provided one: not good! */
  if(!ISDIGIT(provided_port[0]) ||
     curlx_str_number(&provided_port, &num, 0xffff) ||
     *provided_port)
    return CURLUE_BAD_PORT_NUMBER;

  portstr = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, num);
  if(!portstr)
    return CURLUE_OUT_OF_MEMORY;

  /* replace the previous port string only once the new one exists */
  curlx_free(u->port);
  u->port = portstr;
  u->portnum = (unsigned short)num;
  return CURLUE_OK;
}
1669
1670
/*
 * Set the whole URL of the handle from 'url', whose length is 'part_size'.
 *
 *  - An empty 'url' is accepted (and changes nothing) only if the handle
 *    already holds enough to produce a full URL; otherwise
 *    CURLUE_MALFORMED_INPUT is returned.
 *  - An absolute 'url' replaces the handle's contents.
 *  - A non-absolute 'url' is applied as a relative reference on top of the
 *    current URL when one can be extracted; if not, it is parsed on its own
 *    and replaces the contents.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *current = NULL;
  CURLUcode rc;

  if(!part_size) {
    /* a blank URL is not a valid URL unless we already have a complete one
       and this is a redirect */
    switch(curl_url_get(u, CURLUPART_URL, &current, flags)) {
    case CURLUE_OK:
      /* the "" is a fine relative URL, but nothing changes */
      curlx_free(current);
      return CURLUE_OK;
    case CURLUE_OUT_OF_MEMORY:
      return CURLUE_OUT_OF_MEMORY;
    default:
      return CURLUE_MALFORMED_INPUT;
    }
  }

  /* an absolute new URL simply replaces the existing one */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME | CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  rc = curl_url_get(u, CURLUPART_URL, &current, flags);
  if(rc == CURLUE_OUT_OF_MEMORY)
    return rc;
  if(rc)
    /* the old URL is incomplete (no absolute URL could be extracted), so
       replace the existing with the new */
    return parseurl_and_replace(url, u, flags);

  DEBUGASSERT(current); /* it is set here */

  /* apply the relative part to create a new URL */
  rc = redirect_url(current, url, u, flags);
  curlx_free(current);
  return rc;
}
1716
1717
/*
 * Clear a single part of the handle, or all of it for CURLUPART_URL.
 *
 * The string for the part is freed and its pointer reset to NULL. The state
 * tied to a part is reset with it: the guessed-scheme flag for the scheme,
 * the numeric port for the port, and the "present" flags for query and
 * fragment.
 *
 * Returns CURLUE_UNKNOWN_PART for an unrecognized 'what', else CURLUE_OK.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **slot;

  switch(what) {
  case CURLUPART_URL:
    /* wipe everything */
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    slot = &u->scheme;
    break;
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    slot = &u->port;
    break;
  case CURLUPART_PATH:
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    slot = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    slot = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  /* free the selected string and leave NULL behind */
  Curl_safefree(*slot);
  return CURLUE_OK;
}
1763
1764
/*
 * allowed_in_path() tells whether byte 'x' is one of the punctuation
 * characters that may be stored in a path without being percent-encoded.
 *
 * Returns non-zero (true) for the listed characters and zero (false) for
 * every other byte value, including NUL.
 */
static bool allowed_in_path(unsigned char x)
{
  static const char passthrough[] = "!$&'(){}[]*+,;=:@/";

  /* search only the visible characters; the terminating NUL of the table is
     excluded so that a zero byte is never reported as allowed */
  return memchr(passthrough, x, sizeof(passthrough) - 1) != NULL;
}
1789
1790
/*
 * curl_url_set() sets, replaces or clears one part of the URL handle 'u'.
 *
 * u     - the handle to update. NULL makes the call return
 *         CURLUE_BAD_HANDLE.
 * what  - which part to operate on.
 * part  - the new contents. NULL clears the part (see urlset_clear()). A
 *         string longer than CURL_MAX_INPUT_LENGTH is rejected with
 *         CURLUE_MALFORMED_INPUT.
 * flags - CURLU_* bits. The ones inspected directly here are
 *         CURLU_URLENCODE, CURLU_APPENDQUERY and CURLU_NO_AUTHORITY; the
 *         full set is also handed on to set_url_scheme() and set_url().
 *
 * Part specific handling:
 *  - SCHEME is vetted by set_url_scheme() and is never URL encoded.
 *  - PORT and URL are handed off entirely to set_url_port() and set_url().
 *  - HOST drops any stored zone id and must pass hostname_check(). An empty
 *    hostname is accepted only together with CURLU_NO_AUTHORITY.
 *  - PATH always gets a leading slash, and when URL encoding the characters
 *    accepted by allowed_in_path() are stored as-is.
 *  - QUERY encodes space as '+' when URL encoding. With CURLU_APPENDQUERY
 *    the new string is appended to an existing query, separated by '&'
 *    unless the old query already ends with one, and the first '=' is left
 *    unencoded.
 *
 * When CURLU_URLENCODE is not used, the input is stored as provided except
 * that the hex digits of percent-encoded triplets are lowercased.
 *
 * Returns CURLUE_OK on success, otherwise a CURLUcode describing the
 * failure. Failures from the dynbuf functions are all translated with
 * cc2cu().
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool plusencode = FALSE;
  bool pathmode = FALSE;
  bool leadingslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;
  size_t nalloc;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    /* setting a part to NULL clears it */
    return urlset_clear(u, what);

  nalloc = strlen(part);
  if(nalloc > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  /* pick the destination member and the encoding rules for this part */
  switch(what) {
  case CURLUPART_SCHEME: {
    CURLUcode status = set_url_scheme(u, part, flags);
    if(status)
      return status;
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
    return set_url_port(u, part);
  case CURLUPART_PATH:
    pathmode = TRUE;
    leadingslash = TRUE; /* enforce */
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
    equalsencode = appendquery;
    storep = &u->query;
    u->query_present = TRUE;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    u->fragment_present = TRUE;
    break;
  case CURLUPART_URL:
    return set_url(u, part, nalloc, flags);
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp;
    struct dynbuf enc;
    /* worst case: every input byte becomes a three byte %XX triplet, plus
       the terminator and an optional added leading slash */
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);

    if(leadingslash && (part[0] != '/')) {
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
      if(result)
        return cc2cu(result);
    }
    if(urlencode) {
      const unsigned char *i;

      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = curlx_dyn_addn(&enc, "+", 1);
          if(result)
            /* translated with cc2cu() like the other branches */
            return cc2cu(result);
        }
        else if(ISUNRESERVED(*i) ||
                (pathmode && allowed_in_path(*i)) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = curlx_dyn_addn(&enc, i, 1);
          if(result)
            return cc2cu(result);
        }
        else {
          /* everything else is stored as a percent-encoded triplet */
          unsigned char out[3] = { '%' };
          Curl_hexbyte(&out[1], *i);
          result = curlx_dyn_addn(&enc, out, 3);
          if(result)
            return cc2cu(result);
        }
      }
    }
    else {
      char *p;
      CURLcode result = curlx_dyn_add(&enc, part);
      if(result)
        return cc2cu(result);
      p = curlx_dyn_ptr(&enc);
      while(*p) {
        /* make sure percent encoded are lower case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }
    newp = curlx_dyn_ptr(&enc);

    if(appendquery && newp) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen - 1] != '&');
      if(querylen) {
        struct dynbuf qbuf;
        CURLcode result;
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);

        /* build "<old query>[&]<new part>", stopping at the first error */
        result = curlx_dyn_addn(&qbuf, u->query, querylen);
        if(!result && addamperand)
          result = curlx_dyn_addn(&qbuf, "&", 1);
        if(!result)
          result = curlx_dyn_add(&qbuf, newp);

        /* 'newp' points into 'enc', which is not needed past this point
           whether the append worked or not */
        curlx_dyn_free(&enc);
        if(result)
          /* report the actual dynbuf failure instead of always claiming
             out of memory; 'qbuf' is capped at CURL_MAX_INPUT_LENGTH so a
             size failure is possible here */
          return cc2cu(result);

        curlx_free(*storep);
        *storep = curlx_dyn_ptr(&qbuf);
        return CURLUE_OK;
      }
      /* no previous query to append to: store 'newp' like a plain set */
    }

    else if(what == CURLUPART_HOST) {
      size_t n = curlx_dyn_len(&enc);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it is allowed to be empty. */
      }
      else {
        bool bad = FALSE;
        if(!n)
          bad = TRUE; /* empty hostname is not okay */
        else if(!urlencode) {
          /* if the hostname part was not URL encoded here, it was set ready
             URL encoded so we need to decode it to check */
          size_t dlen;
          char *decoded = NULL;
          CURLcode result =
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
          if(result || hostname_check(u, decoded, dlen))
            bad = TRUE;
          curlx_free(decoded);
        }
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
          bad = TRUE;
        if(bad) {
          curlx_dyn_free(&enc);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    /* replace the old string; the handle now owns the buffer built in
       'enc' */
    curlx_free(*storep);
    *storep = (char *)CURL_UNCONST(newp);
  }
  return CURLUE_OK;
}