Coverage Report

Created: 2026-04-29 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/CMake/Utilities/cmcurl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
#include "curl_setup.h"
25
26
#include "urldata.h"
27
#include "urlapi-int.h"
28
#include "strcase.h"
29
#include "url.h"
30
#include "escape.h"
31
#include "curlx/inet_pton.h"
32
#include "curlx/inet_ntop.h"
33
#include "curlx/strdup.h"
34
#include "idn.h"
35
#include "curlx/strparse.h"
36
#include "curl_memrchr.h"
37
38
#ifdef _WIN32
/* TRUE when 'str' begins with an MS-DOS/Windows drive prefix: one ASCII
   letter directly followed by a colon, like the "c:" in "c:foo" */
#define STARTS_WITH_DRIVE_PREFIX(str)          \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') ||    \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) &&   \
   ((str)[1] == ':'))
#endif

/* TRUE when 'str' begins with a drive prefix as written in URLs: one ASCII
 * letter, then ':' or '|', then a slash, a backslash or the end of the
 * string. The checks short-circuit left to right, so (str)[2] is only read
 * when the first two bytes matched. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str)                      \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') ||                    \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) &&                   \
   ((str)[1] == ':' || (str)[1] == '|') &&                     \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* upper limit for the length of a scheme name; the scheme is not URL
   encoded */
#define MAX_SCHEME_LEN 40

/*
 * IPv6 addresses are parsed even when USE_IPV6 is disabled, so make sure
 * there is _some_ value for AF_INET6 in this file without spreading the
 * fake value anywhere else.
 */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
65
66
/* Internal representation of CURLU. Point to URL-encoded strings. */
67
struct Curl_URL {
68
  char *scheme;
69
  char *user;
70
  char *password;
71
  char *options; /* IMAP only? */
72
  char *host;
73
  char *zoneid; /* for numerical IPv6 addresses */
74
  char *port;
75
  char *path;
76
  char *query;
77
  char *fragment;
78
  unsigned short portnum; /* the numerical version (if 'port' is set) */
79
  BIT(query_present);    /* to support blank */
80
  BIT(fragment_present); /* to support blank */
81
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
82
};
83
84
0
#define DEFAULT_SCHEME "https"
85
86
static void free_urlhandle(struct Curl_URL *u)
87
0
{
88
0
  curlx_free(u->scheme);
89
0
  curlx_free(u->user);
90
0
  curlx_free(u->password);
91
0
  curlx_free(u->options);
92
0
  curlx_free(u->host);
93
0
  curlx_free(u->zoneid);
94
0
  curlx_free(u->port);
95
0
  curlx_free(u->path);
96
0
  curlx_free(u->query);
97
0
  curlx_free(u->fragment);
98
0
}
99
100
/*
 * Return a pointer to the separator that ends the hostname: the first '/'
 * or '?' found after the first "//" in the URL, or from the start of the
 * string if there is no "//". Points at the terminating null byte when
 * neither character is present. The '?' matters for URLs like
 * http://www.example.com?id=2380
 */
static const char *find_host_sep(const char *url)
{
  const char *slashes = strstr(url, "//");
  /* the hostname begins right after the double slash, if there is one */
  const char *host = slashes ? slashes + 2 : url;

  /* skip everything that is neither '/' nor '?' */
  return host + strcspn(host, "/?");
}
119
120
/* map a CURLcode to a CURLUcode: CURLE_TOO_LARGE stays "too large", every
   other value is reported as out of memory */
#define cc2cu(x) \
  ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY : CURLUE_TOO_LARGE)
123
124
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
125
 * spaces in the source URL accordingly.
126
 *
127
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
128
 * will fail.
129
 */
130
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
131
                               size_t len, bool relative,
132
                               bool query)
133
0
{
134
  /* we must add this with whitespace-replacing */
135
0
  bool left = !query;
136
0
  const unsigned char *iptr;
137
0
  const unsigned char *host_sep = (const unsigned char *)url;
138
0
  CURLcode result = CURLE_OK;
139
140
0
  if(!relative) {
141
0
    size_t n;
142
0
    host_sep = (const unsigned char *)find_host_sep(url);
143
144
    /* output the first piece as-is */
145
0
    n = (const char *)host_sep - url;
146
0
    result = curlx_dyn_addn(o, url, n);
147
0
    len -= n;
148
0
  }
149
150
0
  for(iptr = host_sep; len && !result; iptr++, len--) {
151
0
    if(*iptr == ' ') {
152
0
      if(left)
153
0
        result = curlx_dyn_addn(o, "%20", 3);
154
0
      else
155
0
        result = curlx_dyn_addn(o, "+", 1);
156
0
    }
157
0
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
158
0
      unsigned char out[3] = { '%' };
159
0
      Curl_hexbyte(&out[1], *iptr);
160
0
      result = curlx_dyn_addn(o, out, 3);
161
0
    }
162
0
    else {
163
0
      result = curlx_dyn_addn(o, iptr, 1);
164
0
      if(*iptr == '?')
165
0
        left = FALSE;
166
0
    }
167
0
  }
168
169
0
  if(result)
170
0
    return cc2cu(result);
171
0
  return CURLUE_OK;
172
0
}
173
174
/*
175
 * Returns the length of the scheme if the given URL is absolute (as opposed
176
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
177
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
178
 *
179
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
180
 * scheme.
181
 */
182
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
183
                            bool guess_scheme)
184
0
{
185
0
  size_t i = 0;
186
0
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
187
0
  (void)buflen; /* only used in debug-builds */
188
0
  if(buf)
189
0
    buf[0] = 0; /* always leave a defined value in buf */
190
#ifdef _WIN32
191
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
192
    return 0;
193
#endif
194
0
  if(ISALPHA(url[0]))
195
0
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
196
0
      char s = url[i];
197
0
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.'))) {
198
        /* RFC 3986 3.1 explains:
199
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
200
        */
201
0
      }
202
0
      else {
203
0
        break;
204
0
      }
205
0
    }
206
0
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
207
    /* If this does not guess scheme, the scheme always ends with the colon so
208
       that this also detects data: URLs etc. In guessing mode, data: could
209
       be the hostname "data" with a specified port number. */
210
211
    /* the length of the scheme is the name part only */
212
0
    size_t len = i;
213
0
    if(buf) {
214
0
      Curl_strntolower(buf, url, i);
215
0
      buf[i] = 0;
216
0
    }
217
0
    return len;
218
0
  }
219
0
  return 0;
220
0
}
221
222
/* scan for byte values <= 31, 127 and sometimes space */
223
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
224
0
{
225
0
  size_t n = strlen(url);
226
0
  size_t i;
227
0
  unsigned char control;
228
0
  const unsigned char *p = (const unsigned char *)url;
229
0
  if(n > CURL_MAX_INPUT_LENGTH)
230
0
    return CURLUE_MALFORMED_INPUT;
231
232
0
  control = allowspace ? 0x1f : 0x20;
233
0
  for(i = 0; i < n; i++) {
234
0
    if(p[i] <= control || p[i] == 127)
235
0
      return CURLUE_MALFORMED_INPUT;
236
0
  }
237
0
  *urllen = n;
238
0
  return CURLUE_OK;
239
0
}
240
241
/*
242
 * parse_hostname_login()
243
 *
244
 * Parse the login details (username, password and options) from the URL and
245
 * strip them out of the hostname
246
 *
247
 */
248
static CURLUcode parse_hostname_login(struct Curl_URL *u,
249
                                      const char *login,
250
                                      size_t len,
251
                                      unsigned int flags,
252
                                      size_t *offset) /* to the hostname */
253
0
{
254
0
  CURLUcode result = CURLUE_OK;
255
0
  CURLcode ccode;
256
0
  char *userp = NULL;
257
0
  char *passwdp = NULL;
258
0
  char *optionsp = NULL;
259
0
  const struct Curl_scheme *h = NULL;
260
261
  /* At this point, we assume all the other special cases have been taken
262
   * care of, so the host is at most
263
   *
264
   *   [user[:password][;options]]@]hostname
265
   *
266
   * We need somewhere to put the embedded details, so do that first.
267
   */
268
0
  const char *ptr;
269
270
0
  DEBUGASSERT(login);
271
272
0
  *offset = 0;
273
0
  ptr = memchr(login, '@', len);
274
0
  if(!ptr)
275
0
    goto out;
276
277
  /* We will now try to extract the
278
   * possible login information in a string like:
279
   * ftp://user:password@ftp.site.example:8021/README */
280
0
  ptr++;
281
282
  /* if this is a known scheme, get some details */
283
0
  if(u->scheme)
284
0
    h = Curl_get_scheme(u->scheme);
285
286
  /* We could use the login information in the URL so extract it. Only parse
287
     options if the handler says we should. Note that 'h' might be NULL! */
288
0
  ccode = Curl_parse_login_details(login, ptr - login - 1,
289
0
                                   &userp, &passwdp,
290
0
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
291
0
                                   &optionsp : NULL);
292
0
  if(ccode) {
293
    /* the only possible error from Curl_parse_login_details is out of
294
       memory: */
295
0
    result = CURLUE_OUT_OF_MEMORY;
296
0
    goto out;
297
0
  }
298
299
0
  if(userp) {
300
0
    if(flags & CURLU_DISALLOW_USER) {
301
      /* Option DISALLOW_USER is set and URL contains username. */
302
0
      result = CURLUE_USER_NOT_ALLOWED;
303
0
      goto out;
304
0
    }
305
0
    curlx_free(u->user);
306
0
    u->user = userp;
307
0
  }
308
309
0
  if(passwdp) {
310
0
    curlx_free(u->password);
311
0
    u->password = passwdp;
312
0
  }
313
314
0
  if(optionsp) {
315
0
    curlx_free(u->options);
316
0
    u->options = optionsp;
317
0
  }
318
319
  /* the hostname starts at this offset */
320
0
  *offset = ptr - login;
321
0
  return CURLUE_OK;
322
323
0
out:
324
325
0
  curlx_free(userp);
326
0
  curlx_free(passwdp);
327
0
  curlx_free(optionsp);
328
0
  u->user = NULL;
329
0
  u->password = NULL;
330
0
  u->options = NULL;
331
332
0
  return result;
333
0
}
334
335
/*
 * Split off a ":port" suffix from the hostname held in 'host'. When a port
 * is found, the dynbuf is cut at the colon and the number is stored in
 * u->portnum and, as a freshly formatted string, in u->port.
 *
 * Returns CURLUE_BAD_IPV6 if a leading '[' has no matching ']',
 * CURLUE_BAD_PORT_NUMBER for an invalid port and CURLUE_OUT_OF_MEMORY if
 * the port string cannot be allocated.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  const char *hostname = curlx_dyn_ptr(host);
  const char *colon;
  curl_off_t port;
  size_t keep;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /* this is a RFC2732-style specified IP-address: the port can only
       follow the ']' ending bracket */
    const char *bracket = strchr(hostname, ']');
    if(!bracket)
      return CURLUE_BAD_IPV6;
    colon = bracket + 1;
    if(!*colon)
      colon = NULL;
    else if(*colon != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    /* no port specified */
    return CURLUE_OK;

  /* Browser behavior adaptation. If there is a colon with no digits after,
     cut off the name there which makes us ignore the colon and use the
     default port. Firefox, Chrome and Safari all do that.

     Do not do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  keep = colon - hostname;
  curlx_dyn_setlen(host, keep);
  colon++;
  if(!*colon)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  /* digits only, at most 65535 and nothing trailing */
  if(curlx_str_number(&colon, &port, 0xffff) || *colon)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short)port;
  /* generate a new port number string to get rid of leading zeroes etc */
  curlx_free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
388
389
/* this assumes 'hostname' now starts with [ */
390
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
391
                            size_t hlen) /* length of hostname */
392
0
{
393
0
  size_t len;
394
0
  DEBUGASSERT(*hostname == '[');
395
0
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
396
0
    return CURLUE_BAD_IPV6;
397
0
  hostname++;
398
0
  hlen -= 2;
399
400
  /* only valid IPv6 letters are ok */
401
0
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
402
403
0
  if(hlen != len) {
404
0
    hlen = len;
405
0
    if(hostname[len] == '%') {
406
      /* this could now be '%[zone id]' */
407
0
      char zoneid[16];
408
0
      int i = 0;
409
0
      char *h = &hostname[len + 1];
410
      /* pass '25' if present and is a URL encoded percent sign */
411
0
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
412
0
        h += 2;
413
0
      while(*h && (*h != ']') && (i < 15))
414
0
        zoneid[i++] = *h++;
415
0
      if(!i || (']' != *h))
416
0
        return CURLUE_BAD_IPV6;
417
0
      zoneid[i] = 0;
418
0
      u->zoneid = curlx_strdup(zoneid);
419
0
      if(!u->zoneid)
420
0
        return CURLUE_OUT_OF_MEMORY;
421
0
      hostname[len] = ']'; /* insert end bracket */
422
0
      hostname[len + 1] = 0; /* terminate the hostname */
423
0
    }
424
0
    else
425
0
      return CURLUE_BAD_IPV6;
426
    /* hostname is fine */
427
0
  }
428
429
  /* Normalize the IPv6 address */
430
0
  {
431
0
    char dest[16]; /* fits a binary IPv6 address */
432
0
    hostname[hlen] = 0; /* end the address there */
433
0
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
434
0
      return CURLUE_BAD_IPV6;
435
0
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen + 1)) {
436
0
      hlen = strlen(hostname); /* might be shorter now */
437
0
      hostname[hlen + 1] = 0;
438
0
    }
439
0
    hostname[hlen] = ']'; /* restore ending bracket */
440
0
  }
441
0
  return CURLUE_OK;
442
0
}
443
444
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
445
                                size_t hlen) /* length of hostname */
446
0
{
447
0
  size_t len;
448
0
  DEBUGASSERT(hostname);
449
450
0
  if(!hlen)
451
0
    return CURLUE_NO_HOST;
452
0
  else if(hostname[0] == '[')
453
0
    return ipv6_parse(u, hostname, hlen);
454
0
  else {
455
    /* letters from the second string are not ok */
456
0
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
457
0
    if(hlen != len)
458
      /* hostname with bad content */
459
0
      return CURLUE_BAD_HOSTNAME;
460
0
  }
461
0
  return CURLUE_OK;
462
0
}
463
464
/*
465
 * Handle partial IPv4 numerical addresses and different bases, like
466
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
467
 *
468
 * If the given input string is syntactically wrong IPv4 or any part for
469
 * example is too big, this function returns HOST_NAME.
470
 *
471
 * Output the "normalized" version of that input string in plain quad decimal
472
 * integers.
473
 *
474
 * Returns the host type.
475
 */
476
477
0
#define HOST_ERROR   (-1) /* out of memory */
478
479
0
#define HOST_NAME    1
480
0
#define HOST_IPV4    2
481
0
#define HOST_IPV6    3
482
483
static int ipv4_normalize(struct dynbuf *host)
484
0
{
485
0
  bool done = FALSE;
486
0
  int n = 0;
487
0
  const char *c = curlx_dyn_ptr(host);
488
0
  unsigned int parts[4] = { 0, 0, 0, 0 };
489
0
  CURLcode result = CURLE_OK;
490
491
0
  if(*c == '[')
492
0
    return HOST_IPV6;
493
494
0
  while(!done) {
495
0
    int rc;
496
0
    curl_off_t l;
497
0
    if(*c == '0') {
498
0
      if(c[1] == 'x') {
499
0
        c += 2; /* skip the prefix */
500
0
        rc = curlx_str_hex(&c, &l, UINT_MAX);
501
0
      }
502
0
      else
503
0
        rc = curlx_str_octal(&c, &l, UINT_MAX);
504
0
    }
505
0
    else
506
0
      rc = curlx_str_number(&c, &l, UINT_MAX);
507
508
0
    if(rc)
509
0
      return HOST_NAME;
510
511
0
    parts[n] = (unsigned int)l;
512
513
0
    switch(*c) {
514
0
    case '.':
515
0
      if(n == 3)
516
0
        return HOST_NAME;
517
0
      n++;
518
0
      c++;
519
0
      break;
520
521
0
    case '\0':
522
0
      done = TRUE;
523
0
      break;
524
525
0
    default:
526
0
      return HOST_NAME;
527
0
    }
528
0
  }
529
530
0
  switch(n) {
531
0
  case 0: /* a -- 32 bits */
532
0
    curlx_dyn_reset(host);
533
534
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
535
0
                            (parts[0] >> 24),
536
0
                            ((parts[0] >> 16) & 0xff),
537
0
                            ((parts[0] >> 8) & 0xff),
538
0
                            (parts[0] & 0xff));
539
0
    break;
540
0
  case 1: /* a.b -- 8.24 bits */
541
0
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
542
0
      return HOST_NAME;
543
0
    curlx_dyn_reset(host);
544
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
545
0
                            (parts[0]),
546
0
                            ((parts[1] >> 16) & 0xff),
547
0
                            ((parts[1] >> 8) & 0xff),
548
0
                            (parts[1] & 0xff));
549
0
    break;
550
0
  case 2: /* a.b.c -- 8.8.16 bits */
551
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
552
0
      return HOST_NAME;
553
0
    curlx_dyn_reset(host);
554
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
555
0
                            (parts[0]),
556
0
                            (parts[1]),
557
0
                            ((parts[2] >> 8) & 0xff),
558
0
                            (parts[2] & 0xff));
559
0
    break;
560
0
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
561
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
562
0
       (parts[3] > 0xff))
563
0
      return HOST_NAME;
564
0
    curlx_dyn_reset(host);
565
0
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
566
0
                            (parts[0]),
567
0
                            (parts[1]),
568
0
                            (parts[2]),
569
0
                            (parts[3]));
570
0
    break;
571
0
  }
572
0
  if(result)
573
0
    return HOST_ERROR;
574
0
  return HOST_IPV4;
575
0
}
576
577
/* if necessary, replace the host content with a URL decoded version */
578
static CURLUcode urldecode_host(struct dynbuf *host)
579
0
{
580
0
  const char *per;
581
0
  const char *hostname = curlx_dyn_ptr(host);
582
0
  per = strchr(hostname, '%');
583
0
  if(!per)
584
    /* nothing to decode */
585
0
    return CURLUE_OK;
586
0
  else {
587
    /* encoded */
588
0
    size_t dlen;
589
0
    char *decoded;
590
0
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
591
0
                                     REJECT_CTRL);
592
0
    if(result)
593
0
      return CURLUE_BAD_HOSTNAME;
594
0
    curlx_dyn_reset(host);
595
0
    result = curlx_dyn_addn(host, decoded, dlen);
596
0
    curlx_free(decoded);
597
0
    if(result)
598
0
      return cc2cu(result);
599
0
  }
600
601
0
  return CURLUE_OK;
602
0
}
603
604
static CURLUcode parse_authority(struct Curl_URL *u,
605
                                 const char *auth, size_t authlen,
606
                                 unsigned int flags,
607
                                 struct dynbuf *host,
608
                                 bool has_scheme)
609
0
{
610
0
  size_t offset;
611
0
  CURLUcode uc;
612
0
  CURLcode result;
613
614
  /*
615
   * Parse the login details and strip them out of the hostname.
616
   */
617
0
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
618
0
  if(uc)
619
0
    goto out;
620
621
0
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
622
0
  if(result) {
623
0
    uc = cc2cu(result);
624
0
    goto out;
625
0
  }
626
627
0
  uc = Curl_parse_port(u, host, has_scheme);
628
0
  if(uc)
629
0
    goto out;
630
631
0
  if(!curlx_dyn_len(host))
632
0
    return CURLUE_NO_HOST;
633
634
0
  switch(ipv4_normalize(host)) {
635
0
  case HOST_IPV4:
636
0
    break;
637
0
  case HOST_IPV6:
638
0
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
639
0
    break;
640
0
  case HOST_NAME:
641
0
    uc = urldecode_host(host);
642
0
    if(!uc)
643
0
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
644
0
    break;
645
0
  case HOST_ERROR:
646
0
    uc = CURLUE_OUT_OF_MEMORY;
647
0
    break;
648
0
  default:
649
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
650
0
    break;
651
0
  }
652
653
0
out:
654
0
  return uc;
655
0
}
656
657
/* used for HTTP/2 server push */
658
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
659
0
{
660
0
  CURLUcode result;
661
0
  struct dynbuf host;
662
663
0
  DEBUGASSERT(authority);
664
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
665
666
0
  result = parse_authority(u, authority, strlen(authority),
667
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
668
0
  if(result)
669
0
    curlx_dyn_free(&host);
670
0
  else {
671
0
    curlx_free(u->host);
672
0
    u->host = curlx_dyn_ptr(&host);
673
0
  }
674
0
  return result;
675
0
}
676
677
/*
 * "Remove Dot Segments"
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 */

/* If the string starts with a dot, written as "." or URL encoded as "%2e"
   or "%2E", advance *str past it, reduce *clen by the number of bytes
   passed and return TRUE. Otherwise return FALSE with both left alone. */
static bool is_dot(const char **str, size_t *clen)
{
  const char *s = *str;
  size_t skip;

  if(s[0] == '.')
    skip = 1;
  else if((*clen >= 3) &&
          (s[0] == '%') && (s[1] == '2') && ((s[2] | 0x20) == 'e'))
    /* OR-ing in 0x20 lowercases the letter */
    skip = 3;
  else
    return FALSE;

  *str = s + skip;
  *clen -= skip;
  return TRUE;
}

#define ISSLASH(x) ((x) == '/')
700
701
/*
702
 * dedotdotify()
703
 * @unittest: 1395
704
 *
705
 * This function gets a null-terminated path with dot and dotdot sequences
706
 * passed in and strips them off according to the rules in RFC 3986 section
707
 * 5.2.4.
708
 *
709
 * The function handles a path. It should not contain the query nor fragment.
710
 *
711
 * RETURNS
712
 *
713
 * Zero for success and 'out' set to an allocated dedotdotified string.
714
 */
715
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
716
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
717
0
{
718
0
  struct dynbuf out;
719
0
  CURLcode result = CURLE_OK;
720
721
0
  *outp = NULL;
722
  /* the path always starts with a slash, and a slash has not dot */
723
0
  if(clen < 2)
724
0
    return 0;
725
726
0
  curlx_dyn_init(&out, clen + 1);
727
728
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
729
      remove that prefix from the input buffer; otherwise, */
730
0
  if(is_dot(&input, &clen)) {
731
0
    const char *p = input;
732
0
    size_t blen = clen;
733
734
0
    if(!clen)
735
      /* . [end] */
736
0
      goto end;
737
0
    else if(ISSLASH(*p)) {
738
      /* one dot followed by a slash */
739
0
      input = p + 1;
740
0
      clen--;
741
0
    }
742
743
    /*  D. if the input buffer consists only of "." or "..", then remove
744
        that from the input buffer; otherwise, */
745
0
    else if(is_dot(&p, &blen)) {
746
0
      if(!blen)
747
        /* .. [end] */
748
0
        goto end;
749
0
      else if(ISSLASH(*p)) {
750
        /* ../ */
751
0
        input = p + 1;
752
0
        clen = blen - 1;
753
0
      }
754
0
    }
755
0
  }
756
757
0
  while(clen && !result) { /* until end of path content */
758
0
    if(ISSLASH(*input)) {
759
0
      const char *p = &input[1];
760
0
      size_t blen = clen - 1;
761
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
762
          "."  is a complete path segment, then replace that prefix with "/" in
763
          the input buffer; otherwise, */
764
0
      if(is_dot(&p, &blen)) {
765
0
        if(!blen) { /* /. */
766
0
          result = curlx_dyn_addn(&out, "/", 1);
767
0
          break;
768
0
        }
769
0
        else if(ISSLASH(*p)) { /* /./ */
770
0
          input = p;
771
0
          clen = blen;
772
0
          continue;
773
0
        }
774
775
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
776
            where ".." is a complete path segment, then replace that prefix
777
            with "/" in the input buffer and remove the last segment and its
778
            preceding "/" (if any) from the output buffer; otherwise, */
779
0
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
780
          /* remove the last segment from the output buffer */
781
0
          size_t len = curlx_dyn_len(&out);
782
0
          if(len) {
783
0
            const char *ptr = curlx_dyn_ptr(&out);
784
0
            const char *last = memrchr(ptr, '/', len);
785
0
            if(last)
786
              /* trim the output at the slash */
787
0
              curlx_dyn_setlen(&out, last - ptr);
788
0
          }
789
790
0
          if(blen) { /* /../ */
791
0
            input = p;
792
0
            clen = blen;
793
0
            continue;
794
0
          }
795
0
          result = curlx_dyn_addn(&out, "/", 1);
796
0
          break;
797
0
        }
798
0
      }
799
0
    }
800
801
    /*  E. move the first path segment in the input buffer to the end of
802
        the output buffer, including the initial "/" character (if any) and
803
        any subsequent characters up to, but not including, the next "/"
804
        character or the end of the input buffer. */
805
806
0
    result = curlx_dyn_addn(&out, input, 1);
807
0
    input++;
808
0
    clen--;
809
0
  }
810
0
end:
811
0
  if(!result) {
812
0
    if(curlx_dyn_len(&out))
813
0
      *outp = curlx_dyn_ptr(&out);
814
0
    else {
815
0
      *outp = curlx_strdup("");
816
0
      if(!*outp)
817
0
        return 1;
818
0
    }
819
0
  }
820
0
  return result ? 1 : 0; /* success */
821
0
}
822
823
/*
 * Parse a file: URL of 'urllen' bytes. The scheme of the handle is set to
 * "file" and the start and length of the path within 'url' are returned in
 * '*pathp' and '*pathlenp'. The 'host' dynbuf is reset, except on Windows
 * when the URL holds a UNC hostname, which is then added to it.
 */
static CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
                            struct dynbuf *host, const char **pathp,
                            size_t *pathlenp)
{
  const char *path;
  size_t pathlen;
  bool uncpath = FALSE;

  if(urllen <= 6)
    /* file:/ is not enough to actually be a complete file: URL */
    return CURLUE_BAD_FILE_URL;

  u->scheme = curlx_strdup("file");
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  /* what follows the five bytes of "file:" is the path, for now */
  path = url + 5;
  pathlen = urllen - 5;

  /* Extra handling URLs with an authority component (i.e. that start with
   * "file://")
   *
   * We allow omitted hostname (e.g. file:/<path>) -- valid according to
   * RFC 8089, but not the (current) WHAT-WG URL spec.
   */
  if((path[0] == '/') && (path[1] == '/')) {
    /* swallow the two slashes */
    const char *auth = path + 2;

    /*
     * According to RFC 8089, a file: URL can be reliably dereferenced if:
     *
     *  o it has no/blank hostname, or
     *
     *  o the hostname matches "localhost" (case-insensitively), or
     *
     *  o the hostname is a FQDN that resolves to this machine, or
     *
     *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
     *    Appendix E.3).
     *
     * For brevity, we only consider URLs with empty, "localhost", or
     * "127.0.0.1" hostnames as local, otherwise as an UNC String.
     *
     * Additionally, there is an exception for URLs with a Windows drive
     * letter in the authority (which was accidentally omitted from RFC 8089
     * Appendix E, but believe me, it was meant to be there. --MK)
     */
    if(!((auth[0] == '/') || STARTS_WITH_URL_DRIVE_PREFIX(auth))) {
      /* the URL includes a hostname, it must match "localhost" or
         "127.0.0.1" to be valid */
      if(checkprefix("localhost/", auth) ||
         checkprefix("127.0.0.1/", auth)) {
        auth += 9; /* now points to the slash after the host */
      }
      else {
#ifdef _WIN32
        const char *delim;
        size_t len;

        /* the hostname, NetBIOS computer name, can not contain disallowed
           chars, and the delimiting slash character must be appended to the
           hostname */
        delim = strpbrk(auth, "/\\:*?\"<>|");
        if(!delim || *delim != '/')
          return CURLUE_BAD_FILE_URL;

        len = delim - auth;
        if(len) {
          CURLcode code = curlx_dyn_addn(host, auth, len);
          if(code)
            return cc2cu(code);
          uncpath = TRUE;
        }

        auth -= 2; /* now points to the // before the host in UNC */
#else
        /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
           none */
        return CURLUE_BAD_FILE_URL;
#endif
      }
    }

    path = auth;
    pathlen = urllen - (auth - url);
  }

  if(!uncpath)
    /* no host for file: URLs by default */
    curlx_dyn_reset(host);

#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
  /* Do not allow Windows drive letters when not in Windows.
   * This catches both "file:/c:" and "file:c:" */
  if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
     STARTS_WITH_URL_DRIVE_PREFIX(path)) {
    /* File drive letters are only accepted in MS-DOS/Windows */
    return CURLUE_BAD_FILE_URL;
  }
#else
  /* If the path starts with a slash and a drive letter, ditch the slash */
  if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
    /* nothing is copied, the path merely starts one byte later */
    path++;
    pathlen--;
  }
#endif
  *pathp = path;
  *pathlenp = pathlen;
  return CURLUE_OK;
}
934
935
/*
 * Decide which scheme to store in the handle and where the hostname starts.
 *
 * With a non-zero 'schemelen', 'schemebuf' holds the scheme found in 'url'
 * and the hostname starts after the one to three slashes that must follow
 * the colon. With a zero 'schemelen' the URL has no scheme: that is only
 * accepted when CURLU_DEFAULT_SCHEME or CURLU_GUESS_SCHEME is set and the
 * hostname then starts at 'url'. No scheme is stored in the handle if only
 * CURLU_GUESS_SCHEME is set.
 */
static CURLUcode parse_scheme(const char *url, CURLU *u, char *schemebuf,
                              size_t schemelen, unsigned int flags,
                              const char **hostpp)
{
  const char *scheme = NULL;

  if(!schemelen) {
    /* no scheme! */
    if(!(flags & (CURLU_DEFAULT_SCHEME | CURLU_GUESS_SCHEME)))
      return CURLUE_BAD_SCHEME;

    if(flags & CURLU_DEFAULT_SCHEME)
      scheme = DEFAULT_SCHEME;

    /* try without scheme specified */
    *hostpp = url;
  }
  else {
    /* step over the scheme and the colon, then count slashes; counting
       stops at four as that is already too many */
    const char *after = url + schemelen + 1;
    int slashes = 0;

    while((slashes < 4) && (after[slashes] == '/'))
      slashes++;

    if(!Curl_get_scheme(schemebuf) &&
       !(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;

    if((slashes < 1) || (slashes > 3))
      /* less than one or more than three slashes */
      return CURLUE_BAD_SLASHES;

    scheme = schemebuf;
    *hostpp = after + slashes; /* hostname starts here */
  }

  if(scheme) {
    u->scheme = curlx_strdup(scheme);
    if(!u->scheme)
      return CURLUE_OUT_OF_MEMORY;
  }
  return CURLUE_OK;
}
983
984
/*
 * guess_scheme() picks a scheme from the leading label of the hostname held
 * in 'host' (legacy curl behavior), stores a copy in u->scheme and marks the
 * scheme as guessed. Anything not matching a known prefix becomes "http".
 */
static CURLUcode guess_scheme(CURLU *u, struct dynbuf *host)
{
  const char *name = curlx_dyn_ptr(host);
  const char *guess = "http"; /* used when no prefix matches */

  if(checkprefix("ftp.", name))
    guess = "ftp";
  else if(checkprefix("dict.", name))
    guess = "dict";
  else if(checkprefix("ldap.", name))
    guess = "ldap";
  else if(checkprefix("imap.", name))
    guess = "imap";
  else if(checkprefix("smtp.", name))
    guess = "smtp";
  else if(checkprefix("pop3.", name))
    guess = "pop3";

  u->scheme = curlx_strdup(guess);
  if(!u->scheme)
    return CURLUE_OUT_OF_MEMORY;

  u->guessed_scheme = TRUE;
  return CURLUE_OK;
}
1011
1012
/*
 * handle_fragment() records that the URL has a fragment and, if there is
 * anything after the '#', stores it in u->fragment.
 *
 * 'fragment' points at the '#' and 'fraglen' counts it. With CURLU_URLENCODE
 * set, the stored fragment is the URL encoded version.
 */
static CURLUcode handle_fragment(CURLU *u, const char *fragment,
                                 size_t fraglen, unsigned int flags)
{
  u->fragment_present = TRUE;

  if(fraglen <= 1)
    /* only the '#' itself, nothing to store */
    return CURLUE_OK;

  if(!(flags & CURLU_URLENCODE)) {
    /* plain copy of everything after the '#' */
    u->fragment = curlx_memdup0(fragment + 1, fraglen - 1);
    return u->fragment ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
  }
  else {
    struct dynbuf enc;
    CURLUcode result;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
    if(!result)
      u->fragment = curlx_dyn_ptr(&enc);
    return result;
  }
}
1035
1036
/*
 * handle_query() records that the URL has a query and stores its contents
 * in u->query. 'query' points at the '?' and 'qlen' counts it.
 *
 * A lone '?' is stored as an empty string. With CURLU_URLENCODE set, the
 * stored query is the URL encoded version.
 */
static CURLUcode handle_query(CURLU *u, const char *query,
                              size_t qlen, unsigned int flags)
{
  u->query_present = TRUE;

  if(qlen <= 1) {
    /* nothing but the question mark */
    u->query = curlx_strdup("");
    return u->query ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
  }

  if(!(flags & CURLU_URLENCODE)) {
    /* copy what follows the question mark */
    u->query = curlx_memdup0(query + 1, qlen - 1);
    return u->query ? CURLUE_OK : CURLUE_OUT_OF_MEMORY;
  }
  else {
    struct dynbuf enc;
    CURLUcode result;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    /* encode what follows the question mark */
    result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
    if(!result)
      u->query = curlx_dyn_ptr(&enc);
    return result;
  }
}
1065
1066
/*
 * handle_path() stores the path part in u->path.
 *
 * With CURLU_URLENCODE the path is URL encoded first. A resulting path of
 * one byte or less is not copied here. Unless CURLU_PATH_AS_IS is set, the
 * path is passed through dedotdotify() and replaced by its output when it
 * returns one.
 */
static CURLUcode handle_path(CURLU *u, const char *path,
                             size_t pathlen, unsigned int flags)
{
  const bool encode = (flags & CURLU_URLENCODE) ? TRUE : FALSE;

  if(pathlen && encode) {
    struct dynbuf enc;
    CURLUcode result;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
    if(result)
      return result;
    /* from here on, work on the encoded version */
    pathlen = curlx_dyn_len(&enc);
    u->path = curlx_dyn_ptr(&enc);
    path = u->path;
  }

  if(pathlen <= 1)
    /* there is no path left or only the slash */
    return CURLUE_OK;

  if(!u->path) {
    u->path = curlx_memdup0(path, pathlen);
    if(!u->path)
      return CURLUE_OUT_OF_MEMORY;
    path = u->path;
  }
  else if(encode)
    /* make sure the stored path ends at pathlen */
    u->path[pathlen] = 0;

  if(flags & CURLU_PATH_AS_IS)
    return CURLUE_OK;
  else {
    /* remove ../ and ./ sequences according to RFC3986 */
    char *cleaned;
    if(dedotdotify(path, pathlen, &cleaned))
      return CURLUE_OUT_OF_MEMORY;
    if(cleaned) {
      curlx_free(u->path);
      u->path = cleaned;
    }
  }
  return CURLUE_OK;
}
1109
1110
/*
 * parseurl() splits 'url' into its components and stores them in 'u'.
 *
 * On success the hostname buffer is handed over to u->host. On any failure
 * the hostname buffer is freed and free_urlhandle() is called on 'u' before
 * the error code is returned.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  struct dynbuf host;
  char schemebuf[MAX_SCHEME_LEN + 1];
  const char *path;
  const char *mark;
  size_t pathlen;
  size_t urllen;
  size_t schemelen;
  CURLUcode result;

  DEBUGASSERT(url);

  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
  if(result)
    goto fail;

  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME |
                                            CURLU_DEFAULT_SCHEME));

  if(schemelen && !strcmp(schemebuf, "file"))
    /* the file: scheme has its own parser */
    result = parse_file(url, urllen, u, &host, &path, &pathlen);
  else {
    const char *hostp = NULL;
    size_t hostlen;
    result = parse_scheme(url, u, schemebuf, schemelen, flags, &hostp);
    if(result)
      goto fail;

    /* the hostname + port number ends at the first of these */
    hostlen = strcspn(hostp, "/?#");
    path = hostp + hostlen;

    /* at this point pathlen covers the query and the fragment as well */
    pathlen = urllen - (path - url);

    if(hostlen) {
      result = parse_authority(u, hostp, hostlen, flags, &host,
                               u->scheme != NULL);
      if(!result && (flags & CURLU_GUESS_SCHEME) && !u->scheme)
        result = guess_scheme(u, &host);
    }
    else if(flags & CURLU_NO_AUTHORITY) {
      /* an empty authority is allowed */
      if(curlx_dyn_add(&host, ""))
        result = CURLUE_OUT_OF_MEMORY;
    }
    else
      result = CURLUE_NO_HOST;
  }
  if(result)
    goto fail;

  /* cut off and store the fragment, if there is one */
  mark = strchr(path, '#');
  if(mark) {
    size_t fraglen = pathlen - (mark - path);
    result = handle_fragment(u, mark, fraglen, flags);
    /* pathlen still includes the query after this */
    pathlen -= fraglen;
    if(result)
      goto fail;
  }

  /* cut off and store the query, if there is one */
  mark = memchr(path, '?', pathlen);
  if(mark) {
    size_t qlen = pathlen - (mark - path);
    result = handle_query(u, mark, qlen, flags);
    pathlen -= qlen;
    if(result)
      goto fail;
  }

  /* what remains is the path with fragment and query trimmed off */
  result = handle_path(u, path, pathlen, flags);
  if(result)
    goto fail;

  u->host = curlx_dyn_ptr(&host);
  return CURLUE_OK;

fail:
  curlx_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
1193
1194
/*
1195
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1196
 */
1197
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1198
                                      unsigned int flags)
1199
0
{
1200
0
  CURLUcode result;
1201
0
  CURLU tmpurl;
1202
0
  memset(&tmpurl, 0, sizeof(tmpurl));
1203
0
  result = parseurl(url, &tmpurl, flags);
1204
0
  if(!result) {
1205
0
    free_urlhandle(u);
1206
0
    *u = tmpurl;
1207
0
  }
1208
0
  return result;
1209
0
}
1210
1211
/*
1212
 * Concatenate a relative URL onto a base URL making it absolute.
1213
 */
1214
static CURLUcode redirect_url(const char *base, const char *relurl,
1215
                              CURLU *u, unsigned int flags)
1216
0
{
1217
0
  struct dynbuf urlbuf;
1218
0
  bool host_changed = FALSE;
1219
0
  const char *useurl = relurl;
1220
0
  const char *cutoff = NULL;
1221
0
  size_t prelen;
1222
0
  CURLUcode uc;
1223
1224
  /* protsep points to the start of the hostname, after [scheme]:// */
1225
0
  const char *protsep = base + strlen(u->scheme) + 3;
1226
0
  DEBUGASSERT(base && relurl && u); /* all set here */
1227
0
  if(!base)
1228
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
1229
1230
  /* handle different relative URL types */
1231
0
  switch(relurl[0]) {
1232
0
  case '/':
1233
0
    if(relurl[1] == '/') {
1234
      /* protocol-relative URL: //example.com/path */
1235
0
      cutoff = protsep;
1236
0
      useurl = &relurl[2];
1237
0
      host_changed = TRUE;
1238
0
    }
1239
0
    else
1240
      /* absolute /path */
1241
0
      cutoff = strchr(protsep, '/');
1242
0
    break;
1243
1244
0
  case '#':
1245
    /* fragment-only change */
1246
0
    if(u->fragment)
1247
0
      cutoff = strchr(protsep, '#');
1248
0
    break;
1249
1250
0
  default:
1251
    /* path or query-only change */
1252
0
    if(u->query && u->query[0])
1253
      /* remove existing query */
1254
0
      cutoff = strchr(protsep, '?');
1255
0
    else if(u->fragment && u->fragment[0])
1256
      /* Remove existing fragment */
1257
0
      cutoff = strchr(protsep, '#');
1258
1259
0
    if(relurl[0] != '?') {
1260
      /* append a relative path after the last slash */
1261
0
      cutoff = memrchr(protsep, '/',
1262
0
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
1263
0
      if(cutoff)
1264
0
        cutoff++; /* truncate after last slash */
1265
0
    }
1266
0
    break;
1267
0
  }
1268
1269
0
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
1270
1271
  /* build new URL */
1272
0
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
1273
1274
0
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
1275
0
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
1276
0
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
1277
0
                              flags & ~U_CURLU_PATH_AS_IS);
1278
0
  }
1279
0
  else
1280
0
    uc = CURLUE_OUT_OF_MEMORY;
1281
1282
0
  curlx_dyn_free(&urlbuf);
1283
0
  return uc;
1284
0
}
1285
1286
/*
1287
 */
1288
CURLU *curl_url(void)
1289
0
{
1290
0
  return curlx_calloc(1, sizeof(struct Curl_URL));
1291
0
}
1292
1293
/*
 * curl_url_cleanup() frees everything stored in the handle and then the
 * handle itself. Passing NULL is a no-op.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  curlx_free(u);
}
1300
1301
#define DUP(dest, src, name)                    \
1302
0
  do {                                          \
1303
0
    if((src)->name) {                           \
1304
0
      (dest)->name = curlx_strdup((src)->name); \
1305
0
      if(!(dest)->name)                         \
1306
0
        goto fail;                              \
1307
0
    }                                           \
1308
0
  } while(0)
1309
1310
CURLU *curl_url_dup(const CURLU *in)
1311
0
{
1312
0
  struct Curl_URL *u = curlx_calloc(1, sizeof(struct Curl_URL));
1313
0
  if(u) {
1314
0
    DUP(u, in, scheme);
1315
0
    DUP(u, in, user);
1316
0
    DUP(u, in, password);
1317
0
    DUP(u, in, options);
1318
0
    DUP(u, in, host);
1319
0
    DUP(u, in, port);
1320
0
    DUP(u, in, path);
1321
0
    DUP(u, in, query);
1322
0
    DUP(u, in, fragment);
1323
0
    DUP(u, in, zoneid);
1324
0
    u->portnum = in->portnum;
1325
0
    u->fragment_present = in->fragment_present;
1326
0
    u->query_present = in->query_present;
1327
0
  }
1328
0
  return u;
1329
0
fail:
1330
0
  curl_url_cleanup(u);
1331
0
  return NULL;
1332
0
}
1333
1334
#ifndef USE_IDN
/* without IDN support, both conversions fail with CURLUE_LACKS_IDN */
#define host_decode(x, y) CURLUE_LACKS_IDN
#define host_encode(x, y) CURLUE_LACKS_IDN
#else
/*
 * host_decode() runs Curl_idn_decode() on 'host', returning the allocated
 * output in *allochost, and maps the CURLcode outcome to a CURLUcode.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode result = Curl_idn_decode(host, allochost);
  if(!result)
    return CURLUE_OK;
  if(result == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/*
 * host_encode() runs Curl_idn_encode() on 'host', returning the allocated
 * output in *allochost, and maps the CURLcode outcome to a CURLUcode.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode result = Curl_idn_encode(host, allochost);
  if(!result)
    return CURLUE_OK;
  if(result == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}
#endif
1356
1357
/*
 * urlget_format() produces the allocated output string for a single URL
 * part. 'ptr' is the stored part, which gets copied and then converted
 * according to 'flags':
 *
 * - 'plusdecode' turns every '+' into a space
 * - CURLU_URLDECODE URL decodes, rejecting control bytes
 * - CURLU_URLENCODE URL encodes; otherwise, for the host part only,
 *   CURLU_PUNYCODE or CURLU_PUNY2IDN converts the name
 *
 * On success *partp holds the result. On failure *partp is NULL.
 */
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
                               const char *ptr, char **partp,
                               bool plusdecode, unsigned int flags)
{
  const bool is_host = (what == CURLUPART_HOST);
  size_t len = strlen(ptr);
  char *out = curlx_memdup0(ptr, len);
  CURLUcode uc;

  *partp = NULL;
  if(!out)
    return CURLUE_OUT_OF_MEMORY;

  if(plusdecode) {
    /* convert + to space */
    size_t i;
    for(i = 0; i < len; i++) {
      if(out[i] == '+')
        out[i] = ' ';
    }
  }

  if(flags & CURLU_URLDECODE) {
    char *plain;
    size_t plainlen;
    /* this unconditional rejection of control bytes is documented
       API behavior */
    CURLcode res = Curl_urldecode(out, len, &plain, &plainlen, REJECT_CTRL);
    curlx_free(out);
    if(res)
      return CURLUE_URLDECODE;
    out = plain;
    len = plainlen;
  }

  if(flags & CURLU_URLENCODE) {
    struct dynbuf enc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    uc = urlencode_str(&enc, out, len, TRUE, what == CURLUPART_QUERY);
    curlx_free(out);
    if(uc)
      return uc;
    out = curlx_dyn_ptr(&enc);
  }
  else if(is_host && (flags & CURLU_PUNYCODE)) {
    /* only convert names that are not already plain ASCII */
    if(!Curl_is_ASCII_name(u->host)) {
      char *converted = NULL;
      uc = host_decode(out, &converted);
      curlx_free(out);
      if(uc)
        return uc;
      out = converted;
    }
  }
  else if(is_host && (flags & CURLU_PUNY2IDN)) {
    /* only convert names that are plain ASCII */
    if(Curl_is_ASCII_name(u->host)) {
      char *converted = NULL;
      uc = host_encode(out, &converted);
      curlx_free(out);
      if(uc)
        return uc;
      out = converted;
    }
  }

  *partp = out;
  return CURLUE_OK;
}
1424
1425
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1426
0
{
1427
0
  char *url;
1428
0
  const char *scheme;
1429
0
  char *options = u->options;
1430
0
  char *port = u->port;
1431
0
  char *allochost = NULL;
1432
0
  bool show_fragment =
1433
0
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1434
0
  bool show_query = (u->query && u->query[0]) ||
1435
0
    (u->query_present && flags & CURLU_GET_EMPTY);
1436
0
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1437
0
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1438
0
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1439
0
  char portbuf[7];
1440
0
  if(u->scheme && curl_strequal("file", u->scheme)) {
1441
0
    url = curl_maprintf("file://%s%s%s%s%s",
1442
0
                        u->path,
1443
0
                        show_query ? "?" : "",
1444
0
                        u->query ? u->query : "",
1445
0
                        show_fragment ? "#" : "",
1446
0
                        u->fragment ? u->fragment : "");
1447
0
  }
1448
0
  else if(!u->host)
1449
0
    return CURLUE_NO_HOST;
1450
0
  else {
1451
0
    const struct Curl_scheme *h = NULL;
1452
0
    char schemebuf[MAX_SCHEME_LEN + 5];
1453
0
    if(u->scheme)
1454
0
      scheme = u->scheme;
1455
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1456
0
      scheme = DEFAULT_SCHEME;
1457
0
    else
1458
0
      return CURLUE_NO_SCHEME;
1459
1460
0
    h = Curl_get_scheme(scheme);
1461
0
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1462
      /* there is no stored port number, but asked to deliver
1463
         a default one for the scheme */
1464
0
      if(h) {
1465
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1466
0
        port = portbuf;
1467
0
      }
1468
0
    }
1469
0
    else if(port) {
1470
      /* there is a stored port number, but asked to inhibit if it matches
1471
         the default one for the scheme */
1472
0
      if(h && (h->defport == u->portnum) &&
1473
0
         (flags & CURLU_NO_DEFAULT_PORT))
1474
0
        port = NULL;
1475
0
    }
1476
1477
0
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1478
0
      options = NULL;
1479
1480
0
    if(u->host[0] == '[') {
1481
0
      if(u->zoneid) {
1482
        /* make it '[ host %25 zoneid ]' */
1483
0
        struct dynbuf enc;
1484
0
        size_t hostlen = strlen(u->host);
1485
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1486
0
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1487
0
                          u->zoneid))
1488
0
          return CURLUE_OUT_OF_MEMORY;
1489
0
        allochost = curlx_dyn_ptr(&enc);
1490
0
      }
1491
0
    }
1492
0
    else if(urlencode) {
1493
0
      allochost = curl_easy_escape(NULL, u->host, 0);
1494
0
      if(!allochost)
1495
0
        return CURLUE_OUT_OF_MEMORY;
1496
0
    }
1497
0
    else if(punycode) {
1498
0
      if(!Curl_is_ASCII_name(u->host)) {
1499
0
        CURLUcode ret = host_decode(u->host, &allochost);
1500
0
        if(ret)
1501
0
          return ret;
1502
0
      }
1503
0
    }
1504
0
    else if(depunyfy) {
1505
0
      if(Curl_is_ASCII_name(u->host)) {
1506
0
        CURLUcode ret = host_encode(u->host, &allochost);
1507
0
        if(ret)
1508
0
          return ret;
1509
0
      }
1510
0
    }
1511
1512
0
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1513
0
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1514
0
    else
1515
0
      schemebuf[0] = 0;
1516
1517
0
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1518
0
                        schemebuf,
1519
0
                        u->user ? u->user : "",
1520
0
                        u->password ? ":" : "",
1521
0
                        u->password ? u->password : "",
1522
0
                        options ? ";" : "",
1523
0
                        options ? options : "",
1524
0
                        (u->user || u->password || options) ? "@" : "",
1525
0
                        allochost ? allochost : u->host,
1526
0
                        port ? ":" : "",
1527
0
                        port ? port : "",
1528
0
                        u->path ? u->path : "/",
1529
0
                        show_query ? "?" : "",
1530
0
                        u->query ? u->query : "",
1531
0
                        show_fragment ? "#" : "",
1532
0
                        u->fragment ? u->fragment : "");
1533
0
    curlx_free(allochost);
1534
0
  }
1535
0
  if(!url)
1536
0
    return CURLUE_OUT_OF_MEMORY;
1537
0
  *part = url;
1538
0
  return CURLUE_OK;
1539
0
}
1540
1541
/*
 * curl_url_get() extracts the part 'what' from the handle 'u' and returns
 * it as an allocated string in *part.
 *
 * Returns CURLUE_BAD_HANDLE or CURLUE_BAD_PARTPOINTER for NULL arguments.
 * When the part is not available, the part-specific CURLUE_NO_* code is
 * returned, or CURLUE_UNKNOWN_PART for the path and for unknown parts. The
 * path is never missing: "/" is returned when none is stored.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART;
  char defport[7];
  bool plus2space = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_URL:
    /* the full URL is assembled separately */
    return urlget_url(u, part, flags);

  case CURLUPART_SCHEME:
    flags &= ~U_CURLU_URLDECODE; /* never for schemes */
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    break;

  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;

  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;

  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;

  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;

  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;

  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~U_CURLU_URLDECODE; /* never for port */
    if(u->scheme) {
      if(value) {
        /* a port number is stored: hide it if asked to and it is the
           default one for the scheme */
        const struct Curl_scheme *h = Curl_get_scheme(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* no port number is stored: deliver the default one for the
           scheme, as asked */
        const struct Curl_scheme *h = Curl_get_scheme(u->scheme);
        if(h) {
          curl_msnprintf(defport, sizeof(defport), "%u", h->defport);
          value = defport;
        }
      }
    }
    break;

  case CURLUPART_PATH:
    value = u->path ? u->path : "/";
    break;

  case CURLUPART_QUERY:
    value = u->query;
    missing = CURLUE_NO_QUERY;
    plus2space = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* a blank query is stored but the user did not ask for it */
      value = NULL;
    break;

  case CURLUPART_FRAGMENT:
    value = u->fragment;
    missing = CURLUE_NO_FRAGMENT;
    if(!value && u->fragment_present && flags & CURLU_GET_EMPTY)
      /* there was a blank fragment and the user asks for it */
      value = "";
    break;

  default:
    value = NULL;
    break;
  }

  if(!value)
    return missing;

  return urlget_format(u, what, value, part, plus2space, flags);
}
1635
1636
/*
 * set_url_scheme() verifies that 'scheme' is acceptable to store in the
 * handle. It does not store it; on success it only clears the "guessed"
 * marker.
 *
 * Returns CURLUE_BAD_SCHEME if the name is empty, longer than
 * MAX_SCHEME_LEN or, for a scheme not known to Curl_get_scheme(), does not
 * follow the RFC 3986 syntax. Returns CURLUE_UNSUPPORTED_SCHEME if the
 * scheme is unknown or has no 'run' member set and
 * CURLU_NON_SUPPORT_SCHEME is not set.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_scheme *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme(scheme);
  if(!(flags & CURLU_NON_SUPPORT_SCHEME) && (!h || !h->run))
    return CURLUE_UNSUPPORTED_SCHEME;
  if(!h) {
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    size_t i;
    if(!ISALPHA(scheme[0]))
      return CURLUE_BAD_SCHEME;
    /* check every byte after the first one, including the last */
    for(i = 1; i < plen; i++) {
      const char c = scheme[i];
      if(!ISALNUM(c) && (c != '+') && (c != '-') && (c != '.'))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1665
1666
/*
 * set_url_port() parses 'provided_port' as a decimal number no larger than
 * 0xffff and stores it in the handle, both as a string (u->port) and as a
 * number (u->portnum). Input that does not start with a digit, or that has
 * anything following the number, gives CURLUE_BAD_PORT_NUMBER.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  const char *cursor = provided_port;
  curl_off_t num;
  char *str;

  if(!ISDIGIT(cursor[0]))
    /* not a number */
    return CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&cursor, &num, 0xffff) || *cursor)
    /* bad number or trailing data */
    return CURLUE_BAD_PORT_NUMBER;

  /* store the number in normalized string form */
  str = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, num);
  if(!str)
    return CURLUE_OUT_OF_MEMORY;

  curlx_free(u->port);
  u->port = str;
  u->portnum = (unsigned short)num;
  return CURLUE_OK;
}
1684
1685
/*
 * set_url() lets a new URL replace the existing (if any) contents of the
 * handle.
 *
 * An absolute URL always replaces what is stored. A relative URL is applied
 * on top of the stored URL when the stored parts are enough to produce one;
 * otherwise it is parsed on its own. A blank URL ('part_size' is zero) is
 * only accepted when a complete URL is already stored, and then changes
 * nothing.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *current = NULL;
  CURLUcode uc;

  if(!part_size) {
    /* a blank URL is only a valid (no-op) relative URL when we already have
       a complete one */
    uc = curl_url_get(u, CURLUPART_URL, &current, flags);
    switch(uc) {
    case CURLUE_OK:
      curlx_free(current);
      return CURLUE_OK;
    case CURLUE_OUT_OF_MEMORY:
      return CURLUE_OUT_OF_MEMORY;
    default:
      return CURLUE_MALFORMED_INPUT;
    }
  }

  /* an absolute new URL replaces the existing one */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME | CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  /* figure out if the stored parts make up an absolute URL */
  uc = curl_url_get(u, CURLUPART_URL, &current, flags);
  if(uc == CURLUE_OUT_OF_MEMORY)
    return uc;
  if(uc)
    /* the stored URL is incomplete, parse the new one on its own */
    return parseurl_and_replace(url, u, flags);

  DEBUGASSERT(current); /* it is set here */

  /* apply the relative part to create a new URL */
  uc = redirect_url(current, url, u, flags);
  curlx_free(current);
  return uc;
}
1731
1732
/*
 * urlset_clear() removes the part 'what' from the handle. Clearing
 * CURLUPART_URL frees all parts and zeroes the whole handle. Returns
 * CURLUE_UNKNOWN_PART for a part it does not know about.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **field = NULL; /* the string member to free */

  switch(what) {
  case CURLUPART_URL:
    free_urlhandle(u);
    memset(u, 0, sizeof(*u));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    field = &u->scheme;
    u->guessed_scheme = FALSE;
    break;
  case CURLUPART_USER:
    field = &u->user;
    break;
  case CURLUPART_PASSWORD:
    field = &u->password;
    break;
  case CURLUPART_OPTIONS:
    field = &u->options;
    break;
  case CURLUPART_HOST:
    field = &u->host;
    break;
  case CURLUPART_ZONEID:
    field = &u->zoneid;
    break;
  case CURLUPART_PORT:
    field = &u->port;
    u->portnum = 0;
    break;
  case CURLUPART_PATH:
    field = &u->path;
    break;
  case CURLUPART_QUERY:
    field = &u->query;
    u->query_present = FALSE;
    break;
  case CURLUPART_FRAGMENT:
    field = &u->fragment;
    u->fragment_present = FALSE;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  Curl_safefree(*field);
  return CURLUE_OK;
}
1778
1779
/*
 * allowed_in_path() tells whether the byte 'x' belongs to the fixed set of
 * punctuation characters that curl_url_set() copies verbatim, instead of
 * percent-encoding, when it URL encodes a path.
 *
 * Returns TRUE for a byte in the set, FALSE for every other byte including
 * the NUL byte.
 */
static bool allowed_in_path(unsigned char x)
{
  static const char verbatim[] = "!$&'(){}[]*+,;=:@/";
  const char *c = verbatim;

  /* walk the table until 'x' is found or the terminating NUL is reached */
  while(*c && ((unsigned char)*c != x))
    c++;

  /* stopping on the terminator means there was no match */
  return *c != '\0';
}
1804
1805
/*
 * curl_url_set() stores the string 'part' as the component 'what' of the
 * URL handle 'u'.
 *
 * - A NULL 'part' clears the component instead (see urlset_clear()).
 * - CURLUPART_URL and CURLUPART_PORT are handed over to set_url() and
 *   set_url_port(); the scheme is validated by set_url_scheme() and is never
 *   URL encoded.
 * - With CURLU_URLENCODE the input is percent-encoded byte by byte.
 *   Unreserved characters are kept, a path also keeps the characters
 *   accepted by allowed_in_path(), and a query turns spaces into '+'.
 *   Without the flag the input is copied as-is, except that upper case hex
 *   digits in %XX sequences are lowercased.
 * - A path always gets a leading slash.
 * - With CURLU_APPENDQUERY the new query is appended to an existing one,
 *   separated by '&', and the first '=' is left unencoded.
 * - A hostname is verified with hostname_check(); an empty one is accepted
 *   only when CURLU_NO_AUTHORITY is set.
 *
 * Returns CURLUE_OK on success. Errors produced here are CURLUE_BAD_HANDLE,
 * CURLUE_MALFORMED_INPUT (input longer than CURL_MAX_INPUT_LENGTH),
 * CURLUE_UNKNOWN_PART, CURLUE_BAD_HOSTNAME, CURLUE_OUT_OF_MEMORY and whatever
 * cc2cu() maps a failed buffer operation to; the helpers called may return
 * further codes.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **slot = NULL;          /* where the new string ends up */
  bool encode = (flags & CURLU_URLENCODE) ? TRUE : FALSE;
  bool space2plus = FALSE;     /* encode ' ' as '+' */
  bool is_path = FALSE;        /* keep path punctuation unencoded */
  bool need_slash = FALSE;     /* result must start with '/' */
  bool append = FALSE;         /* add to the existing query */
  bool keep_equals = FALSE;    /* pass the first '=' through unencoded */
  size_t partlen;
  const char *fresh;
  struct dynbuf out;
  CURLcode rc = CURLE_OK;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    /* a NULL string means "remove this part" */
    return urlset_clear(u, what);

  partlen = strlen(part);
  if(partlen > CURL_MAX_INPUT_LENGTH)
    /* refuse unreasonably long input */
    return CURLUE_MALFORMED_INPUT;

  switch(what) {
  case CURLUPART_URL:
    return set_url(u, part, partlen, flags);
  case CURLUPART_PORT:
    return set_url_port(u, part);
  case CURLUPART_SCHEME: {
    CURLUcode uc = set_url_scheme(u, part, flags);
    if(uc)
      return uc;
    encode = FALSE; /* a scheme is never URL encoded */
    slot = &u->scheme;
    break;
  }
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    /* the stored zone id is dropped before a new host is handled */
    Curl_safefree(u->zoneid);
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PATH:
    is_path = TRUE;
    need_slash = TRUE; /* always */
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    space2plus = encode;
    append = (flags & CURLU_APPENDQUERY) ? TRUE : FALSE;
    keep_equals = append;
    u->query_present = TRUE;
    slot = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = TRUE;
    slot = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(slot);

  /* worst case: every input byte becomes a three byte %XX sequence */
  curlx_dyn_init(&out, (partlen * 3) + 1 + need_slash);

  if(need_slash && (part[0] != '/')) {
    rc = curlx_dyn_addn(&out, "/", 1);
    if(rc)
      return cc2cu(rc);
  }

  if(encode) {
    const unsigned char *src = (const unsigned char *)part;

    for(; *src; src++) {
      const unsigned char ch = *src;
      bool first_equals;

      if(space2plus && (ch == ' ')) {
        if(curlx_dyn_addn(&out, "+", 1))
          return CURLUE_OUT_OF_MEMORY;
        continue;
      }

      first_equals = keep_equals && (ch == '=');
      if(first_equals || ISUNRESERVED(ch) ||
         (is_path && allowed_in_path(ch))) {
        if(first_equals)
          /* only a single equals sign gets this treatment */
          keep_equals = FALSE;
        rc = curlx_dyn_addn(&out, src, 1);
      }
      else {
        unsigned char hex[3] = { '%' };
        Curl_hexbyte(&hex[1], ch);
        rc = curlx_dyn_addn(&out, hex, 3);
      }
      if(rc)
        return cc2cu(rc);
    }
  }
  else {
    char *p;

    rc = curlx_dyn_add(&out, part);
    if(rc)
      return cc2cu(rc);

    /* normalize the hex digits of percent encoded bytes to lower case */
    p = curlx_dyn_ptr(&out);
    while(*p) {
      size_t step = 1;
      if((p[0] == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
         (ISUPPER(p[1]) || ISUPPER(p[2]))) {
        p[1] = Curl_raw_tolower(p[1]);
        p[2] = Curl_raw_tolower(p[2]);
        step = 3;
      }
      p += step;
    }
  }
  fresh = curlx_dyn_ptr(&out);

  if(append && fresh) {
    /* Glue 'fresh' onto the end of the query already stored, inserting a
       '&' between them unless the stored query already ends with one. With
       no stored query, fall through and store 'fresh' like any other
       part. */
    size_t oldlen = u->query ? strlen(u->query) : 0;

    if(oldlen) {
      struct dynbuf joined;
      curlx_dyn_init(&joined, CURL_MAX_INPUT_LENGTH);

      rc = curlx_dyn_addn(&joined, u->query, oldlen); /* existing query */
      if(!rc && (u->query[oldlen - 1] != '&'))
        rc = curlx_dyn_addn(&joined, "&", 1);
      if(!rc)
        rc = curlx_dyn_add(&joined, fresh);

      /* the encoded piece is no longer needed, whatever the outcome */
      curlx_dyn_free(&out);
      if(rc)
        return CURLUE_OUT_OF_MEMORY;

      curlx_free(*slot);
      *slot = curlx_dyn_ptr(&joined);
      return CURLUE_OK;
    }
  }
  else if(what == CURLUPART_HOST) {
    size_t hlen = curlx_dyn_len(&out);

    /* an empty hostname skips the check only with CURLU_NO_AUTHORITY */
    if(hlen || !(flags & CURLU_NO_AUTHORITY)) {
      bool reject;

      if(!hlen)
        reject = TRUE; /* an empty hostname is not acceptable */
      else if(encode)
        reject = hostname_check(u, (char *)CURL_UNCONST(fresh), hlen) ?
          TRUE : FALSE;
      else {
        /* the name was not URL encoded by this function, so it arrived
           already encoded and must be decoded before it can be checked */
        size_t dlen;
        char *decoded = NULL;

        rc = Curl_urldecode(fresh, hlen, &decoded, &dlen, REJECT_CTRL);
        reject = (rc || hostname_check(u, decoded, dlen)) ? TRUE : FALSE;
        curlx_free(decoded);
      }

      if(reject) {
        curlx_dyn_free(&out);
        return CURLUE_BAD_HOSTNAME;
      }
    }
  }

  /* swap the previously stored string for the new one */
  curlx_free(*slot);
  *slot = (char *)CURL_UNCONST(fresh);
  return CURLUE_OK;
}