Coverage Report

Created: 2026-05-16 08:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/curl/lib/urlapi.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
25
#include "curl_setup.h"
26
27
#include "urldata.h"
28
#include "urlapi-int.h"
29
#include "strcase.h"
30
#include "url.h"
31
#include "escape.h"
32
#include "curl_ctype.h"
33
#include "curlx/inet_pton.h"
34
#include "curlx/inet_ntop.h"
35
#include "strdup.h"
36
#include "idn.h"
37
#include "curlx/strparse.h"
38
#include "curl_memrchr.h"
39
40
/* The last 2 #include files should be in this order */
41
#include "curl_memory.h"
42
#include "memdebug.h"
43
44
#ifdef _WIN32
45
  /* MS-DOS/Windows style drive prefix, eg c: in c:foo */
46
/* 'str' may be any pointer expression; it is parenthesized at every use so
   that arguments such as 'p + 1' index correctly. Note that the argument is
   evaluated more than once. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
50
#endif
51
52
  /* MS-DOS/Windows style drive prefix, optionally with
53
   * a '|' instead of ':', followed by a slash or NUL */
54
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
55
8.36k
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
56
8.36k
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
57
8.36k
   ((str)[1] == ':' || (str)[1] == '|') && \
58
8.36k
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
59
60
/* scheme is not URL encoded, the longest libcurl supported ones are... */
61
1.59M
#define MAX_SCHEME_LEN 40
62
63
/*
64
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
65
 * sure we have _some_ value for AF_INET6 without polluting our fake value
66
 * everywhere.
67
 */
68
#if !defined(USE_IPV6) && !defined(AF_INET6)
69
#define AF_INET6 (AF_INET + 1)
70
#endif
71
72
/* Internal representation of CURLU. Point to URL-encoded strings. */
73
struct Curl_URL {
74
  char *scheme;
75
  char *user;
76
  char *password;
77
  char *options; /* IMAP only? */
78
  char *host;
79
  char *zoneid; /* for numerical IPv6 addresses */
80
  char *port;
81
  char *path;
82
  char *query;
83
  char *fragment;
84
  unsigned short portnum; /* the numerical version (if 'port' is set) */
85
  BIT(query_present);    /* to support blank */
86
  BIT(fragment_present); /* to support blank */
87
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
88
};
89
90
0
#define DEFAULT_SCHEME "https"
91
92
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
93
                                      unsigned int flags);
94
95
static void free_urlhandle(struct Curl_URL *u)
96
594k
{
97
594k
  free(u->scheme);
98
594k
  free(u->user);
99
594k
  free(u->password);
100
594k
  free(u->options);
101
594k
  free(u->host);
102
594k
  free(u->zoneid);
103
594k
  free(u->port);
104
594k
  free(u->path);
105
594k
  free(u->query);
106
594k
  free(u->fragment);
107
594k
}
108
109
/*
 * Return a pointer to the first '/' or '?' that follows the hostname, as in
 * http://www.example.com?id=2380, or to the terminating null byte when the
 * URL has neither. The hostname is taken to start right after the first "//"
 * or, if there is none, at the beginning of the string.
 */
static const char *find_host_sep(const char *url)
{
  const char *host = strstr(url, "//");

  /* skip the double slash when present, otherwise scan from the start */
  host = host ? host + 2 : url;

  return host + strcspn(host, "/?");
}
128
129
/* convert CURLcode to CURLUcode */
130
0
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
131
0
                  CURLUE_OUT_OF_MEMORY)
132
133
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
134
 * spaces in the source URL accordingly.
135
 *
136
 * URL encoding should be skipped for hostnames, otherwise IDN resolution
137
 * will fail.
138
 */
139
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
140
                               size_t len, bool relative,
141
                               bool query)
142
122k
{
143
  /* we must add this with whitespace-replacing */
144
122k
  bool left = !query;
145
122k
  const unsigned char *iptr;
146
122k
  const unsigned char *host_sep = (const unsigned char *) url;
147
122k
  CURLcode result = CURLE_OK;
148
149
122k
  if(!relative) {
150
0
    size_t n;
151
0
    host_sep = (const unsigned char *) find_host_sep(url);
152
153
    /* output the first piece as-is */
154
0
    n = (const char *)host_sep - url;
155
0
    result = curlx_dyn_addn(o, url, n);
156
0
    len -= n;
157
0
  }
158
159
34.0M
  for(iptr = host_sep; len && !result; iptr++, len--) {
160
33.9M
    if(*iptr == ' ') {
161
0
      if(left)
162
0
        result = curlx_dyn_addn(o, "%20", 3);
163
0
      else
164
0
        result = curlx_dyn_addn(o, "+", 1);
165
0
    }
166
33.9M
    else if((*iptr < ' ') || (*iptr >= 0x7f)) {
167
4.85M
      unsigned char out[3]={'%'};
168
4.85M
      Curl_hexbyte(&out[1], *iptr);
169
4.85M
      result = curlx_dyn_addn(o, out, 3);
170
4.85M
    }
171
29.0M
    else {
172
29.0M
      result = curlx_dyn_addn(o, iptr, 1);
173
29.0M
      if(*iptr == '?')
174
0
        left = FALSE;
175
29.0M
    }
176
33.9M
  }
177
178
122k
  if(result)
179
0
    return cc2cu(result);
180
122k
  return CURLUE_OK;
181
122k
}
182
183
/*
184
 * Returns the length of the scheme if the given URL is absolute (as opposed
185
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
186
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
187
 *
188
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
189
 * scheme.
190
 */
191
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
192
                            bool guess_scheme)
193
499k
{
194
499k
  size_t i = 0;
195
499k
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
196
499k
  (void)buflen; /* only used in debug-builds */
197
499k
  if(buf)
198
201k
    buf[0] = 0; /* always leave a defined value in buf */
199
#ifdef _WIN32
200
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
201
    return 0;
202
#endif
203
499k
  if(ISALPHA(url[0]))
204
1.59M
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
205
1.58M
      char s = url[i];
206
1.58M
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
207
        /* RFC 3986 3.1 explains:
208
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
209
        */
210
1.24M
      }
211
341k
      else {
212
341k
        break;
213
341k
      }
214
1.58M
    }
215
499k
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
216
    /* If this does not guess scheme, the scheme always ends with the colon so
217
       that this also detects data: URLs etc. In guessing mode, data: could
218
       be the hostname "data" with a specified port number. */
219
220
    /* the length of the scheme is the name part only */
221
213k
    size_t len = i;
222
213k
    if(buf) {
223
89.4k
      Curl_strntolower(buf, url, i);
224
89.4k
      buf[i] = 0;
225
89.4k
    }
226
213k
    return len;
227
213k
  }
228
286k
  return 0;
229
499k
}
230
231
/*
232
 * Concatenate a relative URL onto a base URL making it absolute.
233
 */
234
static CURLUcode redirect_url(const char *base, const char *relurl,
235
                              CURLU *u, unsigned int flags)
236
0
{
237
0
  struct dynbuf urlbuf;
238
0
  bool host_changed = FALSE;
239
0
  const char *useurl = relurl;
240
0
  const char *cutoff = NULL;
241
0
  size_t prelen;
242
0
  CURLUcode uc;
243
244
  /* protsep points to the start of the hostname, after [scheme]:// */
245
0
  const char *protsep = base + strlen(u->scheme) + 3;
246
0
  DEBUGASSERT(base && relurl && u); /* all set here */
247
0
  if(!base)
248
0
    return CURLUE_MALFORMED_INPUT; /* should never happen */
249
250
  /* handle different relative URL types */
251
0
  switch(relurl[0]) {
252
0
  case '/':
253
0
    if(relurl[1] == '/') {
254
      /* protocol-relative URL: //example.com/path */
255
0
      cutoff = protsep;
256
0
      useurl = &relurl[2];
257
0
      host_changed = TRUE;
258
0
    }
259
0
    else
260
      /* absolute /path */
261
0
      cutoff = strchr(protsep, '/');
262
0
    break;
263
264
0
  case '#':
265
    /* fragment-only change */
266
0
    if(u->fragment)
267
0
      cutoff = strchr(protsep, '#');
268
0
    break;
269
270
0
  default:
271
    /* path or query-only change */
272
0
    if(u->query && u->query[0])
273
      /* remove existing query */
274
0
      cutoff = strchr(protsep, '?');
275
0
    else if(u->fragment && u->fragment[0])
276
      /* Remove existing fragment */
277
0
      cutoff = strchr(protsep, '#');
278
279
0
    if(relurl[0] != '?') {
280
      /* append a relative path after the last slash */
281
0
      cutoff = memrchr(protsep, '/',
282
0
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
283
0
      if(cutoff)
284
0
        cutoff++; /* truncate after last slash */
285
0
    }
286
0
    break;
287
0
  }
288
289
0
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
290
291
  /* build new URL */
292
0
  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
293
294
0
  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
295
0
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
296
0
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
297
0
                              flags & ~CURLU_PATH_AS_IS);
298
0
  }
299
0
  else
300
0
    uc = CURLUE_OUT_OF_MEMORY;
301
302
0
  curlx_dyn_free(&urlbuf);
303
0
  return uc;
304
0
}
305
306
/* scan for byte values <= 31, 127 and sometimes space */
307
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
308
297k
{
309
297k
  size_t n = strlen(url);
310
297k
  size_t i;
311
297k
  unsigned char control;
312
297k
  const unsigned char *p = (const unsigned char *)url;
313
297k
  if(n > CURL_MAX_INPUT_LENGTH)
314
0
    return CURLUE_MALFORMED_INPUT;
315
316
297k
  control = allowspace ? 0x1f : 0x20;
317
192M
  for(i = 0; i < n; i++) {
318
192M
    if(p[i] <= control || p[i] == 127)
319
95.9k
      return CURLUE_MALFORMED_INPUT;
320
192M
  }
321
201k
  *urllen = n;
322
201k
  return CURLUE_OK;
323
297k
}
324
325
/*
326
 * parse_hostname_login()
327
 *
328
 * Parse the login details (username, password and options) from the URL and
329
 * strip them out of the hostname
330
 *
331
 */
332
static CURLUcode parse_hostname_login(struct Curl_URL *u,
333
                                      const char *login,
334
                                      size_t len,
335
                                      unsigned int flags,
336
                                      size_t *offset) /* to the hostname */
337
186k
{
338
186k
  CURLUcode result = CURLUE_OK;
339
186k
  CURLcode ccode;
340
186k
  char *userp = NULL;
341
186k
  char *passwdp = NULL;
342
186k
  char *optionsp = NULL;
343
186k
  const struct Curl_handler *h = NULL;
344
345
  /* At this point, we assume all the other special cases have been taken
346
   * care of, so the host is at most
347
   *
348
   *   [user[:password][;options]]@]hostname
349
   *
350
   * We need somewhere to put the embedded details, so do that first.
351
   */
352
186k
  char *ptr;
353
354
186k
  DEBUGASSERT(login);
355
356
186k
  *offset = 0;
357
186k
  ptr = memchr(login, '@', len);
358
186k
  if(!ptr)
359
154k
    goto out;
360
361
  /* We will now try to extract the
362
   * possible login information in a string like:
363
   * ftp://user:password@ftp.site.example:8021/README */
364
32.0k
  ptr++;
365
366
  /* if this is a known scheme, get some details */
367
32.0k
  if(u->scheme)
368
8.72k
    h = Curl_get_scheme_handler(u->scheme);
369
370
  /* We could use the login information in the URL so extract it. Only parse
371
     options if the handler says we should. Note that 'h' might be NULL! */
372
32.0k
  ccode = Curl_parse_login_details(login, ptr - login - 1,
373
32.0k
                                   &userp, &passwdp,
374
32.0k
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
375
32.0k
                                   &optionsp : NULL);
376
32.0k
  if(ccode) {
377
0
    result = CURLUE_BAD_LOGIN;
378
0
    goto out;
379
0
  }
380
381
32.0k
  if(userp) {
382
32.0k
    if(flags & CURLU_DISALLOW_USER) {
383
      /* Option DISALLOW_USER is set and URL contains username. */
384
0
      result = CURLUE_USER_NOT_ALLOWED;
385
0
      goto out;
386
0
    }
387
32.0k
    free(u->user);
388
32.0k
    u->user = userp;
389
32.0k
  }
390
391
32.0k
  if(passwdp) {
392
12.7k
    free(u->password);
393
12.7k
    u->password = passwdp;
394
12.7k
  }
395
396
32.0k
  if(optionsp) {
397
1.49k
    free(u->options);
398
1.49k
    u->options = optionsp;
399
1.49k
  }
400
401
  /* the hostname starts at this offset */
402
32.0k
  *offset = ptr - login;
403
32.0k
  return CURLUE_OK;
404
405
154k
out:
406
407
154k
  free(userp);
408
154k
  free(passwdp);
409
154k
  free(optionsp);
410
154k
  u->user = NULL;
411
154k
  u->password = NULL;
412
154k
  u->options = NULL;
413
414
154k
  return result;
415
32.0k
}
416
417
/*
 * Split an optional ":port" suffix off the hostname held in 'host'.
 *
 * The dynbuf is shortened so that only the hostname remains. A valid port is
 * stored in u->portnum and, as a freshly formatted decimal string without
 * leading zeroes, in u->port.
 *
 * Returns CURLUE_BAD_IPV6 when a '[' has no matching ']',
 * CURLUE_BAD_PORT_NUMBER for a malformed or out-of-range port,
 * CURLUE_OUT_OF_MEMORY if the port string cannot be allocated and CURLUE_OK
 * otherwise.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *hostname = curlx_dyn_ptr(host);
  const char *colon;
  curl_off_t port;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /* this is a RFC2732-style specified IP-address, find the end of the
       IPv6 address on the ']' ending bracket */
    const char *after = strchr(hostname, ']');
    if(!after)
      return CURLUE_BAD_IPV6;
    after++;
    if(!*after)
      colon = NULL; /* nothing follows the bracket */
    else if(*after != ':')
      return CURLUE_BAD_PORT_NUMBER;
    else
      colon = after;
  }

  if(!colon)
    return CURLUE_OK;

  /* cut the hostname off at the colon */
  curlx_dyn_setlen(host, (size_t)(colon - hostname));
  colon++;

  /* Browser behavior adaptation. If there is a colon with no digits after,
     just cut off the name there which makes us ignore the colon and just
     use the default port. Firefox, Chrome and Safari all do that.

     Do not do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  if(!*colon)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&colon, &port, 0xffff) || *colon)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short) port;
  /* generate a new port number string to get rid of leading zeroes etc */
  free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
470
471
/* this assumes 'hostname' now starts with [ */
472
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
473
                            size_t hlen) /* length of hostname */
474
9.33k
{
475
9.33k
  size_t len;
476
9.33k
  DEBUGASSERT(*hostname == '[');
477
9.33k
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
478
187
    return CURLUE_BAD_IPV6;
479
9.14k
  hostname++;
480
9.14k
  hlen -= 2;
481
482
  /* only valid IPv6 letters are ok */
483
9.14k
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
484
485
9.14k
  if(hlen != len) {
486
8.85k
    hlen = len;
487
8.85k
    if(hostname[len] == '%') {
488
      /* this could now be '%[zone id]' */
489
8.66k
      char zoneid[16];
490
8.66k
      int i = 0;
491
8.66k
      char *h = &hostname[len + 1];
492
      /* pass '25' if present and is a URL encoded percent sign */
493
8.66k
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
494
132
        h += 2;
495
31.2k
      while(*h && (*h != ']') && (i < 15))
496
22.5k
        zoneid[i++] = *h++;
497
8.66k
      if(!i || (']' != *h))
498
826
        return CURLUE_BAD_IPV6;
499
7.84k
      zoneid[i] = 0;
500
7.84k
      u->zoneid = strdup(zoneid);
501
7.84k
      if(!u->zoneid)
502
0
        return CURLUE_OUT_OF_MEMORY;
503
7.84k
      hostname[len] = ']'; /* insert end bracket */
504
7.84k
      hostname[len + 1] = 0; /* terminate the hostname */
505
7.84k
    }
506
183
    else
507
183
      return CURLUE_BAD_IPV6;
508
    /* hostname is fine */
509
8.85k
  }
510
511
  /* Normalize the IPv6 address */
512
8.13k
  {
513
8.13k
    char dest[16]; /* fits a binary IPv6 address */
514
8.13k
    hostname[hlen] = 0; /* end the address there */
515
8.13k
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
516
1.49k
      return CURLUE_BAD_IPV6;
517
6.64k
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen)) {
518
561
      hlen = strlen(hostname); /* might be shorter now */
519
561
      hostname[hlen + 1] = 0;
520
561
    }
521
6.64k
    hostname[hlen] = ']'; /* restore ending bracket */
522
6.64k
  }
523
0
  return CURLUE_OK;
524
8.13k
}
525
526
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
527
                                size_t hlen) /* length of hostname */
528
139k
{
529
139k
  size_t len;
530
139k
  DEBUGASSERT(hostname);
531
532
139k
  if(!hlen)
533
0
    return CURLUE_NO_HOST;
534
139k
  else if(hostname[0] == '[')
535
652
    return ipv6_parse(u, hostname, hlen);
536
139k
  else {
537
    /* letters from the second string are not ok */
538
139k
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
539
139k
    if(hlen != len)
540
      /* hostname with bad content */
541
35.9k
      return CURLUE_BAD_HOSTNAME;
542
139k
  }
543
103k
  return CURLUE_OK;
544
139k
}
545
546
/*
547
 * Handle partial IPv4 numerical addresses and different bases, like
548
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
549
 *
550
 * If the given input string is syntactically wrong IPv4 or any part for
551
 * example is too big, this function returns HOST_NAME.
552
 *
553
 * Output the "normalized" version of that input string in plain quad decimal
554
 * integers.
555
 *
556
 * Returns the host type.
557
 */
558
559
0
#define HOST_ERROR   -1 /* out of memory */
560
561
281k
#define HOST_NAME    1
562
30.6k
#define HOST_IPV4    2
563
17.3k
#define HOST_IPV6    3
564
565
static int ipv4_normalize(struct dynbuf *host)
566
164k
{
567
164k
  bool done = FALSE;
568
164k
  int n = 0;
569
164k
  const char *c = curlx_dyn_ptr(host);
570
164k
  unsigned int parts[4] = {0, 0, 0, 0};
571
164k
  CURLcode result = CURLE_OK;
572
573
164k
  if(*c == '[')
574
8.68k
    return HOST_IPV6;
575
576
185k
  while(!done) {
577
167k
    int rc;
578
167k
    curl_off_t l;
579
167k
    if(*c == '0') {
580
21.1k
      if(c[1] == 'x') {
581
306
        c += 2; /* skip the prefix */
582
306
        rc = curlx_str_hex(&c, &l, UINT_MAX);
583
306
      }
584
20.8k
      else
585
20.8k
        rc = curlx_str_octal(&c, &l, UINT_MAX);
586
21.1k
    }
587
145k
    else
588
145k
      rc = curlx_str_number(&c, &l, UINT_MAX);
589
590
167k
    if(rc)
591
131k
      return HOST_NAME;
592
593
36.0k
    parts[n] = (unsigned int)l;
594
595
36.0k
    switch(*c) {
596
11.3k
    case '.':
597
11.3k
      if(n == 3)
598
158
        return HOST_NAME;
599
11.2k
      n++;
600
11.2k
      c++;
601
11.2k
      break;
602
603
18.1k
    case '\0':
604
18.1k
      done = TRUE;
605
18.1k
      break;
606
607
6.54k
    default:
608
6.54k
      return HOST_NAME;
609
36.0k
    }
610
36.0k
  }
611
612
18.1k
  switch(n) {
613
12.9k
  case 0: /* a -- 32 bits */
614
12.9k
    curlx_dyn_reset(host);
615
616
12.9k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
617
12.9k
                            (parts[0] >> 24),
618
12.9k
                            ((parts[0] >> 16) & 0xff),
619
12.9k
                            ((parts[0] >> 8) & 0xff),
620
12.9k
                            (parts[0] & 0xff));
621
12.9k
    break;
622
2.68k
  case 1: /* a.b -- 8.24 bits */
623
2.68k
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
624
856
      return HOST_NAME;
625
1.82k
    curlx_dyn_reset(host);
626
1.82k
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
627
1.82k
                            (parts[0]),
628
1.82k
                            ((parts[1] >> 16) & 0xff),
629
1.82k
                            ((parts[1] >> 8) & 0xff),
630
1.82k
                            (parts[1] & 0xff));
631
1.82k
    break;
632
1.17k
  case 2: /* a.b.c -- 8.8.16 bits */
633
1.17k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
634
681
      return HOST_NAME;
635
493
    curlx_dyn_reset(host);
636
493
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
637
493
                            (parts[0]),
638
493
                            (parts[1]),
639
493
                            ((parts[2] >> 8) & 0xff),
640
493
                            (parts[2] & 0xff));
641
493
    break;
642
1.36k
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
643
1.36k
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
644
356
       (parts[3] > 0xff))
645
1.25k
      return HOST_NAME;
646
113
    curlx_dyn_reset(host);
647
113
    result = curlx_dyn_addf(host, "%u.%u.%u.%u",
648
113
                            (parts[0]),
649
113
                            (parts[1]),
650
113
                            (parts[2]),
651
113
                            (parts[3]));
652
113
    break;
653
18.1k
  }
654
15.3k
  if(result)
655
0
    return HOST_ERROR;
656
15.3k
  return HOST_IPV4;
657
15.3k
}
658
659
/* if necessary, replace the host content with a URL decoded version */
660
static CURLUcode urldecode_host(struct dynbuf *host)
661
140k
{
662
140k
  char *per = NULL;
663
140k
  const char *hostname = curlx_dyn_ptr(host);
664
140k
  per = strchr(hostname, '%');
665
140k
  if(!per)
666
    /* nothing to decode */
667
131k
    return CURLUE_OK;
668
8.92k
  else {
669
    /* encoded */
670
8.92k
    size_t dlen;
671
8.92k
    char *decoded;
672
8.92k
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
673
8.92k
                                     REJECT_CTRL);
674
8.92k
    if(result)
675
530
      return CURLUE_BAD_HOSTNAME;
676
8.39k
    curlx_dyn_reset(host);
677
8.39k
    result = curlx_dyn_addn(host, decoded, dlen);
678
8.39k
    free(decoded);
679
8.39k
    if(result)
680
0
      return cc2cu(result);
681
8.39k
  }
682
683
8.39k
  return CURLUE_OK;
684
140k
}
685
686
static CURLUcode parse_authority(struct Curl_URL *u,
687
                                 const char *auth, size_t authlen,
688
                                 unsigned int flags,
689
                                 struct dynbuf *host,
690
                                 bool has_scheme)
691
186k
{
692
186k
  size_t offset;
693
186k
  CURLUcode uc;
694
186k
  CURLcode result;
695
696
  /*
697
   * Parse the login details and strip them out of the hostname.
698
   */
699
186k
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
700
186k
  if(uc)
701
0
    goto out;
702
703
186k
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
704
186k
  if(result) {
705
0
    uc = cc2cu(result);
706
0
    goto out;
707
0
  }
708
709
186k
  uc = Curl_parse_port(u, host, has_scheme);
710
186k
  if(uc)
711
21.9k
    goto out;
712
713
164k
  if(!curlx_dyn_len(host))
714
249
    return CURLUE_NO_HOST;
715
716
164k
  switch(ipv4_normalize(host)) {
717
15.3k
  case HOST_IPV4:
718
15.3k
    break;
719
8.68k
  case HOST_IPV6:
720
8.68k
    uc = ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
721
8.68k
    break;
722
140k
  case HOST_NAME:
723
140k
    uc = urldecode_host(host);
724
140k
    if(!uc)
725
139k
      uc = hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
726
140k
    break;
727
0
  case HOST_ERROR:
728
0
    uc = CURLUE_OUT_OF_MEMORY;
729
0
    break;
730
0
  default:
731
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
732
0
    break;
733
164k
  }
734
735
186k
out:
736
186k
  return uc;
737
164k
}
738
739
/* used for HTTP/2 server push */
740
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
741
0
{
742
0
  CURLUcode result;
743
0
  struct dynbuf host;
744
745
0
  DEBUGASSERT(authority);
746
0
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
747
748
0
  result = parse_authority(u, authority, strlen(authority),
749
0
                           CURLU_DISALLOW_USER, &host, !!u->scheme);
750
0
  if(result)
751
0
    curlx_dyn_free(&host);
752
0
  else {
753
0
    free(u->host);
754
0
    u->host = curlx_dyn_ptr(&host);
755
0
  }
756
0
  return result;
757
0
}
758
759
/*
760
 * "Remove Dot Segments"
761
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
762
 */
763
764
static bool is_dot(const char **str, size_t *clen)
765
706k
{
766
706k
  const char *p = *str;
767
706k
  if(*p == '.') {
768
109k
    (*str)++;
769
109k
    (*clen)--;
770
109k
    return TRUE;
771
109k
  }
772
596k
  else if((*clen >= 3) &&
773
557k
          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e')) {
774
194
    *str += 3;
775
194
    *clen -= 3;
776
194
    return TRUE;
777
194
  }
778
596k
  return FALSE;
779
706k
}
780
781
37.1M
#define ISSLASH(x) ((x) == '/')
782
783
/*
784
 * dedotdotify()
785
 * @unittest: 1395
786
 *
787
 * This function gets a null-terminated path with dot and dotdot sequences
788
 * passed in and strips them off according to the rules in RFC 3986 section
789
 * 5.2.4.
790
 *
791
 * The function handles a path. It should not contain the query nor fragment.
792
 *
793
 * RETURNS
794
 *
795
 * Zero for success with '*outp' set to an allocated dedotdotified string (or NULL when the input is shorter than two bytes), non-zero on failure.
796
 */
797
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
798
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
799
83.3k
{
800
83.3k
  struct dynbuf out;
801
83.3k
  CURLcode result = CURLE_OK;
802
803
83.3k
  *outp = NULL;
804
  /* the path always starts with a slash, and a slash has not dot */
805
83.3k
  if(clen < 2)
806
0
    return 0;
807
808
83.3k
  curlx_dyn_init(&out, clen + 1);
809
810
  /*  A. If the input buffer begins with a prefix of "../" or "./", then
811
      remove that prefix from the input buffer; otherwise, */
812
83.3k
  if(is_dot(&input, &clen)) {
813
0
    const char *p = input;
814
0
    size_t blen = clen;
815
816
0
    if(!clen)
817
      /* . [end] */
818
0
      goto end;
819
0
    else if(ISSLASH(*p)) {
820
      /* one dot followed by a slash */
821
0
      input = p + 1;
822
0
      clen--;
823
0
    }
824
825
    /*  D. if the input buffer consists only of "." or "..", then remove
826
        that from the input buffer; otherwise, */
827
0
    else if(is_dot(&p, &blen)) {
828
0
      if(!blen)
829
        /* .. [end] */
830
0
        goto end;
831
0
      else if(ISSLASH(*p)) {
832
        /* ../ */
833
0
        input = p + 1;
834
0
        clen = blen - 1;
835
0
      }
836
0
    }
837
0
  }
838
839
37.0M
  while(clen && !result) { /* until end of path content */
840
37.0M
    if(ISSLASH(*input)) {
841
564k
      const char *p = &input[1];
842
564k
      size_t blen = clen - 1;
843
      /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
844
          "."  is a complete path segment, then replace that prefix with "/" in
845
          the input buffer; otherwise, */
846
564k
      if(is_dot(&p, &blen)) {
847
65.0k
        if(!blen) { /* /. */
848
694
          result = curlx_dyn_addn(&out, "/", 1);
849
694
          break;
850
694
        }
851
64.3k
        else if(ISSLASH(*p)) { /* /./ */
852
6.01k
          input = p;
853
6.01k
          clen = blen;
854
6.01k
          continue;
855
6.01k
        }
856
857
        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
858
            where ".." is a complete path segment, then replace that prefix
859
            with "/" in the input buffer and remove the last segment and its
860
            preceding "/" (if any) from the output buffer; otherwise, */
861
58.3k
        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
862
          /* remove the last segment from the output buffer */
863
32.7k
          size_t len = curlx_dyn_len(&out);
864
32.7k
          if(len) {
865
30.0k
            char *ptr = curlx_dyn_ptr(&out);
866
30.0k
            char *last = memrchr(ptr, '/', len);
867
30.0k
            if(last)
868
              /* trim the output at the slash */
869
30.0k
              curlx_dyn_setlen(&out, last - ptr);
870
30.0k
          }
871
872
32.7k
          if(blen) { /* /../ */
873
31.1k
            input = p;
874
31.1k
            clen = blen;
875
31.1k
            continue;
876
31.1k
          }
877
1.67k
          result = curlx_dyn_addn(&out, "/", 1);
878
1.67k
          break;
879
32.7k
        }
880
65.0k
      }
881
564k
    }
882
883
    /*  E. move the first path segment in the input buffer to the end of
884
        the output buffer, including the initial "/" character (if any) and
885
        any subsequent characters up to, but not including, the next "/"
886
        character or the end of the input buffer. */
887
888
36.9M
    result = curlx_dyn_addn(&out, input, 1);
889
36.9M
    input++;
890
36.9M
    clen--;
891
36.9M
  }
892
83.3k
end:
893
83.3k
  if(!result) {
894
83.3k
    if(curlx_dyn_len(&out))
895
83.3k
      *outp = curlx_dyn_ptr(&out);
896
0
    else {
897
0
      *outp = strdup("");
898
0
      if(!*outp)
899
0
        return 1;
900
0
    }
901
83.3k
  }
902
83.3k
  return result ? 1 : 0; /* success */
903
83.3k
}
904
905
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
906
297k
{
907
297k
  const char *path;
908
297k
  size_t pathlen;
909
297k
  char *query = NULL;
910
297k
  char *fragment = NULL;
911
297k
  char schemebuf[MAX_SCHEME_LEN + 1];
912
297k
  size_t schemelen = 0;
913
297k
  size_t urllen;
914
297k
  CURLUcode result = CURLUE_OK;
915
297k
  size_t fraglen = 0;
916
297k
  struct dynbuf host;
917
918
297k
  DEBUGASSERT(url);
919
920
297k
  curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
921
922
297k
  result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
923
297k
  if(result)
924
95.9k
    goto fail;
925
926
201k
  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
927
201k
                                   flags & (CURLU_GUESS_SCHEME|
928
201k
                                            CURLU_DEFAULT_SCHEME));
929
930
  /* handle the file: scheme */
931
201k
  if(schemelen && !strcmp(schemebuf, "file")) {
932
4.70k
    bool uncpath = FALSE;
933
4.70k
    if(urllen <= 6) {
934
      /* file:/ is not enough to actually be a complete file: URL */
935
82
      result = CURLUE_BAD_FILE_URL;
936
82
      goto fail;
937
82
    }
938
939
    /* path has been allocated large enough to hold this */
940
4.62k
    path = &url[5];
941
4.62k
    pathlen = urllen - 5;
942
943
4.62k
    u->scheme = strdup("file");
944
4.62k
    if(!u->scheme) {
945
0
      result = CURLUE_OUT_OF_MEMORY;
946
0
      goto fail;
947
0
    }
948
949
    /* Extra handling URLs with an authority component (i.e. that start with
950
     * "file://")
951
     *
952
     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
953
     * RFC 8089, but not the (current) WHAT-WG URL spec.
954
     */
955
4.62k
    if(path[0] == '/' && path[1] == '/') {
956
      /* swallow the two slashes */
957
2.00k
      const char *ptr = &path[2];
958
959
      /*
960
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
961
       *
962
       *  o it has no/blank hostname, or
963
       *
964
       *  o the hostname matches "localhost" (case-insensitively), or
965
       *
966
       *  o the hostname is a FQDN that resolves to this machine, or
967
       *
968
       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
969
       *    Appendix E.3).
970
       *
971
       * For brevity, we only consider URLs with empty, "localhost", or
972
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
973
       *
974
       * Additionally, there is an exception for URLs with a Windows drive
975
       * letter in the authority (which was accidentally omitted from RFC 8089
976
       * Appendix E, but believe me, it was meant to be there. --MK)
977
       */
978
2.00k
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
979
        /* the URL includes a hostname, it must match "localhost" or
980
           "127.0.0.1" to be valid */
981
534
        if(checkprefix("localhost/", ptr) ||
982
534
           checkprefix("127.0.0.1/", ptr)) {
983
0
          ptr += 9; /* now points to the slash after the host */
984
0
        }
985
534
        else {
986
#ifdef _WIN32
987
          size_t len;
988
989
          /* the hostname, NetBIOS computer name, can not contain disallowed
990
             chars, and the delimiting slash character must be appended to the
991
             hostname */
992
          path = strpbrk(ptr, "/\\:*?\"<>|");
993
          if(!path || *path != '/') {
994
            result = CURLUE_BAD_FILE_URL;
995
            goto fail;
996
          }
997
998
          len = path - ptr;
999
          if(len) {
1000
            CURLcode code = curlx_dyn_addn(&host, ptr, len);
1001
            if(code) {
1002
              result = cc2cu(code);
1003
              goto fail;
1004
            }
1005
            uncpath = TRUE;
1006
          }
1007
1008
          ptr -= 2; /* now points to the // before the host in UNC */
1009
#else
1010
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1011
             none */
1012
534
          result = CURLUE_BAD_FILE_URL;
1013
534
          goto fail;
1014
534
#endif
1015
534
        }
1016
534
      }
1017
1018
1.47k
      path = ptr;
1019
1.47k
      pathlen = urllen - (ptr - url);
1020
1.47k
    }
1021
1022
4.08k
    if(!uncpath)
1023
      /* no host for file: URLs by default */
1024
4.08k
      curlx_dyn_reset(&host);
1025
1026
4.08k
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1027
    /* Do not allow Windows drive letters when not in Windows.
1028
     * This catches both "file:/c:" and "file:c:" */
1029
4.08k
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1030
3.74k
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1031
      /* File drive letters are only accepted in MS-DOS/Windows */
1032
644
      result = CURLUE_BAD_FILE_URL;
1033
644
      goto fail;
1034
644
    }
1035
#else
1036
    /* If the path starts with a slash and a drive letter, ditch the slash */
1037
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1038
      /* This cannot be done with strcpy, as the memory chunks overlap! */
1039
      path++;
1040
      pathlen--;
1041
    }
1042
#endif
1043
1044
4.08k
  }
1045
196k
  else {
1046
    /* clear path */
1047
196k
    const char *schemep = NULL;
1048
196k
    const char *hostp;
1049
196k
    size_t hostlen;
1050
1051
196k
    if(schemelen) {
1052
84.7k
      int i = 0;
1053
84.7k
      const char *p = &url[schemelen + 1];
1054
237k
      while((*p == '/') && (i < 4)) {
1055
152k
        p++;
1056
152k
        i++;
1057
152k
      }
1058
1059
84.7k
      schemep = schemebuf;
1060
84.7k
      if(!Curl_get_scheme_handler(schemep) &&
1061
13.4k
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1062
0
        result = CURLUE_UNSUPPORTED_SCHEME;
1063
0
        goto fail;
1064
0
      }
1065
1066
84.7k
      if((i < 1) || (i > 3)) {
1067
        /* less than one or more than three slashes */
1068
305
        result = CURLUE_BAD_SLASHES;
1069
305
        goto fail;
1070
305
      }
1071
84.4k
      hostp = p; /* hostname starts here */
1072
84.4k
    }
1073
112k
    else {
1074
      /* no scheme! */
1075
1076
112k
      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1077
0
        result = CURLUE_BAD_SCHEME;
1078
0
        goto fail;
1079
0
      }
1080
112k
      if(flags & CURLU_DEFAULT_SCHEME)
1081
0
        schemep = DEFAULT_SCHEME;
1082
1083
      /*
1084
       * The URL was badly formatted, let's try without scheme specified.
1085
       */
1086
112k
      hostp = url;
1087
112k
    }
1088
1089
196k
    if(schemep) {
1090
84.4k
      u->scheme = strdup(schemep);
1091
84.4k
      if(!u->scheme) {
1092
0
        result = CURLUE_OUT_OF_MEMORY;
1093
0
        goto fail;
1094
0
      }
1095
84.4k
    }
1096
1097
    /* find the end of the hostname + port number */
1098
196k
    hostlen = strcspn(hostp, "/?#");
1099
196k
    path = &hostp[hostlen];
1100
1101
    /* this pathlen also contains the query and the fragment */
1102
196k
    pathlen = urllen - (path - url);
1103
196k
    if(hostlen) {
1104
1105
186k
      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1106
186k
      if(result)
1107
61.4k
        goto fail;
1108
1109
125k
      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1110
58.6k
        const char *hostname = curlx_dyn_ptr(&host);
1111
        /* legacy curl-style guess based on hostname */
1112
58.6k
        if(checkprefix("ftp.", hostname))
1113
250
          schemep = "ftp";
1114
58.4k
        else if(checkprefix("dict.", hostname))
1115
0
          schemep = "dict";
1116
58.4k
        else if(checkprefix("ldap.", hostname))
1117
0
          schemep = "ldap";
1118
58.4k
        else if(checkprefix("imap.", hostname))
1119
0
          schemep = "imap";
1120
58.4k
        else if(checkprefix("smtp.", hostname))
1121
180
          schemep = "smtp";
1122
58.2k
        else if(checkprefix("pop3.", hostname))
1123
0
          schemep = "pop3";
1124
58.2k
        else
1125
58.2k
          schemep = "http";
1126
1127
58.6k
        u->scheme = strdup(schemep);
1128
58.6k
        if(!u->scheme) {
1129
0
          result = CURLUE_OUT_OF_MEMORY;
1130
0
          goto fail;
1131
0
        }
1132
58.6k
        u->guessed_scheme = TRUE;
1133
58.6k
      }
1134
125k
    }
1135
9.90k
    else if(flags & CURLU_NO_AUTHORITY) {
1136
      /* allowed to be empty. */
1137
0
      if(curlx_dyn_add(&host, "")) {
1138
0
        result = CURLUE_OUT_OF_MEMORY;
1139
0
        goto fail;
1140
0
      }
1141
0
    }
1142
9.90k
    else {
1143
9.90k
      result = CURLUE_NO_HOST;
1144
9.90k
      goto fail;
1145
9.90k
    }
1146
196k
  }
1147
1148
128k
  fragment = strchr(path, '#');
1149
128k
  if(fragment) {
1150
17.0k
    fraglen = pathlen - (fragment - path);
1151
17.0k
    u->fragment_present = TRUE;
1152
17.0k
    if(fraglen > 1) {
1153
      /* skip the leading '#' in the copy but include the terminating null */
1154
16.1k
      if(flags & CURLU_URLENCODE) {
1155
0
        struct dynbuf enc;
1156
0
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1157
0
        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1158
0
        if(result)
1159
0
          goto fail;
1160
0
        u->fragment = curlx_dyn_ptr(&enc);
1161
0
      }
1162
16.1k
      else {
1163
16.1k
        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1164
16.1k
        if(!u->fragment) {
1165
0
          result = CURLUE_OUT_OF_MEMORY;
1166
0
          goto fail;
1167
0
        }
1168
16.1k
      }
1169
16.1k
    }
1170
    /* after this, pathlen still contains the query */
1171
17.0k
    pathlen -= fraglen;
1172
17.0k
  }
1173
1174
128k
  query = memchr(path, '?', pathlen);
1175
128k
  if(query) {
1176
56.7k
    size_t qlen = fragment ? (size_t)(fragment - query) :
1177
56.7k
      pathlen - (query - path);
1178
56.7k
    pathlen -= qlen;
1179
56.7k
    u->query_present = TRUE;
1180
56.7k
    if(qlen > 1) {
1181
55.5k
      if(flags & CURLU_URLENCODE) {
1182
15
        struct dynbuf enc;
1183
15
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1184
        /* skip the leading question mark */
1185
15
        result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1186
15
        if(result)
1187
0
          goto fail;
1188
15
        u->query = curlx_dyn_ptr(&enc);
1189
15
      }
1190
55.5k
      else {
1191
55.5k
        u->query = Curl_memdup0(query + 1, qlen - 1);
1192
55.5k
        if(!u->query) {
1193
0
          result = CURLUE_OUT_OF_MEMORY;
1194
0
          goto fail;
1195
0
        }
1196
55.5k
      }
1197
55.5k
    }
1198
1.11k
    else {
1199
      /* single byte query */
1200
1.11k
      u->query = strdup("");
1201
1.11k
      if(!u->query) {
1202
0
        result = CURLUE_OUT_OF_MEMORY;
1203
0
        goto fail;
1204
0
      }
1205
1.11k
    }
1206
56.7k
  }
1207
1208
128k
  if(pathlen && (flags & CURLU_URLENCODE)) {
1209
444
    struct dynbuf enc;
1210
444
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1211
444
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1212
444
    if(result)
1213
0
      goto fail;
1214
444
    pathlen = curlx_dyn_len(&enc);
1215
444
    path = u->path = curlx_dyn_ptr(&enc);
1216
444
  }
1217
1218
128k
  if(pathlen <= 1) {
1219
    /* there is no path left or just the slash, unset */
1220
45.4k
    path = NULL;
1221
45.4k
  }
1222
83.3k
  else {
1223
83.3k
    if(!u->path) {
1224
83.2k
      u->path = Curl_memdup0(path, pathlen);
1225
83.2k
      if(!u->path) {
1226
0
        result = CURLUE_OUT_OF_MEMORY;
1227
0
        goto fail;
1228
0
      }
1229
83.2k
      path = u->path;
1230
83.2k
    }
1231
91
    else if(flags & CURLU_URLENCODE)
1232
      /* it might have encoded more than just the path so cut it */
1233
91
      u->path[pathlen] = 0;
1234
1235
83.3k
    if(!(flags & CURLU_PATH_AS_IS)) {
1236
      /* remove ../ and ./ sequences according to RFC3986 */
1237
83.3k
      char *dedot;
1238
83.3k
      int err = dedotdotify(path, pathlen, &dedot);
1239
83.3k
      if(err) {
1240
0
        result = CURLUE_OUT_OF_MEMORY;
1241
0
        goto fail;
1242
0
      }
1243
83.3k
      if(dedot) {
1244
83.3k
        free(u->path);
1245
83.3k
        u->path = dedot;
1246
83.3k
      }
1247
83.3k
    }
1248
83.3k
  }
1249
1250
128k
  u->host = curlx_dyn_ptr(&host);
1251
1252
128k
  return result;
1253
168k
fail:
1254
168k
  curlx_dyn_free(&host);
1255
168k
  free_urlhandle(u);
1256
168k
  return result;
1257
128k
}
1258
1259
/*
1260
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1261
 */
1262
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1263
                                      unsigned int flags)
1264
297k
{
1265
297k
  CURLUcode result;
1266
297k
  CURLU tmpurl;
1267
297k
  memset(&tmpurl, 0, sizeof(tmpurl));
1268
297k
  result = parseurl(url, &tmpurl, flags);
1269
297k
  if(!result) {
1270
128k
    free_urlhandle(u);
1271
128k
    *u = tmpurl;
1272
128k
  }
1273
297k
  return result;
1274
297k
}
1275
1276
/*
1277
 */
1278
CURLU *curl_url(void)
1279
297k
{
1280
297k
  return calloc(1, sizeof(struct Curl_URL));
1281
297k
}
1282
1283
/*
 * Release a URL handle: first its contents via free_urlhandle(), then the
 * handle itself. A NULL handle is accepted and ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1290
1291
#define DUP(dest, src, name)                    \
1292
0
  do {                                          \
1293
0
    if(src->name) {                             \
1294
0
      dest->name = strdup(src->name);           \
1295
0
      if(!dest->name)                           \
1296
0
        goto fail;                              \
1297
0
    }                                           \
1298
0
  } while(0)
1299
1300
CURLU *curl_url_dup(const CURLU *in)
1301
0
{
1302
0
  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1303
0
  if(u) {
1304
0
    DUP(u, in, scheme);
1305
0
    DUP(u, in, user);
1306
0
    DUP(u, in, password);
1307
0
    DUP(u, in, options);
1308
0
    DUP(u, in, host);
1309
0
    DUP(u, in, port);
1310
0
    DUP(u, in, path);
1311
0
    DUP(u, in, query);
1312
0
    DUP(u, in, fragment);
1313
0
    DUP(u, in, zoneid);
1314
0
    u->portnum = in->portnum;
1315
0
    u->fragment_present = in->fragment_present;
1316
0
    u->query_present = in->query_present;
1317
0
  }
1318
0
  return u;
1319
0
fail:
1320
0
  curl_url_cleanup(u);
1321
0
  return NULL;
1322
0
}
1323
1324
#ifndef USE_IDN
/* without IDN support both conversions always report CURLUE_LACKS_IDN */
#define host_decode(x,y) CURLUE_LACKS_IDN
#define host_encode(x,y) CURLUE_LACKS_IDN
#else
/*
 * Run Curl_idn_decode() on 'host', storing its output in 'allochost', and
 * translate the CURLcode result into a CURLUcode: out of memory is passed on
 * as such, any other failure is reported as a bad hostname.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_decode(host, allochost);
  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}

/*
 * Run Curl_idn_encode() on 'host', storing its output in 'allochost', and
 * translate the CURLcode result into a CURLUcode: out of memory is passed on
 * as such, any other failure is reported as a bad hostname.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_encode(host, allochost);
  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  return CURLUE_BAD_HOSTNAME;
}
#endif
1346
1347
/*
 * urlget_format() builds the caller-owned copy of a single URL part.
 *
 * 'ptr' is the stored (or synthesized) part string, which is first copied
 * into '*part'. The copy is then transformed according to 'flags':
 *
 *  - when 'plusdecode' is set, every '+' is turned into a space
 *  - CURLU_URLDECODE: the copy is URL decoded, control bytes are rejected
 *  - CURLU_URLENCODE: the copy is URL encoded
 *  - otherwise, for CURLUPART_HOST only, CURLU_PUNYCODE / CURLU_PUNY2IDN
 *    run the name through host_decode() / host_encode()
 *
 * Returns CURLUE_OK with '*part' pointing to an allocated string the caller
 * must free. On every error '*part' is left as NULL and nothing remains
 * allocated for the caller to release.
 */
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
                               const char *ptr, char **part,
                               bool plusdecode, unsigned int flags)
{
  size_t partlen = strlen(ptr);
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
  *part = Curl_memdup0(ptr, partlen);
  if(!*part)
    return CURLUE_OUT_OF_MEMORY;
  if(plusdecode) {
    /* convert + to space */
    char *plus = *part;
    size_t i = 0;
    for(i = 0; i < partlen; ++plus, i++) {
      if(*plus == '+')
        *plus = ' ';
    }
  }
  if(urldecode) {
    char *decoded;
    size_t dlen;
    /* this unconditional rejection of control bytes is documented
       API behavior */
    CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
    free(*part);
    if(res) {
      *part = NULL;
      return CURLUE_URLDECODE;
    }
    *part = decoded;
    partlen = dlen;
  }
  if(urlencode) {
    struct dynbuf enc;
    CURLUcode uc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
    if(uc) {
      /* do not hand an allocation back together with an error code */
      free(*part);
      *part = NULL;
      return uc;
    }
    free(*part);
    *part = curlx_dyn_ptr(&enc);
  }
  else if(punycode) {
    if(!Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      CURLUcode ret = host_decode(*part, &allochost);
      if(ret) {
        /* also taken in builds without IDN support */
        free(*part);
        *part = NULL;
        return ret;
      }
      free(*part);
      *part = allochost;
    }
  }
  else if(depunyfy) {
    if(Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      CURLUcode ret = host_encode(*part, &allochost);
      if(ret) {
        /* also taken in builds without IDN support */
        free(*part);
        *part = NULL;
        return ret;
      }
      free(*part);
      *part = allochost;
    }
  }

  return CURLUE_OK;
}
1415
1416
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
1417
300k
{
1418
300k
  char *url;
1419
300k
  const char *scheme;
1420
300k
  char *options = u->options;
1421
300k
  char *port = u->port;
1422
300k
  char *allochost = NULL;
1423
300k
  bool show_fragment =
1424
300k
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1425
300k
  bool show_query = (u->query && u->query[0]) ||
1426
245k
    (u->query_present && flags & CURLU_GET_EMPTY);
1427
300k
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1428
300k
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1429
300k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1430
300k
  char portbuf[7];
1431
300k
  if(u->scheme && curl_strequal("file", u->scheme)) {
1432
3.30k
    url = curl_maprintf("file://%s%s%s%s%s",
1433
3.30k
                        u->path,
1434
3.30k
                        show_query ? "?": "",
1435
3.30k
                        u->query ? u->query : "",
1436
3.30k
                        show_fragment ? "#": "",
1437
3.30k
                        u->fragment ? u->fragment : "");
1438
3.30k
  }
1439
297k
  else if(!u->host)
1440
174k
    return CURLUE_NO_HOST;
1441
122k
  else {
1442
122k
    const struct Curl_handler *h = NULL;
1443
122k
    char schemebuf[MAX_SCHEME_LEN + 5];
1444
122k
    if(u->scheme)
1445
122k
      scheme = u->scheme;
1446
0
    else if(flags & CURLU_DEFAULT_SCHEME)
1447
0
      scheme = DEFAULT_SCHEME;
1448
0
    else
1449
0
      return CURLUE_NO_SCHEME;
1450
1451
122k
    h = Curl_get_scheme_handler(scheme);
1452
122k
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
1453
      /* there is no stored port number, but asked to deliver
1454
         a default one for the scheme */
1455
0
      if(h) {
1456
0
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1457
0
        port = portbuf;
1458
0
      }
1459
0
    }
1460
122k
    else if(port) {
1461
      /* there is a stored port number, but asked to inhibit if it matches
1462
         the default one for the scheme */
1463
5.15k
      if(h && (h->defport == u->portnum) &&
1464
223
         (flags & CURLU_NO_DEFAULT_PORT))
1465
0
        port = NULL;
1466
5.15k
    }
1467
1468
122k
    if(h && !(h->flags & PROTOPT_URLOPTIONS))
1469
117k
      options = NULL;
1470
1471
122k
    if(u->host[0] == '[') {
1472
6.64k
      if(u->zoneid) {
1473
        /* make it '[ host %25 zoneid ]' */
1474
6.64k
        struct dynbuf enc;
1475
6.64k
        size_t hostlen = strlen(u->host);
1476
6.64k
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1477
6.64k
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1478
6.64k
                          u->zoneid))
1479
0
          return CURLUE_OUT_OF_MEMORY;
1480
6.64k
        allochost = curlx_dyn_ptr(&enc);
1481
6.64k
      }
1482
6.64k
    }
1483
116k
    else if(urlencode) {
1484
0
      allochost = curl_easy_escape(NULL, u->host, 0);
1485
0
      if(!allochost)
1486
0
        return CURLUE_OUT_OF_MEMORY;
1487
0
    }
1488
116k
    else if(punycode) {
1489
0
      if(!Curl_is_ASCII_name(u->host)) {
1490
0
        CURLUcode ret = host_decode(u->host, &allochost);
1491
0
        if(ret)
1492
0
          return ret;
1493
0
      }
1494
0
    }
1495
116k
    else if(depunyfy) {
1496
0
      if(Curl_is_ASCII_name(u->host)) {
1497
0
        CURLUcode ret = host_encode(u->host, &allochost);
1498
0
        if(ret)
1499
0
          return ret;
1500
0
      }
1501
0
    }
1502
1503
122k
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1504
122k
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1505
0
    else
1506
0
      schemebuf[0] = 0;
1507
1508
122k
    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1509
122k
                        schemebuf,
1510
122k
                        u->user ? u->user : "",
1511
122k
                        u->password ? ":": "",
1512
122k
                        u->password ? u->password : "",
1513
122k
                        options ? ";" : "",
1514
122k
                        options ? options : "",
1515
122k
                        (u->user || u->password || options) ? "@": "",
1516
122k
                        allochost ? allochost : u->host,
1517
122k
                        port ? ":": "",
1518
122k
                        port ? port : "",
1519
122k
                        u->path ? u->path : "/",
1520
122k
                        show_query ? "?": "",
1521
122k
                        u->query ? u->query : "",
1522
122k
                        show_fragment ? "#": "",
1523
122k
                        u->fragment ? u->fragment : "");
1524
122k
    free(allochost);
1525
122k
  }
1526
126k
  if(!url)
1527
0
    return CURLUE_OUT_OF_MEMORY;
1528
126k
  *part = url;
1529
126k
  return CURLUE_OK;
1530
126k
}
1531
1532
/*
 * curl_url_get() extracts one part of the URL held in 'u'.
 *
 * On success '*part' points to a newly allocated string. When the requested
 * part is not set, the part-specific "missing" error code is returned and
 * '*part' is NULL. The path is never missing: an unset path reads as "/".
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART;
  char defport[7];
  bool plus2space = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_URL:
    /* the full URL is assembled separately */
    return urlget_url(u, part, flags);

  case CURLUPART_SCHEME:
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      return CURLUE_NO_SCHEME;
    flags &= ~CURLU_URLDECODE; /* never for schemes */
    missing = CURLUE_NO_SCHEME;
    value = u->scheme;
    break;

  case CURLUPART_USER:
    missing = CURLUE_NO_USER;
    value = u->user;
    break;

  case CURLUPART_PASSWORD:
    missing = CURLUE_NO_PASSWORD;
    value = u->password;
    break;

  case CURLUPART_OPTIONS:
    missing = CURLUE_NO_OPTIONS;
    value = u->options;
    break;

  case CURLUPART_HOST:
    missing = CURLUE_NO_HOST;
    value = u->host;
    break;

  case CURLUPART_ZONEID:
    missing = CURLUE_NO_ZONEID;
    value = u->zoneid;
    break;

  case CURLUPART_PORT:
    flags &= ~CURLU_URLDECODE; /* never for port */
    missing = CURLUE_NO_PORT;
    value = u->port;
    if(u->scheme) {
      if(value) {
        /* a port is stored: hide it on request when it equals the default
           port of the scheme */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h && (flags & CURLU_NO_DEFAULT_PORT) &&
           (h->defport == u->portnum))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* no port is stored: deliver the default port of the scheme when
           the scheme has a handler */
        const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
        if(h) {
          curl_msnprintf(defport, sizeof(defport), "%u", h->defport);
          value = defport;
        }
      }
    }
    break;

  case CURLUPART_PATH:
    value = u->path ? u->path : "/";
    break;

  case CURLUPART_QUERY:
    missing = CURLUE_NO_QUERY;
    /* '+' is turned into space only when URL decoding is asked for */
    plus2space = (flags & CURLU_URLDECODE) ? TRUE : FALSE;
    value = u->query;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* a blank query is only delivered when explicitly asked for */
      value = NULL;
    break;

  case CURLUPART_FRAGMENT:
    missing = CURLUE_NO_FRAGMENT;
    value = u->fragment;
    if(!value && u->fragment_present && (flags & CURLU_GET_EMPTY))
      /* a blank fragment was present and the user asks for it */
      value = "";
    break;

  default:
    /* unknown part: 'value' stays NULL, reported as CURLUE_UNKNOWN_PART */
    break;
  }

  return value ?
    urlget_format(u, what, value, part, plus2space, flags) : missing;
}
1626
1627
/*
 * set_url_scheme() verifies that 'scheme' may be stored as the scheme part.
 *
 * The length must be 1..MAX_SCHEME_LEN. A scheme that has a handler is
 * accepted as is. A scheme without a handler is only accepted when
 * CURLU_NON_SUPPORT_SCHEME is set and the complete string follows the
 * RFC 3986 syntax: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Returns CURLUE_OK (after clearing the guessed-scheme flag),
 * CURLUE_BAD_SCHEME or CURLUE_UNSUPPORTED_SCHEME. The scheme string itself
 * is not stored by this function.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_handler *h = NULL;
  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;
  /* verify that it is a fine scheme */
  h = Curl_get_scheme_handler(scheme);
  if(!h) {
    size_t i;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;
    if(!ISALPHA(scheme[0]))
      return CURLUE_BAD_SCHEME;
    /* check every remaining byte, including the very last one */
    for(i = 1; i < plen; i++) {
      const char *s = &scheme[i];
      if(!(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.')))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
1656
1657
/*
 * set_url_port() stores 'provided_port' as the port part of 'u'.
 *
 * The string must start with a digit, be accepted by curlx_str_number()
 * with an upper limit of 0xffff and have nothing following the number.
 * On success both the string form and the numerical form are updated.
 *
 * Returns CURLUE_OK, CURLUE_BAD_PORT_NUMBER or CURLUE_OUT_OF_MEMORY. The
 * previously stored port is kept on failure.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  curl_off_t number;
  char *portstr;

  /* reject a non-number, a number that fails to parse within the limit and
     a number followed by anything else */
  if(!ISDIGIT(provided_port[0]) ||
     curlx_str_number(&provided_port, &number, 0xffff) ||
     *provided_port)
    return CURLUE_BAD_PORT_NUMBER;

  portstr = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, number);
  if(!portstr)
    return CURLUE_OUT_OF_MEMORY;

  free(u->port);
  u->port = portstr;
  u->portnum = (unsigned short)number;
  return CURLUE_OK;
}
1675
1676
/*
 * set_url() lets a new URL replace the existing (if any) contents of 'u'.
 *
 * If the existing contents are enough to form a URL, a relative 'url' is
 * applied to it instead of replacing it. 'part_size' is the length of 'url'.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  char *base = NULL;
  CURLUcode rc;

  if(!part_size) {
    /* a blank URL is only valid as a relative URL on top of an already
       complete one, in which case nothing changes */
    if(curl_url_get(u, CURLUPART_URL, &base, flags))
      return CURLUE_MALFORMED_INPUT;
    free(base);
    return CURLUE_OK;
  }

  /* an absolute URL always replaces whatever is stored */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME|CURLU_DEFAULT_SCHEME)))
    return parseurl_and_replace(url, u, flags);

  /* when no absolute URL can be extracted from the existing contents there
     is nothing to be relative to, so replace in that case as well */
  if(curl_url_get(u, CURLUPART_URL, &base, flags))
    return parseurl_and_replace(url, u, flags);

  DEBUGASSERT(base); /* it is set here */
  /* apply the relative part to create a new URL */
  rc = redirect_url(base, url, u, flags);
  free(base);
  return rc;
}
1713
1714
/*
 * urlset_clear() removes the part 'what' from the handle.
 *
 * CURLUPART_URL wipes the entire handle. For the other parts the stored
 * string is freed and its pointer set to NULL; state tied to the part (the
 * guessed-scheme flag, the numerical port, the query/fragment presence
 * flags) is reset along with it.
 *
 * Returns CURLUE_OK, or CURLUE_UNKNOWN_PART for an unrecognized part.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **slot = NULL; /* the string member to release */

  switch(what) {
  case CURLUPART_URL:
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    slot = &u->scheme;
    break;
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    slot = &u->port;
    break;
  case CURLUPART_PATH:
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    slot = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    slot = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  Curl_safefree(*slot);
  return CURLUE_OK;
}
1760
1761
/*
 * allowed_in_path() tells whether the byte 'x' is one of the punctuation
 * characters listed in the switch below.
 *
 * curl_url_set() consults it while URL encoding a path (its 'pathmode'):
 * bytes for which this returns TRUE are copied to the output as-is instead of
 * being percent-encoded.
 *
 * Returns TRUE for a listed character and FALSE for everything else.
 */
static bool allowed_in_path(unsigned char x)
1762
0
{
1763
0
  switch(x) {
1764
0
  case '!': case '$': case '&': case '\'':
1765
0
  case '(': case ')': case '{': case '}':
1766
0
  case '[': case ']': case '*': case '+':
1767
0
  case ',': case ';': case '=': case ':':
1768
0
  case '@': case '/':
1769
0
    return TRUE;
1770
0
  }
1771
0
  /* any byte not listed above */
  return FALSE;
1772
0
}
1773
1774
/*
 * curl_url_set() sets, replaces or clears one component of the URL handle.
 *
 * u      the URL handle to update; NULL yields CURLUE_BAD_HANDLE.
 * what   which component to set.
 * part   the new null-terminated value. NULL clears the component via
 *        urlset_clear(). A string longer than CURL_MAX_INPUT_LENGTH is
 *        rejected with CURLUE_MALFORMED_INPUT.
 * flags  CURLU_* bits. The ones inspected directly in this function are
 *        CURLU_URLENCODE, CURLU_APPENDQUERY and CURLU_NO_AUTHORITY; the full
 *        value is also forwarded to set_url_scheme() and set_url().
 *
 * CURLUPART_PORT and CURLUPART_URL are delegated entirely to set_url_port()
 * and set_url(). For every other known part the function builds the new
 * string in a dynbuf (URL encoding it on request, or otherwise lowercasing
 * the hex digits of existing %XX sequences), optionally appends it to the
 * existing query, validates it when it is a hostname, and finally stores it
 * in the handle in place of the previous value.
 *
 * Returns CURLUE_OK on success, CURLUE_UNKNOWN_PART for an unhandled 'what',
 * CURLUE_BAD_HOSTNAME for a rejected host, CURLUE_OUT_OF_MEMORY or another
 * code mapped by cc2cu() when building the string fails, or whatever the
 * delegated helper returns.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1775
                       const char *part, unsigned int flags)
1776
326k
{
1777
326k
  /* points at the handle field that will receive the new string */
  char **storep = NULL;
1778
326k
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1779
326k
  /* encode a space as '+' (query with URL encoding only) */
  bool plusencode = FALSE;
1780
326k
  /* leave the characters accepted by allowed_in_path() unencoded */
  bool pathmode = FALSE;
1781
326k
  /* prepend a '/' when the input does not start with one */
  bool leadingslash = FALSE;
1782
326k
  /* append to the existing query instead of replacing it */
  bool appendquery = FALSE;
1783
326k
  /* leave the first '=' unencoded (appended query only) */
  bool equalsencode = FALSE;
1784
326k
  /* length of 'part'; used to size the encode buffer */
  size_t nalloc;
1785
1786
326k
  if(!u)
1787
0
    return CURLUE_BAD_HANDLE;
1788
326k
  if(!part)
1789
    /* setting a part to NULL clears it */
1790
0
    return urlset_clear(u, what);
1791
1792
326k
  nalloc = strlen(part);
1793
326k
  if(nalloc > CURL_MAX_INPUT_LENGTH)
1794
    /* excessive input length */
1795
0
    return CURLUE_MALFORMED_INPUT;
1796
1797
326k
  /* select the destination field and the encoding rules for this part */
  switch(what) {
1798
0
  case CURLUPART_SCHEME: {
1799
0
    /* set_url_scheme() is defined elsewhere; presumably it validates the
       scheme name - confirm. The string itself is stored further down. */
    CURLUcode status = set_url_scheme(u, part, flags);
1800
0
    if(status)
1801
0
      return status;
1802
0
    storep = &u->scheme;
1803
0
    urlencode = FALSE; /* never */
1804
0
    break;
1805
0
  }
1806
20.2k
  case CURLUPART_USER:
1807
20.2k
    storep = &u->user;
1808
20.2k
    break;
1809
8.55k
  case CURLUPART_PASSWORD:
1810
8.55k
    storep = &u->password;
1811
8.55k
    break;
1812
0
  case CURLUPART_OPTIONS:
1813
0
    storep = &u->options;
1814
0
    break;
1815
0
  case CURLUPART_HOST:
1816
0
    storep = &u->host;
1817
0
    /* NOTE(review): the zone id is dropped here, before the new hostname
       has been validated below, so a rejected hostname still clears it */
    Curl_safefree(u->zoneid);
1818
0
    break;
1819
0
  case CURLUPART_ZONEID:
1820
0
    storep = &u->zoneid;
1821
0
    break;
1822
0
  case CURLUPART_PORT:
1823
0
    return set_url_port(u, part);
1824
0
  case CURLUPART_PATH:
1825
0
    pathmode = TRUE;
1826
0
    leadingslash = TRUE; /* enforce */
1827
0
    storep = &u->path;
1828
0
    break;
1829
0
  case CURLUPART_QUERY:
1830
0
    plusencode = urlencode;
1831
0
    appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1832
0
    equalsencode = appendquery;
1833
0
    storep = &u->query;
1834
0
    /* NOTE(review): the flag is raised before the string is built, so it
       stays set even if one of the error returns below is taken */
    u->query_present = TRUE;
1835
0
    break;
1836
0
  case CURLUPART_FRAGMENT:
1837
0
    storep = &u->fragment;
1838
0
    u->fragment_present = TRUE;
1839
0
    break;
1840
297k
  case CURLUPART_URL:
1841
297k
    return set_url(u, part, nalloc, flags);
1842
0
  default:
1843
0
    return CURLUE_UNKNOWN_PART;
1844
326k
  }
1845
28.8k
  DEBUGASSERT(storep);
1846
28.8k
  {
1847
28.8k
    const char *newp;
1848
28.8k
    struct dynbuf enc;
1849
28.8k
    /* size limit: each input byte can grow to three output bytes ("%XX"),
       plus the optional prepended slash; the extra 1 is presumably slack
       for the terminator - confirm against the dynbuf implementation */
    curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1850
1851
28.8k
    if(leadingslash && (part[0] != '/')) {
1852
0
      CURLcode result = curlx_dyn_addn(&enc, "/", 1);
1853
0
      if(result)
1854
0
        return cc2cu(result);
1855
0
    }
1856
28.8k
    if(urlencode) {
1857
28.8k
      const unsigned char *i;
1858
1859
27.1M
      /* Walk the input one byte at a time and emit its encoded form.
         NOTE(review): the error returns inside this loop do not free 'enc'
         explicitly; presumably a failed dynbuf add releases the buffer
         itself - confirm */
      for(i = (const unsigned char *)part; *i; i++) {
1860
27.0M
        CURLcode result;
1861
27.0M
        if((*i == ' ') && plusencode) {
1862
0
          result = curlx_dyn_addn(&enc, "+", 1);
1863
0
          if(result)
1864
0
            return CURLUE_OUT_OF_MEMORY;
1865
0
        }
1866
27.0M
        else if(ISUNRESERVED(*i) ||
1867
17.3M
                (pathmode && allowed_in_path(*i)) ||
1868
17.3M
                ((*i == '=') && equalsencode)) {
1869
9.78M
          /* this byte is copied to the output unchanged */
          if((*i == '=') && equalsencode)
1870
            /* only skip the first equals sign */
1871
0
            equalsencode = FALSE;
1872
9.78M
          result = curlx_dyn_addn(&enc, i, 1);
1873
9.78M
          if(result)
1874
0
            return cc2cu(result);
1875
9.78M
        }
1876
17.3M
        else {
1877
17.3M
          /* percent-encode: out[0] is '%', Curl_hexbyte() fills in the two
             hex digits that follow */
          unsigned char out[3]={'%'};
1878
17.3M
          Curl_hexbyte(&out[1], *i);
1879
17.3M
          result = curlx_dyn_addn(&enc, out, 3);
1880
17.3M
          if(result)
1881
0
            return cc2cu(result);
1882
17.3M
        }
1883
27.0M
      }
1884
28.8k
    }
1885
0
    else {
1886
0
      /* no encoding requested: copy the input verbatim, then normalize any
         %XX sequences in the copy in place */
      char *p;
1887
0
      CURLcode result = curlx_dyn_add(&enc, part);
1888
0
      if(result)
1889
0
        return cc2cu(result);
1890
0
      p = curlx_dyn_ptr(&enc);
1891
0
      while(*p) {
1892
        /* make sure percent encoded are lower case */
1893
0
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1894
0
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1895
0
          p[1] = Curl_raw_tolower(p[1]);
1896
0
          p[2] = Curl_raw_tolower(p[2]);
1897
0
          p += 3;
1898
0
        }
1899
0
        else
1900
0
          p++;
1901
0
      }
1902
0
    }
1903
28.8k
    newp = curlx_dyn_ptr(&enc);
1904
1905
28.8k
    if(appendquery && newp) {
1906
      /* Append the 'newp' string onto the old query. Add a '&' separator if
1907
         none is present at the end of the existing query already */
1908
1909
0
      size_t querylen = u->query ? strlen(u->query) : 0;
1910
0
      bool addamperand = querylen && (u->query[querylen -1] != '&');
1911
0
      /* with no existing query there is nothing to append to: control falls
         through to the plain store at the end of the function */
      if(querylen) {
1912
0
        struct dynbuf qbuf;
1913
0
        curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1914
1915
0
        if(curlx_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1916
0
          goto nomem;
1917
1918
0
        if(addamperand) {
1919
0
          if(curlx_dyn_addn(&qbuf, "&", 1))
1920
0
            goto nomem;
1921
0
        }
1922
0
        if(curlx_dyn_add(&qbuf, newp))
1923
0
          goto nomem;
1924
0
        /* the combined string lives in 'qbuf'; 'enc' is no longer needed and
           the handle takes over the qbuf pointer without copying it */
        curlx_dyn_free(&enc);
1925
0
        free(*storep);
1926
0
        *storep = curlx_dyn_ptr(&qbuf);
1927
0
        return CURLUE_OK;
1928
0
nomem:
1929
0
        /* NOTE(review): only 'enc' is freed here; 'qbuf' is presumably
           already released by the dynbuf add that failed - confirm. Every
           failure on this path is reported as out of memory. */
        curlx_dyn_free(&enc);
1930
0
        return CURLUE_OUT_OF_MEMORY;
1931
0
      }
1932
0
    }
1933
1934
28.8k
    else if(what == CURLUPART_HOST) {
1935
0
      size_t n = curlx_dyn_len(&enc);
1936
0
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
1937
        /* Skip hostname check, it is allowed to be empty. */
1938
0
      }
1939
0
      else {
1940
0
        bool bad = FALSE;
1941
0
        if(!n)
1942
0
          bad = TRUE; /* empty hostname is not okay */
1943
0
        else if(!urlencode) {
1944
          /* if the hostname part was not URL encoded here, it was passed in
1945
             already URL encoded so we need to decode it to check */
1946
0
          size_t dlen;
1947
0
          char *decoded = NULL;
1948
0
          CURLcode result =
1949
0
            Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
1950
0
          if(result || hostname_check(u, decoded, dlen))
1951
0
            bad = TRUE;
1952
0
          free(decoded);
1953
0
        }
1954
0
        /* encoded by the loop above: check the encoded string directly */
        else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
1955
0
          bad = TRUE;
1956
0
        if(bad) {
1957
0
          curlx_dyn_free(&enc);
1958
0
          return CURLUE_BAD_HOSTNAME;
1959
0
        }
1960
0
      }
1961
0
    }
1962
1963
28.8k
    /* replace the old value; the handle now owns the dynbuf's buffer */
    free(*storep);
1964
28.8k
    *storep = (char *)CURL_UNCONST(newp);
1965
28.8k
  }
1966
0
  return CURLUE_OK;
1967
28.8k
}