Coverage Report

Created: 2023-12-08 06:48

/src/curl/lib/urlapi.c
Line
Count
Source (jump to first uncovered line)
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
25
#include "curl_setup.h"
26
27
#include "urldata.h"
28
#include "urlapi-int.h"
29
#include "strcase.h"
30
#include "url.h"
31
#include "escape.h"
32
#include "curl_ctype.h"
33
#include "inet_pton.h"
34
#include "inet_ntop.h"
35
#include "strdup.h"
36
#include "idn.h"
37
#include "curl_memrchr.h"
38
39
/* The last 3 #include files should be in this order */
40
#include "curl_printf.h"
41
#include "curl_memory.h"
42
#include "memdebug.h"
43
44
  /* MSDOS/Windows style drive prefix, eg c: in c:foo */
45
/* Non-zero when 'str' starts with an ASCII letter immediately followed by a
   colon. Every use of the argument is parenthesized so that an expression
   argument (for example 'p + 1') is indexed as a whole. The argument is
   evaluated more than once, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49
50
  /* MSDOS/Windows style drive prefix, optionally with
51
   * a '|' instead of ':', followed by a slash or NUL */
52
/* Non-zero when 'str' starts with an ASCII letter, then ':' or '|', and the
   third byte is '/', '\\' or the terminating NUL. The argument is evaluated
   several times, so it must be free of side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) || \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == '\0')))
57
58
/* scheme is not URL encoded, the longest libcurl supported ones are... */
59
0
#define MAX_SCHEME_LEN 40
60
61
/*
62
 * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
63
 * sure we have _some_ value for AF_INET6 without polluting our fake value
64
 * everywhere.
65
 */
66
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
67
#define AF_INET6 (AF_INET + 1)
68
#endif
69
70
/* Internal representation of CURLU. Point to URL-encoded strings. */
71
struct Curl_URL {
  char *scheme;   /* URL scheme */
  char *user;     /* user name from the login part */
  char *password; /* password from the login part */
  char *options;  /* login options (IMAP only?) */
  char *host;     /* host name or address */
  char *zoneid;   /* zone id, for numerical IPv6 addresses */
  char *port;     /* port number as a string */
  char *path;     /* path part */
  char *query;    /* query part */
  char *fragment; /* fragment part */
  long portnum;   /* the numerical version of 'port' */
};
84
85
0
#define DEFAULT_SCHEME "https"
86
87
static void free_urlhandle(struct Curl_URL *u)
88
0
{
89
0
  free(u->scheme);
90
0
  free(u->user);
91
0
  free(u->password);
92
0
  free(u->options);
93
0
  free(u->host);
94
0
  free(u->zoneid);
95
0
  free(u->port);
96
0
  free(u->path);
97
0
  free(u->query);
98
0
  free(u->fragment);
99
0
}
100
101
/*
 * Locate where the host name part of 'url' ends: the first '/' or '?' found
 * after the leading "//" (or from the start of the string when there is no
 * "//"), whichever comes first. This covers URLs without a path slash such as
 * http://www.example.com?id=2380
 *
 * Returns a pointer into 'url'; when neither separator exists it points at
 * the terminating NUL.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *dslash = strstr(url, "//");
  /* the host starts right after "//" when present */
  const char *host = dslash ? dslash + 2 : url;
  const char *slash = strchr(host, '/');
  const char *qmark = strchr(host, '?');

  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
128
129
/*
130
 * Decide whether a character in a URL must be escaped.
131
 */
132
0
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
133
134
static const char hexdigits[] = "0123456789abcdef";
135
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
136
 * spaces in the source URL accordingly.
137
 *
138
 * URL encoding should be skipped for host names, otherwise IDN resolution
139
 * will fail.
140
 */
141
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
142
                               size_t len, bool relative,
143
                               bool query)
144
0
{
145
  /* we must add this with whitespace-replacing */
146
0
  bool left = !query;
147
0
  const unsigned char *iptr;
148
0
  const unsigned char *host_sep = (const unsigned char *) url;
149
150
0
  if(!relative)
151
0
    host_sep = (const unsigned char *) find_host_sep(url);
152
153
0
  for(iptr = (unsigned char *)url;    /* read from here */
154
0
      len; iptr++, len--) {
155
156
0
    if(iptr < host_sep) {
157
0
      if(Curl_dyn_addn(o, iptr, 1))
158
0
        return CURLUE_OUT_OF_MEMORY;
159
0
      continue;
160
0
    }
161
162
0
    if(*iptr == ' ') {
163
0
      if(left) {
164
0
        if(Curl_dyn_addn(o, "%20", 3))
165
0
          return CURLUE_OUT_OF_MEMORY;
166
0
      }
167
0
      else {
168
0
        if(Curl_dyn_addn(o, "+", 1))
169
0
          return CURLUE_OUT_OF_MEMORY;
170
0
      }
171
0
      continue;
172
0
    }
173
174
0
    if(*iptr == '?')
175
0
      left = FALSE;
176
177
0
    if(urlchar_needs_escaping(*iptr)) {
178
0
      char out[3]={'%'};
179
0
      out[1] = hexdigits[*iptr>>4];
180
0
      out[2] = hexdigits[*iptr & 0xf];
181
0
      if(Curl_dyn_addn(o, out, 3))
182
0
        return CURLUE_OUT_OF_MEMORY;
183
0
    }
184
0
    else {
185
0
      if(Curl_dyn_addn(o, iptr, 1))
186
0
        return CURLUE_OUT_OF_MEMORY;
187
0
    }
188
0
  }
189
190
0
  return CURLUE_OK;
191
0
}
192
193
/*
194
 * Returns the length of the scheme if the given URL is absolute (as opposed
195
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
196
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
197
 *
198
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
199
 * scheme.
200
 */
201
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
202
                            bool guess_scheme)
203
0
{
204
0
  int i = 0;
205
0
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
206
0
  (void)buflen; /* only used in debug-builds */
207
0
  if(buf)
208
0
    buf[0] = 0; /* always leave a defined value in buf */
209
#ifdef _WIN32
210
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
211
    return 0;
212
#endif
213
0
  if(ISALPHA(url[0]))
214
0
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
215
0
      char s = url[i];
216
0
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
217
        /* RFC 3986 3.1 explains:
218
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
219
        */
220
0
      }
221
0
      else {
222
0
        break;
223
0
      }
224
0
    }
225
0
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
226
    /* If this does not guess scheme, the scheme always ends with the colon so
227
       that this also detects data: URLs etc. In guessing mode, data: could
228
       be the host name "data" with a specified port number. */
229
230
    /* the length of the scheme is the name part only */
231
0
    size_t len = i;
232
0
    if(buf) {
233
0
      buf[i] = 0;
234
0
      while(i--) {
235
0
        buf[i] = Curl_raw_tolower(url[i]);
236
0
      }
237
0
    }
238
0
    return len;
239
0
  }
240
0
  return 0;
241
0
}
242
243
/*
244
 * Concatenate a relative URL to a base URL making it absolute.
245
 * URL-encodes any spaces.
246
 * The returned pointer must be freed by the caller unless NULL
247
 * (returns NULL on out of memory).
248
 *
249
 * Note that this function destroys the 'base' string.
250
 */
251
static char *concat_url(char *base, const char *relurl)
252
0
{
253
  /***
254
   TRY to append this new path to the old URL
255
   to the right of the host part. Oh crap, this is doomed to cause
256
   problems in the future...
257
  */
258
0
  struct dynbuf newest;
259
0
  char *protsep;
260
0
  char *pathsep;
261
0
  bool host_changed = FALSE;
262
0
  const char *useurl = relurl;
263
264
  /* protsep points to the start of the host name */
265
0
  protsep = strstr(base, "//");
266
0
  if(!protsep)
267
0
    protsep = base;
268
0
  else
269
0
    protsep += 2; /* pass the slashes */
270
271
0
  if('/' != relurl[0]) {
272
0
    int level = 0;
273
274
    /* First we need to find out if there's a ?-letter in the URL,
275
       and cut it and the right-side of that off */
276
0
    pathsep = strchr(protsep, '?');
277
0
    if(pathsep)
278
0
      *pathsep = 0;
279
280
    /* we have a relative path to append to the last slash if there's one
281
       available, or if the new URL is just a query string (starts with a
282
       '?')  we append the new one at the end of the entire currently worked
283
       out URL */
284
0
    if(useurl[0] != '?') {
285
0
      pathsep = strrchr(protsep, '/');
286
0
      if(pathsep)
287
0
        *pathsep = 0;
288
0
    }
289
290
    /* Check if there's any slash after the host name, and if so, remember
291
       that position instead */
292
0
    pathsep = strchr(protsep, '/');
293
0
    if(pathsep)
294
0
      protsep = pathsep + 1;
295
0
    else
296
0
      protsep = NULL;
297
298
    /* now deal with one "./" or any amount of "../" in the newurl
299
       and act accordingly */
300
301
0
    if((useurl[0] == '.') && (useurl[1] == '/'))
302
0
      useurl += 2; /* just skip the "./" */
303
304
0
    while((useurl[0] == '.') &&
305
0
          (useurl[1] == '.') &&
306
0
          (useurl[2] == '/')) {
307
0
      level++;
308
0
      useurl += 3; /* pass the "../" */
309
0
    }
310
311
0
    if(protsep) {
312
0
      while(level--) {
313
        /* cut off one more level from the right of the original URL */
314
0
        pathsep = strrchr(protsep, '/');
315
0
        if(pathsep)
316
0
          *pathsep = 0;
317
0
        else {
318
0
          *protsep = 0;
319
0
          break;
320
0
        }
321
0
      }
322
0
    }
323
0
  }
324
0
  else {
325
    /* We got a new absolute path for this server */
326
327
0
    if(relurl[1] == '/') {
328
      /* the new URL starts with //, just keep the protocol part from the
329
         original one */
330
0
      *protsep = 0;
331
0
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
332
                              skip the new ones */
333
0
      host_changed = TRUE;
334
0
    }
335
0
    else {
336
      /* cut off the original URL from the first slash, or deal with URLs
337
         without slash */
338
0
      pathsep = strchr(protsep, '/');
339
0
      if(pathsep) {
340
        /* When people use badly formatted URLs, such as
341
           "http://www.example.com?dir=/home/daniel" we must not use the first
342
           slash, if there's a ?-letter before it! */
343
0
        char *sep = strchr(protsep, '?');
344
0
        if(sep && (sep < pathsep))
345
0
          pathsep = sep;
346
0
        *pathsep = 0;
347
0
      }
348
0
      else {
349
        /* There was no slash. Now, since we might be operating on a badly
350
           formatted URL, such as "http://www.example.com?id=2380" which
351
           doesn't use a slash separator as it is supposed to, we need to check
352
           for a ?-letter as well! */
353
0
        pathsep = strchr(protsep, '?');
354
0
        if(pathsep)
355
0
          *pathsep = 0;
356
0
      }
357
0
    }
358
0
  }
359
360
0
  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
361
362
  /* copy over the root url part */
363
0
  if(Curl_dyn_add(&newest, base))
364
0
    return NULL;
365
366
  /* check if we need to append a slash */
367
0
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
368
0
    ;
369
0
  else {
370
0
    if(Curl_dyn_addn(&newest, "/", 1))
371
0
      return NULL;
372
0
  }
373
374
  /* then append the new piece on the right side */
375
0
  urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
376
377
0
  return Curl_dyn_ptr(&newest);
378
0
}
379
380
/* scan for byte values <= 31, 127 and sometimes space */
381
static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
382
0
{
383
0
  static const char badbytes[]={
384
0
    /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
385
0
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
386
0
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
387
0
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
388
0
    0x7f, 0x00 /* null-terminate */
389
0
  };
390
0
  size_t n = strlen(url);
391
0
  size_t nfine;
392
393
0
  if(n > CURL_MAX_INPUT_LENGTH)
394
    /* excessive input length */
395
0
    return CURLUE_MALFORMED_INPUT;
396
397
0
  nfine = strcspn(url, badbytes);
398
0
  if((nfine != n) ||
399
0
     (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
400
0
    return CURLUE_MALFORMED_INPUT;
401
402
0
  *urllen = n;
403
0
  return CURLUE_OK;
404
0
}
405
406
/*
407
 * parse_hostname_login()
408
 *
409
 * Parse the login details (user name, password and options) from the URL and
410
 * strip them out of the host name
411
 *
412
 */
413
static CURLUcode parse_hostname_login(struct Curl_URL *u,
414
                                      const char *login,
415
                                      size_t len,
416
                                      unsigned int flags,
417
                                      size_t *offset) /* to the host name */
418
0
{
419
0
  CURLUcode result = CURLUE_OK;
420
0
  CURLcode ccode;
421
0
  char *userp = NULL;
422
0
  char *passwdp = NULL;
423
0
  char *optionsp = NULL;
424
0
  const struct Curl_handler *h = NULL;
425
426
  /* At this point, we assume all the other special cases have been taken
427
   * care of, so the host is at most
428
   *
429
   *   [user[:password][;options]]@]hostname
430
   *
431
   * We need somewhere to put the embedded details, so do that first.
432
   */
433
0
  char *ptr;
434
435
0
  DEBUGASSERT(login);
436
437
0
  *offset = 0;
438
0
  ptr = memchr(login, '@', len);
439
0
  if(!ptr)
440
0
    goto out;
441
442
  /* We will now try to extract the
443
   * possible login information in a string like:
444
   * ftp://user:password@ftp.my.site:8021/README */
445
0
  ptr++;
446
447
  /* if this is a known scheme, get some details */
448
0
  if(u->scheme)
449
0
    h = Curl_get_scheme_handler(u->scheme);
450
451
  /* We could use the login information in the URL so extract it. Only parse
452
     options if the handler says we should. Note that 'h' might be NULL! */
453
0
  ccode = Curl_parse_login_details(login, ptr - login - 1,
454
0
                                   &userp, &passwdp,
455
0
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
456
0
                                   &optionsp:NULL);
457
0
  if(ccode) {
458
0
    result = CURLUE_BAD_LOGIN;
459
0
    goto out;
460
0
  }
461
462
0
  if(userp) {
463
0
    if(flags & CURLU_DISALLOW_USER) {
464
      /* Option DISALLOW_USER is set and url contains username. */
465
0
      result = CURLUE_USER_NOT_ALLOWED;
466
0
      goto out;
467
0
    }
468
0
    free(u->user);
469
0
    u->user = userp;
470
0
  }
471
472
0
  if(passwdp) {
473
0
    free(u->password);
474
0
    u->password = passwdp;
475
0
  }
476
477
0
  if(optionsp) {
478
0
    free(u->options);
479
0
    u->options = optionsp;
480
0
  }
481
482
  /* the host name starts at this offset */
483
0
  *offset = ptr - login;
484
0
  return CURLUE_OK;
485
486
0
out:
487
488
0
  free(userp);
489
0
  free(passwdp);
490
0
  free(optionsp);
491
0
  u->user = NULL;
492
0
  u->password = NULL;
493
0
  u->options = NULL;
494
495
0
  return result;
496
0
}
497
498
/*
 * Look for a ":port" suffix in the host name held in 'host'. When one is
 * found the dynbuf is shortened to end before the colon, and a valid port is
 * stored in the handle both as a number and as a newly formatted string.
 *
 * A colon with nothing after it is accepted only when 'has_scheme' is TRUE.
 *
 * Returns CURLUE_BAD_IPV6 for a '[' without the closing ']',
 * CURLUE_BAD_PORT_NUMBER for a malformed or too large port and
 * CURLUE_OUT_OF_MEMORY if the port string cannot be allocated.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *hostname = Curl_dyn_ptr(host);
  char *colon;
  char *digits;
  char *rest;
  long port;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /* Find the end of an IPv6 address on the ']' ending bracket. This is a
       RFC2732-style specified IP-address */
    char *bracket = strchr(hostname, ']');
    if(!bracket)
      return CURLUE_BAD_IPV6;
    colon = bracket + 1;
    if(!*colon)
      colon = NULL; /* nothing follows the bracket */
    else if(*colon != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    /* no port part present */
    return CURLUE_OK;

  /* Browser behavior adaptation. If there's a colon with no digits after,
     just cut off the name there which makes us ignore the colon and just
     use the default port. Firefox, Chrome and Safari all do that.

     Don't do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  Curl_dyn_setlen(host, (size_t)(colon - hostname));
  digits = colon + 1;
  if(!*digits)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(!ISDIGIT(*digits))
    return CURLUE_BAD_PORT_NUMBER;

  port = strtol(digits, &rest, 10);  /* Port number must be decimal */

  /* too large, or trailing junk after the number */
  if((port > 0xffff) || rest[0])
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = port;
  /* generate a new port number string to get rid of leading zeroes etc */
  free(u->port);
  u->port = aprintf("%ld", port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
560
561
/* this assumes 'hostname' now starts with [ */
562
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
563
                            size_t hlen) /* length of hostname */
564
0
{
565
0
  size_t len;
566
0
  DEBUGASSERT(*hostname == '[');
567
0
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
568
0
    return CURLUE_BAD_IPV6;
569
0
  hostname++;
570
0
  hlen -= 2;
571
572
  /* only valid IPv6 letters are ok */
573
0
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
574
575
0
  if(hlen != len) {
576
0
    hlen = len;
577
0
    if(hostname[len] == '%') {
578
      /* this could now be '%[zone id]' */
579
0
      char zoneid[16];
580
0
      int i = 0;
581
0
      char *h = &hostname[len + 1];
582
      /* pass '25' if present and is a url encoded percent sign */
583
0
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
584
0
        h += 2;
585
0
      while(*h && (*h != ']') && (i < 15))
586
0
        zoneid[i++] = *h++;
587
0
      if(!i || (']' != *h))
588
0
        return CURLUE_BAD_IPV6;
589
0
      zoneid[i] = 0;
590
0
      u->zoneid = strdup(zoneid);
591
0
      if(!u->zoneid)
592
0
        return CURLUE_OUT_OF_MEMORY;
593
0
      hostname[len] = ']'; /* insert end bracket */
594
0
      hostname[len + 1] = 0; /* terminate the hostname */
595
0
    }
596
0
    else
597
0
      return CURLUE_BAD_IPV6;
598
    /* hostname is fine */
599
0
  }
600
601
  /* Check the IPv6 address. */
602
0
  {
603
0
    char dest[16]; /* fits a binary IPv6 address */
604
0
    char norm[MAX_IPADR_LEN];
605
0
    hostname[hlen] = 0; /* end the address there */
606
0
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
607
0
      return CURLUE_BAD_IPV6;
608
609
    /* check if it can be done shorter */
610
0
    if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
611
0
       (strlen(norm) < hlen)) {
612
0
      strcpy(hostname, norm);
613
0
      hlen = strlen(norm);
614
0
      hostname[hlen + 1] = 0;
615
0
    }
616
0
    hostname[hlen] = ']'; /* restore ending bracket */
617
0
  }
618
0
  return CURLUE_OK;
619
0
}
620
621
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
622
                                size_t hlen) /* length of hostname */
623
0
{
624
0
  size_t len;
625
0
  DEBUGASSERT(hostname);
626
627
0
  if(!hlen)
628
0
    return CURLUE_NO_HOST;
629
0
  else if(hostname[0] == '[')
630
0
    return ipv6_parse(u, hostname, hlen);
631
0
  else {
632
    /* letters from the second string are not ok */
633
0
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
634
0
    if(hlen != len)
635
      /* hostname with bad content */
636
0
      return CURLUE_BAD_HOSTNAME;
637
0
  }
638
0
  return CURLUE_OK;
639
0
}
640
641
/*
642
 * Handle partial IPv4 numerical addresses and different bases, like
643
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
644
 *
645
 * If the given input string is syntactically wrong IPv4 or any part for
646
 * example is too big, this function returns HOST_NAME.
647
 *
648
 * Output the "normalized" version of that input string in plain quad decimal
649
 * integers.
650
 *
651
 * Returns the host type.
652
 */
653
654
0
#define HOST_ERROR   -1 /* out of memory */
655
0
#define HOST_BAD     -2 /* bad IPv4 address */
656
657
0
#define HOST_NAME    1
658
0
#define HOST_IPV4    2
659
0
#define HOST_IPV6    3
660
661
static int ipv4_normalize(struct dynbuf *host)
662
0
{
663
0
  bool done = FALSE;
664
0
  int n = 0;
665
0
  const char *c = Curl_dyn_ptr(host);
666
0
  unsigned long parts[4] = {0, 0, 0, 0};
667
0
  CURLcode result = CURLE_OK;
668
669
0
  if(*c == '[')
670
0
    return HOST_IPV6;
671
672
0
  while(!done) {
673
0
    char *endp;
674
0
    unsigned long l;
675
0
    if(!ISDIGIT(*c))
676
      /* most importantly this doesn't allow a leading plus or minus */
677
0
      return HOST_NAME;
678
0
    l = strtoul(c, &endp, 0);
679
680
0
    parts[n] = l;
681
0
    c = endp;
682
683
0
    switch(*c) {
684
0
    case '.':
685
0
      if(n == 3)
686
0
        return HOST_NAME;
687
0
      n++;
688
0
      c++;
689
0
      break;
690
691
0
    case '\0':
692
0
      done = TRUE;
693
0
      break;
694
695
0
    default:
696
0
      return HOST_NAME;
697
0
    }
698
699
    /* overflow */
700
0
    if((l == ULONG_MAX) && (errno == ERANGE))
701
0
      return HOST_NAME;
702
703
0
#if SIZEOF_LONG > 4
704
    /* a value larger than 32 bits */
705
0
    if(l > UINT_MAX)
706
0
      return HOST_NAME;
707
0
#endif
708
0
  }
709
710
0
  switch(n) {
711
0
  case 0: /* a -- 32 bits */
712
0
    Curl_dyn_reset(host);
713
714
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
715
0
                           parts[0] >> 24, (parts[0] >> 16) & 0xff,
716
0
                           (parts[0] >> 8) & 0xff, parts[0] & 0xff);
717
0
    break;
718
0
  case 1: /* a.b -- 8.24 bits */
719
0
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
720
0
      return HOST_NAME;
721
0
    Curl_dyn_reset(host);
722
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
723
0
                           parts[0], (parts[1] >> 16) & 0xff,
724
0
                           (parts[1] >> 8) & 0xff, parts[1] & 0xff);
725
0
    break;
726
0
  case 2: /* a.b.c -- 8.8.16 bits */
727
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
728
0
      return HOST_NAME;
729
0
    Curl_dyn_reset(host);
730
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
731
0
                           parts[0], parts[1], (parts[2] >> 8) & 0xff,
732
0
                           parts[2] & 0xff);
733
0
    break;
734
0
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
735
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
736
0
       (parts[3] > 0xff))
737
0
      return HOST_NAME;
738
0
    Curl_dyn_reset(host);
739
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
740
0
                           parts[0], parts[1], parts[2], parts[3]);
741
0
    break;
742
0
  }
743
0
  if(result)
744
0
    return HOST_ERROR;
745
0
  return HOST_IPV4;
746
0
}
747
748
/* if necessary, replace the host content with a URL decoded version */
749
static CURLUcode urldecode_host(struct dynbuf *host)
750
0
{
751
0
  char *per = NULL;
752
0
  const char *hostname = Curl_dyn_ptr(host);
753
0
  per = strchr(hostname, '%');
754
0
  if(!per)
755
    /* nothing to decode */
756
0
    return CURLUE_OK;
757
0
  else {
758
    /* encoded */
759
0
    size_t dlen;
760
0
    char *decoded;
761
0
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
762
0
                                     REJECT_CTRL);
763
0
    if(result)
764
0
      return CURLUE_BAD_HOSTNAME;
765
0
    Curl_dyn_reset(host);
766
0
    result = Curl_dyn_addn(host, decoded, dlen);
767
0
    free(decoded);
768
0
    if(result)
769
0
      return CURLUE_OUT_OF_MEMORY;
770
0
  }
771
772
0
  return CURLUE_OK;
773
0
}
774
775
static CURLUcode parse_authority(struct Curl_URL *u,
776
                                 const char *auth, size_t authlen,
777
                                 unsigned int flags,
778
                                 struct dynbuf *host,
779
                                 bool has_scheme)
780
0
{
781
0
  size_t offset;
782
0
  CURLUcode result;
783
784
  /*
785
   * Parse the login details and strip them out of the host name.
786
   */
787
0
  result = parse_hostname_login(u, auth, authlen, flags, &offset);
788
0
  if(result)
789
0
    goto out;
790
791
0
  if(Curl_dyn_addn(host, auth + offset, authlen - offset)) {
792
0
    result = CURLUE_OUT_OF_MEMORY;
793
0
    goto out;
794
0
  }
795
796
0
  result = Curl_parse_port(u, host, has_scheme);
797
0
  if(result)
798
0
    goto out;
799
800
0
  if(!Curl_dyn_len(host))
801
0
    return CURLUE_NO_HOST;
802
803
0
  switch(ipv4_normalize(host)) {
804
0
  case HOST_IPV4:
805
0
    break;
806
0
  case HOST_IPV6:
807
0
    result = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
808
0
    break;
809
0
  case HOST_NAME:
810
0
    result = urldecode_host(host);
811
0
    if(!result)
812
0
      result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
813
0
    break;
814
0
  case HOST_ERROR:
815
0
    result = CURLUE_OUT_OF_MEMORY;
816
0
    break;
817
0
  case HOST_BAD:
818
0
  default:
819
0
    result = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
820
0
    break;
821
0
  }
822
823
0
out:
824
0
  return result;
825
0
}
826
827
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
                                 unsigned int flags)
{
  /* Parse 'authority' into the handle. On success the handle's previous
     host string is freed and replaced by the newly parsed one; on failure
     the temporary buffer is released and the host is left as it was. */
  struct dynbuf host;
  CURLUcode result;

  DEBUGASSERT(authority);
  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  result = parse_authority(u, authority, strlen(authority), flags,
                           &host, u->scheme ? TRUE : FALSE);
  if(!result) {
    /* the handle takes over the buffer built in 'host' */
    free(u->host);
    u->host = Curl_dyn_ptr(&host);
    return result;
  }

  Curl_dyn_free(&host);
  return result;
}
846
847
/*
848
 * "Remove Dot Segments"
849
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
850
 */
851
852
/*
853
 * dedotdotify()
854
 * @unittest: 1395
855
 *
856
 * This function gets a null-terminated path with dot and dotdot sequences
857
 * passed in and strips them off according to the rules in RFC 3986 section
858
 * 5.2.4.
859
 *
860
 * The function handles a query part ('?' + stuff) appended but it expects
861
 * that fragments ('#' + stuff) have already been cut off.
862
 *
863
 * RETURNS
864
 *
865
 * Zero for success and 'out' set to an allocated dedotdotified string.
866
 */
867
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
868
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
869
0
{
870
0
  char *outptr;
871
0
  const char *endp = &input[clen];
872
0
  char *out;
873
874
0
  *outp = NULL;
875
  /* the path always starts with a slash, and a slash has not dot */
876
0
  if((clen < 2) || !memchr(input, '.', clen))
877
0
    return 0;
878
879
0
  out = malloc(clen + 1);
880
0
  if(!out)
881
0
    return 1; /* out of memory */
882
883
0
  *out = 0; /* null-terminates, for inputs like "./" */
884
0
  outptr = out;
885
886
0
  do {
887
0
    bool dotdot = TRUE;
888
0
    if(*input == '.') {
889
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
890
          remove that prefix from the input buffer; otherwise, */
891
892
0
      if(!strncmp("./", input, 2)) {
893
0
        input += 2;
894
0
        clen -= 2;
895
0
      }
896
0
      else if(!strncmp("../", input, 3)) {
897
0
        input += 3;
898
0
        clen -= 3;
899
0
      }
900
      /*  D.  if the input buffer consists only of "." or "..", then remove
901
          that from the input buffer; otherwise, */
902
903
0
      else if(!strcmp(".", input) || !strcmp("..", input) ||
904
0
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
905
0
        *out = 0;
906
0
        break;
907
0
      }
908
0
      else
909
0
        dotdot = FALSE;
910
0
    }
911
0
    else if(*input == '/') {
912
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
913
          "."  is a complete path segment, then replace that prefix with "/" in
914
          the input buffer; otherwise, */
915
0
      if(!strncmp("/./", input, 3)) {
916
0
        input += 2;
917
0
        clen -= 2;
918
0
      }
919
0
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
920
0
        *outptr++ = '/';
921
0
        *outptr = 0;
922
0
        break;
923
0
      }
924
925
      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
926
          where ".." is a complete path segment, then replace that prefix with
927
          "/" in the input buffer and remove the last segment and its
928
          preceding "/" (if any) from the output buffer; otherwise, */
929
930
0
      else if(!strncmp("/../", input, 4)) {
931
0
        input += 3;
932
0
        clen -= 3;
933
        /* remove the last segment from the output buffer */
934
0
        while(outptr > out) {
935
0
          outptr--;
936
0
          if(*outptr == '/')
937
0
            break;
938
0
        }
939
0
        *outptr = 0; /* null-terminate where it stops */
940
0
      }
941
0
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
942
        /* remove the last segment from the output buffer */
943
0
        while(outptr > out) {
944
0
          outptr--;
945
0
          if(*outptr == '/')
946
0
            break;
947
0
        }
948
0
        *outptr++ = '/';
949
0
        *outptr = 0; /* null-terminate where it stops */
950
0
        break;
951
0
      }
952
0
      else
953
0
        dotdot = FALSE;
954
0
    }
955
0
    else
956
0
      dotdot = FALSE;
957
958
0
    if(!dotdot) {
959
      /*  E.  move the first path segment in the input buffer to the end of
960
          the output buffer, including the initial "/" character (if any) and
961
          any subsequent characters up to, but not including, the next "/"
962
          character or the end of the input buffer. */
963
964
0
      do {
965
0
        *outptr++ = *input++;
966
0
        clen--;
967
0
      } while(*input && (*input != '/') && (*input != '?'));
968
0
      *outptr = 0;
969
0
    }
970
971
    /* continue until end of path */
972
0
  } while(input < endp);
973
974
0
  *outp = out;
975
0
  return 0; /* success */
976
0
}
977
978
/*
 * parseurl() takes the string 'url' apart and stores the pieces in 'u'.
 *
 * 'flags' is the CURLU_* bitmask consulted for scheme defaulting/guessing
 * (CURLU_DEFAULT_SCHEME, CURLU_GUESS_SCHEME, CURLU_NON_SUPPORT_SCHEME),
 * empty authority (CURLU_NO_AUTHORITY), encoding (CURLU_URLENCODE) and
 * dot-segment handling (CURLU_PATH_AS_IS).
 *
 * Returns CURLUE_OK on success. On every failure path the local host buffer
 * is released and free_urlhandle() is called on 'u' before returning the
 * error code.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  const char *path;
  size_t pathlen;
  char *query = NULL;
  char *fragment = NULL;
  char schemebuf[MAX_SCHEME_LEN + 1];
  size_t schemelen = 0;
  size_t urllen;
  size_t fraglen = 0;
  struct dynbuf host;
  CURLUcode result = CURLUE_OK;
  bool encode = (flags & CURLU_URLENCODE) ? TRUE : FALSE;

  DEBUGASSERT(url);

  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  /* junkscan() either fails or reports the length of the URL in urllen */
  result = junkscan(url, &urllen, flags);
  if(result)
    goto fail;

  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME|
                                            CURLU_DEFAULT_SCHEME));

  if(schemelen && !strcmp(schemebuf, "file")) {
    /* the file: scheme gets its own treatment */
    bool uncpath = FALSE;

    if(urllen <= 6) {
      /* "file:/" on its own is too short to be a complete file: URL */
      result = CURLUE_BAD_FILE_URL;
      goto fail;
    }

    /* what follows the five bytes of "file:" */
    path = (char *)&url[5];
    pathlen = urllen - 5;

    u->scheme = strdup("file");
    if(!u->scheme) {
      result = CURLUE_OUT_OF_MEMORY;
      goto fail;
    }

    /*
     * URLs with an authority component ("file://...") need extra care.
     *
     * An omitted hostname (file:/<path>) is accepted: valid per RFC 8089
     * although not per the (current) WHAT-WG URL spec.
     */
    if(path[0] == '/' && path[1] == '/') {
      /* step past the two slashes */
      const char *rest = &path[2];

      /*
       * RFC 8089 says a file: URL can be reliably dereferenced when it has
       * no/blank hostname, when the hostname is "localhost"
       * (case-insensitively), when the hostname is a FQDN resolving to this
       * machine, or when it is a UNC string turned into a URI (Windows
       * only, Appendix E.3).
       *
       * Here only an empty hostname, "localhost" and "127.0.0.1" count as
       * local; anything else is treated as a UNC string. A Windows drive
       * letter in the authority position is also let through.
       */
      if(rest[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(rest)) {
        /* there is a host name; it has to be "localhost" or "127.0.0.1" */
        if(checkprefix("localhost/", rest) ||
           checkprefix("127.0.0.1/", rest)) {
          rest += 9; /* both names are nine bytes: now at the slash */
        }
        else {
#if defined(_WIN32)
          size_t len;

          /* a NetBIOS computer name cannot hold any of these characters,
             and the first one found must be the delimiting slash */
          path = strpbrk(rest, "/\\:*?\"<>|");
          if(!path || *path != '/') {
            result = CURLUE_BAD_FILE_URL;
            goto fail;
          }

          len = path - rest;
          if(len) {
            if(Curl_dyn_addn(&host, rest, len)) {
              result = CURLUE_OUT_OF_MEMORY;
              goto fail;
            }
            uncpath = TRUE;
          }

          rest -= 2; /* back to the // in front of the host, for UNC */
#else
          /* file://hostname/ with a name other than localhost, 127.0.0.1
             or nothing is invalid here */
          result = CURLUE_BAD_FILE_URL;
          goto fail;
#endif
        }
      }

      path = rest;
      pathlen = urllen - (rest - url);
    }

    if(!uncpath)
      /* file: URLs carry no host unless it was a UNC path */
      Curl_dyn_reset(&host);

#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
    /* Outside of Windows, drive letters are refused. The test covers both
     * "file:/c:" and "file:c:" */
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
      result = CURLUE_BAD_FILE_URL;
      goto fail;
    }
#else
    /* A slash directly in front of a drive letter is dropped by advancing
       the path pointer past it */
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
      path++;
      pathlen--;
    }
#endif

  }
  else {
    const char *schemep = NULL;
    const char *hostp;
    size_t hostlen;

    if(schemelen) {
      /* count the slashes after "scheme:", giving up after four */
      int nslashes;
      const char *cursor = &url[schemelen + 1];
      for(nslashes = 0; (*cursor == '/') && (nslashes < 4); nslashes++)
        cursor++;

      schemep = schemebuf;
      if(!Curl_get_scheme_handler(schemep) &&
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
        result = CURLUE_UNSUPPORTED_SCHEME;
        goto fail;
      }

      if((nslashes < 1) || (nslashes > 3)) {
        /* fewer than one or more than three slashes */
        result = CURLUE_BAD_SLASHES;
        goto fail;
      }
      hostp = cursor; /* the host name begins here */
    }
    else {
      /* no scheme in the input */
      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
        result = CURLUE_BAD_SCHEME;
        goto fail;
      }
      if(flags & CURLU_DEFAULT_SCHEME)
        schemep = DEFAULT_SCHEME;

      /* carry on as if the whole string starts with the host */
      hostp = url;
    }

    if(schemep) {
      u->scheme = strdup(schemep);
      if(!u->scheme) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }

    /* the host name + port number run up to the first of these */
    hostlen = strcspn(hostp, "/?#");
    path = &hostp[hostlen];

    /* at this point pathlen covers query and fragment as well */
    pathlen = urllen - (path - url);

    if(hostlen) {
      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
      if(result)
        goto fail;

      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
        /* legacy curl-style guess from how the host name starts */
        const char *hostname = Curl_dyn_ptr(&host);
        if(checkprefix("ftp.", hostname))
          schemep = "ftp";
        else if(checkprefix("dict.", hostname))
          schemep = "dict";
        else if(checkprefix("ldap.", hostname))
          schemep = "ldap";
        else if(checkprefix("imap.", hostname))
          schemep = "imap";
        else if(checkprefix("smtp.", hostname))
          schemep = "smtp";
        else if(checkprefix("pop3.", hostname))
          schemep = "pop3";
        else
          schemep = "http";

        u->scheme = strdup(schemep);
        if(!u->scheme) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
      }
    }
    else if(flags & CURLU_NO_AUTHORITY) {
      /* an empty host is explicitly permitted */
      if(Curl_dyn_add(&host, "")) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
    else {
      result = CURLUE_NO_HOST;
      goto fail;
    }
  }

  fragment = strchr(path, '#');
  if(fragment) {
    fraglen = pathlen - (fragment - path);
    if(fraglen > 1) {
      /* store what follows the '#', not the '#' itself */
      if(encode) {
        struct dynbuf enc;
        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
        if(urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE)) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
        u->fragment = Curl_dyn_ptr(&enc);
      }
      else {
        u->fragment = Curl_strndup(fragment + 1, fraglen - 1);
        if(!u->fragment) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
      }
    }
    /* pathlen keeps covering the query after this */
    pathlen -= fraglen;
  }

  DEBUGASSERT(pathlen < urllen);
  query = memchr(path, '?', pathlen);
  if(query) {
    size_t qlen;
    if(fragment)
      qlen = (size_t)(fragment - query);
    else
      qlen = pathlen - (query - path);
    pathlen -= qlen;

    if(qlen <= 1) {
      /* a lone '?': store an empty query */
      u->query = strdup("");
      if(!u->query) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
    else if(encode) {
      struct dynbuf enc;
      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
      /* the leading question mark is not part of the stored query */
      if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      u->query = Curl_dyn_ptr(&enc);
    }
    else {
      u->query = Curl_strndup(query + 1, qlen - 1);
      if(!u->query) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
  }

  if(pathlen && encode) {
    struct dynbuf enc;
    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
      result = CURLUE_OUT_OF_MEMORY;
      goto fail;
    }
    pathlen = Curl_dyn_len(&enc);
    path = u->path = Curl_dyn_ptr(&enc);
  }

  /* with nothing, or only the slash, left there is no path to store */
  if(pathlen > 1) {
    if(!u->path) {
      u->path = Curl_strndup(path, pathlen);
      if(!u->path) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      path = u->path;
    }
    else if(encode)
      /* the encoded buffer may hold more than the path: cut it */
      u->path[pathlen] = 0;

    if(!(flags & CURLU_PATH_AS_IS)) {
      /* strip ../ and ./ sequences as RFC3986 describes */
      char *dedot;
      int err = dedotdotify((char *)path, pathlen, &dedot);
      if(err) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      if(dedot) {
        free(u->path);
        u->path = dedot;
      }
    }
  }

  /* the handle takes over the host buffer */
  u->host = Curl_dyn_ptr(&host);

  return result;
fail:
  Curl_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
1331
1332
/*
1333
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1334
 */
1335
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1336
                                      unsigned int flags)
1337
0
{
1338
0
  CURLUcode result;
1339
0
  CURLU tmpurl;
1340
0
  memset(&tmpurl, 0, sizeof(tmpurl));
1341
0
  result = parseurl(url, &tmpurl, flags);
1342
0
  if(!result) {
1343
0
    free_urlhandle(u);
1344
0
    *u = tmpurl;
1345
0
  }
1346
0
  return result;
1347
0
}
1348
1349
/*
1350
 */
1351
CURLU *curl_url(void)
1352
0
{
1353
0
  return calloc(1, sizeof(struct Curl_URL));
1354
0
}
1355
1356
/*
 * curl_url_cleanup() releases the contents of 'u' via free_urlhandle() and
 * then the handle itself. A NULL handle is accepted and ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1363
1364
/*
 * DUP() copies the string member 'name' from 'src' to 'dest' with strdup()
 * when it is set in 'src'. If the copy cannot be allocated, control jumps
 * to a label called 'fail' that the using function must provide.
 */
#define DUP(dest, src, name)                        \
  do {                                              \
    if((src)->name) {                               \
      (dest)->name = strdup((src)->name);           \
      if(!(dest)->name)                             \
        goto fail;                                  \
    }                                               \
  } while(0)
1372
1373
CURLU *curl_url_dup(const CURLU *in)
1374
0
{
1375
0
  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1376
0
  if(u) {
1377
0
    DUP(u, in, scheme);
1378
0
    DUP(u, in, user);
1379
0
    DUP(u, in, password);
1380
0
    DUP(u, in, options);
1381
0
    DUP(u, in, host);
1382
0
    DUP(u, in, port);
1383
0
    DUP(u, in, path);
1384
0
    DUP(u, in, query);
1385
0
    DUP(u, in, fragment);
1386
0
    DUP(u, in, zoneid);
1387
0
    u->portnum = in->portnum;
1388
0
  }
1389
0
  return u;
1390
0
fail:
1391
0
  curl_url_cleanup(u);
1392
0
  return NULL;
1393
0
}
1394
1395
/*
 * curl_url_get() extracts one component, or the full URL, from 'u'.
 *
 * u      the URL handle to read from; NULL yields CURLUE_BAD_HANDLE
 * what   which part to extract (CURLUPART_*)
 * part   receives a newly allocated string on success; NULL here yields
 *        CURLUE_BAD_PARTPOINTER
 * flags  CURLU_* bitmask: CURLU_URLDECODE, CURLU_URLENCODE,
 *        CURLU_PUNYCODE, CURLU_PUNY2IDN, CURLU_DEFAULT_PORT,
 *        CURLU_NO_DEFAULT_PORT and CURLU_DEFAULT_SCHEME are consulted
 *
 * Returns CURLUE_OK with *part set, or an error code with *part left NULL.
 * When the requested part is not stored in the handle, the part-specific
 * CURLUE_NO_* code is returned (CURLUE_UNKNOWN_PART for an unrecognized
 * 'what'). A missing path is reported as "/".
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *ptr;
  CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
  char portbuf[7];
  bool urldecode = (flags & CURLU_URLDECODE)?1:0;
  bool urlencode = (flags & CURLU_URLENCODE)?1:0;
  bool punycode = FALSE;
  bool depunyfy = FALSE;
  bool plusdecode = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_SCHEME:
    ptr = u->scheme;
    ifmissing = CURLUE_NO_SCHEME;
    urldecode = FALSE; /* never for schemes */
    break;
  case CURLUPART_USER:
    ptr = u->user;
    ifmissing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    ptr = u->password;
    ifmissing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    ptr = u->options;
    ifmissing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    ptr = u->host;
    ifmissing = CURLUE_NO_HOST;
    punycode = (flags & CURLU_PUNYCODE)?1:0;
    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
    break;
  case CURLUPART_ZONEID:
    ptr = u->zoneid;
    ifmissing = CURLUE_NO_ZONEID;
    break;
  case CURLUPART_PORT:
    ptr = u->port;
    ifmissing = CURLUE_NO_PORT;
    urldecode = FALSE; /* never for port */
    if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
      /* there's no stored port number, but asked to deliver
         a default one for the scheme */
      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
      if(h) {
        msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
        ptr = portbuf;
      }
    }
    else if(ptr && u->scheme) {
      /* there is a stored port number, but ask to inhibit if
         it matches the default one for the scheme */
      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
      if(h && (h->defport == u->portnum) &&
         (flags & CURLU_NO_DEFAULT_PORT))
        ptr = NULL;
    }
    break;
  case CURLUPART_PATH:
    ptr = u->path;
    if(!ptr)
      ptr = "/";
    break;
  case CURLUPART_QUERY:
    ptr = u->query;
    ifmissing = CURLUE_NO_QUERY;
    plusdecode = urldecode;
    break;
  case CURLUPART_FRAGMENT:
    ptr = u->fragment;
    ifmissing = CURLUE_NO_FRAGMENT;
    break;
  case CURLUPART_URL: {
    char *url;
    char *scheme;
    char *options = u->options;
    char *port = u->port;
    char *allochost = NULL;
    punycode = (flags & CURLU_PUNYCODE)?1:0;
    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
    if(u->scheme && strcasecompare("file", u->scheme)) {
      /* A file: URL is rebuilt without any authority. The path falls back
         to "/" when none is stored and a non-empty query is included,
         matching how the generic branch below treats both. */
      url = aprintf("file://%s%s%s%s%s",
                    u->path ? u->path : "/",
                    (u->query && u->query[0]) ? "?": "",
                    (u->query && u->query[0]) ? u->query : "",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
    }
    else if(!u->host)
      return CURLUE_NO_HOST;
    else {
      const struct Curl_handler *h = NULL;
      if(u->scheme)
        scheme = u->scheme;
      else if(flags & CURLU_DEFAULT_SCHEME)
        scheme = (char *) DEFAULT_SCHEME;
      else
        return CURLUE_NO_SCHEME;

      h = Curl_get_scheme_handler(scheme);
      if(!port && (flags & CURLU_DEFAULT_PORT)) {
        /* there's no stored port number, but asked to deliver
           a default one for the scheme */
        if(h) {
          msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          port = portbuf;
        }
      }
      else if(port) {
        /* there is a stored port number, but asked to inhibit if it matches
           the default one for the scheme */
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          port = NULL;
      }

      /* login options are only output for handlers flagged as using them */
      if(h && !(h->flags & PROTOPT_URLOPTIONS))
        options = NULL;

      if(u->host[0] == '[') {
        if(u->zoneid) {
          /* make it '[ host %25 zoneid ]' */
          struct dynbuf enc;
          size_t hostlen = strlen(u->host);
          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
          /* hostlen - 1 drops the closing bracket, re-added after zoneid */
          if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
                           u->zoneid))
            return CURLUE_OUT_OF_MEMORY;
          allochost = Curl_dyn_ptr(&enc);
        }
      }
      else if(urlencode) {
        allochost = curl_easy_escape(NULL, u->host, 0);
        if(!allochost)
          return CURLUE_OUT_OF_MEMORY;
      }
      else if(punycode) {
        if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
          return CURLUE_LACKS_IDN;
#else
          CURLcode result = Curl_idn_decode(u->host, &allochost);
          if(result)
            return (result == CURLE_OUT_OF_MEMORY) ?
              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
#endif
        }
      }
      else if(depunyfy) {
        if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
          return CURLUE_LACKS_IDN;
#else
          CURLcode result = Curl_idn_encode(u->host, &allochost);
          if(result)
            /* this is the most likely error */
            return (result == CURLE_OUT_OF_MEMORY) ?
              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
#endif
        }
      }

      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                    scheme,
                    u->user ? u->user : "",
                    u->password ? ":": "",
                    u->password ? u->password : "",
                    options ? ";" : "",
                    options ? options : "",
                    (u->user || u->password || options) ? "@": "",
                    allochost ? allochost : u->host,
                    port ? ":": "",
                    port ? port : "",
                    u->path ? u->path : "/",
                    (u->query && u->query[0]) ? "?": "",
                    (u->query && u->query[0]) ? u->query : "",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
      free(allochost);
    }
    if(!url)
      return CURLUE_OUT_OF_MEMORY;
    *part = url;
    return CURLUE_OK;
  }
  default:
    ptr = NULL;
    break;
  }

  if(!ptr)
    return ifmissing;

  {
    size_t partlen = strlen(ptr);
    size_t i = 0;
    *part = Curl_strndup(ptr, partlen);
    if(!*part)
      return CURLUE_OUT_OF_MEMORY;
    if(plusdecode) {
      /* convert + to space */
      char *plus = *part;
      for(i = 0; i < partlen; ++plus, i++) {
        if(*plus == '+')
          *plus = ' ';
      }
    }
    if(urldecode) {
      char *decoded;
      size_t dlen;
      /* this unconditional rejection of control bytes is documented
         API behavior */
      CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
      free(*part);
      if(res) {
        *part = NULL;
        return CURLUE_URLDECODE;
      }
      *part = decoded;
      partlen = dlen;
    }

    /* Every error return below frees the copy and resets *part to NULL,
       the same way the decode failure above does, so that a failing call
       never leaves an allocation behind in the caller's pointer. */
    if(urlencode) {
      struct dynbuf enc;
      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
      if(urlencode_str(&enc, *part, partlen, TRUE,
                       what == CURLUPART_QUERY)) {
        Curl_dyn_free(&enc);
        free(*part);
        *part = NULL;
        return CURLUE_OUT_OF_MEMORY;
      }
      free(*part);
      *part = Curl_dyn_ptr(&enc);
    }
    else if(punycode) {
      if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
        free(*part);
        *part = NULL;
        return CURLUE_LACKS_IDN;
#else
        char *allochost;
        CURLcode result = Curl_idn_decode(*part, &allochost);
        if(result) {
          free(*part);
          *part = NULL;
          return (result == CURLE_OUT_OF_MEMORY) ?
            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
        }
        free(*part);
        *part = allochost;
#endif
      }
    }
    else if(depunyfy) {
      if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
        free(*part);
        *part = NULL;
        return CURLUE_LACKS_IDN;
#else
        char *allochost;
        CURLcode result = Curl_idn_encode(*part, &allochost);
        if(result) {
          free(*part);
          *part = NULL;
          return (result == CURLE_OUT_OF_MEMORY) ?
            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
        }
        free(*part);
        *part = allochost;
#endif
      }
    }
  }

  return CURLUE_OK;
}
1665
1666
/*
 * curl_url_set() stores, replaces or clears one component of 'u'.
 *
 * u      the URL handle to update; NULL yields CURLUE_BAD_HANDLE
 * what   which part to set (CURLUPART_*)
 * part   the new contents. NULL clears the part; for CURLUPART_URL, NULL
 *        clears the entire handle. Input longer than
 *        CURL_MAX_INPUT_LENGTH is refused with CURLUE_MALFORMED_INPUT.
 * flags  CURLU_* bitmask: CURLU_URLENCODE, CURLU_APPENDQUERY,
 *        CURLU_NON_SUPPORT_SCHEME, CURLU_NO_AUTHORITY and the scheme
 *        guessing/defaulting bits are consulted
 *
 * For CURLUPART_URL a relative 'part' is resolved against the URL already
 * held in the handle when one can be extracted; otherwise 'part' is parsed
 * as a complete URL.
 *
 * Returns CURLUE_OK on success, otherwise a CURLUE_* error code.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  long port = 0;
  bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
  bool plusencode = FALSE;
  bool urlskipslash = FALSE;
  bool leadingslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;
  size_t nalloc;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part) {
    /* setting a part to NULL clears it */
    switch(what) {
    case CURLUPART_URL:
      break;
    case CURLUPART_SCHEME:
      storep = &u->scheme;
      break;
    case CURLUPART_USER:
      storep = &u->user;
      break;
    case CURLUPART_PASSWORD:
      storep = &u->password;
      break;
    case CURLUPART_OPTIONS:
      storep = &u->options;
      break;
    case CURLUPART_HOST:
      storep = &u->host;
      break;
    case CURLUPART_ZONEID:
      storep = &u->zoneid;
      break;
    case CURLUPART_PORT:
      u->portnum = 0;
      storep = &u->port;
      break;
    case CURLUPART_PATH:
      storep = &u->path;
      break;
    case CURLUPART_QUERY:
      storep = &u->query;
      break;
    case CURLUPART_FRAGMENT:
      storep = &u->fragment;
      break;
    default:
      return CURLUE_UNKNOWN_PART;
    }
    if(storep && *storep) {
      Curl_safefree(*storep);
    }
    else if(!storep) {
      /* CURLUPART_URL: wipe the whole handle */
      free_urlhandle(u);
      memset(u, 0, sizeof(struct Curl_URL));
    }
    return CURLUE_OK;
  }

  nalloc = strlen(part);
  if(nalloc > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  switch(what) {
  case CURLUPART_SCHEME: {
    const char *s = part;
    if((nalloc > MAX_SCHEME_LEN) || (nalloc < 1))
      /* too long or too short */
      return CURLUE_BAD_SCHEME;
    /* verify that it is a fine scheme */
    if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
      return CURLUE_UNSUPPORTED_SCHEME;
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    if(!ISALPHA(*s))
      return CURLUE_BAD_SCHEME;
    /* check every character after the first, the final one included */
    for(s++; *s; s++) {
      if(!(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.')))
        return CURLUE_BAD_SCHEME;
    }
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
  {
    char *endp;
    urlencode = FALSE; /* never */
    if(!ISDIGIT(part[0]))
      /* strtol() would skip leading whitespace and accept a sign, and the
         string is stored verbatim, so insist on a leading digit */
      return CURLUE_BAD_PORT_NUMBER;
    port = strtol(part, &endp, 10);  /* Port number must be decimal */
    if((port <= 0) || (port > 0xffff))
      return CURLUE_BAD_PORT_NUMBER;
    if(*endp)
      /* weirdly provided number, not good! */
      return CURLUE_BAD_PORT_NUMBER;
    storep = &u->port;
  }
  break;
  case CURLUPART_PATH:
    urlskipslash = TRUE;
    leadingslash = TRUE; /* enforce */
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY)?1:0;
    equalsencode = appendquery;
    storep = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    break;
  case CURLUPART_URL: {
    /*
     * Allow a new URL to replace the existing (if any) contents.
     *
     * If the existing contents is enough for a URL, allow a relative URL to
     * replace it.
     */
    CURLUcode result;
    char *oldurl;
    char *redired_url;

    if(!nalloc)
      /* a blank URL is not a valid URL */
      return CURLUE_MALFORMED_INPUT;

    /* if the new thing is absolute or the old one is not
     * (we could not get an absolute url in 'oldurl'),
     * then replace the existing with the new. */
    if(Curl_is_absolute_url(part, NULL, 0,
                            flags & (CURLU_GUESS_SCHEME|
                                     CURLU_DEFAULT_SCHEME))
       || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
      return parseurl_and_replace(part, u, flags);
    }

    /* apply the relative part to create a new URL
     * and replace the existing one with it. */
    redired_url = concat_url(oldurl, part);
    free(oldurl);
    if(!redired_url)
      return CURLUE_OUT_OF_MEMORY;

    result = parseurl_and_replace(redired_url, u, flags);
    free(redired_url);
    return result;
  }
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp;
    struct dynbuf enc;
    /* room for every input byte as a %XX triplet, plus the terminator and
       an optional added leading slash */
    Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);

    if(leadingslash && (part[0] != '/')) {
      CURLcode result = Curl_dyn_addn(&enc, "/", 1);
      if(result)
        return CURLUE_OUT_OF_MEMORY;
    }
    if(urlencode) {
      const unsigned char *i;

      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = Curl_dyn_addn(&enc, "+", 1);
          if(result)
            return CURLUE_OUT_OF_MEMORY;
        }
        else if(ISUNRESERVED(*i) ||
                ((*i == '/') && urlskipslash) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = Curl_dyn_addn(&enc, i, 1);
          if(result)
            return CURLUE_OUT_OF_MEMORY;
        }
        else {
          char out[3]={'%'};
          out[1] = hexdigits[*i>>4];
          out[2] = hexdigits[*i & 0xf];
          result = Curl_dyn_addn(&enc, out, 3);
          if(result)
            return CURLUE_OUT_OF_MEMORY;
        }
      }
    }
    else {
      char *p;
      CURLcode result = Curl_dyn_add(&enc, part);
      if(result)
        return CURLUE_OUT_OF_MEMORY;
      p = Curl_dyn_ptr(&enc);
      while(*p) {
        /* make sure percent encoded are lower case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }
    newp = Curl_dyn_ptr(&enc);

    if(appendquery && newp) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen -1] != '&');
      if(querylen) {
        struct dynbuf qbuf;
        Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);

        /* original query, optional separator, then the new piece */
        if(Curl_dyn_addn(&qbuf, u->query, querylen) ||
           (addamperand && Curl_dyn_addn(&qbuf, "&", 1)) ||
           Curl_dyn_add(&qbuf, newp)) {
          Curl_dyn_free(&enc);
          return CURLUE_OUT_OF_MEMORY;
        }
        Curl_dyn_free(&enc);
        free(*storep);
        *storep = Curl_dyn_ptr(&qbuf);
        return CURLUE_OK;
      }
      /* with no existing query, the new piece is stored as-is below */
    }

    else if(what == CURLUPART_HOST) {
      size_t n = Curl_dyn_len(&enc);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it's allowed to be empty. */
      }
      else {
        if(!n || hostname_check(u, (char *)newp, n)) {
          Curl_dyn_free(&enc);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    free(*storep);
    *storep = (char *)newp;
  }
  /* set after the string, to make it not assigned if the allocation above
     fails */
  if(port)
    u->portnum = port;
  return CURLUE_OK;
}