Coverage Report

Created: 2024-02-25 06:14

/src/PROJ/curl/lib/urlapi.c
Line
Count
Source (jump to first uncovered line)
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
25
#include "curl_setup.h"
26
27
#include "urldata.h"
28
#include "urlapi-int.h"
29
#include "strcase.h"
30
#include "url.h"
31
#include "escape.h"
32
#include "curl_ctype.h"
33
#include "inet_pton.h"
34
#include "inet_ntop.h"
35
#include "strdup.h"
36
#include "idn.h"
37
#include "curl_memrchr.h"
38
39
/* The last 3 #include files should be in this order */
40
#include "curl_printf.h"
41
#include "curl_memory.h"
42
#include "memdebug.h"
43
44
  /* MSDOS/Windows style drive prefix, eg c: in c:foo
   *
   * True when 'str' starts with an ASCII letter directly followed by a
   * colon. The argument is parenthesized in the expansion so that any
   * pointer expression (such as 'p + 1') can be passed. Note that 'str' is
   * evaluated more than once, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49
50
  /* Drive prefix the way it can show up in a URL path: an ASCII letter,
   * then ':' or '|', then a slash, a backslash or the end of the string.
   * 'str' is evaluated more than once; the checks run left to right so
   * no byte beyond a terminating null byte is read. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == '\0'))
57
58
/* scheme is not URL encoded, the longest libcurl supported ones are... */
59
0
#define MAX_SCHEME_LEN 40
60
61
/*
62
 * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
63
 * sure we have _some_ value for AF_INET6 without polluting our fake value
64
 * everywhere.
65
 */
66
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
67
#define AF_INET6 (AF_INET + 1)
68
#endif
69
70
/* Internal representation of CURLU. Every string member points to a
   URL-encoded string, or is NULL when that part is not set. */
struct Curl_URL {
  char *scheme;   /* scheme name */
  char *user;     /* user name from the login part */
  char *password; /* password from the login part */
  char *options;  /* login options. IMAP only? */
  char *host;     /* host name or numerical address */
  char *zoneid;   /* for numerical IPv6 addresses */
  char *port;     /* port number as a string */
  char *path;
  char *query;
  char *fragment;
  long portnum;   /* the numerical version of 'port' */
};
84
85
0
#define DEFAULT_SCHEME "https"
86
87
static void free_urlhandle(struct Curl_URL *u)
88
0
{
89
0
  free(u->scheme);
90
0
  free(u->user);
91
0
  free(u->password);
92
0
  free(u->options);
93
0
  free(u->host);
94
0
  free(u->zoneid);
95
0
  free(u->port);
96
0
  free(u->path);
97
0
  free(u->query);
98
0
  free(u->fragment);
99
0
}
100
101
/*
 * Return a pointer to where the host name part of 'url' ends: the first '/'
 * or '?' found after the first "//" (or from the start of the string when
 * there is no "//"). The '?' matters in cases like
 * http://www.example.com?id=2380
 *
 * If neither character is present, the returned pointer is the terminating
 * null byte of 'url'.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *dslash = strstr(url, "//");
  /* the host name starts after the slashes, if there are any */
  const char *host = dslash ? dslash + 2 : url;
  const char *slash = strchr(host, '/');
  const char *qmark = strchr(host, '?');

  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  /* whichever comes first */
  return (qmark <= slash) ? qmark : slash;
}
128
129
/* convert CURLcode to CURLUcode */
130
0
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
131
0
                  CURLUE_OUT_OF_MEMORY)
132
/*
133
 * Decide whether a character in a URL must be escaped.
134
 */
135
0
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
136
137
static const char hexdigits[] = "0123456789abcdef";
138
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
139
 * spaces in the source URL accordingly.
140
 *
141
 * URL encoding should be skipped for host names, otherwise IDN resolution
142
 * will fail.
143
 */
144
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
145
                               size_t len, bool relative,
146
                               bool query)
147
0
{
148
  /* we must add this with whitespace-replacing */
149
0
  bool left = !query;
150
0
  const unsigned char *iptr;
151
0
  const unsigned char *host_sep = (const unsigned char *) url;
152
0
  CURLcode result;
153
154
0
  if(!relative)
155
0
    host_sep = (const unsigned char *) find_host_sep(url);
156
157
0
  for(iptr = (unsigned char *)url;    /* read from here */
158
0
      len; iptr++, len--) {
159
160
0
    if(iptr < host_sep) {
161
0
      result = Curl_dyn_addn(o, iptr, 1);
162
0
      if(result)
163
0
        return cc2cu(result);
164
0
      continue;
165
0
    }
166
167
0
    if(*iptr == ' ') {
168
0
      if(left)
169
0
        result = Curl_dyn_addn(o, "%20", 3);
170
0
      else
171
0
        result = Curl_dyn_addn(o, "+", 1);
172
0
      if(result)
173
0
        return cc2cu(result);
174
0
      continue;
175
0
    }
176
177
0
    if(*iptr == '?')
178
0
      left = FALSE;
179
180
0
    if(urlchar_needs_escaping(*iptr)) {
181
0
      char out[3]={'%'};
182
0
      out[1] = hexdigits[*iptr>>4];
183
0
      out[2] = hexdigits[*iptr & 0xf];
184
0
      result = Curl_dyn_addn(o, out, 3);
185
0
    }
186
0
    else
187
0
      result = Curl_dyn_addn(o, iptr, 1);
188
0
    if(result)
189
0
      return cc2cu(result);
190
0
  }
191
192
0
  return CURLUE_OK;
193
0
}
194
195
/*
196
 * Returns the length of the scheme if the given URL is absolute (as opposed
197
 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
198
 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
199
 *
200
 * If 'guess_scheme' is TRUE, it means the URL might be provided without
201
 * scheme.
202
 */
203
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
204
                            bool guess_scheme)
205
0
{
206
0
  int i = 0;
207
0
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
208
0
  (void)buflen; /* only used in debug-builds */
209
0
  if(buf)
210
0
    buf[0] = 0; /* always leave a defined value in buf */
211
#ifdef _WIN32
212
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
213
    return 0;
214
#endif
215
0
  if(ISALPHA(url[0]))
216
0
    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
217
0
      char s = url[i];
218
0
      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
219
        /* RFC 3986 3.1 explains:
220
           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
221
        */
222
0
      }
223
0
      else {
224
0
        break;
225
0
      }
226
0
    }
227
0
  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
228
    /* If this does not guess scheme, the scheme always ends with the colon so
229
       that this also detects data: URLs etc. In guessing mode, data: could
230
       be the host name "data" with a specified port number. */
231
232
    /* the length of the scheme is the name part only */
233
0
    size_t len = i;
234
0
    if(buf) {
235
0
      buf[i] = 0;
236
0
      while(i--) {
237
0
        buf[i] = Curl_raw_tolower(url[i]);
238
0
      }
239
0
    }
240
0
    return len;
241
0
  }
242
0
  return 0;
243
0
}
244
245
/*
246
 * Concatenate a relative URL to a base URL making it absolute.
247
 * URL-encodes any spaces.
248
 * The returned pointer must be freed by the caller unless NULL
249
 * (returns NULL on out of memory).
250
 *
251
 * Note that this function destroys the 'base' string.
252
 */
253
static CURLcode concat_url(char *base, const char *relurl, char **newurl)
254
0
{
255
  /***
256
   TRY to append this new path to the old URL
257
   to the right of the host part. Oh crap, this is doomed to cause
258
   problems in the future...
259
  */
260
0
  struct dynbuf newest;
261
0
  char *protsep;
262
0
  char *pathsep;
263
0
  bool host_changed = FALSE;
264
0
  const char *useurl = relurl;
265
0
  CURLcode result = CURLE_OK;
266
0
  CURLUcode uc;
267
0
  *newurl = NULL;
268
269
  /* protsep points to the start of the host name */
270
0
  protsep = strstr(base, "//");
271
0
  if(!protsep)
272
0
    protsep = base;
273
0
  else
274
0
    protsep += 2; /* pass the slashes */
275
276
0
  if('/' != relurl[0]) {
277
0
    int level = 0;
278
279
    /* First we need to find out if there's a ?-letter in the URL,
280
       and cut it and the right-side of that off */
281
0
    pathsep = strchr(protsep, '?');
282
0
    if(pathsep)
283
0
      *pathsep = 0;
284
285
    /* we have a relative path to append to the last slash if there's one
286
       available, or if the new URL is just a query string (starts with a
287
       '?')  we append the new one at the end of the entire currently worked
288
       out URL */
289
0
    if(useurl[0] != '?') {
290
0
      pathsep = strrchr(protsep, '/');
291
0
      if(pathsep)
292
0
        *pathsep = 0;
293
0
    }
294
295
    /* Check if there's any slash after the host name, and if so, remember
296
       that position instead */
297
0
    pathsep = strchr(protsep, '/');
298
0
    if(pathsep)
299
0
      protsep = pathsep + 1;
300
0
    else
301
0
      protsep = NULL;
302
303
    /* now deal with one "./" or any amount of "../" in the newurl
304
       and act accordingly */
305
306
0
    if((useurl[0] == '.') && (useurl[1] == '/'))
307
0
      useurl += 2; /* just skip the "./" */
308
309
0
    while((useurl[0] == '.') &&
310
0
          (useurl[1] == '.') &&
311
0
          (useurl[2] == '/')) {
312
0
      level++;
313
0
      useurl += 3; /* pass the "../" */
314
0
    }
315
316
0
    if(protsep) {
317
0
      while(level--) {
318
        /* cut off one more level from the right of the original URL */
319
0
        pathsep = strrchr(protsep, '/');
320
0
        if(pathsep)
321
0
          *pathsep = 0;
322
0
        else {
323
0
          *protsep = 0;
324
0
          break;
325
0
        }
326
0
      }
327
0
    }
328
0
  }
329
0
  else {
330
    /* We got a new absolute path for this server */
331
332
0
    if(relurl[1] == '/') {
333
      /* the new URL starts with //, just keep the protocol part from the
334
         original one */
335
0
      *protsep = 0;
336
0
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
337
                              skip the new ones */
338
0
      host_changed = TRUE;
339
0
    }
340
0
    else {
341
      /* cut off the original URL from the first slash, or deal with URLs
342
         without slash */
343
0
      pathsep = strchr(protsep, '/');
344
0
      if(pathsep) {
345
        /* When people use badly formatted URLs, such as
346
           "http://www.example.com?dir=/home/daniel" we must not use the first
347
           slash, if there's a ?-letter before it! */
348
0
        char *sep = strchr(protsep, '?');
349
0
        if(sep && (sep < pathsep))
350
0
          pathsep = sep;
351
0
        *pathsep = 0;
352
0
      }
353
0
      else {
354
        /* There was no slash. Now, since we might be operating on a badly
355
           formatted URL, such as "http://www.example.com?id=2380" which
356
           doesn't use a slash separator as it is supposed to, we need to check
357
           for a ?-letter as well! */
358
0
        pathsep = strchr(protsep, '?');
359
0
        if(pathsep)
360
0
          *pathsep = 0;
361
0
      }
362
0
    }
363
0
  }
364
365
0
  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
366
367
  /* copy over the root url part */
368
0
  result = Curl_dyn_add(&newest, base);
369
0
  if(result)
370
0
    return result;
371
372
  /* check if we need to append a slash */
373
0
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
374
0
    ;
375
0
  else {
376
0
    result = Curl_dyn_addn(&newest, "/", 1);
377
0
    if(result)
378
0
      return result;
379
0
  }
380
381
  /* then append the new piece on the right side */
382
0
  uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
383
0
                     FALSE);
384
0
  if(uc)
385
0
    return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
386
387
0
  *newurl = Curl_dyn_ptr(&newest);
388
0
  return CURLE_OK;
389
0
}
390
391
/* scan for byte values <= 31, 127 and sometimes space */
392
static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
393
0
{
394
0
  static const char badbytes[]={
395
0
    /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396
0
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397
0
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
398
0
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
399
0
    0x7f, 0x00 /* null-terminate */
400
0
  };
401
0
  size_t n = strlen(url);
402
0
  size_t nfine;
403
404
0
  if(n > CURL_MAX_INPUT_LENGTH)
405
    /* excessive input length */
406
0
    return CURLUE_MALFORMED_INPUT;
407
408
0
  nfine = strcspn(url, badbytes);
409
0
  if((nfine != n) ||
410
0
     (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
411
0
    return CURLUE_MALFORMED_INPUT;
412
413
0
  *urllen = n;
414
0
  return CURLUE_OK;
415
0
}
416
417
/*
418
 * parse_hostname_login()
419
 *
420
 * Parse the login details (user name, password and options) from the URL and
421
 * strip them out of the host name
422
 *
423
 */
424
static CURLUcode parse_hostname_login(struct Curl_URL *u,
425
                                      const char *login,
426
                                      size_t len,
427
                                      unsigned int flags,
428
                                      size_t *offset) /* to the host name */
429
0
{
430
0
  CURLUcode result = CURLUE_OK;
431
0
  CURLcode ccode;
432
0
  char *userp = NULL;
433
0
  char *passwdp = NULL;
434
0
  char *optionsp = NULL;
435
0
  const struct Curl_handler *h = NULL;
436
437
  /* At this point, we assume all the other special cases have been taken
438
   * care of, so the host is at most
439
   *
440
   *   [user[:password][;options]]@]hostname
441
   *
442
   * We need somewhere to put the embedded details, so do that first.
443
   */
444
0
  char *ptr;
445
446
0
  DEBUGASSERT(login);
447
448
0
  *offset = 0;
449
0
  ptr = memchr(login, '@', len);
450
0
  if(!ptr)
451
0
    goto out;
452
453
  /* We will now try to extract the
454
   * possible login information in a string like:
455
   * ftp://user:password@ftp.my.site:8021/README */
456
0
  ptr++;
457
458
  /* if this is a known scheme, get some details */
459
0
  if(u->scheme)
460
0
    h = Curl_get_scheme_handler(u->scheme);
461
462
  /* We could use the login information in the URL so extract it. Only parse
463
     options if the handler says we should. Note that 'h' might be NULL! */
464
0
  ccode = Curl_parse_login_details(login, ptr - login - 1,
465
0
                                   &userp, &passwdp,
466
0
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
467
0
                                   &optionsp:NULL);
468
0
  if(ccode) {
469
0
    result = CURLUE_BAD_LOGIN;
470
0
    goto out;
471
0
  }
472
473
0
  if(userp) {
474
0
    if(flags & CURLU_DISALLOW_USER) {
475
      /* Option DISALLOW_USER is set and url contains username. */
476
0
      result = CURLUE_USER_NOT_ALLOWED;
477
0
      goto out;
478
0
    }
479
0
    free(u->user);
480
0
    u->user = userp;
481
0
  }
482
483
0
  if(passwdp) {
484
0
    free(u->password);
485
0
    u->password = passwdp;
486
0
  }
487
488
0
  if(optionsp) {
489
0
    free(u->options);
490
0
    u->options = optionsp;
491
0
  }
492
493
  /* the host name starts at this offset */
494
0
  *offset = ptr - login;
495
0
  return CURLUE_OK;
496
497
0
out:
498
499
0
  free(userp);
500
0
  free(passwdp);
501
0
  free(optionsp);
502
0
  u->user = NULL;
503
0
  u->password = NULL;
504
0
  u->options = NULL;
505
506
0
  return result;
507
0
}
508
509
/*
 * Find and parse a port number at the end of the host name held in 'host'.
 *
 * If a port separator is found, the dynbuf is cut off at the colon so that
 * only the host name remains. A port number, when present, is stored in the
 * handle both as a number and as a newly generated string.
 *
 * 'has_scheme' tells if the URL has a scheme. Only then is a colon without
 * any port number after it accepted.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6 for a bracketed address without an end
 * bracket, CURLUE_BAD_PORT_NUMBER or CURLUE_OUT_OF_MEMORY.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *hostname = Curl_dyn_ptr(host);
  char *colon;  /* the port separator, if there is one */
  char *digits; /* what follows the separator */
  char *rest;
  long port;

  if(hostname[0] != '[')
    colon = strchr(hostname, ':');
  else {
    /*
     * This is a RFC2732-style specified IP-address. Find the end of the
     * IPv6 address on the ']' ending bracket.
     */
    char *bracket = strchr(hostname, ']');
    if(!bracket)
      return CURLUE_BAD_IPV6;
    colon = bracket + 1;
    if(!*colon)
      /* nothing follows the bracket, there is no port */
      colon = NULL;
    else if(*colon != ':')
      return CURLUE_BAD_PORT_NUMBER;
  }

  if(!colon)
    return CURLUE_OK;

  /* cut off the host name at the colon */
  Curl_dyn_setlen(host, colon - hostname);
  digits = colon + 1;

  /* Browser behavior adaptation. If there's a colon with no digits after,
     just cut off the name there which makes us ignore the colon and just
     use the default port. Firefox, Chrome and Safari all do that.

     Don't do it if the URL has no scheme, to make something that looks like
     a scheme not work!
  */
  if(!*digits)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(!ISDIGIT(*digits))
    return CURLUE_BAD_PORT_NUMBER;

  port = strtol(digits, &rest, 10);  /* Port number must be decimal */

  /* too large, or followed by something that is not a digit */
  if((port > 0xffff) || rest[0])
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = port;
  /* generate a new port number string to get rid of leading zeroes etc */
  free(u->port);
  u->port = aprintf("%ld", port);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
571
572
/* this assumes 'hostname' now starts with [ */
573
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
574
                            size_t hlen) /* length of hostname */
575
0
{
576
0
  size_t len;
577
0
  DEBUGASSERT(*hostname == '[');
578
0
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
579
0
    return CURLUE_BAD_IPV6;
580
0
  hostname++;
581
0
  hlen -= 2;
582
583
  /* only valid IPv6 letters are ok */
584
0
  len = strspn(hostname, "0123456789abcdefABCDEF:.");
585
586
0
  if(hlen != len) {
587
0
    hlen = len;
588
0
    if(hostname[len] == '%') {
589
      /* this could now be '%[zone id]' */
590
0
      char zoneid[16];
591
0
      int i = 0;
592
0
      char *h = &hostname[len + 1];
593
      /* pass '25' if present and is a url encoded percent sign */
594
0
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595
0
        h += 2;
596
0
      while(*h && (*h != ']') && (i < 15))
597
0
        zoneid[i++] = *h++;
598
0
      if(!i || (']' != *h))
599
0
        return CURLUE_BAD_IPV6;
600
0
      zoneid[i] = 0;
601
0
      u->zoneid = strdup(zoneid);
602
0
      if(!u->zoneid)
603
0
        return CURLUE_OUT_OF_MEMORY;
604
0
      hostname[len] = ']'; /* insert end bracket */
605
0
      hostname[len + 1] = 0; /* terminate the hostname */
606
0
    }
607
0
    else
608
0
      return CURLUE_BAD_IPV6;
609
    /* hostname is fine */
610
0
  }
611
612
  /* Check the IPv6 address. */
613
0
  {
614
0
    char dest[16]; /* fits a binary IPv6 address */
615
0
    char norm[MAX_IPADR_LEN];
616
0
    hostname[hlen] = 0; /* end the address there */
617
0
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618
0
      return CURLUE_BAD_IPV6;
619
620
    /* check if it can be done shorter */
621
0
    if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622
0
       (strlen(norm) < hlen)) {
623
0
      strcpy(hostname, norm);
624
0
      hlen = strlen(norm);
625
0
      hostname[hlen + 1] = 0;
626
0
    }
627
0
    hostname[hlen] = ']'; /* restore ending bracket */
628
0
  }
629
0
  return CURLUE_OK;
630
0
}
631
632
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
633
                                size_t hlen) /* length of hostname */
634
0
{
635
0
  size_t len;
636
0
  DEBUGASSERT(hostname);
637
638
0
  if(!hlen)
639
0
    return CURLUE_NO_HOST;
640
0
  else if(hostname[0] == '[')
641
0
    return ipv6_parse(u, hostname, hlen);
642
0
  else {
643
    /* letters from the second string are not ok */
644
0
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
645
0
    if(hlen != len)
646
      /* hostname with bad content */
647
0
      return CURLUE_BAD_HOSTNAME;
648
0
  }
649
0
  return CURLUE_OK;
650
0
}
651
652
/*
653
 * Handle partial IPv4 numerical addresses and different bases, like
654
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
655
 *
656
 * If the given input string is syntactically wrong IPv4 or any part for
657
 * example is too big, this function returns HOST_NAME.
658
 *
659
 * Output the "normalized" version of that input string in plain quad decimal
660
 * integers.
661
 *
662
 * Returns the host type.
663
 */
664
665
0
#define HOST_ERROR   -1 /* out of memory */
666
0
#define HOST_BAD     -2 /* bad IPv4 address */
667
668
0
#define HOST_NAME    1
669
0
#define HOST_IPV4    2
670
0
#define HOST_IPV6    3
671
672
static int ipv4_normalize(struct dynbuf *host)
673
0
{
674
0
  bool done = FALSE;
675
0
  int n = 0;
676
0
  const char *c = Curl_dyn_ptr(host);
677
0
  unsigned long parts[4] = {0, 0, 0, 0};
678
0
  CURLcode result = CURLE_OK;
679
680
0
  if(*c == '[')
681
0
    return HOST_IPV6;
682
683
0
  while(!done) {
684
0
    char *endp;
685
0
    unsigned long l;
686
0
    if(!ISDIGIT(*c))
687
      /* most importantly this doesn't allow a leading plus or minus */
688
0
      return HOST_NAME;
689
0
    l = strtoul(c, &endp, 0);
690
691
0
    parts[n] = l;
692
0
    c = endp;
693
694
0
    switch(*c) {
695
0
    case '.':
696
0
      if(n == 3)
697
0
        return HOST_NAME;
698
0
      n++;
699
0
      c++;
700
0
      break;
701
702
0
    case '\0':
703
0
      done = TRUE;
704
0
      break;
705
706
0
    default:
707
0
      return HOST_NAME;
708
0
    }
709
710
    /* overflow */
711
0
    if((l == ULONG_MAX) && (errno == ERANGE))
712
0
      return HOST_NAME;
713
714
0
#if SIZEOF_LONG > 4
715
    /* a value larger than 32 bits */
716
0
    if(l > UINT_MAX)
717
0
      return HOST_NAME;
718
0
#endif
719
0
  }
720
721
0
  switch(n) {
722
0
  case 0: /* a -- 32 bits */
723
0
    Curl_dyn_reset(host);
724
725
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
726
0
                           (unsigned int)(parts[0] >> 24),
727
0
                           (unsigned int)((parts[0] >> 16) & 0xff),
728
0
                           (unsigned int)((parts[0] >> 8) & 0xff),
729
0
                           (unsigned int)(parts[0] & 0xff));
730
0
    break;
731
0
  case 1: /* a.b -- 8.24 bits */
732
0
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
733
0
      return HOST_NAME;
734
0
    Curl_dyn_reset(host);
735
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
736
0
                           (unsigned int)(parts[0]),
737
0
                           (unsigned int)((parts[1] >> 16) & 0xff),
738
0
                           (unsigned int)((parts[1] >> 8) & 0xff),
739
0
                           (unsigned int)(parts[1] & 0xff));
740
0
    break;
741
0
  case 2: /* a.b.c -- 8.8.16 bits */
742
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
743
0
      return HOST_NAME;
744
0
    Curl_dyn_reset(host);
745
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
746
0
                           (unsigned int)(parts[0]),
747
0
                           (unsigned int)(parts[1]),
748
0
                           (unsigned int)((parts[2] >> 8) & 0xff),
749
0
                           (unsigned int)(parts[2] & 0xff));
750
0
    break;
751
0
  case 3: /* a.b.c.d -- 8.8.8.8 bits */
752
0
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
753
0
       (parts[3] > 0xff))
754
0
      return HOST_NAME;
755
0
    Curl_dyn_reset(host);
756
0
    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
757
0
                           (unsigned int)(parts[0]),
758
0
                           (unsigned int)(parts[1]),
759
0
                           (unsigned int)(parts[2]),
760
0
                           (unsigned int)(parts[3]));
761
0
    break;
762
0
  }
763
0
  if(result)
764
0
    return HOST_ERROR;
765
0
  return HOST_IPV4;
766
0
}
767
768
/* if necessary, replace the host content with a URL decoded version */
769
static CURLUcode urldecode_host(struct dynbuf *host)
770
0
{
771
0
  char *per = NULL;
772
0
  const char *hostname = Curl_dyn_ptr(host);
773
0
  per = strchr(hostname, '%');
774
0
  if(!per)
775
    /* nothing to decode */
776
0
    return CURLUE_OK;
777
0
  else {
778
    /* encoded */
779
0
    size_t dlen;
780
0
    char *decoded;
781
0
    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
782
0
                                     REJECT_CTRL);
783
0
    if(result)
784
0
      return CURLUE_BAD_HOSTNAME;
785
0
    Curl_dyn_reset(host);
786
0
    result = Curl_dyn_addn(host, decoded, dlen);
787
0
    free(decoded);
788
0
    if(result)
789
0
      return cc2cu(result);
790
0
  }
791
792
0
  return CURLUE_OK;
793
0
}
794
795
static CURLUcode parse_authority(struct Curl_URL *u,
796
                                 const char *auth, size_t authlen,
797
                                 unsigned int flags,
798
                                 struct dynbuf *host,
799
                                 bool has_scheme)
800
0
{
801
0
  size_t offset;
802
0
  CURLUcode uc;
803
0
  CURLcode result;
804
805
  /*
806
   * Parse the login details and strip them out of the host name.
807
   */
808
0
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
809
0
  if(uc)
810
0
    goto out;
811
812
0
  result = Curl_dyn_addn(host, auth + offset, authlen - offset);
813
0
  if(result) {
814
0
    uc = cc2cu(result);
815
0
    goto out;
816
0
  }
817
818
0
  uc = Curl_parse_port(u, host, has_scheme);
819
0
  if(uc)
820
0
    goto out;
821
822
0
  if(!Curl_dyn_len(host))
823
0
    return CURLUE_NO_HOST;
824
825
0
  switch(ipv4_normalize(host)) {
826
0
  case HOST_IPV4:
827
0
    break;
828
0
  case HOST_IPV6:
829
0
    uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
830
0
    break;
831
0
  case HOST_NAME:
832
0
    uc = urldecode_host(host);
833
0
    if(!uc)
834
0
      uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
835
0
    break;
836
0
  case HOST_ERROR:
837
0
    uc = CURLUE_OUT_OF_MEMORY;
838
0
    break;
839
0
  case HOST_BAD:
840
0
  default:
841
0
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
842
0
    break;
843
0
  }
844
845
0
out:
846
0
  return uc;
847
0
}
848
849
/*
 * Parse the null-terminated 'authority' string with parse_authority() and,
 * if that works out, make the resulting host name the host of the handle,
 * freeing the previous one. On failure the host of the handle is left
 * as it was.
 *
 * The URL is considered to have a scheme if one is set in the handle.
 *
 * Returns what parse_authority() returns.
 */
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
                                 unsigned int flags)
{
  struct dynbuf host;
  CURLUcode uc;

  DEBUGASSERT(authority);
  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  uc = parse_authority(u, authority, strlen(authority), flags,
                       &host, u->scheme ? TRUE : FALSE);
  if(uc) {
    Curl_dyn_free(&host);
    return uc;
  }

  /* the handle takes over the buffer of the dynbuf */
  free(u->host);
  u->host = Curl_dyn_ptr(&host);
  return uc;
}
868
869
/*
870
 * "Remove Dot Segments"
871
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
872
 */
873
874
/*
875
 * dedotdotify()
876
 * @unittest: 1395
877
 *
878
 * This function gets a null-terminated path with dot and dotdot sequences
879
 * passed in and strips them off according to the rules in RFC 3986 section
880
 * 5.2.4.
881
 *
882
 * The function handles a query part ('?' + stuff) appended but it expects
883
 * that fragments ('#' + stuff) have already been cut off.
884
 *
885
 * RETURNS
886
 *
887
 * Zero for success and 'out' set to an allocated dedotdotified string.
888
 */
889
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
890
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
891
0
{
892
0
  char *outptr;
893
0
  const char *endp = &input[clen];
894
0
  char *out;
895
896
0
  *outp = NULL;
897
  /* the path always starts with a slash, and a slash has not dot */
898
0
  if((clen < 2) || !memchr(input, '.', clen))
899
0
    return 0;
900
901
0
  out = malloc(clen + 1);
902
0
  if(!out)
903
0
    return 1; /* out of memory */
904
905
0
  *out = 0; /* null-terminates, for inputs like "./" */
906
0
  outptr = out;
907
908
0
  do {
909
0
    bool dotdot = TRUE;
910
0
    if(*input == '.') {
911
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
912
          remove that prefix from the input buffer; otherwise, */
913
914
0
      if(!strncmp("./", input, 2)) {
915
0
        input += 2;
916
0
        clen -= 2;
917
0
      }
918
0
      else if(!strncmp("../", input, 3)) {
919
0
        input += 3;
920
0
        clen -= 3;
921
0
      }
922
      /*  D.  if the input buffer consists only of "." or "..", then remove
923
          that from the input buffer; otherwise, */
924
925
0
      else if(!strcmp(".", input) || !strcmp("..", input) ||
926
0
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
927
0
        *out = 0;
928
0
        break;
929
0
      }
930
0
      else
931
0
        dotdot = FALSE;
932
0
    }
933
0
    else if(*input == '/') {
934
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
935
          "."  is a complete path segment, then replace that prefix with "/" in
936
          the input buffer; otherwise, */
937
0
      if(!strncmp("/./", input, 3)) {
938
0
        input += 2;
939
0
        clen -= 2;
940
0
      }
941
0
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
942
0
        *outptr++ = '/';
943
0
        *outptr = 0;
944
0
        break;
945
0
      }
946
947
      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
948
          where ".." is a complete path segment, then replace that prefix with
949
          "/" in the input buffer and remove the last segment and its
950
          preceding "/" (if any) from the output buffer; otherwise, */
951
952
0
      else if(!strncmp("/../", input, 4)) {
953
0
        input += 3;
954
0
        clen -= 3;
955
        /* remove the last segment from the output buffer */
956
0
        while(outptr > out) {
957
0
          outptr--;
958
0
          if(*outptr == '/')
959
0
            break;
960
0
        }
961
0
        *outptr = 0; /* null-terminate where it stops */
962
0
      }
963
0
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
964
        /* remove the last segment from the output buffer */
965
0
        while(outptr > out) {
966
0
          outptr--;
967
0
          if(*outptr == '/')
968
0
            break;
969
0
        }
970
0
        *outptr++ = '/';
971
0
        *outptr = 0; /* null-terminate where it stops */
972
0
        break;
973
0
      }
974
0
      else
975
0
        dotdot = FALSE;
976
0
    }
977
0
    else
978
0
      dotdot = FALSE;
979
980
0
    if(!dotdot) {
981
      /*  E.  move the first path segment in the input buffer to the end of
982
          the output buffer, including the initial "/" character (if any) and
983
          any subsequent characters up to, but not including, the next "/"
984
          character or the end of the input buffer. */
985
986
0
      do {
987
0
        *outptr++ = *input++;
988
0
        clen--;
989
0
      } while(*input && (*input != '/') && (*input != '?'));
990
0
      *outptr = 0;
991
0
    }
992
993
    /* continue until end of path */
994
0
  } while(input < endp);
995
996
0
  *outp = out;
997
0
  return 0; /* success */
998
0
}
999
1000
/*
 * parseurl() takes the string 'url' apart and stores the pieces (scheme,
 * host, path, query, fragment and whatever parse_authority() extracts) as
 * allocated strings in the handle 'u'.
 *
 * 'flags' is the CURLU_* bitmask. The bits inspected directly in here are
 * CURLU_GUESS_SCHEME, CURLU_DEFAULT_SCHEME, CURLU_NON_SUPPORT_SCHEME,
 * CURLU_NO_AUTHORITY, CURLU_URLENCODE and CURLU_PATH_AS_IS; the full mask is
 * also handed on to junkscan() and parse_authority().
 *
 * Returns CURLUE_OK on success. On every error path the temporary host
 * buffer is released, free_urlhandle() is run on 'u' and the error code is
 * returned.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
  struct dynbuf host;
  char schemebuf[MAX_SCHEME_LEN + 1];
  const char *path;
  size_t pathlen;
  size_t urllen;
  size_t schemelen;
  size_t fraglen = 0;
  char *fragment;
  char *query;
  CURLUcode result;

  DEBUGASSERT(url);

  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);

  /* junkscan() delivers the length of the input in 'urllen' */
  result = junkscan(url, &urllen, flags);
  if(result)
    goto fail;

  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                   flags & (CURLU_GUESS_SCHEME|
                                            CURLU_DEFAULT_SCHEME));

  if(!schemelen || strcmp(schemebuf, "file")) {
    /*
     * Everything that is not a file: URL
     */
    const char *scheme = NULL; /* the scheme to store, once known */
    const char *authority;     /* where host name + port number start */
    size_t authlen;

    if(!schemelen) {
      /* no scheme in the input, which is only fine if we were asked to
         assume or guess one */
      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
        result = CURLUE_BAD_SCHEME;
        goto fail;
      }
      if(flags & CURLU_DEFAULT_SCHEME)
        scheme = DEFAULT_SCHEME;

      /* treat the whole input as starting with the authority */
      authority = url;
    }
    else {
      const char *cursor = &url[schemelen + 1];
      int nslashes = 0;

      /* count the slashes after "scheme:", never looking at more than
         four of them */
      while((*cursor == '/') && (nslashes < 4)) {
        cursor++;
        nslashes++;
      }

      scheme = schemebuf;
      if(!Curl_get_scheme_handler(scheme) &&
         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
        result = CURLUE_UNSUPPORTED_SCHEME;
        goto fail;
      }

      if((nslashes == 0) || (nslashes == 4)) {
        /* less than one or more than three slashes */
        result = CURLUE_BAD_SLASHES;
        goto fail;
      }
      authority = cursor; /* host name starts here */
    }

    if(scheme) {
      u->scheme = strdup(scheme);
      if(!u->scheme) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }

    /* the authority ends at the first slash, question mark or hash */
    authlen = strcspn(authority, "/?#");
    path = &authority[authlen];

    /* at this point pathlen covers the query and the fragment as well */
    pathlen = urllen - (path - url);

    if(!authlen) {
      /* an empty authority is only tolerated on explicit request */
      if(!(flags & CURLU_NO_AUTHORITY)) {
        result = CURLUE_NO_HOST;
        goto fail;
      }
      if(Curl_dyn_add(&host, "")) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
    else {
      result = parse_authority(u, authority, authlen, flags, &host,
                               schemelen);
      if(result)
        goto fail;

      if((flags & CURLU_GUESS_SCHEME) && !scheme) {
        /* legacy curl-style guess based on how the host name begins */
        const char *hostname = Curl_dyn_ptr(&host);

        if(checkprefix("ftp.", hostname))
          scheme = "ftp";
        else if(checkprefix("dict.", hostname))
          scheme = "dict";
        else if(checkprefix("ldap.", hostname))
          scheme = "ldap";
        else if(checkprefix("imap.", hostname))
          scheme = "imap";
        else if(checkprefix("smtp.", hostname))
          scheme = "smtp";
        else if(checkprefix("pop3.", hostname))
          scheme = "pop3";
        else
          scheme = "http";

        u->scheme = strdup(scheme);
        if(!u->scheme) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
      }
    }
  }
  else {
    /*
     * file: URL
     */
    bool uncpath = FALSE;

    if(urllen <= 6) {
      /* file:/ is not enough to actually be a complete file: URL */
      result = CURLUE_BAD_FILE_URL;
      goto fail;
    }

    /* the path begins right after the five bytes of "file:" */
    path = &url[5];
    pathlen = urllen - 5;

    u->scheme = strdup("file");
    if(!u->scheme) {
      result = CURLUE_OUT_OF_MEMORY;
      goto fail;
    }

    /* Extra handling of URLs with an authority component (those starting
     * with "file://").
     *
     * An omitted host name (file:/<path>) is allowed -- valid according to
     * RFC 8089, but not the (current) WHAT-WG URL spec.
     */
    if(path[0] == '/' && path[1] == '/') {
      /* step over the two slashes */
      const char *ptr = &path[2];

      /*
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
       *
       *  o it has no/blank hostname, or
       *
       *  o the hostname matches "localhost" (case-insensitively), or
       *
       *  o the hostname is a FQDN that resolves to this machine, or
       *
       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
       *    Appendix E.3).
       *
       * For brevity, only empty, "localhost" and "127.0.0.1" host names are
       * considered local here, anything else is taken as an UNC String.
       *
       * Additionally, there is an exception for URLs with a Windows drive
       * letter in the authority (which was accidentally omitted from RFC 8089
       * Appendix E, but believe me, it was meant to be there. --MK)
       */
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
        /* there is a host name: "localhost" and "127.0.0.1" are the only
           ones accepted as local */
        if(checkprefix("localhost/", ptr) ||
           checkprefix("127.0.0.1/", ptr)) {
          /* both names are nine bytes, land on the slash behind them */
          ptr += 9;
        }
        else {
#if defined(_WIN32)
          size_t len;

          /* the host name, NetBIOS computer name, can not contain disallowed
             chars, and the delimiting slash character must be appended to the
             host name */
          path = strpbrk(ptr, "/\\:*?\"<>|");
          if(!path || *path != '/') {
            result = CURLUE_BAD_FILE_URL;
            goto fail;
          }

          len = path - ptr;
          if(len) {
            CURLcode code = Curl_dyn_addn(&host, ptr, len);
            if(code) {
              result = cc2cu(code);
              goto fail;
            }
            uncpath = TRUE;
          }

          ptr -= 2; /* back to the // in front of the host of the UNC */
#else
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
             none */
          result = CURLUE_BAD_FILE_URL;
          goto fail;
#endif
        }
      }

      path = ptr;
      pathlen = urllen - (ptr - url);
    }

    if(!uncpath)
      /* file: URLs have no host unless it is an UNC path */
      Curl_dyn_reset(&host);

#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
    /* Windows drive letters are refused when not on Windows.
     * This catches both "file:/c:" and "file:c:" */
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
      /* File drive letters are only accepted in MSDOS/Windows */
      result = CURLUE_BAD_FILE_URL;
      goto fail;
    }
#else
    /* A path of a slash followed by a drive letter: drop the slash by
       simply advancing the pointer */
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
      path++;
      pathlen--;
    }
#endif

  }

  /*
   * Fragment: everything from the first '#'
   */
  fragment = strchr(path, '#');
  if(fragment) {
    fraglen = pathlen - (fragment - path);
    if(fraglen > 1) {
      /* store what follows the '#', a lone '#' stores nothing */
      if(!(flags & CURLU_URLENCODE)) {
        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
        if(!u->fragment) {
          result = CURLUE_OUT_OF_MEMORY;
          goto fail;
        }
      }
      else {
        struct dynbuf enc;
        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
        if(result)
          goto fail;
        u->fragment = Curl_dyn_ptr(&enc);
      }
    }
    /* from here on, pathlen covers the path and the query only */
    pathlen -= fraglen;
  }

  /*
   * Query: from the first '?' up to the fragment (or the end)
   */
  query = memchr(path, '?', pathlen);
  if(query) {
    size_t qlen;
    if(fragment)
      qlen = (size_t)(fragment - query);
    else
      qlen = pathlen - (query - path);
    pathlen -= qlen;

    if(qlen <= 1) {
      /* nothing but the question mark: store an empty query */
      u->query = strdup("");
      if(!u->query) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
    else if(flags & CURLU_URLENCODE) {
      struct dynbuf enc;
      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
      /* the leading question mark is not part of the stored query */
      result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
      if(result)
        goto fail;
      u->query = Curl_dyn_ptr(&enc);
    }
    else {
      u->query = Curl_memdup0(query + 1, qlen - 1);
      if(!u->query) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
    }
  }

  /*
   * Path: what remains
   */
  if(pathlen && (flags & CURLU_URLENCODE)) {
    struct dynbuf enc;
    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
    if(result)
      goto fail;
    pathlen = Curl_dyn_len(&enc);
    path = u->path = Curl_dyn_ptr(&enc);
  }

  /* a remaining length of zero or one means no path or just the slash, in
     which case nothing further is stored */
  if(pathlen > 1) {
    if(u->path) {
      if(flags & CURLU_URLENCODE)
        /* it might have encoded more than just the path so cut it */
        u->path[pathlen] = 0;
    }
    else {
      u->path = Curl_memdup0(path, pathlen);
      if(!u->path) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      path = u->path;
    }

    if(!(flags & CURLU_PATH_AS_IS)) {
      /* remove ../ and ./ sequences according to RFC3986 */
      char *dedot;
      if(dedotdotify((char *)path, pathlen, &dedot)) {
        result = CURLUE_OUT_OF_MEMORY;
        goto fail;
      }
      if(dedot) {
        /* a cleaned-up copy was produced, it replaces the stored path */
        free(u->path);
        u->path = dedot;
      }
    }
  }

  /* the handle takes over the host buffer */
  u->host = Curl_dyn_ptr(&host);

  return result;

fail:
  Curl_dyn_free(&host);
  free_urlhandle(u);
  return result;
}
1350
1351
/*
1352
 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1353
 */
1354
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1355
                                      unsigned int flags)
1356
0
{
1357
0
  CURLUcode result;
1358
0
  CURLU tmpurl;
1359
0
  memset(&tmpurl, 0, sizeof(tmpurl));
1360
0
  result = parseurl(url, &tmpurl, flags);
1361
0
  if(!result) {
1362
0
    free_urlhandle(u);
1363
0
    *u = tmpurl;
1364
0
  }
1365
0
  return result;
1366
0
}
1367
1368
/*
1369
 */
1370
CURLU *curl_url(void)
1371
0
{
1372
0
  return calloc(1, sizeof(struct Curl_URL));
1373
0
}
1374
1375
/*
 * curl_url_cleanup() releases what the handle holds (via free_urlhandle())
 * and then the handle itself. Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1382
1383
#define DUP(dest, src, name)                    \
1384
0
  do {                                          \
1385
0
    if(src->name) {                             \
1386
0
      dest->name = strdup(src->name);           \
1387
0
      if(!dest->name)                           \
1388
0
        goto fail;                              \
1389
0
    }                                           \
1390
0
  } while(0)
1391
1392
CURLU *curl_url_dup(const CURLU *in)
1393
0
{
1394
0
  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1395
0
  if(u) {
1396
0
    DUP(u, in, scheme);
1397
0
    DUP(u, in, user);
1398
0
    DUP(u, in, password);
1399
0
    DUP(u, in, options);
1400
0
    DUP(u, in, host);
1401
0
    DUP(u, in, port);
1402
0
    DUP(u, in, path);
1403
0
    DUP(u, in, query);
1404
0
    DUP(u, in, fragment);
1405
0
    DUP(u, in, zoneid);
1406
0
    u->portnum = in->portnum;
1407
0
  }
1408
0
  return u;
1409
0
fail:
1410
0
  curl_url_cleanup(u);
1411
0
  return NULL;
1412
0
}
1413
1414
/*
 * curl_url_get() extracts the component selected by 'what' from the URL
 * handle 'u' and hands it back in '*part' as a newly allocated string.
 *
 * Bits in 'flags' that are acted upon here:
 *
 *  CURLU_URLDECODE        URL-decode the part (never scheme or port); for
 *                         the query this also turns '+' into space
 *  CURLU_URLENCODE        URL-encode the part
 *  CURLU_PUNYCODE         convert a non-ASCII host name with
 *                         Curl_idn_decode()
 *  CURLU_PUNY2IDN         convert an "xn--" host name with Curl_idn_encode()
 *  CURLU_DEFAULT_PORT     deliver the scheme's default port when none is
 *                         stored
 *  CURLU_NO_DEFAULT_PORT  suppress a stored port equal to the scheme default
 *  CURLU_DEFAULT_SCHEME   use DEFAULT_SCHEME for CURLUPART_URL when the
 *                         handle has no scheme
 *
 * Returns CURLUE_OK on success. On every error return '*part' is NULL and no
 * allocation is left behind. If the requested part is not set, the
 * part-specific CURLUE_NO_* code is returned; an unrecognized 'what' gives
 * CURLUE_UNKNOWN_PART.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *ptr;
  CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
  char portbuf[7];
  bool urldecode = (flags & CURLU_URLDECODE)?1:0;
  bool urlencode = (flags & CURLU_URLENCODE)?1:0;
  bool punycode = FALSE;
  bool depunyfy = FALSE;
  bool plusdecode = FALSE;
  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_SCHEME:
    ptr = u->scheme;
    ifmissing = CURLUE_NO_SCHEME;
    urldecode = FALSE; /* never for schemes */
    break;
  case CURLUPART_USER:
    ptr = u->user;
    ifmissing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    ptr = u->password;
    ifmissing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    ptr = u->options;
    ifmissing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    ptr = u->host;
    ifmissing = CURLUE_NO_HOST;
    punycode = (flags & CURLU_PUNYCODE)?1:0;
    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
    break;
  case CURLUPART_ZONEID:
    ptr = u->zoneid;
    ifmissing = CURLUE_NO_ZONEID;
    break;
  case CURLUPART_PORT:
    ptr = u->port;
    ifmissing = CURLUE_NO_PORT;
    urldecode = FALSE; /* never for port */
    if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
      /* there's no stored port number, but asked to deliver
         a default one for the scheme */
      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
      if(h) {
        msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
        ptr = portbuf;
      }
    }
    else if(ptr && u->scheme) {
      /* there is a stored port number, but ask to inhibit if
         it matches the default one for the scheme */
      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
      if(h && (h->defport == u->portnum) &&
         (flags & CURLU_NO_DEFAULT_PORT))
        ptr = NULL;
    }
    break;
  case CURLUPART_PATH:
    /* a path is always delivered: an unset one reads as "/" */
    ptr = u->path;
    if(!ptr)
      ptr = "/";
    break;
  case CURLUPART_QUERY:
    ptr = u->query;
    ifmissing = CURLUE_NO_QUERY;
    plusdecode = urldecode;
    break;
  case CURLUPART_FRAGMENT:
    ptr = u->fragment;
    ifmissing = CURLUE_NO_FRAGMENT;
    break;
  case CURLUPART_URL: {
    /* assemble the full URL from the stored parts; this case returns on its
       own and never reaches the generic copy code below */
    char *url;
    char *scheme;
    char *options = u->options;
    char *port = u->port;
    char *allochost = NULL;
    punycode = (flags & CURLU_PUNYCODE)?1:0;
    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
    if(u->scheme && strcasecompare("file", u->scheme)) {
      /* an unset path reads as "/", the same fallback as for all other
         schemes, instead of handing a NULL pointer to %s */
      url = aprintf("file://%s%s%s",
                    u->path ? u->path : "/",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
    }
    else if(!u->host)
      return CURLUE_NO_HOST;
    else {
      const struct Curl_handler *h = NULL;
      if(u->scheme)
        scheme = u->scheme;
      else if(flags & CURLU_DEFAULT_SCHEME)
        scheme = (char *) DEFAULT_SCHEME;
      else
        return CURLUE_NO_SCHEME;

      h = Curl_get_scheme_handler(scheme);
      if(!port && (flags & CURLU_DEFAULT_PORT)) {
        /* there's no stored port number, but asked to deliver
           a default one for the scheme */
        if(h) {
          msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          port = portbuf;
        }
      }
      else if(port) {
        /* there is a stored port number, but asked to inhibit if it matches
           the default one for the scheme */
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          port = NULL;
      }

      /* options are only output for schemes whose handler has
         PROTOPT_URLOPTIONS set; without a handler they are kept */
      if(h && !(h->flags & PROTOPT_URLOPTIONS))
        options = NULL;

      if(u->host[0] == '[') {
        if(u->zoneid) {
          /* make it '[ host %25 zoneid ]' */
          struct dynbuf enc;
          size_t hostlen = strlen(u->host);
          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
          /* copy the host without its final byte, then append the encoded
             percent sign, the zone id and a closing bracket */
          if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
                           u->zoneid))
            return CURLUE_OUT_OF_MEMORY;
          allochost = Curl_dyn_ptr(&enc);
        }
      }
      else if(urlencode) {
        allochost = curl_easy_escape(NULL, u->host, 0);
        if(!allochost)
          return CURLUE_OUT_OF_MEMORY;
      }
      else if(punycode) {
        if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
          return CURLUE_LACKS_IDN;
#else
          CURLcode result = Curl_idn_decode(u->host, &allochost);
          if(result)
            return (result == CURLE_OUT_OF_MEMORY) ?
              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
#endif
        }
      }
      else if(depunyfy) {
        if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
          return CURLUE_LACKS_IDN;
#else
          CURLcode result = Curl_idn_encode(u->host, &allochost);
          if(result)
            /* this is the most likely error */
            return (result == CURLE_OUT_OF_MEMORY) ?
              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
#endif
        }
      }

      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                    scheme,
                    u->user ? u->user : "",
                    u->password ? ":": "",
                    u->password ? u->password : "",
                    options ? ";" : "",
                    options ? options : "",
                    (u->user || u->password || options) ? "@": "",
                    allochost ? allochost : u->host,
                    port ? ":": "",
                    port ? port : "",
                    u->path ? u->path : "/",
                    (u->query && u->query[0]) ? "?": "",
                    (u->query && u->query[0]) ? u->query : "",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
      free(allochost);
    }
    if(!url)
      return CURLUE_OUT_OF_MEMORY;
    *part = url;
    return CURLUE_OK;
  }
  default:
    ptr = NULL;
    break;
  }
  if(ptr) {
    size_t partlen = strlen(ptr);
    size_t i = 0;
    *part = Curl_memdup0(ptr, partlen);
    if(!*part)
      return CURLUE_OUT_OF_MEMORY;
    if(plusdecode) {
      /* convert + to space */
      char *plus = *part;
      for(i = 0; i < partlen; ++plus, i++) {
        if(*plus == '+')
          *plus = ' ';
      }
    }
    if(urldecode) {
      char *decoded;
      size_t dlen;
      /* this unconditional rejection of control bytes is documented
         API behavior */
      CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
      free(*part);
      if(res) {
        *part = NULL;
        return CURLUE_URLDECODE;
      }
      *part = decoded;
      partlen = dlen;
    }
    if(urlencode) {
      struct dynbuf enc;
      CURLUcode uc;
      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
      uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
      if(uc) {
        /* do not hand back (or leak) the copy together with an error */
        Curl_safefree(*part);
        return uc;
      }
      /* NOTE(review): for an empty part the encoder presumably appends
         nothing, leaving the dynbuf without a buffer. Keep the existing
         (empty) copy then rather than returning OK with a NULL string. */
      if(Curl_dyn_ptr(&enc)) {
        free(*part);
        *part = Curl_dyn_ptr(&enc);
      }
    }
    else if(punycode) {
      if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
        Curl_safefree(*part);
        return CURLUE_LACKS_IDN;
#else
        char *allochost;
        CURLcode result = Curl_idn_decode(*part, &allochost);
        if(result) {
          Curl_safefree(*part);
          return (result == CURLE_OUT_OF_MEMORY) ?
            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
        }
        free(*part);
        *part = allochost;
#endif
      }
    }
    else if(depunyfy) {
      if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
        Curl_safefree(*part);
        return CURLUE_LACKS_IDN;
#else
        char *allochost;
        CURLcode result = Curl_idn_encode(*part, &allochost);
        if(result) {
          Curl_safefree(*part);
          return (result == CURLE_OUT_OF_MEMORY) ?
            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
        }
        free(*part);
        *part = allochost;
#endif
      }
    }

    return CURLUE_OK;
  }
  else
    return ifmissing;
}
1685
1686
/*
 * curl_url_set() stores a copy of 'part' as the component selected by 'what'
 * in the URL handle 'u', replacing what was stored before.
 *
 * A NULL 'part' clears the selected component; for CURLUPART_URL it clears
 * the entire handle.
 *
 * Bits in 'flags' that are acted upon here:
 *
 *  CURLU_URLENCODE           percent-encode the input (never scheme or
 *                            port); slashes are kept in a path and spaces
 *                            become '+' in a query
 *  CURLU_APPENDQUERY         append to the existing query, adding a '&'
 *                            separator when needed; together with
 *                            CURLU_URLENCODE the first '=' is left unencoded
 *  CURLU_NON_SUPPORT_SCHEME  accept a scheme without a known handler
 *  CURLU_NO_AUTHORITY        accept an empty host name
 *  CURLU_GUESS_SCHEME,
 *  CURLU_DEFAULT_SCHEME      used when deciding if a new URL is absolute
 *
 * For CURLUPART_URL, a relative URL is resolved against the URL currently
 * held by the handle, provided one can be extracted from it.
 *
 * Returns CURLUE_OK on success. Input longer than CURL_MAX_INPUT_LENGTH and
 * a blank URL give CURLUE_MALFORMED_INPUT; a rejected scheme, port or host
 * name gives the matching CURLUE_BAD_* / CURLUE_UNSUPPORTED_SCHEME code.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  long port = 0;
  bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
  bool plusencode = FALSE;
  bool urlskipslash = FALSE;
  bool leadingslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;
  size_t nalloc;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part) {
    /* setting a part to NULL clears it */
    switch(what) {
    case CURLUPART_URL:
      /* storep stays NULL, which clears the whole handle below */
      break;
    case CURLUPART_SCHEME:
      storep = &u->scheme;
      break;
    case CURLUPART_USER:
      storep = &u->user;
      break;
    case CURLUPART_PASSWORD:
      storep = &u->password;
      break;
    case CURLUPART_OPTIONS:
      storep = &u->options;
      break;
    case CURLUPART_HOST:
      storep = &u->host;
      break;
    case CURLUPART_ZONEID:
      storep = &u->zoneid;
      break;
    case CURLUPART_PORT:
      u->portnum = 0;
      storep = &u->port;
      break;
    case CURLUPART_PATH:
      storep = &u->path;
      break;
    case CURLUPART_QUERY:
      storep = &u->query;
      break;
    case CURLUPART_FRAGMENT:
      storep = &u->fragment;
      break;
    default:
      return CURLUE_UNKNOWN_PART;
    }
    if(storep && *storep) {
      Curl_safefree(*storep);
    }
    else if(!storep) {
      free_urlhandle(u);
      memset(u, 0, sizeof(struct Curl_URL));
    }
    return CURLUE_OK;
  }

  nalloc = strlen(part);
  if(nalloc > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  switch(what) {
  case CURLUPART_SCHEME: {
    const char *s = part;
    if((nalloc > MAX_SCHEME_LEN) || (nalloc < 1))
      /* too long or too short */
      return CURLUE_BAD_SCHEME;
    /* verify that it is a fine scheme */
    if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
      return CURLUE_UNSUPPORTED_SCHEME;
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    if(!ISALPHA(*s))
      return CURLUE_BAD_SCHEME;
    /* check every byte after the first, the last one included */
    for(s++; *s; s++) {
      if(!ISALNUM(*s) && (*s != '+') && (*s != '-') && (*s != '.'))
        return CURLUE_BAD_SCHEME;
    }
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    /* a zone id belongs to the host being replaced */
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
  {
    char *endp;
    /* strtol() skips leading white space and accepts a sign; since the
       string is stored verbatim, insist on it starting with a digit */
    if(!ISDIGIT(part[0]))
      return CURLUE_BAD_PORT_NUMBER;
    urlencode = FALSE; /* never */
    port = strtol(part, &endp, 10);  /* Port number must be decimal */
    if((port <= 0) || (port > 0xffff))
      return CURLUE_BAD_PORT_NUMBER;
    if(*endp)
      /* weirdly provided number, not good! */
      return CURLUE_BAD_PORT_NUMBER;
    storep = &u->port;
  }
  break;
  case CURLUPART_PATH:
    urlskipslash = TRUE;
    leadingslash = TRUE; /* enforce */
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY)?1:0;
    equalsencode = appendquery;
    storep = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    break;
  case CURLUPART_URL: {
    /*
     * Allow a new URL to replace the existing (if any) contents.
     *
     * If the existing contents is enough for a URL, allow a relative URL to
     * replace it.
     */
    CURLcode result;
    CURLUcode uc;
    char *oldurl;
    char *redired_url;

    if(!nalloc)
      /* a blank URL is not a valid URL */
      return CURLUE_MALFORMED_INPUT;

    /* if the new thing is absolute or the old one is not
     * (we could not get an absolute url in 'oldurl'),
     * then replace the existing with the new. */
    if(Curl_is_absolute_url(part, NULL, 0,
                            flags & (CURLU_GUESS_SCHEME|
                                     CURLU_DEFAULT_SCHEME))
       || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
      return parseurl_and_replace(part, u, flags);
    }

    /* apply the relative part to create a new URL
     * and replace the existing one with it. */
    result = concat_url(oldurl, part, &redired_url);
    free(oldurl);
    if(result)
      return cc2cu(result);

    uc = parseurl_and_replace(redired_url, u, flags);
    free(redired_url);
    return uc;
  }
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp;
    struct dynbuf enc;
    /* worst case every input byte becomes a three-byte %XX triplet, plus
       the terminating zero and possibly an added leading slash */
    Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);

    if(leadingslash && (part[0] != '/')) {
      CURLcode result = Curl_dyn_addn(&enc, "/", 1);
      if(result)
        return cc2cu(result);
    }
    if(urlencode) {
      const unsigned char *i;

      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = Curl_dyn_addn(&enc, "+", 1);
          if(result)
            return cc2cu(result);
        }
        else if(ISUNRESERVED(*i) ||
                ((*i == '/') && urlskipslash) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = Curl_dyn_addn(&enc, i, 1);
          if(result)
            return cc2cu(result);
        }
        else {
          char out[3]={'%'};
          out[1] = hexdigits[*i>>4];
          out[2] = hexdigits[*i & 0xf];
          result = Curl_dyn_addn(&enc, out, 3);
          if(result)
            return cc2cu(result);
        }
      }
    }
    else {
      char *p;
      CURLcode result = Curl_dyn_add(&enc, part);
      if(result)
        return cc2cu(result);
      p = Curl_dyn_ptr(&enc);
      while(*p) {
        /* make sure percent encoded are lower case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }
    newp = Curl_dyn_ptr(&enc);

    if(appendquery && newp) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen -1] != '&');
      if(querylen) {
        struct dynbuf qbuf;
        bool failed;
        Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);

        /* original query, optional separator, then the new piece; 'newp'
           lives in 'enc' so that buffer is freed only afterwards */
        failed = Curl_dyn_addn(&qbuf, u->query, querylen) ||
          (addamperand && Curl_dyn_addn(&qbuf, "&", 1)) ||
          Curl_dyn_add(&qbuf, newp);
        Curl_dyn_free(&enc);
        if(failed)
          return CURLUE_OUT_OF_MEMORY;

        free(*storep);
        *storep = Curl_dyn_ptr(&qbuf);
        return CURLUE_OK;
      }
      /* no previous query to append to: store the new one as-is below */
    }

    else if(what == CURLUPART_HOST) {
      size_t n = Curl_dyn_len(&enc);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it's allowed to be empty. */
      }
      else {
        if(!n || hostname_check(u, (char *)newp, n)) {
          Curl_dyn_free(&enc);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    free(*storep);
    *storep = (char *)newp;
  }
  /* set after the string, to make it not assigned if the allocation above
     fails */
  if(port)
    u->portnum = port;
  return CURLUE_OK;
}