Coverage Report

Created: 2025-07-23 06:49

/src/rauc/subprojects/glib-2.76.5/glib/guri.c
Line
Count
Source (jump to first uncovered line)
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright © 2020 Red Hat, Inc.
3
 *
4
 * SPDX-License-Identifier: LGPL-2.1-or-later
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General
17
 * Public License along with this library; if not, see
18
 * <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "config.h"
22
23
#include <stdlib.h>
24
#include <string.h>
25
26
#include "glib.h"
27
#include "glibintl.h"
28
#include "guriprivate.h"
29
30
/**
31
 * SECTION:guri
32
 * @short_description: URI-handling utilities
33
 * @include: glib.h
34
 *
35
 * The #GUri type and related functions can be used to parse URIs into
36
 * their components, and build valid URIs from individual components.
37
 *
38
 * Note that #GUri scope is to help manipulate URIs in various applications,
39
 * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
40
 * it doesn't intend to cover web browser needs, and doesn't implement the
41
 * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
42
 * help prevent
43
 * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
44
 * #GUri is not suitable for formatting URIs for display to the user for making
45
 * security-sensitive decisions.
46
 *
47
 * ## Relative and absolute URIs # {#relative-absolute-uris}
48
 *
49
 * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
50
 * hierarchical nature of URIs means that they can either be ‘relative
51
 * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
52
 * clarity, ‘URIs’ are referred to in this documentation as
53
 * ‘absolute URIs’ — although
54
 * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
55
 * fragment identifiers are always allowed).
56
 *
57
 * Relative references have one or more components of the URI missing. In
58
 * particular, they have no scheme. Any other component, such as hostname,
59
 * query, etc. may be missing, apart from a path, which has to be specified (but
60
 * may be empty). The path may be relative, starting with `./` rather than `/`.
61
 *
62
 * For example, a valid relative reference is `./path?query`,
63
 * `/?query#fragment` or `//example.com`.
64
 *
65
 * Absolute URIs have a scheme specified. Any other components of the URI which
66
 * are missing are specified as explicitly unset in the URI, rather than being
67
 * resolved relative to a base URI using g_uri_parse_relative().
68
 *
69
 * For example, a valid absolute URI is `file:///home/bob` or
70
 * `https://search.com?query=string`.
71
 *
72
 * A #GUri instance is always an absolute URI. A string may be an absolute URI
73
 * or a relative reference; see the documentation for individual functions as to
74
 * what forms they accept.
75
 *
76
 * ## Parsing URIs
77
 *
78
 * The most minimalist APIs for parsing URIs are g_uri_split() and
79
 * g_uri_split_with_user(). These split a URI into its component
80
 * parts, and return the parts; the difference between the two is that
81
 * g_uri_split() treats the ‘userinfo’ component of the URI as a
82
 * single element, while g_uri_split_with_user() can (depending on the
83
 * #GUriFlags you pass) treat it as containing a username, password,
84
 * and authentication parameters. Alternatively, g_uri_split_network()
85
 * can be used when you are only interested in the components that are
86
 * needed to initiate a network connection to the service (scheme,
87
 * host, and port).
88
 *
89
 * g_uri_parse() is similar to g_uri_split(), but instead of returning
90
 * individual strings, it returns a #GUri structure (and it requires
91
 * that the URI be an absolute URI).
92
 *
93
 * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
94
 * resolve a relative URI relative to a base URI.
95
 * g_uri_resolve_relative() takes two strings and returns a string,
96
 * and g_uri_parse_relative() takes a #GUri and a string and returns a
97
 * #GUri.
98
 *
99
 * All of the parsing functions take a #GUriFlags argument describing
100
 * exactly how to parse the URI; see the documentation for that type
101
 * for more details on the specific flags that you can pass. If you
102
 * need to choose different flags based on the type of URI, you can
103
 * use g_uri_peek_scheme() on the URI string to check the scheme
104
 * first, and use that to decide what flags to parse it with.
105
 *
106
 * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
107
 * params for a web URI, so compare the result of g_uri_peek_scheme() against
108
 * `http` and `https`.
109
 *
110
 * ## Building URIs
111
 *
112
 * g_uri_join() and g_uri_join_with_user() can be used to construct
113
 * valid URI strings from a set of component strings. They are the
114
 * inverse of g_uri_split() and g_uri_split_with_user().
115
 *
116
 * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
117
 * construct a #GUri from a set of component strings.
118
 *
119
 * As with the parsing functions, the building functions take a
120
 * #GUriFlags argument. In particular, it is important to keep in mind
121
 * whether the URI components you are using are already `%`-encoded. If so,
122
 * you must pass the %G_URI_FLAGS_ENCODED flag.
123
 *
124
 * ## `file://` URIs
125
 *
126
 * Note that Windows and Unix both define special rules for parsing
127
 * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
128
 * interpretation of path separators on Windows). #GUri does not
129
 * implement these rules. Use g_filename_from_uri() and
130
 * g_filename_to_uri() if you want to properly convert between
131
 * `file://` URIs and local filenames.
132
 *
133
 * ## URI Equality
134
 *
135
 * Note that there is no `g_uri_equal ()` function, because comparing
136
 * URIs usefully requires scheme-specific knowledge that #GUri does
137
 * not have. #GUri can help with normalization if you use the various
138
 * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
139
 * it is not comprehensive.
140
 * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
141
 * thing according to the `data:` URI specification which GLib does not
142
 * handle.
143
 *
144
 * Since: 2.66
145
 */
146
147
/**
148
 * GUri:
149
 *
150
 * A parsed absolute URI.
151
 *
152
 * Since #GUri only represents absolute URIs, all #GUris will have a
153
 * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
154
 * answer. Likewise, by definition, all URIs have a path component, so
155
 * g_uri_get_path() will always return a non-%NULL string (which may be empty).
156
 *
157
 * If the URI string has an
158
 * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
159
 * is, if the scheme is followed by `://` rather than just `:`), then the
160
 * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
161
 * Additionally, depending on how the #GUri was constructed/parsed (for example,
162
 * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
163
 * the userinfo may be split out into a username, password, and
164
 * additional authorization-related parameters.
165
 *
166
 * Normally, the components of a #GUri will have all `%`-encoded
167
 * characters decoded. However, if you construct/parse a #GUri with
168
 * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
169
 * the userinfo, path, and query fields (and in the host field if also
170
 * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
171
 * the URI may contain binary data or non-UTF-8 text, or if decoding
172
 * the components might change the interpretation of the URI.
173
 *
174
 * For example, with the encoded flag:
175
 *
176
 * |[<!-- language="C" -->
177
 *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
178
 *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
179
 * ]|
180
 *
181
 * While the default `%`-decoding behaviour would give:
182
 *
183
 * |[<!-- language="C" -->
184
 *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
185
 *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
186
 * ]|
187
 *
188
 * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
189
 * with an error indicating the bad string location:
190
 *
191
 * |[<!-- language="C" -->
192
 *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
193
 *   g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
194
 * ]|
195
 *
196
 * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
197
 * need to handle that case manually. In particular, if the query string
198
 * contains `=` characters that are `%`-encoded, you should let
199
 * g_uri_parse_params() do the decoding once of the query.
200
 *
201
 * #GUri is immutable once constructed, and can safely be accessed from
202
 * multiple threads. Its reference counting is atomic.
203
 *
204
 * Since: 2.66
205
 */
206
struct _GUri {
207
  gchar     *scheme;
208
  gchar     *userinfo;
209
  gchar     *host;
210
  gint       port;
211
  gchar     *path;
212
  gchar     *query;
213
  gchar     *fragment;
214
215
  gchar     *user;
216
  gchar     *password;
217
  gchar     *auth_params;
218
219
  GUriFlags  flags;
220
};
221
222
/**
223
 * g_uri_ref: (skip)
224
 * @uri: a #GUri
225
 *
226
 * Increments the reference count of @uri by one.
227
 *
228
 * Returns: @uri
229
 *
230
 * Since: 2.66
231
 */
232
GUri *
233
g_uri_ref (GUri *uri)
234
0
{
235
0
  g_return_val_if_fail (uri != NULL, NULL);
236
237
0
  return g_atomic_rc_box_acquire (uri);
238
0
}
239
240
static void
241
g_uri_clear (GUri *uri)
242
0
{
243
0
  g_free (uri->scheme);
244
0
  g_free (uri->userinfo);
245
0
  g_free (uri->host);
246
0
  g_free (uri->path);
247
0
  g_free (uri->query);
248
0
  g_free (uri->fragment);
249
0
  g_free (uri->user);
250
0
  g_free (uri->password);
251
0
  g_free (uri->auth_params);
252
0
}
253
254
/**
255
 * g_uri_unref: (skip)
256
 * @uri: a #GUri
257
 *
258
 * Atomically decrements the reference count of @uri by one.
259
 *
260
 * When the reference count reaches zero, the resources allocated by
261
 * @uri are freed
262
 *
263
 * Since: 2.66
264
 */
265
void
266
g_uri_unref (GUri *uri)
267
0
{
268
0
  g_return_if_fail (uri != NULL);
269
270
0
  g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
271
0
}
272
273
static gboolean
274
g_uri_char_is_unreserved (gchar ch)
275
0
{
276
0
  if (g_ascii_isalnum (ch))
277
0
    return TRUE;
278
0
  return ch == '-' || ch == '.' || ch == '_' || ch == '~';
279
0
}
280
281
0
/* Numeric value (0-15) of the hexadecimal digit @c.  The caller must have
 * checked that @c really is a hex digit.  Masking with 0x4F folds the
 * lowercase letters 'a'-'f' onto 'A'-'F'.  Note that @c is evaluated more
 * than once, so it must not have side effects.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

/* Value of the byte written as the "%XX" triplet that @s points at
 * (@s[0] is the '%', @s[1] and @s[2] are the two hex digits).  The argument
 * is parenthesized so that pointer expressions such as `p + 1` expand
 * correctly.
 */
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
283
284
static gssize
285
uri_decoder (gchar       **out,
286
             const gchar  *illegal_chars,
287
             const gchar  *start,
288
             gsize         length,
289
             gboolean      just_normalize,
290
             gboolean      www_form,
291
             GUriFlags     flags,
292
             GUriError     parse_error,
293
             GError      **error)
294
0
{
295
0
  gchar c;
296
0
  GString *decoded;
297
0
  const gchar *invalid, *s, *end;
298
0
  gssize len;
299
300
0
  if (!(flags & G_URI_FLAGS_ENCODED))
301
0
    just_normalize = FALSE;
302
303
0
  decoded = g_string_sized_new (length + 1);
304
0
  for (s = start, end = s + length; s < end; s++)
305
0
    {
306
0
      if (*s == '%')
307
0
        {
308
0
          if (s + 2 >= end ||
309
0
              !g_ascii_isxdigit (s[1]) ||
310
0
              !g_ascii_isxdigit (s[2]))
311
0
            {
312
              /* % followed by non-hex or the end of the string; this is an error */
313
0
              if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
314
0
                {
315
0
                  g_set_error_literal (error, G_URI_ERROR, parse_error,
316
                                       /* xgettext: no-c-format */
317
0
                                       _("Invalid %-encoding in URI"));
318
0
                  g_string_free (decoded, TRUE);
319
0
                  return -1;
320
0
                }
321
322
              /* In non-strict mode, just let it through; we *don't*
323
               * fix it to "%25", since that might change the way that
324
               * the URI's owner would interpret it.
325
               */
326
0
              g_string_append_c (decoded, *s);
327
0
              continue;
328
0
            }
329
330
0
          c = HEXCHAR (s);
331
0
          if (illegal_chars && strchr (illegal_chars, c))
332
0
            {
333
0
              g_set_error_literal (error, G_URI_ERROR, parse_error,
334
0
                                   _("Illegal character in URI"));
335
0
              g_string_free (decoded, TRUE);
336
0
              return -1;
337
0
            }
338
0
          if (just_normalize && !g_uri_char_is_unreserved (c))
339
0
            {
340
              /* Leave the % sequence there but normalize it. */
341
0
              g_string_append_c (decoded, *s);
342
0
              g_string_append_c (decoded, g_ascii_toupper (s[1]));
343
0
              g_string_append_c (decoded, g_ascii_toupper (s[2]));
344
0
              s += 2;
345
0
            }
346
0
          else
347
0
            {
348
0
              g_string_append_c (decoded, c);
349
0
              s += 2;
350
0
            }
351
0
        }
352
0
      else if (www_form && *s == '+')
353
0
        g_string_append_c (decoded, ' ');
354
      /* Normalize any illegal characters. */
355
0
      else if (just_normalize && (!g_ascii_isgraph (*s)))
356
0
        g_string_append_printf (decoded, "%%%02X", (guchar)*s);
357
0
      else
358
0
        g_string_append_c (decoded, *s);
359
0
    }
360
361
0
  len = decoded->len;
362
0
  g_assert (len >= 0);
363
364
0
  if (!(flags & G_URI_FLAGS_ENCODED) &&
365
0
      !g_utf8_validate (decoded->str, len, &invalid))
366
0
    {
367
0
      g_set_error_literal (error, G_URI_ERROR, parse_error,
368
0
                           _("Non-UTF-8 characters in URI"));
369
0
      g_string_free (decoded, TRUE);
370
0
      return -1;
371
0
    }
372
373
0
  if (out)
374
0
    *out = g_string_free (decoded, FALSE);
375
0
  else
376
0
    g_string_free (decoded, TRUE);
377
378
0
  return len;
379
0
}
380
381
static gboolean
382
uri_decode (gchar       **out,
383
            const gchar  *illegal_chars,
384
            const gchar  *start,
385
            gsize         length,
386
            gboolean      www_form,
387
            GUriFlags     flags,
388
            GUriError     parse_error,
389
            GError      **error)
390
0
{
391
0
  return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
392
0
                      parse_error, error) != -1;
393
0
}
394
395
static gboolean
396
uri_normalize (gchar       **out,
397
               const gchar  *start,
398
               gsize         length,
399
               GUriFlags     flags,
400
               GUriError     parse_error,
401
               GError      **error)
402
0
{
403
0
  return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
404
0
                      parse_error, error) != -1;
405
0
}
406
407
static gboolean
408
is_valid (guchar       c,
409
          const gchar *reserved_chars_allowed)
410
0
{
411
0
  if (g_uri_char_is_unreserved (c))
412
0
    return TRUE;
413
414
0
  if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
415
0
    return TRUE;
416
417
0
  return FALSE;
418
0
}
419
420
void
421
_uri_encoder (GString      *out,
422
              const guchar *start,
423
              gsize         length,
424
              const gchar  *reserved_chars_allowed,
425
              gboolean      allow_utf8)
426
0
{
427
0
  static const gchar hex[] = "0123456789ABCDEF";
428
0
  const guchar *p = start;
429
0
  const guchar *end = p + length;
430
431
0
  while (p < end)
432
0
    {
433
0
      gunichar multibyte_utf8_char = 0;
434
435
0
      if (allow_utf8 && *p >= 0x80)
436
0
        multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
437
438
0
      if (multibyte_utf8_char > 0 &&
439
0
          multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
440
0
        {
441
0
          gint len = g_utf8_skip [*p];
442
0
          g_string_append_len (out, (gchar *)p, len);
443
0
          p += len;
444
0
        }
445
0
      else if (is_valid (*p, reserved_chars_allowed))
446
0
        {
447
0
          g_string_append_c (out, *p);
448
0
          p++;
449
0
        }
450
0
      else
451
0
        {
452
0
          g_string_append_c (out, '%');
453
0
          g_string_append_c (out, hex[*p >> 4]);
454
0
          g_string_append_c (out, hex[*p & 0xf]);
455
0
          p++;
456
0
        }
457
0
    }
458
0
}
459
460
/* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
461
 * support IPv6 zone identifiers.
462
 *
463
 * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
464
 * There’s no point supporting them until (a) they exist and (b) the rest of the
465
 * stack (notably, sockets) supports them.
466
 *
467
 * Rules:
468
 *
469
 * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
470
 *
471
 * ZoneID = 1*( unreserved / pct-encoded )
472
 *
473
 * IPv6addrz = IPv6address "%25" ZoneID
474
 *
475
 * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
476
 *
477
 * IPv6addrz = IPv6address "%" ZoneID
478
 */
479
static gboolean
480
parse_ip_literal (const gchar  *start,
481
                  gsize         length,
482
                  GUriFlags     flags,
483
                  gchar       **out,
484
                  GError      **error)
485
0
{
486
0
  gchar *pct, *zone_id = NULL;
487
0
  gchar *addr = NULL;
488
0
  gsize addr_length = 0;
489
0
  gsize zone_id_length = 0;
490
0
  gchar *decoded_zone_id = NULL;
491
492
0
  if (start[length - 1] != ']')
493
0
    goto bad_ipv6_literal;
494
495
  /* Drop the square brackets */
496
0
  addr = g_strndup (start + 1, length - 2);
497
0
  addr_length = length - 2;
498
499
  /* If there's an IPv6 scope ID, split out the zone. */
500
0
  pct = strchr (addr, '%');
501
0
  if (pct != NULL)
502
0
    {
503
0
      *pct = '\0';
504
505
0
      if (addr_length - (pct - addr) >= 4 &&
506
0
          *(pct + 1) == '2' && *(pct + 2) == '5')
507
0
        {
508
0
          zone_id = pct + 3;
509
0
          zone_id_length = addr_length - (zone_id - addr);
510
0
        }
511
0
      else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
512
0
               addr_length - (pct - addr) >= 2)
513
0
        {
514
0
          zone_id = pct + 1;
515
0
          zone_id_length = addr_length - (zone_id - addr);
516
0
        }
517
0
      else
518
0
        goto bad_ipv6_literal;
519
520
0
      g_assert (zone_id_length >= 1);
521
0
    }
522
523
  /* addr must be an IPv6 address */
524
0
  if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
525
0
    goto bad_ipv6_literal;
526
527
  /* Zone ID must be valid. It can contain %-encoded characters. */
528
0
  if (zone_id != NULL &&
529
0
      !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
530
0
                   flags, G_URI_ERROR_BAD_HOST, NULL))
531
0
    goto bad_ipv6_literal;
532
533
  /* Success */
534
0
  if (out != NULL && decoded_zone_id != NULL)
535
0
    *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
536
0
  else if (out != NULL)
537
0
    *out = g_steal_pointer (&addr);
538
539
0
  g_free (addr);
540
0
  g_free (decoded_zone_id);
541
542
0
  return TRUE;
543
544
0
bad_ipv6_literal:
545
0
  g_free (addr);
546
0
  g_free (decoded_zone_id);
547
0
  g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
548
0
               _("Invalid IPv6 address ‘%.*s’ in URI"),
549
0
               (gint)length, start);
550
551
0
  return FALSE;
552
0
}
553
554
static gboolean
555
parse_host (const gchar  *start,
556
            gsize         length,
557
            GUriFlags     flags,
558
            gchar       **out,
559
            GError      **error)
560
0
{
561
0
  gchar *decoded = NULL, *host;
562
0
  gchar *addr = NULL;
563
564
0
  if (*start == '[')
565
0
    {
566
0
      if (!parse_ip_literal (start, length, flags, &host, error))
567
0
        return FALSE;
568
0
      goto ok;
569
0
    }
570
571
0
  if (g_ascii_isdigit (*start))
572
0
    {
573
0
      addr = g_strndup (start, length);
574
0
      if (g_hostname_is_ip_address (addr))
575
0
        {
576
0
          host = addr;
577
0
          goto ok;
578
0
        }
579
0
      g_free (addr);
580
0
    }
581
582
0
  if (flags & G_URI_FLAGS_NON_DNS)
583
0
    {
584
0
      if (!uri_normalize (&decoded, start, length, flags,
585
0
                          G_URI_ERROR_BAD_HOST, error))
586
0
        return FALSE;
587
0
      host = g_steal_pointer (&decoded);
588
0
      goto ok;
589
0
    }
590
591
0
  flags &= ~G_URI_FLAGS_ENCODED;
592
0
  if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
593
0
                   G_URI_ERROR_BAD_HOST, error))
594
0
    return FALSE;
595
596
  /* You're not allowed to %-encode an IP address, so if it wasn't
597
   * one before, it better not be one now.
598
   */
599
0
  if (g_hostname_is_ip_address (decoded))
600
0
    {
601
0
      g_free (decoded);
602
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
603
0
                   _("Illegal encoded IP address ‘%.*s’ in URI"),
604
0
                   (gint)length, start);
605
0
      return FALSE;
606
0
    }
607
608
0
  if (g_hostname_is_non_ascii (decoded))
609
0
    {
610
0
      host = g_hostname_to_ascii (decoded);
611
0
      if (host == NULL)
612
0
        {
613
0
          g_free (decoded);
614
0
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
615
0
                       _("Illegal internationalized hostname ‘%.*s’ in URI"),
616
0
                       (gint) length, start);
617
0
          return FALSE;
618
0
        }
619
0
    }
620
0
  else
621
0
    {
622
0
      host = g_steal_pointer (&decoded);
623
0
    }
624
625
0
 ok:
626
0
  if (out)
627
0
    *out = g_steal_pointer (&host);
628
0
  g_free (host);
629
0
  g_free (decoded);
630
631
0
  return TRUE;
632
0
}
633
634
static gboolean
635
parse_port (const gchar  *start,
636
            gsize         length,
637
            gint         *out,
638
            GError      **error)
639
0
{
640
0
  gchar *end;
641
0
  gulong parsed_port;
642
643
  /* strtoul() allows leading + or -, so we have to check this first. */
644
0
  if (!g_ascii_isdigit (*start))
645
0
    {
646
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
647
0
                   _("Could not parse port ‘%.*s’ in URI"),
648
0
                   (gint)length, start);
649
0
      return FALSE;
650
0
    }
651
652
  /* We know that *(start + length) is either '\0' or a non-numeric
653
   * character, so strtoul() won't scan beyond it.
654
   */
655
0
  parsed_port = strtoul (start, &end, 10);
656
0
  if (end != start + length)
657
0
    {
658
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
659
0
                   _("Could not parse port ‘%.*s’ in URI"),
660
0
                   (gint)length, start);
661
0
      return FALSE;
662
0
    }
663
0
  else if (parsed_port > 65535)
664
0
    {
665
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
666
0
                   _("Port ‘%.*s’ in URI is out of range"),
667
0
                   (gint)length, start);
668
0
      return FALSE;
669
0
    }
670
671
0
  if (out)
672
0
    *out = parsed_port;
673
0
  return TRUE;
674
0
}
675
676
static gboolean
677
parse_userinfo (const gchar  *start,
678
                gsize         length,
679
                GUriFlags     flags,
680
                gchar       **user,
681
                gchar       **password,
682
                gchar       **auth_params,
683
                GError      **error)
684
0
{
685
0
  const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
686
687
0
  auth_params_end = start + length;
688
0
  if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
689
0
    password_end = memchr (start, ';', auth_params_end - start);
690
0
  if (!password_end)
691
0
    password_end = auth_params_end;
692
0
  if (flags & G_URI_FLAGS_HAS_PASSWORD)
693
0
    user_end = memchr (start, ':', password_end - start);
694
0
  if (!user_end)
695
0
    user_end = password_end;
696
697
0
  if (!uri_normalize (user, start, user_end - start, flags,
698
0
                      G_URI_ERROR_BAD_USER, error))
699
0
    return FALSE;
700
701
0
  if (*user_end == ':')
702
0
    {
703
0
      start = user_end + 1;
704
0
      if (!uri_normalize (password, start, password_end - start, flags,
705
0
                          G_URI_ERROR_BAD_PASSWORD, error))
706
0
        {
707
0
          if (user)
708
0
            g_clear_pointer (user, g_free);
709
0
          return FALSE;
710
0
        }
711
0
    }
712
0
  else if (password)
713
0
    *password = NULL;
714
715
0
  if (*password_end == ';')
716
0
    {
717
0
      start = password_end + 1;
718
0
      if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
719
0
                          G_URI_ERROR_BAD_AUTH_PARAMS, error))
720
0
        {
721
0
          if (user)
722
0
            g_clear_pointer (user, g_free);
723
0
          if (password)
724
0
            g_clear_pointer (password, g_free);
725
0
          return FALSE;
726
0
        }
727
0
    }
728
0
  else if (auth_params)
729
0
    *auth_params = NULL;
730
731
0
  return TRUE;
732
0
}
733
734
static gchar *
735
uri_cleanup (const gchar *uri_string)
736
0
{
737
0
  GString *copy;
738
0
  const gchar *end;
739
740
  /* Skip leading whitespace */
741
0
  while (g_ascii_isspace (*uri_string))
742
0
    uri_string++;
743
744
  /* Ignore trailing whitespace */
745
0
  end = uri_string + strlen (uri_string);
746
0
  while (end > uri_string && g_ascii_isspace (*(end - 1)))
747
0
    end--;
748
749
  /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
750
0
  copy = g_string_sized_new (end - uri_string);
751
0
  while (uri_string < end)
752
0
    {
753
0
      if (*uri_string == ' ')
754
0
        g_string_append (copy, "%20");
755
0
      else if (g_ascii_isspace (*uri_string))
756
0
        ;
757
0
      else
758
0
        g_string_append_c (copy, *uri_string);
759
0
      uri_string++;
760
0
    }
761
762
0
  return g_string_free (copy, FALSE);
763
0
}
764
765
static gboolean
766
should_normalize_empty_path (const char *scheme)
767
0
{
768
0
  const char * const schemes[] = { "https", "http", "wss", "ws" };
769
0
  gsize i;
770
0
  for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
771
0
    {
772
0
      if (!strcmp (schemes[i], scheme))
773
0
        return TRUE;
774
0
    }
775
0
  return FALSE;
776
0
}
777
778
/* Returns -1 when @port is the well-known port of @scheme (21 for "ftp",
 * 80 for "http"/"ws", 443 for "https"/"wss"); otherwise returns @port
 * unchanged.  The scheme comparison is case-sensitive.
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  const char *primary = NULL;
  const char *secondary = NULL;

  if (port == 21)
    {
      primary = "ftp";
    }
  else if (port == 80)
    {
      primary = "http";
      secondary = "ws";
    }
  else if (port == 443)
    {
      primary = "https";
      secondary = "wss";
    }

  if (primary != NULL && strcmp (scheme, primary) == 0)
    return -1;
  if (secondary != NULL && strcmp (scheme, secondary) == 0)
    return -1;

  return port;
}
810
811
/* Returns the well-known port for @scheme: 80 for "http"/"ws", 443 for
 * "https"/"wss", 21 for "ftp", and -1 for anything else.  The comparison
 * is case-sensitive.
 */
static int
default_scheme_port (const char *scheme)
{
  static const struct
  {
    const char *name;
    int port;
  } known[] = {
    { "http", 80 },
    { "ws", 80 },
    { "https", 443 },
    { "wss", 443 },
    { "ftp", 21 },
  };
  size_t idx;

  for (idx = 0; idx < sizeof known / sizeof known[0]; idx++)
    {
      if (strcmp (scheme, known[idx].name) == 0)
        return known[idx].port;
    }

  return -1;
}
825
826
static gboolean
827
g_uri_split_internal (const gchar  *uri_string,
828
                      GUriFlags     flags,
829
                      gchar       **scheme,
830
                      gchar       **userinfo,
831
                      gchar       **user,
832
                      gchar       **password,
833
                      gchar       **auth_params,
834
                      gchar       **host,
835
                      gint         *port,
836
                      gchar       **path,
837
                      gchar       **query,
838
                      gchar       **fragment,
839
                      GError      **error)
840
0
{
841
0
  const gchar *end, *colon, *at, *path_start, *semi, *question;
842
0
  const gchar *p, *bracket, *hostend;
843
0
  gchar *cleaned_uri_string = NULL;
844
0
  gchar *normalized_scheme = NULL;
845
846
0
  if (scheme)
847
0
    *scheme = NULL;
848
0
  if (userinfo)
849
0
    *userinfo = NULL;
850
0
  if (user)
851
0
    *user = NULL;
852
0
  if (password)
853
0
    *password = NULL;
854
0
  if (auth_params)
855
0
    *auth_params = NULL;
856
0
  if (host)
857
0
    *host = NULL;
858
0
  if (port)
859
0
    *port = -1;
860
0
  if (path)
861
0
    *path = NULL;
862
0
  if (query)
863
0
    *query = NULL;
864
0
  if (fragment)
865
0
    *fragment = NULL;
866
867
0
  if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
868
0
    {
869
0
      cleaned_uri_string = uri_cleanup (uri_string);
870
0
      uri_string = cleaned_uri_string;
871
0
    }
872
873
  /* Find scheme */
874
0
  p = uri_string;
875
0
  while (*p && (g_ascii_isalpha (*p) ||
876
0
               (p > uri_string && (g_ascii_isdigit (*p) ||
877
0
                                   *p == '.' || *p == '+' || *p == '-'))))
878
0
    p++;
879
880
0
  if (p > uri_string && *p == ':')
881
0
    {
882
0
      normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
883
0
      if (scheme)
884
0
        *scheme = g_steal_pointer (&normalized_scheme);
885
0
      p++;
886
0
    }
887
0
  else
888
0
    {
889
0
      if (scheme)
890
0
        *scheme = NULL;
891
0
      p = uri_string;
892
0
    }
893
894
  /* Check for authority */
895
0
  if (strncmp (p, "//", 2) == 0)
896
0
    {
897
0
      p += 2;
898
899
0
      path_start = p + strcspn (p, "/?#");
900
0
      at = memchr (p, '@', path_start - p);
901
0
      if (at)
902
0
        {
903
0
          if (flags & G_URI_FLAGS_PARSE_RELAXED)
904
0
            {
905
0
              gchar *next_at;
906
907
              /* Any "@"s in the userinfo must be %-encoded, but
908
               * people get this wrong sometimes. Since "@"s in the
909
               * hostname are unlikely (and also wrong anyway), assume
910
               * that if there are extra "@"s, they belong in the
911
               * userinfo.
912
               */
913
0
              do
914
0
                {
915
0
                  next_at = memchr (at + 1, '@', path_start - (at + 1));
916
0
                  if (next_at)
917
0
                    at = next_at;
918
0
                }
919
0
              while (next_at);
920
0
            }
921
922
0
          if (user || password || auth_params ||
923
0
              (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
924
0
            {
925
0
              if (!parse_userinfo (p, at - p, flags,
926
0
                                   user, password, auth_params,
927
0
                                   error))
928
0
                goto fail;
929
0
            }
930
931
0
          if (!uri_normalize (userinfo, p, at - p, flags,
932
0
                              G_URI_ERROR_BAD_USER, error))
933
0
            goto fail;
934
935
0
          p = at + 1;
936
0
        }
937
938
0
      if (flags & G_URI_FLAGS_PARSE_RELAXED)
939
0
        {
940
0
          semi = strchr (p, ';');
941
0
          if (semi && semi < path_start)
942
0
            {
943
              /* Technically, semicolons are allowed in the "host"
944
               * production, but no one ever does this, and some
945
               * schemes mistakenly use semicolon as a delimiter
946
               * marking the start of the path. We have to check this
947
               * after checking for userinfo though, because a
948
               * semicolon before the "@" must be part of the
949
               * userinfo.
950
               */
951
0
              path_start = semi;
952
0
            }
953
0
        }
954
955
      /* Find host and port. The host may be a bracket-delimited IPv6
956
       * address, in which case the colon delimiting the port must come
957
       * (immediately) after the close bracket.
958
       */
959
0
      if (*p == '[')
960
0
        {
961
0
          bracket = memchr (p, ']', path_start - p);
962
0
          if (bracket && *(bracket + 1) == ':')
963
0
            colon = bracket + 1;
964
0
          else
965
0
            colon = NULL;
966
0
        }
967
0
      else
968
0
        colon = memchr (p, ':', path_start - p);
969
970
0
      hostend = colon ? colon : path_start;
971
0
      if (!parse_host (p, hostend - p, flags, host, error))
972
0
        goto fail;
973
974
0
      if (colon && colon != path_start - 1)
975
0
        {
976
0
          p = colon + 1;
977
0
          if (!parse_port (p, path_start - p, port, error))
978
0
            goto fail;
979
0
        }
980
981
0
      p = path_start;
982
0
    }
983
984
  /* Find fragment. */
985
0
  end = p + strcspn (p, "#");
986
0
  if (*end == '#')
987
0
    {
988
0
      if (!uri_normalize (fragment, end + 1, strlen (end + 1),
989
0
                          flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
990
0
                          G_URI_ERROR_BAD_FRAGMENT, error))
991
0
        goto fail;
992
0
    }
993
994
  /* Find query */
995
0
  question = memchr (p, '?', end - p);
996
0
  if (question)
997
0
    {
998
0
      if (!uri_normalize (query, question + 1, end - (question + 1),
999
0
                          flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
1000
0
                          G_URI_ERROR_BAD_QUERY, error))
1001
0
        goto fail;
1002
0
      end = question;
1003
0
    }
1004
1005
0
  if (!uri_normalize (path, p, end - p,
1006
0
                      flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1007
0
                      G_URI_ERROR_BAD_PATH, error))
1008
0
    goto fail;
1009
1010
  /* Scheme-based normalization */
1011
0
  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1012
0
    {
1013
0
      const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1014
1015
0
      if (should_normalize_empty_path (scheme_str) && path && !**path)
1016
0
        {
1017
0
          g_free (*path);
1018
0
          *path = g_strdup ("/");
1019
0
        }
1020
1021
0
      if (port && *port == -1)
1022
0
        *port = default_scheme_port (scheme_str);
1023
0
    }
1024
1025
0
  g_free (normalized_scheme);
1026
0
  g_free (cleaned_uri_string);
1027
0
  return TRUE;
1028
1029
0
 fail:
1030
0
  if (scheme)
1031
0
    g_clear_pointer (scheme, g_free);
1032
0
  if (userinfo)
1033
0
    g_clear_pointer (userinfo, g_free);
1034
0
  if (host)
1035
0
    g_clear_pointer (host, g_free);
1036
0
  if (port)
1037
0
    *port = -1;
1038
0
  if (path)
1039
0
    g_clear_pointer (path, g_free);
1040
0
  if (query)
1041
0
    g_clear_pointer (query, g_free);
1042
0
  if (fragment)
1043
0
    g_clear_pointer (fragment, g_free);
1044
1045
0
  g_free (normalized_scheme);
1046
0
  g_free (cleaned_uri_string);
1047
0
  return FALSE;
1048
0
}
1049
1050
/**
1051
 * g_uri_split:
1052
 * @uri_ref: a string containing a relative or absolute URI
1053
 * @flags: flags for parsing @uri_ref
1054
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1055
 *    the scheme (converted to lowercase), or %NULL
1056
 * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1057
 *    the userinfo, or %NULL
1058
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1059
 *    host, or %NULL
1060
 * @port: (out) (optional) (transfer full): on return, contains the
1061
 *    port, or `-1`
1062
 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1063
 *    path
1064
 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1065
 *    query, or %NULL
1066
 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1067
 *    the fragment, or %NULL
1068
 * @error: #GError for error reporting, or %NULL to ignore.
1069
 *
1070
 * Parses @uri_ref (which can be an
1071
 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1072
 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1073
 * returned as %NULL (but note that all URIs always have a path component,
1074
 * though it may be the empty string).
1075
 *
1076
 * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1077
 * @uri_ref will remain encoded in the output strings. (If not,
1078
 * then all such characters will be decoded.) Note that decoding will
1079
 * only work if the URI components are ASCII or UTF-8, so you will
1080
 * need to use %G_URI_FLAGS_ENCODED if they are not.
1081
 *
1082
 * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1083
 * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1084
 * since it always returns only the full userinfo; use
1085
 * g_uri_split_with_user() if you want it split up.
1086
 *
1087
 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1088
 *   on error.
1089
 *
1090
 * Since: 2.66
1091
 */
1092
gboolean
1093
g_uri_split (const gchar  *uri_ref,
1094
             GUriFlags     flags,
1095
             gchar       **scheme,
1096
             gchar       **userinfo,
1097
             gchar       **host,
1098
             gint         *port,
1099
             gchar       **path,
1100
             gchar       **query,
1101
             gchar       **fragment,
1102
             GError      **error)
1103
0
{
1104
0
  g_return_val_if_fail (uri_ref != NULL, FALSE);
1105
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1106
1107
0
  return g_uri_split_internal (uri_ref, flags,
1108
0
                               scheme, userinfo, NULL, NULL, NULL,
1109
0
                               host, port, path, query, fragment,
1110
0
                               error);
1111
0
}
1112
1113
/**
1114
 * g_uri_split_with_user:
1115
 * @uri_ref: a string containing a relative or absolute URI
1116
 * @flags: flags for parsing @uri_ref
1117
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1118
 *    the scheme (converted to lowercase), or %NULL
1119
 * @user: (out) (nullable) (optional) (transfer full): on return, contains
1120
 *    the user, or %NULL
1121
 * @password: (out) (nullable) (optional) (transfer full): on return, contains
1122
 *    the password, or %NULL
1123
 * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1124
 *    the auth_params, or %NULL
1125
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1126
 *    host, or %NULL
1127
 * @port: (out) (optional) (transfer full): on return, contains the
1128
 *    port, or `-1`
1129
 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1130
 *    path
1131
 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1132
 *    query, or %NULL
1133
 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1134
 *    the fragment, or %NULL
1135
 * @error: #GError for error reporting, or %NULL to ignore.
1136
 *
1137
 * Parses @uri_ref (which can be an
1138
 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1139
 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1140
 * returned as %NULL (but note that all URIs always have a path component,
1141
 * though it may be the empty string).
1142
 *
1143
 * See g_uri_split(), and the definition of #GUriFlags, for more
1144
 * information on the effect of @flags. Note that @password will only
1145
 * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1146
 * @auth_params will only be parsed out if @flags contains
1147
 * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1148
 *
1149
 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1150
 *   on error.
1151
 *
1152
 * Since: 2.66
1153
 */
1154
gboolean
1155
g_uri_split_with_user (const gchar  *uri_ref,
1156
                       GUriFlags     flags,
1157
                       gchar       **scheme,
1158
                       gchar       **user,
1159
                       gchar       **password,
1160
                       gchar       **auth_params,
1161
                       gchar       **host,
1162
                       gint         *port,
1163
                       gchar       **path,
1164
                       gchar       **query,
1165
                       gchar       **fragment,
1166
                       GError      **error)
1167
0
{
1168
0
  g_return_val_if_fail (uri_ref != NULL, FALSE);
1169
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1170
1171
0
  return g_uri_split_internal (uri_ref, flags,
1172
0
                               scheme, NULL, user, password, auth_params,
1173
0
                               host, port, path, query, fragment,
1174
0
                               error);
1175
0
}
1176
1177
1178
/**
1179
 * g_uri_split_network:
1180
 * @uri_string: a string containing an absolute URI
1181
 * @flags: flags for parsing @uri_string
1182
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1183
 *    the scheme (converted to lowercase), or %NULL
1184
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1185
 *    host, or %NULL
1186
 * @port: (out) (optional) (transfer full): on return, contains the
1187
 *    port, or `-1`
1188
 * @error: #GError for error reporting, or %NULL to ignore.
1189
 *
1190
 * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1191
 * according to @flags, and returns the pieces relevant to connecting to a host.
1192
 * See the documentation for g_uri_split() for more details; this is
1193
 * mostly a wrapper around that function with simpler arguments.
1194
 * However, it will return an error if @uri_string is a relative URI,
1195
 * or does not contain a hostname component.
1196
 *
1197
 * Returns: (skip): %TRUE if @uri_string parsed successfully,
1198
 *   %FALSE on error.
1199
 *
1200
 * Since: 2.66
1201
 */
1202
gboolean
1203
g_uri_split_network (const gchar  *uri_string,
1204
                     GUriFlags     flags,
1205
                     gchar       **scheme,
1206
                     gchar       **host,
1207
                     gint         *port,
1208
                     GError      **error)
1209
0
{
1210
0
  gchar *my_scheme = NULL, *my_host = NULL;
1211
1212
0
  g_return_val_if_fail (uri_string != NULL, FALSE);
1213
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1214
1215
0
  if (!g_uri_split_internal (uri_string, flags,
1216
0
                             &my_scheme, NULL, NULL, NULL, NULL,
1217
0
                             &my_host, port, NULL, NULL, NULL,
1218
0
                             error))
1219
0
    return FALSE;
1220
1221
0
  if (!my_scheme || !my_host)
1222
0
    {
1223
0
      if (!my_scheme)
1224
0
        {
1225
0
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1226
0
                       _("URI ‘%s’ is not an absolute URI"),
1227
0
                       uri_string);
1228
0
        }
1229
0
      else
1230
0
        {
1231
0
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1232
0
                       _("URI ‘%s’ has no host component"),
1233
0
                       uri_string);
1234
0
        }
1235
0
      g_free (my_scheme);
1236
0
      g_free (my_host);
1237
1238
0
      return FALSE;
1239
0
    }
1240
1241
0
  if (scheme)
1242
0
    *scheme = g_steal_pointer (&my_scheme);
1243
0
  if (host)
1244
0
    *host = g_steal_pointer (&my_host);
1245
1246
0
  g_free (my_scheme);
1247
0
  g_free (my_host);
1248
1249
0
  return TRUE;
1250
0
}
1251
1252
/**
1253
 * g_uri_is_valid:
1254
 * @uri_string: a string containing an absolute URI
1255
 * @flags: flags for parsing @uri_string
1256
 * @error: #GError for error reporting, or %NULL to ignore.
1257
 *
1258
 * Parses @uri_string according to @flags, to determine whether it is a valid
1259
 * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1260
 * relative to another URI using g_uri_parse_relative().
1261
 *
1262
 * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1263
 *
1264
 * See g_uri_split(), and the definition of #GUriFlags, for more
1265
 * information on the effect of @flags.
1266
 *
1267
 * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1268
 *
1269
 * Since: 2.66
1270
 */
1271
gboolean
1272
g_uri_is_valid (const gchar  *uri_string,
1273
                GUriFlags     flags,
1274
                GError      **error)
1275
0
{
1276
0
  gchar *my_scheme = NULL;
1277
1278
0
  g_return_val_if_fail (uri_string != NULL, FALSE);
1279
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1280
1281
0
  if (!g_uri_split_internal (uri_string, flags,
1282
0
                             &my_scheme, NULL, NULL, NULL, NULL,
1283
0
                             NULL, NULL, NULL, NULL, NULL,
1284
0
                             error))
1285
0
    return FALSE;
1286
1287
0
  if (!my_scheme)
1288
0
    {
1289
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1290
0
                   _("URI ‘%s’ is not an absolute URI"),
1291
0
                   uri_string);
1292
0
      return FALSE;
1293
0
    }
1294
1295
0
  g_free (my_scheme);
1296
1297
0
  return TRUE;
1298
0
}
1299
1300
1301
/* Implements the "Remove Dot Segments" algorithm from section 5.2.4 of
1302
 * RFC 3986.
1303
 *
1304
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
1305
 */
1306
static void
1307
remove_dot_segments (gchar *path)
1308
0
{
1309
  /* The output can be written to the same buffer that the input
1310
   * is read from, as the output pointer is only ever increased
1311
   * when the input pointer is increased as well, and the input
1312
   * pointer is never decreased. */
1313
0
  gchar *input = path;
1314
0
  gchar *output = path;
1315
1316
0
  if (!*path)
1317
0
    return;
1318
1319
0
  while (*input)
1320
0
    {
1321
      /*  A.  If the input buffer begins with a prefix of "../" or "./",
1322
       *      then remove that prefix from the input buffer; otherwise,
1323
       */
1324
0
      if (strncmp (input, "../", 3) == 0)
1325
0
        input += 3;
1326
0
      else if (strncmp (input, "./", 2) == 0)
1327
0
        input += 2;
1328
1329
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.",
1330
       *      where "." is a complete path segment, then replace that
1331
       *      prefix with "/" in the input buffer; otherwise,
1332
       */
1333
0
      else if (strncmp (input, "/./", 3) == 0)
1334
0
        input += 2;
1335
0
      else if (strcmp (input, "/.") == 0)
1336
0
        input[1] = '\0';
1337
1338
      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
1339
       *      where ".." is a complete path segment, then replace that
1340
       *      prefix with "/" in the input buffer and remove the last
1341
       *      segment and its preceding "/" (if any) from the output
1342
       *      buffer; otherwise,
1343
       */
1344
0
      else if (strncmp (input, "/../", 4) == 0)
1345
0
        {
1346
0
          input += 3;
1347
0
          if (output > path)
1348
0
            {
1349
0
              do
1350
0
                {
1351
0
                  output--;
1352
0
                }
1353
0
              while (*output != '/' && output > path);
1354
0
            }
1355
0
        }
1356
0
      else if (strcmp (input, "/..") == 0)
1357
0
        {
1358
0
          input[1] = '\0';
1359
0
          if (output > path)
1360
0
            {
1361
0
              do
1362
0
                 {
1363
0
                   output--;
1364
0
                 }
1365
0
              while (*output != '/' && output > path);
1366
0
            }
1367
0
        }
1368
1369
      /*  D.  if the input buffer consists only of "." or "..", then remove
1370
       *      that from the input buffer; otherwise,
1371
       */
1372
0
      else if (strcmp (input, "..") == 0 || strcmp (input, ".") == 0)
1373
0
        input[0] = '\0';
1374
1375
      /*  E.  move the first path segment in the input buffer to the end of
1376
       *      the output buffer, including the initial "/" character (if
1377
       *      any) and any subsequent characters up to, but not including,
1378
       *      the next "/" character or the end of the input buffer.
1379
       */
1380
0
      else
1381
0
        {
1382
0
          *output++ = *input++;
1383
0
          while (*input && *input != '/')
1384
0
            *output++ = *input++;
1385
0
        }
1386
0
    }
1387
0
  *output = '\0';
1388
0
}
1389
1390
/**
1391
 * g_uri_parse:
1392
 * @uri_string: a string representing an absolute URI
1393
 * @flags: flags describing how to parse @uri_string
1394
 * @error: #GError for error reporting, or %NULL to ignore.
1395
 *
1396
 * Parses @uri_string according to @flags. If the result is not a
1397
 * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1398
 * error returned.
1399
 *
1400
 * Return value: (transfer full): a new #GUri, or NULL on error.
1401
 *
1402
 * Since: 2.66
1403
 */
1404
GUri *
1405
g_uri_parse (const gchar  *uri_string,
1406
             GUriFlags     flags,
1407
             GError      **error)
1408
0
{
1409
0
  g_return_val_if_fail (uri_string != NULL, NULL);
1410
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1411
1412
0
  return g_uri_parse_relative (NULL, uri_string, flags, error);
1413
0
}
1414
1415
/**
1416
 * g_uri_parse_relative:
1417
 * @base_uri: (nullable) (transfer none): a base absolute URI
1418
 * @uri_ref: a string representing a relative or absolute URI
1419
 * @flags: flags describing how to parse @uri_ref
1420
 * @error: #GError for error reporting, or %NULL to ignore.
1421
 *
1422
 * Parses @uri_ref according to @flags and, if it is a
1423
 * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1424
 * If the result is not a valid absolute URI, it will be discarded, and an error
1425
 * returned.
1426
 *
1427
 * Return value: (transfer full): a new #GUri, or NULL on error.
1428
 *
1429
 * Since: 2.66
1430
 */
1431
GUri *
1432
g_uri_parse_relative (GUri         *base_uri,
1433
                      const gchar  *uri_ref,
1434
                      GUriFlags     flags,
1435
                      GError      **error)
1436
0
{
1437
0
  GUri *uri = NULL;
1438
1439
0
  g_return_val_if_fail (uri_ref != NULL, NULL);
1440
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1441
0
  g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1442
1443
  /* Use GUri struct to construct the return value: there is no guarantee it is
1444
   * actually correct within the function body. */
1445
0
  uri = g_atomic_rc_box_new0 (GUri);
1446
0
  uri->flags = flags;
1447
1448
0
  if (!g_uri_split_internal (uri_ref, flags,
1449
0
                             &uri->scheme, &uri->userinfo,
1450
0
                             &uri->user, &uri->password, &uri->auth_params,
1451
0
                             &uri->host, &uri->port,
1452
0
                             &uri->path, &uri->query, &uri->fragment,
1453
0
                             error))
1454
0
    {
1455
0
      g_uri_unref (uri);
1456
0
      return NULL;
1457
0
    }
1458
1459
0
  if (!uri->scheme && !base_uri)
1460
0
    {
1461
0
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1462
0
                           _("URI is not absolute, and no base URI was provided"));
1463
0
      g_uri_unref (uri);
1464
0
      return NULL;
1465
0
    }
1466
1467
0
  if (base_uri)
1468
0
    {
1469
      /* This is section 5.2.2 of RFC 3986, except that we're doing
1470
       * it in place in @uri rather than copying from R to T.
1471
       *
1472
       * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1473
       */
1474
0
      if (uri->scheme)
1475
0
        remove_dot_segments (uri->path);
1476
0
      else
1477
0
        {
1478
0
          uri->scheme = g_strdup (base_uri->scheme);
1479
0
          if (uri->host)
1480
0
            remove_dot_segments (uri->path);
1481
0
          else
1482
0
            {
1483
0
              if (!*uri->path)
1484
0
                {
1485
0
                  g_free (uri->path);
1486
0
                  uri->path = g_strdup (base_uri->path);
1487
0
                  if (!uri->query)
1488
0
                    uri->query = g_strdup (base_uri->query);
1489
0
                }
1490
0
              else
1491
0
                {
1492
0
                  if (*uri->path == '/')
1493
0
                    remove_dot_segments (uri->path);
1494
0
                  else
1495
0
                    {
1496
0
                      gchar *newpath, *last;
1497
1498
0
                      last = strrchr (base_uri->path, '/');
1499
0
                      if (last)
1500
0
                        {
1501
0
                          newpath = g_strdup_printf ("%.*s/%s",
1502
0
                                                     (gint)(last - base_uri->path),
1503
0
                                                     base_uri->path,
1504
0
                                                     uri->path);
1505
0
                        }
1506
0
                      else
1507
0
                        newpath = g_strdup_printf ("/%s", uri->path);
1508
1509
0
                      g_free (uri->path);
1510
0
                      uri->path = g_steal_pointer (&newpath);
1511
1512
0
                      remove_dot_segments (uri->path);
1513
0
                    }
1514
0
                }
1515
1516
0
              uri->userinfo = g_strdup (base_uri->userinfo);
1517
0
              uri->user = g_strdup (base_uri->user);
1518
0
              uri->password = g_strdup (base_uri->password);
1519
0
              uri->auth_params = g_strdup (base_uri->auth_params);
1520
0
              uri->host = g_strdup (base_uri->host);
1521
0
              uri->port = base_uri->port;
1522
0
            }
1523
0
        }
1524
1525
      /* Scheme normalization couldn't have been done earlier
1526
       * as the relative URI may not have had a scheme */
1527
0
      if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1528
0
        {
1529
0
          if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1530
0
            {
1531
0
              g_free (uri->path);
1532
0
              uri->path = g_strdup ("/");
1533
0
            }
1534
1535
0
          uri->port = normalize_port (uri->scheme, uri->port);
1536
0
        }
1537
0
    }
1538
0
  else
1539
0
    {
1540
0
      remove_dot_segments (uri->path);
1541
0
    }
1542
1543
0
  return g_steal_pointer (&uri);
1544
0
}
1545
1546
/**
1547
 * g_uri_resolve_relative:
1548
 * @base_uri_string: (nullable): a string representing a base URI
1549
 * @uri_ref: a string representing a relative or absolute URI
1550
 * @flags: flags describing how to parse @uri_ref
1551
 * @error: #GError for error reporting, or %NULL to ignore.
1552
 *
1553
 * Parses @uri_ref according to @flags and, if it is a
1554
 * [relative URI][relative-absolute-uris], resolves it relative to
1555
 * @base_uri_string. If the result is not a valid absolute URI, it will be
1556
 * discarded, and an error returned.
1557
 *
1558
 * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1559
 * %NULL if @uri_ref is invalid or not absolute.)
1560
 *
1561
 * Return value: (transfer full): the resolved URI string,
1562
 * or NULL on error.
1563
 *
1564
 * Since: 2.66
1565
 */
1566
gchar *
1567
g_uri_resolve_relative (const gchar  *base_uri_string,
1568
                        const gchar  *uri_ref,
1569
                        GUriFlags     flags,
1570
                        GError      **error)
1571
0
{
1572
0
  GUri *base_uri, *resolved_uri;
1573
0
  gchar *resolved_uri_string;
1574
1575
0
  g_return_val_if_fail (uri_ref != NULL, NULL);
1576
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1577
1578
0
  flags |= G_URI_FLAGS_ENCODED;
1579
1580
0
  if (base_uri_string)
1581
0
    {
1582
0
      base_uri = g_uri_parse (base_uri_string, flags, error);
1583
0
      if (!base_uri)
1584
0
        return NULL;
1585
0
    }
1586
0
  else
1587
0
    base_uri = NULL;
1588
1589
0
  resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1590
0
  if (base_uri)
1591
0
    g_uri_unref (base_uri);
1592
0
  if (!resolved_uri)
1593
0
    return NULL;
1594
1595
0
  resolved_uri_string = g_uri_to_string (resolved_uri);
1596
0
  g_uri_unref (resolved_uri);
1597
0
  return g_steal_pointer (&resolved_uri_string);
1598
0
}
1599
1600
/* userinfo as a whole can contain sub-delims + ":", but split-out
1601
 * user can't contain ":" or ";", and split-out password can't contain
1602
 * ";".
 *
 * Each macro below expands to a string of reserved characters associated
 * with one URI component. For example, g_uri_join_internal() passes
 * USERINFO_ALLOWED_CHARS as the allowed-characters argument of
 * g_string_append_uri_escaped(); presumably the other sets are used the
 * same way for their components (confirm at the call sites).
 *
 * USER_ALLOWED_CHARS and PASSWORD_ALLOWED_CHARS are spelled out literally;
 * they differ only in that the password set additionally contains ":".
 * QUERY_ALLOWED_CHARS and FRAGMENT_ALLOWED_CHARS are both the path set
 * with "?" appended.
1603
 */
1604
0
#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
1605
0
#define USER_ALLOWED_CHARS "!$&'()*+,="
1606
0
#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
1607
0
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
1608
0
#define IP_ADDR_ALLOWED_CHARS ":"
1609
0
#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
1610
0
#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
1611
0
#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1612
0
#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1613
1614
static gchar *
1615
g_uri_join_internal (GUriFlags    flags,
1616
                     const gchar *scheme,
1617
                     gboolean     userinfo,
1618
                     const gchar *user,
1619
                     const gchar *password,
1620
                     const gchar *auth_params,
1621
                     const gchar *host,
1622
                     gint         port,
1623
                     const gchar *path,
1624
                     const gchar *query,
1625
                     const gchar *fragment)
1626
0
{
1627
0
  gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1628
0
  GString *str;
1629
0
  char *normalized_scheme = NULL;
1630
1631
  /* Restrictions on path prefixes. See:
1632
   * https://tools.ietf.org/html/rfc3986#section-3
1633
   */
1634
0
  g_return_val_if_fail (path != NULL, NULL);
1635
0
  g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1636
0
  g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1637
1638
  /* Arbitrarily chosen default size which should handle most average length
1639
   * URIs. This should avoid a few reallocations of the buffer in most cases.
1640
   * It’s 1B shorter than a power of two, since GString will add a
1641
   * nul-terminator byte. */
1642
0
  str = g_string_sized_new (127);
1643
1644
0
  if (scheme)
1645
0
    {
1646
0
      g_string_append (str, scheme);
1647
0
      g_string_append_c (str, ':');
1648
0
    }
1649
1650
0
  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1651
0
    normalized_scheme = g_ascii_strdown (scheme, -1);
1652
1653
0
  if (host)
1654
0
    {
1655
0
      g_string_append (str, "//");
1656
1657
0
      if (user)
1658
0
        {
1659
0
          if (encoded)
1660
0
            g_string_append (str, user);
1661
0
          else
1662
0
            {
1663
0
              if (userinfo)
1664
0
                g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1665
0
              else
1666
                /* Encode ':' and ';' regardless of whether we have a
1667
                 * password or auth params, since it may be parsed later
1668
                 * under the assumption that it does.
1669
                 */
1670
0
                g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1671
0
            }
1672
1673
0
          if (password)
1674
0
            {
1675
0
              g_string_append_c (str, ':');
1676
0
              if (encoded)
1677
0
                g_string_append (str, password);
1678
0
              else
1679
0
                g_string_append_uri_escaped (str, password,
1680
0
                                             PASSWORD_ALLOWED_CHARS, TRUE);
1681
0
            }
1682
1683
0
          if (auth_params)
1684
0
            {
1685
0
              g_string_append_c (str, ';');
1686
0
              if (encoded)
1687
0
                g_string_append (str, auth_params);
1688
0
              else
1689
0
                g_string_append_uri_escaped (str, auth_params,
1690
0
                                             AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1691
0
            }
1692
1693
0
          g_string_append_c (str, '@');
1694
0
        }
1695
1696
0
      if (strchr (host, ':') && g_hostname_is_ip_address (host))
1697
0
        {
1698
0
          g_string_append_c (str, '[');
1699
0
          if (encoded)
1700
0
            g_string_append (str, host);
1701
0
          else
1702
0
            g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1703
0
          g_string_append_c (str, ']');
1704
0
        }
1705
0
      else
1706
0
        {
1707
0
          if (encoded)
1708
0
            g_string_append (str, host);
1709
0
          else
1710
0
            g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1711
0
        }
1712
1713
0
      if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1714
0
        g_string_append_printf (str, ":%d", port);
1715
0
    }
1716
1717
0
  if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1718
0
    g_string_append (str, "/");
1719
0
  else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1720
0
    g_string_append (str, path);
1721
0
  else
1722
0
    g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1723
1724
0
  g_free (normalized_scheme);
1725
1726
0
  if (query)
1727
0
    {
1728
0
      g_string_append_c (str, '?');
1729
0
      if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1730
0
        g_string_append (str, query);
1731
0
      else
1732
0
        g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1733
0
    }
1734
0
  if (fragment)
1735
0
    {
1736
0
      g_string_append_c (str, '#');
1737
0
      if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1738
0
        g_string_append (str, fragment);
1739
0
      else
1740
0
        g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1741
0
    }
1742
1743
0
  return g_string_free (str, FALSE);
1744
0
}
1745
1746
/**
1747
 * g_uri_join:
1748
 * @flags: flags describing how to build the URI string
1749
 * @scheme: (nullable): the URI scheme, or %NULL
1750
 * @userinfo: (nullable): the userinfo component, or %NULL
1751
 * @host: (nullable): the host component, or %NULL
1752
 * @port: the port, or `-1`
1753
 * @path: (not nullable): the path component
1754
 * @query: (nullable): the query component, or %NULL
1755
 * @fragment: (nullable): the fragment, or %NULL
1756
 *
1757
 * Joins the given components together according to @flags to create
1758
 * an absolute URI string. @path may not be %NULL (though it may be the empty
1759
 * string).
1760
 *
1761
 * When @host is present, @path must either be empty or begin with a slash (`/`)
1762
 * character. When @host is not present, @path cannot begin with two slash
1763
 * characters (`//`). See
1764
 * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1765
 *
1766
 * See also g_uri_join_with_user(), which allows specifying the
1767
 * components of the ‘userinfo’ separately.
1768
 *
1769
 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1770
 * in @flags.
1771
 *
1772
 * Return value: (not nullable) (transfer full): an absolute URI string
1773
 *
1774
 * Since: 2.66
1775
 */
1776
gchar *
1777
g_uri_join (GUriFlags    flags,
1778
            const gchar *scheme,
1779
            const gchar *userinfo,
1780
            const gchar *host,
1781
            gint         port,
1782
            const gchar *path,
1783
            const gchar *query,
1784
            const gchar *fragment)
1785
0
{
1786
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1787
0
  g_return_val_if_fail (path != NULL, NULL);
1788
1789
0
  return g_uri_join_internal (flags,
1790
0
                              scheme,
1791
0
                              TRUE, userinfo, NULL, NULL,
1792
0
                              host,
1793
0
                              port,
1794
0
                              path,
1795
0
                              query,
1796
0
                              fragment);
1797
0
}
1798
1799
/**
1800
 * g_uri_join_with_user:
1801
 * @flags: flags describing how to build the URI string
1802
 * @scheme: (nullable): the URI scheme, or %NULL
1803
 * @user: (nullable): the user component of the userinfo, or %NULL
1804
 * @password: (nullable): the password component of the userinfo, or
1805
 *   %NULL
1806
 * @auth_params: (nullable): the auth params of the userinfo, or
1807
 *   %NULL
1808
 * @host: (nullable): the host component, or %NULL
1809
 * @port: the port, or `-1`
1810
 * @path: (not nullable): the path component
1811
 * @query: (nullable): the query component, or %NULL
1812
 * @fragment: (nullable): the fragment, or %NULL
1813
 *
1814
 * Joins the given components together according to @flags to create
1815
 * an absolute URI string. @path may not be %NULL (though it may be the empty
1816
 * string).
1817
 *
1818
 * In contrast to g_uri_join(), this allows specifying the components
1819
 * of the ‘userinfo’ separately. It otherwise behaves the same.
1820
 *
1821
 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1822
 * in @flags.
1823
 *
1824
 * Return value: (not nullable) (transfer full): an absolute URI string
1825
 *
1826
 * Since: 2.66
1827
 */
1828
gchar *
1829
g_uri_join_with_user (GUriFlags    flags,
1830
                      const gchar *scheme,
1831
                      const gchar *user,
1832
                      const gchar *password,
1833
                      const gchar *auth_params,
1834
                      const gchar *host,
1835
                      gint         port,
1836
                      const gchar *path,
1837
                      const gchar *query,
1838
                      const gchar *fragment)
1839
0
{
1840
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1841
0
  g_return_val_if_fail (path != NULL, NULL);
1842
1843
0
  return g_uri_join_internal (flags,
1844
0
                              scheme,
1845
0
                              FALSE, user, password, auth_params,
1846
0
                              host,
1847
0
                              port,
1848
0
                              path,
1849
0
                              query,
1850
0
                              fragment);
1851
0
}
1852
1853
/**
1854
 * g_uri_build:
1855
 * @flags: flags describing how to build the #GUri
1856
 * @scheme: (not nullable): the URI scheme
1857
 * @userinfo: (nullable): the userinfo component, or %NULL
1858
 * @host: (nullable): the host component, or %NULL
1859
 * @port: the port, or `-1`
1860
 * @path: (not nullable): the path component
1861
 * @query: (nullable): the query component, or %NULL
1862
 * @fragment: (nullable): the fragment, or %NULL
1863
 *
1864
 * Creates a new #GUri from the given components according to @flags.
1865
 *
1866
 * See also g_uri_build_with_user(), which allows specifying the
1867
 * components of the "userinfo" separately.
1868
 *
1869
 * Return value: (not nullable) (transfer full): a new #GUri
1870
 *
1871
 * Since: 2.66
1872
 */
1873
GUri *
1874
g_uri_build (GUriFlags    flags,
1875
             const gchar *scheme,
1876
             const gchar *userinfo,
1877
             const gchar *host,
1878
             gint         port,
1879
             const gchar *path,
1880
             const gchar *query,
1881
             const gchar *fragment)
1882
0
{
1883
0
  GUri *uri;
1884
1885
0
  g_return_val_if_fail (scheme != NULL, NULL);
1886
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1887
0
  g_return_val_if_fail (path != NULL, NULL);
1888
1889
0
  uri = g_atomic_rc_box_new0 (GUri);
1890
0
  uri->flags = flags;
1891
0
  uri->scheme = g_ascii_strdown (scheme, -1);
1892
0
  uri->userinfo = g_strdup (userinfo);
1893
0
  uri->host = g_strdup (host);
1894
0
  uri->port = port;
1895
0
  uri->path = g_strdup (path);
1896
0
  uri->query = g_strdup (query);
1897
0
  uri->fragment = g_strdup (fragment);
1898
1899
0
  return g_steal_pointer (&uri);
1900
0
}
1901
1902
/**
1903
 * g_uri_build_with_user:
1904
 * @flags: flags describing how to build the #GUri
1905
 * @scheme: (not nullable): the URI scheme
1906
 * @user: (nullable): the user component of the userinfo, or %NULL
1907
 * @password: (nullable): the password component of the userinfo, or %NULL
1908
 * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1909
 * @host: (nullable): the host component, or %NULL
1910
 * @port: the port, or `-1`
1911
 * @path: (not nullable): the path component
1912
 * @query: (nullable): the query component, or %NULL
1913
 * @fragment: (nullable): the fragment, or %NULL
1914
 *
1915
 * Creates a new #GUri from the given components according to @flags
1916
 * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1917
 * coherent with the passed values, in particular use `%`-encoded values with
1918
 * %G_URI_FLAGS_ENCODED.
1919
 *
1920
 * In contrast to g_uri_build(), this allows specifying the components
1921
 * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1922
 * if either @password or @auth_params is non-%NULL.
1923
 *
1924
 * Return value: (not nullable) (transfer full): a new #GUri
1925
 *
1926
 * Since: 2.66
1927
 */
1928
GUri *
1929
g_uri_build_with_user (GUriFlags    flags,
1930
                       const gchar *scheme,
1931
                       const gchar *user,
1932
                       const gchar *password,
1933
                       const gchar *auth_params,
1934
                       const gchar *host,
1935
                       gint         port,
1936
                       const gchar *path,
1937
                       const gchar *query,
1938
                       const gchar *fragment)
1939
0
{
1940
0
  GUri *uri;
1941
0
  GString *userinfo;
1942
1943
0
  g_return_val_if_fail (scheme != NULL, NULL);
1944
0
  g_return_val_if_fail (password == NULL || user != NULL, NULL);
1945
0
  g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1946
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1947
0
  g_return_val_if_fail (path != NULL, NULL);
1948
1949
0
  uri = g_atomic_rc_box_new0 (GUri);
1950
0
  uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1951
0
  uri->scheme = g_ascii_strdown (scheme, -1);
1952
0
  uri->user = g_strdup (user);
1953
0
  uri->password = g_strdup (password);
1954
0
  uri->auth_params = g_strdup (auth_params);
1955
0
  uri->host = g_strdup (host);
1956
0
  uri->port = port;
1957
0
  uri->path = g_strdup (path);
1958
0
  uri->query = g_strdup (query);
1959
0
  uri->fragment = g_strdup (fragment);
1960
1961
0
  if (user)
1962
0
    {
1963
0
      userinfo = g_string_new (user);
1964
0
      if (password)
1965
0
        {
1966
0
          g_string_append_c (userinfo, ':');
1967
0
          g_string_append (userinfo, uri->password);
1968
0
        }
1969
0
      if (auth_params)
1970
0
        {
1971
0
          g_string_append_c (userinfo, ';');
1972
0
          g_string_append (userinfo, uri->auth_params);
1973
0
        }
1974
0
      uri->userinfo = g_string_free (userinfo, FALSE);
1975
0
    }
1976
1977
0
  return g_steal_pointer (&uri);
1978
0
}
1979
1980
/**
1981
 * g_uri_to_string:
1982
 * @uri: a #GUri
1983
 *
1984
 * Returns a string representing @uri.
1985
 *
1986
 * This is not guaranteed to return a string which is identical to the
1987
 * string that @uri was parsed from. However, if the source URI was
1988
 * syntactically correct (according to RFC 3986), and it was parsed
1989
 * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1990
 * a string which is at least semantically equivalent to the source
1991
 * URI (according to RFC 3986).
1992
 *
1993
 * If @uri might contain sensitive details, such as authentication parameters,
1994
 * or private data in its query string, and the returned string is going to be
1995
 * logged, then consider using g_uri_to_string_partial() to redact parts.
1996
 *
1997
 * Return value: (not nullable) (transfer full): a string representing @uri,
1998
 *     which the caller must free.
1999
 *
2000
 * Since: 2.66
2001
 */
2002
gchar *
2003
g_uri_to_string (GUri *uri)
2004
0
{
2005
0
  g_return_val_if_fail (uri != NULL, NULL);
2006
2007
0
  return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
2008
0
}
2009
2010
/**
2011
 * g_uri_to_string_partial:
2012
 * @uri: a #GUri
2013
 * @flags: flags describing what parts of @uri to hide
2014
 *
2015
 * Returns a string representing @uri, subject to the options in
2016
 * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2017
 *
2018
 * Return value: (not nullable) (transfer full): a string representing
2019
 *     @uri, which the caller must free.
2020
 *
2021
 * Since: 2.66
2022
 */
2023
gchar *
2024
g_uri_to_string_partial (GUri          *uri,
2025
                         GUriHideFlags  flags)
2026
0
{
2027
0
  gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2028
0
  gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2029
0
  gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2030
0
  gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2031
0
  gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2032
2033
0
  g_return_val_if_fail (uri != NULL, NULL);
2034
2035
0
  if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2036
0
    {
2037
0
      return g_uri_join_with_user (uri->flags,
2038
0
                                   uri->scheme,
2039
0
                                   hide_user ? NULL : uri->user,
2040
0
                                   hide_password ? NULL : uri->password,
2041
0
                                   hide_auth_params ? NULL : uri->auth_params,
2042
0
                                   uri->host,
2043
0
                                   uri->port,
2044
0
                                   uri->path,
2045
0
                                   hide_query ? NULL : uri->query,
2046
0
                                   hide_fragment ? NULL : uri->fragment);
2047
0
    }
2048
2049
0
  return g_uri_join (uri->flags,
2050
0
                     uri->scheme,
2051
0
                     hide_user ? NULL : uri->userinfo,
2052
0
                     uri->host,
2053
0
                     uri->port,
2054
0
                     uri->path,
2055
0
                     hide_query ? NULL : uri->query,
2056
0
                     hide_fragment ? NULL : uri->fragment);
2057
0
}
2058
2059
/* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2060
static guint
2061
str_ascii_case_hash (gconstpointer v)
2062
0
{
2063
0
  const signed char *p;
2064
0
  guint32 h = 5381;
2065
2066
0
  for (p = v; *p != '\0'; p++)
2067
0
    h = (h << 5) + h + g_ascii_toupper (*p);
2068
2069
0
  return h;
2070
0
}
2071
2072
static gboolean
2073
str_ascii_case_equal (gconstpointer v1,
2074
                      gconstpointer v2)
2075
0
{
2076
0
  const gchar *string1 = v1;
2077
0
  const gchar *string2 = v2;
2078
2079
0
  return g_ascii_strcasecmp (string1, string2) == 0;
2080
0
}
2081
2082
/**
2083
 * GUriParamsIter:
2084
 *
2085
 * Many URI schemes include one or more attribute/value pairs as part of the URI
2086
 * value. For example `scheme://server/path?query=string&is=there` has two
2087
 * attributes – `query=string` and `is=there` – in its query part.
2088
 *
2089
 * A #GUriParamsIter structure represents an iterator that can be used to
2090
 * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2091
 * structures are typically allocated on the stack and then initialized with
2092
 * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2093
 * for a usage example.
2094
 *
2095
 * Since: 2.66
2096
 */
2097
/* Internal view of a #GUriParamsIter: g_uri_params_iter_init() and
 * g_uri_params_iter_next() cast the public struct to this type. */
typedef struct
2098
{
2099
  GUriParamsFlags flags;          /* flags given to g_uri_params_iter_init() */
2100
  const gchar    *attr;           /* start of the next pair still to be parsed */
2101
  const gchar    *end;            /* end of the params string; parsing stops here */
2102
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
2103
} RealIter;
2104
2105
/* The cast between the two types relies on these size and alignment checks. */
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
2106
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2107
2108
/**
2109
 * g_uri_params_iter_init:
2110
 * @iter: an uninitialized #GUriParamsIter
2111
 * @params: a `%`-encoded string containing `attribute=value`
2112
 *   parameters
2113
 * @length: the length of @params, or `-1` if it is nul-terminated
2114
 * @separators: the separator byte character set between parameters. (usually
2115
 *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2116
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2117
 *   anything but ASCII characters. You may pass an empty set, in which case
2118
 *   no splitting will occur.
2119
 * @flags: flags to modify the way the parameters are handled.
2120
 *
2121
 * Initializes an attribute/value pair iterator.
2122
 *
2123
 * The iterator keeps pointers to the @params and @separators arguments, those
2124
 * variables must thus outlive the iterator and not be modified during the
2125
 * iteration.
2126
 *
2127
 * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2128
 * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2129
 * will give attribute `foo` with value `bar baz`. This is commonly used on the
2130
 * web (the `https` and `http` schemes only), but is deprecated in favour of
2131
 * the equivalent of encoding spaces as `%20`.
2132
 *
2133
 * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2134
 * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2135
 * responsible for doing their own case-insensitive comparisons.
2136
 *
2137
 * |[<!-- language="C" -->
2138
 * GUriParamsIter iter;
2139
 * GError *error = NULL;
2140
 * gchar *unowned_attr, *unowned_value;
2141
 *
2142
 * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2143
 * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2144
 *   {
2145
 *     g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2146
 *     g_autofree gchar *value = g_steal_pointer (&unowned_value);
2147
 *     // do something with attr and value; this code will be called 4 times
2148
 *     // for the params string in this example: once with attr=foo and value=bar,
2149
 *     // then with baz/bar, then Foo/frob, then baz/bar2.
2150
 *   }
2151
 * if (error)
2152
 *   // handle parsing error
2153
 * ]|
2154
 *
2155
 * Since: 2.66
2156
 */
2157
void
2158
g_uri_params_iter_init (GUriParamsIter *iter,
2159
                        const gchar    *params,
2160
                        gssize          length,
2161
                        const gchar    *separators,
2162
                        GUriParamsFlags flags)
2163
0
{
2164
0
  RealIter *ri = (RealIter *)iter;
2165
0
  const gchar *s;
2166
2167
0
  g_return_if_fail (iter != NULL);
2168
0
  g_return_if_fail (length == 0 || params != NULL);
2169
0
  g_return_if_fail (length >= -1);
2170
0
  g_return_if_fail (separators != NULL);
2171
2172
0
  ri->flags = flags;
2173
2174
0
  if (length == -1)
2175
0
    ri->end = params + strlen (params);
2176
0
  else
2177
0
    ri->end = params + length;
2178
2179
0
  memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2180
0
  for (s = separators; *s != '\0'; ++s)
2181
0
    ri->sep_table[*(guchar *)s] = TRUE;
2182
2183
0
  ri->attr = params;
2184
0
}
2185
2186
/**
2187
 * g_uri_params_iter_next:
2188
 * @iter: an initialized #GUriParamsIter
2189
 * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2190
 *     the attribute, or %NULL.
2191
 * @value: (out) (nullable) (optional) (transfer full): on return, contains
2192
 *     the value, or %NULL.
2193
 * @error: #GError for error reporting, or %NULL to ignore.
2194
 *
2195
 * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2196
 * an error has occurred (in which case @error is set), or if the end of the
2197
 * iteration is reached (in which case @attribute and @value are set to %NULL
2198
 * and the iterator becomes invalid). If %TRUE is returned,
2199
 * g_uri_params_iter_next() may be called again to receive another
2200
 * attribute/value pair.
2201
 *
2202
 * Note that the same @attribute may be returned multiple times, since URIs
2203
 * allow repeated attributes.
2204
 *
2205
 * Returns: %FALSE if the end of the parameters has been reached or an error was
2206
 *     encountered. %TRUE otherwise.
2207
 *
2208
 * Since: 2.66
2209
 */
2210
gboolean
2211
g_uri_params_iter_next (GUriParamsIter *iter,
2212
                        gchar         **attribute,
2213
                        gchar         **value,
2214
                        GError        **error)
2215
0
{
2216
0
  RealIter *ri = (RealIter *)iter;
2217
0
  const gchar *attr_end, *val, *val_end;
2218
0
  gchar *decoded_attr, *decoded_value;
2219
0
  gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2220
0
  GUriFlags decode_flags = G_URI_FLAGS_NONE;
2221
2222
0
  g_return_val_if_fail (iter != NULL, FALSE);
2223
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2224
2225
  /* Pre-clear these in case of failure or finishing. */
2226
0
  if (attribute)
2227
0
    *attribute = NULL;
2228
0
  if (value)
2229
0
    *value = NULL;
2230
2231
0
  if (ri->attr >= ri->end)
2232
0
    return FALSE;
2233
2234
0
  if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2235
0
    decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2236
2237
  /* Check if each character in @attr is a separator, by indexing by the
2238
   * character value into the @sep_table, which has value 1 stored at an
2239
   * index if that index is a separator. */
2240
0
  for (val_end = ri->attr; val_end < ri->end; val_end++)
2241
0
    if (ri->sep_table[*(guchar *)val_end])
2242
0
      break;
2243
2244
0
  attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2245
0
  if (!attr_end)
2246
0
    {
2247
0
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2248
0
                           _("Missing ‘=’ and parameter value"));
2249
0
      return FALSE;
2250
0
    }
2251
0
  if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2252
0
                   www_form, decode_flags, G_URI_ERROR_FAILED, error))
2253
0
    {
2254
0
      return FALSE;
2255
0
    }
2256
2257
0
  val = attr_end + 1;
2258
0
  if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2259
0
                   www_form, decode_flags, G_URI_ERROR_FAILED, error))
2260
0
    {
2261
0
      g_free (decoded_attr);
2262
0
      return FALSE;
2263
0
    }
2264
2265
0
  if (attribute)
2266
0
    *attribute = g_steal_pointer (&decoded_attr);
2267
0
  if (value)
2268
0
    *value = g_steal_pointer (&decoded_value);
2269
2270
0
  g_free (decoded_attr);
2271
0
  g_free (decoded_value);
2272
2273
0
  ri->attr = val_end + 1;
2274
0
  return TRUE;
2275
0
}
2276
2277
/**
2278
 * g_uri_parse_params:
2279
 * @params: a `%`-encoded string containing `attribute=value`
2280
 *   parameters
2281
 * @length: the length of @params, or `-1` if it is nul-terminated
2282
 * @separators: the separator byte character set between parameters. (usually
2283
 *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2284
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2285
 *   anything but ASCII characters. You may pass an empty set, in which case
2286
 *   no splitting will occur.
2287
 * @flags: flags to modify the way the parameters are handled.
2288
 * @error: #GError for error reporting, or %NULL to ignore.
2289
 *
2290
 * Many URI schemes include one or more attribute/value pairs as part of the URI
2291
 * value. This method can be used to parse them into a hash table. When an
2292
 * attribute has multiple occurrences, the last value is the final returned
2293
 * value. If you need to handle repeated attributes differently, use
2294
 * #GUriParamsIter.
2295
 *
2296
 * The @params string is assumed to still be `%`-encoded, but the returned
2297
 * values will be fully decoded. (Thus it is possible that the returned values
2298
 * may contain `=` or @separators, if the value was encoded in the input.)
2299
 * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2300
 * rules for g_uri_parse(). (However, if @params is the path or query string
2301
 * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2302
 * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2303
 * invalid encoding.)
2304
 *
2305
 * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2306
 *
2307
 * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2308
 * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2309
 * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2310
 * the returned attributes.
2311
 *
2312
 * If @params cannot be parsed (for example, it contains two @separators
2313
 * characters in a row), then @error is set and %NULL is returned.
2314
 *
2315
 * Return value: (transfer full) (element-type utf8 utf8):
2316
 *     A hash table of attribute/value pairs, with both names and values
2317
 *     fully-decoded; or %NULL on error.
2318
 *
2319
 * Since: 2.66
2320
 */
2321
GHashTable *
2322
g_uri_parse_params (const gchar     *params,
2323
                    gssize           length,
2324
                    const gchar     *separators,
2325
                    GUriParamsFlags  flags,
2326
                    GError         **error)
2327
0
{
2328
0
  GHashTable *hash;
2329
0
  GUriParamsIter iter;
2330
0
  gchar *attribute, *value;
2331
0
  GError *err = NULL;
2332
2333
0
  g_return_val_if_fail (length == 0 || params != NULL, NULL);
2334
0
  g_return_val_if_fail (length >= -1, NULL);
2335
0
  g_return_val_if_fail (separators != NULL, NULL);
2336
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2337
2338
0
  if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2339
0
    {
2340
0
      hash = g_hash_table_new_full (str_ascii_case_hash,
2341
0
                                    str_ascii_case_equal,
2342
0
                                    g_free, g_free);
2343
0
    }
2344
0
  else
2345
0
    {
2346
0
      hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2347
0
                                    g_free, g_free);
2348
0
    }
2349
2350
0
  g_uri_params_iter_init (&iter, params, length, separators, flags);
2351
2352
0
  while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2353
0
    g_hash_table_insert (hash, attribute, value);
2354
2355
0
  if (err)
2356
0
    {
2357
0
      g_propagate_error (error, g_steal_pointer (&err));
2358
0
      g_hash_table_destroy (hash);
2359
0
      return NULL;
2360
0
    }
2361
2362
0
  return g_steal_pointer (&hash);
2363
0
}
2364
2365
/**
2366
 * g_uri_get_scheme:
2367
 * @uri: a #GUri
2368
 *
2369
 * Gets @uri's scheme. Note that this will always be all-lowercase,
2370
 * regardless of the string or strings that @uri was created from.
2371
 *
2372
 * Return value: (not nullable): @uri's scheme.
2373
 *
2374
 * Since: 2.66
2375
 */
2376
const gchar *
2377
g_uri_get_scheme (GUri *uri)
2378
0
{
2379
0
  g_return_val_if_fail (uri != NULL, NULL);
2380
2381
0
  return uri->scheme;
2382
0
}
2383
2384
/**
2385
 * g_uri_get_userinfo:
2386
 * @uri: a #GUri
2387
 *
2388
 * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2389
 * the flags with which @uri was created.
2390
 *
2391
 * Return value: (nullable): @uri's userinfo.
2392
 *
2393
 * Since: 2.66
2394
 */
2395
const gchar *
2396
g_uri_get_userinfo (GUri *uri)
2397
0
{
2398
0
  g_return_val_if_fail (uri != NULL, NULL);
2399
2400
0
  return uri->userinfo;
2401
0
}
2402
2403
/**
2404
 * g_uri_get_user:
2405
 * @uri: a #GUri
2406
 *
2407
 * Gets the ‘username’ component of @uri's userinfo, which may contain
2408
 * `%`-encoding, depending on the flags with which @uri was created.
2409
 * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2410
 * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2411
 *
2412
 * Return value: (nullable): @uri's user.
2413
 *
2414
 * Since: 2.66
2415
 */
2416
const gchar *
2417
g_uri_get_user (GUri *uri)
2418
0
{
2419
0
  g_return_val_if_fail (uri != NULL, NULL);
2420
2421
0
  return uri->user;
2422
0
}
2423
2424
/**
2425
 * g_uri_get_password:
2426
 * @uri: a #GUri
2427
 *
2428
 * Gets @uri's password, which may contain `%`-encoding, depending on
2429
 * the flags with which @uri was created. (If @uri was not created
2430
 * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2431
 *
2432
 * Return value: (nullable): @uri's password.
2433
 *
2434
 * Since: 2.66
2435
 */
2436
const gchar *
2437
g_uri_get_password (GUri *uri)
2438
0
{
2439
0
  g_return_val_if_fail (uri != NULL, NULL);
2440
2441
0
  return uri->password;
2442
0
}
2443
2444
/**
2445
 * g_uri_get_auth_params:
2446
 * @uri: a #GUri
2447
 *
2448
 * Gets @uri's authentication parameters, which may contain
2449
 * `%`-encoding, depending on the flags with which @uri was created.
2450
 * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2451
 * be %NULL.)
2452
 *
2453
 * Depending on the URI scheme, g_uri_parse_params() may be useful for
2454
 * further parsing this information.
2455
 *
2456
 * Return value: (nullable): @uri's authentication parameters.
2457
 *
2458
 * Since: 2.66
2459
 */
2460
const gchar *
2461
g_uri_get_auth_params (GUri *uri)
2462
0
{
2463
0
  g_return_val_if_fail (uri != NULL, NULL);
2464
2465
0
  return uri->auth_params;
2466
0
}
2467
2468
/**
2469
 * g_uri_get_host:
2470
 * @uri: a #GUri
2471
 *
2472
 * Gets @uri's host. This will never have `%`-encoded characters,
2473
 * unless it is non-UTF-8 (which can only be the case if @uri was
2474
 * created with %G_URI_FLAGS_NON_DNS).
2475
 *
2476
 * If @uri contained an IPv6 address literal, this value will be just
2477
 * that address, without the brackets around it that are necessary in
2478
 * the string form of the URI. Note that in this case there may also
2479
 * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2480
 * `fe80::1234%``25em1` if the string is still encoded).
2481
 *
2482
 * Return value: (nullable): @uri's host.
2483
 *
2484
 * Since: 2.66
2485
 */
2486
const gchar *
2487
g_uri_get_host (GUri *uri)
2488
0
{
2489
0
  g_return_val_if_fail (uri != NULL, NULL);
2490
2491
0
  return uri->host;
2492
0
}
2493
2494
/**
2495
 * g_uri_get_port:
2496
 * @uri: a #GUri
2497
 *
2498
 * Gets @uri's port.
2499
 *
2500
 * Return value: @uri's port, or `-1` if no port was specified.
2501
 *
2502
 * Since: 2.66
2503
 */
2504
gint
2505
g_uri_get_port (GUri *uri)
2506
0
{
2507
0
  g_return_val_if_fail (uri != NULL, -1);
2508
2509
0
  if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2510
0
    return default_scheme_port (uri->scheme);
2511
2512
0
  return uri->port;
2513
0
}
2514
2515
/**
2516
 * g_uri_get_path:
2517
 * @uri: a #GUri
2518
 *
2519
 * Gets @uri's path, which may contain `%`-encoding, depending on the
2520
 * flags with which @uri was created.
2521
 *
2522
 * Return value: (not nullable): @uri's path.
2523
 *
2524
 * Since: 2.66
2525
 */
2526
const gchar *
2527
g_uri_get_path (GUri *uri)
2528
0
{
2529
0
  g_return_val_if_fail (uri != NULL, NULL);
2530
2531
0
  return uri->path;
2532
0
}
2533
2534
/**
2535
 * g_uri_get_query:
2536
 * @uri: a #GUri
2537
 *
2538
 * Gets @uri's query, which may contain `%`-encoding, depending on the
2539
 * flags with which @uri was created.
2540
 *
2541
 * For queries consisting of a series of `name=value` parameters,
2542
 * #GUriParamsIter or g_uri_parse_params() may be useful.
2543
 *
2544
 * Return value: (nullable): @uri's query.
2545
 *
2546
 * Since: 2.66
2547
 */
2548
const gchar *
2549
g_uri_get_query (GUri *uri)
2550
0
{
2551
0
  g_return_val_if_fail (uri != NULL, NULL);
2552
2553
0
  return uri->query;
2554
0
}
2555
2556
/**
2557
 * g_uri_get_fragment:
2558
 * @uri: a #GUri
2559
 *
2560
 * Gets @uri's fragment, which may contain `%`-encoding, depending on
2561
 * the flags with which @uri was created.
2562
 *
2563
 * Return value: (nullable): @uri's fragment.
2564
 *
2565
 * Since: 2.66
2566
 */
2567
const gchar *
2568
g_uri_get_fragment (GUri *uri)
2569
0
{
2570
0
  g_return_val_if_fail (uri != NULL, NULL);
2571
2572
0
  return uri->fragment;
2573
0
}
2574
2575
2576
/**
2577
 * g_uri_get_flags:
2578
 * @uri: a #GUri
2579
 *
2580
 * Gets @uri's flags set upon construction.
2581
 *
2582
 * Return value: @uri's flags.
2583
 *
2584
 * Since: 2.66
2585
 **/
2586
GUriFlags
2587
g_uri_get_flags (GUri *uri)
2588
0
{
2589
0
  g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2590
2591
0
  return uri->flags;
2592
0
}
2593
2594
/**
2595
 * g_uri_unescape_segment:
2596
 * @escaped_string: (nullable): A string, may be %NULL
2597
 * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2598
 *   may be %NULL
2599
 * @illegal_characters: (nullable): An optional string of illegal
2600
 *   characters not to be allowed, may be %NULL
2601
 *
2602
 * Unescapes a segment of an escaped string.
2603
 *
2604
 * If any of the characters in @illegal_characters or the NUL
2605
 * character appears as an escaped character in @escaped_string, then
2606
 * that is an error and %NULL will be returned. This is useful if you
2607
 * want to avoid for instance having a slash being expanded in an
2608
 * escaped path element, which might confuse pathname handling.
2609
 *
2610
 * Note: `NUL` byte is not accepted in the output, in contrast to
2611
 * g_uri_unescape_bytes().
2612
 *
2613
 * Returns: (nullable): an unescaped version of @escaped_string,
2614
 * or %NULL on error. The returned string should be freed when no longer
2615
 * needed.  As a special case if %NULL is given for @escaped_string, this
2616
 * function will return %NULL.
2617
 *
2618
 * Since: 2.16
2619
 **/
2620
gchar *
2621
g_uri_unescape_segment (const gchar *escaped_string,
2622
                        const gchar *escaped_string_end,
2623
                        const gchar *illegal_characters)
2624
0
{
2625
0
  gchar *unescaped;
2626
0
  gsize length;
2627
0
  gssize decoded_len;
2628
2629
0
  if (!escaped_string)
2630
0
    return NULL;
2631
2632
0
  if (escaped_string_end)
2633
0
    length = escaped_string_end - escaped_string;
2634
0
  else
2635
0
    length = strlen (escaped_string);
2636
2637
0
  decoded_len = uri_decoder (&unescaped,
2638
0
                             illegal_characters,
2639
0
                             escaped_string, length,
2640
0
                             FALSE, FALSE,
2641
0
                             G_URI_FLAGS_ENCODED,
2642
0
                             0, NULL);
2643
0
  if (decoded_len < 0)
2644
0
    return NULL;
2645
2646
0
  if (memchr (unescaped, '\0', decoded_len))
2647
0
    {
2648
0
      g_free (unescaped);
2649
0
      return NULL;
2650
0
    }
2651
2652
0
  return unescaped;
2653
0
}
2654
2655
/**
2656
 * g_uri_unescape_string:
2657
 * @escaped_string: an escaped string to be unescaped.
2658
 * @illegal_characters: (nullable): a string of illegal characters
2659
 *   not to be allowed, or %NULL.
2660
 *
2661
 * Unescapes a whole escaped string.
2662
 *
2663
 * If any of the characters in @illegal_characters or the NUL
2664
 * character appears as an escaped character in @escaped_string, then
2665
 * that is an error and %NULL will be returned. This is useful if you
2666
 * want to avoid for instance having a slash being expanded in an
2667
 * escaped path element, which might confuse pathname handling.
2668
 *
2669
 * Returns: (nullable): an unescaped version of @escaped_string.
2670
 * The returned string should be freed when no longer needed.
2671
 *
2672
 * Since: 2.16
2673
 **/
2674
gchar *
2675
g_uri_unescape_string (const gchar *escaped_string,
2676
                       const gchar *illegal_characters)
2677
0
{
2678
0
  return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2679
0
}
2680
2681
/**
2682
 * g_uri_escape_string:
2683
 * @unescaped: the unescaped input string.
2684
 * @reserved_chars_allowed: (nullable): a string of reserved
2685
 *   characters that are allowed to be used, or %NULL.
2686
 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2687
 *
2688
 * Escapes a string for use in a URI.
2689
 *
2690
 * Normally all characters that are not "unreserved" (i.e. ASCII
2691
 * alphanumerical characters plus dash, dot, underscore and tilde) are
2692
 * escaped. But if you specify characters in @reserved_chars_allowed
2693
 * they are not escaped. This is useful for the "reserved" characters
2694
 * in the URI specification, since those are allowed unescaped in some
2695
 * portions of a URI.
2696
 *
2697
 * Returns: (not nullable): an escaped version of @unescaped. The
2698
 * returned string should be freed when no longer needed.
2699
 *
2700
 * Since: 2.16
2701
 **/
2702
gchar *
2703
g_uri_escape_string (const gchar *unescaped,
2704
                     const gchar *reserved_chars_allowed,
2705
                     gboolean     allow_utf8)
2706
0
{
2707
0
  GString *s;
2708
2709
0
  g_return_val_if_fail (unescaped != NULL, NULL);
2710
2711
0
  s = g_string_sized_new (strlen (unescaped) * 1.25);
2712
2713
0
  g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2714
2715
0
  return g_string_free (s, FALSE);
2716
0
}
2717
2718
/**
2719
 * g_uri_unescape_bytes:
2720
 * @escaped_string: A URI-escaped string
2721
 * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2722
 *   is nul-terminated.
2723
 * @illegal_characters: (nullable): a string of illegal characters
2724
 *   not to be allowed, or %NULL.
2725
 * @error: #GError for error reporting, or %NULL to ignore.
2726
 *
2727
 * Unescapes a segment of an escaped string as binary data.
2728
 *
2729
 * Note that in contrast to g_uri_unescape_string(), this does allow
2730
 * nul bytes to appear in the output.
2731
 *
2732
 * If any of the characters in @illegal_characters appears as an escaped
2733
 * character in @escaped_string, then that is an error and %NULL will be
2734
 * returned. This is useful if you want to avoid for instance having a slash
2735
 * being expanded in an escaped path element, which might confuse pathname
2736
 * handling.
2737
 *
2738
 * Returns: (transfer full): an unescaped version of @escaped_string
2739
 *     or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2740
 *     code). The returned #GBytes should be unreffed when no longer needed.
2741
 *
2742
 * Since: 2.66
2743
 **/
2744
GBytes *
2745
g_uri_unescape_bytes (const gchar *escaped_string,
2746
                      gssize       length,
2747
                      const char *illegal_characters,
2748
                      GError     **error)
2749
0
{
2750
0
  gchar *buf;
2751
0
  gssize unescaped_length;
2752
2753
0
  g_return_val_if_fail (escaped_string != NULL, NULL);
2754
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2755
2756
0
  if (length == -1)
2757
0
    length = strlen (escaped_string);
2758
2759
0
  unescaped_length = uri_decoder (&buf,
2760
0
                                  illegal_characters,
2761
0
                                  escaped_string, length,
2762
0
                                  FALSE,
2763
0
                                  FALSE,
2764
0
                                  G_URI_FLAGS_ENCODED,
2765
0
                                  G_URI_ERROR_FAILED, error);
2766
0
  if (unescaped_length == -1)
2767
0
    return NULL;
2768
2769
0
  return g_bytes_new_take (buf, unescaped_length);
2770
0
}
2771
2772
/**
2773
 * g_uri_escape_bytes:
2774
 * @unescaped: (array length=length): the unescaped input data.
2775
 * @length: the length of @unescaped
2776
 * @reserved_chars_allowed: (nullable): a string of reserved
2777
 *   characters that are allowed to be used, or %NULL.
2778
 *
2779
 * Escapes arbitrary data for use in a URI.
2780
 *
2781
 * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2782
 * alphanumerical characters plus dash, dot, underscore and tilde) are
2783
 * escaped. But if you specify characters in @reserved_chars_allowed
2784
 * they are not escaped. This is useful for the ‘reserved’ characters
2785
 * in the URI specification, since those are allowed unescaped in some
2786
 * portions of a URI.
2787
 *
2788
 * Though technically incorrect, this will also allow escaping nul
2789
 * bytes as `%``00`.
2790
 *
2791
 * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2792
 *     The returned string should be freed when no longer needed.
2793
 *
2794
 * Since: 2.66
2795
 */
2796
gchar *
2797
g_uri_escape_bytes (const guint8 *unescaped,
2798
                    gsize         length,
2799
                    const gchar  *reserved_chars_allowed)
2800
0
{
2801
0
  GString *string;
2802
2803
0
  g_return_val_if_fail (unescaped != NULL, NULL);
2804
2805
0
  string = g_string_sized_new (length * 1.25);
2806
2807
0
  _uri_encoder (string, unescaped, length,
2808
0
               reserved_chars_allowed, FALSE);
2809
2810
0
  return g_string_free (string, FALSE);
2811
0
}
2812
2813
static gssize
2814
g_uri_scheme_length (const gchar *uri)
2815
5.12k
{
2816
5.12k
  const gchar *p;
2817
2818
5.12k
  p = uri;
2819
5.12k
  if (!g_ascii_isalpha (*p))
2820
5.12k
    return -1;
2821
0
  p++;
2822
0
  while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2823
0
    p++;
2824
2825
0
  if (p > uri && *p == ':')
2826
0
    return p - uri;
2827
2828
0
  return -1;
2829
0
}
2830
2831
/**
2832
 * g_uri_parse_scheme:
2833
 * @uri: a valid URI.
2834
 *
2835
 * Gets the scheme portion of a URI string.
2836
 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2837
 * as:
2838
 * |[
2839
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2840
 * ]|
2841
 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2842
 *
2843
 * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2844
 *     %NULL on error. The returned string should be freed when no longer needed.
2845
 *
2846
 * Since: 2.16
2847
 **/
2848
gchar *
2849
g_uri_parse_scheme (const gchar *uri)
2850
5.12k
{
2851
5.12k
  gssize len;
2852
2853
5.12k
  g_return_val_if_fail (uri != NULL, NULL);
2854
2855
5.12k
  len = g_uri_scheme_length (uri);
2856
5.12k
  return len == -1 ? NULL : g_strndup (uri, len);
2857
5.12k
}
2858
2859
/**
2860
 * g_uri_peek_scheme:
2861
 * @uri: a valid URI.
2862
 *
2863
 * Gets the scheme portion of a URI string.
2864
 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2865
 * as:
2866
 * |[
2867
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2868
 * ]|
2869
 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2870
 *
2871
 * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2872
 * all-lowercase and does not need to be freed.
2873
 *
2874
 * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2875
 *     %NULL on error. The returned string is normalized to all-lowercase, and
2876
 *     interned via g_intern_string(), so it does not need to be freed.
2877
 *
2878
 * Since: 2.66
2879
 **/
2880
const gchar *
2881
g_uri_peek_scheme (const gchar *uri)
2882
0
{
2883
0
  gssize len;
2884
0
  gchar *lower_scheme;
2885
0
  const gchar *scheme;
2886
2887
0
  g_return_val_if_fail (uri != NULL, NULL);
2888
2889
0
  len = g_uri_scheme_length (uri);
2890
0
  if (len == -1)
2891
0
    return NULL;
2892
2893
0
  lower_scheme = g_ascii_strdown (uri, len);
2894
0
  scheme = g_intern_string (lower_scheme);
2895
0
  g_free (lower_scheme);
2896
2897
0
  return scheme;
2898
0
}
2899
2900
/* Per GLib's G_DEFINE_QUARK macro, this expands to the definition of
 * g_uri_error_quark(), which returns the #GQuark for the string
 * "g-uri-quark". */
G_DEFINE_QUARK (g-uri-quark, g_uri_error)