Coverage Report

Created: 2025-10-10 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib/glib/guri.c
Line
Count
Source
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright © 2020 Red Hat, Inc.
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General
15
 * Public License along with this library; if not, see
16
 * <http://www.gnu.org/licenses/>.
17
 */
18
19
#include "config.h"
20
21
#include <stdlib.h>
22
#include <string.h>
23
24
#include "glib.h"
25
#include "glibintl.h"
26
#include "guriprivate.h"
27
28
/**
29
 * SECTION:guri
30
 * @short_description: URI-handling utilities
31
 * @include: glib.h
32
 *
33
 * The #GUri type and related functions can be used to parse URIs into
34
 * their components, and build valid URIs from individual components.
35
 *
36
 * Note that the scope of #GUri is to help manipulate URIs in various applications,
37
 * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
38
 * it doesn't intend to cover web browser needs, and doesn't implement the
39
 * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
40
 * help prevent
41
 * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
42
 * #GUri is not suitable for formatting URIs for display to the user for making
43
 * security-sensitive decisions.
44
 *
45
 * ## Relative and absolute URIs # {#relative-absolute-uris}
46
 *
47
 * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
48
 * hierarchical nature of URIs means that they can either be ‘relative
49
 * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
50
 * clarity, ‘URIs’ are referred to in this documentation as
51
 * ‘absolute URIs’ — although
52
 * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
53
 * fragment identifiers are always allowed).
54
 *
55
 * Relative references have one or more components of the URI missing. In
56
 * particular, they have no scheme. Any other component, such as hostname,
57
 * query, etc. may be missing, apart from a path, which has to be specified (but
58
 * may be empty). The path may be relative, starting with `./` rather than `/`.
59
 *
60
 * For example, a valid relative reference is `./path?query`,
61
 * `/?query#fragment` or `//example.com`.
62
 *
63
 * Absolute URIs have a scheme specified. Any other components of the URI which
64
 * are missing are specified as explicitly unset in the URI, rather than being
65
 * resolved relative to a base URI using g_uri_parse_relative().
66
 *
67
 * For example, a valid absolute URI is `file:///home/bob` or
68
 * `https://search.com?query=string`.
69
 *
70
 * A #GUri instance is always an absolute URI. A string may be an absolute URI
71
 * or a relative reference; see the documentation for individual functions as to
72
 * what forms they accept.
73
 *
74
 * ## Parsing URIs
75
 *
76
 * The most minimalist APIs for parsing URIs are g_uri_split() and
77
 * g_uri_split_with_user(). These split a URI into its component
78
 * parts, and return the parts; the difference between the two is that
79
 * g_uri_split() treats the ‘userinfo’ component of the URI as a
80
 * single element, while g_uri_split_with_user() can (depending on the
81
 * #GUriFlags you pass) treat it as containing a username, password,
82
 * and authentication parameters. Alternatively, g_uri_split_network()
83
 * can be used when you are only interested in the components that are
84
 * needed to initiate a network connection to the service (scheme,
85
 * host, and port).
86
 *
87
 * g_uri_parse() is similar to g_uri_split(), but instead of returning
88
 * individual strings, it returns a #GUri structure (and it requires
89
 * that the URI be an absolute URI).
90
 *
91
 * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
92
 * resolve a relative URI relative to a base URI.
93
 * g_uri_resolve_relative() takes two strings and returns a string,
94
 * and g_uri_parse_relative() takes a #GUri and a string and returns a
95
 * #GUri.
96
 *
97
 * All of the parsing functions take a #GUriFlags argument describing
98
 * exactly how to parse the URI; see the documentation for that type
99
 * for more details on the specific flags that you can pass. If you
100
 * need to choose different flags based on the type of URI, you can
101
 * use g_uri_peek_scheme() on the URI string to check the scheme
102
 * first, and use that to decide what flags to parse it with.
103
 *
104
 * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
105
 * params for a web URI, so compare the result of g_uri_peek_scheme() against
106
 * `http` and `https`.
107
 *
108
 * ## Building URIs
109
 *
110
 * g_uri_join() and g_uri_join_with_user() can be used to construct
111
 * valid URI strings from a set of component strings. They are the
112
 * inverse of g_uri_split() and g_uri_split_with_user().
113
 *
114
 * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
115
 * construct a #GUri from a set of component strings.
116
 *
117
 * As with the parsing functions, the building functions take a
118
 * #GUriFlags argument. In particular, it is important to keep in mind
119
 * whether the URI components you are using are already `%`-encoded. If so,
120
 * you must pass the %G_URI_FLAGS_ENCODED flag.
121
 *
122
 * ## `file://` URIs
123
 *
124
 * Note that Windows and Unix both define special rules for parsing
125
 * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
126
 * interpretation of path separators on Windows). #GUri does not
127
 * implement these rules. Use g_filename_from_uri() and
128
 * g_filename_to_uri() if you want to properly convert between
129
 * `file://` URIs and local filenames.
130
 *
131
 * ## URI Equality
132
 *
133
 * Note that there is no `g_uri_equal ()` function, because comparing
134
 * URIs usefully requires scheme-specific knowledge that #GUri does
135
 * not have. #GUri can help with normalization if you use the various
136
 * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
137
 * it is not comprehensive.
138
 * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
139
 * thing according to the `data:` URI specification which GLib does not
140
 * handle.
141
 *
142
 * Since: 2.66
143
 */
144
145
/**
146
 * GUri:
147
 *
148
 * A parsed absolute URI.
149
 *
150
 * Since #GUri only represents absolute URIs, all #GUris will have a
151
 * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
152
 * answer. Likewise, by definition, all URIs have a path component, so
153
 * g_uri_get_path() will always return a non-%NULL string (which may be empty).
154
 *
155
 * If the URI string has an
156
 * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
157
 * is, if the scheme is followed by `://` rather than just `:`), then the
158
 * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
159
 * Additionally, depending on how the #GUri was constructed/parsed (for example,
160
 * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
161
 * the userinfo may be split out into a username, password, and
162
 * additional authorization-related parameters.
163
 *
164
 * Normally, the components of a #GUri will have all `%`-encoded
165
 * characters decoded. However, if you construct/parse a #GUri with
166
 * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
167
 * the userinfo, path, and query fields (and in the host field if also
168
 * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
169
 * the URI may contain binary data or non-UTF-8 text, or if decoding
170
 * the components might change the interpretation of the URI.
171
 *
172
 * For example, with the encoded flag:
173
 *
174
 * |[<!-- language="C" -->
175
 *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
176
 *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
177
 * ]|
178
 *
179
 * While the default `%`-decoding behaviour would give:
180
 *
181
 * |[<!-- language="C" -->
182
 *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
183
 *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
184
 * ]|
185
 *
186
 * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
187
 * with an error indicating the bad string location:
188
 *
189
 * |[<!-- language="C" -->
190
 *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
191
 *   g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
192
 * ]|
193
 *
194
 * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
195
 * need to handle that case manually. In particular, if the query string
196
 * contains `=` characters that are `%`-encoded, you should let
197
 * g_uri_parse_params() do the decoding of the query once.
198
 *
199
 * #GUri is immutable once constructed, and can safely be accessed from
200
 * multiple threads. Its reference counting is atomic.
201
 *
202
 * Since: 2.66
203
 */
204
struct _GUri {
205
  gchar     *scheme;
206
  gchar     *userinfo;
207
  gchar     *host;
208
  gint       port;
209
  gchar     *path;
210
  gchar     *query;
211
  gchar     *fragment;
212
213
  gchar     *user;
214
  gchar     *password;
215
  gchar     *auth_params;
216
217
  GUriFlags  flags;
218
};
219
220
/**
221
 * g_uri_ref: (skip)
222
 * @uri: a #GUri
223
 *
224
 * Increments the reference count of @uri by one.
225
 *
226
 * Returns: @uri
227
 *
228
 * Since: 2.66
229
 */
230
GUri *
231
g_uri_ref (GUri *uri)
232
0
{
233
0
  g_return_val_if_fail (uri != NULL, NULL);
234
235
0
  return g_atomic_rc_box_acquire (uri);
236
0
}
237
238
static void
239
g_uri_clear (GUri *uri)
240
0
{
241
0
  g_free (uri->scheme);
242
0
  g_free (uri->userinfo);
243
0
  g_free (uri->host);
244
0
  g_free (uri->path);
245
0
  g_free (uri->query);
246
0
  g_free (uri->fragment);
247
0
  g_free (uri->user);
248
0
  g_free (uri->password);
249
0
  g_free (uri->auth_params);
250
0
}
251
252
/**
253
 * g_uri_unref: (skip)
254
 * @uri: a #GUri
255
 *
256
 * Atomically decrements the reference count of @uri by one.
257
 *
258
 * When the reference count reaches zero, the resources allocated by
259
 * @uri are freed.
260
 *
261
 * Since: 2.66
262
 */
263
void
264
g_uri_unref (GUri *uri)
265
0
{
266
0
  g_return_if_fail (uri != NULL);
267
268
0
  g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
269
0
}
270
271
static gboolean
272
g_uri_char_is_unreserved (gchar ch)
273
0
{
274
0
  if (g_ascii_isalnum (ch))
275
0
    return TRUE;
276
0
  return ch == '-' || ch == '.' || ch == '_' || ch == '~';
277
0
}
278
279
0
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
280
0
#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
281
282
static gssize
283
uri_decoder (gchar       **out,
284
             const gchar  *illegal_chars,
285
             const gchar  *start,
286
             gsize         length,
287
             gboolean      just_normalize,
288
             gboolean      www_form,
289
             GUriFlags     flags,
290
             GUriError     parse_error,
291
             GError      **error)
292
0
{
293
0
  gchar c;
294
0
  GString *decoded;
295
0
  const gchar *invalid, *s, *end;
296
0
  gssize len;
297
298
0
  if (!(flags & G_URI_FLAGS_ENCODED))
299
0
    just_normalize = FALSE;
300
301
0
  decoded = g_string_sized_new (length + 1);
302
0
  for (s = start, end = s + length; s < end; s++)
303
0
    {
304
0
      if (*s == '%')
305
0
        {
306
0
          if (s + 2 >= end ||
307
0
              !g_ascii_isxdigit (s[1]) ||
308
0
              !g_ascii_isxdigit (s[2]))
309
0
            {
310
              /* % followed by non-hex or the end of the string; this is an error */
311
0
              if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
312
0
                {
313
0
                  g_set_error_literal (error, G_URI_ERROR, parse_error,
314
                                       /* xgettext: no-c-format */
315
0
                                       _("Invalid %-encoding in URI"));
316
0
                  g_string_free (decoded, TRUE);
317
0
                  return -1;
318
0
                }
319
320
              /* In non-strict mode, just let it through; we *don't*
321
               * fix it to "%25", since that might change the way that
322
               * the URI's owner would interpret it.
323
               */
324
0
              g_string_append_c (decoded, *s);
325
0
              continue;
326
0
            }
327
328
0
          c = HEXCHAR (s);
329
0
          if (illegal_chars && strchr (illegal_chars, c))
330
0
            {
331
0
              g_set_error_literal (error, G_URI_ERROR, parse_error,
332
0
                                   _("Illegal character in URI"));
333
0
              g_string_free (decoded, TRUE);
334
0
              return -1;
335
0
            }
336
0
          if (just_normalize && !g_uri_char_is_unreserved (c))
337
0
            {
338
              /* Leave the % sequence there but normalize it. */
339
0
              g_string_append_c (decoded, *s);
340
0
              g_string_append_c (decoded, g_ascii_toupper (s[1]));
341
0
              g_string_append_c (decoded, g_ascii_toupper (s[2]));
342
0
              s += 2;
343
0
            }
344
0
          else
345
0
            {
346
0
              g_string_append_c (decoded, c);
347
0
              s += 2;
348
0
            }
349
0
        }
350
0
      else if (www_form && *s == '+')
351
0
        g_string_append_c (decoded, ' ');
352
      /* Normalize any illegal characters. */
353
0
      else if (just_normalize && (!g_ascii_isgraph (*s)))
354
0
        g_string_append_printf (decoded, "%%%02X", (guchar)*s);
355
0
      else
356
0
        g_string_append_c (decoded, *s);
357
0
    }
358
359
0
  len = decoded->len;
360
0
  g_assert (len >= 0);
361
362
0
  if (!(flags & G_URI_FLAGS_ENCODED) &&
363
0
      !g_utf8_validate (decoded->str, len, &invalid))
364
0
    {
365
0
      g_set_error_literal (error, G_URI_ERROR, parse_error,
366
0
                           _("Non-UTF-8 characters in URI"));
367
0
      g_string_free (decoded, TRUE);
368
0
      return -1;
369
0
    }
370
371
0
  if (out)
372
0
    *out = g_string_free (decoded, FALSE);
373
0
  else
374
0
    g_string_free (decoded, TRUE);
375
376
0
  return len;
377
0
}
378
379
static gboolean
380
uri_decode (gchar       **out,
381
            const gchar  *illegal_chars,
382
            const gchar  *start,
383
            gsize         length,
384
            gboolean      www_form,
385
            GUriFlags     flags,
386
            GUriError     parse_error,
387
            GError      **error)
388
0
{
389
0
  return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
390
0
                      parse_error, error) != -1;
391
0
}
392
393
static gboolean
394
uri_normalize (gchar       **out,
395
               const gchar  *start,
396
               gsize         length,
397
               GUriFlags     flags,
398
               GUriError     parse_error,
399
               GError      **error)
400
0
{
401
0
  return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
402
0
                      parse_error, error) != -1;
403
0
}
404
405
static gboolean
406
is_valid (guchar       c,
407
          const gchar *reserved_chars_allowed)
408
0
{
409
0
  if (g_uri_char_is_unreserved (c))
410
0
    return TRUE;
411
412
0
  if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
413
0
    return TRUE;
414
415
0
  return FALSE;
416
0
}
417
418
void
419
_uri_encoder (GString      *out,
420
              const guchar *start,
421
              gsize         length,
422
              const gchar  *reserved_chars_allowed,
423
              gboolean      allow_utf8)
424
0
{
425
0
  static const gchar hex[16] = "0123456789ABCDEF";
426
0
  const guchar *p = start;
427
0
  const guchar *end = p + length;
428
429
0
  while (p < end)
430
0
    {
431
0
      gunichar multibyte_utf8_char = 0;
432
433
0
      if (allow_utf8 && *p >= 0x80)
434
0
        multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
435
436
0
      if (multibyte_utf8_char > 0 &&
437
0
          multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
438
0
        {
439
0
          gint len = g_utf8_skip [*p];
440
0
          g_string_append_len (out, (gchar *)p, len);
441
0
          p += len;
442
0
        }
443
0
      else if (is_valid (*p, reserved_chars_allowed))
444
0
        {
445
0
          g_string_append_c (out, *p);
446
0
          p++;
447
0
        }
448
0
      else
449
0
        {
450
0
          g_string_append_c (out, '%');
451
0
          g_string_append_c (out, hex[*p >> 4]);
452
0
          g_string_append_c (out, hex[*p & 0xf]);
453
0
          p++;
454
0
        }
455
0
    }
456
0
}
457
458
/* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
459
 * support IPv6 zone identifiers).
460
 *
461
 * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
462
 * There’s no point supporting them until (a) they exist and (b) the rest of the
463
 * stack (notably, sockets) supports them.
464
 *
465
 * Rules:
466
 *
467
 * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
468
 *
469
 * ZoneID = 1*( unreserved / pct-encoded )
470
 *
471
 * IPv6addrz = IPv6address "%25" ZoneID
472
 *
473
 * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
474
 *
475
 * IPv6addrz = IPv6address "%" ZoneID
476
 */
477
static gboolean
478
parse_ip_literal (const gchar  *start,
479
                  gsize         length,
480
                  GUriFlags     flags,
481
                  gchar       **out,
482
                  GError      **error)
483
0
{
484
0
  gchar *pct, *zone_id = NULL;
485
0
  gchar *addr = NULL;
486
0
  gsize addr_length = 0;
487
0
  gsize zone_id_length = 0;
488
0
  gchar *decoded_zone_id = NULL;
489
490
0
  if (start[length - 1] != ']')
491
0
    goto bad_ipv6_literal;
492
493
  /* Drop the square brackets */
494
0
  addr = g_strndup (start + 1, length - 2);
495
0
  addr_length = length - 2;
496
497
  /* If there's an IPv6 scope ID, split out the zone. */
498
0
  pct = strchr (addr, '%');
499
0
  if (pct != NULL)
500
0
    {
501
0
      *pct = '\0';
502
503
0
      if (addr_length - (pct - addr) >= 4 &&
504
0
          *(pct + 1) == '2' && *(pct + 2) == '5')
505
0
        {
506
0
          zone_id = pct + 3;
507
0
          zone_id_length = addr_length - (zone_id - addr);
508
0
        }
509
0
      else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
510
0
               addr_length - (pct - addr) >= 2)
511
0
        {
512
0
          zone_id = pct + 1;
513
0
          zone_id_length = addr_length - (zone_id - addr);
514
0
        }
515
0
      else
516
0
        goto bad_ipv6_literal;
517
518
0
      g_assert (zone_id_length >= 1);
519
0
    }
520
521
  /* addr must be an IPv6 address */
522
0
  if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
523
0
    goto bad_ipv6_literal;
524
525
  /* Zone ID must be valid. It can contain %-encoded characters. */
526
0
  if (zone_id != NULL &&
527
0
      !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
528
0
                   flags, G_URI_ERROR_BAD_HOST, NULL))
529
0
    goto bad_ipv6_literal;
530
531
  /* Success */
532
0
  if (out != NULL && decoded_zone_id != NULL)
533
0
    *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
534
0
  else if (out != NULL)
535
0
    *out = g_steal_pointer (&addr);
536
537
0
  g_free (addr);
538
0
  g_free (decoded_zone_id);
539
540
0
  return TRUE;
541
542
0
bad_ipv6_literal:
543
0
  g_free (addr);
544
0
  g_free (decoded_zone_id);
545
0
  g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
546
0
               _("Invalid IPv6 address ‘%.*s’ in URI"),
547
0
               (gint)length, start);
548
549
0
  return FALSE;
550
0
}
551
552
static gboolean
553
parse_host (const gchar  *start,
554
            gsize         length,
555
            GUriFlags     flags,
556
            gchar       **out,
557
            GError      **error)
558
0
{
559
0
  gchar *decoded = NULL, *host;
560
0
  gchar *addr = NULL;
561
562
0
  if (*start == '[')
563
0
    {
564
0
      if (!parse_ip_literal (start, length, flags, &host, error))
565
0
        return FALSE;
566
0
      goto ok;
567
0
    }
568
569
0
  if (g_ascii_isdigit (*start))
570
0
    {
571
0
      addr = g_strndup (start, length);
572
0
      if (g_hostname_is_ip_address (addr))
573
0
        {
574
0
          host = addr;
575
0
          goto ok;
576
0
        }
577
0
      g_free (addr);
578
0
    }
579
580
0
  if (flags & G_URI_FLAGS_NON_DNS)
581
0
    {
582
0
      if (!uri_normalize (&decoded, start, length, flags,
583
0
                          G_URI_ERROR_BAD_HOST, error))
584
0
        return FALSE;
585
0
      host = g_steal_pointer (&decoded);
586
0
      goto ok;
587
0
    }
588
589
0
  flags &= ~G_URI_FLAGS_ENCODED;
590
0
  if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
591
0
                   G_URI_ERROR_BAD_HOST, error))
592
0
    return FALSE;
593
594
  /* You're not allowed to %-encode an IP address, so if it wasn't
595
   * one before, it better not be one now.
596
   */
597
0
  if (g_hostname_is_ip_address (decoded))
598
0
    {
599
0
      g_free (decoded);
600
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
601
0
                   _("Illegal encoded IP address ‘%.*s’ in URI"),
602
0
                   (gint)length, start);
603
0
      return FALSE;
604
0
    }
605
606
0
  if (g_hostname_is_non_ascii (decoded))
607
0
    {
608
0
      host = g_hostname_to_ascii (decoded);
609
0
      if (host == NULL)
610
0
        {
611
0
          g_free (decoded);
612
0
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
613
0
                       _("Illegal internationalized hostname ‘%.*s’ in URI"),
614
0
                       (gint) length, start);
615
0
          return FALSE;
616
0
        }
617
0
    }
618
0
  else
619
0
    {
620
0
      host = g_steal_pointer (&decoded);
621
0
    }
622
623
0
 ok:
624
0
  if (out)
625
0
    *out = g_steal_pointer (&host);
626
0
  g_free (host);
627
0
  g_free (decoded);
628
629
0
  return TRUE;
630
0
}
631
632
static gboolean
633
parse_port (const gchar  *start,
634
            gsize         length,
635
            gint         *out,
636
            GError      **error)
637
0
{
638
0
  gchar *end;
639
0
  gulong parsed_port;
640
641
  /* strtoul() allows leading + or -, so we have to check this first. */
642
0
  if (!g_ascii_isdigit (*start))
643
0
    {
644
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
645
0
                   _("Could not parse port ‘%.*s’ in URI"),
646
0
                   (gint)length, start);
647
0
      return FALSE;
648
0
    }
649
650
  /* We know that *(start + length) is either '\0' or a non-numeric
651
   * character, so strtoul() won't scan beyond it.
652
   */
653
0
  parsed_port = strtoul (start, &end, 10);
654
0
  if (end != start + length)
655
0
    {
656
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
657
0
                   _("Could not parse port ‘%.*s’ in URI"),
658
0
                   (gint)length, start);
659
0
      return FALSE;
660
0
    }
661
0
  else if (parsed_port > 65535)
662
0
    {
663
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
664
0
                   _("Port ‘%.*s’ in URI is out of range"),
665
0
                   (gint)length, start);
666
0
      return FALSE;
667
0
    }
668
669
0
  if (out)
670
0
    *out = parsed_port;
671
0
  return TRUE;
672
0
}
673
674
static gboolean
675
parse_userinfo (const gchar  *start,
676
                gsize         length,
677
                GUriFlags     flags,
678
                gchar       **user,
679
                gchar       **password,
680
                gchar       **auth_params,
681
                GError      **error)
682
0
{
683
0
  const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
684
685
0
  auth_params_end = start + length;
686
0
  if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
687
0
    password_end = memchr (start, ';', auth_params_end - start);
688
0
  if (!password_end)
689
0
    password_end = auth_params_end;
690
0
  if (flags & G_URI_FLAGS_HAS_PASSWORD)
691
0
    user_end = memchr (start, ':', password_end - start);
692
0
  if (!user_end)
693
0
    user_end = password_end;
694
695
0
  if (!uri_normalize (user, start, user_end - start, flags,
696
0
                      G_URI_ERROR_BAD_USER, error))
697
0
    return FALSE;
698
699
0
  if (*user_end == ':')
700
0
    {
701
0
      start = user_end + 1;
702
0
      if (!uri_normalize (password, start, password_end - start, flags,
703
0
                          G_URI_ERROR_BAD_PASSWORD, error))
704
0
        {
705
0
          if (user)
706
0
            g_clear_pointer (user, g_free);
707
0
          return FALSE;
708
0
        }
709
0
    }
710
0
  else if (password)
711
0
    *password = NULL;
712
713
0
  if (*password_end == ';')
714
0
    {
715
0
      start = password_end + 1;
716
0
      if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
717
0
                          G_URI_ERROR_BAD_AUTH_PARAMS, error))
718
0
        {
719
0
          if (user)
720
0
            g_clear_pointer (user, g_free);
721
0
          if (password)
722
0
            g_clear_pointer (password, g_free);
723
0
          return FALSE;
724
0
        }
725
0
    }
726
0
  else if (auth_params)
727
0
    *auth_params = NULL;
728
729
0
  return TRUE;
730
0
}
731
732
static gchar *
733
uri_cleanup (const gchar *uri_string)
734
0
{
735
0
  GString *copy;
736
0
  const gchar *end;
737
738
  /* Skip leading whitespace */
739
0
  while (g_ascii_isspace (*uri_string))
740
0
    uri_string++;
741
742
  /* Ignore trailing whitespace */
743
0
  end = uri_string + strlen (uri_string);
744
0
  while (end > uri_string && g_ascii_isspace (*(end - 1)))
745
0
    end--;
746
747
  /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
748
0
  copy = g_string_sized_new (end - uri_string);
749
0
  while (uri_string < end)
750
0
    {
751
0
      if (*uri_string == ' ')
752
0
        g_string_append (copy, "%20");
753
0
      else if (g_ascii_isspace (*uri_string))
754
0
        ;
755
0
      else
756
0
        g_string_append_c (copy, *uri_string);
757
0
      uri_string++;
758
0
    }
759
760
0
  return g_string_free (copy, FALSE);
761
0
}
762
763
static gboolean
764
should_normalize_empty_path (const char *scheme)
765
0
{
766
0
  const char * const schemes[] = { "https", "http", "wss", "ws" };
767
0
  gsize i;
768
0
  for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
769
0
    {
770
0
      if (!strcmp (schemes[i], scheme))
771
0
        return TRUE;
772
0
    }
773
0
  return FALSE;
774
0
}
775
776
static int
777
normalize_port (const char *scheme,
778
                int         port)
779
0
{
780
0
  const char *default_schemes[3] = { NULL };
781
0
  int i;
782
783
0
  switch (port)
784
0
    {
785
0
    case 21:
786
0
      default_schemes[0] = "ftp";
787
0
      break;
788
0
    case 80:
789
0
      default_schemes[0] = "http";
790
0
      default_schemes[1] = "ws";
791
0
      break;
792
0
    case 443:
793
0
      default_schemes[0] = "https";
794
0
      default_schemes[1] = "wss";
795
0
      break;
796
0
    default:
797
0
      break;
798
0
    }
799
800
0
  for (i = 0; default_schemes[i]; ++i)
801
0
    {
802
0
      if (!strcmp (scheme, default_schemes[i]))
803
0
        return -1;
804
0
    }
805
806
0
  return port;
807
0
}
808
809
static int
810
default_scheme_port (const char *scheme)
811
0
{
812
0
  if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
813
0
    return 80;
814
815
0
  if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
816
0
    return 443;
817
818
0
  if (strcmp (scheme, "ftp") == 0)
819
0
    return 21;
820
821
0
  return -1;
822
0
}
823
824
static gboolean
825
g_uri_split_internal (const gchar  *uri_string,
826
                      GUriFlags     flags,
827
                      gchar       **scheme,
828
                      gchar       **userinfo,
829
                      gchar       **user,
830
                      gchar       **password,
831
                      gchar       **auth_params,
832
                      gchar       **host,
833
                      gint         *port,
834
                      gchar       **path,
835
                      gchar       **query,
836
                      gchar       **fragment,
837
                      GError      **error)
838
0
{
839
0
  const gchar *end, *colon, *at, *path_start, *semi, *question;
840
0
  const gchar *p, *bracket, *hostend;
841
0
  gchar *cleaned_uri_string = NULL;
842
0
  gchar *normalized_scheme = NULL;
843
844
0
  if (scheme)
845
0
    *scheme = NULL;
846
0
  if (userinfo)
847
0
    *userinfo = NULL;
848
0
  if (user)
849
0
    *user = NULL;
850
0
  if (password)
851
0
    *password = NULL;
852
0
  if (auth_params)
853
0
    *auth_params = NULL;
854
0
  if (host)
855
0
    *host = NULL;
856
0
  if (port)
857
0
    *port = -1;
858
0
  if (path)
859
0
    *path = NULL;
860
0
  if (query)
861
0
    *query = NULL;
862
0
  if (fragment)
863
0
    *fragment = NULL;
864
865
0
  if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
866
0
    {
867
0
      cleaned_uri_string = uri_cleanup (uri_string);
868
0
      uri_string = cleaned_uri_string;
869
0
    }
870
871
  /* Find scheme */
872
0
  p = uri_string;
873
0
  while (*p && (g_ascii_isalpha (*p) ||
874
0
               (p > uri_string && (g_ascii_isdigit (*p) ||
875
0
                                   *p == '.' || *p == '+' || *p == '-'))))
876
0
    p++;
877
878
0
  if (p > uri_string && *p == ':')
879
0
    {
880
0
      normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
881
0
      if (scheme)
882
0
        *scheme = g_steal_pointer (&normalized_scheme);
883
0
      p++;
884
0
    }
885
0
  else
886
0
    {
887
0
      if (scheme)
888
0
        *scheme = NULL;
889
0
      p = uri_string;
890
0
    }
891
892
  /* Check for authority */
893
0
  if (strncmp (p, "//", 2) == 0)
894
0
    {
895
0
      p += 2;
896
897
0
      path_start = p + strcspn (p, "/?#");
898
0
      at = memchr (p, '@', path_start - p);
899
0
      if (at)
900
0
        {
901
0
          if (flags & G_URI_FLAGS_PARSE_RELAXED)
902
0
            {
903
0
              gchar *next_at;
904
905
              /* Any "@"s in the userinfo must be %-encoded, but
906
               * people get this wrong sometimes. Since "@"s in the
907
               * hostname are unlikely (and also wrong anyway), assume
908
               * that if there are extra "@"s, they belong in the
909
               * userinfo.
910
               */
911
0
              do
912
0
                {
913
0
                  next_at = memchr (at + 1, '@', path_start - (at + 1));
914
0
                  if (next_at)
915
0
                    at = next_at;
916
0
                }
917
0
              while (next_at);
918
0
            }
919
920
0
          if (user || password || auth_params ||
921
0
              (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
922
0
            {
923
0
              if (!parse_userinfo (p, at - p, flags,
924
0
                                   user, password, auth_params,
925
0
                                   error))
926
0
                goto fail;
927
0
            }
928
929
0
          if (!uri_normalize (userinfo, p, at - p, flags,
930
0
                              G_URI_ERROR_BAD_USER, error))
931
0
            goto fail;
932
933
0
          p = at + 1;
934
0
        }
935
936
0
      if (flags & G_URI_FLAGS_PARSE_RELAXED)
937
0
        {
938
0
          semi = strchr (p, ';');
939
0
          if (semi && semi < path_start)
940
0
            {
941
              /* Technically, semicolons are allowed in the "host"
942
               * production, but no one ever does this, and some
943
               * schemes mistakenly use semicolon as a delimiter
944
               * marking the start of the path. We have to check this
945
               * after checking for userinfo though, because a
946
               * semicolon before the "@" must be part of the
947
               * userinfo.
948
               */
949
0
              path_start = semi;
950
0
            }
951
0
        }
952
953
      /* Find host and port. The host may be a bracket-delimited IPv6
954
       * address, in which case the colon delimiting the port must come
955
       * (immediately) after the close bracket.
956
       */
957
0
      if (*p == '[')
958
0
        {
959
0
          bracket = memchr (p, ']', path_start - p);
960
0
          if (bracket && *(bracket + 1) == ':')
961
0
            colon = bracket + 1;
962
0
          else
963
0
            colon = NULL;
964
0
        }
965
0
      else
966
0
        colon = memchr (p, ':', path_start - p);
967
968
0
      hostend = colon ? colon : path_start;
969
0
      if (!parse_host (p, hostend - p, flags, host, error))
970
0
        goto fail;
971
972
0
      if (colon && colon != path_start - 1)
973
0
        {
974
0
          p = colon + 1;
975
0
          if (!parse_port (p, path_start - p, port, error))
976
0
            goto fail;
977
0
        }
978
979
0
      p = path_start;
980
0
    }
981
982
  /* Find fragment. */
983
0
  end = p + strcspn (p, "#");
984
0
  if (*end == '#')
985
0
    {
986
0
      if (!uri_normalize (fragment, end + 1, strlen (end + 1),
987
0
                          flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
988
0
                          G_URI_ERROR_BAD_FRAGMENT, error))
989
0
        goto fail;
990
0
    }
991
992
  /* Find query */
993
0
  question = memchr (p, '?', end - p);
994
0
  if (question)
995
0
    {
996
0
      if (!uri_normalize (query, question + 1, end - (question + 1),
997
0
                          flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
998
0
                          G_URI_ERROR_BAD_QUERY, error))
999
0
        goto fail;
1000
0
      end = question;
1001
0
    }
1002
1003
0
  if (!uri_normalize (path, p, end - p,
1004
0
                      flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1005
0
                      G_URI_ERROR_BAD_PATH, error))
1006
0
    goto fail;
1007
1008
  /* Scheme-based normalization */
1009
0
  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1010
0
    {
1011
0
      const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1012
1013
0
      if (should_normalize_empty_path (scheme_str) && path && !**path)
1014
0
        {
1015
0
          g_free (*path);
1016
0
          *path = g_strdup ("/");
1017
0
        }
1018
1019
0
      if (port && *port == -1)
1020
0
        *port = default_scheme_port (scheme_str);
1021
0
    }
1022
1023
0
  g_free (normalized_scheme);
1024
0
  g_free (cleaned_uri_string);
1025
0
  return TRUE;
1026
1027
0
 fail:
1028
0
  if (scheme)
1029
0
    g_clear_pointer (scheme, g_free);
1030
0
  if (userinfo)
1031
0
    g_clear_pointer (userinfo, g_free);
1032
0
  if (host)
1033
0
    g_clear_pointer (host, g_free);
1034
0
  if (port)
1035
0
    *port = -1;
1036
0
  if (path)
1037
0
    g_clear_pointer (path, g_free);
1038
0
  if (query)
1039
0
    g_clear_pointer (query, g_free);
1040
0
  if (fragment)
1041
0
    g_clear_pointer (fragment, g_free);
1042
1043
0
  g_free (normalized_scheme);
1044
0
  g_free (cleaned_uri_string);
1045
0
  return FALSE;
1046
0
}
1047
1048
/**
1049
 * g_uri_split:
1050
 * @uri_ref: a string containing a relative or absolute URI
1051
 * @flags: flags for parsing @uri_ref
1052
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1053
 *    the scheme (converted to lowercase), or %NULL
1054
 * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1055
 *    the userinfo, or %NULL
1056
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1057
 *    host, or %NULL
1058
 * @port: (out) (optional) (transfer full): on return, contains the
1059
 *    port, or `-1`
1060
 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1061
 *    path
1062
 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1063
 *    query, or %NULL
1064
 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1065
 *    the fragment, or %NULL
1066
 * @error: #GError for error reporting, or %NULL to ignore.
1067
 *
1068
 * Parses @uri_ref (which can be an
1069
 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1070
 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1071
 * returned as %NULL (but note that all URIs always have a path component,
1072
 * though it may be the empty string).
1073
 *
1074
 * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1075
 * @uri_ref will remain encoded in the output strings. (If not,
1076
 * then all such characters will be decoded.) Note that decoding will
1077
 * only work if the URI components are ASCII or UTF-8, so you will
1078
 * need to use %G_URI_FLAGS_ENCODED if they are not.
1079
 *
1080
 * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1081
 * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1082
 * since it always returns only the full userinfo; use
1083
 * g_uri_split_with_user() if you want it split up.
1084
 *
1085
 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1086
 *   on error.
1087
 *
1088
 * Since: 2.66
1089
 */
1090
gboolean
1091
g_uri_split (const gchar  *uri_ref,
1092
             GUriFlags     flags,
1093
             gchar       **scheme,
1094
             gchar       **userinfo,
1095
             gchar       **host,
1096
             gint         *port,
1097
             gchar       **path,
1098
             gchar       **query,
1099
             gchar       **fragment,
1100
             GError      **error)
1101
0
{
1102
0
  g_return_val_if_fail (uri_ref != NULL, FALSE);
1103
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1104
1105
0
  return g_uri_split_internal (uri_ref, flags,
1106
0
                               scheme, userinfo, NULL, NULL, NULL,
1107
0
                               host, port, path, query, fragment,
1108
0
                               error);
1109
0
}
1110
1111
/**
1112
 * g_uri_split_with_user:
1113
 * @uri_ref: a string containing a relative or absolute URI
1114
 * @flags: flags for parsing @uri_ref
1115
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1116
 *    the scheme (converted to lowercase), or %NULL
1117
 * @user: (out) (nullable) (optional) (transfer full): on return, contains
1118
 *    the user, or %NULL
1119
 * @password: (out) (nullable) (optional) (transfer full): on return, contains
1120
 *    the password, or %NULL
1121
 * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1122
 *    the auth_params, or %NULL
1123
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1124
 *    host, or %NULL
1125
 * @port: (out) (optional) (transfer full): on return, contains the
1126
 *    port, or `-1`
1127
 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1128
 *    path
1129
 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1130
 *    query, or %NULL
1131
 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1132
 *    the fragment, or %NULL
1133
 * @error: #GError for error reporting, or %NULL to ignore.
1134
 *
1135
 * Parses @uri_ref (which can be an
1136
 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1137
 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1138
 * returned as %NULL (but note that all URIs always have a path component,
1139
 * though it may be the empty string).
1140
 *
1141
 * See g_uri_split(), and the definition of #GUriFlags, for more
1142
 * information on the effect of @flags. Note that @password will only
1143
 * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1144
 * @auth_params will only be parsed out if @flags contains
1145
 * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1146
 *
1147
 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1148
 *   on error.
1149
 *
1150
 * Since: 2.66
1151
 */
1152
gboolean
1153
g_uri_split_with_user (const gchar  *uri_ref,
1154
                       GUriFlags     flags,
1155
                       gchar       **scheme,
1156
                       gchar       **user,
1157
                       gchar       **password,
1158
                       gchar       **auth_params,
1159
                       gchar       **host,
1160
                       gint         *port,
1161
                       gchar       **path,
1162
                       gchar       **query,
1163
                       gchar       **fragment,
1164
                       GError      **error)
1165
0
{
1166
0
  g_return_val_if_fail (uri_ref != NULL, FALSE);
1167
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1168
1169
0
  return g_uri_split_internal (uri_ref, flags,
1170
0
                               scheme, NULL, user, password, auth_params,
1171
0
                               host, port, path, query, fragment,
1172
0
                               error);
1173
0
}
1174
1175
1176
/**
1177
 * g_uri_split_network:
1178
 * @uri_string: a string containing an absolute URI
1179
 * @flags: flags for parsing @uri_string
1180
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1181
 *    the scheme (converted to lowercase), or %NULL
1182
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1183
 *    host, or %NULL
1184
 * @port: (out) (optional) (transfer full): on return, contains the
1185
 *    port, or `-1`
1186
 * @error: #GError for error reporting, or %NULL to ignore.
1187
 *
1188
 * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1189
 * according to @flags, and returns the pieces relevant to connecting to a host.
1190
 * See the documentation for g_uri_split() for more details; this is
1191
 * mostly a wrapper around that function with simpler arguments.
1192
 * However, it will return an error if @uri_string is a relative URI,
1193
 * or does not contain a hostname component.
1194
 *
1195
 * Returns: (skip): %TRUE if @uri_string parsed successfully,
1196
 *   %FALSE on error.
1197
 *
1198
 * Since: 2.66
1199
 */
1200
gboolean
1201
g_uri_split_network (const gchar  *uri_string,
1202
                     GUriFlags     flags,
1203
                     gchar       **scheme,
1204
                     gchar       **host,
1205
                     gint         *port,
1206
                     GError      **error)
1207
0
{
1208
0
  gchar *my_scheme = NULL, *my_host = NULL;
1209
1210
0
  g_return_val_if_fail (uri_string != NULL, FALSE);
1211
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1212
1213
0
  if (!g_uri_split_internal (uri_string, flags,
1214
0
                             &my_scheme, NULL, NULL, NULL, NULL,
1215
0
                             &my_host, port, NULL, NULL, NULL,
1216
0
                             error))
1217
0
    return FALSE;
1218
1219
0
  if (!my_scheme || !my_host)
1220
0
    {
1221
0
      if (!my_scheme)
1222
0
        {
1223
0
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1224
0
                       _("URI ‘%s’ is not an absolute URI"),
1225
0
                       uri_string);
1226
0
        }
1227
0
      else
1228
0
        {
1229
0
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1230
0
                       _("URI ‘%s’ has no host component"),
1231
0
                       uri_string);
1232
0
        }
1233
0
      g_free (my_scheme);
1234
0
      g_free (my_host);
1235
1236
0
      return FALSE;
1237
0
    }
1238
1239
0
  if (scheme)
1240
0
    *scheme = g_steal_pointer (&my_scheme);
1241
0
  if (host)
1242
0
    *host = g_steal_pointer (&my_host);
1243
1244
0
  g_free (my_scheme);
1245
0
  g_free (my_host);
1246
1247
0
  return TRUE;
1248
0
}
1249
1250
/**
1251
 * g_uri_is_valid:
1252
 * @uri_string: a string containing an absolute URI
1253
 * @flags: flags for parsing @uri_string
1254
 * @error: #GError for error reporting, or %NULL to ignore.
1255
 *
1256
 * Parses @uri_string according to @flags, to determine whether it is a valid
1257
 * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1258
 * relative to another URI using g_uri_parse_relative().
1259
 *
1260
 * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1261
 *
1262
 * See g_uri_split(), and the definition of #GUriFlags, for more
1263
 * information on the effect of @flags.
1264
 *
1265
 * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1266
 *
1267
 * Since: 2.66
1268
 */
1269
gboolean
1270
g_uri_is_valid (const gchar  *uri_string,
1271
                GUriFlags     flags,
1272
                GError      **error)
1273
0
{
1274
0
  gchar *my_scheme = NULL;
1275
1276
0
  g_return_val_if_fail (uri_string != NULL, FALSE);
1277
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1278
1279
0
  if (!g_uri_split_internal (uri_string, flags,
1280
0
                             &my_scheme, NULL, NULL, NULL, NULL,
1281
0
                             NULL, NULL, NULL, NULL, NULL,
1282
0
                             error))
1283
0
    return FALSE;
1284
1285
0
  if (!my_scheme)
1286
0
    {
1287
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1288
0
                   _("URI ‘%s’ is not an absolute URI"),
1289
0
                   uri_string);
1290
0
      return FALSE;
1291
0
    }
1292
1293
0
  g_free (my_scheme);
1294
1295
0
  return TRUE;
1296
0
}
1297
1298
1299
/* This does the "Remove Dot Segments" algorithm from section 5.2.4 of
1300
 * RFC 3986, except that @path is modified in place.
1301
 *
1302
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
1303
 */
1304
static void
1305
remove_dot_segments (gchar *path)
1306
0
{
1307
0
  gchar *p, *q;
1308
1309
0
  if (!*path)
1310
0
    return;
1311
1312
  /* Remove "./" where "." is a complete segment. */
1313
0
  for (p = path + 1; *p; )
1314
0
    {
1315
0
      if (*(p - 1) == '/' &&
1316
0
          *p == '.' && *(p + 1) == '/')
1317
0
        memmove (p, p + 2, strlen (p + 2) + 1);
1318
0
      else
1319
0
        p++;
1320
0
    }
1321
  /* Remove "." at end. */
1322
0
  if (p > path + 2 &&
1323
0
      *(p - 1) == '.' && *(p - 2) == '/')
1324
0
    *(p - 1) = '\0';
1325
1326
  /* Remove "<segment>/../" where <segment> != ".." */
1327
0
  for (p = path + 1; *p; )
1328
0
    {
1329
0
      if (!strncmp (p, "../", 3))
1330
0
        {
1331
0
          p += 3;
1332
0
          continue;
1333
0
        }
1334
0
      q = strchr (p + 1, '/');
1335
0
      if (!q)
1336
0
        break;
1337
0
      if (strncmp (q, "/../", 4) != 0)
1338
0
        {
1339
0
          p = q + 1;
1340
0
          continue;
1341
0
        }
1342
0
      memmove (p, q + 4, strlen (q + 4) + 1);
1343
0
      p = path + 1;
1344
0
    }
1345
  /* Remove "<segment>/.." at end where <segment> != ".." */
1346
0
  q = strrchr (path, '/');
1347
0
  if (q && q != path && !strcmp (q, "/.."))
1348
0
    {
1349
0
      p = q - 1;
1350
0
      while (p > path && *p != '/')
1351
0
        p--;
1352
0
      if (strncmp (p, "/../", 4) != 0)
1353
0
        *(p + 1) = 0;
1354
0
    }
1355
1356
  /* Remove extraneous initial "/.."s */
1357
0
  while (!strncmp (path, "/../", 4))
1358
0
    memmove (path, path + 3, strlen (path) - 2);
1359
0
  if (!strcmp (path, "/.."))
1360
0
    path[1] = '\0';
1361
0
}
1362
1363
/**
1364
 * g_uri_parse:
1365
 * @uri_string: a string representing an absolute URI
1366
 * @flags: flags describing how to parse @uri_string
1367
 * @error: #GError for error reporting, or %NULL to ignore.
1368
 *
1369
 * Parses @uri_string according to @flags. If the result is not a
1370
 * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1371
 * error returned.
1372
 *
1373
 * Return value: (transfer full): a new #GUri, or NULL on error.
1374
 *
1375
 * Since: 2.66
1376
 */
1377
GUri *
1378
g_uri_parse (const gchar  *uri_string,
1379
             GUriFlags     flags,
1380
             GError      **error)
1381
0
{
1382
0
  g_return_val_if_fail (uri_string != NULL, NULL);
1383
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1384
1385
0
  return g_uri_parse_relative (NULL, uri_string, flags, error);
1386
0
}
1387
1388
/**
1389
 * g_uri_parse_relative:
1390
 * @base_uri: (nullable) (transfer none): a base absolute URI
1391
 * @uri_ref: a string representing a relative or absolute URI
1392
 * @flags: flags describing how to parse @uri_ref
1393
 * @error: #GError for error reporting, or %NULL to ignore.
1394
 *
1395
 * Parses @uri_ref according to @flags and, if it is a
1396
 * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1397
 * If the result is not a valid absolute URI, it will be discarded, and an error
1398
 * returned.
1399
 *
1400
 * Return value: (transfer full): a new #GUri, or NULL on error.
1401
 *
1402
 * Since: 2.66
1403
 */
1404
GUri *
1405
g_uri_parse_relative (GUri         *base_uri,
1406
                      const gchar  *uri_ref,
1407
                      GUriFlags     flags,
1408
                      GError      **error)
1409
0
{
1410
0
  GUri *uri = NULL;
1411
1412
0
  g_return_val_if_fail (uri_ref != NULL, NULL);
1413
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1414
0
  g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1415
1416
  /* Use GUri struct to construct the return value: there is no guarantee it is
1417
   * actually correct within the function body. */
1418
0
  uri = g_atomic_rc_box_new0 (GUri);
1419
0
  uri->flags = flags;
1420
1421
0
  if (!g_uri_split_internal (uri_ref, flags,
1422
0
                             &uri->scheme, &uri->userinfo,
1423
0
                             &uri->user, &uri->password, &uri->auth_params,
1424
0
                             &uri->host, &uri->port,
1425
0
                             &uri->path, &uri->query, &uri->fragment,
1426
0
                             error))
1427
0
    {
1428
0
      g_uri_unref (uri);
1429
0
      return NULL;
1430
0
    }
1431
1432
0
  if (!uri->scheme && !base_uri)
1433
0
    {
1434
0
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1435
0
                           _("URI is not absolute, and no base URI was provided"));
1436
0
      g_uri_unref (uri);
1437
0
      return NULL;
1438
0
    }
1439
1440
0
  if (base_uri)
1441
0
    {
1442
      /* This is section 5.2.2 of RFC 3986, except that we're doing
1443
       * it in place in @uri rather than copying from R to T.
1444
       *
1445
       * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1446
       */
1447
0
      if (uri->scheme)
1448
0
        remove_dot_segments (uri->path);
1449
0
      else
1450
0
        {
1451
0
          uri->scheme = g_strdup (base_uri->scheme);
1452
0
          if (uri->host)
1453
0
            remove_dot_segments (uri->path);
1454
0
          else
1455
0
            {
1456
0
              if (!*uri->path)
1457
0
                {
1458
0
                  g_free (uri->path);
1459
0
                  uri->path = g_strdup (base_uri->path);
1460
0
                  if (!uri->query)
1461
0
                    uri->query = g_strdup (base_uri->query);
1462
0
                }
1463
0
              else
1464
0
                {
1465
0
                  if (*uri->path == '/')
1466
0
                    remove_dot_segments (uri->path);
1467
0
                  else
1468
0
                    {
1469
0
                      gchar *newpath, *last;
1470
1471
0
                      last = strrchr (base_uri->path, '/');
1472
0
                      if (last)
1473
0
                        {
1474
0
                          newpath = g_strdup_printf ("%.*s/%s",
1475
0
                                                     (gint)(last - base_uri->path),
1476
0
                                                     base_uri->path,
1477
0
                                                     uri->path);
1478
0
                        }
1479
0
                      else
1480
0
                        newpath = g_strdup_printf ("/%s", uri->path);
1481
1482
0
                      g_free (uri->path);
1483
0
                      uri->path = g_steal_pointer (&newpath);
1484
1485
0
                      remove_dot_segments (uri->path);
1486
0
                    }
1487
0
                }
1488
1489
0
              uri->userinfo = g_strdup (base_uri->userinfo);
1490
0
              uri->user = g_strdup (base_uri->user);
1491
0
              uri->password = g_strdup (base_uri->password);
1492
0
              uri->auth_params = g_strdup (base_uri->auth_params);
1493
0
              uri->host = g_strdup (base_uri->host);
1494
0
              uri->port = base_uri->port;
1495
0
            }
1496
0
        }
1497
1498
      /* Scheme normalization couldn't have been done earlier
1499
       * as the relative URI may not have had a scheme */
1500
0
      if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1501
0
        {
1502
0
          if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1503
0
            {
1504
0
              g_free (uri->path);
1505
0
              uri->path = g_strdup ("/");
1506
0
            }
1507
1508
0
          uri->port = normalize_port (uri->scheme, uri->port);
1509
0
        }
1510
0
    }
1511
1512
0
  return g_steal_pointer (&uri);
1513
0
}
1514
1515
/**
1516
 * g_uri_resolve_relative:
1517
 * @base_uri_string: (nullable): a string representing a base URI
1518
 * @uri_ref: a string representing a relative or absolute URI
1519
 * @flags: flags describing how to parse @uri_ref
1520
 * @error: #GError for error reporting, or %NULL to ignore.
1521
 *
1522
 * Parses @uri_ref according to @flags and, if it is a
1523
 * [relative URI][relative-absolute-uris], resolves it relative to
1524
 * @base_uri_string. If the result is not a valid absolute URI, it will be
1525
 * discarded, and an error returned.
1526
 *
1527
 * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1528
 * %NULL if @uri_ref is invalid or not absolute.)
1529
 *
1530
 * Return value: (transfer full): the resolved URI string,
1531
 * or NULL on error.
1532
 *
1533
 * Since: 2.66
1534
 */
1535
gchar *
1536
g_uri_resolve_relative (const gchar  *base_uri_string,
1537
                        const gchar  *uri_ref,
1538
                        GUriFlags     flags,
1539
                        GError      **error)
1540
0
{
1541
0
  GUri *base_uri, *resolved_uri;
1542
0
  gchar *resolved_uri_string;
1543
1544
0
  g_return_val_if_fail (uri_ref != NULL, NULL);
1545
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1546
1547
0
  flags |= G_URI_FLAGS_ENCODED;
1548
1549
0
  if (base_uri_string)
1550
0
    {
1551
0
      base_uri = g_uri_parse (base_uri_string, flags, error);
1552
0
      if (!base_uri)
1553
0
        return NULL;
1554
0
    }
1555
0
  else
1556
0
    base_uri = NULL;
1557
1558
0
  resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1559
0
  if (base_uri)
1560
0
    g_uri_unref (base_uri);
1561
0
  if (!resolved_uri)
1562
0
    return NULL;
1563
1564
0
  resolved_uri_string = g_uri_to_string (resolved_uri);
1565
0
  g_uri_unref (resolved_uri);
1566
0
  return g_steal_pointer (&resolved_uri_string);
1567
0
}
1568
1569
/* userinfo as a whole can contain sub-delims + ":", but split-out
1570
 * user can't contain ":" or ";", and split-out password can't contain
1571
 * ";".
1572
 */
1573
0
/* Per-component character sets. g_uri_join_internal() passes
 * USERINFO_ALLOWED_CHARS and USER_ALLOWED_CHARS to
 * g_string_append_uri_escaped() as the set of reserved characters to
 * leave unescaped. The remaining macros are presumably used the same way
 * for their respective components (TODO confirm against the rest of the
 * file).
 *
 * USER_ALLOWED_CHARS omits ':' and ';', and PASSWORD_ALLOWED_CHARS omits
 * ';', so those delimiters get escaped inside a split-out user/password.
 */
#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
1574
0
#define USER_ALLOWED_CHARS "!$&'()*+,="
1575
0
#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
1576
0
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
1577
0
#define IP_ADDR_ALLOWED_CHARS ":"
1578
0
#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
1579
0
#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
1580
0
#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1581
0
#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1582
1583
static gchar *
1584
g_uri_join_internal (GUriFlags    flags,
1585
                     const gchar *scheme,
1586
                     gboolean     userinfo,
1587
                     const gchar *user,
1588
                     const gchar *password,
1589
                     const gchar *auth_params,
1590
                     const gchar *host,
1591
                     gint         port,
1592
                     const gchar *path,
1593
                     const gchar *query,
1594
                     const gchar *fragment)
1595
0
{
1596
0
  gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1597
0
  GString *str;
1598
0
  char *normalized_scheme = NULL;
1599
1600
  /* Restrictions on path prefixes. See:
1601
   * https://tools.ietf.org/html/rfc3986#section-3
1602
   */
1603
0
  g_return_val_if_fail (path != NULL, NULL);
1604
0
  g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1605
0
  g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1606
1607
0
  str = g_string_new (scheme);
1608
0
  if (scheme)
1609
0
    g_string_append_c (str, ':');
1610
1611
0
  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1612
0
    normalized_scheme = g_ascii_strdown (scheme, -1);
1613
1614
0
  if (host)
1615
0
    {
1616
0
      g_string_append (str, "//");
1617
1618
0
      if (user)
1619
0
        {
1620
0
          if (encoded)
1621
0
            g_string_append (str, user);
1622
0
          else
1623
0
            {
1624
0
              if (userinfo)
1625
0
                g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1626
0
              else
1627
                /* Encode ':' and ';' regardless of whether we have a
1628
                 * password or auth params, since it may be parsed later
1629
                 * under the assumption that it does.
1630
                 */
1631
0
                g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1632
0
            }
1633
1634
0
          if (password)
1635
0
            {
1636
0
              g_string_append_c (str, ':');
1637
0
              if (encoded)
1638
0
                g_string_append (str, password);
1639
0
              else
1640
0
                g_string_append_uri_escaped (str, password,
1641
0
                                             PASSWORD_ALLOWED_CHARS, TRUE);
1642
0
            }
1643
1644
0
          if (auth_params)
1645
0
            {
1646
0
              g_string_append_c (str, ';');
1647
0
              if (encoded)
1648
0
                g_string_append (str, auth_params);
1649
0
              else
1650
0
                g_string_append_uri_escaped (str, auth_params,
1651
0
                                             AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1652
0
            }
1653
1654
0
          g_string_append_c (str, '@');
1655
0
        }
1656
1657
0
      if (strchr (host, ':') && g_hostname_is_ip_address (host))
1658
0
        {
1659
0
          g_string_append_c (str, '[');
1660
0
          if (encoded)
1661
0
            g_string_append (str, host);
1662
0
          else
1663
0
            g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1664
0
          g_string_append_c (str, ']');
1665
0
        }
1666
0
      else
1667
0
        {
1668
0
          if (encoded)
1669
0
            g_string_append (str, host);
1670
0
          else
1671
0
            g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1672
0
        }
1673
1674
0
      if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1675
0
        g_string_append_printf (str, ":%d", port);
1676
0
    }
1677
1678
0
  if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1679
0
    g_string_append (str, "/");
1680
0
  else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1681
0
    g_string_append (str, path);
1682
0
  else
1683
0
    g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1684
1685
0
  g_free (normalized_scheme);
1686
1687
0
  if (query)
1688
0
    {
1689
0
      g_string_append_c (str, '?');
1690
0
      if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1691
0
        g_string_append (str, query);
1692
0
      else
1693
0
        g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1694
0
    }
1695
0
  if (fragment)
1696
0
    {
1697
0
      g_string_append_c (str, '#');
1698
0
      if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1699
0
        g_string_append (str, fragment);
1700
0
      else
1701
0
        g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1702
0
    }
1703
1704
0
  return g_string_free (str, FALSE);
1705
0
}
1706
1707
/**
1708
 * g_uri_join:
1709
 * @flags: flags describing how to build the URI string
1710
 * @scheme: (nullable): the URI scheme, or %NULL
1711
 * @userinfo: (nullable): the userinfo component, or %NULL
1712
 * @host: (nullable): the host component, or %NULL
1713
 * @port: the port, or `-1`
1714
 * @path: (not nullable): the path component
1715
 * @query: (nullable): the query component, or %NULL
1716
 * @fragment: (nullable): the fragment, or %NULL
1717
 *
1718
 * Joins the given components together according to @flags to create
1719
 * an absolute URI string. @path may not be %NULL (though it may be the empty
1720
 * string).
1721
 *
1722
 * When @host is present, @path must either be empty or begin with a slash (`/`)
1723
 * character. When @host is not present, @path cannot begin with two slash
1724
 * characters (`//`). See
1725
 * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1726
 *
1727
 * See also g_uri_join_with_user(), which allows specifying the
1728
 * components of the ‘userinfo’ separately.
1729
 *
1730
 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1731
 * in @flags.
1732
 *
1733
 * Return value: (not nullable) (transfer full): an absolute URI string
1734
 *
1735
 * Since: 2.66
1736
 */
1737
gchar *
1738
g_uri_join (GUriFlags    flags,
1739
            const gchar *scheme,
1740
            const gchar *userinfo,
1741
            const gchar *host,
1742
            gint         port,
1743
            const gchar *path,
1744
            const gchar *query,
1745
            const gchar *fragment)
1746
0
{
1747
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1748
0
  g_return_val_if_fail (path != NULL, NULL);
1749
1750
0
  return g_uri_join_internal (flags,
1751
0
                              scheme,
1752
0
                              TRUE, userinfo, NULL, NULL,
1753
0
                              host,
1754
0
                              port,
1755
0
                              path,
1756
0
                              query,
1757
0
                              fragment);
1758
0
}
1759
1760
/**
1761
 * g_uri_join_with_user:
1762
 * @flags: flags describing how to build the URI string
1763
 * @scheme: (nullable): the URI scheme, or %NULL
1764
 * @user: (nullable): the user component of the userinfo, or %NULL
1765
 * @password: (nullable): the password component of the userinfo, or
1766
 *   %NULL
1767
 * @auth_params: (nullable): the auth params of the userinfo, or
1768
 *   %NULL
1769
 * @host: (nullable): the host component, or %NULL
1770
 * @port: the port, or `-1`
1771
 * @path: (not nullable): the path component
1772
 * @query: (nullable): the query component, or %NULL
1773
 * @fragment: (nullable): the fragment, or %NULL
1774
 *
1775
 * Joins the given components together according to @flags to create
1776
 * an absolute URI string. @path may not be %NULL (though it may be the empty
1777
 * string).
1778
 *
1779
 * In contrast to g_uri_join(), this allows specifying the components
1780
 * of the ‘userinfo’ separately. It otherwise behaves the same.
1781
 *
1782
 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1783
 * in @flags.
1784
 *
1785
 * Return value: (not nullable) (transfer full): an absolute URI string
1786
 *
1787
 * Since: 2.66
1788
 */
1789
gchar *
1790
g_uri_join_with_user (GUriFlags    flags,
1791
                      const gchar *scheme,
1792
                      const gchar *user,
1793
                      const gchar *password,
1794
                      const gchar *auth_params,
1795
                      const gchar *host,
1796
                      gint         port,
1797
                      const gchar *path,
1798
                      const gchar *query,
1799
                      const gchar *fragment)
1800
0
{
1801
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1802
0
  g_return_val_if_fail (path != NULL, NULL);
1803
1804
0
  return g_uri_join_internal (flags,
1805
0
                              scheme,
1806
0
                              FALSE, user, password, auth_params,
1807
0
                              host,
1808
0
                              port,
1809
0
                              path,
1810
0
                              query,
1811
0
                              fragment);
1812
0
}
1813
1814
/**
1815
 * g_uri_build:
1816
 * @flags: flags describing how to build the #GUri
1817
 * @scheme: (not nullable): the URI scheme
1818
 * @userinfo: (nullable): the userinfo component, or %NULL
1819
 * @host: (nullable): the host component, or %NULL
1820
 * @port: the port, or `-1`
1821
 * @path: (not nullable): the path component
1822
 * @query: (nullable): the query component, or %NULL
1823
 * @fragment: (nullable): the fragment, or %NULL
1824
 *
1825
 * Creates a new #GUri from the given components according to @flags.
1826
 *
1827
 * See also g_uri_build_with_user(), which allows specifying the
1828
 * components of the "userinfo" separately.
1829
 *
1830
 * Return value: (not nullable) (transfer full): a new #GUri
1831
 *
1832
 * Since: 2.66
1833
 */
1834
GUri *
1835
g_uri_build (GUriFlags    flags,
1836
             const gchar *scheme,
1837
             const gchar *userinfo,
1838
             const gchar *host,
1839
             gint         port,
1840
             const gchar *path,
1841
             const gchar *query,
1842
             const gchar *fragment)
1843
0
{
1844
0
  GUri *uri;
1845
1846
0
  g_return_val_if_fail (scheme != NULL, NULL);
1847
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1848
0
  g_return_val_if_fail (path != NULL, NULL);
1849
1850
0
  uri = g_atomic_rc_box_new0 (GUri);
1851
0
  uri->flags = flags;
1852
0
  uri->scheme = g_ascii_strdown (scheme, -1);
1853
0
  uri->userinfo = g_strdup (userinfo);
1854
0
  uri->host = g_strdup (host);
1855
0
  uri->port = port;
1856
0
  uri->path = g_strdup (path);
1857
0
  uri->query = g_strdup (query);
1858
0
  uri->fragment = g_strdup (fragment);
1859
1860
0
  return g_steal_pointer (&uri);
1861
0
}
1862
1863
/**
1864
 * g_uri_build_with_user:
1865
 * @flags: flags describing how to build the #GUri
1866
 * @scheme: (not nullable): the URI scheme
1867
 * @user: (nullable): the user component of the userinfo, or %NULL
1868
 * @password: (nullable): the password component of the userinfo, or %NULL
1869
 * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1870
 * @host: (nullable): the host component, or %NULL
1871
 * @port: the port, or `-1`
1872
 * @path: (not nullable): the path component
1873
 * @query: (nullable): the query component, or %NULL
1874
 * @fragment: (nullable): the fragment, or %NULL
1875
 *
1876
 * Creates a new #GUri from the given components according to @flags
1877
 * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1878
 * coherent with the passed values, in particular use `%`-encoded values with
1879
 * %G_URI_FLAGS_ENCODED.
1880
 *
1881
 * In contrast to g_uri_build(), this allows specifying the components
1882
 * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1883
 * if either @password or @auth_params is non-%NULL.
1884
 *
1885
 * Return value: (not nullable) (transfer full): a new #GUri
1886
 *
1887
 * Since: 2.66
1888
 */
1889
GUri *
1890
g_uri_build_with_user (GUriFlags    flags,
1891
                       const gchar *scheme,
1892
                       const gchar *user,
1893
                       const gchar *password,
1894
                       const gchar *auth_params,
1895
                       const gchar *host,
1896
                       gint         port,
1897
                       const gchar *path,
1898
                       const gchar *query,
1899
                       const gchar *fragment)
1900
0
{
1901
0
  GUri *uri;
1902
0
  GString *userinfo;
1903
1904
0
  g_return_val_if_fail (scheme != NULL, NULL);
1905
0
  g_return_val_if_fail (password == NULL || user != NULL, NULL);
1906
0
  g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1907
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1908
0
  g_return_val_if_fail (path != NULL, NULL);
1909
1910
0
  uri = g_atomic_rc_box_new0 (GUri);
1911
0
  uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1912
0
  uri->scheme = g_ascii_strdown (scheme, -1);
1913
0
  uri->user = g_strdup (user);
1914
0
  uri->password = g_strdup (password);
1915
0
  uri->auth_params = g_strdup (auth_params);
1916
0
  uri->host = g_strdup (host);
1917
0
  uri->port = port;
1918
0
  uri->path = g_strdup (path);
1919
0
  uri->query = g_strdup (query);
1920
0
  uri->fragment = g_strdup (fragment);
1921
1922
0
  if (user)
1923
0
    {
1924
0
      userinfo = g_string_new (user);
1925
0
      if (password)
1926
0
        {
1927
0
          g_string_append_c (userinfo, ':');
1928
0
          g_string_append (userinfo, uri->password);
1929
0
        }
1930
0
      if (auth_params)
1931
0
        {
1932
0
          g_string_append_c (userinfo, ';');
1933
0
          g_string_append (userinfo, uri->auth_params);
1934
0
        }
1935
0
      uri->userinfo = g_string_free (userinfo, FALSE);
1936
0
    }
1937
1938
0
  return g_steal_pointer (&uri);
1939
0
}
1940
1941
/**
1942
 * g_uri_to_string:
1943
 * @uri: a #GUri
1944
 *
1945
 * Returns a string representing @uri.
1946
 *
1947
 * This is not guaranteed to return a string which is identical to the
1948
 * string that @uri was parsed from. However, if the source URI was
1949
 * syntactically correct (according to RFC 3986), and it was parsed
1950
 * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1951
 * a string which is at least semantically equivalent to the source
1952
 * URI (according to RFC 3986).
1953
 *
1954
 * If @uri might contain sensitive details, such as authentication parameters,
1955
 * or private data in its query string, and the returned string is going to be
1956
 * logged, then consider using g_uri_to_string_partial() to redact parts.
1957
 *
1958
 * Return value: (not nullable) (transfer full): a string representing @uri,
1959
 *     which the caller must free.
1960
 *
1961
 * Since: 2.66
1962
 */
1963
gchar *
1964
g_uri_to_string (GUri *uri)
1965
0
{
1966
0
  g_return_val_if_fail (uri != NULL, NULL);
1967
1968
0
  return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
1969
0
}
1970
1971
/**
1972
 * g_uri_to_string_partial:
1973
 * @uri: a #GUri
1974
 * @flags: flags describing what parts of @uri to hide
1975
 *
1976
 * Returns a string representing @uri, subject to the options in
1977
 * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
1978
 *
1979
 * Return value: (not nullable) (transfer full): a string representing
1980
 *     @uri, which the caller must free.
1981
 *
1982
 * Since: 2.66
1983
 */
1984
gchar *
1985
g_uri_to_string_partial (GUri          *uri,
1986
                         GUriHideFlags  flags)
1987
0
{
1988
0
  gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
1989
0
  gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
1990
0
  gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
1991
0
  gboolean hide_query = (flags & G_URI_HIDE_QUERY);
1992
0
  gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
1993
1994
0
  g_return_val_if_fail (uri != NULL, NULL);
1995
1996
0
  if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
1997
0
    {
1998
0
      return g_uri_join_with_user (uri->flags,
1999
0
                                   uri->scheme,
2000
0
                                   hide_user ? NULL : uri->user,
2001
0
                                   hide_password ? NULL : uri->password,
2002
0
                                   hide_auth_params ? NULL : uri->auth_params,
2003
0
                                   uri->host,
2004
0
                                   uri->port,
2005
0
                                   uri->path,
2006
0
                                   hide_query ? NULL : uri->query,
2007
0
                                   hide_fragment ? NULL : uri->fragment);
2008
0
    }
2009
2010
0
  return g_uri_join (uri->flags,
2011
0
                     uri->scheme,
2012
0
                     hide_user ? NULL : uri->userinfo,
2013
0
                     uri->host,
2014
0
                     uri->port,
2015
0
                     uri->path,
2016
0
                     hide_query ? NULL : uri->query,
2017
0
                     hide_fragment ? NULL : uri->fragment);
2018
0
}
2019
2020
/* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2021
static guint
2022
str_ascii_case_hash (gconstpointer v)
2023
0
{
2024
0
  const signed char *p;
2025
0
  guint32 h = 5381;
2026
2027
0
  for (p = v; *p != '\0'; p++)
2028
0
    h = (h << 5) + h + g_ascii_toupper (*p);
2029
2030
0
  return h;
2031
0
}
2032
2033
static gboolean
2034
str_ascii_case_equal (gconstpointer v1,
2035
                      gconstpointer v2)
2036
0
{
2037
0
  const gchar *string1 = v1;
2038
0
  const gchar *string2 = v2;
2039
2040
0
  return g_ascii_strcasecmp (string1, string2) == 0;
2041
0
}
2042
2043
/**
2044
 * GUriParamsIter:
2045
 *
2046
 * Many URI schemes include one or more attribute/value pairs as part of the URI
2047
 * value. For example `scheme://server/path?query=string&is=there` has two
2048
 * attributes – `query=string` and `is=there` – in its query part.
2049
 *
2050
 * A #GUriParamsIter structure represents an iterator that can be used to
2051
 * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2052
 * structures are typically allocated on the stack and then initialized with
2053
 * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2054
 * for a usage example.
2055
 *
2056
 * Since: 2.66
2057
 */
2058
typedef struct
2059
{
2060
  GUriParamsFlags flags;
2061
  const gchar    *attr;
2062
  const gchar    *end;
2063
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
2064
} RealIter;
2065
2066
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
2067
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2068
2069
/**
2070
 * g_uri_params_iter_init:
2071
 * @iter: an uninitialized #GUriParamsIter
2072
 * @params: a `%`-encoded string containing `attribute=value`
2073
 *   parameters
2074
 * @length: the length of @params, or `-1` if it is nul-terminated
2075
 * @separators: the separator byte character set between parameters. (usually
2076
 *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2077
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2078
 *   anything but ASCII characters. You may pass an empty set, in which case
2079
 *   no splitting will occur.
2080
 * @flags: flags to modify the way the parameters are handled.
2081
 *
2082
 * Initializes an attribute/value pair iterator.
2083
 *
2084
 * The iterator keeps pointers to the @params and @separators arguments, those
2085
 * variables must thus outlive the iterator and not be modified during the
2086
 * iteration.
2087
 *
2088
 * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2089
 * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2090
 * will give attribute `foo` with value `bar baz`. This is commonly used on the
2091
 * web (the `https` and `http` schemes only), but is deprecated in favour of
2092
 * the equivalent of encoding spaces as `%20`.
2093
 *
2094
 * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2095
 * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2096
 * responsible for doing their own case-insensitive comparisons.
2097
 *
2098
 * |[<!-- language="C" -->
2099
 * GUriParamsIter iter;
2100
 * GError *error = NULL;
2101
 * gchar *unowned_attr, *unowned_value;
2102
 *
2103
 * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2104
 * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2105
 *   {
2106
 *     g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2107
 *     g_autofree gchar *value = g_steal_pointer (&unowned_value);
2108
 *     // do something with attr and value; this code will be called 4 times
2109
 *     // for the params string in this example: once with attr=foo and value=bar,
2110
 *     // then with baz/bar, then Foo/frob, then baz/bar2.
2111
 *   }
2112
 * if (error)
2113
 *   // handle parsing error
2114
 * ]|
2115
 *
2116
 * Since: 2.66
2117
 */
2118
void
2119
g_uri_params_iter_init (GUriParamsIter *iter,
2120
                        const gchar    *params,
2121
                        gssize          length,
2122
                        const gchar    *separators,
2123
                        GUriParamsFlags flags)
2124
0
{
2125
0
  RealIter *ri = (RealIter *)iter;
2126
0
  const gchar *s;
2127
2128
0
  g_return_if_fail (iter != NULL);
2129
0
  g_return_if_fail (length == 0 || params != NULL);
2130
0
  g_return_if_fail (length >= -1);
2131
0
  g_return_if_fail (separators != NULL);
2132
2133
0
  ri->flags = flags;
2134
2135
0
  if (length == -1)
2136
0
    ri->end = params + strlen (params);
2137
0
  else
2138
0
    ri->end = params + length;
2139
2140
0
  memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2141
0
  for (s = separators; *s != '\0'; ++s)
2142
0
    ri->sep_table[*(guchar *)s] = TRUE;
2143
2144
0
  ri->attr = params;
2145
0
}
2146
2147
/**
2148
 * g_uri_params_iter_next:
2149
 * @iter: an initialized #GUriParamsIter
2150
 * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2151
 *     the attribute, or %NULL.
2152
 * @value: (out) (nullable) (optional) (transfer full): on return, contains
2153
 *     the value, or %NULL.
2154
 * @error: #GError for error reporting, or %NULL to ignore.
2155
 *
2156
 * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2157
 * an error has occurred (in which case @error is set), or if the end of the
2158
 * iteration is reached (in which case @attribute and @value are set to %NULL
2159
 * and the iterator becomes invalid). If %TRUE is returned,
2160
 * g_uri_params_iter_next() may be called again to receive another
2161
 * attribute/value pair.
2162
 *
2163
 * Note that the same @attribute may be returned multiple times, since URIs
2164
 * allow repeated attributes.
2165
 *
2166
 * Returns: %FALSE if the end of the parameters has been reached or an error was
2167
 *     encountered. %TRUE otherwise.
2168
 *
2169
 * Since: 2.66
2170
 */
2171
gboolean
2172
g_uri_params_iter_next (GUriParamsIter *iter,
2173
                        gchar         **attribute,
2174
                        gchar         **value,
2175
                        GError        **error)
2176
0
{
2177
0
  RealIter *ri = (RealIter *)iter;
2178
0
  const gchar *attr_end, *val, *val_end;
2179
0
  gchar *decoded_attr, *decoded_value;
2180
0
  gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2181
0
  GUriFlags decode_flags = G_URI_FLAGS_NONE;
2182
2183
0
  g_return_val_if_fail (iter != NULL, FALSE);
2184
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2185
2186
  /* Pre-clear these in case of failure or finishing. */
2187
0
  if (attribute)
2188
0
    *attribute = NULL;
2189
0
  if (value)
2190
0
    *value = NULL;
2191
2192
0
  if (ri->attr >= ri->end)
2193
0
    return FALSE;
2194
2195
0
  if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2196
0
    decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2197
2198
  /* Check if each character in @attr is a separator, by indexing by the
2199
   * character value into the @sep_table, which has value 1 stored at an
2200
   * index if that index is a separator. */
2201
0
  for (val_end = ri->attr; val_end < ri->end; val_end++)
2202
0
    if (ri->sep_table[*(guchar *)val_end])
2203
0
      break;
2204
2205
0
  attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2206
0
  if (!attr_end)
2207
0
    {
2208
0
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2209
0
                           _("Missing ‘=’ and parameter value"));
2210
0
      return FALSE;
2211
0
    }
2212
0
  if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2213
0
                   www_form, decode_flags, G_URI_ERROR_FAILED, error))
2214
0
    {
2215
0
      return FALSE;
2216
0
    }
2217
2218
0
  val = attr_end + 1;
2219
0
  if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2220
0
                   www_form, decode_flags, G_URI_ERROR_FAILED, error))
2221
0
    {
2222
0
      g_free (decoded_attr);
2223
0
      return FALSE;
2224
0
    }
2225
2226
0
  if (attribute)
2227
0
    *attribute = g_steal_pointer (&decoded_attr);
2228
0
  if (value)
2229
0
    *value = g_steal_pointer (&decoded_value);
2230
2231
0
  g_free (decoded_attr);
2232
0
  g_free (decoded_value);
2233
2234
0
  ri->attr = val_end + 1;
2235
0
  return TRUE;
2236
0
}
2237
2238
/**
2239
 * g_uri_parse_params:
2240
 * @params: a `%`-encoded string containing `attribute=value`
2241
 *   parameters
2242
 * @length: the length of @params, or `-1` if it is nul-terminated
2243
 * @separators: the separator byte character set between parameters. (usually
2244
 *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2245
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2246
 *   anything but ASCII characters. You may pass an empty set, in which case
2247
 *   no splitting will occur.
2248
 * @flags: flags to modify the way the parameters are handled.
2249
 * @error: #GError for error reporting, or %NULL to ignore.
2250
 *
2251
 * Many URI schemes include one or more attribute/value pairs as part of the URI
2252
 * value. This method can be used to parse them into a hash table. When an
2253
 * attribute has multiple occurrences, the last value is the final returned
2254
 * value. If you need to handle repeated attributes differently, use
2255
 * #GUriParamsIter.
2256
 *
2257
 * The @params string is assumed to still be `%`-encoded, but the returned
2258
 * values will be fully decoded. (Thus it is possible that the returned values
2259
 * may contain `=` or @separators, if the value was encoded in the input.)
2260
 * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2261
 * rules for g_uri_parse(). (However, if @params is the path or query string
2262
 * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2263
 * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2264
 * invalid encoding.)
2265
 *
2266
 * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2267
 *
2268
 * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2269
 * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2270
 * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2271
 * the returned attributes.
2272
 *
2273
 * If @params cannot be parsed (for example, it contains two @separators
2274
 * characters in a row), then @error is set and %NULL is returned.
2275
 *
2276
 * Return value: (transfer full) (element-type utf8 utf8):
2277
 *     A hash table of attribute/value pairs, with both names and values
2278
 *     fully-decoded; or %NULL on error.
2279
 *
2280
 * Since: 2.66
2281
 */
2282
GHashTable *
2283
g_uri_parse_params (const gchar     *params,
2284
                    gssize           length,
2285
                    const gchar     *separators,
2286
                    GUriParamsFlags  flags,
2287
                    GError         **error)
2288
0
{
2289
0
  GHashTable *hash;
2290
0
  GUriParamsIter iter;
2291
0
  gchar *attribute, *value;
2292
0
  GError *err = NULL;
2293
2294
0
  g_return_val_if_fail (length == 0 || params != NULL, NULL);
2295
0
  g_return_val_if_fail (length >= -1, NULL);
2296
0
  g_return_val_if_fail (separators != NULL, NULL);
2297
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2298
2299
0
  if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2300
0
    {
2301
0
      hash = g_hash_table_new_full (str_ascii_case_hash,
2302
0
                                    str_ascii_case_equal,
2303
0
                                    g_free, g_free);
2304
0
    }
2305
0
  else
2306
0
    {
2307
0
      hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2308
0
                                    g_free, g_free);
2309
0
    }
2310
2311
0
  g_uri_params_iter_init (&iter, params, length, separators, flags);
2312
2313
0
  while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2314
0
    g_hash_table_insert (hash, attribute, value);
2315
2316
0
  if (err)
2317
0
    {
2318
0
      g_propagate_error (error, g_steal_pointer (&err));
2319
0
      g_hash_table_destroy (hash);
2320
0
      return NULL;
2321
0
    }
2322
2323
0
  return g_steal_pointer (&hash);
2324
0
}
2325
2326
/**
2327
 * g_uri_get_scheme:
2328
 * @uri: a #GUri
2329
 *
2330
 * Gets @uri's scheme. Note that this will always be all-lowercase,
2331
 * regardless of the string or strings that @uri was created from.
2332
 *
2333
 * Return value: (not nullable): @uri's scheme.
2334
 *
2335
 * Since: 2.66
2336
 */
2337
const gchar *
2338
g_uri_get_scheme (GUri *uri)
2339
0
{
2340
0
  g_return_val_if_fail (uri != NULL, NULL);
2341
2342
0
  return uri->scheme;
2343
0
}
2344
2345
/**
2346
 * g_uri_get_userinfo:
2347
 * @uri: a #GUri
2348
 *
2349
 * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2350
 * the flags with which @uri was created.
2351
 *
2352
 * Return value: (nullable): @uri's userinfo.
2353
 *
2354
 * Since: 2.66
2355
 */
2356
const gchar *
2357
g_uri_get_userinfo (GUri *uri)
2358
0
{
2359
0
  g_return_val_if_fail (uri != NULL, NULL);
2360
2361
0
  return uri->userinfo;
2362
0
}
2363
2364
/**
2365
 * g_uri_get_user:
2366
 * @uri: a #GUri
2367
 *
2368
 * Gets the ‘username’ component of @uri's userinfo, which may contain
2369
 * `%`-encoding, depending on the flags with which @uri was created.
2370
 * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2371
 * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2372
 *
2373
 * Return value: (nullable): @uri's user.
2374
 *
2375
 * Since: 2.66
2376
 */
2377
const gchar *
2378
g_uri_get_user (GUri *uri)
2379
0
{
2380
0
  g_return_val_if_fail (uri != NULL, NULL);
2381
2382
0
  return uri->user;
2383
0
}
2384
2385
/**
2386
 * g_uri_get_password:
2387
 * @uri: a #GUri
2388
 *
2389
 * Gets @uri's password, which may contain `%`-encoding, depending on
2390
 * the flags with which @uri was created. (If @uri was not created
2391
 * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2392
 *
2393
 * Return value: (nullable): @uri's password.
2394
 *
2395
 * Since: 2.66
2396
 */
2397
const gchar *
2398
g_uri_get_password (GUri *uri)
2399
0
{
2400
0
  g_return_val_if_fail (uri != NULL, NULL);
2401
2402
0
  return uri->password;
2403
0
}
2404
2405
/**
2406
 * g_uri_get_auth_params:
2407
 * @uri: a #GUri
2408
 *
2409
 * Gets @uri's authentication parameters, which may contain
2410
 * `%`-encoding, depending on the flags with which @uri was created.
2411
 * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2412
 * be %NULL.)
2413
 *
2414
 * Depending on the URI scheme, g_uri_parse_params() may be useful for
2415
 * further parsing this information.
2416
 *
2417
 * Return value: (nullable): @uri's authentication parameters.
2418
 *
2419
 * Since: 2.66
2420
 */
2421
const gchar *
2422
g_uri_get_auth_params (GUri *uri)
2423
0
{
2424
0
  g_return_val_if_fail (uri != NULL, NULL);
2425
2426
0
  return uri->auth_params;
2427
0
}
2428
2429
/**
2430
 * g_uri_get_host:
2431
 * @uri: a #GUri
2432
 *
2433
 * Gets @uri's host. This will never have `%`-encoded characters,
2434
 * unless it is non-UTF-8 (which can only be the case if @uri was
2435
 * created with %G_URI_FLAGS_NON_DNS).
2436
 *
2437
 * If @uri contained an IPv6 address literal, this value will be just
2438
 * that address, without the brackets around it that are necessary in
2439
 * the string form of the URI. Note that in this case there may also
2440
 * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2441
 * `fe80::1234%``25em1` if the string is still encoded).
2442
 *
2443
 * Return value: (nullable): @uri's host.
2444
 *
2445
 * Since: 2.66
2446
 */
2447
const gchar *
2448
g_uri_get_host (GUri *uri)
2449
0
{
2450
0
  g_return_val_if_fail (uri != NULL, NULL);
2451
2452
0
  return uri->host;
2453
0
}
2454
2455
/**
2456
 * g_uri_get_port:
2457
 * @uri: a #GUri
2458
 *
2459
 * Gets @uri's port.
2460
 *
2461
 * Return value: @uri's port, or `-1` if no port was specified.
2462
 *
2463
 * Since: 2.66
2464
 */
2465
gint
2466
g_uri_get_port (GUri *uri)
2467
0
{
2468
0
  g_return_val_if_fail (uri != NULL, -1);
2469
2470
0
  if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2471
0
    return default_scheme_port (uri->scheme);
2472
2473
0
  return uri->port;
2474
0
}
2475
2476
/**
2477
 * g_uri_get_path:
2478
 * @uri: a #GUri
2479
 *
2480
 * Gets @uri's path, which may contain `%`-encoding, depending on the
2481
 * flags with which @uri was created.
2482
 *
2483
 * Return value: (not nullable): @uri's path.
2484
 *
2485
 * Since: 2.66
2486
 */
2487
const gchar *
2488
g_uri_get_path (GUri *uri)
2489
0
{
2490
0
  g_return_val_if_fail (uri != NULL, NULL);
2491
2492
0
  return uri->path;
2493
0
}
2494
2495
/**
2496
 * g_uri_get_query:
2497
 * @uri: a #GUri
2498
 *
2499
 * Gets @uri's query, which may contain `%`-encoding, depending on the
2500
 * flags with which @uri was created.
2501
 *
2502
 * For queries consisting of a series of `name=value` parameters,
2503
 * #GUriParamsIter or g_uri_parse_params() may be useful.
2504
 *
2505
 * Return value: (nullable): @uri's query.
2506
 *
2507
 * Since: 2.66
2508
 */
2509
const gchar *
2510
g_uri_get_query (GUri *uri)
2511
0
{
2512
0
  g_return_val_if_fail (uri != NULL, NULL);
2513
2514
0
  return uri->query;
2515
0
}
2516
2517
/**
2518
 * g_uri_get_fragment:
2519
 * @uri: a #GUri
2520
 *
2521
 * Gets @uri's fragment, which may contain `%`-encoding, depending on
2522
 * the flags with which @uri was created.
2523
 *
2524
 * Return value: (nullable): @uri's fragment.
2525
 *
2526
 * Since: 2.66
2527
 */
2528
const gchar *
2529
g_uri_get_fragment (GUri *uri)
2530
0
{
2531
0
  g_return_val_if_fail (uri != NULL, NULL);
2532
2533
0
  return uri->fragment;
2534
0
}
2535
2536
2537
/**
2538
 * g_uri_get_flags:
2539
 * @uri: a #GUri
2540
 *
2541
 * Gets @uri's flags set upon construction.
2542
 *
2543
 * Return value: @uri's flags.
2544
 *
2545
 * Since: 2.66
2546
 **/
2547
GUriFlags
2548
g_uri_get_flags (GUri *uri)
2549
0
{
2550
0
  g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2551
2552
0
  return uri->flags;
2553
0
}
2554
2555
/**
2556
 * g_uri_unescape_segment:
2557
 * @escaped_string: (nullable): A string, may be %NULL
2558
 * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2559
 *   may be %NULL
2560
 * @illegal_characters: (nullable): An optional string of illegal
2561
 *   characters not to be allowed, may be %NULL
2562
 *
2563
 * Unescapes a segment of an escaped string.
2564
 *
2565
 * If any of the characters in @illegal_characters or the NUL
2566
 * character appears as an escaped character in @escaped_string, then
2567
 * that is an error and %NULL will be returned. This is useful if you
2568
 * want to avoid for instance having a slash being expanded in an
2569
 * escaped path element, which might confuse pathname handling.
2570
 *
2571
 * Note: `NUL` byte is not accepted in the output, in contrast to
2572
 * g_uri_unescape_bytes().
2573
 *
2574
 * Returns: (nullable): an unescaped version of @escaped_string,
2575
 * or %NULL on error. The returned string should be freed when no longer
2576
 * needed.  As a special case if %NULL is given for @escaped_string, this
2577
 * function will return %NULL.
2578
 *
2579
 * Since: 2.16
2580
 **/
2581
gchar *
2582
g_uri_unescape_segment (const gchar *escaped_string,
2583
                        const gchar *escaped_string_end,
2584
                        const gchar *illegal_characters)
2585
0
{
2586
0
  gchar *unescaped;
2587
0
  gsize length;
2588
0
  gssize decoded_len;
2589
2590
0
  if (!escaped_string)
2591
0
    return NULL;
2592
2593
0
  if (escaped_string_end)
2594
0
    length = escaped_string_end - escaped_string;
2595
0
  else
2596
0
    length = strlen (escaped_string);
2597
2598
0
  decoded_len = uri_decoder (&unescaped,
2599
0
                             illegal_characters,
2600
0
                             escaped_string, length,
2601
0
                             FALSE, FALSE,
2602
0
                             G_URI_FLAGS_ENCODED,
2603
0
                             0, NULL);
2604
0
  if (decoded_len < 0)
2605
0
    return NULL;
2606
2607
0
  if (memchr (unescaped, '\0', decoded_len))
2608
0
    {
2609
0
      g_free (unescaped);
2610
0
      return NULL;
2611
0
    }
2612
2613
0
  return unescaped;
2614
0
}
2615
2616
/**
2617
 * g_uri_unescape_string:
2618
 * @escaped_string: an escaped string to be unescaped.
2619
 * @illegal_characters: (nullable): a string of illegal characters
2620
 *   not to be allowed, or %NULL.
2621
 *
2622
 * Unescapes a whole escaped string.
2623
 *
2624
 * If any of the characters in @illegal_characters or the NUL
2625
 * character appears as an escaped character in @escaped_string, then
2626
 * that is an error and %NULL will be returned. This is useful if you
2627
 * want to avoid for instance having a slash being expanded in an
2628
 * escaped path element, which might confuse pathname handling.
2629
 *
2630
 * Returns: (nullable): an unescaped version of @escaped_string.
2631
 * The returned string should be freed when no longer needed.
2632
 *
2633
 * Since: 2.16
2634
 **/
2635
gchar *
2636
g_uri_unescape_string (const gchar *escaped_string,
2637
                       const gchar *illegal_characters)
2638
0
{
2639
0
  return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2640
0
}
2641
2642
/**
2643
 * g_uri_escape_string:
2644
 * @unescaped: the unescaped input string.
2645
 * @reserved_chars_allowed: (nullable): a string of reserved
2646
 *   characters that are allowed to be used, or %NULL.
2647
 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2648
 *
2649
 * Escapes a string for use in a URI.
2650
 *
2651
 * Normally all characters that are not "unreserved" (i.e. ASCII
2652
 * alphanumerical characters plus dash, dot, underscore and tilde) are
2653
 * escaped. But if you specify characters in @reserved_chars_allowed
2654
 * they are not escaped. This is useful for the "reserved" characters
2655
 * in the URI specification, since those are allowed unescaped in some
2656
 * portions of a URI.
2657
 *
2658
 * Returns: (not nullable): an escaped version of @unescaped. The
2659
 * returned string should be freed when no longer needed.
2660
 *
2661
 * Since: 2.16
2662
 **/
2663
gchar *
2664
g_uri_escape_string (const gchar *unescaped,
2665
                     const gchar *reserved_chars_allowed,
2666
                     gboolean     allow_utf8)
2667
0
{
2668
0
  GString *s;
2669
2670
0
  g_return_val_if_fail (unescaped != NULL, NULL);
2671
2672
0
  s = g_string_sized_new (strlen (unescaped) * 1.25);
2673
2674
0
  g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2675
2676
0
  return g_string_free (s, FALSE);
2677
0
}
2678
2679
/**
2680
 * g_uri_unescape_bytes:
2681
 * @escaped_string: A URI-escaped string
2682
 * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2683
 *   is nul-terminated.
2684
 * @illegal_characters: (nullable): a string of illegal characters
2685
 *   not to be allowed, or %NULL.
2686
 * @error: #GError for error reporting, or %NULL to ignore.
2687
 *
2688
 * Unescapes a segment of an escaped string as binary data.
2689
 *
2690
 * Note that in contrast to g_uri_unescape_string(), this does allow
2691
 * nul bytes to appear in the output.
2692
 *
2693
 * If any of the characters in @illegal_characters appears as an escaped
2694
 * character in @escaped_string, then that is an error and %NULL will be
2695
 * returned. This is useful if you want to avoid for instance having a slash
2696
 * being expanded in an escaped path element, which might confuse pathname
2697
 * handling.
2698
 *
2699
 * Returns: (transfer full): an unescaped version of @escaped_string
2700
 *     or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2701
 *     code). The returned #GBytes should be unreffed when no longer needed.
2702
 *
2703
 * Since: 2.66
2704
 **/
2705
GBytes *
2706
g_uri_unescape_bytes (const gchar *escaped_string,
2707
                      gssize       length,
2708
                      const char *illegal_characters,
2709
                      GError     **error)
2710
0
{
2711
0
  gchar *buf;
2712
0
  gssize unescaped_length;
2713
2714
0
  g_return_val_if_fail (escaped_string != NULL, NULL);
2715
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2716
2717
0
  if (length == -1)
2718
0
    length = strlen (escaped_string);
2719
2720
0
  unescaped_length = uri_decoder (&buf,
2721
0
                                  illegal_characters,
2722
0
                                  escaped_string, length,
2723
0
                                  FALSE,
2724
0
                                  FALSE,
2725
0
                                  G_URI_FLAGS_ENCODED,
2726
0
                                  G_URI_ERROR_FAILED, error);
2727
0
  if (unescaped_length == -1)
2728
0
    return NULL;
2729
2730
0
  return g_bytes_new_take (buf, unescaped_length);
2731
0
}
2732
2733
/**
2734
 * g_uri_escape_bytes:
2735
 * @unescaped: (array length=length): the unescaped input data.
2736
 * @length: the length of @unescaped
2737
 * @reserved_chars_allowed: (nullable): a string of reserved
2738
 *   characters that are allowed to be used, or %NULL.
2739
 *
2740
 * Escapes arbitrary data for use in a URI.
2741
 *
2742
 * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2743
 * alphanumerical characters plus dash, dot, underscore and tilde) are
2744
 * escaped. But if you specify characters in @reserved_chars_allowed
2745
 * they are not escaped. This is useful for the ‘reserved’ characters
2746
 * in the URI specification, since those are allowed unescaped in some
2747
 * portions of a URI.
2748
 *
2749
 * Though technically incorrect, this will also allow escaping nul
2750
 * bytes as `%``00`.
2751
 *
2752
 * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2753
 *     The returned string should be freed when no longer needed.
2754
 *
2755
 * Since: 2.66
2756
 */
2757
gchar *
2758
g_uri_escape_bytes (const guint8 *unescaped,
2759
                    gsize         length,
2760
                    const gchar  *reserved_chars_allowed)
2761
0
{
2762
0
  GString *string;
2763
2764
0
  g_return_val_if_fail (unescaped != NULL, NULL);
2765
2766
0
  string = g_string_sized_new (length * 1.25);
2767
2768
0
  _uri_encoder (string, unescaped, length,
2769
0
               reserved_chars_allowed, FALSE);
2770
2771
0
  return g_string_free (string, FALSE);
2772
0
}
2773
2774
static gssize
2775
g_uri_scheme_length (const gchar *uri)
2776
0
{
2777
0
  const gchar *p;
2778
2779
0
  p = uri;
2780
0
  if (!g_ascii_isalpha (*p))
2781
0
    return -1;
2782
0
  p++;
2783
0
  while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2784
0
    p++;
2785
2786
0
  if (p > uri && *p == ':')
2787
0
    return p - uri;
2788
2789
0
  return -1;
2790
0
}
2791
2792
/**
2793
 * g_uri_parse_scheme:
2794
 * @uri: a valid URI.
2795
 *
2796
 * Gets the scheme portion of a URI string.
2797
 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2798
 * as:
2799
 * |[
2800
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2801
 * ]|
2802
 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2803
 *
2804
 * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2805
 *     %NULL on error. The returned string should be freed when no longer needed.
2806
 *
2807
 * Since: 2.16
2808
 **/
2809
gchar *
2810
g_uri_parse_scheme (const gchar *uri)
2811
0
{
2812
0
  gssize len;
2813
2814
0
  g_return_val_if_fail (uri != NULL, NULL);
2815
2816
0
  len = g_uri_scheme_length (uri);
2817
0
  return len == -1 ? NULL : g_strndup (uri, len);
2818
0
}
2819
2820
/**
2821
 * g_uri_peek_scheme:
2822
 * @uri: a valid URI.
2823
 *
2824
 * Gets the scheme portion of a URI string.
2825
 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2826
 * as:
2827
 * |[
2828
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2829
 * ]|
2830
 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2831
 *
2832
 * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2833
 * all-lowercase and does not need to be freed.
2834
 *
2835
 * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2836
 *     %NULL on error. The returned string is normalized to all-lowercase, and
2837
 *     interned via g_intern_string(), so it does not need to be freed.
2838
 *
2839
 * Since: 2.66
2840
 **/
2841
const gchar *
2842
g_uri_peek_scheme (const gchar *uri)
2843
0
{
2844
0
  gssize len;
2845
0
  gchar *lower_scheme;
2846
0
  const gchar *scheme;
2847
2848
0
  g_return_val_if_fail (uri != NULL, NULL);
2849
2850
0
  len = g_uri_scheme_length (uri);
2851
0
  if (len == -1)
2852
0
    return NULL;
2853
2854
0
  lower_scheme = g_ascii_strdown (uri, len);
2855
0
  scheme = g_intern_string (lower_scheme);
2856
0
  g_free (lower_scheme);
2857
2858
0
  return scheme;
2859
0
}
2860
2861
/* Expands to the definition of g_uri_error_quark(), which returns the
 * quark for the string "g-uri-quark".
 * NOTE(review): presumably this backs the G_URI_ERROR domain used with
 * codes such as G_URI_ERROR_FAILED; confirm against guri.h. */
G_DEFINE_QUARK (g-uri-quark, g_uri_error)