Coverage Report

Created: 2025-11-16 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib/glib/guri.c
Line
Count
Source
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright © 2020 Red Hat, Inc.
3
 *
4
 * SPDX-License-Identifier: LGPL-2.1-or-later
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General
17
 * Public License along with this library; if not, see
18
 * <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "config.h"
22
23
#include <stdlib.h>
24
#include <string.h>
25
26
#include "glib.h"
27
#include "glibintl.h"
28
#include "glib-private.h"
29
#include "guriprivate.h"
30
31
/**
32
 * GUri:
33
 *
34
 * The `GUri` type and related functions can be used to parse URIs into
35
 * their components, and build valid URIs from individual components.
36
 *
37
 * Since `GUri` only represents absolute URIs, all `GUri`s will have a
38
 * URI scheme, so [method@GLib.Uri.get_scheme] will always return a non-`NULL`
39
 * answer. Likewise, by definition, all URIs have a path component, so
40
 * [method@GLib.Uri.get_path] will always return a non-`NULL` string (which may
41
 * be empty).
42
 *
43
 * If the URI string has an
44
 * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
45
 * is, if the scheme is followed by `://` rather than just `:`), then the
46
 * `GUri` will contain a hostname, and possibly a port and ‘userinfo’.
47
 * Additionally, depending on how the `GUri` was constructed/parsed (for example,
48
 * using the `G_URI_FLAGS_HAS_PASSWORD` and `G_URI_FLAGS_HAS_AUTH_PARAMS` flags),
49
 * the userinfo may be split out into a username, password, and
50
 * additional authorization-related parameters.
51
 *
52
 * Normally, the components of a `GUri` will have all `%`-encoded
53
 * characters decoded. However, if you construct/parse a `GUri` with
54
 * `G_URI_FLAGS_ENCODED`, then the `%`-encoding will be preserved instead in
55
 * the userinfo, path, and query fields (and in the host field if also
56
 * created with `G_URI_FLAGS_NON_DNS`). In particular, this is necessary if
57
 * the URI may contain binary data or non-UTF-8 text, or if decoding
58
 * the components might change the interpretation of the URI.
59
 *
60
 * For example, with the encoded flag:
61
 *
62
 * ```c
63
 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
64
 * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
65
 * ```
66
 *
67
 * While the default `%`-decoding behaviour would give:
68
 *
69
 * ```c
70
 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
71
 * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
72
 * ```
73
 *
74
 * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
75
 * with an error indicating the bad string location:
76
 *
77
 * ```c
78
 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
79
 * g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
80
 * ```
81
 *
82
 * You should pass `G_URI_FLAGS_ENCODED` or `G_URI_FLAGS_ENCODED_QUERY` if you
83
 * need to handle that case manually. In particular, if the query string
84
 * contains `=` characters that are `%`-encoded, you should let
85
 * [func@GLib.Uri.parse_params] decode the query, so that it is decoded only once.
86
 *
87
 * `GUri` is immutable once constructed, and can safely be accessed from
88
 * multiple threads. Its reference counting is atomic.
89
 *
90
 * Note that the scope of `GUri` is to help manipulate URIs in various applications,
91
 * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
92
 * it doesn't intend to cover web browser needs, and doesn’t implement the
93
 * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
94
 * help prevent
95
 * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
96
 * `GUri` is not suitable for formatting URIs for display to the user for making
97
 * security-sensitive decisions.
98
 *
99
 * ## Relative and absolute URIs
100
 *
101
 * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
102
 * hierarchical nature of URIs means that they can either be ‘relative
103
 * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
104
 * clarity, ‘URIs’ are referred to in this documentation as
105
 * ‘absolute URIs’ — although
106
 * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
107
 * fragment identifiers are always allowed).
108
 *
109
 * Relative references have one or more components of the URI missing. In
110
 * particular, they have no scheme. Any other component, such as hostname,
111
 * query, etc. may be missing, apart from a path, which has to be specified (but
112
 * may be empty). The path may be relative, starting with `./` rather than `/`.
113
 *
114
 * For example, a valid relative reference is `./path?query`,
115
 * `/?query#fragment` or `//example.com`.
116
 *
117
 * Absolute URIs have a scheme specified. Any other components of the URI which
118
 * are missing are specified as explicitly unset in the URI, rather than being
119
 * resolved relative to a base URI using [method@GLib.Uri.parse_relative].
120
 *
121
 * For example, a valid absolute URI is `file:///home/bob` or
122
 * `https://search.com?query=string`.
123
 *
124
 * A `GUri` instance is always an absolute URI. A string may be an absolute URI
125
 * or a relative reference; see the documentation for individual functions as to
126
 * what forms they accept.
127
 *
128
 * ## Parsing URIs
129
 *
130
 * The most minimalist APIs for parsing URIs are [func@GLib.Uri.split] and
131
 * [func@GLib.Uri.split_with_user]. These split a URI into its component
132
 * parts, and return the parts; the difference between the two is that
133
 * [func@GLib.Uri.split] treats the ‘userinfo’ component of the URI as a
134
 * single element, while [func@GLib.Uri.split_with_user] can (depending on the
135
 * [flags@GLib.UriFlags] you pass) treat it as containing a username, password,
136
 * and authentication parameters. Alternatively, [func@GLib.Uri.split_network]
137
 * can be used when you are only interested in the components that are
138
 * needed to initiate a network connection to the service (scheme,
139
 * host, and port).
140
 *
141
 * [func@GLib.Uri.parse] is similar to [func@GLib.Uri.split], but instead of
142
 * returning individual strings, it returns a `GUri` structure (and it requires
143
 * that the URI be an absolute URI).
144
 *
145
 * [func@GLib.Uri.resolve_relative] and [method@GLib.Uri.parse_relative] allow
146
 * you to resolve a relative URI relative to a base URI.
147
 * [func@GLib.Uri.resolve_relative] takes two strings and returns a string,
148
 * and [method@GLib.Uri.parse_relative] takes a `GUri` and a string and returns a
149
 * `GUri`.
150
 *
151
 * All of the parsing functions take a [flags@GLib.UriFlags] argument describing
152
 * exactly how to parse the URI; see the documentation for that type
153
 * for more details on the specific flags that you can pass. If you
154
 * need to choose different flags based on the type of URI, you can
155
 * use [func@GLib.Uri.peek_scheme] on the URI string to check the scheme
156
 * first, and use that to decide what flags to parse it with.
157
 *
158
 * For example, you might want to use `G_URI_PARAMS_WWW_FORM` when parsing the
159
 * params for a web URI, so compare the result of [func@GLib.Uri.peek_scheme]
160
 * against `http` and `https`.
161
 *
162
 * ## Building URIs
163
 *
164
 * [func@GLib.Uri.join] and [func@GLib.Uri.join_with_user] can be used to construct
165
 * valid URI strings from a set of component strings. They are the
166
 * inverse of [func@GLib.Uri.split] and [func@GLib.Uri.split_with_user].
167
 *
168
 * Similarly, [func@GLib.Uri.build] and [func@GLib.Uri.build_with_user] can be
169
 * used to construct a `GUri` from a set of component strings.
170
 *
171
 * As with the parsing functions, the building functions take a
172
 * [flags@GLib.UriFlags] argument. In particular, it is important to keep in mind
173
 * whether the URI components you are using are already `%`-encoded. If so,
174
 * you must pass the `G_URI_FLAGS_ENCODED` flag.
175
 *
176
 * ## `file://` URIs
177
 *
178
 * Note that Windows and Unix both define special rules for parsing
179
 * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
180
 * interpretation of path separators on Windows). `GUri` does not
181
 * implement these rules. Use [func@GLib.filename_from_uri] and
182
 * [func@GLib.filename_to_uri] if you want to properly convert between
183
 * `file://` URIs and local filenames.
184
 *
185
 * ## URI Equality
186
 *
187
 * Note that there is no `g_uri_equal ()` function, because comparing
188
 * URIs usefully requires scheme-specific knowledge that `GUri` does
189
 * not have. `GUri` can help with normalization if you use the various
190
 * encoded [flags@GLib.UriFlags] as well as `G_URI_FLAGS_SCHEME_NORMALIZE`
191
 * however it is not comprehensive.
192
 * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
193
 * thing according to the `data:` URI specification which GLib does not
194
 * handle.
195
 *
196
 * Since: 2.66
197
 */
198
struct _GUri { /* Private instance data; every gchar* member below is released with g_free() in g_uri_clear(). */
199
  gchar     *scheme; /* URI scheme */
200
  gchar     *userinfo; /* whole userinfo component; see user/password/auth_params for the split form */
201
  gchar     *host; /* host component */
202
  gint       port; /* port number; presumably -1 when unset, as g_uri_split_internal() defaults it — confirm */
203
  gchar     *path; /* path component */
204
  gchar     *query; /* query component */
205
  gchar     *fragment; /* fragment component */
206
207
  gchar     *user; /* username part of the userinfo */
208
  gchar     *password; /* password part of the userinfo (see G_URI_FLAGS_HAS_PASSWORD) */
209
  gchar     *auth_params; /* auth-params part of the userinfo (see G_URI_FLAGS_HAS_AUTH_PARAMS) */
210
211
  GUriFlags  flags; /* presumably the flags the URI was built/parsed with — confirm against the constructors */
212
};
213
214
/**
215
 * g_uri_ref: (skip)
216
 * @uri: a #GUri
217
 *
218
 * Increments the reference count of @uri by one.
219
 *
220
 * Returns: @uri
221
 *
222
 * Since: 2.66
223
 */
224
GUri *
225
g_uri_ref (GUri *uri)
226
0
{
227
0
  g_return_val_if_fail (uri != NULL, NULL);
228
229
0
  return g_atomic_rc_box_acquire (uri);
230
0
}
231
232
static void
233
g_uri_clear (GUri *uri)
234
33.5k
{
235
33.5k
  g_free (uri->scheme);
236
33.5k
  g_free (uri->userinfo);
237
33.5k
  g_free (uri->host);
238
33.5k
  g_free (uri->path);
239
33.5k
  g_free (uri->query);
240
33.5k
  g_free (uri->fragment);
241
33.5k
  g_free (uri->user);
242
33.5k
  g_free (uri->password);
243
33.5k
  g_free (uri->auth_params);
244
33.5k
}
245
246
/**
247
 * g_uri_unref: (skip)
248
 * @uri: a #GUri
249
 *
250
 * Atomically decrements the reference count of @uri by one.
251
 *
252
 * When the reference count reaches zero, the resources allocated by
253
 * @uri are freed
254
 *
255
 * Since: 2.66
256
 */
257
void
258
g_uri_unref (GUri *uri)
259
33.5k
{
260
33.5k
  g_return_if_fail (uri != NULL);
261
262
33.5k
  g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
263
33.5k
}
264
265
static gboolean
266
g_uri_char_is_unreserved (gchar ch)
267
196M
{
268
196M
  if (g_ascii_isalnum (ch))
269
41.0M
    return TRUE;
270
155M
  return ch == '-' || ch == '.' || ch == '_' || ch == '~';
271
196M
}
272
273
20.8M
/* Numeric value of the ASCII hex digit @c. Characters up to '9' are treated
 * as decimal digits; anything else is folded to upper case by the 0x4F mask
 * and measured from 'A'. Only meaningful for characters that are hex digits
 * (callers check with g_ascii_isxdigit() first). @c is evaluated more than
 * once, so it must not have side effects.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
/* Value of the two hex digits that follow the character @s points at,
 * i.e. (s)[1] and (s)[2] of a "%XX" escape. @s is parenthesized so that
 * pointer expressions such as `p + 1` expand correctly.
 */
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
275
276
static gssize
277
uri_decoder (gchar       **out,
278
             const gchar  *illegal_chars,
279
             const gchar  *start,
280
             gsize         length,
281
             gboolean      just_normalize,
282
             gboolean      www_form,
283
             GUriFlags     flags,
284
             GUriError     parse_error,
285
             GError      **error)
286
84.9k
{
287
84.9k
  gchar c;
288
84.9k
  GString *decoded;
289
84.9k
  const gchar *invalid, *s, *end;
290
84.9k
  gssize len;
291
292
84.9k
  if (!(flags & G_URI_FLAGS_ENCODED))
293
80.7k
    just_normalize = FALSE;
294
295
84.9k
  decoded = g_string_sized_new (length + 1);
296
785M
  for (s = start, end = s + length; s < end; s++)
297
785M
    {
298
785M
      if (*s == '%')
299
10.4M
        {
300
10.4M
          if (s + 2 >= end ||
301
10.4M
              !g_ascii_isxdigit (s[1]) ||
302
10.4M
              !g_ascii_isxdigit (s[2]))
303
92.2k
            {
304
              /* % followed by non-hex or the end of the string; this is an error */
305
92.2k
              if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
306
3.09k
                {
307
3.09k
                  g_set_error_literal (error, G_URI_ERROR, parse_error,
308
                                       /* xgettext: no-c-format */
309
3.09k
                                       _("Invalid %-encoding in URI"));
310
3.09k
                  g_string_free (decoded, TRUE);
311
3.09k
                  return -1;
312
3.09k
                }
313
314
              /* In non-strict mode, just let it through; we *don't*
315
               * fix it to "%25", since that might change the way that
316
               * the URI's owner would interpret it.
317
               */
318
89.1k
              g_string_append_c (decoded, *s);
319
89.1k
              continue;
320
92.2k
            }
321
322
10.4M
          c = HEXCHAR (s);
323
10.4M
          if (illegal_chars && strchr (illegal_chars, c))
324
0
            {
325
0
              g_set_error_literal (error, G_URI_ERROR, parse_error,
326
0
                                   _("Illegal character in URI"));
327
0
              g_string_free (decoded, TRUE);
328
0
              return -1;
329
0
            }
330
10.4M
          if (just_normalize && !g_uri_char_is_unreserved (c))
331
490
            {
332
              /* Leave the % sequence there but normalize it. */
333
490
              g_string_append_c (decoded, *s);
334
490
              g_string_append_c (decoded, g_ascii_toupper (s[1]));
335
490
              g_string_append_c (decoded, g_ascii_toupper (s[2]));
336
490
              s += 2;
337
490
            }
338
10.4M
          else
339
10.4M
            {
340
10.4M
              g_string_append_c (decoded, c);
341
10.4M
              s += 2;
342
10.4M
            }
343
10.4M
        }
344
775M
      else if (www_form && *s == '+')
345
0
        g_string_append_c (decoded, ' ');
346
      /* Normalize any illegal characters. */
347
775M
      else if (just_normalize && (!g_ascii_isgraph (*s)))
348
30.6M
        g_string_append_printf (decoded, "%%%02X", (guchar)*s);
349
744M
      else
350
744M
        g_string_append_c (decoded, *s);
351
785M
    }
352
353
81.8k
  len = decoded->len;
354
81.8k
  g_assert (len >= 0);
355
356
81.8k
  if (!(flags & G_URI_FLAGS_ENCODED) &&
357
77.8k
      !g_utf8_validate (decoded->str, len, &invalid))
358
5.68k
    {
359
5.68k
      g_set_error_literal (error, G_URI_ERROR, parse_error,
360
5.68k
                           _("Non-UTF-8 characters in URI"));
361
5.68k
      g_string_free (decoded, TRUE);
362
5.68k
      return -1;
363
5.68k
    }
364
365
76.1k
  if (out)
366
74.3k
    *out = g_string_free (decoded, FALSE);
367
1.79k
  else
368
1.79k
    g_string_free (decoded, TRUE);
369
370
76.1k
  return len;
371
81.8k
}
372
373
static gboolean
374
uri_decode (gchar       **out,
375
            const gchar  *illegal_chars,
376
            const gchar  *start,
377
            gsize         length,
378
            gboolean      www_form,
379
            GUriFlags     flags,
380
            GUriError     parse_error,
381
            GError      **error)
382
46.9k
{
383
46.9k
  return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
384
46.9k
                      parse_error, error) != -1;
385
46.9k
}
386
387
static gboolean
388
uri_normalize (gchar       **out,
389
               const gchar  *start,
390
               gsize         length,
391
               GUriFlags     flags,
392
               GUriError     parse_error,
393
               GError      **error)
394
36.9k
{
395
36.9k
  return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
396
36.9k
                      parse_error, error) != -1;
397
36.9k
}
398
399
static gboolean
400
is_valid (guchar       c,
401
          const gchar *reserved_chars_allowed)
402
196M
{
403
196M
  if (g_uri_char_is_unreserved (c))
404
41.2M
    return TRUE;
405
406
155M
  if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
407
25.8k
    return TRUE;
408
409
155M
  return FALSE;
410
155M
}
411
412
void
413
_uri_encoder (GString      *out,
414
              const guchar *start,
415
              gsize         length,
416
              const gchar  *reserved_chars_allowed,
417
              gboolean      allow_utf8)
418
11.5k
{
419
11.5k
  static const gchar hex[] = "0123456789ABCDEF";
420
11.5k
  const guchar *p = start;
421
11.5k
  const guchar *end = p + length;
422
423
196M
  while (p < end)
424
196M
    {
425
196M
      gunichar multibyte_utf8_char = 0;
426
427
196M
      if (allow_utf8 && *p >= 0x80)
428
3.30M
        multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
429
430
196M
      if (multibyte_utf8_char > 0 &&
431
3.30M
          multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
432
28.0k
        {
433
28.0k
          gint len = g_utf8_skip [*p];
434
28.0k
          g_string_append_len (out, (gchar *)p, len);
435
28.0k
          p += len;
436
28.0k
        }
437
196M
      else if (is_valid (*p, reserved_chars_allowed))
438
41.2M
        {
439
41.2M
          g_string_append_c (out, *p);
440
41.2M
          p++;
441
41.2M
        }
442
155M
      else
443
155M
        {
444
155M
          g_string_append_c (out, '%');
445
155M
          g_string_append_c (out, hex[*p >> 4]);
446
155M
          g_string_append_c (out, hex[*p & 0xf]);
447
155M
          p++;
448
155M
        }
449
196M
    }
450
11.5k
}
451
452
/* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
453
 * support IPv6 zone identifiers.
454
 *
455
 * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
456
 * There’s no point supporting them until (a) they exist and (b) the rest of the
457
 * stack (notably, sockets) supports them.
458
 *
459
 * Rules:
460
 *
461
 * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
462
 *
463
 * ZoneID = 1*( unreserved / pct-encoded )
464
 *
465
 * IPv6addrz = IPv6address "%25" ZoneID
466
 *
467
 * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
468
 *
469
 * IPv6addrz = IPv6address "%" ZoneID
470
 */
471
static gboolean
472
parse_ip_literal (const gchar  *start,
473
                  gsize         length,
474
                  GUriFlags     flags,
475
                  gchar       **out,
476
                  GError      **error)
477
2.28k
{
478
2.28k
  gchar *pct, *zone_id = NULL;
479
2.28k
  gchar *addr = NULL;
480
2.28k
  gsize addr_length = 0;
481
2.28k
  gsize zone_id_length = 0;
482
2.28k
  gchar *decoded_zone_id = NULL;
483
484
2.28k
  if (start[length - 1] != ']')
485
216
    goto bad_ipv6_literal;
486
487
  /* Drop the square brackets */
488
2.06k
  addr = g_strndup (start + 1, length - 2);
489
2.06k
  addr_length = length - 2;
490
491
  /* If there's an IPv6 scope ID, split out the zone. */
492
2.06k
  pct = strchr (addr, '%');
493
2.06k
  if (pct != NULL)
494
487
    {
495
487
      *pct = '\0';
496
497
487
      if (addr_length - (pct - addr) >= 4 &&
498
433
          *(pct + 1) == '2' && *(pct + 2) == '5')
499
74
        {
500
74
          zone_id = pct + 3;
501
74
          zone_id_length = addr_length - (zone_id - addr);
502
74
        }
503
413
      else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
504
100
               addr_length - (pct - addr) >= 2)
505
99
        {
506
99
          zone_id = pct + 1;
507
99
          zone_id_length = addr_length - (zone_id - addr);
508
99
        }
509
314
      else
510
314
        goto bad_ipv6_literal;
511
512
487
      g_assert (zone_id_length >= 1);
513
173
    }
514
515
  /* addr must be an IPv6 address */
516
1.75k
  if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
517
1.05k
    goto bad_ipv6_literal;
518
519
  /* Zone ID must be valid. It can contain %-encoded characters. */
520
694
  if (zone_id != NULL &&
521
89
      !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
522
89
                   flags, G_URI_ERROR_BAD_HOST, NULL))
523
31
    goto bad_ipv6_literal;
524
525
  /* Success */
526
663
  if (out != NULL && decoded_zone_id != NULL)
527
58
    *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
528
605
  else if (out != NULL)
529
605
    *out = g_steal_pointer (&addr);
530
531
663
  g_free (addr);
532
663
  g_free (decoded_zone_id);
533
534
663
  return TRUE;
535
536
1.61k
bad_ipv6_literal:
537
1.61k
  g_free (addr);
538
1.61k
  g_free (decoded_zone_id);
539
1.61k
  g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
540
1.61k
               _("Invalid IPv6 address ‘%.*s’ in URI"),
541
1.61k
               (gint)length, start);
542
543
1.61k
  return FALSE;
544
694
}
545
546
static gboolean
547
parse_host (const gchar  *start,
548
            gsize         length,
549
            GUriFlags     flags,
550
            gchar       **out,
551
            GError      **error)
552
24.0k
{
553
24.0k
  gchar *decoded = NULL, *host;
554
24.0k
  gchar *addr = NULL;
555
556
24.0k
  if (*start == '[')
557
2.28k
    {
558
2.28k
      if (!parse_ip_literal (start, length, flags, &host, error))
559
1.61k
        return FALSE;
560
663
      goto ok;
561
2.28k
    }
562
563
21.7k
  if (g_ascii_isdigit (*start))
564
814
    {
565
814
      addr = g_strndup (start, length);
566
814
      if (g_hostname_is_ip_address (addr))
567
18
        {
568
18
          host = addr;
569
18
          goto ok;
570
18
        }
571
796
      g_free (addr);
572
796
    }
573
574
21.7k
  if (flags & G_URI_FLAGS_NON_DNS)
575
2.43k
    {
576
2.43k
      if (!uri_normalize (&decoded, start, length, flags,
577
2.43k
                          G_URI_ERROR_BAD_HOST, error))
578
279
        return FALSE;
579
2.16k
      host = g_steal_pointer (&decoded);
580
2.16k
      goto ok;
581
2.43k
    }
582
583
19.3k
  flags &= ~G_URI_FLAGS_ENCODED;
584
19.3k
  if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
585
19.3k
                   G_URI_ERROR_BAD_HOST, error))
586
1.71k
    return FALSE;
587
588
  /* You're not allowed to %-encode an IP address, so if it wasn't
589
   * one before, it better not be one now.
590
   */
591
17.6k
  if (g_hostname_is_ip_address (decoded))
592
38
    {
593
38
      g_free (decoded);
594
38
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
595
38
                   _("Illegal encoded IP address ‘%.*s’ in URI"),
596
38
                   (gint)length, start);
597
38
      return FALSE;
598
38
    }
599
600
17.5k
  if (g_hostname_is_non_ascii (decoded))
601
13.7k
    {
602
13.7k
      host = g_hostname_to_ascii (decoded);
603
13.7k
      if (host == NULL)
604
4.69k
        {
605
4.69k
          g_free (decoded);
606
4.69k
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
607
4.69k
                       _("Illegal internationalized hostname ‘%.*s’ in URI"),
608
4.69k
                       (gint) length, start);
609
4.69k
          return FALSE;
610
4.69k
        }
611
13.7k
    }
612
3.87k
  else
613
3.87k
    {
614
3.87k
      host = g_steal_pointer (&decoded);
615
3.87k
    }
616
617
15.7k
 ok:
618
15.7k
  if (out)
619
15.7k
    *out = g_steal_pointer (&host);
620
15.7k
  g_free (host);
621
15.7k
  g_free (decoded);
622
623
15.7k
  return TRUE;
624
17.5k
}
625
626
static gboolean
627
parse_port (const gchar  *start,
628
            gsize         length,
629
            gint         *out,
630
            GError      **error)
631
1.90k
{
632
1.90k
  gchar *end;
633
1.90k
  gulong parsed_port;
634
635
  /* strtoul() allows leading + or -, so we have to check this first. */
636
1.90k
  if (!g_ascii_isdigit (*start))
637
56
    {
638
56
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
639
56
                   _("Could not parse port ‘%.*s’ in URI"),
640
56
                   (gint)length, start);
641
56
      return FALSE;
642
56
    }
643
644
  /* We know that *(start + length) is either '\0' or a non-numeric
645
   * character, so strtoul() won't scan beyond it.
646
   */
647
1.85k
  parsed_port = strtoul (start, &end, 10);
648
1.85k
  if (end != start + length)
649
26
    {
650
26
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
651
26
                   _("Could not parse port ‘%.*s’ in URI"),
652
26
                   (gint)length, start);
653
26
      return FALSE;
654
26
    }
655
1.82k
  else if (parsed_port > 65535)
656
845
    {
657
845
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
658
845
                   _("Port ‘%.*s’ in URI is out of range"),
659
845
                   (gint)length, start);
660
845
      return FALSE;
661
845
    }
662
663
982
  if (out)
664
982
    *out = parsed_port;
665
982
  return TRUE;
666
1.85k
}
667
668
static gboolean
669
parse_userinfo (const gchar  *start,
670
                gsize         length,
671
                GUriFlags     flags,
672
                gchar       **user,
673
                gchar       **password,
674
                gchar       **auth_params,
675
                GError      **error)
676
2.89k
{
677
2.89k
  const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
678
679
2.89k
  auth_params_end = start + length;
680
2.89k
  if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
681
361
    password_end = memchr (start, ';', auth_params_end - start);
682
2.89k
  if (!password_end)
683
2.71k
    password_end = auth_params_end;
684
2.89k
  if (flags & G_URI_FLAGS_HAS_PASSWORD)
685
361
    user_end = memchr (start, ':', password_end - start);
686
2.89k
  if (!user_end)
687
2.71k
    user_end = password_end;
688
689
2.89k
  if (!uri_normalize (user, start, user_end - start, flags,
690
2.89k
                      G_URI_ERROR_BAD_USER, error))
691
492
    return FALSE;
692
693
2.40k
  if (*user_end == ':')
694
149
    {
695
149
      start = user_end + 1;
696
149
      if (!uri_normalize (password, start, password_end - start, flags,
697
149
                          G_URI_ERROR_BAD_PASSWORD, error))
698
9
        {
699
9
          if (user)
700
9
            g_clear_pointer (user, g_free);
701
9
          return FALSE;
702
9
        }
703
149
    }
704
2.25k
  else if (password)
705
2.25k
    *password = NULL;
706
707
2.39k
  if (*password_end == ';')
708
141
    {
709
141
      start = password_end + 1;
710
141
      if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
711
141
                          G_URI_ERROR_BAD_AUTH_PARAMS, error))
712
7
        {
713
7
          if (user)
714
7
            g_clear_pointer (user, g_free);
715
7
          if (password)
716
7
            g_clear_pointer (password, g_free);
717
7
          return FALSE;
718
7
        }
719
141
    }
720
2.25k
  else if (auth_params)
721
2.25k
    *auth_params = NULL;
722
723
2.38k
  return TRUE;
724
2.39k
}
725
726
static gchar *
727
uri_cleanup (const gchar *uri_string)
728
978
{
729
978
  GString *copy;
730
978
  const gchar *end;
731
732
  /* Skip leading whitespace */
733
2.79k
  while (g_ascii_isspace (*uri_string))
734
1.81k
    uri_string++;
735
736
  /* Ignore trailing whitespace */
737
978
  end = uri_string + strlen (uri_string);
738
2.20k
  while (end > uri_string && g_ascii_isspace (*(end - 1)))
739
1.22k
    end--;
740
741
  /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
742
978
  copy = g_string_sized_new (end - uri_string);
743
71.9M
  while (uri_string < end)
744
71.9M
    {
745
71.9M
      if (*uri_string == ' ')
746
11.7M
        g_string_append (copy, "%20");
747
60.1M
      else if (g_ascii_isspace (*uri_string))
748
205k
        ;
749
59.9M
      else
750
59.9M
        g_string_append_c (copy, *uri_string);
751
71.9M
      uri_string++;
752
71.9M
    }
753
754
978
  return g_string_free (copy, FALSE);
755
978
}
756
757
static gboolean
758
should_normalize_empty_path (const char *scheme)
759
1.19k
{
760
1.19k
  const char * const schemes[] = { "https", "http", "wss", "ws" };
761
1.19k
  gsize i;
762
5.92k
  for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
763
4.75k
    {
764
4.75k
      if (!strcmp (schemes[i], scheme))
765
18
        return TRUE;
766
4.75k
    }
767
1.17k
  return FALSE;
768
1.19k
}
769
770
/* Returns -1 if @port is the well-known port of @scheme (21 for "ftp",
 * 80 for "http"/"ws", 443 for "https"/"wss"); otherwise returns @port
 * unchanged. @scheme is compared case-sensitively.
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  const char *primary = NULL;
  const char *secondary = NULL;

  if (port == 21)
    {
      primary = "ftp";
    }
  else if (port == 80)
    {
      primary = "http";
      secondary = "ws";
    }
  else if (port == 443)
    {
      primary = "https";
      secondary = "wss";
    }

  if (primary != NULL && strcmp (scheme, primary) == 0)
    return -1;
  if (secondary != NULL && strcmp (scheme, secondary) == 0)
    return -1;

  return port;
}
802
803
/* g_uri_get_default_scheme_port:
 * @scheme: a URI scheme; must not be NULL, compared case-sensitively
 *
 * Returns: the default port for @scheme: 80 for "http"/"ws", 443 for
 *   "https"/"wss", 21 for "ftp", 1080 for any scheme starting with
 *   "socks", or -1 if no default is known
 */
int
g_uri_get_default_scheme_port (const char *scheme)
{
  if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
    return 80;

  if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
    return 443;

  if (strcmp (scheme, "ftp") == 0)
    return 21;

  /* Prefix match, so "socks", "socks4", "socks5", … all qualify.
   * strncmp() looks at no more than the first five bytes, whereas
   * searching with strstr() scans the whole string on a mismatch.
   */
  if (strncmp (scheme, "socks", 5) == 0)
    return 1080;

  return -1;
}
820
821
static gboolean
822
g_uri_split_internal (const gchar  *uri_string,
823
                      GUriFlags     flags,
824
                      gchar       **scheme,
825
                      gchar       **userinfo,
826
                      gchar       **user,
827
                      gchar       **password,
828
                      gchar       **auth_params,
829
                      gchar       **host,
830
                      gint         *port,
831
                      gchar       **path,
832
                      gchar       **query,
833
                      gchar       **fragment,
834
                      GError      **error)
835
36.5k
{
836
36.5k
  const gchar *end, *colon, *at, *path_start, *semi, *question;
837
36.5k
  const gchar *p, *bracket, *hostend;
838
36.5k
  gchar *cleaned_uri_string = NULL;
839
36.5k
  gchar *normalized_scheme = NULL;
840
841
36.5k
  if (scheme)
842
36.5k
    *scheme = NULL;
843
36.5k
  if (userinfo)
844
33.5k
    *userinfo = NULL;
845
36.5k
  if (user)
846
33.5k
    *user = NULL;
847
36.5k
  if (password)
848
33.5k
    *password = NULL;
849
36.5k
  if (auth_params)
850
33.5k
    *auth_params = NULL;
851
36.5k
  if (host)
852
36.5k
    *host = NULL;
853
36.5k
  if (port)
854
36.5k
    *port = -1;
855
36.5k
  if (path)
856
33.5k
    *path = NULL;
857
36.5k
  if (query)
858
33.5k
    *query = NULL;
859
36.5k
  if (fragment)
860
33.5k
    *fragment = NULL;
861
862
36.5k
  if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
863
978
    {
864
978
      cleaned_uri_string = uri_cleanup (uri_string);
865
978
      uri_string = cleaned_uri_string;
866
978
    }
867
868
  /* Find scheme */
869
36.5k
  p = uri_string;
870
62.4M
  while (*p && (g_ascii_isalpha (*p) ||
871
2.88M
               (p > uri_string && (g_ascii_isdigit (*p) ||
872
81.0k
                                   *p == '.' || *p == '+' || *p == '-'))))
873
62.4M
    p++;
874
875
36.5k
  if (p > uri_string && *p == ':')
876
8.32k
    {
877
8.32k
      normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
878
8.32k
      if (scheme)
879
8.32k
        *scheme = g_steal_pointer (&normalized_scheme);
880
8.32k
      p++;
881
8.32k
    }
882
28.2k
  else
883
28.2k
    {
884
28.2k
      if (scheme)
885
28.2k
        *scheme = NULL;
886
28.2k
      p = uri_string;
887
28.2k
    }
888
889
  /* Check for authority */
890
36.5k
  if (strncmp (p, "//", 2) == 0)
891
24.5k
    {
892
24.5k
      p += 2;
893
894
24.5k
      path_start = p + strcspn (p, "/?#");
895
24.5k
      at = memchr (p, '@', path_start - p);
896
24.5k
      if (at)
897
2.92k
        {
898
2.92k
          if (flags & G_URI_FLAGS_PARSE_RELAXED)
899
365
            {
900
365
              gchar *next_at;
901
902
              /* Any "@"s in the userinfo must be %-encoded, but
903
               * people get this wrong sometimes. Since "@"s in the
904
               * hostname are unlikely (and also wrong anyway), assume
905
               * that if there are extra "@"s, they belong in the
906
               * userinfo.
907
               */
908
365
              do
909
740
                {
910
740
                  next_at = memchr (at + 1, '@', path_start - (at + 1));
911
740
                  if (next_at)
912
375
                    at = next_at;
913
740
                }
914
740
              while (next_at);
915
365
            }
916
917
2.92k
          if (user || password || auth_params ||
918
37
              (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
919
2.89k
            {
920
2.89k
              if (!parse_userinfo (p, at - p, flags,
921
2.89k
                                   user, password, auth_params,
922
2.89k
                                   error))
923
508
                goto fail;
924
2.89k
            }
925
926
2.42k
          if (!uri_normalize (userinfo, p, at - p, flags,
927
2.42k
                              G_URI_ERROR_BAD_USER, error))
928
1
            goto fail;
929
930
2.42k
          p = at + 1;
931
2.42k
        }
932
933
24.0k
      if (flags & G_URI_FLAGS_PARSE_RELAXED)
934
2.83k
        {
935
2.83k
          semi = strchr (p, ';');
936
2.83k
          if (semi && semi < path_start)
937
59
            {
938
              /* Technically, semicolons are allowed in the "host"
939
               * production, but no one ever does this, and some
940
               * schemes mistakenly use semicolon as a delimiter
941
               * marking the start of the path. We have to check this
942
               * after checking for userinfo though, because a
943
               * semicolon before the "@" must be part of the
944
               * userinfo.
945
               */
946
59
              path_start = semi;
947
59
            }
948
2.83k
        }
949
950
      /* Find host and port. The host may be a bracket-delimited IPv6
951
       * address, in which case the colon delimiting the port must come
952
       * (immediately) after the close bracket.
953
       */
954
24.0k
      if (*p == '[')
955
2.28k
        {
956
2.28k
          bracket = memchr (p, ']', path_start - p);
957
2.28k
          if (bracket && *(bracket + 1) == ':')
958
59
            colon = bracket + 1;
959
2.22k
          else
960
2.22k
            colon = NULL;
961
2.28k
        }
962
21.7k
      else
963
21.7k
        colon = memchr (p, ':', path_start - p);
964
965
24.0k
      hostend = colon ? colon : path_start;
966
24.0k
      if (!parse_host (p, hostend - p, flags, host, error))
967
8.34k
        goto fail;
968
969
15.7k
      if (colon && colon != path_start - 1)
970
1.90k
        {
971
1.90k
          p = colon + 1;
972
1.90k
          if (!parse_port (p, path_start - p, port, error))
973
927
            goto fail;
974
1.90k
        }
975
976
14.8k
      p = path_start;
977
14.8k
    }
978
979
  /* Find fragment. */
980
26.7k
  end = p + strcspn (p, "#");
981
26.7k
  if (*end == '#')
982
1.45k
    {
983
1.45k
      if (!uri_normalize (fragment, end + 1, strlen (end + 1),
984
1.45k
                          flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
985
1.45k
                          G_URI_ERROR_BAD_FRAGMENT, error))
986
424
        goto fail;
987
1.45k
    }
988
989
  /* Find query */
990
26.3k
  question = memchr (p, '?', end - p);
991
26.3k
  if (question)
992
1.77k
    {
993
1.77k
      if (!uri_normalize (query, question + 1, end - (question + 1),
994
1.77k
                          flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
995
1.77k
                          G_URI_ERROR_BAD_QUERY, error))
996
687
        goto fail;
997
1.08k
      end = question;
998
1.08k
    }
999
1000
25.6k
  if (!uri_normalize (path, p, end - p,
1001
25.6k
                      flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1002
25.6k
                      G_URI_ERROR_BAD_PATH, error))
1003
4.66k
    goto fail;
1004
1005
  /* Scheme-based normalization */
1006
20.9k
  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1007
752
    {
1008
752
      const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1009
1010
752
      if (should_normalize_empty_path (scheme_str) && path && !**path)
1011
5
        {
1012
5
          g_free (*path);
1013
5
          *path = g_strdup ("/");
1014
5
        }
1015
1016
752
      if (port && *port == -1)
1017
662
        *port = g_uri_get_default_scheme_port (scheme_str);
1018
752
    }
1019
1020
20.9k
  g_free (normalized_scheme);
1021
20.9k
  g_free (cleaned_uri_string);
1022
20.9k
  return TRUE;
1023
1024
15.5k
 fail:
1025
15.5k
  if (scheme)
1026
15.5k
    g_clear_pointer (scheme, g_free);
1027
15.5k
  if (userinfo)
1028
15.5k
    g_clear_pointer (userinfo, g_free);
1029
15.5k
  if (host)
1030
15.5k
    g_clear_pointer (host, g_free);
1031
15.5k
  if (port)
1032
15.5k
    *port = -1;
1033
15.5k
  if (path)
1034
15.5k
    g_clear_pointer (path, g_free);
1035
15.5k
  if (query)
1036
15.5k
    g_clear_pointer (query, g_free);
1037
15.5k
  if (fragment)
1038
15.5k
    g_clear_pointer (fragment, g_free);
1039
1040
15.5k
  g_free (normalized_scheme);
1041
15.5k
  g_free (cleaned_uri_string);
1042
15.5k
  return FALSE;
1043
25.6k
}
1044
1045
/**
1046
 * g_uri_split:
1047
 * @uri_ref: a string containing a relative or absolute URI
1048
 * @flags: flags for parsing @uri_ref
1049
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1050
 *    the scheme (converted to lowercase), or %NULL
1051
 * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1052
 *    the userinfo, or %NULL
1053
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1054
 *    host, or %NULL
1055
 * @port: (out) (optional) (transfer full): on return, contains the
1056
 *    port, or `-1`
1057
 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1058
 *    path
1059
 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1060
 *    query, or %NULL
1061
 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1062
 *    the fragment, or %NULL
1063
 * @error: #GError for error reporting, or %NULL to ignore.
1064
 *
1065
 * Parses @uri_ref (which can be an
1066
 * [absolute or relative URI](#relative-and-absolute-uris)) according to @flags, and
1067
 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1068
 * returned as %NULL (but note that all URIs always have a path component,
1069
 * though it may be the empty string).
1070
 *
1071
 * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1072
 * @uri_ref will remain encoded in the output strings. (If not,
1073
 * then all such characters will be decoded.) Note that decoding will
1074
 * only work if the URI components are ASCII or UTF-8, so you will
1075
 * need to use %G_URI_FLAGS_ENCODED if they are not.
1076
 *
1077
 * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1078
 * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1079
 * since it always returns only the full userinfo; use
1080
 * g_uri_split_with_user() if you want it split up.
1081
 *
1082
 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1083
 *   on error.
1084
 *
1085
 * Since: 2.66
1086
 */
1087
gboolean
1088
g_uri_split (const gchar  *uri_ref,
1089
             GUriFlags     flags,
1090
             gchar       **scheme,
1091
             gchar       **userinfo,
1092
             gchar       **host,
1093
             gint         *port,
1094
             gchar       **path,
1095
             gchar       **query,
1096
             gchar       **fragment,
1097
             GError      **error)
1098
0
{
1099
0
  g_return_val_if_fail (uri_ref != NULL, FALSE);
1100
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1101
1102
0
  return g_uri_split_internal (uri_ref, flags,
1103
0
                               scheme, userinfo, NULL, NULL, NULL,
1104
0
                               host, port, path, query, fragment,
1105
0
                               error);
1106
0
}
1107
1108
/**
1109
 * g_uri_split_with_user:
1110
 * @uri_ref: a string containing a relative or absolute URI
1111
 * @flags: flags for parsing @uri_ref
1112
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1113
 *    the scheme (converted to lowercase), or %NULL
1114
 * @user: (out) (nullable) (optional) (transfer full): on return, contains
1115
 *    the user, or %NULL
1116
 * @password: (out) (nullable) (optional) (transfer full): on return, contains
1117
 *    the password, or %NULL
1118
 * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1119
 *    the auth_params, or %NULL
1120
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1121
 *    host, or %NULL
1122
 * @port: (out) (optional) (transfer full): on return, contains the
1123
 *    port, or `-1`
1124
 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1125
 *    path
1126
 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1127
 *    query, or %NULL
1128
 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1129
 *    the fragment, or %NULL
1130
 * @error: #GError for error reporting, or %NULL to ignore.
1131
 *
1132
 * Parses @uri_ref (which can be an
1133
 * [absolute or relative URI](#relative-and-absolute-uris)) according to @flags, and
1134
 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1135
 * returned as %NULL (but note that all URIs always have a path component,
1136
 * though it may be the empty string).
1137
 *
1138
 * See g_uri_split(), and the definition of #GUriFlags, for more
1139
 * information on the effect of @flags. Note that @password will only
1140
 * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1141
 * @auth_params will only be parsed out if @flags contains
1142
 * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1143
 *
1144
 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1145
 *   on error.
1146
 *
1147
 * Since: 2.66
1148
 */
1149
gboolean
1150
g_uri_split_with_user (const gchar  *uri_ref,
1151
                       GUriFlags     flags,
1152
                       gchar       **scheme,
1153
                       gchar       **user,
1154
                       gchar       **password,
1155
                       gchar       **auth_params,
1156
                       gchar       **host,
1157
                       gint         *port,
1158
                       gchar       **path,
1159
                       gchar       **query,
1160
                       gchar       **fragment,
1161
                       GError      **error)
1162
0
{
1163
0
  g_return_val_if_fail (uri_ref != NULL, FALSE);
1164
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1165
1166
0
  return g_uri_split_internal (uri_ref, flags,
1167
0
                               scheme, NULL, user, password, auth_params,
1168
0
                               host, port, path, query, fragment,
1169
0
                               error);
1170
0
}
1171
1172
1173
/**
1174
 * g_uri_split_network:
1175
 * @uri_string: a string containing an absolute URI
1176
 * @flags: flags for parsing @uri_string
1177
 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1178
 *    the scheme (converted to lowercase), or %NULL
1179
 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1180
 *    host, or %NULL
1181
 * @port: (out) (optional) (transfer full): on return, contains the
1182
 *    port, or `-1`
1183
 * @error: #GError for error reporting, or %NULL to ignore.
1184
 *
1185
 * Parses @uri_string (which must be an [absolute URI](#relative-and-absolute-uris))
1186
 * according to @flags, and returns the pieces relevant to connecting to a host.
1187
 * See the documentation for g_uri_split() for more details; this is
1188
 * mostly a wrapper around that function with simpler arguments.
1189
 * However, it will return an error if @uri_string is a relative URI,
1190
 * or does not contain a hostname component.
1191
 *
1192
 * Returns: (skip): %TRUE if @uri_string parsed successfully,
1193
 *   %FALSE on error.
1194
 *
1195
 * Since: 2.66
1196
 */
1197
gboolean
1198
g_uri_split_network (const gchar  *uri_string,
1199
                     GUriFlags     flags,
1200
                     gchar       **scheme,
1201
                     gchar       **host,
1202
                     gint         *port,
1203
                     GError      **error)
1204
2.93k
{
1205
2.93k
  gchar *my_scheme = NULL, *my_host = NULL;
1206
1207
2.93k
  g_return_val_if_fail (uri_string != NULL, FALSE);
1208
2.93k
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1209
1210
2.93k
  if (!g_uri_split_internal (uri_string, flags,
1211
2.93k
                             &my_scheme, NULL, NULL, NULL, NULL,
1212
2.93k
                             &my_host, port, NULL, NULL, NULL,
1213
2.93k
                             error))
1214
1.26k
    return FALSE;
1215
1216
1.66k
  if (!my_scheme || !my_host)
1217
1.54k
    {
1218
1.54k
      if (!my_scheme)
1219
1.51k
        {
1220
1.51k
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1221
1.51k
                       _("URI ‘%s’ is not an absolute URI"),
1222
1.51k
                       uri_string);
1223
1.51k
        }
1224
30
      else
1225
30
        {
1226
30
          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1227
30
                       _("URI ‘%s’ has no host component"),
1228
30
                       uri_string);
1229
30
        }
1230
1.54k
      g_free (my_scheme);
1231
1.54k
      g_free (my_host);
1232
1233
1.54k
      return FALSE;
1234
1.54k
    }
1235
1236
124
  if (scheme)
1237
124
    *scheme = g_steal_pointer (&my_scheme);
1238
124
  if (host)
1239
124
    *host = g_steal_pointer (&my_host);
1240
1241
124
  g_free (my_scheme);
1242
124
  g_free (my_host);
1243
1244
124
  return TRUE;
1245
1.66k
}
1246
1247
/**
1248
 * g_uri_is_valid:
1249
 * @uri_string: a string containing an absolute URI
1250
 * @flags: flags for parsing @uri_string
1251
 * @error: #GError for error reporting, or %NULL to ignore.
1252
 *
1253
 * Parses @uri_string according to @flags, to determine whether it is a valid
1254
 * [absolute URI](#relative-and-absolute-uris), i.e. it does not need to be resolved
1255
 * relative to another URI using g_uri_parse_relative().
1256
 *
1257
 * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1258
 *
1259
 * See g_uri_split(), and the definition of #GUriFlags, for more
1260
 * information on the effect of @flags.
1261
 *
1262
 * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1263
 *
1264
 * Since: 2.66
1265
 */
1266
gboolean
1267
g_uri_is_valid (const gchar  *uri_string,
1268
                GUriFlags     flags,
1269
                GError      **error)
1270
0
{
1271
0
  gchar *my_scheme = NULL;
1272
1273
0
  g_return_val_if_fail (uri_string != NULL, FALSE);
1274
0
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1275
1276
0
  if (!g_uri_split_internal (uri_string, flags,
1277
0
                             &my_scheme, NULL, NULL, NULL, NULL,
1278
0
                             NULL, NULL, NULL, NULL, NULL,
1279
0
                             error))
1280
0
    return FALSE;
1281
1282
0
  if (!my_scheme)
1283
0
    {
1284
0
      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1285
0
                   _("URI ‘%s’ is not an absolute URI"),
1286
0
                   uri_string);
1287
0
      return FALSE;
1288
0
    }
1289
1290
0
  g_free (my_scheme);
1291
1292
0
  return TRUE;
1293
0
}
1294
1295
1296
/* Implements the "Remove Dot Segments" algorithm from section 5.2.4 of
1297
 * RFC 3986.
1298
 *
1299
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
1300
 */
1301
static void
1302
remove_dot_segments (gchar *path)
1303
6.33k
{
1304
  /* The output can be written to the same buffer that the input
1305
   * is read from, as the output pointer is only ever increased
1306
   * when the input pointer is increased as well, and the input
1307
   * pointer is never decreased. */
1308
6.33k
  gchar *input = path;
1309
6.33k
  gchar *output = path;
1310
1311
6.33k
  if (!*path)
1312
3.51k
    return;
1313
1314
1.06M
  while (*input)
1315
1.06M
    {
1316
      /*  A.  If the input buffer begins with a prefix of "../" or "./",
1317
       *      then remove that prefix from the input buffer; otherwise,
1318
       */
1319
1.06M
      if (strncmp (input, "../", 3) == 0)
1320
242
        input += 3;
1321
1.06M
      else if (strncmp (input, "./", 2) == 0)
1322
213
        input += 2;
1323
1324
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.",
1325
       *      where "." is a complete path segment, then replace that
1326
       *      prefix with "/" in the input buffer; otherwise,
1327
       */
1328
1.06M
      else if (strncmp (input, "/./", 3) == 0)
1329
299
        input += 2;
1330
1.06M
      else if (strcmp (input, "/.") == 0)
1331
26
        input[1] = '\0';
1332
1333
      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
1334
       *      where ".." is a complete path segment, then replace that
1335
       *      prefix with "/" in the input buffer and remove the last
1336
       *      segment and its preceding "/" (if any) from the output
1337
       *      buffer; otherwise,
1338
       */
1339
1.06M
      else if (strncmp (input, "/../", 4) == 0)
1340
846k
        {
1341
846k
          input += 3;
1342
846k
          if (output > path)
1343
211k
            {
1344
211k
              do
1345
846k
                {
1346
846k
                  output--;
1347
846k
                }
1348
846k
              while (*output != '/' && output > path);
1349
211k
            }
1350
846k
        }
1351
218k
      else if (strcmp (input, "/..") == 0)
1352
130
        {
1353
130
          input[1] = '\0';
1354
130
          if (output > path)
1355
103
            {
1356
103
              do
1357
461
                 {
1358
461
                   output--;
1359
461
                 }
1360
461
              while (*output != '/' && output > path);
1361
103
            }
1362
130
        }
1363
1364
      /*  D.  if the input buffer consists only of "." or "..", then remove
1365
       *      that from the input buffer; otherwise,
1366
       */
1367
218k
      else if (strcmp (input, "..") == 0 || strcmp (input, ".") == 0)
1368
62
        input[0] = '\0';
1369
1370
      /*  E.  move the first path segment in the input buffer to the end of
1371
       *      the output buffer, including the initial "/" character (if
1372
       *      any) and any subsequent characters up to, but not including,
1373
       *      the next "/" character or the end of the input buffer.
1374
       */
1375
217k
      else
1376
217k
        {
1377
217k
          *output++ = *input++;
1378
130M
          while (*input && *input != '/')
1379
130M
            *output++ = *input++;
1380
217k
        }
1381
1.06M
    }
1382
2.82k
  *output = '\0';
1383
2.82k
}
1384
1385
/**
1386
 * g_uri_parse:
1387
 * @uri_string: a string representing an absolute URI
1388
 * @flags: flags describing how to parse @uri_string
1389
 * @error: #GError for error reporting, or %NULL to ignore.
1390
 *
1391
 * Parses @uri_string according to @flags. If the result is not a
1392
 * valid [absolute URI](#relative-and-absolute-uris), it will be discarded, and an
1393
 * error returned.
1394
 *
1395
 * Return value: (transfer full): a new #GUri, or NULL on error.
1396
 *
1397
 * Since: 2.66
1398
 */
1399
GUri *
1400
g_uri_parse (const gchar  *uri_string,
1401
             GUriFlags     flags,
1402
             GError      **error)
1403
33.5k
{
1404
33.5k
  g_return_val_if_fail (uri_string != NULL, NULL);
1405
33.5k
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1406
1407
33.5k
  return g_uri_parse_relative (NULL, uri_string, flags, error);
1408
33.5k
}
1409
1410
/**
1411
 * g_uri_parse_relative:
1412
 * @base_uri: (nullable) (transfer none): a base absolute URI
1413
 * @uri_ref: a string representing a relative or absolute URI
1414
 * @flags: flags describing how to parse @uri_ref
1415
 * @error: #GError for error reporting, or %NULL to ignore.
1416
 *
1417
 * Parses @uri_ref according to @flags and, if it is a
1418
 * [relative URI](#relative-and-absolute-uris), resolves it relative to @base_uri.
1419
 * If the result is not a valid absolute URI, it will be discarded, and an error
1420
 * returned.
1421
 *
1422
 * Return value: (transfer full): a new #GUri, or NULL on error.
1423
 *
1424
 * Since: 2.66
1425
 */
1426
GUri *
1427
g_uri_parse_relative (GUri         *base_uri,
1428
                      const gchar  *uri_ref,
1429
                      GUriFlags     flags,
1430
                      GError      **error)
1431
33.5k
{
1432
33.5k
  GUri *uri = NULL;
1433
1434
33.5k
  g_return_val_if_fail (uri_ref != NULL, NULL);
1435
33.5k
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1436
33.5k
  g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1437
1438
  /* Use GUri struct to construct the return value: there is no guarantee it is
1439
   * actually correct within the function body. */
1440
33.5k
  uri = g_atomic_rc_box_new0 (GUri);
1441
33.5k
  uri->flags = flags;
1442
1443
33.5k
  if (!g_uri_split_internal (uri_ref, flags,
1444
33.5k
                             &uri->scheme, &uri->userinfo,
1445
33.5k
                             &uri->user, &uri->password, &uri->auth_params,
1446
33.5k
                             &uri->host, &uri->port,
1447
33.5k
                             &uri->path, &uri->query, &uri->fragment,
1448
33.5k
                             error))
1449
14.2k
    {
1450
14.2k
      g_uri_unref (uri);
1451
14.2k
      return NULL;
1452
14.2k
    }
1453
1454
19.3k
  if (!uri->scheme && !base_uri)
1455
12.9k
    {
1456
12.9k
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1457
12.9k
                           _("URI is not absolute, and no base URI was provided"));
1458
12.9k
      g_uri_unref (uri);
1459
12.9k
      return NULL;
1460
12.9k
    }
1461
1462
6.33k
  if (base_uri)
1463
0
    {
1464
      /* This is section 5.2.2 of RFC 3986, except that we're doing
1465
       * it in place in @uri rather than copying from R to T.
1466
       *
1467
       * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1468
       */
1469
0
      if (uri->scheme)
1470
0
        remove_dot_segments (uri->path);
1471
0
      else
1472
0
        {
1473
0
          uri->scheme = g_strdup (base_uri->scheme);
1474
0
          if (uri->host)
1475
0
            remove_dot_segments (uri->path);
1476
0
          else
1477
0
            {
1478
0
              if (!*uri->path)
1479
0
                {
1480
0
                  g_free (uri->path);
1481
0
                  uri->path = g_strdup (base_uri->path);
1482
0
                  if (!uri->query)
1483
0
                    uri->query = g_strdup (base_uri->query);
1484
0
                }
1485
0
              else
1486
0
                {
1487
0
                  if (*uri->path == '/')
1488
0
                    remove_dot_segments (uri->path);
1489
0
                  else
1490
0
                    {
1491
0
                      gchar *newpath, *last;
1492
1493
0
                      last = strrchr (base_uri->path, '/');
1494
0
                      if (last)
1495
0
                        {
1496
0
                          newpath = g_strdup_printf ("%.*s/%s",
1497
0
                                                     (gint)(last - base_uri->path),
1498
0
                                                     base_uri->path,
1499
0
                                                     uri->path);
1500
0
                        }
1501
0
                      else
1502
0
                        newpath = g_strdup_printf ("/%s", uri->path);
1503
1504
0
                      g_free (uri->path);
1505
0
                      uri->path = g_steal_pointer (&newpath);
1506
1507
0
                      remove_dot_segments (uri->path);
1508
0
                    }
1509
0
                }
1510
1511
0
              uri->userinfo = g_strdup (base_uri->userinfo);
1512
0
              uri->user = g_strdup (base_uri->user);
1513
0
              uri->password = g_strdup (base_uri->password);
1514
0
              uri->auth_params = g_strdup (base_uri->auth_params);
1515
0
              uri->host = g_strdup (base_uri->host);
1516
0
              uri->port = base_uri->port;
1517
0
            }
1518
0
        }
1519
1520
      /* Scheme normalization couldn't have been done earlier
1521
       * as the relative URI may not have had a scheme */
1522
0
      if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1523
0
        {
1524
0
          if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1525
0
            {
1526
0
              g_free (uri->path);
1527
0
              uri->path = g_strdup ("/");
1528
0
            }
1529
1530
0
          uri->port = normalize_port (uri->scheme, uri->port);
1531
0
        }
1532
0
    }
1533
6.33k
  else
1534
6.33k
    {
1535
6.33k
      remove_dot_segments (uri->path);
1536
6.33k
    }
1537
1538
  /* Fix up the invalid cases from
1539
   * https://datatracker.ietf.org/doc/html/rfc3986#section-3, as otherwise
1540
   * calling g_uri_to_string() on this URI will fail. These can be caused by
1541
   * remove_dot_segments(), e.g. `data:/.//` gets normalised to `data://` whose
1542
   * path is invalid given the lack of an authority. */
1543
6.33k
  if (uri->host == NULL && uri->path[0] == '/' && uri->path[1] == '/')
1544
20
    {
1545
20
      char *new_path = g_strconcat ("/.", uri->path, NULL);
1546
20
      g_free (uri->path);
1547
20
      uri->path = g_steal_pointer (&new_path);
1548
20
    }
1549
1550
6.33k
  return g_steal_pointer (&uri);
1551
19.3k
}
1552
1553
/**
1554
 * g_uri_resolve_relative:
1555
 * @base_uri_string: (nullable): a string representing a base URI
1556
 * @uri_ref: a string representing a relative or absolute URI
1557
 * @flags: flags describing how to parse @uri_ref
1558
 * @error: #GError for error reporting, or %NULL to ignore.
1559
 *
1560
 * Parses @uri_ref according to @flags and, if it is a
1561
 * [relative URI](#relative-and-absolute-uris), resolves it relative to
1562
 * @base_uri_string. If the result is not a valid absolute URI, it will be
1563
 * discarded, and an error returned.
1564
 *
1565
 * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1566
 * %NULL if @uri_ref is invalid or not absolute.)
1567
 *
1568
 * Return value: (transfer full): the resolved URI string,
1569
 * or NULL on error.
1570
 *
1571
 * Since: 2.66
1572
 */
1573
gchar *
1574
g_uri_resolve_relative (const gchar  *base_uri_string,
1575
                        const gchar  *uri_ref,
1576
                        GUriFlags     flags,
1577
                        GError      **error)
1578
0
{
1579
0
  GUri *base_uri, *resolved_uri;
1580
0
  gchar *resolved_uri_string;
1581
1582
0
  g_return_val_if_fail (uri_ref != NULL, NULL);
1583
0
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1584
1585
0
  flags |= G_URI_FLAGS_ENCODED;
1586
1587
0
  if (base_uri_string)
1588
0
    {
1589
0
      base_uri = g_uri_parse (base_uri_string, flags, error);
1590
0
      if (!base_uri)
1591
0
        return NULL;
1592
0
    }
1593
0
  else
1594
0
    base_uri = NULL;
1595
1596
0
  resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1597
0
  if (base_uri)
1598
0
    g_uri_unref (base_uri);
1599
0
  if (!resolved_uri)
1600
0
    return NULL;
1601
1602
0
  resolved_uri_string = g_uri_to_string (resolved_uri);
1603
0
  g_uri_unref (resolved_uri);
1604
0
  return g_steal_pointer (&resolved_uri_string);
1605
0
}
1606
1607
/* Sets of reserved characters that are allowed in each URI component.
 *
 * The userinfo taken as a whole may contain sub-delims and ":", but a
 * split-out user may contain neither ":" nor ";", and a split-out password
 * may not contain ";".
 */
#define USERINFO_ALLOWED_CHARS    G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
#define USER_ALLOWED_CHARS        "!$&'()*+,="
/* Same as the user set, plus ":". */
#define PASSWORD_ALLOWED_CHARS    USER_ALLOWED_CHARS ":"
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
#define IP_ADDR_ALLOWED_CHARS     ":"
#define HOST_ALLOWED_CHARS        G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
#define PATH_ALLOWED_CHARS        G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
/* Query and fragment share one set: the path set plus "?". */
#define QUERY_ALLOWED_CHARS       G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
#define FRAGMENT_ALLOWED_CHARS    QUERY_ALLOWED_CHARS
1620
1621
static gchar *
1622
g_uri_join_internal (GUriFlags    flags,
1623
                     const gchar *scheme,
1624
                     gboolean     userinfo,
1625
                     const gchar *user,
1626
                     const gchar *password,
1627
                     const gchar *auth_params,
1628
                     const gchar *host,
1629
                     gint         port,
1630
                     const gchar *path,
1631
                     const gchar *query,
1632
                     const gchar *fragment)
1633
6.33k
{
1634
6.33k
  gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1635
6.33k
  GString *str;
1636
6.33k
  char *normalized_scheme = NULL;
1637
1638
  /* Restrictions on path prefixes. See:
1639
   * https://tools.ietf.org/html/rfc3986#section-3
1640
   */
1641
6.33k
  g_return_val_if_fail (path != NULL, NULL);
1642
6.33k
  g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1643
6.33k
  g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1644
1645
  /* Arbitrarily chosen default size which should handle most average length
1646
   * URIs. This should avoid a few reallocations of the buffer in most cases.
1647
   * It’s 1B shorter than a power of two, since GString will add a
1648
   * nul-terminator byte. */
1649
6.31k
  str = g_string_sized_new (127);
1650
1651
6.31k
  if (scheme)
1652
6.31k
    {
1653
6.31k
      g_string_append (str, scheme);
1654
6.31k
      g_string_append_c (str, ':');
1655
6.31k
    }
1656
1657
6.31k
  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1658
442
    normalized_scheme = g_ascii_strdown (scheme, -1);
1659
1660
6.31k
  if (host)
1661
2.88k
    {
1662
2.88k
      g_string_append (str, "//");
1663
1664
2.88k
      if (user)
1665
1.18k
        {
1666
1.18k
          if (encoded)
1667
0
            g_string_append (str, user);
1668
1.18k
          else
1669
1.18k
            {
1670
1.18k
              if (userinfo)
1671
901
                g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1672
288
              else
1673
                /* Encode ':' and ';' regardless of whether we have a
1674
                 * password or auth params, since it may be parsed later
1675
                 * under the assumption that it does.
1676
                 */
1677
288
                g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1678
1.18k
            }
1679
1680
1.18k
          if (password)
1681
93
            {
1682
93
              g_string_append_c (str, ':');
1683
93
              if (encoded)
1684
0
                g_string_append (str, password);
1685
93
              else
1686
93
                g_string_append_uri_escaped (str, password,
1687
93
                                             PASSWORD_ALLOWED_CHARS, TRUE);
1688
93
            }
1689
1690
1.18k
          if (auth_params)
1691
92
            {
1692
92
              g_string_append_c (str, ';');
1693
92
              if (encoded)
1694
0
                g_string_append (str, auth_params);
1695
92
              else
1696
92
                g_string_append_uri_escaped (str, auth_params,
1697
92
                                             AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1698
92
            }
1699
1700
1.18k
          g_string_append_c (str, '@');
1701
1.18k
        }
1702
1703
2.88k
      if (strchr (host, ':') && g_hostname_is_ip_address (host))
1704
555
        {
1705
555
          g_string_append_c (str, '[');
1706
555
          if (encoded)
1707
0
            g_string_append (str, host);
1708
555
          else
1709
555
            g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1710
555
          g_string_append_c (str, ']');
1711
555
        }
1712
2.33k
      else
1713
2.33k
        {
1714
2.33k
          if (encoded)
1715
0
            g_string_append (str, host);
1716
2.33k
          else
1717
2.33k
            g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1718
2.33k
        }
1719
1720
2.88k
      if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1721
727
        g_string_append_printf (str, ":%d", port);
1722
2.88k
    }
1723
1724
6.31k
  if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1725
3
    g_string_append (str, "/");
1726
6.30k
  else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1727
889
    g_string_append (str, path);
1728
5.42k
  else
1729
5.42k
    g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1730
1731
6.31k
  g_free (normalized_scheme);
1732
1733
6.31k
  if (query)
1734
486
    {
1735
486
      g_string_append_c (str, '?');
1736
486
      if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1737
92
        g_string_append (str, query);
1738
394
      else
1739
394
        g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1740
486
    }
1741
6.31k
  if (fragment)
1742
440
    {
1743
440
      g_string_append_c (str, '#');
1744
440
      if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1745
0
        g_string_append (str, fragment);
1746
440
      else
1747
440
        g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1748
440
    }
1749
1750
6.31k
  return g_string_free (str, FALSE);
1751
6.31k
}
1752
1753
/**
1754
 * g_uri_join:
1755
 * @flags: flags describing how to build the URI string
1756
 * @scheme: (nullable): the URI scheme, or %NULL
1757
 * @userinfo: (nullable): the userinfo component, or %NULL
1758
 * @host: (nullable): the host component, or %NULL
1759
 * @port: the port, or `-1`
1760
 * @path: (not nullable): the path component
1761
 * @query: (nullable): the query component, or %NULL
1762
 * @fragment: (nullable): the fragment, or %NULL
1763
 *
1764
 * Joins the given components together according to @flags to create
1765
 * an absolute URI string. @path may not be %NULL (though it may be the empty
1766
 * string).
1767
 *
1768
 * When @host is present, @path must either be empty or begin with a slash (`/`)
1769
 * character. When @host is not present, @path cannot begin with two slash
1770
 * characters (`//`). See
1771
 * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1772
 *
1773
 * See also g_uri_join_with_user(), which allows specifying the
1774
 * components of the ‘userinfo’ separately.
1775
 *
1776
 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1777
 * in @flags.
1778
 *
1779
 * Return value: (not nullable) (transfer full): an absolute URI string
1780
 *
1781
 * Since: 2.66
1782
 */
1783
gchar *
1784
g_uri_join (GUriFlags    flags,
1785
            const gchar *scheme,
1786
            const gchar *userinfo,
1787
            const gchar *host,
1788
            gint         port,
1789
            const gchar *path,
1790
            const gchar *query,
1791
            const gchar *fragment)
1792
4.83k
{
1793
4.83k
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1794
4.83k
  g_return_val_if_fail (path != NULL, NULL);
1795
1796
4.83k
  return g_uri_join_internal (flags,
1797
4.83k
                              scheme,
1798
4.83k
                              TRUE, userinfo, NULL, NULL,
1799
4.83k
                              host,
1800
4.83k
                              port,
1801
4.83k
                              path,
1802
4.83k
                              query,
1803
4.83k
                              fragment);
1804
4.83k
}
1805
1806
/**
1807
 * g_uri_join_with_user:
1808
 * @flags: flags describing how to build the URI string
1809
 * @scheme: (nullable): the URI scheme, or %NULL
1810
 * @user: (nullable): the user component of the userinfo, or %NULL
1811
 * @password: (nullable): the password component of the userinfo, or
1812
 *   %NULL
1813
 * @auth_params: (nullable): the auth params of the userinfo, or
1814
 *   %NULL
1815
 * @host: (nullable): the host component, or %NULL
1816
 * @port: the port, or `-1`
1817
 * @path: (not nullable): the path component
1818
 * @query: (nullable): the query component, or %NULL
1819
 * @fragment: (nullable): the fragment, or %NULL
1820
 *
1821
 * Joins the given components together according to @flags to create
1822
 * an absolute URI string. @path may not be %NULL (though it may be the empty
1823
 * string).
1824
 *
1825
 * In contrast to g_uri_join(), this allows specifying the components
1826
 * of the ‘userinfo’ separately. It otherwise behaves the same.
1827
 *
1828
 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1829
 * in @flags.
1830
 *
1831
 * Return value: (not nullable) (transfer full): an absolute URI string
1832
 *
1833
 * Since: 2.66
1834
 */
1835
gchar *
1836
g_uri_join_with_user (GUriFlags    flags,
1837
                      const gchar *scheme,
1838
                      const gchar *user,
1839
                      const gchar *password,
1840
                      const gchar *auth_params,
1841
                      const gchar *host,
1842
                      gint         port,
1843
                      const gchar *path,
1844
                      const gchar *query,
1845
                      const gchar *fragment)
1846
1.50k
{
1847
1.50k
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1848
1.50k
  g_return_val_if_fail (path != NULL, NULL);
1849
1850
1.50k
  return g_uri_join_internal (flags,
1851
1.50k
                              scheme,
1852
1.50k
                              FALSE, user, password, auth_params,
1853
1.50k
                              host,
1854
1.50k
                              port,
1855
1.50k
                              path,
1856
1.50k
                              query,
1857
1.50k
                              fragment);
1858
1.50k
}
1859
1860
/**
1861
 * g_uri_build:
1862
 * @flags: flags describing how to build the #GUri
1863
 * @scheme: (not nullable): the URI scheme
1864
 * @userinfo: (nullable): the userinfo component, or %NULL
1865
 * @host: (nullable): the host component, or %NULL
1866
 * @port: the port, or `-1`
1867
 * @path: (not nullable): the path component
1868
 * @query: (nullable): the query component, or %NULL
1869
 * @fragment: (nullable): the fragment, or %NULL
1870
 *
1871
 * Creates a new #GUri from the given components according to @flags.
1872
 *
1873
 * See also g_uri_build_with_user(), which allows specifying the
1874
 * components of the "userinfo" separately.
1875
 *
1876
 * Return value: (not nullable) (transfer full): a new #GUri
1877
 *
1878
 * Since: 2.66
1879
 */
1880
GUri *
1881
g_uri_build (GUriFlags    flags,
1882
             const gchar *scheme,
1883
             const gchar *userinfo,
1884
             const gchar *host,
1885
             gint         port,
1886
             const gchar *path,
1887
             const gchar *query,
1888
             const gchar *fragment)
1889
0
{
1890
0
  GUri *uri;
1891
1892
0
  g_return_val_if_fail (scheme != NULL, NULL);
1893
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1894
0
  g_return_val_if_fail (path != NULL, NULL);
1895
1896
0
  uri = g_atomic_rc_box_new0 (GUri);
1897
0
  uri->flags = flags;
1898
0
  uri->scheme = g_ascii_strdown (scheme, -1);
1899
0
  uri->userinfo = g_strdup (userinfo);
1900
0
  uri->host = g_strdup (host);
1901
0
  uri->port = port;
1902
0
  uri->path = g_strdup (path);
1903
0
  uri->query = g_strdup (query);
1904
0
  uri->fragment = g_strdup (fragment);
1905
1906
0
  return g_steal_pointer (&uri);
1907
0
}
1908
1909
/**
1910
 * g_uri_build_with_user:
1911
 * @flags: flags describing how to build the #GUri
1912
 * @scheme: (not nullable): the URI scheme
1913
 * @user: (nullable): the user component of the userinfo, or %NULL
1914
 * @password: (nullable): the password component of the userinfo, or %NULL
1915
 * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1916
 * @host: (nullable): the host component, or %NULL
1917
 * @port: the port, or `-1`
1918
 * @path: (not nullable): the path component
1919
 * @query: (nullable): the query component, or %NULL
1920
 * @fragment: (nullable): the fragment, or %NULL
1921
 *
1922
 * Creates a new #GUri from the given components according to @flags
1923
 * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1924
 * coherent with the passed values, in particular use `%`-encoded values with
1925
 * %G_URI_FLAGS_ENCODED.
1926
 *
1927
 * In contrast to g_uri_build(), this allows specifying the components
1928
 * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1929
 * if either @password or @auth_params is non-%NULL.
1930
 *
1931
 * Return value: (not nullable) (transfer full): a new #GUri
1932
 *
1933
 * Since: 2.66
1934
 */
1935
GUri *
1936
g_uri_build_with_user (GUriFlags    flags,
1937
                       const gchar *scheme,
1938
                       const gchar *user,
1939
                       const gchar *password,
1940
                       const gchar *auth_params,
1941
                       const gchar *host,
1942
                       gint         port,
1943
                       const gchar *path,
1944
                       const gchar *query,
1945
                       const gchar *fragment)
1946
0
{
1947
0
  GUri *uri;
1948
0
  GString *userinfo;
1949
1950
0
  g_return_val_if_fail (scheme != NULL, NULL);
1951
0
  g_return_val_if_fail (password == NULL || user != NULL, NULL);
1952
0
  g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1953
0
  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1954
0
  g_return_val_if_fail (path != NULL, NULL);
1955
1956
0
  uri = g_atomic_rc_box_new0 (GUri);
1957
0
  uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1958
0
  uri->scheme = g_ascii_strdown (scheme, -1);
1959
0
  uri->user = g_strdup (user);
1960
0
  uri->password = g_strdup (password);
1961
0
  uri->auth_params = g_strdup (auth_params);
1962
0
  uri->host = g_strdup (host);
1963
0
  uri->port = port;
1964
0
  uri->path = g_strdup (path);
1965
0
  uri->query = g_strdup (query);
1966
0
  uri->fragment = g_strdup (fragment);
1967
1968
0
  if (user)
1969
0
    {
1970
0
      userinfo = g_string_new (user);
1971
0
      if (password)
1972
0
        {
1973
0
          g_string_append_c (userinfo, ':');
1974
0
          g_string_append (userinfo, uri->password);
1975
0
        }
1976
0
      if (auth_params)
1977
0
        {
1978
0
          g_string_append_c (userinfo, ';');
1979
0
          g_string_append (userinfo, uri->auth_params);
1980
0
        }
1981
0
      uri->userinfo = g_string_free (userinfo, FALSE);
1982
0
    }
1983
1984
0
  return g_steal_pointer (&uri);
1985
0
}
1986
1987
/**
1988
 * g_uri_to_string:
1989
 * @uri: a #GUri
1990
 *
1991
 * Returns a string representing @uri.
1992
 *
1993
 * This is not guaranteed to return a string which is identical to the
1994
 * string that @uri was parsed from. However, if the source URI was
1995
 * syntactically correct (according to RFC 3986), and it was parsed
1996
 * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1997
 * a string which is at least semantically equivalent to the source
1998
 * URI (according to RFC 3986).
1999
 *
2000
 * If @uri might contain sensitive details, such as authentication parameters,
2001
 * or private data in its query string, and the returned string is going to be
2002
 * logged, then consider using g_uri_to_string_partial() to redact parts.
2003
 *
2004
 * Return value: (not nullable) (transfer full): a string representing @uri,
2005
 *     which the caller must free.
2006
 *
2007
 * Since: 2.66
2008
 */
2009
gchar *
2010
g_uri_to_string (GUri *uri)
2011
6.33k
{
2012
6.33k
  g_return_val_if_fail (uri != NULL, NULL);
2013
2014
6.33k
  return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
2015
6.33k
}
2016
2017
/**
2018
 * g_uri_to_string_partial:
2019
 * @uri: a #GUri
2020
 * @flags: flags describing what parts of @uri to hide
2021
 *
2022
 * Returns a string representing @uri, subject to the options in
2023
 * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2024
 *
2025
 * Return value: (not nullable) (transfer full): a string representing
2026
 *     @uri, which the caller must free.
2027
 *
2028
 * Since: 2.66
2029
 */
2030
gchar *
2031
g_uri_to_string_partial (GUri          *uri,
2032
                         GUriHideFlags  flags)
2033
6.33k
{
2034
6.33k
  gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2035
6.33k
  gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2036
6.33k
  gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2037
6.33k
  gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2038
6.33k
  gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2039
2040
6.33k
  g_return_val_if_fail (uri != NULL, NULL);
2041
2042
6.33k
  if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2043
1.50k
    {
2044
1.50k
      return g_uri_join_with_user (uri->flags,
2045
1.50k
                                   uri->scheme,
2046
1.50k
                                   hide_user ? NULL : uri->user,
2047
1.50k
                                   hide_password ? NULL : uri->password,
2048
1.50k
                                   hide_auth_params ? NULL : uri->auth_params,
2049
1.50k
                                   uri->host,
2050
1.50k
                                   uri->port,
2051
1.50k
                                   uri->path,
2052
1.50k
                                   hide_query ? NULL : uri->query,
2053
1.50k
                                   hide_fragment ? NULL : uri->fragment);
2054
1.50k
    }
2055
2056
4.83k
  return g_uri_join (uri->flags,
2057
4.83k
                     uri->scheme,
2058
4.83k
                     hide_user ? NULL : uri->userinfo,
2059
4.83k
                     uri->host,
2060
4.83k
                     uri->port,
2061
4.83k
                     uri->path,
2062
4.83k
                     hide_query ? NULL : uri->query,
2063
4.83k
                     hide_fragment ? NULL : uri->fragment);
2064
6.33k
}
2065
2066
/* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2067
static guint
2068
str_ascii_case_hash (gconstpointer v)
2069
0
{
2070
0
  const signed char *p;
2071
0
  guint32 h = 5381;
2072
2073
0
  for (p = v; *p != '\0'; p++)
2074
0
    h = (h << 5) + h + g_ascii_toupper (*p);
2075
2076
0
  return h;
2077
0
}
2078
2079
static gboolean
2080
str_ascii_case_equal (gconstpointer v1,
2081
                      gconstpointer v2)
2082
0
{
2083
0
  const gchar *string1 = v1;
2084
0
  const gchar *string2 = v2;
2085
2086
0
  return g_ascii_strcasecmp (string1, string2) == 0;
2087
0
}
2088
2089
/**
2090
 * GUriParamsIter:
2091
 *
2092
 * Many URI schemes include one or more attribute/value pairs as part of the URI
2093
 * value. For example `scheme://server/path?query=string&is=there` has two
2094
 * attributes – `query=string` and `is=there` – in its query part.
2095
 *
2096
 * A #GUriParamsIter structure represents an iterator that can be used to
2097
 * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2098
 * structures are typically allocated on the stack and then initialized with
2099
 * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2100
 * for a usage example.
2101
 *
2102
 * Since: 2.66
2103
 */
2104
typedef struct
2105
{
2106
  GUriParamsFlags flags;
2107
  const gchar    *attr;
2108
  const gchar    *end;
2109
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
2110
} RealIter;
2111
2112
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
2113
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2114
2115
/**
2116
 * g_uri_params_iter_init:
2117
 * @iter: an uninitialized #GUriParamsIter
2118
 * @params: a `%`-encoded string containing `attribute=value`
2119
 *   parameters
2120
 * @length: the length of @params, or `-1` if it is nul-terminated
2121
 * @separators: the separator byte character set between parameters. (usually
2122
 *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2123
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2124
 *   anything but ASCII characters. You may pass an empty set, in which case
2125
 *   no splitting will occur.
2126
 * @flags: flags to modify the way the parameters are handled.
2127
 *
2128
 * Initializes an attribute/value pair iterator.
2129
 *
2130
 * The iterator keeps pointers to the @params and @separators arguments, those
2131
 * variables must thus outlive the iterator and not be modified during the
2132
 * iteration.
2133
 *
2134
 * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2135
 * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2136
 * will give attribute `foo` with value `bar baz`. This is commonly used on the
2137
 * web (the `https` and `http` schemes only), but is deprecated in favour of
2138
 * the equivalent of encoding spaces as `%20`.
2139
 *
2140
 * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2141
 * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2142
 * responsible for doing their own case-insensitive comparisons.
2143
 *
2144
 * |[<!-- language="C" -->
2145
 * GUriParamsIter iter;
2146
 * GError *error = NULL;
2147
 * gchar *unowned_attr, *unowned_value;
2148
 *
2149
 * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2150
 * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2151
 *   {
2152
 *     g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2153
 *     g_autofree gchar *value = g_steal_pointer (&unowned_value);
2154
 *     // do something with attr and value; this code will be called 4 times
2155
 *     // for the params string in this example: once with attr=foo and value=bar,
2156
 *     // then with baz/bar, then Foo/frob, then baz/bar2.
2157
 *   }
2158
 * if (error)
2159
 *   // handle parsing error
2160
 * ]|
2161
 *
2162
 * Since: 2.66
2163
 */
2164
void
2165
g_uri_params_iter_init (GUriParamsIter *iter,
2166
                        const gchar    *params,
2167
                        gssize          length,
2168
                        const gchar    *separators,
2169
                        GUriParamsFlags flags)
2170
1.25k
{
2171
1.25k
  RealIter *ri = (RealIter *)iter;
2172
1.25k
  const gchar *s;
2173
2174
1.25k
  g_return_if_fail (iter != NULL);
2175
1.25k
  g_return_if_fail (length == 0 || params != NULL);
2176
1.25k
  g_return_if_fail (length >= -1);
2177
1.25k
  g_return_if_fail (separators != NULL);
2178
2179
1.25k
  ri->flags = flags;
2180
2181
1.25k
  if (length == -1)
2182
0
    ri->end = params + strlen (params);
2183
1.25k
  else
2184
1.25k
    ri->end = params + length;
2185
2186
1.25k
  memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2187
2.51k
  for (s = separators; *s != '\0'; ++s)
2188
1.25k
    ri->sep_table[*(guchar *)s] = TRUE;
2189
2190
1.25k
  ri->attr = params;
2191
1.25k
}
2192
2193
/**
2194
 * g_uri_params_iter_next:
2195
 * @iter: an initialized #GUriParamsIter
2196
 * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2197
 *     the attribute, or %NULL.
2198
 * @value: (out) (nullable) (optional) (transfer full): on return, contains
2199
 *     the value, or %NULL.
2200
 * @error: #GError for error reporting, or %NULL to ignore.
2201
 *
2202
 * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2203
 * an error has occurred (in which case @error is set), or if the end of the
2204
 * iteration is reached (in which case @attribute and @value are set to %NULL
2205
 * and the iterator becomes invalid). If %TRUE is returned,
2206
 * g_uri_params_iter_next() may be called again to receive another
2207
 * attribute/value pair.
2208
 *
2209
 * Note that the same @attribute may be returned multiple times, since URIs
2210
 * allow repeated attributes.
2211
 *
2212
 * Returns: %FALSE if the end of the parameters has been reached or an error was
2213
 *     encountered. %TRUE otherwise.
2214
 *
2215
 * Since: 2.66
2216
 */
2217
gboolean
2218
g_uri_params_iter_next (GUriParamsIter *iter,
2219
                        gchar         **attribute,
2220
                        gchar         **value,
2221
                        GError        **error)
2222
14.6k
{
2223
14.6k
  RealIter *ri = (RealIter *)iter;
2224
14.6k
  const gchar *attr_end, *val, *val_end;
2225
14.6k
  gchar *decoded_attr, *decoded_value;
2226
14.6k
  gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2227
14.6k
  GUriFlags decode_flags = G_URI_FLAGS_NONE;
2228
2229
14.6k
  g_return_val_if_fail (iter != NULL, FALSE);
2230
14.6k
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2231
2232
  /* Pre-clear these in case of failure or finishing. */
2233
14.6k
  if (attribute)
2234
14.6k
    *attribute = NULL;
2235
14.6k
  if (value)
2236
14.6k
    *value = NULL;
2237
2238
14.6k
  if (ri->attr >= ri->end)
2239
766
    return FALSE;
2240
2241
13.8k
  if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2242
0
    decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2243
2244
  /* Check if each character in @attr is a separator, by indexing by the
2245
   * character value into the @sep_table, which has value 1 stored at an
2246
   * index if that index is a separator. */
2247
34.3M
  for (val_end = ri->attr; val_end < ri->end; val_end++)
2248
34.3M
    if (ri->sep_table[*(guchar *)val_end])
2249
12.6k
      break;
2250
2251
13.8k
  attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2252
13.8k
  if (!attr_end)
2253
30
    {
2254
30
      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2255
30
                           _("Missing ‘=’ and parameter value"));
2256
30
      return FALSE;
2257
30
    }
2258
13.8k
  if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2259
13.8k
                   www_form, decode_flags, G_URI_ERROR_FAILED, error))
2260
182
    {
2261
182
      return FALSE;
2262
182
    }
2263
2264
13.6k
  val = attr_end + 1;
2265
13.6k
  if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2266
13.6k
                   www_form, decode_flags, G_URI_ERROR_FAILED, error))
2267
279
    {
2268
279
      g_free (decoded_attr);
2269
279
      return FALSE;
2270
279
    }
2271
2272
13.3k
  if (attribute)
2273
13.3k
    *attribute = g_steal_pointer (&decoded_attr);
2274
13.3k
  if (value)
2275
13.3k
    *value = g_steal_pointer (&decoded_value);
2276
2277
13.3k
  g_free (decoded_attr);
2278
13.3k
  g_free (decoded_value);
2279
2280
13.3k
  ri->attr = val_end + 1;
2281
13.3k
  return TRUE;
2282
13.6k
}
2283
2284
/**
2285
 * g_uri_parse_params:
2286
 * @params: a `%`-encoded string containing `attribute=value`
2287
 *   parameters
2288
 * @length: the length of @params, or `-1` if it is nul-terminated
2289
 * @separators: the separator byte character set between parameters. (usually
2290
 *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2291
 *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2292
 *   anything but ASCII characters. You may pass an empty set, in which case
2293
 *   no splitting will occur.
2294
 * @flags: flags to modify the way the parameters are handled.
2295
 * @error: #GError for error reporting, or %NULL to ignore.
2296
 *
2297
 * Many URI schemes include one or more attribute/value pairs as part of the URI
2298
 * value. This method can be used to parse them into a hash table. When an
2299
 * attribute has multiple occurrences, the last value is the final returned
2300
 * value. If you need to handle repeated attributes differently, use
2301
 * #GUriParamsIter.
2302
 *
2303
 * The @params string is assumed to still be `%`-encoded, but the returned
2304
 * values will be fully decoded. (Thus it is possible that the returned values
2305
 * may contain `=` or @separators, if the value was encoded in the input.)
2306
 * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2307
 * rules for g_uri_parse(). (However, if @params is the path or query string
2308
 * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2309
 * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2310
 * invalid encoding.)
2311
 *
2312
 * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2313
 *
2314
 * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2315
 * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2316
 * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2317
 * the returned attributes.
2318
 *
2319
 * If @params cannot be parsed (for example, it contains two @separators
2320
 * characters in a row), then @error is set and %NULL is returned.
2321
 *
2322
 * Return value: (transfer full) (element-type utf8 utf8):
2323
 *     A hash table of attribute/value pairs, with both names and values
2324
 *     fully-decoded; or %NULL on error.
2325
 *
2326
 * Since: 2.66
2327
 */
2328
GHashTable *
2329
g_uri_parse_params (const gchar     *params,
2330
                    gssize           length,
2331
                    const gchar     *separators,
2332
                    GUriParamsFlags  flags,
2333
                    GError         **error)
2334
1.25k
{
2335
1.25k
  GHashTable *hash;
2336
1.25k
  GUriParamsIter iter;
2337
1.25k
  gchar *attribute, *value;
2338
1.25k
  GError *err = NULL;
2339
2340
1.25k
  g_return_val_if_fail (length == 0 || params != NULL, NULL);
2341
1.25k
  g_return_val_if_fail (length >= -1, NULL);
2342
1.25k
  g_return_val_if_fail (separators != NULL, NULL);
2343
1.25k
  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2344
2345
1.25k
  if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2346
0
    {
2347
0
      hash = g_hash_table_new_full (str_ascii_case_hash,
2348
0
                                    str_ascii_case_equal,
2349
0
                                    g_free, g_free);
2350
0
    }
2351
1.25k
  else
2352
1.25k
    {
2353
1.25k
      hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2354
1.25k
                                    g_free, g_free);
2355
1.25k
    }
2356
2357
1.25k
  g_uri_params_iter_init (&iter, params, length, separators, flags);
2358
2359
14.6k
  while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2360
13.3k
    g_hash_table_insert (hash, attribute, value);
2361
2362
1.25k
  if (err)
2363
491
    {
2364
491
      g_propagate_error (error, g_steal_pointer (&err));
2365
491
      g_hash_table_destroy (hash);
2366
491
      return NULL;
2367
491
    }
2368
2369
766
  return g_steal_pointer (&hash);
2370
1.25k
}
2371
2372
/**
2373
 * g_uri_get_scheme:
2374
 * @uri: a #GUri
2375
 *
2376
 * Gets @uri's scheme. Note that this will always be all-lowercase,
2377
 * regardless of the string or strings that @uri was created from.
2378
 *
2379
 * Return value: (not nullable): @uri's scheme.
2380
 *
2381
 * Since: 2.66
2382
 */
2383
const gchar *
2384
g_uri_get_scheme (GUri *uri)
2385
0
{
2386
0
  g_return_val_if_fail (uri != NULL, NULL);
2387
2388
0
  return uri->scheme;
2389
0
}
2390
2391
/**
2392
 * g_uri_get_userinfo:
2393
 * @uri: a #GUri
2394
 *
2395
 * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2396
 * the flags with which @uri was created.
2397
 *
2398
 * Return value: (nullable): @uri's userinfo.
2399
 *
2400
 * Since: 2.66
2401
 */
2402
const gchar *
2403
g_uri_get_userinfo (GUri *uri)
2404
0
{
2405
0
  g_return_val_if_fail (uri != NULL, NULL);
2406
2407
0
  return uri->userinfo;
2408
0
}
2409
2410
/**
2411
 * g_uri_get_user:
2412
 * @uri: a #GUri
2413
 *
2414
 * Gets the ‘username’ component of @uri's userinfo, which may contain
2415
 * `%`-encoding, depending on the flags with which @uri was created.
2416
 * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2417
 * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2418
 *
2419
 * Return value: (nullable): @uri's user.
2420
 *
2421
 * Since: 2.66
2422
 */
2423
const gchar *
2424
g_uri_get_user (GUri *uri)
2425
0
{
2426
0
  g_return_val_if_fail (uri != NULL, NULL);
2427
2428
0
  return uri->user;
2429
0
}
2430
2431
/**
2432
 * g_uri_get_password:
2433
 * @uri: a #GUri
2434
 *
2435
 * Gets @uri's password, which may contain `%`-encoding, depending on
2436
 * the flags with which @uri was created. (If @uri was not created
2437
 * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2438
 *
2439
 * Return value: (nullable): @uri's password.
2440
 *
2441
 * Since: 2.66
2442
 */
2443
const gchar *
2444
g_uri_get_password (GUri *uri)
2445
0
{
2446
0
  g_return_val_if_fail (uri != NULL, NULL);
2447
2448
0
  return uri->password;
2449
0
}
2450
2451
/**
2452
 * g_uri_get_auth_params:
2453
 * @uri: a #GUri
2454
 *
2455
 * Gets @uri's authentication parameters, which may contain
2456
 * `%`-encoding, depending on the flags with which @uri was created.
2457
 * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2458
 * be %NULL.)
2459
 *
2460
 * Depending on the URI scheme, g_uri_parse_params() may be useful for
2461
 * further parsing this information.
2462
 *
2463
 * Return value: (nullable): @uri's authentication parameters.
2464
 *
2465
 * Since: 2.66
2466
 */
2467
const gchar *
2468
g_uri_get_auth_params (GUri *uri)
2469
0
{
2470
0
  g_return_val_if_fail (uri != NULL, NULL);
2471
2472
0
  return uri->auth_params;
2473
0
}
2474
2475
/**
2476
 * g_uri_get_host:
2477
 * @uri: a #GUri
2478
 *
2479
 * Gets @uri's host. This will never have `%`-encoded characters,
2480
 * unless it is non-UTF-8 (which can only be the case if @uri was
2481
 * created with %G_URI_FLAGS_NON_DNS).
2482
 *
2483
 * If @uri contained an IPv6 address literal, this value will be just
2484
 * that address, without the brackets around it that are necessary in
2485
 * the string form of the URI. Note that in this case there may also
2486
 * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2487
 * `fe80::1234%``25em1` if the string is still encoded).
2488
 *
2489
 * Return value: (nullable): @uri's host.
2490
 *
2491
 * Since: 2.66
2492
 */
2493
const gchar *
2494
g_uri_get_host (GUri *uri)
2495
0
{
2496
0
  g_return_val_if_fail (uri != NULL, NULL);
2497
2498
0
  return uri->host;
2499
0
}
2500
2501
/**
2502
 * g_uri_get_port:
2503
 * @uri: a #GUri
2504
 *
2505
 * Gets @uri's port.
2506
 *
2507
 * Return value: @uri's port, or `-1` if no port was specified.
2508
 *
2509
 * Since: 2.66
2510
 */
2511
gint
2512
g_uri_get_port (GUri *uri)
2513
0
{
2514
0
  g_return_val_if_fail (uri != NULL, -1);
2515
2516
0
  if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2517
0
    return g_uri_get_default_scheme_port (uri->scheme);
2518
2519
0
  return uri->port;
2520
0
}
2521
2522
/**
2523
 * g_uri_get_path:
2524
 * @uri: a #GUri
2525
 *
2526
 * Gets @uri's path, which may contain `%`-encoding, depending on the
2527
 * flags with which @uri was created.
2528
 *
2529
 * Return value: (not nullable): @uri's path.
2530
 *
2531
 * Since: 2.66
2532
 */
2533
const gchar *
2534
g_uri_get_path (GUri *uri)
2535
0
{
2536
0
  g_return_val_if_fail (uri != NULL, NULL);
2537
2538
0
  return uri->path;
2539
0
}
2540
2541
/**
2542
 * g_uri_get_query:
2543
 * @uri: a #GUri
2544
 *
2545
 * Gets @uri's query, which may contain `%`-encoding, depending on the
2546
 * flags with which @uri was created.
2547
 *
2548
 * For queries consisting of a series of `name=value` parameters,
2549
 * #GUriParamsIter or g_uri_parse_params() may be useful.
2550
 *
2551
 * Return value: (nullable): @uri's query.
2552
 *
2553
 * Since: 2.66
2554
 */
2555
const gchar *
2556
g_uri_get_query (GUri *uri)
2557
0
{
2558
0
  g_return_val_if_fail (uri != NULL, NULL);
2559
2560
0
  return uri->query;
2561
0
}
2562
2563
/**
2564
 * g_uri_get_fragment:
2565
 * @uri: a #GUri
2566
 *
2567
 * Gets @uri's fragment, which may contain `%`-encoding, depending on
2568
 * the flags with which @uri was created.
2569
 *
2570
 * Return value: (nullable): @uri's fragment.
2571
 *
2572
 * Since: 2.66
2573
 */
2574
const gchar *
2575
g_uri_get_fragment (GUri *uri)
2576
0
{
2577
0
  g_return_val_if_fail (uri != NULL, NULL);
2578
2579
0
  return uri->fragment;
2580
0
}
2581
2582
2583
/**
2584
 * g_uri_get_flags:
2585
 * @uri: a #GUri
2586
 *
2587
 * Gets @uri's flags set upon construction.
2588
 *
2589
 * Return value: @uri's flags.
2590
 *
2591
 * Since: 2.66
2592
 **/
2593
GUriFlags
2594
g_uri_get_flags (GUri *uri)
2595
0
{
2596
0
  g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2597
2598
0
  return uri->flags;
2599
0
}
2600
2601
/**
2602
 * g_uri_unescape_segment:
2603
 * @escaped_string: (nullable): A string, may be %NULL
2604
 * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2605
 *   may be %NULL
2606
 * @illegal_characters: (nullable): An optional string of illegal
2607
 *   characters not to be allowed, may be %NULL
2608
 *
2609
 * Unescapes a segment of an escaped string.
2610
 *
2611
 * If any of the characters in @illegal_characters or the NUL
2612
 * character appears as an escaped character in @escaped_string, then
2613
 * that is an error and %NULL will be returned. This is useful if you
2614
 * want to avoid for instance having a slash being expanded in an
2615
 * escaped path element, which might confuse pathname handling.
2616
 *
2617
 * Note: `NUL` byte is not accepted in the output, in contrast to
2618
 * g_uri_unescape_bytes().
2619
 *
2620
 * Returns: (nullable): an unescaped version of @escaped_string,
2621
 * or %NULL on error. The returned string should be freed when no longer
2622
 * needed.  As a special case if %NULL is given for @escaped_string, this
2623
 * function will return %NULL.
2624
 *
2625
 * Since: 2.16
2626
 **/
2627
gchar *
2628
g_uri_unescape_segment (const gchar *escaped_string,
2629
                        const gchar *escaped_string_end,
2630
                        const gchar *illegal_characters)
2631
552
{
2632
552
  gchar *unescaped;
2633
552
  gsize length;
2634
552
  gssize decoded_len;
2635
2636
552
  if (!escaped_string)
2637
0
    return NULL;
2638
2639
552
  if (escaped_string_end)
2640
552
    length = escaped_string_end - escaped_string;
2641
0
  else
2642
0
    length = strlen (escaped_string);
2643
2644
552
  decoded_len = uri_decoder (&unescaped,
2645
552
                             illegal_characters,
2646
552
                             escaped_string, length,
2647
552
                             FALSE, FALSE,
2648
552
                             G_URI_FLAGS_ENCODED,
2649
552
                             0, NULL);
2650
552
  if (decoded_len < 0)
2651
8
    return NULL;
2652
2653
544
  if (memchr (unescaped, '\0', decoded_len))
2654
87
    {
2655
87
      g_free (unescaped);
2656
87
      return NULL;
2657
87
    }
2658
2659
457
  return unescaped;
2660
544
}
2661
2662
/**
2663
 * g_uri_unescape_string:
2664
 * @escaped_string: an escaped string to be unescaped.
2665
 * @illegal_characters: (nullable): a string of illegal characters
2666
 *   not to be allowed, or %NULL.
2667
 *
2668
 * Unescapes a whole escaped string.
2669
 *
2670
 * If any of the characters in @illegal_characters or the NUL
2671
 * character appears as an escaped character in @escaped_string, then
2672
 * that is an error and %NULL will be returned. This is useful if you
2673
 * want to avoid for instance having a slash being expanded in an
2674
 * escaped path element, which might confuse pathname handling.
2675
 *
2676
 * Returns: (nullable): an unescaped version of @escaped_string.
2677
 * The returned string should be freed when no longer needed.
2678
 *
2679
 * Since: 2.16
2680
 **/
2681
gchar *
2682
g_uri_unescape_string (const gchar *escaped_string,
2683
                       const gchar *illegal_characters)
2684
0
{
2685
0
  return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2686
0
}
2687
2688
/**
2689
 * g_uri_escape_string:
2690
 * @unescaped: the unescaped input string.
2691
 * @reserved_chars_allowed: (nullable): a string of reserved
2692
 *   characters that are allowed to be used, or %NULL.
2693
 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2694
 *
2695
 * Escapes a string for use in a URI.
2696
 *
2697
 * Normally all characters that are not "unreserved" (i.e. ASCII
2698
 * alphanumerical characters plus dash, dot, underscore and tilde) are
2699
 * escaped. But if you specify characters in @reserved_chars_allowed
2700
 * they are not escaped. This is useful for the "reserved" characters
2701
 * in the URI specification, since those are allowed unescaped in some
2702
 * portions of a URI.
2703
 *
2704
 * Returns: (not nullable): an escaped version of @unescaped. The
2705
 * returned string should be freed when no longer needed.
2706
 *
2707
 * Since: 2.16
2708
 **/
2709
gchar *
2710
g_uri_escape_string (const gchar *unescaped,
2711
                     const gchar *reserved_chars_allowed,
2712
                     gboolean     allow_utf8)
2713
457
{
2714
457
  GString *s;
2715
2716
457
  g_return_val_if_fail (unescaped != NULL, NULL);
2717
2718
457
  s = g_string_sized_new ((size_t) (strlen (unescaped) * 1.25));
2719
2720
457
  g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2721
2722
457
  return g_string_free (s, FALSE);
2723
457
}
2724
2725
/**
2726
 * g_uri_unescape_bytes:
2727
 * @escaped_string: A URI-escaped string
2728
 * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2729
 *   is nul-terminated.
2730
 * @illegal_characters: (nullable): a string of illegal characters
2731
 *   not to be allowed, or %NULL.
2732
 * @error: #GError for error reporting, or %NULL to ignore.
2733
 *
2734
 * Unescapes a segment of an escaped string as binary data.
2735
 *
2736
 * Note that in contrast to g_uri_unescape_string(), this does allow
2737
 * nul bytes to appear in the output.
2738
 *
2739
 * If any of the characters in @illegal_characters appears as an escaped
2740
 * character in @escaped_string, then that is an error and %NULL will be
2741
 * returned. This is useful if you want to avoid for instance having a slash
2742
 * being expanded in an escaped path element, which might confuse pathname
2743
 * handling.
2744
 *
2745
 * Returns: (transfer full): an unescaped version of @escaped_string
2746
 *     or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2747
 *     code). The returned #GBytes should be unreffed when no longer needed.
2748
 *
2749
 * Since: 2.66
2750
 **/
2751
GBytes *
2752
g_uri_unescape_bytes (const gchar *escaped_string,
2753
                      gssize       length,
2754
                      const char *illegal_characters,
2755
                      GError     **error)
2756
552
{
2757
552
  gchar *buf;
2758
552
  gssize unescaped_length;
2759
2760
552
  g_return_val_if_fail (escaped_string != NULL, NULL);
2761
552
  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2762
2763
552
  if (length == -1)
2764
0
    length = strlen (escaped_string);
2765
2766
552
  unescaped_length = uri_decoder (&buf,
2767
552
                                  illegal_characters,
2768
552
                                  escaped_string, length,
2769
552
                                  FALSE,
2770
552
                                  FALSE,
2771
552
                                  G_URI_FLAGS_ENCODED,
2772
552
                                  G_URI_ERROR_FAILED, error);
2773
552
  if (unescaped_length == -1)
2774
8
    return NULL;
2775
2776
544
  return g_bytes_new_take (buf, unescaped_length);
2777
552
}
2778
2779
/**
2780
 * g_uri_escape_bytes:
2781
 * @unescaped: (array length=length): the unescaped input data.
2782
 * @length: the length of @unescaped
2783
 * @reserved_chars_allowed: (nullable): a string of reserved
2784
 *   characters that are allowed to be used, or %NULL.
2785
 *
2786
 * Escapes arbitrary data for use in a URI.
2787
 *
2788
 * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2789
 * alphanumerical characters plus dash, dot, underscore and tilde) are
2790
 * escaped. But if you specify characters in @reserved_chars_allowed
2791
 * they are not escaped. This is useful for the ‘reserved’ characters
2792
 * in the URI specification, since those are allowed unescaped in some
2793
 * portions of a URI.
2794
 *
2795
 * Though technically incorrect, this will also allow escaping nul
2796
 * bytes as `%``00`.
2797
 *
2798
 * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2799
 *     The returned string should be freed when no longer needed.
2800
 *
2801
 * Since: 2.66
2802
 */
2803
gchar *
2804
g_uri_escape_bytes (const guint8 *unescaped,
2805
                    gsize         length,
2806
                    const gchar  *reserved_chars_allowed)
2807
544
{
2808
544
  GString *string;
2809
2810
544
  g_return_val_if_fail (unescaped != NULL, NULL);
2811
2812
544
  string = g_string_sized_new ((size_t) (length * 1.25));
2813
2814
544
  _uri_encoder (string, unescaped, length,
2815
544
               reserved_chars_allowed, FALSE);
2816
2817
544
  return g_string_free (string, FALSE);
2818
544
}
2819
2820
static gssize
2821
g_uri_scheme_length (const gchar *uri)
2822
0
{
2823
0
  const gchar *p;
2824
2825
0
  p = uri;
2826
0
  if (!g_ascii_isalpha (*p))
2827
0
    return -1;
2828
0
  p++;
2829
0
  while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2830
0
    p++;
2831
2832
0
  if (p > uri && *p == ':')
2833
0
    return p - uri;
2834
2835
0
  return -1;
2836
0
}
2837
2838
/**
2839
 * g_uri_parse_scheme:
2840
 * @uri: a valid URI.
2841
 *
2842
 * Gets the scheme portion of a URI string.
2843
 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2844
 * as:
2845
 * |[
2846
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2847
 * ]|
2848
 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2849
 *
2850
 * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2851
 *     %NULL on error. The returned string should be freed when no longer needed.
2852
 *
2853
 * Since: 2.16
2854
 **/
2855
gchar *
2856
g_uri_parse_scheme (const gchar *uri)
2857
0
{
2858
0
  gssize len;
2859
2860
0
  g_return_val_if_fail (uri != NULL, NULL);
2861
2862
0
  len = g_uri_scheme_length (uri);
2863
0
  return len == -1 ? NULL : g_strndup (uri, len);
2864
0
}
2865
2866
/**
2867
 * g_uri_peek_scheme:
2868
 * @uri: a valid URI.
2869
 *
2870
 * Gets the scheme portion of a URI string.
2871
 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2872
 * as:
2873
 * |[
2874
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2875
 * ]|
2876
 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2877
 *
2878
 * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2879
 * all-lowercase and does not need to be freed.
2880
 *
2881
 * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2882
 *     %NULL on error. The returned string is normalized to all-lowercase, and
2883
 *     interned via g_intern_string(), so it does not need to be freed.
2884
 *
2885
 * Since: 2.66
2886
 **/
2887
const gchar *
2888
g_uri_peek_scheme (const gchar *uri)
2889
0
{
2890
0
  gssize len;
2891
0
  gchar *lower_scheme;
2892
0
  const gchar *scheme;
2893
2894
0
  g_return_val_if_fail (uri != NULL, NULL);
2895
2896
0
  len = g_uri_scheme_length (uri);
2897
0
  if (len == -1)
2898
0
    return NULL;
2899
2900
0
  lower_scheme = g_ascii_strdown (uri, len);
2901
0
  scheme = g_intern_string (lower_scheme);
2902
0
  g_free (lower_scheme);
2903
2904
0
  return scheme;
2905
0
}
2906
2907
/* Defines g_uri_error_quark(), which returns the GQuark for the string
 * "g-uri-quark". NOTE(review): presumably this is the quark behind the
 * G_URI_ERROR error domain; confirm against the public header. */
G_DEFINE_QUARK (g-uri-quark, g_uri_error)