Coverage Report

Created: 2025-11-02 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libsoup/libsoup/soup-uri-utils.c
Line
Count
Source
1
/* soup-uri-utils.c
2
 *
3
 * Copyright 2020 Igalia S.L.
4
 * Copyright 1999-2003 Ximian, Inc.
5
 *
6
 * This file is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as
8
 * published by the Free Software Foundation; either version 2 of the
9
 * License, or (at your option) any later version.
10
 *
11
 * This file is distributed in the hope that it will be useful, but
12
 * WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 *
19
 * SPDX-License-Identifier: LGPL-2.0-or-later
20
 */
21
22
#ifdef HAVE_CONFIG_H
23
#include <config.h>
24
#endif
25
26
#include <string.h>
27
#include <stdlib.h>
28
29
#include <glib/gi18n-lib.h>
30
31
#include "soup-uri-utils-private.h"
32
#include "soup.h"
33
#include "soup-misc.h"
34
35
/**
36
 * SOUP_HTTP_URI_FLAGS:
37
 *
38
 * The set of #GUriFlags libsoup expects all #GUri to use.
39
 */
40
41
/* Map a URI scheme name to its well-known port number.
 *
 * Yields 80 for "http" and "ws", 443 for "https" and "wss", 21 for "ftp",
 * and -1 for every other scheme. The comparison is exact (case-sensitive).
 */
static inline int
soup_scheme_default_port (const char *scheme)
{
        if (g_strcmp0 (scheme, "http") == 0 || g_strcmp0 (scheme, "ws") == 0)
                return 80;

        if (g_strcmp0 (scheme, "https") == 0 || g_strcmp0 (scheme, "wss") == 0)
                return 443;

        if (g_strcmp0 (scheme, "ftp") == 0)
                return 21;

        return -1;
}
55
56
static inline gboolean
57
parts_equal (const char *one, const char *two, gboolean insensitive)
58
0
{
59
0
  if (!one && !two)
60
0
    return TRUE;
61
0
  if (!one || !two)
62
0
    return FALSE;
63
0
  return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
64
0
}
65
66
static inline gboolean
67
path_equal (const char *one, const char *two)
68
0
{
69
0
        if (one[0] == '\0')
70
0
                one = "/";
71
0
        if (two[0] == '\0')
72
0
                two = "/";
73
74
0
  return !strcmp (one, two);
75
0
}
76
77
static gboolean
78
flags_equal (GUriFlags flags1, GUriFlags flags2)
79
0
{
80
        /* We only care about flags that affect the contents which these do */
81
0
        static const GUriFlags normalization_flags = (G_URI_FLAGS_ENCODED | G_URI_FLAGS_ENCODED_FRAGMENT |
82
0
                                                      G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY |
83
0
                                                      G_URI_FLAGS_SCHEME_NORMALIZE);
84
85
0
        return (flags1 & normalization_flags) == (flags2 & normalization_flags);
86
0
}
87
88
/**
89
 * soup_uri_equal:
90
 * @uri1: a #GUri
91
 * @uri2: another #GUri
92
 *
93
 * Tests whether or not @uri1 and @uri2 are equal in all parts.
94
 *
95
 * Returns: %TRUE if equal otherwise %FALSE
96
 **/
97
gboolean
98
soup_uri_equal (GUri *uri1, GUri *uri2)
99
0
{
100
0
      g_return_val_if_fail (uri1 != NULL, FALSE);
101
0
  g_return_val_if_fail (uri2 != NULL, FALSE);
102
103
0
        if (!flags_equal (g_uri_get_flags (uri1), g_uri_get_flags (uri2))                  ||
104
0
            g_strcmp0 (g_uri_get_scheme (uri1), g_uri_get_scheme (uri2))                   ||
105
0
      g_uri_get_port (uri1) != g_uri_get_port (uri2)                                 ||
106
0
      !parts_equal (g_uri_get_user (uri1), g_uri_get_user (uri2), FALSE)             ||
107
0
      !parts_equal (g_uri_get_password (uri1), g_uri_get_password (uri2), FALSE)     ||
108
0
      !parts_equal (g_uri_get_host (uri1), g_uri_get_host (uri2), TRUE)              ||
109
0
      !path_equal (g_uri_get_path (uri1), g_uri_get_path (uri2))                     ||
110
0
      !parts_equal (g_uri_get_query (uri1), g_uri_get_query (uri2), FALSE)           ||
111
0
      !parts_equal (g_uri_get_fragment (uri1), g_uri_get_fragment (uri2), FALSE)) {
112
0
                return FALSE;
113
0
            }
114
115
0
        return TRUE;
116
0
}
117
118
/**
119
 * soup_uri_get_path_and_query:
120
 * @uri: a #GUri
121
 *
122
 * Extracts the `path` and `query` parts from @uri.
123
 *
124
 * Returns: string of combined path and query
125
 **/
126
char *
127
soup_uri_get_path_and_query (GUri *uri)
128
0
{
129
0
        const char *query;
130
131
0
  g_return_val_if_fail (uri != NULL, NULL);
132
133
0
        query = g_uri_get_query (uri);
134
135
0
        return g_strdup_printf ("%s%c%s", g_uri_get_path (uri),
136
0
                                query ? '?' : '\0',
137
0
                                query ? query : "");
138
0
}
139
140
/**
141
 * soup_uri_uses_default_port:
142
 * @uri: a #GUri
143
 *
144
 * Tests if @uri uses the default port for its scheme.
145
 *
146
 * (Eg, 80 for http.) (This only works for http, https and ftp; libsoup does not
147
 * know the default ports of other protocols.)
148
 *
149
 * Returns: %TRUE or %FALSE
150
 **/
151
gboolean
152
soup_uri_uses_default_port (GUri *uri)
153
0
{
154
0
        g_return_val_if_fail (uri != NULL, FALSE);
155
156
0
        if (g_uri_get_port (uri) == -1)
157
0
                return TRUE;
158
159
0
        if (g_uri_get_scheme (uri))
160
0
                return g_uri_get_port (uri) == soup_scheme_default_port (g_uri_get_scheme (uri));
161
162
0
        return FALSE;
163
0
}
164
165
/* Return a copy of @uri reduced to its host-level parts: user, password,
 * auth params, query and fragment are cleared and the path is reset to "/".
 * Components not listed here are taken over from @uri by soup_uri_copy().
 */
GUri *
soup_uri_copy_host (GUri *uri)
{
        GUri *host_only;

        g_return_val_if_fail (uri != NULL, NULL);

        host_only = soup_uri_copy (uri,
                                   SOUP_URI_USER, NULL,
                                   SOUP_URI_PASSWORD, NULL,
                                   SOUP_URI_AUTH_PARAMS, NULL,
                                   SOUP_URI_PATH, "/",
                                   SOUP_URI_QUERY, NULL,
                                   SOUP_URI_FRAGMENT, NULL,
                                   SOUP_URI_NONE);

        return host_only;
}
179
180
/**
181
 * soup_uri_host_hash:
182
 * @key: (type GUri): a #GUri with a non-%NULL @host member
183
 *
184
 * Hashes @key, considering only the scheme, host, and port.
185
 *
186
 * Returns: A hash
187
 */
188
guint
189
soup_uri_host_hash (gconstpointer key)
190
0
{
191
0
  GUri *uri = (GUri*)key;
192
0
        const char *host;
193
194
0
  g_return_val_if_fail (uri != NULL, 0);
195
196
0
        host = g_uri_get_host (uri);
197
198
0
  g_return_val_if_fail (host != NULL, 0);
199
200
0
  return soup_str_case_hash (g_uri_get_scheme (uri)) +
201
0
               g_uri_get_port (uri) +
202
0
         soup_str_case_hash (host);
203
0
}
204
205
/**
206
 * soup_uri_host_equal:
207
 * @v1: (type GUri): a #GUri with a non-%NULL @host member
208
 * @v2: (type GUri): a #GUri with a non-%NULL @host member
209
 *
210
 * Compares @v1 and @v2, considering only the scheme, host, and port.
211
 *
212
 * Returns: %TRUE if the URIs are equal in scheme, host, and port.
213
 */
214
gboolean
215
soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
216
0
{
217
0
  GUri *one = (GUri*)v1;
218
0
  GUri *two = (GUri*)v2;
219
0
        const char *one_host, *two_host;
220
221
0
  g_return_val_if_fail (one != NULL && two != NULL, one == two);
222
223
0
        one_host = g_uri_get_host (one);
224
0
        two_host = g_uri_get_host (two);
225
226
0
  g_return_val_if_fail (one_host != NULL && two_host != NULL, one_host == two_host);
227
228
0
        if (one == two)
229
0
                return TRUE;
230
0
  if (g_strcmp0 (g_uri_get_scheme (one), g_uri_get_scheme (two)) != 0)
231
0
    return FALSE;
232
233
0
  if (g_uri_get_port (one) != g_uri_get_port (two))
234
0
    return FALSE;
235
236
0
  return g_ascii_strcasecmp (one_host, two_host) == 0;
237
0
}
238
239
gboolean
240
soup_uri_is_https (GUri *uri)
241
0
{
242
0
        const char *scheme;
243
244
0
        g_assert (uri != NULL);
245
246
0
        scheme = g_uri_get_scheme (uri);
247
0
        if (G_UNLIKELY (scheme == NULL))
248
0
                return FALSE;
249
250
0
        return strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0;
251
0
}
252
253
gboolean
254
soup_uri_is_http (GUri *uri)
255
0
{
256
0
        const char *scheme;
257
258
0
        g_assert (uri != NULL);
259
260
0
        scheme = g_uri_get_scheme (uri);
261
0
        if (G_UNLIKELY (scheme == NULL))
262
0
                return FALSE;
263
264
0
        return strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0;
265
0
}
266
267
37
#define BASE64_INDICATOR     ";base64"
268
176
#define BASE64_INDICATOR_LEN (sizeof (";base64") - 1)
269
270
/**
271
 * soup_uri_decode_data_uri:
272
 * @uri: a data URI, in string form
273
 * @content_type: (out) (nullable) (transfer full): location to store content type
274
 *
275
 * Decodes the given data URI and returns its contents and @content_type.
276
 *
277
 * Returns: (transfer full): a #GBytes with the contents of @uri,
278
 *    or %NULL if @uri is not a valid data URI
279
 */
280
GBytes *
281
soup_uri_decode_data_uri (const char *uri,
282
                          char      **content_type)
283
138
{
284
138
        GUri *soup_uri;
285
138
        const char *comma, *start, *end;
286
138
        gboolean base64 = FALSE;
287
138
        char *uri_string;
288
138
        GBytes *bytes;
289
290
138
        g_return_val_if_fail (uri != NULL, NULL);
291
292
138
        soup_uri = g_uri_parse (uri, SOUP_HTTP_URI_FLAGS, NULL);
293
138
        if (!soup_uri)
294
1
                return NULL;
295
296
137
        if (g_strcmp0 (g_uri_get_scheme (soup_uri), "data") || g_uri_get_host (soup_uri) != NULL) {
297
31
                g_uri_unref (soup_uri);
298
31
                return NULL;
299
31
        }
300
301
106
        if (content_type)
302
0
                *content_type = NULL;
303
304
106
#if !GLIB_CHECK_VERSION (2, 83, 1)
305
        /* g_uri_to_string() is picky about paths that start with `//` and will assert, clean them up.
306
         * https://gitlab.gnome.org/GNOME/glib/-/merge_requests/4407 */
307
106
        const char *path = g_uri_get_path (soup_uri);
308
106
        if (path[0] == '/' && path[1] == '/') {
309
1
                char *new_path = g_strconcat ("/.", path, NULL);
310
1
                GUri *new_uri = soup_uri_copy (soup_uri, SOUP_URI_PATH, new_path, SOUP_URI_NONE);
311
312
1
                g_uri_unref (soup_uri);
313
1
                g_free (new_path);
314
315
1
                soup_uri = new_uri;
316
1
        }
317
106
#endif
318
319
106
        uri_string = g_uri_to_string (soup_uri);
320
106
        g_uri_unref (soup_uri);
321
106
        if (!uri_string)
322
0
                return NULL;
323
324
106
        start = uri_string + 5;
325
106
        comma = strchr (start, ',');
326
106
        if (comma && comma != start) {
327
                /* Deal with MIME type / params */
328
40
                if (comma >= start + BASE64_INDICATOR_LEN && !g_ascii_strncasecmp (comma - BASE64_INDICATOR_LEN, BASE64_INDICATOR, BASE64_INDICATOR_LEN)) {
329
22
                        end = comma - BASE64_INDICATOR_LEN;
330
22
                        base64 = TRUE;
331
22
                } else
332
18
                        end = comma;
333
334
40
                if (end != start && content_type)
335
0
                        *content_type = g_uri_unescape_segment (start, end, NULL);
336
40
        }
337
338
106
        if (content_type && !*content_type)
339
0
                *content_type = g_strdup ("text/plain;charset=US-ASCII");
340
341
106
        if (comma)
342
41
                start = comma + 1;
343
344
106
        if (*start) {
345
84
                bytes = g_uri_unescape_bytes (start, -1, NULL, NULL);
346
347
84
                if (base64 && bytes) {
348
21
                        if (g_bytes_get_size (bytes) <= 1)
349
21
                                g_clear_pointer (&bytes, g_bytes_unref);
350
20
                        else {
351
20
                                gsize content_length;
352
20
                                GByteArray *unescaped_array = g_bytes_unref_to_array (bytes);
353
20
                                g_base64_decode_inplace ((gchar*)unescaped_array->data, &content_length);
354
20
                                unescaped_array->len = content_length;
355
20
                                bytes = g_byte_array_free_to_bytes (unescaped_array);
356
20
                        }
357
21
                }
358
84
        } else {
359
22
                bytes = g_bytes_new_static (NULL, 0);
360
22
        }
361
106
        g_free (uri_string);
362
363
106
        return bytes;
364
106
}
365
366
/**
367
 * SoupURIComponent:
368
 * @SOUP_URI_NONE: no component
369
 * @SOUP_URI_SCHEME: the URI scheme component
370
 * @SOUP_URI_USER: the URI user component
371
 * @SOUP_URI_PASSWORD: the URI password component
372
 * @SOUP_URI_AUTH_PARAMS: the URI authentication parameters component
373
 * @SOUP_URI_HOST: the URI host component
374
 * @SOUP_URI_PORT: the URI port component
375
 * @SOUP_URI_PATH: the URI path component
376
 * @SOUP_URI_QUERY: the URI query component
377
 * @SOUP_URI_FRAGMENT: the URI fragment component
378
 *
379
 * Enum values passed to [func@uri_copy] to indicate the components of
380
 * the URI that should be updated with the given values.
381
 */
382
383
static int
384
get_maybe_default_port (GUri *uri)
385
1
{
386
1
        const char *scheme = g_uri_get_scheme (uri);
387
1
        int port = g_uri_get_port (uri);
388
389
1
        switch (port) {
390
0
        case 80:
391
0
                if (!strcmp (scheme, "http") || !strcmp (scheme, "ws"))
392
0
                        return -1;
393
0
                break;
394
0
        case 443:
395
0
                if (!strcmp (scheme, "https") || !strcmp (scheme, "wss"))
396
0
                        return -1;
397
0
                break;
398
1
        default:
399
1
                break;
400
1
        }
401
402
1
        return port;
403
1
}
404
405
/**
406
 * soup_uri_copy: (skip)
407
 * @uri: the #GUri to copy
408
 * @first_component: first #SoupURIComponent to update
409
 * @...: value of @first_component  followed by additional
410
 *    components and values, terminated by %SOUP_URI_NONE
411
 *
412
 * As of 3.4.0 this will detect the default ports of HTTP(s) and WS(S)
413
 * URIs when copying and set it to the default port of the new scheme.
414
 * So for example copying `http://localhost:80` while changing the scheme to https
415
 * will result in `https://localhost:443`.
416
 * 
417
 * Return a copy of @uri with the given components updated.
418
 *
419
 * Returns: (transfer full): a new #GUri
420
 */
421
GUri *
422
soup_uri_copy (GUri            *uri,
423
               SoupURIComponent first_component,
424
               ...)
425
1
{
426
1
        va_list args;
427
1
        SoupURIComponent component = first_component;
428
1
        gpointer values[SOUP_URI_FRAGMENT + 1];
429
1
        gboolean values_to_set[SOUP_URI_FRAGMENT + 1];
430
1
        GUriFlags flags = g_uri_get_flags (uri);
431
432
1
        g_return_val_if_fail (uri != NULL, NULL);
433
434
1
        memset (&values_to_set, 0, sizeof (values_to_set));
435
436
1
        va_start (args, first_component);
437
2
        while (component != SOUP_URI_NONE) {
438
1
                if (component == SOUP_URI_PORT)
439
0
                        values[component] = GINT_TO_POINTER (va_arg (args, gint));
440
1
                else
441
1
                        values[component] = va_arg (args, gpointer);
442
1
                values_to_set[component] = TRUE;
443
1
                component = va_arg (args, SoupURIComponent);
444
1
        }
445
1
        va_end (args);
446
447
1
        if (values_to_set[SOUP_URI_PASSWORD])
448
0
                flags |= G_URI_FLAGS_HAS_PASSWORD;
449
1
        if (values_to_set[SOUP_URI_AUTH_PARAMS])
450
0
                flags |= G_URI_FLAGS_HAS_AUTH_PARAMS;
451
1
        if (values_to_set[SOUP_URI_PATH])
452
1
                flags |= G_URI_FLAGS_ENCODED_PATH;
453
1
        if (values_to_set[SOUP_URI_QUERY])
454
0
                flags |= G_URI_FLAGS_ENCODED_QUERY;
455
1
        if (values_to_set[SOUP_URI_FRAGMENT])
456
0
                flags |= G_URI_FLAGS_ENCODED_FRAGMENT;
457
1
        return g_uri_build_with_user (
458
1
                flags,
459
1
                values_to_set[SOUP_URI_SCHEME] ? values[SOUP_URI_SCHEME] : g_uri_get_scheme (uri),
460
1
                values_to_set[SOUP_URI_USER] ? values[SOUP_URI_USER] : g_uri_get_user (uri),
461
1
                values_to_set[SOUP_URI_PASSWORD] ? values[SOUP_URI_PASSWORD] : g_uri_get_password (uri),
462
1
                values_to_set[SOUP_URI_AUTH_PARAMS] ? values[SOUP_URI_AUTH_PARAMS] : g_uri_get_auth_params (uri),
463
1
                values_to_set[SOUP_URI_HOST] ? values[SOUP_URI_HOST] : g_uri_get_host (uri),
464
1
                values_to_set[SOUP_URI_PORT] ? GPOINTER_TO_INT (values[SOUP_URI_PORT]) : get_maybe_default_port (uri),
465
1
                values_to_set[SOUP_URI_PATH] ? values[SOUP_URI_PATH] : g_uri_get_path (uri),
466
1
                values_to_set[SOUP_URI_QUERY] ? values[SOUP_URI_QUERY] : g_uri_get_query (uri),
467
1
                values_to_set[SOUP_URI_FRAGMENT] ? values[SOUP_URI_FRAGMENT] : g_uri_get_fragment (uri)
468
1
        );
469
1
}
470
471
/* Return @uri with the flags libsoup expects.
 *
 * If @uri already carries at least one of the encoding flags and the
 * scheme-normalization flag, a new reference to @uri itself is returned.
 * Otherwise a new #GUri is built from the same components with
 * SOUP_HTTP_URI_FLAGS added to the existing flags.
 */
GUri *
soup_uri_copy_with_normalized_flags (GUri *uri)
{
        const GUriFlags encoded_mask = (G_URI_FLAGS_ENCODED |
                                        G_URI_FLAGS_ENCODED_PATH |
                                        G_URI_FLAGS_ENCODED_QUERY |
                                        G_URI_FLAGS_ENCODED_FRAGMENT);
        GUriFlags flags = g_uri_get_flags (uri);
        /* We require its encoded (hostname encoding optional) */
        gboolean is_encoded = (flags & encoded_mask) != 0;
        /* And has scheme-based normalization */
        gboolean is_normalized = (flags & G_URI_FLAGS_SCHEME_NORMALIZE) != 0;

        if (is_encoded && is_normalized)
                return g_uri_ref (uri);

        return g_uri_build_with_user (flags | SOUP_HTTP_URI_FLAGS,
                                      g_uri_get_scheme (uri),
                                      g_uri_get_user (uri),
                                      g_uri_get_password (uri),
                                      g_uri_get_auth_params (uri),
                                      g_uri_get_host (uri),
                                      g_uri_get_port (uri),
                                      g_uri_get_path (uri),
                                      g_uri_get_query (uri),
                                      g_uri_get_fragment (uri));
}
496
497
char *
498
soup_uri_get_host_for_headers (GUri *uri)
499
0
{
500
0
        const char *host = g_uri_get_host (uri);
501
502
0
        if (strchr (host, ':'))
503
0
                return g_strdup_printf ("[%.*s]", (int)strcspn (host, "%"), host);
504
0
        if (g_hostname_is_non_ascii (host))
505
0
                return g_hostname_to_ascii (host);
506
507
0
        return g_strdup (host);
508
0
}