Coverage Report

Created: 2022-05-20 06:13

/src/serenity/AK/URL.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3
 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
4
 *
5
 * SPDX-License-Identifier: BSD-2-Clause
6
 */
7
8
#include <AK/CharacterTypes.h>
9
#include <AK/Debug.h>
10
#include <AK/LexicalPath.h>
11
#include <AK/StringBuilder.h>
12
#include <AK/URL.h>
13
#include <AK/URLParser.h>
14
#include <AK/Utf8View.h>
15
16
namespace AK {
17
18
// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
19
// Constructs a URL by delegating to URLParser::parse() on the given string.
URL::URL(StringView string)
    : URL(URLParser::parse(string))
{
    // Tracing of the parse result; only active when URL_PARSER_DEBUG is true.
    if constexpr (URL_PARSER_DEBUG) {
        if (!m_valid)
            dbgln("URL constructor: Parsed URL to be invalid.");
        else
            dbgln("URL constructor: Parsed URL to be '{}'.", serialize());
    }
}
29
30
// Returns the path as a single string.
// For a cannot-be-a-base URL this is the first path entry verbatim; otherwise
// every path segment is emitted with a leading '/'.
String URL::path() const
{
    if (cannot_be_a_base_url())
        return paths()[0];

    StringBuilder joined;
    for (size_t index = 0; index < m_paths.size(); ++index) {
        joined.append('/');
        joined.append(m_paths[index]);
    }
    return joined.to_string();
}
41
42
// Parses `string` with this URL passed to the parser as the base URL.
// If this URL is not valid, a default-constructed URL is returned instead.
URL URL::complete_url(String const& string) const
{
    if (is_valid())
        return URLParser::parse(string, this);
    return {};
}
49
50
// Replaces the scheme and refreshes the cached validity flag.
void URL::set_scheme(String scheme)
{
    m_scheme = move(scheme);
    // The scheme feeds into compute_validity(), so the flag must be recomputed.
    m_valid = compute_validity();
}
55
56
// Replaces the username and refreshes the cached validity flag.
void URL::set_username(String username)
{
    m_username = move(username);
    m_valid = compute_validity();
}
61
62
// Replaces the password and refreshes the cached validity flag.
void URL::set_password(String password)
{
    m_password = move(password);
    m_valid = compute_validity();
}
67
68
// Replaces the host and refreshes the cached validity flag.
void URL::set_host(String host)
{
    m_host = move(host);
    // compute_validity() inspects the host for "file" URLs.
    m_valid = compute_validity();
}
73
74
// Sets the port. A port equal to the scheme's default port is not stored:
// the stored port is cleared instead, and validity is left untouched.
// Any other value is stored and the cached validity flag is recomputed.
void URL::set_port(Optional<u16> port)
{
    if (port == default_port_for_scheme(m_scheme)) {
        m_port = {};
    } else {
        m_port = move(port);
        m_valid = compute_validity();
    }
}
83
84
// Replaces the list of path segments and refreshes the cached validity flag.
void URL::set_paths(Vector<String> paths)
{
    m_paths = move(paths);
    // compute_validity() checks the number of path segments.
    m_valid = compute_validity();
}
89
90
// Replaces the query string. The validity flag is not recomputed here.
void URL::set_query(String query)
{
    m_query = move(query);
}
94
95
// Replaces the fragment. The validity flag is not recomputed here.
void URL::set_fragment(String fragment)
{
    m_fragment = move(fragment);
}
99
100
// FIXME: This is by no means complete.
101
// NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong.
102
bool URL::compute_validity() const
103
0
{
104
0
    if (m_scheme.is_empty())
105
0
        return false;
106
107
0
    if (m_scheme == "data") {
108
0
        if (m_data_mime_type.is_empty())
109
0
            return false;
110
0
        if (m_data_payload_is_base64) {
111
0
            if (m_data_payload.length() % 4 != 0)
112
0
                return false;
113
0
            for (auto character : m_data_payload) {
114
0
                if (!is_ascii_alphanumeric(character) || character == '+' || character == '/' || character == '=')
115
0
                    return false;
116
0
            }
117
0
        }
118
0
    } else if (m_cannot_be_a_base_url) {
119
0
        if (m_paths.size() != 1)
120
0
            return false;
121
0
        if (m_paths[0].is_empty())
122
0
            return false;
123
0
    } else {
124
0
        if (m_scheme.is_one_of("about", "mailto"))
125
0
            return false;
126
        // NOTE: Maybe it is allowed to have a zero-segment path.
127
0
        if (m_paths.size() == 0)
128
0
            return false;
129
0
    }
130
131
    // NOTE: A file URL's host should be the empty string for localhost, not null.
132
0
    if (m_scheme == "file" && m_host.is_null())
133
0
        return false;
134
135
0
    return true;
136
0
}
137
138
// Returns true when default_port_for_scheme() knows a non-zero default port for `scheme`.
bool URL::scheme_requires_port(StringView scheme)
{
    auto const default_port = default_port_for_scheme(scheme);
    return default_port != 0;
}
142
143
// Maps a scheme name to its default port; returns 0 for schemes not listed here.
u16 URL::default_port_for_scheme(StringView scheme)
{
    // Schemes sharing a default port are grouped together.
    if (scheme.is_one_of("http", "ws"))
        return 80;
    if (scheme.is_one_of("https", "wss"))
        return 443;
    if (scheme == "gemini")
        return 1965;
    if (scheme == "irc")
        return 6667;
    if (scheme == "ircs")
        return 6697;
    return 0;
}
161
162
// Builds a "file" URL from an absolute path, with the given fragment and hostname.
// Returns a default-constructed URL when `path` is not absolute.
URL URL::create_with_file_scheme(String const& path, String const& fragment, String const& hostname)
{
    LexicalPath lexical_path(path);
    if (!lexical_path.is_absolute())
        return {};

    // NOTE: A null hostname implies localhost. In both cases the host is stored as the empty
    //       string, because a file URL always needs a non-null hostname.
    bool const refers_to_localhost = hostname.is_null() || hostname == "localhost";

    URL file_url;
    file_url.set_scheme("file");
    file_url.set_host(refers_to_localhost ? String::empty() : hostname);
    file_url.set_paths(lexical_path.parts());
    // NOTE: A trailing slash in the input is represented by a trailing empty path segment.
    if (path.ends_with('/'))
        file_url.append_path("");
    file_url.set_fragment(fragment);
    return file_url;
}
180
181
// Builds a "help" URL from a path, with the given fragment and hostname.
// Unlike create_with_file_scheme(), the path is not required to be absolute.
URL URL::create_with_help_scheme(String const& path, String const& fragment, String const& hostname)
{
    LexicalPath lexical_path(path);

    // NOTE: A null hostname implies localhost. In both cases the host is stored as the empty
    //       string rather than null.
    bool const refers_to_localhost = hostname.is_null() || hostname == "localhost";

    URL help_url;
    help_url.set_scheme("help");
    help_url.set_host(refers_to_localhost ? String::empty() : hostname);
    help_url.set_paths(lexical_path.parts());
    // NOTE: A trailing slash in the input is represented by a trailing empty path segment.
    if (path.ends_with('/'))
        help_url.append_path("");
    help_url.set_fragment(fragment);
    return help_url;
}
197
198
// Interprets the argument as a URL first. If that does not yield a valid URL, the argument
// is treated as a filesystem path: it is canonicalized and turned into a "file" URL.
URL URL::create_with_url_or_path(String const& url_or_path)
{
    URL parsed_url = url_or_path;
    if (!parsed_url.is_valid()) {
        String canonical_path = LexicalPath::canonicalized_path(url_or_path);
        return URL::create_with_file_scheme(canonical_path);
    }
    return parsed_url;
}
207
208
// https://url.spec.whatwg.org/#special-scheme
209
// Returns true when `scheme` is one of: ftp, file, http, https, ws, wss.
bool URL::is_special_scheme(StringView scheme)
{
    bool const is_special = scheme.is_one_of("ftp", "file", "http", "https", "ws", "wss");
    return is_special;
}
213
214
// Serializes a "data" URL as `<scheme>:<mime type>[;base64],<payload>`.
// Asserts that the scheme is "data" and that MIME type and payload are non-null.
String URL::serialize_data_url() const
{
    VERIFY(m_scheme == "data");
    VERIFY(!m_data_mime_type.is_null());
    VERIFY(!m_data_payload.is_null());

    // NOTE: The specification does not say anything about encoding this, but we should encode at least control and non-ASCII
    //       characters (since this is also a valid representation of the same data URL).
    auto encoded_payload = URL::percent_encode(m_data_payload, PercentEncodeSet::C0Control);

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');
    output.append(m_data_mime_type);
    if (m_data_payload_is_base64)
        output.append(";base64");
    output.append(',');
    output.append(encoded_payload);
    return output.to_string();
}
231
232
// https://url.spec.whatwg.org/#concept-url-serializer
233
// Serializes this URL to a string: scheme, optional authority (credentials, host, port),
// path, optional query and - unless excluded via `exclude_fragment` - optional fragment.
// "data" URLs are handed off to serialize_data_url().
String URL::serialize(ExcludeFragment exclude_fragment) const
{
    if (m_scheme == "data")
        return serialize_data_url();

    bool const has_host = !m_host.is_null();

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');

    // Authority part; only emitted when the host is non-null.
    if (has_host) {
        output.append("//");

        if (includes_credentials()) {
            output.append(percent_encode(m_username, PercentEncodeSet::Userinfo));
            if (!m_password.is_empty()) {
                output.append(':');
                output.append(percent_encode(m_password, PercentEncodeSet::Userinfo));
            }
            output.append('@');
        }

        output.append(m_host);
        if (m_port.has_value())
            output.appendff(":{}", *m_port);
    }

    if (cannot_be_a_base_url()) {
        output.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
    } else {
        // Without a host, a leading empty segment would make the output continue with "//"
        // right after the scheme; "/." is emitted first in that case.
        if (!has_host && m_paths.size() > 1 && m_paths[0].is_empty())
            output.append("/.");
        for (auto& path_segment : m_paths) {
            output.append('/');
            output.append(percent_encode(path_segment, PercentEncodeSet::Path));
        }
    }

    if (!m_query.is_null()) {
        // Special schemes use a slightly larger percent-encode set for the query.
        auto const query_encode_set = is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
        output.append('?');
        output.append(percent_encode(m_query, query_encode_set));
    }

    bool const include_fragment = exclude_fragment == ExcludeFragment::No && !m_fragment.is_null();
    if (include_fragment) {
        output.append('#');
        output.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
    }

    return output.to_string();
}
281
282
// https://url.spec.whatwg.org/#url-rendering
283
// NOTE: This does e.g. not display credentials.
284
// FIXME: Parts of the URL other than the host should have their sequences of percent-encoded bytes replaced with code points
285
//        resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible.
286
// Serializes this URL for presentation: like serialize(), but credentials are never
// emitted and the fragment is always included when non-null.
// Asserts that the URL is valid. "data" URLs are handed off to serialize_data_url().
String URL::serialize_for_display() const
{
    VERIFY(m_valid);
    if (m_scheme == "data")
        return serialize_data_url();

    bool const has_host = !m_host.is_null();

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');

    // Host and port only; username and password are deliberately left out.
    if (has_host) {
        output.append("//");
        output.append(m_host);
        if (m_port.has_value())
            output.appendff(":{}", *m_port);
    }

    if (cannot_be_a_base_url()) {
        output.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
    } else {
        // Without a host, a leading empty segment would make the output continue with "//"
        // right after the scheme; "/." is emitted first in that case.
        if (!has_host && m_paths.size() > 1 && m_paths[0].is_empty())
            output.append("/.");
        for (auto& path_segment : m_paths) {
            output.append('/');
            output.append(percent_encode(path_segment, PercentEncodeSet::Path));
        }
    }

    if (!m_query.is_null()) {
        auto const query_encode_set = is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
        output.append('?');
        output.append(percent_encode(m_query, query_encode_set));
    }

    if (!m_fragment.is_null()) {
        output.append('#');
        output.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
    }

    return output.to_string();
}
325
326
// https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin
327
// https://url.spec.whatwg.org/#concept-url-origin
328
// Returns the ASCII serialization of this URL's origin.
//   - "blob" URLs: the origin of the URL parsed from path[0], or "null" if that parse fails.
//   - ftp/http/https/ws/wss: "<scheme>://<host>[:<port>]".
//   - everything else (including "file"): "null", i.e. an opaque origin.
// Asserts that the URL is valid.
String URL::serialize_origin() const
{
    VERIFY(m_valid);

    if (m_scheme == "blob"sv) {
        // TODO: 1. If URL's blob URL entry is non-null, then return URL's blob URL entry's environment's origin.
        // 2. Let url be the result of parsing URL's path[0].
        VERIFY(!m_paths.is_empty());
        URL url = m_paths[0];
        // 3. Return a new opaque origin, if url is failure, and url's origin otherwise.
        if (!url.is_valid())
            return "null";
        return url.serialize_origin();
    } else if (!m_scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
        return "null";
    }

    StringBuilder builder;
    builder.append(m_scheme);
    builder.append("://"sv);
    builder.append(m_host);
    // appendff() is needed so that "{}" is substituted with the port number,
    // consistent with how serialize() emits the port.
    if (m_port.has_value())
        builder.appendff(":{}", *m_port);
    return builder.build();
}
353
354
// Compares two URLs by their serialized form, optionally ignoring fragments.
// An object always equals itself; otherwise both URLs must be valid to compare equal.
bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const
{
    if (this == &other)
        return true;

    bool const both_valid = m_valid && other.m_valid;
    if (!both_valid)
        return false;

    return serialize(exclude_fragments) == other.serialize(exclude_fragments);
}
362
363
// Returns the last path segment, or a default-constructed String when the URL is
// invalid or has no path segments.
String URL::basename() const
{
    if (!m_valid || m_paths.is_empty())
        return {};
    return m_paths.last();
}
371
372
// Appends `code_point` to `builder` as percent-encoded UTF-8, i.e. one "%XX" triplet per
// encoded byte (one to four bytes depending on the code point's magnitude).
// Code points above 0x10ffff trip VERIFY_NOT_REACHED().
void URL::append_percent_encoded(StringBuilder& builder, u32 code_point)
{
    // Produces a UTF-8 continuation byte (10xxxxxx) from the six bits starting at `shift`.
    auto continuation_byte = [code_point](unsigned shift) {
        return ((code_point >> shift) & 0x3f) | 0x80;
    };

    if (code_point <= 0x7f) {
        builder.appendff("%{:02X}", code_point);
        return;
    }
    if (code_point <= 0x07ff) {
        auto const lead_byte = ((code_point >> 6) & 0x1f) | 0xc0;
        builder.appendff("%{:02X}%{:02X}", lead_byte, continuation_byte(0));
        return;
    }
    if (code_point <= 0xffff) {
        auto const lead_byte = ((code_point >> 12) & 0x0f) | 0xe0;
        builder.appendff("%{:02X}%{:02X}%{:02X}", lead_byte, continuation_byte(6), continuation_byte(0));
        return;
    }
    if (code_point <= 0x10ffff) {
        auto const lead_byte = ((code_point >> 18) & 0x07) | 0xf0;
        builder.appendff("%{:02X}%{:02X}%{:02X}%{:02X}", lead_byte, continuation_byte(12), continuation_byte(6), continuation_byte(0));
        return;
    }
    VERIFY_NOT_REACHED();
}
385
386
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
387
// Returns true when `code_point` belongs to the given percent-encode set, i.e. when it has
// to be percent-encoded in the corresponding URL component.
// Most sets are defined incrementally on top of a smaller set, which is mirrored by the
// recursive calls below. Every branch checks the non-ASCII range first, so the
// StringView::contains() lookups only ever see code points up to 0x7E.
bool URL::code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
{
    switch (set) {
    case URL::PercentEncodeSet::C0Control:
        return code_point < 0x20 || code_point > 0x7E;
    case URL::PercentEncodeSet::Fragment:
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"<>`"sv.contains(code_point);
    case URL::PercentEncodeSet::Query:
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"#<>"sv.contains(code_point);
    case URL::PercentEncodeSet::SpecialQuery:
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || code_point == '\'';
    case URL::PercentEncodeSet::Path:
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || "?`{}"sv.contains(code_point);
    case URL::PercentEncodeSet::Userinfo:
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Path) || "/:;=@[\\]^|"sv.contains(code_point);
    case URL::PercentEncodeSet::Component:
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Userinfo) || "$%&+,"sv.contains(code_point);
    case URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded:
        // https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
        // The set is the component percent-encode set plus '!', ''', '(', ')' and '~';
        // these characters are encoded, not exempted.
        return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Component) || "!'()~"sv.contains(code_point);
    case URL::PercentEncodeSet::EncodeURI:
        // NOTE: This is the same percent encode set that JS encodeURI() uses.
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
        // '~' (0x7E) is one of the characters encodeURI() leaves untouched, so only code points
        // strictly above it are encoded unconditionally.
        return code_point > 0x7E || (!is_ascii_alphanumeric(code_point) && !";,/?:@&=+$-_.!~*'()#"sv.contains(code_point));
    default:
        VERIFY_NOT_REACHED();
    }
}
414
415
// Appends `code_point` to `builder`, percent-encoded if it belongs to `set` and
// as a plain code point otherwise.
void URL::append_percent_encoded_if_necessary(StringBuilder& builder, u32 code_point, URL::PercentEncodeSet set)
{
    bool const needs_encoding = code_point_is_in_percent_encode_set(code_point, set);
    if (!needs_encoding) {
        builder.append_code_point(code_point);
        return;
    }
    append_percent_encoded(builder, code_point);
}
422
423
// Percent-encodes `input` (iterated as UTF-8 code points) according to `set`.
// When `space_as_plus` is SpaceAsPlus::Yes, spaces are emitted as '+' instead of
// being run through the percent-encode set.
String URL::percent_encode(StringView input, URL::PercentEncodeSet set, SpaceAsPlus space_as_plus)
{
    bool const emit_plus_for_space = space_as_plus == SpaceAsPlus::Yes;

    StringBuilder encoded;
    for (auto code_point : Utf8View(input)) {
        if (emit_plus_for_space && code_point == ' ') {
            encoded.append('+');
            continue;
        }
        append_percent_encoded_if_necessary(encoded, code_point, set);
    }
    return encoded.to_string();
}
434
435
// Decodes "%XX" escape sequences in `input` into raw bytes.
// A '%' that is not followed by two ASCII hex digits is copied through unchanged.
// Input without any '%' is returned as-is.
String URL::percent_decode(StringView input)
{
    if (!input.contains('%'))
        return input;

    StringBuilder decoded;
    Utf8View code_points(input);
    for (auto it = code_points.begin(); !it.done(); ++it) {
        bool const starts_escape_sequence = *it == '%'
            && is_ascii_hex_digit(it.peek(1).value_or(0))
            && is_ascii_hex_digit(it.peek(2).value_or(0));

        if (!starts_escape_sequence) {
            decoded.append_code_point(*it);
            continue;
        }

        // Consume the two hex digits after '%'; the loop increment then steps past the second one.
        ++it;
        u8 decoded_byte = parse_ascii_hex_digit(*it) << 4;
        ++it;
        decoded_byte += parse_ascii_hex_digit(*it);
        decoded.append(decoded_byte);
    }
    return decoded.to_string();
}
456
457
}