/src/serenity/Userland/Libraries/LibURL/URL.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> |
3 | | * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch> |
4 | | * Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org> |
5 | | * |
6 | | * SPDX-License-Identifier: BSD-2-Clause |
7 | | */ |
8 | | |
9 | | #pragma once |
10 | | |
11 | | #include <AK/ByteString.h> |
12 | | #include <AK/CopyOnWrite.h> |
13 | | #include <AK/String.h> |
14 | | #include <AK/StringView.h> |
15 | | #include <AK/Vector.h> |
16 | | #include <LibURL/Host.h> |
17 | | #include <LibURL/Origin.h> |
18 | | |
19 | | // On Linux distros that use mlibc, `basename` is defined as a macro that expands to `__mlibc_gnu_basename` or `__mlibc_gnu_basename_c`. We undefine it so that the `basename()` member of URL declared below is not macro-expanded. |
20 | | #if defined(AK_OS_LINUX) && defined(basename) |
21 | | # undef basename |
22 | | #endif |
23 | | |
24 | | namespace URL { |
25 | | |
26 | | enum class PercentEncodeSet { /* Selects a percent-encode set; taken by percent_encode(), append_percent_encoded_if_necessary() and code_point_is_in_percent_encode_set(). */ |
27 | | C0Control, |
28 | | Fragment, |
29 | | Query, |
30 | | SpecialQuery, |
31 | | Path, |
32 | | Userinfo, /* the default set for percent_encode() and append_percent_encoded_if_necessary() */ |
33 | | Component, |
34 | | ApplicationXWWWFormUrlencoded, |
35 | | EncodeURI |
36 | | }; |
37 | | |
38 | | enum class ExcludeFragment { /* Argument of URL::serialize() and URL::equals(), both of which default it to No. Presumably Yes leaves the fragment out -- confirm in the out-of-line definitions. */ |
39 | | No, |
40 | | Yes |
41 | | }; |
42 | | |
43 | | // https://w3c.github.io/FileAPI/#blob-url-entry |
44 | | // NOTE: This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery). A URL optionally carries one of these; see URL::blob_url_entry() and URL::set_blob_url_entry(). |
45 | | struct BlobURLEntry { |
46 | | String type; /* presumably the Blob's media type -- TODO confirm against the File API callers */ |
47 | | ByteBuffer byte_buffer; /* the raw bytes mentioned in the NOTE above */ |
48 | | Origin environment_origin; /* NOTE(review): looks like the origin of the environment that created the entry -- confirm */ |
49 | | }; |
50 | | |
51 | | void append_percent_encoded_if_necessary(StringBuilder&, u32 code_point, PercentEncodeSet set = PercentEncodeSet::Userinfo); /* set defaults to Userinfo; presumably encodes code_point only when it belongs to that set and appends it verbatim otherwise -- confirm in the definition */ |
52 | | void append_percent_encoded(StringBuilder&, u32 code_point); /* takes no encode set; presumably always percent-encodes code_point -- confirm in the definition */ |
53 | | bool code_point_is_in_percent_encode_set(u32 code_point, PercentEncodeSet); /* membership test for a single code point; no default set here */ |
54 | | Optional<u16> default_port_for_scheme(StringView); /* may return an empty Optional; URL::port_or_default() substitutes 0 in that case */ |
55 | | bool is_special_scheme(StringView); /* backs URL::is_special(), which passes the URL's scheme */ |
56 | | |
57 | | enum class SpaceAsPlus { /* Third argument of percent_encode(), defaulting to No. Presumably Yes writes a space as '+' -- confirm in the definition. */ |
58 | | No, |
59 | | Yes, |
60 | | }; |
61 | | String percent_encode(StringView input, PercentEncodeSet set = PercentEncodeSet::Userinfo, SpaceAsPlus = SpaceAsPlus::No); /* returns a String; set defaults to Userinfo and SpaceAsPlus to No */ |
62 | | ByteString percent_decode(StringView input); /* note the asymmetry: returns a ByteString, whereas percent_encode() returns a String */ |
63 | | |
64 | | // https://url.spec.whatwg.org/#url-representation |
65 | | // A URL is a struct that represents a universal identifier. To disambiguate from a valid URL string it can also be referred to as a URL record. All of its state lives in the private ref-counted Data struct, held through the AK::CopyOnWrite<Data> member m_data. |
66 | | class URL { |
67 | | friend class Parser; /* Parser may touch the private members below */ |
68 | | |
69 | | public: |
70 | 41.7k | URL() = default; |
71 | | URL(StringView); /* Not marked explicit (nor are the two overloads below), so string types convert to URL implicitly. Presumably is_valid() is false when the input does not parse -- confirm in the definition. */ |
72 | | URL(ByteString const& string) /* delegates to URL(StringView) */ |
73 | 0 | : URL(string.view()) |
74 | 0 | { |
75 | 0 | } |
76 | | URL(String const& string) /* delegates to URL(StringView) */ |
77 | | : URL(string.bytes_as_string_view()) |
78 | 0 | { |
79 | 0 | } |
80 | | |
81 | 15.4k | bool is_valid() const { return m_data->valid; } |
82 | | /* Component accessors: the ones defined inline return the matching m_data field directly; the rest are defined out of line. */ |
83 | 10.4k | String const& scheme() const { return m_data->scheme; } |
84 | 0 | String const& username() const { return m_data->username; } |
85 | 0 | String const& password() const { return m_data->password; } |
86 | 0 | Host const& host() const { return m_data->host; } |
87 | | ErrorOr<String> serialized_host() const; |
88 | | ByteString basename() const; |
89 | 0 | Optional<String> const& query() const { return m_data->query; } |
90 | 0 | Optional<String> const& fragment() const { return m_data->fragment; } |
91 | 0 | Optional<u16> port() const { return m_data->port; } |
92 | | ByteString path_segment_at_index(size_t index) const; |
93 | 0 | size_t path_segment_count() const { return m_data->paths.size(); } |
94 | | /* port_or_default(): the stored port if set, otherwise the scheme's default port, otherwise 0. */ |
95 | 0 | u16 port_or_default() const { return m_data->port.value_or(default_port_for_scheme(m_data->scheme).value_or(0)); } |
96 | 18.5k | bool cannot_be_a_base_url() const { return m_data->cannot_be_a_base_url; } |
97 | | bool cannot_have_a_username_or_password_or_port() const; |
98 | | /* includes_credentials(): true when the username or the password is non-empty. is_special(): forwards the scheme to is_special_scheme(). */ |
99 | 0 | bool includes_credentials() const { return !m_data->username.is_empty() || !m_data->password.is_empty(); } |
100 | 81.0M | bool is_special() const { return is_special_scheme(m_data->scheme); } |
101 | | /* Mutators: set_query(), set_fragment(), set_cannot_be_a_base_url() and append_slash() write m_data right here; the others are defined out of line. */ |
102 | | void set_scheme(String); |
103 | | void set_username(StringView); |
104 | | void set_password(StringView); |
105 | | void set_host(Host); |
106 | | void set_port(Optional<u16>); |
107 | | void set_paths(Vector<ByteString> const&); /* NOTE(review): takes ByteString segments although paths() returns Vector<String> -- the conversion presumably happens in the definition */ |
108 | 0 | Vector<String> const& paths() const { return m_data->paths; } |
109 | 239 | void set_query(Optional<String> query) { m_data->query = move(query); } |
110 | 0 | void set_fragment(Optional<String> fragment) { m_data->fragment = move(fragment); } |
111 | 1.13k | void set_cannot_be_a_base_url(bool value) { m_data->cannot_be_a_base_url = value; } |
112 | | void append_path(StringView); |
113 | | void append_slash() |
114 | 1.96k | { |
115 | | // NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment. |
116 | 1.96k | m_data->paths.append(String {}); |
117 | 1.96k | } |
118 | | /* Serialization: to_byte_string() is shorthand for serialize() with its default argument, ExcludeFragment::No. */ |
119 | | String serialize_path() const; |
120 | | ByteString serialize(ExcludeFragment = ExcludeFragment::No) const; |
121 | | ByteString serialize_for_display() const; |
122 | 14.4k | ByteString to_byte_string() const { return serialize(); } |
123 | | ErrorOr<String> to_string() const; |
124 | | |
125 | | Origin origin() const; |
126 | | |
127 | | bool equals(URL const& other, ExcludeFragment = ExcludeFragment::No) const; |
128 | | |
129 | | URL complete_url(StringView) const; |
130 | | /* operator==: two URLs whose m_data pointers are identical compare equal immediately; otherwise the result is equals(other, ExcludeFragment::No). */ |
131 | | [[nodiscard]] bool operator==(URL const& other) const |
132 | 0 | { |
133 | 0 | if (m_data.ptr() == other.m_data.ptr()) |
134 | 0 | return true; |
135 | 0 | return equals(other, ExcludeFragment::No); |
136 | 0 | } |
137 | | /* Blob URL entry accessors; the setter takes its argument by value and moves it into m_data. */ |
138 | 0 | Optional<BlobURLEntry> const& blob_url_entry() const { return m_data->blob_url_entry; } |
139 | 0 | void set_blob_url_entry(Optional<BlobURLEntry> entry) { m_data->blob_url_entry = move(entry); } |
140 | | |
141 | | private: |
142 | | bool compute_validity() const; /* presumably produces the value stored in Data::valid -- confirm in the definition */ |
143 | | /* Data holds every field of the URL record. clone() allocates a fresh Data and copies each member across one by one, so a newly added member must also be added to clone(). */ |
144 | | struct Data : public RefCounted<Data> { |
145 | | NonnullRefPtr<Data> clone() |
146 | 0 | { |
147 | 0 | auto clone = adopt_ref(*new Data); |
148 | 0 | clone->valid = valid; |
149 | 0 | clone->scheme = scheme; |
150 | 0 | clone->username = username; |
151 | 0 | clone->password = password; |
152 | 0 | clone->host = host; |
153 | 0 | clone->port = port; |
154 | 0 | clone->paths = paths; |
155 | 0 | clone->query = query; |
156 | 0 | clone->fragment = fragment; |
157 | 0 | clone->cannot_be_a_base_url = cannot_be_a_base_url; |
158 | 0 | clone->blob_url_entry = blob_url_entry; |
159 | 0 | return clone; |
160 | 0 | } |
161 | | |
162 | | bool valid { false }; /* starts out false; read through URL::is_valid() */ |
163 | | |
164 | | // A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string. |
165 | | String scheme; |
166 | | |
167 | | // A URL’s username is an ASCII string identifying a username. It is initially the empty string. |
168 | | String username; |
169 | | |
170 | | // A URL’s password is an ASCII string identifying a password. It is initially the empty string. |
171 | | String password; |
172 | | |
173 | | // A URL’s host is null or a host. It is initially null. |
174 | | Host host; |
175 | | |
176 | | // A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null. |
177 | | Optional<u16> port; |
178 | | |
179 | | // A URL’s path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ». |
180 | | // A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning. |
181 | | Vector<String> paths; |
182 | | |
183 | | // A URL’s query is either null or an ASCII string. It is initially null. |
184 | | Optional<String> query; |
185 | | |
186 | | // A URL’s fragment is either null or an ASCII string that can be used for further processing on the resource the URL’s other components identify. It is initially null. |
187 | | Optional<String> fragment; |
188 | | |
189 | | bool cannot_be_a_base_url { false }; /* starts out false; read and written through the URL accessors of the same name */ |
190 | | |
191 | | // https://url.spec.whatwg.org/#concept-url-blob-entry |
192 | | // A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null. |
193 | | Optional<BlobURLEntry> blob_url_entry; |
194 | | }; |
195 | | AK::CopyOnWrite<Data> m_data; /* every accessor and mutator above goes through this member */ |
196 | | }; |
197 | | |
198 | | URL create_with_url_or_path(ByteString const&); /* free factory; presumably accepts either a URL string or a filesystem path -- confirm in the definition */ |
199 | | URL create_with_file_scheme(ByteString const& path, ByteString const& fragment = {}, ByteString const& hostname = {}); /* fragment and hostname default to empty ByteStrings */ |
200 | | URL create_with_help_scheme(ByteString const& path, ByteString const& fragment = {}, ByteString const& hostname = {}); /* same parameter list and defaults as create_with_file_scheme() */ |
201 | | URL create_with_data(StringView mime_type, StringView payload, bool is_base64 = false); /* is_base64 defaults to false; presumably builds a data: URL -- confirm in the definition */ |
202 | | |
203 | | } |
204 | | |
205 | | template<> |
206 | | struct AK::Formatter<URL::URL> : AK::Formatter<StringView> { /* Formats a URL::URL by handing value.serialize() (default argument) to the StringView formatter it derives from. */ |
207 | | ErrorOr<void> format(FormatBuilder& builder, URL::URL const& value) |
208 | 0 | { |
209 | 0 | return Formatter<StringView>::format(builder, value.serialize()); |
210 | 0 | } |
211 | | }; |
212 | | |
213 | | template<> |
214 | | struct AK::Traits<URL::URL> : public AK::DefaultTraits<URL::URL> { /* Hashing support for URL::URL: the hash is that of the ByteString returned by to_byte_string(), so it is recomputed from a fresh serialization on every call. */ |
215 | 0 | static unsigned hash(URL::URL const& url) { return url.to_byte_string().hash(); } |
216 | | }; |