/src/poco/Foundation/include/Poco/URI.h
Line | Count | Source |
1 | | // |
2 | | // URI.h |
3 | | // |
4 | | // Library: Foundation |
5 | | // Package: URI |
6 | | // Module: URI |
7 | | // |
8 | | // Definition of the URI class. |
9 | | // |
10 | | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. |
11 | | // and Contributors. |
12 | | // |
13 | | // SPDX-License-Identifier: BSL-1.0 |
14 | | // |
15 | | |
16 | | |
17 | | #ifndef Foundation_URI_INCLUDED |
18 | | #define Foundation_URI_INCLUDED |
19 | | |
20 | | |
21 | | #include "Poco/Foundation.h" |
22 | | #include <vector> |
23 | | #include <utility> |
24 | | |
25 | | |
26 | | namespace Poco { |
27 | | |
28 | | |
29 | | class Path; |
30 | | |
31 | | |
32 | | class Foundation_API URI |
33 | | /// A Uniform Resource Identifier, as specified in RFC 3986. |
34 | | /// |
35 | | /// The URI class provides methods for building URIs from their |
36 | | /// parts, as well as for splitting URIs into their parts. |
37 | | /// Furthermore, the class provides methods for resolving |
38 | | /// relative URIs against base URIs. |
39 | | /// |
40 | | /// The class automatically performs a few normalizations on |
41 | | /// all URIs and URI parts passed to it: |
42 | | /// * scheme identifiers are converted to lower case |
43 | | /// * percent-encoded characters are decoded (except for the query string and fragment string) |
44 | | /// * optionally, dot segments are removed from paths (see normalize()) |
45 | | /// |
46 | | /// Note that dealing with query strings and fragment strings requires some precautions, |
47 | | /// as, internally, query strings and fragment strings are stored in percent-encoded |
48 | | /// form, while all other parts of the URI are stored in decoded form. While parsing |
49 | | /// query strings and fragment strings from properly encoded URLs generally works, |
50 | | /// explicitly setting query strings (fragment strings) with setQuery() (setFragment()) |
51 | | /// or extracting query strings (fragment strings) with getQuery() (getFragment()) may |
52 | | /// lead to ambiguities. See the descriptions of setQuery(), setRawQuery(), getQuery(), |
53 | | /// getRawQuery(), setFragment(), setRawFragment(), getFragment() and getRawFragment() |
54 | | /// for more information. |
55 | | { |
56 | | public: |
57 | | using QueryParameters = std::vector<std::pair<std::string, std::string>>; |
58 | | |
59 | | URI(); |
60 | | /// Creates an empty URI. |
61 | | |
62 | | explicit URI(const std::string& uri); |
63 | | /// Parses a URI from the given string. Throws a |
64 | | /// SyntaxException if the uri is not valid. |
65 | | |
66 | | explicit URI(const char* uri); |
67 | | /// Parses a URI from the given string. Throws a |
68 | | /// SyntaxException if the uri is not valid. |
69 | | |
70 | | URI(const std::string& scheme, const std::string& pathEtc); |
71 | | /// Creates a URI from its parts. |
72 | | |
73 | | URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc); |
74 | | /// Creates a URI from its parts. |
75 | | |
76 | | URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query); |
77 | | /// Creates a URI from its parts. |
78 | | |
79 | | URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment); |
80 | | /// Creates a URI from its parts. |
81 | | |
82 | | URI(const URI& uri); |
83 | | /// Copy constructor. Creates a URI from another one. |
84 | | |
85 | | URI(URI&& uri) noexcept; |
86 | | /// Move constructor. |
87 | | |
88 | | URI(const URI& baseURI, const std::string& relativeURI); |
89 | | /// Creates a URI from a base URI and a relative URI, according to |
90 | | /// the algorithm in section 5.2 of RFC 3986. |
91 | | |
92 | | explicit URI(const Path& path); |
93 | | /// Creates a URI from a path. |
94 | | /// |
95 | | /// The path will be made absolute, and a file:// URI |
96 | | /// will be built from it. |
97 | | |
98 | | ~URI(); |
99 | | /// Destroys the URI. |
100 | | |
101 | | URI& operator = (const URI& uri); |
102 | | /// Assignment operator. |
103 | | |
104 | | URI& operator = (URI&& uri) noexcept; |
105 | | /// Move assignment. |
106 | | |
107 | | URI& operator = (const std::string& uri); |
108 | | /// Parses and assigns a URI from the given string. Throws a |
109 | | /// SyntaxException if the uri is not valid. |
110 | | |
111 | | URI& operator = (const char* uri); |
112 | | /// Parses and assigns a URI from the given string. Throws a |
113 | | /// SyntaxException if the uri is not valid. |
114 | | |
115 | | void swap(URI& uri) noexcept; |
116 | | /// Swaps the URI with another one. |
117 | | |
118 | | void clear(); |
119 | | /// Clears all parts of the URI. |
120 | | |
121 | | std::string toString() const; |
122 | | /// Returns a string representation of the URI. |
123 | | /// |
124 | | /// Characters in the path, query and fragment parts will be |
125 | | /// percent-encoded as necessary. |
126 | | |
127 | | const std::string& getScheme() const; |
128 | | /// Returns the scheme part of the URI. |
129 | | |
130 | | void setScheme(const std::string& scheme); |
131 | | /// Sets the scheme part of the URI. The given scheme |
132 | | /// is converted to lower-case. |
133 | | /// |
134 | | /// A list of registered URI schemes can be found |
135 | | /// at <http://www.iana.org/assignments/uri-schemes>. |
136 | | |
137 | | const std::string& getUserInfo() const; |
138 | | /// Returns the user-info part of the URI. |
139 | | |
140 | | void setUserInfo(const std::string& userInfo); |
141 | | /// Sets the user-info part of the URI. |
142 | | |
143 | | const std::string& getHost() const; |
144 | | /// Returns the host part of the URI. |
145 | | |
146 | | void setHost(const std::string& host); |
147 | | /// Sets the host part of the URI. |
148 | | |
149 | | unsigned short getPort() const; |
150 | | /// Returns the port number part of the URI. |
151 | | /// |
152 | | /// If no port number (0) has been specified, the |
153 | | /// well-known port number (e.g., 80 for http) for |
154 | | /// the given scheme is returned if it is known. |
155 | | /// Otherwise, 0 is returned. |
156 | | |
157 | | void setPort(unsigned short port); |
158 | | /// Sets the port number part of the URI. |
159 | | |
160 | | unsigned short getSpecifiedPort() const; |
161 | | /// Returns the port number part of the URI. |
162 | | /// |
163 | | /// If no explicit port number has been specified, |
164 | | /// returns 0. |
165 | | |
166 | | std::string getAuthority() const; |
167 | | /// Returns the authority part (userInfo, host and port) |
168 | | /// of the URI. |
169 | | /// |
170 | | /// If the port number is a well-known port |
171 | | /// number for the given scheme (e.g., 80 for http), it |
172 | | /// is not included in the authority. |
173 | | |
174 | | void setAuthority(const std::string& authority); |
175 | | /// Parses the given authority part for the URI and sets |
176 | | /// the user-info, host, port components accordingly. |
177 | | |
178 | | const std::string& getPath() const; |
179 | | /// Returns the decoded path part of the URI. |
180 | | |
181 | | void setPath(const std::string& path); |
182 | | /// Sets the path part of the URI. |
183 | | |
184 | | std::string getQuery() const; |
185 | | /// Returns the decoded query part of the URI. |
186 | | /// |
187 | | /// Note that encoded ampersand characters ('&', "%26") |
188 | | /// will be decoded, which could cause ambiguities if the query |
189 | | /// string contains multiple parameters and a parameter name |
190 | | /// or value contains an ampersand as well. |
191 | | /// In such a case it's better to use getRawQuery() or |
192 | | /// getQueryParameters(). |
193 | | |
194 | | void setQuery(const std::string& query); |
195 | | /// Sets the query part of the URI. |
196 | | /// |
197 | | /// The query string will be percent-encoded. If the query |
198 | | /// already contains percent-encoded characters, these |
199 | | /// will be double-encoded, which is probably not what's |
200 | | /// intended by the caller. Furthermore, ampersand ('&') |
201 | | /// characters in the query will not be encoded. This could |
202 | | /// lead to ambiguity issues if the query string contains multiple |
203 | | /// name-value parameters separated by ampersand, and if any |
204 | | /// name or value also contains an ampersand. In such a |
205 | | /// case, it's better to use setRawQuery() with a properly |
206 | | /// percent-encoded query string, or use addQueryParameter() |
207 | | /// or setQueryParameters(), which take care of appropriate |
208 | | /// percent encoding of parameter names and values. |
209 | | |
210 | | void addQueryParameter(const std::string& param, const std::string& val = ""); |
211 | | /// Adds "param=val" to the query; "param" may not be empty. |
212 | | /// If val is empty, only '=' is appended to the parameter. |
213 | | /// |
214 | | /// In addition to regular encoding, this function also encodes '&' and '=', |
215 | | /// if found in param or val. |
216 | | |
217 | | const std::string& getRawQuery() const; |
218 | | /// Returns the query string in raw form, which usually |
219 | | /// means percent encoded. |
220 | | |
221 | | void setRawQuery(const std::string& query); |
222 | | /// Sets the query part of the URI. |
223 | | /// |
224 | | /// The given query string must be properly percent-encoded. |
225 | | |
226 | | QueryParameters getQueryParameters(bool plusIsSpace = true) const; |
227 | | /// Returns the decoded query string parameters as a vector |
228 | | /// of name-value pairs. |
229 | | |
230 | | void setQueryParameters(const QueryParameters& params); |
231 | | /// Sets the query part of the URI from a vector |
232 | | /// of query parameters. |
233 | | /// |
234 | | /// Calls addQueryParameter() for each parameter name and value. |
235 | | |
236 | | std::string getFragment() const; |
237 | | /// Returns the fragment part of the URI. |
238 | | |
239 | | void setFragment(const std::string& fragment); |
240 | | /// Sets the fragment part of the URI. |
241 | | |
242 | | std::string getRawFragment() const; |
243 | | /// Returns a copy of the fragment part of the URI in raw form, which usually means percent encoded. |
244 | | |
245 | | void setRawFragment(const std::string& fragment); |
246 | | /// Sets the fragment part of the URI. |
247 | | /// |
248 | | /// The given fragment string must be properly percent-encoded. |
249 | | |
250 | | void setPathEtc(const std::string& pathEtc); |
251 | | /// Sets the path, query and fragment parts of the URI. |
252 | | |
253 | | std::string getPathEtc() const; |
254 | | /// Returns the encoded path, query and fragment parts of the URI. |
255 | | |
256 | | std::string getPathAndQuery() const; |
257 | | /// Returns the encoded path and query parts of the URI. |
258 | | |
259 | | void resolve(const std::string& relativeURI); |
260 | | /// Resolves the given relative URI against the base URI. |
261 | | /// See section 5.2 of RFC 3986 for the algorithm used. |
262 | | |
263 | | void resolve(const URI& relativeURI); |
264 | | /// Resolves the given relative URI against the base URI. |
265 | | /// See section 5.2 of RFC 3986 for the algorithm used. |
266 | | |
267 | | bool isRelative() const; |
268 | | /// Returns true if the URI is a relative reference, false otherwise. |
269 | | /// |
270 | | /// A relative reference does not contain a scheme identifier. |
271 | | /// Relative references are usually resolved against an absolute |
272 | | /// base reference. |
273 | | |
274 | | bool empty() const; |
275 | | /// Returns true if the URI is empty, false otherwise. |
276 | | |
277 | | bool operator == (const URI& uri) const; |
278 | | /// Returns true if both URIs are identical, false otherwise. |
279 | | /// |
280 | | /// Two URIs are identical if their scheme, authority, |
281 | | /// path, query and fragment part are identical. |
282 | | |
283 | | bool operator == (const std::string& uri) const; |
284 | | /// Parses the given URI and returns true if both URIs are identical, |
285 | | /// false otherwise. |
286 | | |
287 | | bool operator != (const URI& uri) const; |
288 | | /// Returns true if the URIs are not identical, false otherwise. |
289 | | |
290 | | bool operator != (const std::string& uri) const; |
291 | | /// Parses the given URI and returns true if the URIs are not identical, |
292 | | /// false otherwise. |
293 | | |
294 | | void normalize(); |
295 | | /// Normalizes the URI by removing all but leading . and .. segments from the path. |
296 | | /// |
297 | | /// If the first path segment in a relative path contains a colon (:), |
298 | | /// such as in a Windows path containing a drive letter, a dot segment (./) |
299 | | /// is prepended in accordance with section 3.3 of RFC 3986. |
300 | | |
301 | | void getPathSegments(std::vector<std::string>& segments) const; |
302 | | /// Places the single path segments (delimited by slashes) into the |
303 | | /// given vector. |
304 | | |
305 | | static void encode(const std::string& str, const std::string& reserved, std::string& encodedStr); |
306 | | /// URI-encodes the given string by escaping reserved and non-ASCII |
307 | | /// characters. The encoded string is appended to encodedStr. |
308 | | |
309 | | static void decode(const std::string& str, std::string& decodedStr, bool plusAsSpace = false); |
310 | | /// URI-decodes the given string by replacing percent-encoded |
311 | | /// characters with the actual character. The decoded string |
312 | | /// is appended to decodedStr. |
313 | | /// |
314 | | /// When plusAsSpace is true, non-encoded plus signs in the query are decoded as spaces. |
315 | | /// (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1) |
316 | | |
317 | | protected: |
318 | | bool equals(const URI& uri) const; |
319 | | /// Returns true if both URIs are equivalent. |
320 | | |
321 | | bool isWellKnownPort() const; |
322 | | /// Returns true if the URI's port number is a well-known one |
323 | | /// (for example, 80, if the scheme is http). |
324 | | |
325 | | unsigned short getWellKnownPort() const; |
326 | | /// Returns the well-known port number for the URI's scheme, |
327 | | /// or 0 if the port number is not known. |
328 | | |
329 | | void parse(const std::string& uri); |
330 | | /// Parses and assigns a URI from the given string. Throws a |
331 | | /// SyntaxException if the uri is not valid. |
332 | | |
333 | | void parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end); |
334 | | /// Parses and sets the user-info, host and port from the given data. |
335 | | |
336 | | void parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end); |
337 | | /// Parses and sets the host and port from the given data. |
338 | | |
339 | | void parsePath(std::string::const_iterator& it, const std::string::const_iterator& end); |
340 | | /// Parses and sets the path from the given data. |
341 | | |
342 | | void parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end); |
343 | | /// Parses and sets the path, query and fragment from the given data. |
344 | | |
345 | | void parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end); |
346 | | /// Parses and sets the query from the given data. |
347 | | |
348 | | void parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end); |
349 | | /// Parses and sets the fragment from the given data. |
350 | | |
351 | | void mergePath(const std::string& path); |
352 | | /// Appends a path to the URI's path. |
353 | | |
354 | | void removeDotSegments(bool removeLeading = true); |
355 | | /// Removes all dot segments from the path. |
356 | | |
357 | | static void getPathSegments(const std::string& path, std::vector<std::string>& segments); |
358 | | /// Places the single path segments (delimited by slashes) into the |
359 | | /// given vector. |
360 | | |
361 | | void buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash); |
362 | | /// Builds the path from the given segments. |
363 | | |
364 | | static const std::string RESERVED_PATH; |
365 | | static const std::string RESERVED_QUERY; |
366 | | static const std::string RESERVED_QUERY_PARAM; |
367 | | static const std::string RESERVED_FRAGMENT; |
368 | | static const std::string ILLEGAL; |
369 | | |
370 | | private: |
371 | | std::string _scheme; |
372 | | std::string _userInfo; |
373 | | std::string _host; |
374 | | unsigned short _port; |
375 | | std::string _path; |
376 | | std::string _query; |
377 | | std::string _fragment; |
378 | | }; |
379 | | |
380 | | |
381 | | // |
382 | | // inlines |
383 | | // |
384 | | inline const std::string& URI::getScheme() const /* Inline accessor for the scheme part: hands back a const reference to the _scheme member, so no string copy is made. */ |
385 | 112k | { |
386 | 112k | return _scheme; |
387 | 112k | } |
388 | | |
389 | | |
390 | | inline const std::string& URI::getUserInfo() const /* Inline accessor for the user-info part: returns a const reference to the _userInfo member (no copy). */ |
391 | 8.27k | { |
392 | 8.27k | return _userInfo; |
393 | 8.27k | } |
394 | | |
395 | | |
396 | | inline const std::string& URI::getHost() const /* Inline accessor for the host part: returns a const reference to the _host member (no copy). */ |
397 | 36.2k | { |
398 | 36.2k | return _host; |
399 | 36.2k | } |
400 | | |
401 | | |
402 | | inline const std::string& URI::getPath() const /* Inline accessor for the path part: returns a const reference to the _path member (no copy); the class documentation describes the path as stored in decoded form. */ |
403 | 28.2k | { |
404 | 28.2k | return _path; |
405 | 28.2k | } |
406 | | |
407 | | |
408 | | inline const std::string& URI::getRawQuery() const /* Inline accessor for the raw query: returns a const reference to the _query member exactly as stored, without any decoding step. */ |
409 | 8.00k | { |
410 | 8.00k | return _query; |
411 | 8.00k | } |
412 | | |
413 | | |
414 | | inline std::string URI::getRawFragment() const /* Inline accessor for the raw fragment: returns the _fragment member exactly as stored. NOTE(review): this returns by value (a copy), whereas getRawQuery() returns a const reference. */ |
415 | 0 | { |
416 | 0 | return _fragment; |
417 | 0 | } |
418 | | |
419 | | |
420 | | inline unsigned short URI::getSpecifiedPort() const /* Inline accessor for the port: returns the _port member as-is; per the class declaration, 0 means no explicit port was specified. */ |
421 | 0 | { |
422 | 0 | return _port; |
423 | 0 | } |
424 | | |
425 | | |
426 | | inline void swap(URI& u1, URI& u2) noexcept /* Free-function swap in namespace Poco: exchanges the two URIs by delegating to the member URI::swap(). */ |
427 | 0 | { |
428 | 0 | u1.swap(u2); |
429 | 0 | } |
430 | | |
431 | | |
432 | | } // namespace Poco |
433 | | |
434 | | |
435 | | #endif // Foundation_URI_INCLUDED |