/src/php-src/ext/uri/php_uriparser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | https://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Authors: Máté Kocsis <kocsismate@php.net> | |
14 | | +----------------------------------------------------------------------+ |
15 | | */ |
16 | | |
17 | | #include "php.h" |
18 | | #include "php_uriparser.h" |
19 | | #include "php_uri_common.h" |
20 | | #include "Zend/zend_smart_str.h" |
21 | | #include "Zend/zend_exceptions.h" |
22 | | |
23 | | static void uriparser_free_uri(void *uri); |
24 | | |
25 | | static void *uriparser_malloc(UriMemoryManager *memory_manager, size_t size) |
26 | 0 | { |
27 | 0 | return emalloc(size); |
28 | 0 | } |
29 | | |
30 | | static void *uriparser_calloc(UriMemoryManager *memory_manager, size_t nmemb, size_t size) |
31 | 0 | { |
32 | 0 | return ecalloc(nmemb, size); |
33 | 0 | } |
34 | | |
35 | | static void *uriparser_realloc(UriMemoryManager *memory_manager, void *ptr, size_t size) |
36 | 0 | { |
37 | 0 | return erealloc(ptr, size); |
38 | 0 | } |
39 | | |
40 | | static void *uriparser_reallocarray(UriMemoryManager *memory_manager, void *ptr, size_t nmemb, size_t size) |
41 | 0 | { |
42 | 0 | return safe_erealloc(ptr, nmemb, size, 0); |
43 | 0 | } |
44 | | |
45 | | static void uriparser_free(UriMemoryManager *memory_manager, void *ptr) |
46 | 0 | { |
47 | 0 | efree(ptr); |
48 | 0 | } |
49 | | |
50 | | static const UriMemoryManager uriparser_mm = { |
51 | | .malloc = uriparser_malloc, |
52 | | .calloc = uriparser_calloc, |
53 | | .realloc = uriparser_realloc, |
54 | | .reallocarray = uriparser_reallocarray, |
55 | | .free = uriparser_free, |
56 | | .userData = NULL, |
57 | | }; |
58 | | |
59 | | /* The library expects a pointer to a non-const UriMemoryManager, but does |
60 | | * not actually modify it (and neither does our implementation). Use a |
61 | | * const struct with a non-const pointer for convenience. */ |
62 | | static UriMemoryManager* const mm = (UriMemoryManager*)&uriparser_mm; |
63 | | |
64 | | static inline size_t get_text_range_length(const UriTextRangeA *range) |
65 | 0 | { |
66 | 0 | return range->afterLast - range->first; |
67 | 0 | } |
68 | | |
69 | | ZEND_ATTRIBUTE_NONNULL static void uriparser_copy_uri(UriUriA *new_uriparser_uri, const UriUriA *uriparser_uri) |
70 | 0 | { |
71 | 0 | int result = uriCopyUriMmA(new_uriparser_uri, uriparser_uri, mm); |
72 | 0 | ZEND_ASSERT(result == URI_SUCCESS); |
73 | 0 | } |
74 | | |
75 | 0 | ZEND_ATTRIBUTE_NONNULL static UriUriA *get_normalized_uri(uriparser_uris_t *uriparser_uris) { |
76 | 0 | if (!uriparser_uris->normalized_uri_initialized) { |
77 | 0 | uriparser_copy_uri(&uriparser_uris->normalized_uri, &uriparser_uris->uri); |
78 | 0 | int result = uriNormalizeSyntaxExMmA(&uriparser_uris->normalized_uri, (unsigned int)-1, mm); |
79 | 0 | ZEND_ASSERT(result == URI_SUCCESS); |
80 | 0 | uriparser_uris->normalized_uri_initialized = true; |
81 | 0 | } |
82 | | |
83 | 0 | return &uriparser_uris->normalized_uri; |
84 | 0 | } |
85 | | |
86 | | ZEND_ATTRIBUTE_NONNULL static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_component_read_mode_t read_mode) |
87 | 0 | { |
88 | 0 | switch (read_mode) { |
89 | 0 | case URI_COMPONENT_READ_RAW: |
90 | 0 | return &uriparser_uris->uri; |
91 | 0 | case URI_COMPONENT_READ_NORMALIZED_ASCII: |
92 | 0 | ZEND_FALLTHROUGH; |
93 | 0 | case URI_COMPONENT_READ_NORMALIZED_UNICODE: |
94 | 0 | return get_normalized_uri(uriparser_uris); |
95 | 0 | EMPTY_SWITCH_DEFAULT_CASE() |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_scheme(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
100 | 0 | { |
101 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
102 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
103 | | |
104 | 0 | if (uriparser_uri->scheme.first != NULL && uriparser_uri->scheme.afterLast != NULL) { |
105 | 0 | zend_string *str = zend_string_init(uriparser_uri->scheme.first, get_text_range_length(&uriparser_uri->scheme), false); |
106 | 0 | ZVAL_NEW_STR(retval, str); |
107 | 0 | } else { |
108 | 0 | ZVAL_NULL(retval); |
109 | 0 | } |
110 | |
|
111 | 0 | return SUCCESS; |
112 | 0 | } |
113 | | |
114 | | ZEND_ATTRIBUTE_NONNULL zend_result uriparser_read_userinfo(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
115 | 0 | { |
116 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
117 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
118 | | |
119 | 0 | if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) { |
120 | 0 | ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, get_text_range_length(&uriparser_uri->userInfo)); |
121 | 0 | } else { |
122 | 0 | ZVAL_NULL(retval); |
123 | 0 | } |
124 | |
|
125 | 0 | return SUCCESS; |
126 | 0 | } |
127 | | |
128 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_username(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
129 | 0 | { |
130 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
131 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
132 | | |
133 | 0 | if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) { |
134 | 0 | size_t length = get_text_range_length(&uriparser_uri->userInfo); |
135 | 0 | const char *c = memchr(uriparser_uri->userInfo.first, ':', length); |
136 | |
|
137 | 0 | if (c == NULL && length > 0) { |
138 | 0 | ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, length); |
139 | 0 | } else if (c != NULL && c - uriparser_uri->userInfo.first > 0) { |
140 | 0 | ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, c - uriparser_uri->userInfo.first); |
141 | 0 | } else { |
142 | 0 | ZVAL_NULL(retval); |
143 | 0 | } |
144 | 0 | } else { |
145 | 0 | ZVAL_NULL(retval); |
146 | 0 | } |
147 | |
|
148 | 0 | return SUCCESS; |
149 | 0 | } |
150 | | |
151 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_password(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
152 | 0 | { |
153 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
154 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
155 | | |
156 | 0 | if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) { |
157 | 0 | const char *c = memchr(uriparser_uri->userInfo.first, ':', get_text_range_length(&uriparser_uri->userInfo)); |
158 | |
|
159 | 0 | if (c != NULL && uriparser_uri->userInfo.afterLast - c - 1 > 0) { |
160 | 0 | ZVAL_STRINGL(retval, c + 1, uriparser_uri->userInfo.afterLast - c - 1); |
161 | 0 | } else { |
162 | 0 | ZVAL_NULL(retval); |
163 | 0 | } |
164 | 0 | } else { |
165 | 0 | ZVAL_NULL(retval); |
166 | 0 | } |
167 | |
|
168 | 0 | return SUCCESS; |
169 | 0 | } |
170 | | |
171 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_host(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
172 | 0 | { |
173 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
174 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
175 | | |
176 | 0 | if (uriparser_uri->hostText.first != NULL && uriparser_uri->hostText.afterLast != NULL) { |
177 | 0 | if (uriparser_uri->hostData.ip6 != NULL || uriparser_uri->hostData.ipFuture.first != NULL) { |
178 | | /* the textual representation of the host is always accessible in the .hostText field no matter what the host is */ |
179 | 0 | smart_str host_str = {0}; |
180 | |
|
181 | 0 | smart_str_appendc(&host_str, '['); |
182 | 0 | smart_str_appendl(&host_str, uriparser_uri->hostText.first, get_text_range_length(&uriparser_uri->hostText)); |
183 | 0 | smart_str_appendc(&host_str, ']'); |
184 | |
|
185 | 0 | ZVAL_NEW_STR(retval, smart_str_extract(&host_str)); |
186 | 0 | } else { |
187 | 0 | ZVAL_STRINGL(retval, uriparser_uri->hostText.first, get_text_range_length(&uriparser_uri->hostText)); |
188 | 0 | } |
189 | 0 | } else { |
190 | 0 | ZVAL_NULL(retval); |
191 | 0 | } |
192 | |
|
193 | 0 | return SUCCESS; |
194 | 0 | } |
195 | | |
196 | | ZEND_ATTRIBUTE_NONNULL static size_t str_to_int(const char *str, size_t len) |
197 | 0 | { |
198 | 0 | size_t result = 0; |
199 | |
|
200 | 0 | for (size_t i = 0; i < len; ++i) { |
201 | 0 | result = result * 10 + (str[i] - '0'); |
202 | 0 | } |
203 | |
|
204 | 0 | return result; |
205 | 0 | } |
206 | | |
207 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_port(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
208 | 0 | { |
209 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
210 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
211 | | |
212 | 0 | if (uriparser_uri->portText.first != NULL && uriparser_uri->portText.afterLast != NULL) { |
213 | 0 | ZVAL_LONG(retval, str_to_int(uriparser_uri->portText.first, get_text_range_length(&uriparser_uri->portText))); |
214 | 0 | } else { |
215 | 0 | ZVAL_NULL(retval); |
216 | 0 | } |
217 | |
|
218 | 0 | return SUCCESS; |
219 | 0 | } |
220 | | |
221 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_path(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
222 | 0 | { |
223 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
224 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
225 | | |
226 | 0 | if (uriparser_uri->pathHead != NULL) { |
227 | 0 | smart_str str = {0}; |
228 | |
|
229 | 0 | if (uriparser_uri->absolutePath || uriHasHostA(uriparser_uri)) { |
230 | 0 | smart_str_appendc(&str, '/'); |
231 | 0 | } |
232 | |
|
233 | 0 | for (const UriPathSegmentA *p = uriparser_uri->pathHead; p; p = p->next) { |
234 | 0 | smart_str_appendl(&str, p->text.first, get_text_range_length(&p->text)); |
235 | 0 | if (p->next) { |
236 | 0 | smart_str_appendc(&str, '/'); |
237 | 0 | } |
238 | 0 | } |
239 | |
|
240 | 0 | ZVAL_NEW_STR(retval, smart_str_extract(&str)); |
241 | 0 | } else if (uriparser_uri->absolutePath) { |
242 | 0 | ZVAL_CHAR(retval, '/'); |
243 | 0 | } else { |
244 | 0 | ZVAL_EMPTY_STRING(retval); |
245 | 0 | } |
246 | |
|
247 | 0 | return SUCCESS; |
248 | 0 | } |
249 | | |
250 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_query(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
251 | 0 | { |
252 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
253 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
254 | | |
255 | 0 | if (uriparser_uri->query.first != NULL && uriparser_uri->query.afterLast != NULL) { |
256 | 0 | ZVAL_STRINGL(retval, uriparser_uri->query.first, get_text_range_length(&uriparser_uri->query)); |
257 | 0 | } else { |
258 | 0 | ZVAL_NULL(retval); |
259 | 0 | } |
260 | |
|
261 | 0 | return SUCCESS; |
262 | 0 | } |
263 | | |
264 | | ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_fragment(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval) |
265 | 0 | { |
266 | 0 | UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode); |
267 | 0 | ZEND_ASSERT(uriparser_uri != NULL); |
268 | | |
269 | 0 | if (uriparser_uri->fragment.first != NULL && uriparser_uri->fragment.afterLast != NULL) { |
270 | 0 | ZVAL_STRINGL(retval, uriparser_uri->fragment.first, get_text_range_length(&uriparser_uri->fragment)); |
271 | 0 | } else { |
272 | 0 | ZVAL_NULL(retval); |
273 | 0 | } |
274 | |
|
275 | 0 | return SUCCESS; |
276 | 0 | } |
277 | | |
278 | | PHP_MINIT_FUNCTION(uri_uriparser) |
279 | 16 | { |
280 | 16 | if (uri_handler_register(&uriparser_uri_handler) == FAILURE) { |
281 | 0 | return FAILURE; |
282 | 0 | } |
283 | | |
284 | 16 | return SUCCESS; |
285 | 16 | } |
286 | | |
287 | | static uriparser_uris_t *uriparser_create_uris(void) |
288 | 0 | { |
289 | 0 | uriparser_uris_t *uriparser_uris = ecalloc(1, sizeof(*uriparser_uris)); |
290 | 0 | uriparser_uris->normalized_uri_initialized = false; |
291 | |
|
292 | 0 | return uriparser_uris; |
293 | 0 | } |
294 | | |
295 | | void *uriparser_parse_uri_ex(const zend_string *uri_str, const uriparser_uris_t *uriparser_base_urls, bool silent) |
296 | 0 | { |
297 | 0 | UriUriA uri = {0}; |
298 | | |
299 | | /* Parse the URI. */ |
300 | 0 | if (uriParseSingleUriExMmA(&uri, ZSTR_VAL(uri_str), ZSTR_VAL(uri_str) + ZSTR_LEN(uri_str), NULL, mm) != URI_SUCCESS) { |
301 | 0 | if (!silent) { |
302 | 0 | zend_throw_exception(uri_invalid_uri_exception_ce, "The specified URI is malformed", 0); |
303 | 0 | } |
304 | |
|
305 | 0 | goto fail; |
306 | 0 | } |
307 | | |
308 | 0 | if (uriparser_base_urls != NULL) { |
309 | 0 | UriUriA tmp = {0}; |
310 | | |
311 | | /* Combine the parsed URI with the base URI and store the result in 'tmp', |
312 | | * since the target and source URLs must be distinct. */ |
313 | 0 | int result = uriAddBaseUriExMmA(&tmp, &uri, &uriparser_base_urls->uri, URI_RESOLVE_STRICTLY, mm); |
314 | 0 | if (result != URI_SUCCESS) { |
315 | 0 | if (!silent) { |
316 | 0 | switch (result) { |
317 | 0 | case URI_ERROR_ADDBASE_REL_BASE: |
318 | 0 | zend_throw_exception(uri_invalid_uri_exception_ce, "The specified base URI must be absolute", 0); |
319 | 0 | break; |
320 | 0 | default: |
321 | | /* This should be unreachable in practice. */ |
322 | 0 | zend_throw_exception(uri_invalid_uri_exception_ce, "Failed to resolve the specified URI against the base URI", 0); |
323 | 0 | break; |
324 | 0 | } |
325 | 0 | } |
326 | | |
327 | 0 | goto fail; |
328 | 0 | } |
329 | | |
330 | | /* Store the combined URI back into 'uri'. */ |
331 | 0 | uriFreeUriMembersMmA(&uri, mm); |
332 | 0 | uri = tmp; |
333 | 0 | } |
334 | | |
335 | | /* Make the resulting URI independent of the 'uri_str'. */ |
336 | 0 | uriMakeOwnerMmA(&uri, mm); |
337 | |
|
338 | 0 | uriparser_uris_t *uriparser_uris = uriparser_create_uris(); |
339 | 0 | uriparser_uris->uri = uri; |
340 | |
|
341 | 0 | return uriparser_uris; |
342 | | |
343 | 0 | fail: |
344 | |
|
345 | 0 | uriFreeUriMembersMmA(&uri, mm); |
346 | |
|
347 | 0 | return NULL; |
348 | 0 | } |
349 | | |
350 | | void *uriparser_parse_uri(const zend_string *uri_str, const void *base_url, zval *errors, bool silent) |
351 | 0 | { |
352 | 0 | return uriparser_parse_uri_ex(uri_str, base_url, silent); |
353 | 0 | } |
354 | | |
355 | | /* TODO make the clone handler accept a flag to distinguish between clone() calls and withers. |
356 | | * When calling a wither successfully, the normalized URI is surely invalidated, therefore |
357 | | * it doesn't make sense to copy it. In case of failure, an exception is thrown, and the URI object |
358 | | * is discarded altogether. */ |
359 | | ZEND_ATTRIBUTE_NONNULL static void *uriparser_clone_uri(void *uri) |
360 | 0 | { |
361 | 0 | uriparser_uris_t *uriparser_uris = uri; |
362 | |
|
363 | 0 | uriparser_uris_t *new_uriparser_uris = uriparser_create_uris(); |
364 | 0 | uriparser_copy_uri(&new_uriparser_uris->uri, &uriparser_uris->uri); |
365 | 0 | if (uriparser_uris->normalized_uri_initialized) { |
366 | 0 | uriparser_copy_uri(&new_uriparser_uris->normalized_uri, &uriparser_uris->normalized_uri); |
367 | 0 | new_uriparser_uris->normalized_uri_initialized = true; |
368 | 0 | } |
369 | |
|
370 | 0 | return new_uriparser_uris; |
371 | 0 | } |
372 | | |
373 | | ZEND_ATTRIBUTE_NONNULL static zend_string *uriparser_uri_to_string(void *uri, uri_recomposition_mode_t recomposition_mode, bool exclude_fragment) |
374 | 0 | { |
375 | 0 | uriparser_uris_t *uriparser_uris = uri; |
376 | 0 | UriUriA *uriparser_uri; |
377 | |
|
378 | 0 | if (recomposition_mode == URI_RECOMPOSITION_RAW_ASCII || recomposition_mode == URI_RECOMPOSITION_RAW_UNICODE) { |
379 | 0 | uriparser_uri = &uriparser_uris->uri; |
380 | 0 | } else { |
381 | 0 | uriparser_uri = get_normalized_uri(uriparser_uris); |
382 | 0 | } |
383 | |
|
384 | 0 | int charsRequired = 0; |
385 | 0 | int result = uriToStringCharsRequiredA(uriparser_uri, &charsRequired); |
386 | 0 | ZEND_ASSERT(result == URI_SUCCESS); |
387 | | |
388 | 0 | charsRequired++; |
389 | |
|
390 | 0 | zend_string *uri_string = zend_string_alloc(charsRequired - 1, false); |
391 | 0 | result = uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL); |
392 | 0 | ZEND_ASSERT(result == URI_SUCCESS); |
393 | | |
394 | 0 | if (exclude_fragment) { |
395 | 0 | const char *pos = zend_memrchr(ZSTR_VAL(uri_string), '#', ZSTR_LEN(uri_string)); |
396 | 0 | if (pos != NULL) { |
397 | 0 | uri_string = zend_string_truncate(uri_string, (pos - ZSTR_VAL(uri_string)), false); |
398 | 0 | } |
399 | 0 | } |
400 | |
|
401 | 0 | return uri_string; |
402 | 0 | } |
403 | | |
404 | | ZEND_ATTRIBUTE_NONNULL static void uriparser_free_uri(void *uri) |
405 | 0 | { |
406 | 0 | uriparser_uris_t *uriparser_uris = uri; |
407 | |
|
408 | 0 | uriFreeUriMembersMmA(&uriparser_uris->uri, mm); |
409 | 0 | uriFreeUriMembersMmA(&uriparser_uris->normalized_uri, mm); |
410 | |
|
411 | 0 | efree(uriparser_uris); |
412 | 0 | } |
413 | | |
414 | | const uri_handler_t uriparser_uri_handler = { |
415 | | .name = URI_PARSER_RFC3986, |
416 | | .parse_uri = uriparser_parse_uri, |
417 | | .clone_uri = uriparser_clone_uri, |
418 | | .uri_to_string = uriparser_uri_to_string, |
419 | | .free_uri = uriparser_free_uri, |
420 | | { |
421 | | .scheme = {.read_func = uriparser_read_scheme, .write_func = NULL}, |
422 | | .username = {.read_func = uriparser_read_username, .write_func = NULL}, |
423 | | .password = {.read_func = uriparser_read_password, .write_func = NULL}, |
424 | | .host = {.read_func = uriparser_read_host, .write_func = NULL}, |
425 | | .port = {.read_func = uriparser_read_port, .write_func = NULL}, |
426 | | .path = {.read_func = uriparser_read_path, .write_func = NULL}, |
427 | | .query = {.read_func = uriparser_read_query, .write_func = NULL}, |
428 | | .fragment = {.read_func = uriparser_read_fragment, .write_func = NULL}, |
429 | | } |
430 | | }; |