Coverage Report

Created: 2025-07-23 06:33

/src/php-src/ext/uri/php_uriparser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | https://www.php.net/license/3_01.txt                                 |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Authors: Máté Kocsis <kocsismate@php.net>                            |
14
   +----------------------------------------------------------------------+
15
*/
16
17
#include "php.h"
18
#include "php_uriparser.h"
19
#include "php_uri_common.h"
20
#include "Zend/zend_smart_str.h"
21
#include "Zend/zend_exceptions.h"
22
23
static void uriparser_free_uri(void *uri);
24
25
static void *uriparser_malloc(UriMemoryManager *memory_manager, size_t size)
26
0
{
27
0
  return emalloc(size);
28
0
}
29
30
static void *uriparser_calloc(UriMemoryManager *memory_manager, size_t nmemb, size_t size)
31
0
{
32
0
  return ecalloc(nmemb, size);
33
0
}
34
35
static void *uriparser_realloc(UriMemoryManager *memory_manager, void *ptr, size_t size)
36
0
{
37
0
  return erealloc(ptr, size);
38
0
}
39
40
static void *uriparser_reallocarray(UriMemoryManager *memory_manager, void *ptr, size_t nmemb, size_t size)
41
0
{
42
0
  return safe_erealloc(ptr, nmemb, size, 0);
43
0
}
44
45
static void uriparser_free(UriMemoryManager *memory_manager, void *ptr)
46
0
{
47
0
  efree(ptr);
48
0
}
49
50
static const UriMemoryManager uriparser_mm = {
51
  .malloc = uriparser_malloc,
52
  .calloc = uriparser_calloc,
53
  .realloc = uriparser_realloc,
54
  .reallocarray = uriparser_reallocarray,
55
  .free = uriparser_free,
56
  .userData = NULL,
57
};
58
59
/* The library expects a pointer to a non-const UriMemoryManager, but does
60
 * not actually modify it (and neither does our implementation). Use a
61
 * const struct with a non-const pointer for convenience. */
62
static UriMemoryManager* const mm = (UriMemoryManager*)&uriparser_mm;
63
64
static inline size_t get_text_range_length(const UriTextRangeA *range)
65
0
{
66
0
  return range->afterLast - range->first;
67
0
}
68
69
ZEND_ATTRIBUTE_NONNULL static void uriparser_copy_uri(UriUriA *new_uriparser_uri, const UriUriA *uriparser_uri)
70
0
{
71
0
  int result = uriCopyUriMmA(new_uriparser_uri, uriparser_uri, mm);
72
0
  ZEND_ASSERT(result == URI_SUCCESS);
73
0
}
74
75
0
ZEND_ATTRIBUTE_NONNULL static UriUriA *get_normalized_uri(uriparser_uris_t *uriparser_uris) {
76
0
  if (!uriparser_uris->normalized_uri_initialized) {
77
0
    uriparser_copy_uri(&uriparser_uris->normalized_uri, &uriparser_uris->uri);
78
0
    int result = uriNormalizeSyntaxExMmA(&uriparser_uris->normalized_uri, (unsigned int)-1, mm);
79
0
    ZEND_ASSERT(result == URI_SUCCESS);
80
0
    uriparser_uris->normalized_uri_initialized = true;
81
0
  }
82
83
0
  return &uriparser_uris->normalized_uri;
84
0
}
85
86
ZEND_ATTRIBUTE_NONNULL static UriUriA *uriparser_read_uri(uriparser_uris_t *uriparser_uris, uri_component_read_mode_t read_mode)
87
0
{
88
0
  switch (read_mode) {
89
0
    case URI_COMPONENT_READ_RAW:
90
0
      return &uriparser_uris->uri;
91
0
    case URI_COMPONENT_READ_NORMALIZED_ASCII:
92
0
      ZEND_FALLTHROUGH;
93
0
    case URI_COMPONENT_READ_NORMALIZED_UNICODE:
94
0
      return get_normalized_uri(uriparser_uris);
95
0
    EMPTY_SWITCH_DEFAULT_CASE()
96
0
  }
97
0
}
98
99
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_scheme(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
100
0
{
101
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
102
0
  ZEND_ASSERT(uriparser_uri != NULL);
103
104
0
  if (uriparser_uri->scheme.first != NULL && uriparser_uri->scheme.afterLast != NULL) {
105
0
    zend_string *str = zend_string_init(uriparser_uri->scheme.first, get_text_range_length(&uriparser_uri->scheme), false);
106
0
    ZVAL_NEW_STR(retval, str);
107
0
  } else {
108
0
    ZVAL_NULL(retval);
109
0
  }
110
111
0
  return SUCCESS;
112
0
}
113
114
ZEND_ATTRIBUTE_NONNULL zend_result uriparser_read_userinfo(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
115
0
{
116
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
117
0
  ZEND_ASSERT(uriparser_uri != NULL);
118
119
0
  if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
120
0
    ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, get_text_range_length(&uriparser_uri->userInfo));
121
0
  } else {
122
0
    ZVAL_NULL(retval);
123
0
  }
124
125
0
  return SUCCESS;
126
0
}
127
128
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_username(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
129
0
{
130
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
131
0
  ZEND_ASSERT(uriparser_uri != NULL);
132
133
0
  if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
134
0
    size_t length = get_text_range_length(&uriparser_uri->userInfo);
135
0
    const char *c = memchr(uriparser_uri->userInfo.first, ':', length);
136
137
0
    if (c == NULL && length > 0) {
138
0
      ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, length);
139
0
    } else if (c != NULL && c - uriparser_uri->userInfo.first > 0) {
140
0
      ZVAL_STRINGL(retval, uriparser_uri->userInfo.first, c - uriparser_uri->userInfo.first);
141
0
    } else {
142
0
      ZVAL_NULL(retval);
143
0
    }
144
0
  } else {
145
0
    ZVAL_NULL(retval);
146
0
  }
147
148
0
  return SUCCESS;
149
0
}
150
151
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_password(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
152
0
{
153
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
154
0
  ZEND_ASSERT(uriparser_uri != NULL);
155
156
0
  if (uriparser_uri->userInfo.first != NULL && uriparser_uri->userInfo.afterLast != NULL) {
157
0
    const char *c = memchr(uriparser_uri->userInfo.first, ':', get_text_range_length(&uriparser_uri->userInfo));
158
159
0
    if (c != NULL && uriparser_uri->userInfo.afterLast - c - 1 > 0) {
160
0
      ZVAL_STRINGL(retval, c + 1, uriparser_uri->userInfo.afterLast - c - 1);
161
0
    } else {
162
0
      ZVAL_NULL(retval);
163
0
    }
164
0
  } else {
165
0
    ZVAL_NULL(retval);
166
0
  }
167
168
0
  return SUCCESS;
169
0
}
170
171
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_host(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
172
0
{
173
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
174
0
  ZEND_ASSERT(uriparser_uri != NULL);
175
176
0
  if (uriparser_uri->hostText.first != NULL && uriparser_uri->hostText.afterLast != NULL) {
177
0
    if (uriparser_uri->hostData.ip6 != NULL || uriparser_uri->hostData.ipFuture.first != NULL) {
178
      /* the textual representation of the host is always accessible in the .hostText field no matter what the host is */
179
0
      smart_str host_str = {0};
180
181
0
      smart_str_appendc(&host_str, '[');
182
0
      smart_str_appendl(&host_str, uriparser_uri->hostText.first, get_text_range_length(&uriparser_uri->hostText));
183
0
      smart_str_appendc(&host_str, ']');
184
185
0
      ZVAL_NEW_STR(retval, smart_str_extract(&host_str));
186
0
    } else {
187
0
      ZVAL_STRINGL(retval, uriparser_uri->hostText.first, get_text_range_length(&uriparser_uri->hostText));
188
0
    }
189
0
  } else {
190
0
    ZVAL_NULL(retval);
191
0
  }
192
193
0
  return SUCCESS;
194
0
}
195
196
ZEND_ATTRIBUTE_NONNULL static size_t str_to_int(const char *str, size_t len)
197
0
{
198
0
  size_t result = 0;
199
200
0
  for (size_t i = 0; i < len; ++i) {
201
0
    result = result * 10 + (str[i] - '0');
202
0
  }
203
204
0
  return result;
205
0
}
206
207
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_port(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
208
0
{
209
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
210
0
  ZEND_ASSERT(uriparser_uri != NULL);
211
212
0
  if (uriparser_uri->portText.first != NULL && uriparser_uri->portText.afterLast != NULL) {
213
0
    ZVAL_LONG(retval, str_to_int(uriparser_uri->portText.first, get_text_range_length(&uriparser_uri->portText)));
214
0
  } else {
215
0
    ZVAL_NULL(retval);
216
0
  }
217
218
0
  return SUCCESS;
219
0
}
220
221
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_path(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
222
0
{
223
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
224
0
  ZEND_ASSERT(uriparser_uri != NULL);
225
226
0
  if (uriparser_uri->pathHead != NULL) {
227
0
    smart_str str = {0};
228
229
0
    if (uriparser_uri->absolutePath || uriHasHostA(uriparser_uri)) {
230
0
      smart_str_appendc(&str, '/');
231
0
    }
232
233
0
    for (const UriPathSegmentA *p = uriparser_uri->pathHead; p; p = p->next) {
234
0
      smart_str_appendl(&str, p->text.first, get_text_range_length(&p->text));
235
0
      if (p->next) {
236
0
        smart_str_appendc(&str, '/');
237
0
      }
238
0
    }
239
240
0
    ZVAL_NEW_STR(retval, smart_str_extract(&str));
241
0
  } else if (uriparser_uri->absolutePath) {
242
0
    ZVAL_CHAR(retval, '/');
243
0
  } else {
244
0
    ZVAL_EMPTY_STRING(retval);
245
0
  }
246
247
0
  return SUCCESS;
248
0
}
249
250
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_query(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
251
0
{
252
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
253
0
  ZEND_ASSERT(uriparser_uri != NULL);
254
255
0
  if (uriparser_uri->query.first != NULL && uriparser_uri->query.afterLast != NULL) {
256
0
    ZVAL_STRINGL(retval, uriparser_uri->query.first, get_text_range_length(&uriparser_uri->query));
257
0
  } else {
258
0
    ZVAL_NULL(retval);
259
0
  }
260
261
0
  return SUCCESS;
262
0
}
263
264
ZEND_ATTRIBUTE_NONNULL static zend_result uriparser_read_fragment(const uri_internal_t *internal_uri, uri_component_read_mode_t read_mode, zval *retval)
265
0
{
266
0
  UriUriA *uriparser_uri = uriparser_read_uri(internal_uri->uri, read_mode);
267
0
  ZEND_ASSERT(uriparser_uri != NULL);
268
269
0
  if (uriparser_uri->fragment.first != NULL && uriparser_uri->fragment.afterLast != NULL) {
270
0
    ZVAL_STRINGL(retval, uriparser_uri->fragment.first, get_text_range_length(&uriparser_uri->fragment));
271
0
  } else {
272
0
    ZVAL_NULL(retval);
273
0
  }
274
275
0
  return SUCCESS;
276
0
}
277
278
PHP_MINIT_FUNCTION(uri_uriparser)
279
16
{
280
16
  if (uri_handler_register(&uriparser_uri_handler) == FAILURE) {
281
0
    return FAILURE;
282
0
  }
283
284
16
  return SUCCESS;
285
16
}
286
287
static uriparser_uris_t *uriparser_create_uris(void)
288
0
{
289
0
  uriparser_uris_t *uriparser_uris = ecalloc(1, sizeof(*uriparser_uris));
290
0
  uriparser_uris->normalized_uri_initialized = false;
291
292
0
  return uriparser_uris;
293
0
}
294
295
void *uriparser_parse_uri_ex(const zend_string *uri_str, const uriparser_uris_t *uriparser_base_urls, bool silent)
296
0
{
297
0
  UriUriA uri = {0};
298
299
  /* Parse the URI. */
300
0
  if (uriParseSingleUriExMmA(&uri, ZSTR_VAL(uri_str), ZSTR_VAL(uri_str) + ZSTR_LEN(uri_str), NULL, mm) != URI_SUCCESS) {
301
0
    if (!silent) {
302
0
      zend_throw_exception(uri_invalid_uri_exception_ce, "The specified URI is malformed", 0);
303
0
    }
304
305
0
    goto fail;
306
0
  }
307
308
0
  if (uriparser_base_urls != NULL) {
309
0
    UriUriA tmp = {0};
310
311
    /* Combine the parsed URI with the base URI and store the result in 'tmp',
312
     * since the target and source URLs must be distinct. */
313
0
    int result = uriAddBaseUriExMmA(&tmp, &uri, &uriparser_base_urls->uri, URI_RESOLVE_STRICTLY, mm);
314
0
    if (result != URI_SUCCESS) {
315
0
      if (!silent) {
316
0
        switch (result) {
317
0
          case URI_ERROR_ADDBASE_REL_BASE:
318
0
            zend_throw_exception(uri_invalid_uri_exception_ce, "The specified base URI must be absolute", 0);
319
0
            break;
320
0
          default:
321
            /* This should be unreachable in practice. */
322
0
            zend_throw_exception(uri_invalid_uri_exception_ce, "Failed to resolve the specified URI against the base URI", 0);
323
0
            break;
324
0
        }
325
0
      }
326
327
0
      goto fail;
328
0
    }
329
330
    /* Store the combined URI back into 'uri'. */
331
0
    uriFreeUriMembersMmA(&uri, mm);
332
0
    uri = tmp;
333
0
  }
334
335
  /* Make the resulting URI independent of the 'uri_str'. */
336
0
  uriMakeOwnerMmA(&uri, mm);
337
338
0
  uriparser_uris_t *uriparser_uris = uriparser_create_uris();
339
0
  uriparser_uris->uri = uri;
340
341
0
  return uriparser_uris;
342
343
0
 fail:
344
345
0
  uriFreeUriMembersMmA(&uri, mm);
346
347
0
  return NULL;
348
0
}
349
350
void *uriparser_parse_uri(const zend_string *uri_str, const void *base_url, zval *errors, bool silent)
351
0
{
352
0
  return uriparser_parse_uri_ex(uri_str, base_url, silent);
353
0
}
354
355
/* TODO make the clone handler accept a flag to distinguish between clone() calls and withers.
356
 * When calling a wither successfully, the normalized URI is surely invalidated, therefore
357
 * it doesn't make sense to copy it. In case of failure, an exception is thrown, and the URI object
358
 * is discarded altogether. */
359
ZEND_ATTRIBUTE_NONNULL static void *uriparser_clone_uri(void *uri)
360
0
{
361
0
  uriparser_uris_t *uriparser_uris = uri;
362
363
0
  uriparser_uris_t *new_uriparser_uris = uriparser_create_uris();
364
0
  uriparser_copy_uri(&new_uriparser_uris->uri, &uriparser_uris->uri);
365
0
  if (uriparser_uris->normalized_uri_initialized) {
366
0
    uriparser_copy_uri(&new_uriparser_uris->normalized_uri, &uriparser_uris->normalized_uri);
367
0
    new_uriparser_uris->normalized_uri_initialized = true;
368
0
  }
369
370
0
  return new_uriparser_uris;
371
0
}
372
373
ZEND_ATTRIBUTE_NONNULL static zend_string *uriparser_uri_to_string(void *uri, uri_recomposition_mode_t recomposition_mode, bool exclude_fragment)
374
0
{
375
0
  uriparser_uris_t *uriparser_uris = uri;
376
0
  UriUriA *uriparser_uri;
377
378
0
  if (recomposition_mode == URI_RECOMPOSITION_RAW_ASCII || recomposition_mode == URI_RECOMPOSITION_RAW_UNICODE) {
379
0
    uriparser_uri = &uriparser_uris->uri;
380
0
  } else {
381
0
    uriparser_uri = get_normalized_uri(uriparser_uris);
382
0
  }
383
384
0
  int charsRequired = 0;
385
0
  int result = uriToStringCharsRequiredA(uriparser_uri, &charsRequired);
386
0
  ZEND_ASSERT(result == URI_SUCCESS);
387
388
0
  charsRequired++;
389
390
0
  zend_string *uri_string = zend_string_alloc(charsRequired - 1, false);
391
0
  result = uriToStringA(ZSTR_VAL(uri_string), uriparser_uri, charsRequired, NULL);
392
0
  ZEND_ASSERT(result == URI_SUCCESS);
393
394
0
  if (exclude_fragment) {
395
0
    const char *pos = zend_memrchr(ZSTR_VAL(uri_string), '#', ZSTR_LEN(uri_string));
396
0
    if (pos != NULL) {
397
0
      uri_string = zend_string_truncate(uri_string, (pos - ZSTR_VAL(uri_string)), false);
398
0
    }
399
0
  }
400
401
0
  return uri_string;
402
0
}
403
404
ZEND_ATTRIBUTE_NONNULL static void uriparser_free_uri(void *uri)
405
0
{
406
0
  uriparser_uris_t *uriparser_uris = uri;
407
408
0
  uriFreeUriMembersMmA(&uriparser_uris->uri, mm);
409
0
  uriFreeUriMembersMmA(&uriparser_uris->normalized_uri, mm);
410
411
0
  efree(uriparser_uris);
412
0
}
413
414
const uri_handler_t uriparser_uri_handler = {
415
  .name = URI_PARSER_RFC3986,
416
  .parse_uri = uriparser_parse_uri,
417
  .clone_uri = uriparser_clone_uri,
418
  .uri_to_string = uriparser_uri_to_string,
419
  .free_uri = uriparser_free_uri,
420
  {
421
    .scheme = {.read_func = uriparser_read_scheme, .write_func = NULL},
422
    .username = {.read_func = uriparser_read_username, .write_func = NULL},
423
    .password = {.read_func = uriparser_read_password, .write_func = NULL},
424
    .host = {.read_func = uriparser_read_host, .write_func = NULL},
425
    .port = {.read_func = uriparser_read_port, .write_func = NULL},
426
    .path = {.read_func = uriparser_read_path, .write_func = NULL},
427
    .query = {.read_func = uriparser_read_query, .write_func = NULL},
428
    .fragment = {.read_func = uriparser_read_fragment, .write_func = NULL},
429
  }
430
};