/src/php-src/ext/standard/url.c
Line | Count | Source |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright © The PHP Group and Contributors. | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to the Modified BSD License that is | |
6 | | | bundled with this package in the file LICENSE, and is available | |
7 | | | through the World Wide Web at <https://www.php.net/license/>. | |
8 | | | | |
9 | | | SPDX-License-Identifier: BSD-3-Clause | |
10 | | +----------------------------------------------------------------------+ |
11 | | | Author: Jim Winstead <jimw@php.net> | |
12 | | +----------------------------------------------------------------------+ |
13 | | */ |
14 | | |
15 | | #include <stdlib.h> |
16 | | #include <string.h> |
17 | | #include <ctype.h> |
18 | | #include <sys/types.h> |
19 | | |
20 | | #include "php.h" |
21 | | |
22 | | #include "url.h" |
23 | | #include "file.h" |
24 | | #include "zend_simd.h" |
25 | | #include "Zend/zend_smart_str.h" |
26 | | |
27 | | /* {{{ free_url */ |
28 | | PHPAPI void php_url_free(php_url *theurl) |
29 | 0 | { |
30 | 0 | if (theurl->scheme) |
31 | 0 | zend_string_release_ex(theurl->scheme, 0); |
32 | 0 | if (theurl->user) |
33 | 0 | zend_string_release_ex(theurl->user, 0); |
34 | 0 | if (theurl->pass) |
35 | 0 | zend_string_release_ex(theurl->pass, 0); |
36 | 0 | if (theurl->host) |
37 | 0 | zend_string_release_ex(theurl->host, 0); |
38 | 0 | if (theurl->path) |
39 | 0 | zend_string_release_ex(theurl->path, 0); |
40 | 0 | if (theurl->query) |
41 | 0 | zend_string_release_ex(theurl->query, 0); |
42 | 0 | if (theurl->fragment) |
43 | 0 | zend_string_release_ex(theurl->fragment, 0); |
44 | 0 | efree(theurl); |
45 | 0 | } |
46 | | /* }}} */ |
47 | | |
/**
 * Replace, in place, every byte of str[0..len) for which iscntrl() is true
 * with an underscore.
 *
 * @param str Buffer to sanitize; must not be NULL (asserted).
 * @param len Number of bytes to inspect.
 */
static void php_replace_controlchars(char *str, size_t len)
{
	ZEND_ASSERT(str != NULL);

	for (size_t i = 0; i < len; i++) {
		/* <ctype.h> classifiers need the value of an unsigned char. */
		if (iscntrl((unsigned char)str[i])) {
			str[i] = '_';
		}
	}
}
62 | | |
63 | | PHPAPI php_url *php_url_parse(char const *str) |
64 | 0 | { |
65 | 0 | return php_url_parse_ex(str, strlen(str)); |
66 | 0 | } |
67 | | |
/**
 * Length-bounded counterpart of strcspn() that tolerates NUL bytes in the
 * scanned range.
 *
 * @param s     Start of the range to scan.
 * @param e     One past the last byte of the range.
 * @param chars NUL-terminated set of delimiter bytes.
 * @return Pointer to the first byte in [s, e) that occurs in chars, or e when
 *         none does.
 */
static const char *binary_strcspn(const char *s, const char *e, const char *chars) {
	const char *stop = e;

	/* Each delimiter can only pull the stop position further to the left. */
	for (const char *delim = chars; *delim != '\0'; delim++) {
		const char *hit = memchr(s, *delim, stop - s);
		if (hit != NULL) {
			stop = hit;
		}
	}

	return stop;
}
78 | | |
79 | | /* {{{ php_url_parse_ex */ |
80 | | PHPAPI php_url *php_url_parse_ex(char const *str, size_t length) |
81 | 0 | { |
82 | 0 | bool has_port; |
83 | 0 | return php_url_parse_ex2(str, length, &has_port); |
84 | 0 | } |
85 | | |
86 | | /* {{{ php_url_parse_ex2 |
87 | | */ |
88 | | PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port) |
89 | 0 | { |
90 | 0 | char port_buf[6]; |
91 | 0 | php_url *ret = ecalloc(1, sizeof(php_url)); |
92 | 0 | char const *s, *e, *p, *pp, *ue; |
93 | |
|
94 | 0 | *has_port = false; |
95 | 0 | s = str; |
96 | 0 | ue = s + length; |
97 | | |
98 | | /* parse scheme */ |
99 | 0 | if ((e = memchr(s, ':', length)) && e != s) { |
100 | | /* validate scheme */ |
101 | 0 | p = s; |
102 | 0 | while (p < e) { |
103 | | /* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */ |
104 | 0 | if (!isalpha((unsigned char)*p) && !isdigit((unsigned char)*p) && *p != '+' && *p != '.' && *p != '-') { |
105 | 0 | if (e + 1 < ue && e < binary_strcspn(s, ue, "?#")) { |
106 | 0 | goto parse_port; |
107 | 0 | } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
108 | 0 | s += 2; |
109 | 0 | e = 0; |
110 | 0 | goto parse_host; |
111 | 0 | } else { |
112 | 0 | goto just_path; |
113 | 0 | } |
114 | 0 | } |
115 | 0 | p++; |
116 | 0 | } |
117 | | |
118 | 0 | if (e + 1 == ue) { /* only scheme is available */ |
119 | 0 | ret->scheme = zend_string_init(s, (e - s), 0); |
120 | 0 | php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme)); |
121 | 0 | return ret; |
122 | 0 | } |
123 | | |
124 | | /* |
125 | | * certain schemas like mailto: and zlib: may not have any / after them |
126 | | * this check ensures we support those. |
127 | | */ |
128 | 0 | if (*(e+1) != '/') { |
129 | | /* check if the data we get is a port this allows us to |
130 | | * correctly parse things like a.com:80 |
131 | | */ |
132 | 0 | p = e + 1; |
133 | 0 | while (p < ue && isdigit((unsigned char)*p)) { |
134 | 0 | p++; |
135 | 0 | } |
136 | |
|
137 | 0 | if ((p == ue || *p == '/') && (p - e) < 7) { |
138 | 0 | goto parse_port; |
139 | 0 | } |
140 | | |
141 | 0 | ret->scheme = zend_string_init(s, (e-s), 0); |
142 | 0 | php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme)); |
143 | |
|
144 | 0 | s = e + 1; |
145 | 0 | goto just_path; |
146 | 0 | } else { |
147 | 0 | ret->scheme = zend_string_init(s, (e-s), 0); |
148 | 0 | php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme)); |
149 | |
|
150 | 0 | if (e + 2 < ue && *(e + 2) == '/') { |
151 | 0 | s = e + 3; |
152 | 0 | if (zend_string_equals_literal_ci(ret->scheme, "file")) { |
153 | 0 | if (e + 3 < ue && *(e + 3) == '/') { |
154 | | /* support windows drive letters as in: |
155 | | file:///c:/somedir/file.txt |
156 | | */ |
157 | 0 | if (e + 5 < ue && *(e + 5) == ':') { |
158 | 0 | s = e + 4; |
159 | 0 | } |
160 | 0 | goto just_path; |
161 | 0 | } |
162 | 0 | } |
163 | 0 | } else { |
164 | 0 | s = e + 1; |
165 | 0 | goto just_path; |
166 | 0 | } |
167 | 0 | } |
168 | 0 | } else if (e) { /* no scheme; starts with colon: look for port */ |
169 | 0 | parse_port: |
170 | 0 | p = e + 1; |
171 | 0 | pp = p; |
172 | |
|
173 | 0 | while (pp < ue && pp - p < 6 && isdigit((unsigned char)*pp)) { |
174 | 0 | pp++; |
175 | 0 | } |
176 | |
|
177 | 0 | if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) { |
178 | 0 | zend_long port; |
179 | 0 | char *end; |
180 | 0 | memcpy(port_buf, p, (pp - p)); |
181 | 0 | port_buf[pp - p] = '\0'; |
182 | 0 | port = ZEND_STRTOL(port_buf, &end, 10); |
183 | 0 | if (port >= 0 && port <= 65535 && end != port_buf) { |
184 | 0 | *has_port = true; |
185 | 0 | ret->port = (unsigned short) port; |
186 | 0 | if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
187 | 0 | s += 2; |
188 | 0 | } |
189 | 0 | } else { |
190 | 0 | php_url_free(ret); |
191 | 0 | return NULL; |
192 | 0 | } |
193 | 0 | } else if (p == pp && pp == ue) { |
194 | 0 | php_url_free(ret); |
195 | 0 | return NULL; |
196 | 0 | } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
197 | 0 | s += 2; |
198 | 0 | } else { |
199 | 0 | goto just_path; |
200 | 0 | } |
201 | 0 | } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
202 | 0 | s += 2; |
203 | 0 | } else { |
204 | 0 | goto just_path; |
205 | 0 | } |
206 | | |
207 | 0 | parse_host: |
208 | 0 | e = binary_strcspn(s, ue, "/?#"); |
209 | | |
210 | | /* check for login and password */ |
211 | 0 | if ((p = zend_memrchr(s, '@', (e-s)))) { |
212 | 0 | if ((pp = memchr(s, ':', (p-s)))) { |
213 | 0 | ret->user = zend_string_init(s, (pp-s), 0); |
214 | 0 | php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user)); |
215 | |
|
216 | 0 | pp++; |
217 | 0 | ret->pass = zend_string_init(pp, (p-pp), 0); |
218 | 0 | php_replace_controlchars(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass)); |
219 | 0 | } else { |
220 | 0 | ret->user = zend_string_init(s, (p-s), 0); |
221 | 0 | php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user)); |
222 | 0 | } |
223 | |
|
224 | 0 | s = p + 1; |
225 | 0 | } |
226 | | |
227 | | /* check for port */ |
228 | 0 | if (s < ue && *s == '[' && *(e-1) == ']') { |
229 | | /* Short circuit portscan, |
230 | | we're dealing with an |
231 | | IPv6 embedded address */ |
232 | 0 | p = NULL; |
233 | 0 | } else { |
234 | 0 | p = zend_memrchr(s, ':', (e-s)); |
235 | 0 | } |
236 | |
|
237 | 0 | if (p) { |
238 | 0 | if (!ret->port) { |
239 | 0 | p++; |
240 | 0 | if (e-p > 5) { /* port cannot be longer then 5 characters */ |
241 | 0 | php_url_free(ret); |
242 | 0 | return NULL; |
243 | 0 | } else if (e - p > 0) { |
244 | 0 | zend_long port; |
245 | 0 | char *end; |
246 | 0 | memcpy(port_buf, p, (e - p)); |
247 | 0 | port_buf[e - p] = '\0'; |
248 | 0 | port = ZEND_STRTOL(port_buf, &end, 10); |
249 | 0 | if (port >= 0 && port <= 65535 && end != port_buf) { |
250 | 0 | *has_port = true; |
251 | 0 | ret->port = (unsigned short)port; |
252 | 0 | } else { |
253 | 0 | php_url_free(ret); |
254 | 0 | return NULL; |
255 | 0 | } |
256 | 0 | } |
257 | 0 | p--; |
258 | 0 | } |
259 | 0 | } else { |
260 | 0 | p = e; |
261 | 0 | } |
262 | | |
263 | | /* check if we have a valid host, if we don't reject the string as url */ |
264 | 0 | if ((p-s) < 1) { |
265 | 0 | php_url_free(ret); |
266 | 0 | return NULL; |
267 | 0 | } |
268 | | |
269 | 0 | ret->host = zend_string_init(s, (p-s), 0); |
270 | 0 | php_replace_controlchars(ZSTR_VAL(ret->host), ZSTR_LEN(ret->host)); |
271 | |
|
272 | 0 | if (e == ue) { |
273 | 0 | return ret; |
274 | 0 | } |
275 | | |
276 | 0 | s = e; |
277 | |
|
278 | 0 | just_path: |
279 | |
|
280 | 0 | e = ue; |
281 | 0 | p = memchr(s, '#', (e - s)); |
282 | 0 | if (p) { |
283 | 0 | p++; |
284 | 0 | if (p < e) { |
285 | 0 | ret->fragment = zend_string_init(p, (e - p), 0); |
286 | 0 | php_replace_controlchars(ZSTR_VAL(ret->fragment), ZSTR_LEN(ret->fragment)); |
287 | 0 | } else { |
288 | 0 | ret->fragment = ZSTR_EMPTY_ALLOC(); |
289 | 0 | } |
290 | 0 | e = p-1; |
291 | 0 | } |
292 | |
|
293 | 0 | p = memchr(s, '?', (e - s)); |
294 | 0 | if (p) { |
295 | 0 | p++; |
296 | 0 | if (p < e) { |
297 | 0 | ret->query = zend_string_init(p, (e - p), 0); |
298 | 0 | php_replace_controlchars(ZSTR_VAL(ret->query), ZSTR_LEN(ret->query)); |
299 | 0 | } else { |
300 | 0 | ret->query = ZSTR_EMPTY_ALLOC(); |
301 | 0 | } |
302 | 0 | e = p-1; |
303 | 0 | } |
304 | |
|
305 | 0 | if (s < e || s == ue) { |
306 | 0 | ret->path = zend_string_init(s, (e - s), 0); |
307 | 0 | php_replace_controlchars(ZSTR_VAL(ret->path), ZSTR_LEN(ret->path)); |
308 | 0 | } |
309 | |
|
310 | 0 | return ret; |
311 | 0 | } |
312 | | /* }}} */ |
313 | | |
314 | | /* {{{ Parse a URL and return its components */ |
315 | | PHP_FUNCTION(parse_url) |
316 | 0 | { |
317 | 0 | char *str; |
318 | 0 | size_t str_len; |
319 | 0 | php_url *resource; |
320 | 0 | zend_long key = -1; |
321 | 0 | zval tmp; |
322 | 0 | bool has_port; |
323 | |
|
324 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
325 | 0 | Z_PARAM_STRING(str, str_len) |
326 | 0 | Z_PARAM_OPTIONAL |
327 | 0 | Z_PARAM_LONG(key) |
328 | 0 | ZEND_PARSE_PARAMETERS_END(); |
329 | | |
330 | 0 | resource = php_url_parse_ex2(str, str_len, &has_port); |
331 | 0 | if (resource == NULL) { |
332 | | /* @todo Find a method to determine why php_url_parse_ex() failed */ |
333 | 0 | RETURN_FALSE; |
334 | 0 | } |
335 | | |
336 | 0 | if (key > -1) { |
337 | 0 | switch (key) { |
338 | 0 | case PHP_URL_SCHEME: |
339 | 0 | if (resource->scheme != NULL) RETVAL_STR_COPY(resource->scheme); |
340 | 0 | break; |
341 | 0 | case PHP_URL_HOST: |
342 | 0 | if (resource->host != NULL) RETVAL_STR_COPY(resource->host); |
343 | 0 | break; |
344 | 0 | case PHP_URL_PORT: |
345 | 0 | if (has_port) RETVAL_LONG(resource->port); |
346 | 0 | break; |
347 | 0 | case PHP_URL_USER: |
348 | 0 | if (resource->user != NULL) RETVAL_STR_COPY(resource->user); |
349 | 0 | break; |
350 | 0 | case PHP_URL_PASS: |
351 | 0 | if (resource->pass != NULL) RETVAL_STR_COPY(resource->pass); |
352 | 0 | break; |
353 | 0 | case PHP_URL_PATH: |
354 | 0 | if (resource->path != NULL) RETVAL_STR_COPY(resource->path); |
355 | 0 | break; |
356 | 0 | case PHP_URL_QUERY: |
357 | 0 | if (resource->query != NULL) RETVAL_STR_COPY(resource->query); |
358 | 0 | break; |
359 | 0 | case PHP_URL_FRAGMENT: |
360 | 0 | if (resource->fragment != NULL) RETVAL_STR_COPY(resource->fragment); |
361 | 0 | break; |
362 | 0 | default: |
363 | 0 | zend_argument_value_error(2, "must be a valid URL component identifier, " ZEND_LONG_FMT " given", key); |
364 | 0 | break; |
365 | 0 | } |
366 | 0 | goto done; |
367 | 0 | } |
368 | | |
369 | | /* allocate an array for return */ |
370 | 0 | array_init(return_value); |
371 | | |
372 | | /* add the various elements to the array */ |
373 | 0 | if (resource->scheme != NULL) { |
374 | 0 | ZVAL_STR_COPY(&tmp, resource->scheme); |
375 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_SCHEME), &tmp); |
376 | 0 | } |
377 | 0 | if (resource->host != NULL) { |
378 | 0 | ZVAL_STR_COPY(&tmp, resource->host); |
379 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_HOST), &tmp); |
380 | 0 | } |
381 | 0 | if (has_port) { |
382 | 0 | ZVAL_LONG(&tmp, resource->port); |
383 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PORT), &tmp); |
384 | 0 | } |
385 | 0 | if (resource->user != NULL) { |
386 | 0 | ZVAL_STR_COPY(&tmp, resource->user); |
387 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_USER), &tmp); |
388 | 0 | } |
389 | 0 | if (resource->pass != NULL) { |
390 | 0 | ZVAL_STR_COPY(&tmp, resource->pass); |
391 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PASS), &tmp); |
392 | 0 | } |
393 | 0 | if (resource->path != NULL) { |
394 | 0 | ZVAL_STR_COPY(&tmp, resource->path); |
395 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PATH), &tmp); |
396 | 0 | } |
397 | 0 | if (resource->query != NULL) { |
398 | 0 | ZVAL_STR_COPY(&tmp, resource->query); |
399 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_QUERY), &tmp); |
400 | 0 | } |
401 | 0 | if (resource->fragment != NULL) { |
402 | 0 | ZVAL_STR_COPY(&tmp, resource->fragment); |
403 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_FRAGMENT), &tmp); |
404 | 0 | } |
405 | 0 | done: |
406 | 0 | php_url_free(resource); |
407 | 0 | } |
408 | | /* }}} */ |
409 | | |
410 | | /* https://stackoverflow.com/questions/34365746/whats-the-fastest-way-to-convert-hex-to-integer-in-c */ |
/**
 * Convert one ASCII hexadecimal digit to its numeric value (0..15).
 *
 * The caller must pass a valid hex digit (asserted). In ASCII the letters
 * a-f / A-F have bit 6 set and a low nibble of 1..6, while the digits 0-9
 * have bit 6 clear and a low nibble equal to their value, so adding 9 for
 * letters yields the result without branching.
 */
static unsigned int php_htoi_single(unsigned char x)
{
	ZEND_ASSERT((x >= 'a' && x <= 'f') || (x >= 'A' && x <= 'F') || (x >= '0' && x <= '9'));

	unsigned int letter_bias = 9 * (x >> 6);
	unsigned int low_nibble = x & 0xf;

	return letter_bias + low_nibble;
}
416 | | |
417 | | /* {{{ php_htoi */ |
/**
 * Convert the two hexadecimal digits at s[0] and s[1] into a byte value.
 *
 * @param s Points at two valid hex digits; nothing beyond s[1] is read.
 * @return The value 0..255 encoded by the digit pair.
 */
static int php_htoi(const char *s)
{
	const unsigned char *digits = (const unsigned char *)s;
	unsigned int high = php_htoi_single(digits[0]);
	unsigned int low = php_htoi_single(digits[1]);

	return (int)(high * 16 + low);
}
431 | | /* }}} */ |
432 | | |
433 | | /* rfc1738: |
434 | | |
435 | | ...The characters ";", |
436 | | "/", "?", ":", "@", "=" and "&" are the characters which may be |
437 | | reserved for special meaning within a scheme... |
438 | | |
439 | | ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and |
440 | | reserved characters used for their reserved purposes may be used |
441 | | unencoded within a URL... |
442 | | |
443 | | For added safety, we only leave -_. unencoded. |
444 | | */ |
445 | | |
446 | | static const unsigned char hexchars[] = "0123456789ABCDEF"; |
447 | | |
448 | 0 | static zend_always_inline size_t php_url_encode_impl(unsigned char *to, const char *s, size_t len, bool raw) /* {{{ */ { |
449 | 0 | unsigned char c; |
450 | 0 | unsigned char const *from, *end; |
451 | 0 | const unsigned char *to_init = to; |
452 | |
|
453 | 0 | from = (unsigned char *)s; |
454 | 0 | end = (unsigned char *)s + len; |
455 | |
|
456 | 0 | #ifdef XSSE2 |
457 | 0 | while (from + 16 < end) { |
458 | 0 | __m128i mask; |
459 | 0 | uint32_t bits; |
460 | 0 | const __m128i _A = _mm_set1_epi8('A' - 1); |
461 | 0 | const __m128i Z_ = _mm_set1_epi8('Z' + 1); |
462 | 0 | const __m128i _a = _mm_set1_epi8('a' - 1); |
463 | 0 | const __m128i z_ = _mm_set1_epi8('z' + 1); |
464 | 0 | const __m128i _zero = _mm_set1_epi8('0' - 1); |
465 | 0 | const __m128i nine_ = _mm_set1_epi8('9' + 1); |
466 | 0 | const __m128i dot = _mm_set1_epi8('.'); |
467 | 0 | const __m128i minus = _mm_set1_epi8('-'); |
468 | 0 | const __m128i under = _mm_set1_epi8('_'); |
469 | |
|
470 | 0 | __m128i in = _mm_loadu_si128((__m128i *)from); |
471 | |
|
472 | 0 | __m128i gt = _mm_cmpgt_epi8(in, _A); |
473 | 0 | __m128i lt = _mm_cmplt_epi8(in, Z_); |
474 | 0 | mask = _mm_and_si128(lt, gt); /* upper */ |
475 | 0 | gt = _mm_cmpgt_epi8(in, _a); |
476 | 0 | lt = _mm_cmplt_epi8(in, z_); |
477 | 0 | mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */ |
478 | 0 | gt = _mm_cmpgt_epi8(in, _zero); |
479 | 0 | lt = _mm_cmplt_epi8(in, nine_); |
480 | 0 | mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */ |
481 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot)); |
482 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus)); |
483 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under)); |
484 | |
|
485 | 0 | if (!raw) { |
486 | 0 | const __m128i blank = _mm_set1_epi8(' '); |
487 | 0 | __m128i eq = _mm_cmpeq_epi8(in, blank); |
488 | 0 | if (_mm_movemask_epi8(eq)) { |
489 | 0 | in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' '))); |
490 | 0 | mask = _mm_or_si128(mask, eq); |
491 | 0 | } |
492 | 0 | } |
493 | 0 | if (raw) { |
494 | 0 | const __m128i wavy = _mm_set1_epi8('~'); |
495 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy)); |
496 | 0 | } |
497 | 0 | if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) { |
498 | 0 | _mm_storeu_si128((__m128i*)to, in); |
499 | 0 | to += 16; |
500 | 0 | } else { |
501 | 0 | unsigned char xmm[16]; |
502 | 0 | _mm_storeu_si128((__m128i*)xmm, in); |
503 | 0 | for (size_t i = 0; i < sizeof(xmm); i++) { |
504 | 0 | if ((bits & (0x1 << i))) { |
505 | 0 | *to++ = xmm[i]; |
506 | 0 | } else { |
507 | 0 | *to++ = '%'; |
508 | 0 | *to++ = hexchars[xmm[i] >> 4]; |
509 | 0 | *to++ = hexchars[xmm[i] & 0xf]; |
510 | 0 | } |
511 | 0 | } |
512 | 0 | } |
513 | 0 | from += 16; |
514 | 0 | } |
515 | 0 | #endif |
516 | 0 | while (from < end) { |
517 | 0 | c = *from++; |
518 | |
|
519 | 0 | if (!raw && c == ' ') { |
520 | 0 | *to++ = '+'; |
521 | 0 | } else if ((c < '0' && c != '-' && c != '.') || |
522 | 0 | (c < 'A' && c > '9') || |
523 | 0 | (c > 'Z' && c < 'a' && c != '_') || |
524 | 0 | (c > 'z' && (!raw || c != '~'))) { |
525 | 0 | to[0] = '%'; |
526 | 0 | to[1] = hexchars[c >> 4]; |
527 | 0 | to[2] = hexchars[c & 15]; |
528 | 0 | to += 3; |
529 | 0 | } else { |
530 | 0 | *to++ = c; |
531 | 0 | } |
532 | 0 | } |
533 | |
|
534 | 0 | return to - to_init; |
535 | 0 | } |
536 | | /* }}} */ |
537 | | |
538 | | static zend_always_inline zend_string *php_url_encode_helper(char const *s, size_t len, bool raw) |
539 | 0 | { |
540 | 0 | zend_string *result = zend_string_safe_alloc(3, len, 0, false); |
541 | 0 | size_t length = php_url_encode_impl((unsigned char *) ZSTR_VAL(result), s, len, raw); |
542 | 0 | ZSTR_VAL(result)[length] = '\0'; |
543 | 0 | ZEND_ASSERT(!ZSTR_IS_INTERNED(result) && GC_REFCOUNT(result) == 1); |
544 | 0 | return zend_string_truncate(result, length, false); |
545 | 0 | } |
546 | | |
547 | | /* {{{ php_url_encode */ |
548 | | PHPAPI zend_string *php_url_encode(char const *s, size_t len) |
549 | 0 | { |
550 | 0 | return php_url_encode_helper(s, len, false); |
551 | 0 | } |
552 | | /* }}} */ |
553 | | |
554 | | /* {{{ URL-encodes string */ |
555 | | PHP_FUNCTION(urlencode) |
556 | 0 | { |
557 | 0 | zend_string *in_str; |
558 | |
|
559 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
560 | 0 | Z_PARAM_STR(in_str) |
561 | 0 | ZEND_PARSE_PARAMETERS_END(); |
562 | | |
563 | 0 | RETURN_NEW_STR(php_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str))); |
564 | 0 | } |
565 | | /* }}} */ |
566 | | |
567 | | /* {{{ Decodes URL-encoded string */ |
568 | | PHP_FUNCTION(urldecode) |
569 | 0 | { |
570 | 0 | zend_string *in_str, *out_str; |
571 | |
|
572 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
573 | 0 | Z_PARAM_STR(in_str) |
574 | 0 | ZEND_PARSE_PARAMETERS_END(); |
575 | | |
576 | 0 | out_str = zend_string_alloc(ZSTR_LEN(in_str), false); |
577 | 0 | ZSTR_LEN(out_str) = php_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str)); |
578 | |
|
579 | 0 | RETURN_NEW_STR(out_str); |
580 | 0 | } |
581 | | /* }}} */ |
582 | | |
583 | | PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len) |
584 | 138 | { |
585 | 138 | char *dest_start = dest; |
586 | 138 | const char *data = src; |
587 | | |
588 | 2.03k | while (src_len--) { |
589 | 1.89k | if (*data == '+') { |
590 | 0 | *dest = ' '; |
591 | 0 | } |
592 | 1.89k | else if (*data == '%' && src_len >= 2 && isxdigit((unsigned char)data[1]) |
593 | 1.89k | && isxdigit((unsigned char)data[2])) { |
594 | 0 | *dest = (char) php_htoi(data + 1); |
595 | 0 | data += 2; |
596 | 0 | src_len -= 2; |
597 | 1.89k | } else { |
598 | 1.89k | *dest = *data; |
599 | 1.89k | } |
600 | 1.89k | data++; |
601 | 1.89k | dest++; |
602 | 1.89k | } |
603 | 138 | *dest = '\0'; |
604 | 138 | return dest - dest_start; |
605 | 138 | } |
606 | | |
607 | | /* {{{ php_url_decode */ |
608 | | PHPAPI size_t php_url_decode(char *str, size_t len) |
609 | 138 | { |
610 | 138 | return php_url_decode_ex(str, str, len); |
611 | 138 | } |
612 | | /* }}} */ |
613 | | |
614 | | /* {{{ php_raw_url_encode */ |
615 | | PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len) |
616 | 0 | { |
617 | 0 | return php_url_encode_helper(s, len, true); |
618 | 0 | } |
619 | | /* }}} */ |
620 | | |
621 | | PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw) |
622 | 0 | { |
623 | 0 | size_t start_length = smart_str_get_len(buf); |
624 | 0 | size_t extend = zend_safe_address_guarded(3, len, 0); |
625 | 0 | char *dest = smart_str_extend(buf, extend); |
626 | 0 | size_t length = php_url_encode_impl((unsigned char *) dest, s, len, raw); |
627 | 0 | ZSTR_LEN(buf->s) = start_length + length; |
628 | 0 | } |
629 | | |
630 | | /* {{{ URL-encodes string */ |
631 | | PHP_FUNCTION(rawurlencode) |
632 | 0 | { |
633 | 0 | zend_string *in_str; |
634 | |
|
635 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
636 | 0 | Z_PARAM_STR(in_str) |
637 | 0 | ZEND_PARSE_PARAMETERS_END(); |
638 | | |
639 | 0 | RETURN_NEW_STR(php_raw_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str))); |
640 | 0 | } |
641 | | /* }}} */ |
642 | | |
643 | | /* {{{ Decodes a raw URL-encoded string */ |
644 | | PHP_FUNCTION(rawurldecode) |
645 | 0 | { |
646 | 0 | zend_string *in_str, *out_str; |
647 | |
|
648 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
649 | 0 | Z_PARAM_STR(in_str) |
650 | 0 | ZEND_PARSE_PARAMETERS_END(); |
651 | | |
652 | 0 | out_str = zend_string_alloc(ZSTR_LEN(in_str), false); |
653 | 0 | ZSTR_LEN(out_str) = php_raw_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str)); |
654 | |
|
655 | 0 | RETURN_NEW_STR(out_str); |
656 | 0 | } |
657 | | /* }}} */ |
658 | | |
659 | | PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len) |
660 | 0 | { |
661 | 0 | char *dest_start = dest; |
662 | 0 | const char *data = src; |
663 | |
|
664 | 0 | while (src_len--) { |
665 | 0 | if (*data == '%' && src_len >= 2 && isxdigit((unsigned char)data[1]) |
666 | 0 | && isxdigit((unsigned char)data[2])) { |
667 | 0 | *dest = (char) php_htoi(data + 1); |
668 | 0 | data += 2; |
669 | 0 | src_len -= 2; |
670 | 0 | } else { |
671 | 0 | *dest = *data; |
672 | 0 | } |
673 | 0 | data++; |
674 | 0 | dest++; |
675 | 0 | } |
676 | 0 | *dest = '\0'; |
677 | 0 | return dest - dest_start; |
678 | 0 | } |
679 | | |
680 | | /* {{{ php_raw_url_decode */ |
681 | | PHPAPI size_t php_raw_url_decode(char *str, size_t len) |
682 | 0 | { |
683 | 0 | return php_raw_url_decode_ex(str, str, len); |
684 | 0 | } |
685 | | /* }}} */ |
686 | | |
687 | | /* {{{ fetches all the headers sent by the server in response to an HTTP request */ |
688 | | PHP_FUNCTION(get_headers) |
689 | 0 | { |
690 | 0 | char *url; |
691 | 0 | size_t url_len; |
692 | 0 | php_stream *stream; |
693 | 0 | zval *prev_val, *hdr = NULL; |
694 | 0 | bool format = 0; |
695 | 0 | zval *zcontext = NULL; |
696 | 0 | php_stream_context *context; |
697 | |
|
698 | 0 | ZEND_PARSE_PARAMETERS_START(1, 3) |
699 | 0 | Z_PARAM_PATH(url, url_len) |
700 | 0 | Z_PARAM_OPTIONAL |
701 | 0 | Z_PARAM_BOOL(format) |
702 | 0 | Z_PARAM_RESOURCE_OR_NULL(zcontext) |
703 | 0 | ZEND_PARSE_PARAMETERS_END(); |
704 | | |
705 | 0 | context = php_stream_context_from_zval(zcontext, 0); |
706 | |
|
707 | 0 | if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) { |
708 | 0 | RETURN_FALSE; |
709 | 0 | } |
710 | | |
711 | 0 | if (Z_TYPE(stream->wrapperdata) != IS_ARRAY) { |
712 | 0 | php_stream_close(stream); |
713 | 0 | RETURN_FALSE; |
714 | 0 | } |
715 | | |
716 | 0 | array_init(return_value); |
717 | |
|
718 | 0 | ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(&stream->wrapperdata), hdr) { |
719 | 0 | if (Z_TYPE_P(hdr) != IS_STRING) { |
720 | 0 | continue; |
721 | 0 | } |
722 | 0 | if (!format) { |
723 | 0 | no_name_header: |
724 | 0 | add_next_index_str(return_value, zend_string_copy(Z_STR_P(hdr))); |
725 | 0 | } else { |
726 | 0 | char c; |
727 | 0 | char *s, *p; |
728 | |
|
729 | 0 | if ((p = strchr(Z_STRVAL_P(hdr), ':'))) { |
730 | 0 | c = *p; |
731 | 0 | *p = '\0'; |
732 | 0 | s = p + 1; |
733 | 0 | while (isspace((unsigned char)*s)) { |
734 | 0 | s++; |
735 | 0 | } |
736 | |
|
737 | 0 | if ((prev_val = zend_hash_str_find(Z_ARRVAL_P(return_value), Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)))) == NULL) { |
738 | 0 | add_assoc_stringl_ex(return_value, Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)), s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr)))); |
739 | 0 | } else { /* some headers may occur more than once, therefore we need to remake the string into an array */ |
740 | 0 | convert_to_array(prev_val); |
741 | 0 | add_next_index_stringl(prev_val, s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr)))); |
742 | 0 | } |
743 | |
|
744 | 0 | *p = c; |
745 | 0 | } else { |
746 | 0 | goto no_name_header; |
747 | 0 | } |
748 | 0 | } |
749 | 0 | } ZEND_HASH_FOREACH_END(); |
750 | | |
751 | 0 | php_stream_close(stream); |
752 | 0 | } |
753 | | /* }}} */ |