/src/php-src/ext/standard/url.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | https://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Author: Jim Winstead <jimw@php.net> | |
14 | | +----------------------------------------------------------------------+ |
15 | | */ |
16 | | |
17 | | #include <stdlib.h> |
18 | | #include <string.h> |
19 | | #include <ctype.h> |
20 | | #include <sys/types.h> |
21 | | |
22 | | #include "php.h" |
23 | | |
24 | | #include "url.h" |
25 | | #include "file.h" |
26 | | #include "zend_simd.h" |
27 | | #include "Zend/zend_smart_str.h" |
28 | | |
29 | | /* {{{ free_url */ |
30 | | PHPAPI void php_url_free(php_url *theurl) |
31 | 0 | { |
32 | 0 | if (theurl->scheme) |
33 | 0 | zend_string_release_ex(theurl->scheme, 0); |
34 | 0 | if (theurl->user) |
35 | 0 | zend_string_release_ex(theurl->user, 0); |
36 | 0 | if (theurl->pass) |
37 | 0 | zend_string_release_ex(theurl->pass, 0); |
38 | 0 | if (theurl->host) |
39 | 0 | zend_string_release_ex(theurl->host, 0); |
40 | 0 | if (theurl->path) |
41 | 0 | zend_string_release_ex(theurl->path, 0); |
42 | 0 | if (theurl->query) |
43 | 0 | zend_string_release_ex(theurl->query, 0); |
44 | 0 | if (theurl->fragment) |
45 | 0 | zend_string_release_ex(theurl->fragment, 0); |
46 | 0 | efree(theurl); |
47 | 0 | } |
48 | | /* }}} */ |
49 | | |
/*
 * Rewrites str[0..len) in place: every byte for which iscntrl() reports a
 * control character becomes '_'. Bytes are inspected as unsigned char.
 */
static void php_replace_controlchars(char *str, size_t len)
{
	unsigned char *bytes = (unsigned char *)str;

	ZEND_ASSERT(str != NULL);

	for (size_t i = 0; i < len; i++) {
		if (iscntrl(bytes[i])) {
			bytes[i] = '_';
		}
	}
}
64 | | |
65 | | PHPAPI php_url *php_url_parse(char const *str) |
66 | 0 | { |
67 | 0 | return php_url_parse_ex(str, strlen(str)); |
68 | 0 | } |
69 | | |
/*
 * Length-bounded counterpart of strcspn(): returns a pointer to the first
 * byte in [s, e) that equals any character of the NUL-terminated set
 * `chars`, or e itself when none of them occurs. The range may contain
 * NUL bytes.
 */
static const char *binary_strcspn(const char *s, const char *e, const char *chars) {
	const char *limit = e;

	/* Each hit narrows the window, so later set members can only move the limit left. */
	for (const char *c = chars; *c != '\0'; c++) {
		const char *hit = memchr(s, *c, limit - s);
		if (hit != NULL) {
			limit = hit;
		}
	}

	return limit;
}
80 | | |
81 | | /* {{{ php_url_parse_ex */ |
82 | | PHPAPI php_url *php_url_parse_ex(char const *str, size_t length) |
83 | 0 | { |
84 | 0 | bool has_port; |
85 | 0 | return php_url_parse_ex2(str, length, &has_port); |
86 | 0 | } |
87 | | |
88 | | /* {{{ php_url_parse_ex2 |
89 | | */ |
90 | | PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port) |
91 | 0 | { |
92 | 0 | char port_buf[6]; |
93 | 0 | php_url *ret = ecalloc(1, sizeof(php_url)); |
94 | 0 | char const *s, *e, *p, *pp, *ue; |
95 | |
|
96 | 0 | *has_port = 0; |
97 | 0 | s = str; |
98 | 0 | ue = s + length; |
99 | | |
100 | | /* parse scheme */ |
101 | 0 | if ((e = memchr(s, ':', length)) && e != s) { |
102 | | /* validate scheme */ |
103 | 0 | p = s; |
104 | 0 | while (p < e) { |
105 | | /* scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] */ |
106 | 0 | if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') { |
107 | 0 | if (e + 1 < ue && e < binary_strcspn(s, ue, "?#")) { |
108 | 0 | goto parse_port; |
109 | 0 | } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
110 | 0 | s += 2; |
111 | 0 | e = 0; |
112 | 0 | goto parse_host; |
113 | 0 | } else { |
114 | 0 | goto just_path; |
115 | 0 | } |
116 | 0 | } |
117 | 0 | p++; |
118 | 0 | } |
119 | | |
120 | 0 | if (e + 1 == ue) { /* only scheme is available */ |
121 | 0 | ret->scheme = zend_string_init(s, (e - s), 0); |
122 | 0 | php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme)); |
123 | 0 | return ret; |
124 | 0 | } |
125 | | |
126 | | /* |
127 | | * certain schemas like mailto: and zlib: may not have any / after them |
128 | | * this check ensures we support those. |
129 | | */ |
130 | 0 | if (*(e+1) != '/') { |
131 | | /* check if the data we get is a port this allows us to |
132 | | * correctly parse things like a.com:80 |
133 | | */ |
134 | 0 | p = e + 1; |
135 | 0 | while (p < ue && isdigit(*p)) { |
136 | 0 | p++; |
137 | 0 | } |
138 | |
|
139 | 0 | if ((p == ue || *p == '/') && (p - e) < 7) { |
140 | 0 | goto parse_port; |
141 | 0 | } |
142 | | |
143 | 0 | ret->scheme = zend_string_init(s, (e-s), 0); |
144 | 0 | php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme)); |
145 | |
|
146 | 0 | s = e + 1; |
147 | 0 | goto just_path; |
148 | 0 | } else { |
149 | 0 | ret->scheme = zend_string_init(s, (e-s), 0); |
150 | 0 | php_replace_controlchars(ZSTR_VAL(ret->scheme), ZSTR_LEN(ret->scheme)); |
151 | |
|
152 | 0 | if (e + 2 < ue && *(e + 2) == '/') { |
153 | 0 | s = e + 3; |
154 | 0 | if (zend_string_equals_literal_ci(ret->scheme, "file")) { |
155 | 0 | if (e + 3 < ue && *(e + 3) == '/') { |
156 | | /* support windows drive letters as in: |
157 | | file:///c:/somedir/file.txt |
158 | | */ |
159 | 0 | if (e + 5 < ue && *(e + 5) == ':') { |
160 | 0 | s = e + 4; |
161 | 0 | } |
162 | 0 | goto just_path; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | } else { |
166 | 0 | s = e + 1; |
167 | 0 | goto just_path; |
168 | 0 | } |
169 | 0 | } |
170 | 0 | } else if (e) { /* no scheme; starts with colon: look for port */ |
171 | 0 | parse_port: |
172 | 0 | p = e + 1; |
173 | 0 | pp = p; |
174 | |
|
175 | 0 | while (pp < ue && pp - p < 6 && isdigit(*pp)) { |
176 | 0 | pp++; |
177 | 0 | } |
178 | |
|
179 | 0 | if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) { |
180 | 0 | zend_long port; |
181 | 0 | char *end; |
182 | 0 | memcpy(port_buf, p, (pp - p)); |
183 | 0 | port_buf[pp - p] = '\0'; |
184 | 0 | port = ZEND_STRTOL(port_buf, &end, 10); |
185 | 0 | if (port >= 0 && port <= 65535 && end != port_buf) { |
186 | 0 | *has_port = 1; |
187 | 0 | ret->port = (unsigned short) port; |
188 | 0 | if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
189 | 0 | s += 2; |
190 | 0 | } |
191 | 0 | } else { |
192 | 0 | php_url_free(ret); |
193 | 0 | return NULL; |
194 | 0 | } |
195 | 0 | } else if (p == pp && pp == ue) { |
196 | 0 | php_url_free(ret); |
197 | 0 | return NULL; |
198 | 0 | } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
199 | 0 | s += 2; |
200 | 0 | } else { |
201 | 0 | goto just_path; |
202 | 0 | } |
203 | 0 | } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ |
204 | 0 | s += 2; |
205 | 0 | } else { |
206 | 0 | goto just_path; |
207 | 0 | } |
208 | | |
209 | 0 | parse_host: |
210 | 0 | e = binary_strcspn(s, ue, "/?#"); |
211 | | |
212 | | /* check for login and password */ |
213 | 0 | if ((p = zend_memrchr(s, '@', (e-s)))) { |
214 | 0 | if ((pp = memchr(s, ':', (p-s)))) { |
215 | 0 | ret->user = zend_string_init(s, (pp-s), 0); |
216 | 0 | php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user)); |
217 | |
|
218 | 0 | pp++; |
219 | 0 | ret->pass = zend_string_init(pp, (p-pp), 0); |
220 | 0 | php_replace_controlchars(ZSTR_VAL(ret->pass), ZSTR_LEN(ret->pass)); |
221 | 0 | } else { |
222 | 0 | ret->user = zend_string_init(s, (p-s), 0); |
223 | 0 | php_replace_controlchars(ZSTR_VAL(ret->user), ZSTR_LEN(ret->user)); |
224 | 0 | } |
225 | |
|
226 | 0 | s = p + 1; |
227 | 0 | } |
228 | | |
229 | | /* check for port */ |
230 | 0 | if (s < ue && *s == '[' && *(e-1) == ']') { |
231 | | /* Short circuit portscan, |
232 | | we're dealing with an |
233 | | IPv6 embedded address */ |
234 | 0 | p = NULL; |
235 | 0 | } else { |
236 | 0 | p = zend_memrchr(s, ':', (e-s)); |
237 | 0 | } |
238 | |
|
239 | 0 | if (p) { |
240 | 0 | if (!ret->port) { |
241 | 0 | p++; |
242 | 0 | if (e-p > 5) { /* port cannot be longer then 5 characters */ |
243 | 0 | php_url_free(ret); |
244 | 0 | return NULL; |
245 | 0 | } else if (e - p > 0) { |
246 | 0 | zend_long port; |
247 | 0 | char *end; |
248 | 0 | memcpy(port_buf, p, (e - p)); |
249 | 0 | port_buf[e - p] = '\0'; |
250 | 0 | port = ZEND_STRTOL(port_buf, &end, 10); |
251 | 0 | if (port >= 0 && port <= 65535 && end != port_buf) { |
252 | 0 | *has_port = 1; |
253 | 0 | ret->port = (unsigned short)port; |
254 | 0 | } else { |
255 | 0 | php_url_free(ret); |
256 | 0 | return NULL; |
257 | 0 | } |
258 | 0 | } |
259 | 0 | p--; |
260 | 0 | } |
261 | 0 | } else { |
262 | 0 | p = e; |
263 | 0 | } |
264 | | |
265 | | /* check if we have a valid host, if we don't reject the string as url */ |
266 | 0 | if ((p-s) < 1) { |
267 | 0 | php_url_free(ret); |
268 | 0 | return NULL; |
269 | 0 | } |
270 | | |
271 | 0 | ret->host = zend_string_init(s, (p-s), 0); |
272 | 0 | php_replace_controlchars(ZSTR_VAL(ret->host), ZSTR_LEN(ret->host)); |
273 | |
|
274 | 0 | if (e == ue) { |
275 | 0 | return ret; |
276 | 0 | } |
277 | | |
278 | 0 | s = e; |
279 | |
|
280 | 0 | just_path: |
281 | |
|
282 | 0 | e = ue; |
283 | 0 | p = memchr(s, '#', (e - s)); |
284 | 0 | if (p) { |
285 | 0 | p++; |
286 | 0 | if (p < e) { |
287 | 0 | ret->fragment = zend_string_init(p, (e - p), 0); |
288 | 0 | php_replace_controlchars(ZSTR_VAL(ret->fragment), ZSTR_LEN(ret->fragment)); |
289 | 0 | } else { |
290 | 0 | ret->fragment = ZSTR_EMPTY_ALLOC(); |
291 | 0 | } |
292 | 0 | e = p-1; |
293 | 0 | } |
294 | |
|
295 | 0 | p = memchr(s, '?', (e - s)); |
296 | 0 | if (p) { |
297 | 0 | p++; |
298 | 0 | if (p < e) { |
299 | 0 | ret->query = zend_string_init(p, (e - p), 0); |
300 | 0 | php_replace_controlchars(ZSTR_VAL(ret->query), ZSTR_LEN(ret->query)); |
301 | 0 | } else { |
302 | 0 | ret->query = ZSTR_EMPTY_ALLOC(); |
303 | 0 | } |
304 | 0 | e = p-1; |
305 | 0 | } |
306 | |
|
307 | 0 | if (s < e || s == ue) { |
308 | 0 | ret->path = zend_string_init(s, (e - s), 0); |
309 | 0 | php_replace_controlchars(ZSTR_VAL(ret->path), ZSTR_LEN(ret->path)); |
310 | 0 | } |
311 | |
|
312 | 0 | return ret; |
313 | 0 | } |
314 | | /* }}} */ |
315 | | |
316 | | /* {{{ Parse a URL and return its components */ |
317 | | PHP_FUNCTION(parse_url) |
318 | 0 | { |
319 | 0 | char *str; |
320 | 0 | size_t str_len; |
321 | 0 | php_url *resource; |
322 | 0 | zend_long key = -1; |
323 | 0 | zval tmp; |
324 | 0 | bool has_port; |
325 | |
|
326 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
327 | 0 | Z_PARAM_STRING(str, str_len) |
328 | 0 | Z_PARAM_OPTIONAL |
329 | 0 | Z_PARAM_LONG(key) |
330 | 0 | ZEND_PARSE_PARAMETERS_END(); |
331 | | |
332 | 0 | resource = php_url_parse_ex2(str, str_len, &has_port); |
333 | 0 | if (resource == NULL) { |
334 | | /* @todo Find a method to determine why php_url_parse_ex() failed */ |
335 | 0 | RETURN_FALSE; |
336 | 0 | } |
337 | | |
338 | 0 | if (key > -1) { |
339 | 0 | switch (key) { |
340 | 0 | case PHP_URL_SCHEME: |
341 | 0 | if (resource->scheme != NULL) RETVAL_STR_COPY(resource->scheme); |
342 | 0 | break; |
343 | 0 | case PHP_URL_HOST: |
344 | 0 | if (resource->host != NULL) RETVAL_STR_COPY(resource->host); |
345 | 0 | break; |
346 | 0 | case PHP_URL_PORT: |
347 | 0 | if (has_port) RETVAL_LONG(resource->port); |
348 | 0 | break; |
349 | 0 | case PHP_URL_USER: |
350 | 0 | if (resource->user != NULL) RETVAL_STR_COPY(resource->user); |
351 | 0 | break; |
352 | 0 | case PHP_URL_PASS: |
353 | 0 | if (resource->pass != NULL) RETVAL_STR_COPY(resource->pass); |
354 | 0 | break; |
355 | 0 | case PHP_URL_PATH: |
356 | 0 | if (resource->path != NULL) RETVAL_STR_COPY(resource->path); |
357 | 0 | break; |
358 | 0 | case PHP_URL_QUERY: |
359 | 0 | if (resource->query != NULL) RETVAL_STR_COPY(resource->query); |
360 | 0 | break; |
361 | 0 | case PHP_URL_FRAGMENT: |
362 | 0 | if (resource->fragment != NULL) RETVAL_STR_COPY(resource->fragment); |
363 | 0 | break; |
364 | 0 | default: |
365 | 0 | zend_argument_value_error(2, "must be a valid URL component identifier, " ZEND_LONG_FMT " given", key); |
366 | 0 | break; |
367 | 0 | } |
368 | 0 | goto done; |
369 | 0 | } |
370 | | |
371 | | /* allocate an array for return */ |
372 | 0 | array_init(return_value); |
373 | | |
374 | | /* add the various elements to the array */ |
375 | 0 | if (resource->scheme != NULL) { |
376 | 0 | ZVAL_STR_COPY(&tmp, resource->scheme); |
377 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_SCHEME), &tmp); |
378 | 0 | } |
379 | 0 | if (resource->host != NULL) { |
380 | 0 | ZVAL_STR_COPY(&tmp, resource->host); |
381 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_HOST), &tmp); |
382 | 0 | } |
383 | 0 | if (has_port) { |
384 | 0 | ZVAL_LONG(&tmp, resource->port); |
385 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PORT), &tmp); |
386 | 0 | } |
387 | 0 | if (resource->user != NULL) { |
388 | 0 | ZVAL_STR_COPY(&tmp, resource->user); |
389 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_USER), &tmp); |
390 | 0 | } |
391 | 0 | if (resource->pass != NULL) { |
392 | 0 | ZVAL_STR_COPY(&tmp, resource->pass); |
393 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PASS), &tmp); |
394 | 0 | } |
395 | 0 | if (resource->path != NULL) { |
396 | 0 | ZVAL_STR_COPY(&tmp, resource->path); |
397 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_PATH), &tmp); |
398 | 0 | } |
399 | 0 | if (resource->query != NULL) { |
400 | 0 | ZVAL_STR_COPY(&tmp, resource->query); |
401 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_QUERY), &tmp); |
402 | 0 | } |
403 | 0 | if (resource->fragment != NULL) { |
404 | 0 | ZVAL_STR_COPY(&tmp, resource->fragment); |
405 | 0 | zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_FRAGMENT), &tmp); |
406 | 0 | } |
407 | 0 | done: |
408 | 0 | php_url_free(resource); |
409 | 0 | } |
410 | | /* }}} */ |
411 | | |
412 | | /* https://stackoverflow.com/questions/34365746/whats-the-fastest-way-to-convert-hex-to-integer-in-c */ |
/*
 * Converts one ASCII hexadecimal digit to its value (0-15).
 * Digits have bit 6 clear and their value in the low nibble; letters have
 * bit 6 set and (value - 9) in the low nibble, hence the 9 * (x >> 6) term.
 * The caller must pass a valid hex digit (checked by assertion only).
 */
static unsigned int php_htoi_single(unsigned char x)
{
	ZEND_ASSERT((x >= 'a' && x <= 'f') || (x >= 'A' && x <= 'F') || (x >= '0' && x <= '9'));

	const int letter_offset = 9 * (x >> 6);
	const int low_nibble = x & 0xf;

	return letter_offset + low_nibble;
}
418 | | |
419 | | /* {{{ php_htoi */ |
/*
 * Converts the two hexadecimal digits at s[0] and s[1] into a byte value
 * (0-255). Both characters must already be known to be hex digits.
 */
static int php_htoi(const char *s)
{
	const unsigned char *digits = (const unsigned char *)s;
	const unsigned int high = php_htoi_single(digits[0]);
	const unsigned int low = php_htoi_single(digits[1]);

	return (int)(high * 16 + low);
}
433 | | /* }}} */ |
434 | | |
435 | | /* rfc1738: |
436 | | |
437 | | ...The characters ";", |
438 | | "/", "?", ":", "@", "=" and "&" are the characters which may be |
439 | | reserved for special meaning within a scheme... |
440 | | |
441 | | ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and |
442 | | reserved characters used for their reserved purposes may be used |
443 | | unencoded within a URL... |
444 | | |
445 | | For added safety, we only leave -_. unencoded. |
446 | | */ |
447 | | |
448 | | static const unsigned char hexchars[] = "0123456789ABCDEF"; |
449 | | |
450 | 0 | static zend_always_inline size_t php_url_encode_impl(unsigned char *to, const char *s, size_t len, bool raw) /* {{{ */ { |
451 | 0 | unsigned char c; |
452 | 0 | unsigned char const *from, *end; |
453 | 0 | const unsigned char *to_init = to; |
454 | |
|
455 | 0 | from = (unsigned char *)s; |
456 | 0 | end = (unsigned char *)s + len; |
457 | |
|
458 | 0 | #ifdef XSSE2 |
459 | 0 | while (from + 16 < end) { |
460 | 0 | __m128i mask; |
461 | 0 | uint32_t bits; |
462 | 0 | const __m128i _A = _mm_set1_epi8('A' - 1); |
463 | 0 | const __m128i Z_ = _mm_set1_epi8('Z' + 1); |
464 | 0 | const __m128i _a = _mm_set1_epi8('a' - 1); |
465 | 0 | const __m128i z_ = _mm_set1_epi8('z' + 1); |
466 | 0 | const __m128i _zero = _mm_set1_epi8('0' - 1); |
467 | 0 | const __m128i nine_ = _mm_set1_epi8('9' + 1); |
468 | 0 | const __m128i dot = _mm_set1_epi8('.'); |
469 | 0 | const __m128i minus = _mm_set1_epi8('-'); |
470 | 0 | const __m128i under = _mm_set1_epi8('_'); |
471 | |
|
472 | 0 | __m128i in = _mm_loadu_si128((__m128i *)from); |
473 | |
|
474 | 0 | __m128i gt = _mm_cmpgt_epi8(in, _A); |
475 | 0 | __m128i lt = _mm_cmplt_epi8(in, Z_); |
476 | 0 | mask = _mm_and_si128(lt, gt); /* upper */ |
477 | 0 | gt = _mm_cmpgt_epi8(in, _a); |
478 | 0 | lt = _mm_cmplt_epi8(in, z_); |
479 | 0 | mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */ |
480 | 0 | gt = _mm_cmpgt_epi8(in, _zero); |
481 | 0 | lt = _mm_cmplt_epi8(in, nine_); |
482 | 0 | mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */ |
483 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot)); |
484 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus)); |
485 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under)); |
486 | |
|
487 | 0 | if (!raw) { |
488 | 0 | const __m128i blank = _mm_set1_epi8(' '); |
489 | 0 | __m128i eq = _mm_cmpeq_epi8(in, blank); |
490 | 0 | if (_mm_movemask_epi8(eq)) { |
491 | 0 | in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' '))); |
492 | 0 | mask = _mm_or_si128(mask, eq); |
493 | 0 | } |
494 | 0 | } |
495 | 0 | if (raw) { |
496 | 0 | const __m128i wavy = _mm_set1_epi8('~'); |
497 | 0 | mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy)); |
498 | 0 | } |
499 | 0 | if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) { |
500 | 0 | _mm_storeu_si128((__m128i*)to, in); |
501 | 0 | to += 16; |
502 | 0 | } else { |
503 | 0 | unsigned char xmm[16]; |
504 | 0 | _mm_storeu_si128((__m128i*)xmm, in); |
505 | 0 | for (size_t i = 0; i < sizeof(xmm); i++) { |
506 | 0 | if ((bits & (0x1 << i))) { |
507 | 0 | *to++ = xmm[i]; |
508 | 0 | } else { |
509 | 0 | *to++ = '%'; |
510 | 0 | *to++ = hexchars[xmm[i] >> 4]; |
511 | 0 | *to++ = hexchars[xmm[i] & 0xf]; |
512 | 0 | } |
513 | 0 | } |
514 | 0 | } |
515 | 0 | from += 16; |
516 | 0 | } |
517 | 0 | #endif |
518 | 0 | while (from < end) { |
519 | 0 | c = *from++; |
520 | |
|
521 | 0 | if (!raw && c == ' ') { |
522 | 0 | *to++ = '+'; |
523 | 0 | } else if ((c < '0' && c != '-' && c != '.') || |
524 | 0 | (c < 'A' && c > '9') || |
525 | 0 | (c > 'Z' && c < 'a' && c != '_') || |
526 | 0 | (c > 'z' && (!raw || c != '~'))) { |
527 | 0 | to[0] = '%'; |
528 | 0 | to[1] = hexchars[c >> 4]; |
529 | 0 | to[2] = hexchars[c & 15]; |
530 | 0 | to += 3; |
531 | 0 | } else { |
532 | 0 | *to++ = c; |
533 | 0 | } |
534 | 0 | } |
535 | |
|
536 | 0 | return to - to_init; |
537 | 0 | } |
538 | | /* }}} */ |
539 | | |
540 | | static zend_always_inline zend_string *php_url_encode_helper(char const *s, size_t len, bool raw) |
541 | 0 | { |
542 | 0 | zend_string *result = zend_string_safe_alloc(3, len, 0, false); |
543 | 0 | size_t length = php_url_encode_impl((unsigned char *) ZSTR_VAL(result), s, len, raw); |
544 | 0 | ZSTR_VAL(result)[length] = '\0'; |
545 | 0 | ZEND_ASSERT(!ZSTR_IS_INTERNED(result) && GC_REFCOUNT(result) == 1); |
546 | 0 | return zend_string_truncate(result, length, false); |
547 | 0 | } |
548 | | |
549 | | /* {{{ php_url_encode */ |
550 | | PHPAPI zend_string *php_url_encode(char const *s, size_t len) |
551 | 0 | { |
552 | 0 | return php_url_encode_helper(s, len, false); |
553 | 0 | } |
554 | | /* }}} */ |
555 | | |
556 | | /* {{{ URL-encodes string */ |
557 | | PHP_FUNCTION(urlencode) |
558 | 0 | { |
559 | 0 | zend_string *in_str; |
560 | |
|
561 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
562 | 0 | Z_PARAM_STR(in_str) |
563 | 0 | ZEND_PARSE_PARAMETERS_END(); |
564 | | |
565 | 0 | RETURN_NEW_STR(php_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str))); |
566 | 0 | } |
567 | | /* }}} */ |
568 | | |
569 | | /* {{{ Decodes URL-encoded string */ |
570 | | PHP_FUNCTION(urldecode) |
571 | 0 | { |
572 | 0 | zend_string *in_str, *out_str; |
573 | |
|
574 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
575 | 0 | Z_PARAM_STR(in_str) |
576 | 0 | ZEND_PARSE_PARAMETERS_END(); |
577 | | |
578 | 0 | out_str = zend_string_alloc(ZSTR_LEN(in_str), false); |
579 | 0 | ZSTR_LEN(out_str) = php_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str)); |
580 | |
|
581 | 0 | RETURN_NEW_STR(out_str); |
582 | 0 | } |
583 | | /* }}} */ |
584 | | |
585 | | PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len) |
586 | 38 | { |
587 | 38 | char *dest_start = dest; |
588 | 38 | const char *data = src; |
589 | | |
590 | 1.81k | while (src_len--) { |
591 | 1.77k | if (*data == '+') { |
592 | 2 | *dest = ' '; |
593 | 2 | } |
594 | 1.77k | else if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1)) |
595 | 1.77k | && isxdigit((int) *(data + 2))) { |
596 | 0 | *dest = (char) php_htoi(data + 1); |
597 | 0 | data += 2; |
598 | 0 | src_len -= 2; |
599 | 1.77k | } else { |
600 | 1.77k | *dest = *data; |
601 | 1.77k | } |
602 | 1.77k | data++; |
603 | 1.77k | dest++; |
604 | 1.77k | } |
605 | 38 | *dest = '\0'; |
606 | 38 | return dest - dest_start; |
607 | 38 | } |
608 | | |
609 | | /* {{{ php_url_decode */ |
610 | | PHPAPI size_t php_url_decode(char *str, size_t len) |
611 | 38 | { |
612 | 38 | return php_url_decode_ex(str, str, len); |
613 | 38 | } |
614 | | /* }}} */ |
615 | | |
616 | | /* {{{ php_raw_url_encode */ |
617 | | PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len) |
618 | 0 | { |
619 | 0 | return php_url_encode_helper(s, len, true); |
620 | 0 | } |
621 | | /* }}} */ |
622 | | |
623 | | PHPAPI void php_url_encode_to_smart_str(smart_str *buf, char const *s, size_t len, bool raw) |
624 | 0 | { |
625 | 0 | size_t start_length = smart_str_get_len(buf); |
626 | 0 | size_t extend = zend_safe_address_guarded(3, len, 0); |
627 | 0 | char *dest = smart_str_extend(buf, extend); |
628 | 0 | size_t length = php_url_encode_impl((unsigned char *) dest, s, len, raw); |
629 | 0 | ZSTR_LEN(buf->s) = start_length + length; |
630 | 0 | } |
631 | | |
632 | | /* {{{ URL-encodes string */ |
633 | | PHP_FUNCTION(rawurlencode) |
634 | 0 | { |
635 | 0 | zend_string *in_str; |
636 | |
|
637 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
638 | 0 | Z_PARAM_STR(in_str) |
639 | 0 | ZEND_PARSE_PARAMETERS_END(); |
640 | | |
641 | 0 | RETURN_NEW_STR(php_raw_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str))); |
642 | 0 | } |
643 | | /* }}} */ |
644 | | |
645 | | /* {{{ Decodes URL-encoded string */ |
646 | | PHP_FUNCTION(rawurldecode) |
647 | 0 | { |
648 | 0 | zend_string *in_str, *out_str; |
649 | |
|
650 | 0 | ZEND_PARSE_PARAMETERS_START(1, 1) |
651 | 0 | Z_PARAM_STR(in_str) |
652 | 0 | ZEND_PARSE_PARAMETERS_END(); |
653 | | |
654 | 0 | out_str = zend_string_alloc(ZSTR_LEN(in_str), false); |
655 | 0 | ZSTR_LEN(out_str) = php_raw_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str)); |
656 | |
|
657 | 0 | RETURN_NEW_STR(out_str); |
658 | 0 | } |
659 | | /* }}} */ |
660 | | |
661 | | PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len) |
662 | 0 | { |
663 | 0 | char *dest_start = dest; |
664 | 0 | const char *data = src; |
665 | |
|
666 | 0 | while (src_len--) { |
667 | 0 | if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1)) |
668 | 0 | && isxdigit((int) *(data + 2))) { |
669 | 0 | *dest = (char) php_htoi(data + 1); |
670 | 0 | data += 2; |
671 | 0 | src_len -= 2; |
672 | 0 | } else { |
673 | 0 | *dest = *data; |
674 | 0 | } |
675 | 0 | data++; |
676 | 0 | dest++; |
677 | 0 | } |
678 | 0 | *dest = '\0'; |
679 | 0 | return dest - dest_start; |
680 | 0 | } |
681 | | |
682 | | /* {{{ php_raw_url_decode */ |
683 | | PHPAPI size_t php_raw_url_decode(char *str, size_t len) |
684 | 0 | { |
685 | 0 | return php_raw_url_decode_ex(str, str, len); |
686 | 0 | } |
687 | | /* }}} */ |
688 | | |
689 | | /* {{{ fetches all the headers sent by the server in response to a HTTP request */ |
690 | | PHP_FUNCTION(get_headers) |
691 | 0 | { |
692 | 0 | char *url; |
693 | 0 | size_t url_len; |
694 | 0 | php_stream *stream; |
695 | 0 | zval *prev_val, *hdr = NULL; |
696 | 0 | bool format = 0; |
697 | 0 | zval *zcontext = NULL; |
698 | 0 | php_stream_context *context; |
699 | |
|
700 | 0 | ZEND_PARSE_PARAMETERS_START(1, 3) |
701 | 0 | Z_PARAM_PATH(url, url_len) |
702 | 0 | Z_PARAM_OPTIONAL |
703 | 0 | Z_PARAM_BOOL(format) |
704 | 0 | Z_PARAM_RESOURCE_OR_NULL(zcontext) |
705 | 0 | ZEND_PARSE_PARAMETERS_END(); |
706 | | |
707 | 0 | context = php_stream_context_from_zval(zcontext, 0); |
708 | |
|
709 | 0 | if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) { |
710 | 0 | RETURN_FALSE; |
711 | 0 | } |
712 | | |
713 | 0 | if (Z_TYPE(stream->wrapperdata) != IS_ARRAY) { |
714 | 0 | php_stream_close(stream); |
715 | 0 | RETURN_FALSE; |
716 | 0 | } |
717 | | |
718 | 0 | array_init(return_value); |
719 | |
|
720 | 0 | ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(&stream->wrapperdata), hdr) { |
721 | 0 | if (Z_TYPE_P(hdr) != IS_STRING) { |
722 | 0 | continue; |
723 | 0 | } |
724 | 0 | if (!format) { |
725 | 0 | no_name_header: |
726 | 0 | add_next_index_str(return_value, zend_string_copy(Z_STR_P(hdr))); |
727 | 0 | } else { |
728 | 0 | char c; |
729 | 0 | char *s, *p; |
730 | |
|
731 | 0 | if ((p = strchr(Z_STRVAL_P(hdr), ':'))) { |
732 | 0 | c = *p; |
733 | 0 | *p = '\0'; |
734 | 0 | s = p + 1; |
735 | 0 | while (isspace((int)*(unsigned char *)s)) { |
736 | 0 | s++; |
737 | 0 | } |
738 | |
|
739 | 0 | if ((prev_val = zend_hash_str_find(Z_ARRVAL_P(return_value), Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)))) == NULL) { |
740 | 0 | add_assoc_stringl_ex(return_value, Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)), s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr)))); |
741 | 0 | } else { /* some headers may occur more than once, therefore we need to remake the string into an array */ |
742 | 0 | convert_to_array(prev_val); |
743 | 0 | add_next_index_stringl(prev_val, s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr)))); |
744 | 0 | } |
745 | |
|
746 | 0 | *p = c; |
747 | 0 | } else { |
748 | 0 | goto no_name_header; |
749 | 0 | } |
750 | 0 | } |
751 | 0 | } ZEND_HASH_FOREACH_END(); |
752 | | |
753 | 0 | php_stream_close(stream); |
754 | 0 | } |
755 | | /* }}} */ |