/src/php-src/ext/pcre/php_pcre.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | https://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Author: Andrei Zmievski <andrei@php.net> | |
14 | | +----------------------------------------------------------------------+ |
15 | | */ |
16 | | |
17 | | #include "php.h" |
18 | | #include "php_ini.h" |
19 | | #include "php_pcre.h" |
20 | | #include "ext/standard/info.h" |
21 | | #include "ext/standard/basic_functions.h" |
22 | | #include "zend_smart_str.h" |
23 | | #include "SAPI.h" |
24 | | |
/* Result-ordering selectors.
 * NOTE(review): the code consuming these is outside this view; the names
 * suggest "group by pattern" vs. "group by match set" — confirm at the use site. */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
/* Bit flags kept clear of the low byte, so they can be OR-ed with the
 * selectors above without colliding. */
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* Bit flags for the split operation (own flag namespace, starts at bit 0). */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Bit flag for the grep operation (own flag namespace). */
#define PREG_GREP_INVERT (1<<0)

/* Stored in pcre_cache_entry.preg_options when JIT-compiled code exists for
 * the pattern. */
#define PREG_JIT (1<<3)

/* Number of cached patterns at which the compile cache starts evicting
 * unreferenced entries. */
#define PCRE_CACHE_SIZE 4096

/* Numeric (0/1) mirror of HAVE_PCRE_JIT_SUPPORT, usable in expressions. */
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif
45 | | |
46 | | char *php_pcre_version; |
47 | | |
48 | | #include "php_pcre_arginfo.h" |
49 | | |
/* One compiled pattern as stored (by value) in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	/* Compiled pattern; released with pcre2_code_free() when the entry is destroyed. */
	pcre2_code *re;
	/* Pointer is not NULL (during request) when there are named captures.
	 * Length is equal to capture_count + 1 to account for capture group 0.
	 * This table cache is only valid during request.
	 * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
	 * (see GH-17122 and GH-17132). */
	zend_string **subpats_table;
	/* PHP-side flags for the pattern (e.g. PREG_JIT). */
	uint32_t preg_options;
	/* Value of PCRE2_INFO_NAMECOUNT for the pattern. */
	uint32_t name_count;
	/* Value of PCRE2_INFO_CAPTURECOUNT for the pattern (group 0 not included). */
	uint32_t capture_count;
	/* PCRE2 option bits the pattern was compiled with. */
	uint32_t compile_options;
	/* Entries with a non-zero refcount are skipped by cache eviction. */
	uint32_t refcount;
};
64 | | |
65 | | PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre) |
66 | | |
#ifdef HAVE_PCRE_JIT_SUPPORT
/* Lower and upper size bounds passed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
/* Per-thread JIT stack; created on demand by php_pcre_init_pcre2(). */
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context using (infallible) system allocator. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* The compile and match contexts are shared per thread for now. They could
   instead be per pattern: either copy them into the pce, or drop the shared
   contexts entirely and create a pair for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block and the flag guarding its use. */
ZEND_TLS pcre2_match_data *mdata = NULL;
ZEND_TLS bool mdata_used = 0;
/* Set to 1 once every object above has been created successfully. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Process-wide mutex; allocated and freed on the main thread only. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
/* Without ZTS + JIT the mutex helpers expand to nothing. */
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif

/* Per-thread map from ctype locale name to the character tables built by
 * pcre2_maketables(). */
ZEND_TLS HashTable char_tables;

/* Defined further down; needed earlier by the cache destructor. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
100 | | |
101 | | static void php_pcre_free_char_table(zval *data) |
102 | 0 | {/*{{{*/ |
103 | 0 | void *ptr = Z_PTR_P(data); |
104 | 0 | pefree(ptr, 1); |
105 | 0 | }/*}}}*/ |
106 | | |
107 | | static void pcre_handle_exec_error(int pcre_code) /* {{{ */ |
108 | 1.55k | { |
109 | 1.55k | int preg_code = 0; |
110 | | |
111 | 1.55k | switch (pcre_code) { |
112 | 15 | case PCRE2_ERROR_MATCHLIMIT: |
113 | 15 | preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR; |
114 | 15 | break; |
115 | | |
116 | 0 | case PCRE2_ERROR_RECURSIONLIMIT: |
117 | 0 | preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR; |
118 | 0 | break; |
119 | | |
120 | 0 | case PCRE2_ERROR_BADUTFOFFSET: |
121 | 0 | preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; |
122 | 0 | break; |
123 | | |
124 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
125 | | case PCRE2_ERROR_JIT_STACKLIMIT: |
126 | | preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR; |
127 | | break; |
128 | | #endif |
129 | | |
130 | 1.54k | default: |
131 | 1.54k | if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) { |
132 | 140 | preg_code = PHP_PCRE_BAD_UTF8_ERROR; |
133 | 1.40k | } else { |
134 | 1.40k | preg_code = PHP_PCRE_INTERNAL_ERROR; |
135 | 1.40k | } |
136 | 1.54k | break; |
137 | 1.55k | } |
138 | | |
139 | 1.55k | PCRE_G(error_code) = preg_code; |
140 | 1.55k | } |
141 | | /* }}} */ |
142 | | |
143 | | static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */ |
144 | 0 | { |
145 | 0 | switch (error_code) { |
146 | 0 | case PHP_PCRE_NO_ERROR: |
147 | 0 | return "No error"; |
148 | 0 | case PHP_PCRE_INTERNAL_ERROR: |
149 | 0 | return "Internal error"; |
150 | 0 | case PHP_PCRE_BAD_UTF8_ERROR: |
151 | 0 | return "Malformed UTF-8 characters, possibly incorrectly encoded"; |
152 | 0 | case PHP_PCRE_BAD_UTF8_OFFSET_ERROR: |
153 | 0 | return "The offset did not correspond to the beginning of a valid UTF-8 code point"; |
154 | 0 | case PHP_PCRE_BACKTRACK_LIMIT_ERROR: |
155 | 0 | return "Backtrack limit exhausted"; |
156 | 0 | case PHP_PCRE_RECURSION_LIMIT_ERROR: |
157 | 0 | return "Recursion limit exhausted"; |
158 | | |
159 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
160 | | case PHP_PCRE_JIT_STACKLIMIT_ERROR: |
161 | | return "JIT stack limit exhausted"; |
162 | | #endif |
163 | | |
164 | 0 | default: |
165 | 0 | return "Unknown error"; |
166 | 0 | } |
167 | 0 | } |
168 | | /* }}} */ |
169 | | |
170 | | static void php_free_pcre_cache(zval *data) /* {{{ */ |
171 | 0 | { |
172 | 0 | pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); |
173 | 0 | if (!pce) return; |
174 | 0 | if (pce->subpats_table) { |
175 | 0 | free_subpats_table(pce->subpats_table, pce->capture_count + 1); |
176 | 0 | } |
177 | 0 | pcre2_code_free(pce->re); |
178 | 0 | free(pce); |
179 | 0 | } |
180 | | /* }}} */ |
181 | | |
182 | | static void *php_pcre_malloc(PCRE2_SIZE size, void *data) |
183 | 3.27k | { |
184 | 3.27k | return pemalloc(size, 1); |
185 | 3.27k | } |
186 | | |
/* PCRE2 deallocation callback paired with php_pcre_malloc(); the user data
 * pointer is unused. */
static void php_pcre_free(void *block, void *data)
{
	pefree(block, 1);
}
191 | | |
192 | | static void *php_pcre_emalloc(PCRE2_SIZE size, void *data) |
193 | 300k | { |
194 | 300k | return emalloc(size); |
195 | 300k | } |
196 | | |
/* PCRE2 deallocation callback paired with php_pcre_emalloc(); the user data
 * pointer is unused. */
static void php_pcre_efree(void *block, void *data)
{
	efree(block);
}
201 | | |
202 | 3.89k | #define PHP_PCRE_PREALLOC_MDATA_SIZE 32 |
203 | | |
204 | | static void php_pcre_init_pcre2(uint8_t jit) |
205 | 16 | {/*{{{*/ |
206 | 16 | if (!gctx) { |
207 | 16 | gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL); |
208 | 16 | if (!gctx) { |
209 | 0 | pcre2_init_ok = 0; |
210 | 0 | return; |
211 | 0 | } |
212 | 16 | } |
213 | | |
214 | 16 | if (!cctx) { |
215 | 16 | cctx = pcre2_compile_context_create(gctx); |
216 | 16 | if (!cctx) { |
217 | 0 | pcre2_init_ok = 0; |
218 | 0 | return; |
219 | 0 | } |
220 | 16 | } |
221 | | |
222 | 16 | if (!mctx) { |
223 | 16 | mctx = pcre2_match_context_create(gctx); |
224 | 16 | if (!mctx) { |
225 | 0 | pcre2_init_ok = 0; |
226 | 0 | return; |
227 | 0 | } |
228 | 16 | } |
229 | | |
230 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
231 | | if (jit && !jit_stack) { |
232 | | jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx); |
233 | | if (!jit_stack) { |
234 | | pcre2_init_ok = 0; |
235 | | return; |
236 | | } |
237 | | } |
238 | | #endif |
239 | | |
240 | 16 | if (!mdata) { |
241 | 16 | mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx); |
242 | 16 | if (!mdata) { |
243 | 0 | pcre2_init_ok = 0; |
244 | 0 | return; |
245 | 0 | } |
246 | 16 | } |
247 | | |
248 | 16 | pcre2_init_ok = 1; |
249 | 16 | }/*}}}*/ |
250 | | |
251 | | static void php_pcre_shutdown_pcre2(void) |
252 | 0 | {/*{{{*/ |
253 | 0 | if (gctx) { |
254 | 0 | pcre2_general_context_free(gctx); |
255 | 0 | gctx = NULL; |
256 | 0 | } |
257 | |
|
258 | 0 | if (cctx) { |
259 | 0 | pcre2_compile_context_free(cctx); |
260 | 0 | cctx = NULL; |
261 | 0 | } |
262 | |
|
263 | 0 | if (mctx) { |
264 | 0 | pcre2_match_context_free(mctx); |
265 | 0 | mctx = NULL; |
266 | 0 | } |
267 | |
|
268 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
269 | | /* Stack may only be destroyed when no cached patterns |
270 | | possibly associated with it do exist. */ |
271 | | if (jit_stack) { |
272 | | pcre2_jit_stack_free(jit_stack); |
273 | | jit_stack = NULL; |
274 | | } |
275 | | #endif |
276 | |
|
277 | 0 | if (mdata) { |
278 | 0 | pcre2_match_data_free(mdata); |
279 | 0 | mdata = NULL; |
280 | 0 | } |
281 | |
|
282 | 0 | pcre2_init_ok = 0; |
283 | 0 | }/*}}}*/ |
284 | | |
285 | | static PHP_GINIT_FUNCTION(pcre) /* {{{ */ |
286 | 16 | { |
287 | 16 | php_pcre_mutex_alloc(); |
288 | | |
289 | 16 | zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); |
290 | | |
291 | 16 | pcre_globals->backtrack_limit = 0; |
292 | 16 | pcre_globals->recursion_limit = 0; |
293 | 16 | pcre_globals->error_code = PHP_PCRE_NO_ERROR; |
294 | 16 | ZVAL_UNDEF(&pcre_globals->unmatched_null_pair); |
295 | 16 | ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair); |
296 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
297 | | pcre_globals->jit = 1; |
298 | | #endif |
299 | | |
300 | 16 | php_pcre_init_pcre2(1); |
301 | 16 | zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1); |
302 | 16 | } |
303 | | /* }}} */ |
304 | | |
305 | | static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */ |
306 | 0 | { |
307 | 0 | zend_hash_destroy(&pcre_globals->pcre_cache); |
308 | |
|
309 | 0 | php_pcre_shutdown_pcre2(); |
310 | 0 | zend_hash_destroy(&char_tables); |
311 | 0 | php_pcre_mutex_free(); |
312 | 0 | } |
313 | | /* }}} */ |
314 | | |
315 | | static PHP_INI_MH(OnUpdateBacktrackLimit) |
316 | 16 | {/*{{{*/ |
317 | 16 | OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); |
318 | 16 | if (mctx) { |
319 | 16 | pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit)); |
320 | 16 | } |
321 | | |
322 | 16 | return SUCCESS; |
323 | 16 | }/*}}}*/ |
324 | | |
325 | | static PHP_INI_MH(OnUpdateRecursionLimit) |
326 | 16 | {/*{{{*/ |
327 | 16 | OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); |
328 | 16 | if (mctx) { |
329 | 16 | pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit)); |
330 | 16 | } |
331 | | |
332 | 16 | return SUCCESS; |
333 | 16 | }/*}}}*/ |
334 | | |
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the flag through OnUpdateBool(), then
 * attaches the thread's JIT stack to the match context when JIT is on and a
 * stack exists, or detaches any stack otherwise. Always reports SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
	pcre2_jit_stack *stack_to_assign;

	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

	stack_to_assign = (PCRE_G(jit) && jit_stack) ? jit_stack : NULL;
	pcre2_jit_stack_assign(mctx, NULL, stack_to_assign);

	return SUCCESS;
}/*}}}*/
#endif
348 | | |
/* INI settings of the extension. Each entry is changeable at any stage
 * (PHP_INI_ALL) and pushes its new value into the PCRE2 match context through
 * its OnUpdate* handler. pcre.jit exists only when JIT support is compiled in. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	STD_PHP_INI_BOOLEAN("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
356 | | |
/* Fetches a string-valued pcre2_config() item into a freshly malloc()ed,
 * NUL-terminated buffer.
 *
 * what: PCRE2_CONFIG_* selector of a string item (e.g. PCRE2_CONFIG_VERSION).
 *
 * Returns the buffer, which the caller must free(), or NULL when the item is
 * unavailable (pcre2_config() reports an error or an empty result) or the
 * allocation fails. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	/* With a NULL destination pcre2_config() only reports the space needed;
	   a negative value is an error code (e.g. the option is unsupported). */
	int len = pcre2_config(what, NULL);
	char *ret;

	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t)len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		/* Nothing (valid) was written; do not hand out an uninitialised buffer. */
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
370 | | |
371 | | /* {{{ PHP_MINFO_FUNCTION(pcre) */ |
372 | | static PHP_MINFO_FUNCTION(pcre) |
373 | 5 | { |
374 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
375 | | uint32_t flag = 0; |
376 | | char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET); |
377 | | #endif |
378 | 5 | char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION); |
379 | 5 | char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION); |
380 | | |
381 | 5 | php_info_print_table_start(); |
382 | 5 | php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" ); |
383 | 5 | php_info_print_table_row(2, "PCRE Library Version", version); |
384 | 5 | free(version); |
385 | 5 | php_info_print_table_row(2, "PCRE Unicode Version", unicode); |
386 | 5 | free(unicode); |
387 | | |
388 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
389 | | if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) { |
390 | | php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled"); |
391 | | } else { |
392 | | php_info_print_table_row(2, "PCRE JIT Support", "unknown" ); |
393 | | } |
394 | | if (jit_target) { |
395 | | php_info_print_table_row(2, "PCRE JIT Target", jit_target); |
396 | | } |
397 | | free(jit_target); |
398 | | #else |
399 | 5 | php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" ); |
400 | 5 | #endif |
401 | | |
402 | | #ifdef HAVE_PCRE_VALGRIND_SUPPORT |
403 | | php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" ); |
404 | | #endif |
405 | | |
406 | 5 | php_info_print_table_end(); |
407 | | |
408 | 5 | DISPLAY_INI_ENTRIES(); |
409 | 5 | } |
410 | | /* }}} */ |
411 | | |
412 | | /* {{{ PHP_MINIT_FUNCTION(pcre) */ |
413 | | static PHP_MINIT_FUNCTION(pcre) |
414 | 16 | { |
415 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
416 | | if (UNEXPECTED(!pcre2_init_ok)) { |
417 | | /* Retry. */ |
418 | | php_pcre_init_pcre2(PCRE_G(jit)); |
419 | | if (!pcre2_init_ok) { |
420 | | return FAILURE; |
421 | | } |
422 | | } |
423 | | #endif |
424 | | |
425 | 16 | REGISTER_INI_ENTRIES(); |
426 | | |
427 | 16 | php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION); |
428 | | |
429 | 16 | register_php_pcre_symbols(module_number); |
430 | | |
431 | 16 | return SUCCESS; |
432 | 16 | } |
433 | | /* }}} */ |
434 | | |
435 | | /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */ |
436 | | static PHP_MSHUTDOWN_FUNCTION(pcre) |
437 | 0 | { |
438 | 0 | UNREGISTER_INI_ENTRIES(); |
439 | |
|
440 | 0 | free(php_pcre_version); |
441 | |
|
442 | 0 | return SUCCESS; |
443 | 0 | } |
444 | | /* }}} */ |
445 | | |
446 | | /* {{{ PHP_RINIT_FUNCTION(pcre) */ |
447 | | static PHP_RINIT_FUNCTION(pcre) |
448 | 300k | { |
449 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
450 | | if (UNEXPECTED(!pcre2_init_ok)) { |
451 | | /* Retry. */ |
452 | | php_pcre_mutex_lock(); |
453 | | php_pcre_init_pcre2(PCRE_G(jit)); |
454 | | if (!pcre2_init_ok) { |
455 | | php_pcre_mutex_unlock(); |
456 | | return FAILURE; |
457 | | } |
458 | | php_pcre_mutex_unlock(); |
459 | | } |
460 | | |
461 | | mdata_used = 0; |
462 | | #endif |
463 | | |
464 | 300k | PCRE_G(error_code) = PHP_PCRE_NO_ERROR; |
465 | 300k | PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL); |
466 | 300k | if (!PCRE_G(gctx_zmm)) { |
467 | 0 | return FAILURE; |
468 | 0 | } |
469 | | |
470 | 300k | return SUCCESS; |
471 | 300k | } |
472 | | /* }}} */ |
473 | | |
474 | | static PHP_RSHUTDOWN_FUNCTION(pcre) |
475 | 300k | { |
476 | 300k | pcre_cache_entry *pce; |
477 | 57.8M | ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) { |
478 | 57.8M | if (pce->subpats_table) { |
479 | 0 | free_subpats_table(pce->subpats_table, pce->capture_count + 1); |
480 | 0 | pce->subpats_table = NULL; |
481 | 0 | } |
482 | 57.8M | } ZEND_HASH_FOREACH_END(); |
483 | | |
484 | 300k | pcre2_general_context_free(PCRE_G(gctx_zmm)); |
485 | 300k | PCRE_G(gctx_zmm) = NULL; |
486 | | |
487 | 300k | zval_ptr_dtor(&PCRE_G(unmatched_null_pair)); |
488 | 300k | zval_ptr_dtor(&PCRE_G(unmatched_empty_pair)); |
489 | 300k | ZVAL_UNDEF(&PCRE_G(unmatched_null_pair)); |
490 | 300k | ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair)); |
491 | 300k | return SUCCESS; |
492 | 300k | } |
493 | | |
494 | | /* {{{ static pcre_clean_cache */ |
495 | | static int pcre_clean_cache(zval *data, void *arg) |
496 | 0 | { |
497 | 0 | pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); |
498 | 0 | int *num_clean = (int *)arg; |
499 | |
|
500 | 0 | if (!pce->refcount) { |
501 | 0 | if (--(*num_clean) == 0) { |
502 | 0 | return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP; |
503 | 0 | } |
504 | 0 | return ZEND_HASH_APPLY_REMOVE; |
505 | 0 | } else { |
506 | 0 | return ZEND_HASH_APPLY_KEEP; |
507 | 0 | } |
508 | 0 | } |
509 | | /* }}} */ |
510 | | |
511 | 0 | static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) { |
512 | 0 | uint32_t i; |
513 | 0 | for (i = 0; i < num_subpats; i++) { |
514 | 0 | if (subpat_names[i]) { |
515 | 0 | zend_string_release_ex(subpat_names[i], false); |
516 | 0 | } |
517 | 0 | } |
518 | 0 | efree(subpat_names); |
519 | 0 | } |
520 | | |
521 | | /* {{{ static make_subpats_table */ |
522 | | static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce) |
523 | 0 | { |
524 | 0 | uint32_t num_subpats = pce->capture_count + 1; |
525 | 0 | uint32_t name_size, ni = 0; |
526 | 0 | char *name_table; |
527 | 0 | zend_string **subpat_names; |
528 | 0 | int rc1, rc2; |
529 | |
|
530 | 0 | rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table); |
531 | 0 | rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size); |
532 | 0 | if (rc1 < 0 || rc2 < 0) { |
533 | 0 | php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2); |
534 | 0 | return NULL; |
535 | 0 | } |
536 | | |
537 | 0 | subpat_names = ecalloc(num_subpats, sizeof(zend_string *)); |
538 | 0 | while (ni++ < name_cnt) { |
539 | 0 | unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; |
540 | 0 | const char *name = name_table + 2; |
541 | 0 | subpat_names[name_idx] = zend_string_init(name, strlen(name), false); |
542 | 0 | name_table += name_size; |
543 | 0 | } |
544 | 0 | return subpat_names; |
545 | 0 | } |
546 | | /* }}} */ |
547 | | |
548 | | static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce) |
549 | 0 | { |
550 | 0 | if (!pce->subpats_table) { |
551 | 0 | pce->subpats_table = make_subpats_table(name_cnt, pce); |
552 | 0 | } |
553 | 0 | return pce->subpats_table; |
554 | 0 | } |
555 | | |
556 | | /* {{{ static calculate_unit_length */ |
557 | | /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */ |
558 | | static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start) |
559 | 81 | { |
560 | 81 | size_t unit_len; |
561 | | |
562 | 81 | if (pce->compile_options & PCRE2_UTF) { |
563 | 18 | const char *end = start; |
564 | | |
565 | | /* skip continuation bytes */ |
566 | 18 | while ((*++end & 0xC0) == 0x80); |
567 | 18 | unit_len = end - start; |
568 | 63 | } else { |
569 | 63 | unit_len = 1; |
570 | 63 | } |
571 | 81 | return unit_len; |
572 | 81 | } |
573 | | /* }}} */ |
574 | | |
575 | | /* {{{ pcre_get_compiled_regex_cache */ |
576 | | PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware) |
577 | 5.36k | { |
578 | 5.36k | pcre2_code *re = NULL; |
579 | | #if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE) |
580 | | uint32_t coptions = PCRE2_NO_START_OPTIMIZE; |
581 | | #else |
582 | 5.36k | uint32_t coptions = 0; |
583 | 5.36k | #endif |
584 | 5.36k | uint32_t eoptions = 0; |
585 | 5.36k | PCRE2_UCHAR error[128]; |
586 | 5.36k | PCRE2_SIZE erroffset; |
587 | 5.36k | int errnumber; |
588 | 5.36k | char delimiter; |
589 | 5.36k | char start_delimiter; |
590 | 5.36k | char end_delimiter; |
591 | 5.36k | char *p, *pp; |
592 | 5.36k | char *pattern; |
593 | 5.36k | size_t pattern_len; |
594 | 5.36k | uint32_t poptions = 0; |
595 | 5.36k | const uint8_t *tables = NULL; |
596 | 5.36k | zval *zv; |
597 | 5.36k | pcre_cache_entry new_entry; |
598 | 5.36k | int rc; |
599 | 5.36k | zend_string *key; |
600 | 5.36k | pcre_cache_entry *ret; |
601 | | |
602 | 5.36k | if (locale_aware && BG(ctype_string)) { |
603 | 0 | key = zend_string_concat2( |
604 | 0 | ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), |
605 | 0 | ZSTR_VAL(regex), ZSTR_LEN(regex)); |
606 | 5.36k | } else { |
607 | 5.36k | key = regex; |
608 | 5.36k | } |
609 | | |
610 | | /* Try to lookup the cached regex entry, and if successful, just pass |
611 | | back the compiled pattern, otherwise go on and compile it. */ |
612 | 5.36k | zv = zend_hash_find(&PCRE_G(pcre_cache), key); |
613 | 5.36k | if (zv) { |
614 | 2.83k | if (key != regex) { |
615 | 0 | zend_string_release_ex(key, 0); |
616 | 0 | } |
617 | 2.83k | return (pcre_cache_entry*)Z_PTR_P(zv); |
618 | 2.83k | } |
619 | | |
620 | 2.53k | p = ZSTR_VAL(regex); |
621 | 2.53k | const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex); |
622 | | |
623 | | /* Parse through the leading whitespace, and display a warning if we |
624 | | get to the end without encountering a delimiter. */ |
625 | 2.53k | while (isspace((int)*(unsigned char *)p)) p++; |
626 | 2.53k | if (p >= end_p) { |
627 | 0 | if (key != regex) { |
628 | 0 | zend_string_release_ex(key, 0); |
629 | 0 | } |
630 | 0 | php_error_docref(NULL, E_WARNING, "Empty regular expression"); |
631 | 0 | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
632 | 0 | return NULL; |
633 | 0 | } |
634 | | |
635 | | /* Get the delimiter and display a warning if it is alphanumeric |
636 | | or a backslash. */ |
637 | 2.53k | delimiter = *p++; |
638 | 2.53k | if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') { |
639 | 15 | if (key != regex) { |
640 | 0 | zend_string_release_ex(key, 0); |
641 | 0 | } |
642 | 15 | php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte"); |
643 | 15 | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
644 | 15 | return NULL; |
645 | 15 | } |
646 | | |
647 | 2.52k | start_delimiter = delimiter; |
648 | 2.52k | if ((pp = strchr("([{< )]}> )]}>", delimiter))) |
649 | 59 | delimiter = pp[5]; |
650 | 2.52k | end_delimiter = delimiter; |
651 | | |
652 | 2.52k | pp = p; |
653 | | |
654 | 2.52k | if (start_delimiter == end_delimiter) { |
655 | | /* We need to iterate through the pattern, searching for the ending delimiter, |
656 | | but skipping the backslashed delimiters. If the ending delimiter is not |
657 | | found, display a warning. */ |
658 | 1.46M | while (pp < end_p) { |
659 | 1.46M | if (*pp == '\\' && pp + 1 < end_p) pp++; |
660 | 1.40M | else if (*pp == delimiter) |
661 | 2.44k | break; |
662 | 1.46M | pp++; |
663 | 1.46M | } |
664 | 2.46k | } else { |
665 | | /* We iterate through the pattern, searching for the matching ending |
666 | | * delimiter. For each matching starting delimiter, we increment nesting |
667 | | * level, and decrement it for each matching ending delimiter. If we |
668 | | * reach the end of the pattern without matching, display a warning. |
669 | | */ |
670 | 53 | int brackets = 1; /* brackets nesting level */ |
671 | 23.7k | while (pp < end_p) { |
672 | 23.6k | if (*pp == '\\' && pp + 1 < end_p) pp++; |
673 | 23.2k | else if (*pp == end_delimiter && --brackets <= 0) |
674 | 2 | break; |
675 | 23.2k | else if (*pp == start_delimiter) |
676 | 1.28k | brackets++; |
677 | 23.6k | pp++; |
678 | 23.6k | } |
679 | 53 | } |
680 | | |
681 | 2.52k | if (pp >= end_p) { |
682 | 76 | if (key != regex) { |
683 | 0 | zend_string_release_ex(key, 0); |
684 | 0 | } |
685 | 76 | if (start_delimiter == end_delimiter) { |
686 | 25 | php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter); |
687 | 51 | } else { |
688 | 51 | php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter); |
689 | 51 | } |
690 | 76 | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
691 | 76 | return NULL; |
692 | 76 | } |
693 | | |
694 | | /* Make a copy of the actual pattern. */ |
695 | 2.44k | pattern_len = pp - p; |
696 | 2.44k | pattern = estrndup(p, pattern_len); |
697 | | |
698 | | /* Move on to the options */ |
699 | 2.44k | pp++; |
700 | | |
701 | | /* Parse through the options, setting appropriate flags. Display |
702 | | a warning if we encounter an unknown modifier. */ |
703 | 5.74k | while (pp < end_p) { |
704 | 3.35k | switch (*pp++) { |
705 | | /* Perl compatible options */ |
706 | 1.29k | case 'i': coptions |= PCRE2_CASELESS; break; |
707 | 103 | case 'm': coptions |= PCRE2_MULTILINE; break; |
708 | 10 | case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break; |
709 | 156 | case 's': coptions |= PCRE2_DOTALL; break; |
710 | 0 | case 'x': coptions |= PCRE2_EXTENDED; break; |
711 | | |
712 | | /* PCRE specific options */ |
713 | 365 | case 'A': coptions |= PCRE2_ANCHORED; break; |
714 | 0 | case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break; |
715 | 0 | #ifdef PCRE2_EXTRA_CASELESS_RESTRICT |
716 | 1 | case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break; |
717 | 0 | #endif |
718 | 1 | case 'S': /* Pass. */ break; |
719 | 1 | case 'X': /* Pass. */ break; |
720 | 215 | case 'U': coptions |= PCRE2_UNGREEDY; break; |
721 | 872 | case 'u': coptions |= PCRE2_UTF; |
722 | | /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII |
723 | | characters, even in UTF-8 mode. However, this can be changed by setting |
724 | | the PCRE2_UCP option. */ |
725 | 872 | #ifdef PCRE2_UCP |
726 | 872 | coptions |= PCRE2_UCP; |
727 | 872 | #endif |
728 | 872 | break; |
729 | 0 | case 'J': coptions |= PCRE2_DUPNAMES; break; |
730 | | |
731 | 45 | case ' ': |
732 | 269 | case '\n': |
733 | 277 | case '\r': |
734 | 277 | break; |
735 | | |
736 | 3 | case 'e': /* legacy eval */ |
737 | 53 | default: |
738 | 53 | if (pp[-1]) { |
739 | 27 | php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]); |
740 | 27 | } else { |
741 | 26 | php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier"); |
742 | 26 | } |
743 | 53 | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
744 | 53 | efree(pattern); |
745 | 53 | if (key != regex) { |
746 | 0 | zend_string_release_ex(key, 0); |
747 | 0 | } |
748 | 53 | return NULL; |
749 | 3.35k | } |
750 | 3.35k | } |
751 | | |
752 | 2.39k | if (key != regex) { |
753 | 0 | tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string)); |
754 | 0 | if (!tables) { |
755 | 0 | zend_string *_k; |
756 | 0 | tables = pcre2_maketables(gctx); |
757 | 0 | if (UNEXPECTED(!tables)) { |
758 | 0 | php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables"); |
759 | 0 | pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY); |
760 | 0 | zend_string_release_ex(key, 0); |
761 | 0 | efree(pattern); |
762 | 0 | return NULL; |
763 | 0 | } |
764 | 0 | _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1); |
765 | 0 | GC_MAKE_PERSISTENT_LOCAL(_k); |
766 | 0 | zend_hash_add_ptr(&char_tables, _k, (void *)tables); |
767 | 0 | zend_string_release(_k); |
768 | 0 | } |
769 | 0 | } |
770 | 2.39k | pcre2_set_character_tables(cctx, tables); |
771 | | |
772 | 2.39k | pcre2_set_compile_extra_options(cctx, eoptions); |
773 | | |
774 | | /* Compile pattern and display a warning if compilation failed. */ |
775 | 2.39k | re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx); |
776 | | |
777 | 2.39k | if (re == NULL) { |
778 | 1.22k | if (key != regex) { |
779 | 0 | zend_string_release_ex(key, 0); |
780 | 0 | } |
781 | 1.22k | pcre2_get_error_message(errnumber, error, sizeof(error)); |
782 | 1.22k | php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset); |
783 | 1.22k | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
784 | 1.22k | efree(pattern); |
785 | 1.22k | return NULL; |
786 | 1.22k | } |
787 | | |
788 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
789 | | if (PCRE_G(jit)) { |
790 | | /* Enable PCRE JIT compiler */ |
791 | | rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); |
792 | | if (EXPECTED(rc >= 0)) { |
793 | | size_t jit_size = 0; |
794 | | if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) { |
795 | | poptions |= PREG_JIT; |
796 | | } |
797 | | } else if (rc == PCRE2_ERROR_NOMEMORY) { |
798 | | php_error_docref(NULL, E_WARNING, |
799 | | "Allocation of JIT memory failed, PCRE JIT will be disabled. " |
800 | | "This is likely caused by security restrictions. " |
801 | | "Either grant PHP permission to allocate executable memory, or set pcre.jit=0"); |
802 | | PCRE_G(jit) = 0; |
803 | | } else { |
804 | | pcre2_get_error_message(rc, error, sizeof(error)); |
805 | | php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error); |
806 | | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
807 | | } |
808 | | } |
809 | | #endif |
810 | 1.16k | efree(pattern); |
811 | | |
812 | | /* |
813 | | * If we reached cache limit, clean out the items from the head of the list; |
814 | | * these are supposedly the oldest ones (but not necessarily the least used |
815 | | * ones). |
816 | | */ |
817 | 1.16k | if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) { |
818 | 0 | int num_clean = PCRE_CACHE_SIZE / 8; |
819 | 0 | zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean); |
820 | 0 | } |
821 | | |
822 | | /* Store the compiled pattern and extra info in the cache. */ |
823 | 1.16k | new_entry.re = re; |
824 | 1.16k | new_entry.preg_options = poptions; |
825 | 1.16k | new_entry.compile_options = coptions; |
826 | 1.16k | new_entry.refcount = 0; |
827 | 1.16k | new_entry.subpats_table = NULL; |
828 | | |
829 | 1.16k | rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count); |
830 | 1.16k | if (rc < 0) { |
831 | 0 | if (key != regex) { |
832 | 0 | zend_string_release_ex(key, 0); |
833 | 0 | } |
834 | 0 | php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc); |
835 | 0 | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
836 | 0 | return NULL; |
837 | 0 | } |
838 | | |
839 | 1.16k | rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count); |
840 | 1.16k | if (rc < 0) { |
841 | 0 | if (key != regex) { |
842 | 0 | zend_string_release_ex(key, 0); |
843 | 0 | } |
844 | 0 | php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc); |
845 | 0 | pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); |
846 | 0 | return NULL; |
847 | 0 | } |
848 | | |
849 | | /* |
850 | | * Interned strings are not duplicated when stored in HashTable, |
851 | | * but all the interned strings created during HTTP request are removed |
852 | | * at end of request. However PCRE_G(pcre_cache) must be consistent |
853 | | * on the next request as well. So we disable usage of interned strings |
854 | | * as hash keys especually for this table. |
855 | | * See bug #63180 |
856 | | */ |
857 | 1.16k | if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) { |
858 | 309 | zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1); |
859 | 309 | GC_MAKE_PERSISTENT_LOCAL(str); |
860 | | |
861 | 309 | ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry)); |
862 | 309 | zend_string_release(str); |
863 | 857 | } else { |
864 | 857 | ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry)); |
865 | 857 | } |
866 | | |
867 | 1.16k | if (key != regex) { |
868 | 0 | zend_string_release_ex(key, 0); |
869 | 0 | } |
870 | | |
871 | 1.16k | return ret; |
872 | 1.16k | } |
873 | | /* }}} */ |
874 | | |
/* {{{ pcre_get_compiled_regex_cache */
/*
 * Convenience entry point: forwards to pcre_get_compiled_regex_cache_ex()
 * with its second argument fixed to true and returns that function's result
 * unchanged (callers in this file treat NULL as "pattern could not be
 * compiled").
 */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
	return pcre_get_compiled_regex_cache_ex(regex, true);
}
/* }}} */
881 | | |
882 | | /* {{{ pcre_get_compiled_regex */ |
883 | | PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count) |
884 | 0 | { |
885 | 0 | pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex); |
886 | |
|
887 | 0 | if (capture_count) { |
888 | 0 | *capture_count = pce ? pce->capture_count : 0; |
889 | 0 | } |
890 | |
|
891 | 0 | return pce ? pce->re : NULL; |
892 | 0 | } |
893 | | /* }}} */ |
894 | | |
895 | | /* XXX For the cases where it's only about match yes/no and no capture |
896 | | required, perhaps just a minimum sized data would suffice. */ |
897 | | PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re) |
898 | 0 | {/*{{{*/ |
899 | |
|
900 | 0 | assert(NULL != re); |
901 | | |
902 | 0 | if (EXPECTED(!mdata_used)) { |
903 | 0 | int rc = 0; |
904 | |
|
905 | 0 | if (!capture_count) { |
906 | | /* As we deal with a non cached pattern, no other way to gather this info. */ |
907 | 0 | rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count); |
908 | 0 | } |
909 | |
|
910 | 0 | if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) { |
911 | 0 | mdata_used = 1; |
912 | 0 | return mdata; |
913 | 0 | } |
914 | 0 | } |
915 | | |
916 | 0 | return pcre2_match_data_create_from_pattern(re, gctx); |
917 | 0 | }/*}}}*/ |
918 | | |
919 | | PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data) |
920 | 0 | {/*{{{*/ |
921 | 0 | if (UNEXPECTED(match_data != mdata)) { |
922 | 0 | pcre2_match_data_free(match_data); |
923 | 0 | } else { |
924 | 0 | mdata_used = 0; |
925 | 0 | } |
926 | 0 | }/*}}}*/ |
927 | | |
928 | 0 | static void init_unmatched_null_pair(zval *pair) { |
929 | 0 | zval val1, val2; |
930 | 0 | ZVAL_NULL(&val1); |
931 | 0 | ZVAL_LONG(&val2, -1); |
932 | 0 | ZVAL_ARR(pair, zend_new_pair(&val1, &val2)); |
933 | 0 | } |
934 | | |
935 | 0 | static void init_unmatched_empty_pair(zval *pair) { |
936 | 0 | zval val1, val2; |
937 | 0 | ZVAL_EMPTY_STRING(&val1); |
938 | 0 | ZVAL_LONG(&val2, -1); |
939 | 0 | ZVAL_ARR(pair, zend_new_pair(&val1, &val2)); |
940 | 0 | } |
941 | | |
942 | | static zend_always_inline void populate_match_value_str( |
943 | 355 | zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) { |
944 | 355 | ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset); |
945 | 355 | } |
946 | | |
947 | | static zend_always_inline void populate_match_value( |
948 | | zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, |
949 | 355 | bool unmatched_as_null) { |
950 | 355 | if (PCRE2_UNSET == start_offset) { |
951 | 0 | if (unmatched_as_null) { |
952 | 0 | ZVAL_NULL(val); |
953 | 0 | } else { |
954 | 0 | ZVAL_EMPTY_STRING(val); |
955 | 0 | } |
956 | 355 | } else { |
957 | 355 | populate_match_value_str(val, subject, start_offset, end_offset); |
958 | 355 | } |
959 | 355 | } |
960 | | |
961 | | static inline void add_named( |
962 | 0 | HashTable *const subpats, zend_string *name, zval *val, bool unmatched) { |
963 | 0 | ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT)); |
964 | | |
965 | | /* If the DUPNAMES option is used, multiple subpatterns might have the same name. |
966 | | * In this case we want to preserve the one that actually has a value. */ |
967 | 0 | if (!unmatched) { |
968 | 0 | zend_hash_update(subpats, name, val); |
969 | 0 | } else { |
970 | 0 | if (!zend_hash_add(subpats, name, val)) { |
971 | 0 | return; |
972 | 0 | } |
973 | 0 | } |
974 | 0 | Z_TRY_ADDREF_P(val); |
975 | 0 | } |
976 | | |
977 | | /* {{{ add_offset_pair */ |
978 | | static inline void add_offset_pair( |
979 | | HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, |
980 | | zend_string *name, zend_long unmatched_as_null) |
981 | 0 | { |
982 | 0 | zval match_pair; |
983 | | |
984 | | /* Add (match, offset) to the return value */ |
985 | 0 | if (PCRE2_UNSET == start_offset) { |
986 | 0 | if (unmatched_as_null) { |
987 | 0 | do { |
988 | 0 | if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) { |
989 | 0 | if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) { |
990 | 0 | init_unmatched_null_pair(&match_pair); |
991 | 0 | break; |
992 | 0 | } else { |
993 | 0 | init_unmatched_null_pair(&PCRE_G(unmatched_null_pair)); |
994 | 0 | } |
995 | 0 | } |
996 | 0 | ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair)); |
997 | 0 | } while (0); |
998 | 0 | } else { |
999 | 0 | do { |
1000 | 0 | if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) { |
1001 | 0 | if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) { |
1002 | 0 | init_unmatched_empty_pair(&match_pair); |
1003 | 0 | break; |
1004 | 0 | } else { |
1005 | 0 | init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair)); |
1006 | 0 | } |
1007 | 0 | } |
1008 | 0 | ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair)); |
1009 | 0 | } while (0); |
1010 | 0 | } |
1011 | 0 | } else { |
1012 | 0 | zval val1, val2; |
1013 | 0 | populate_match_value_str(&val1, subject, start_offset, end_offset); |
1014 | 0 | ZVAL_LONG(&val2, start_offset); |
1015 | 0 | ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2)); |
1016 | 0 | } |
1017 | | |
1018 | 0 | if (name) { |
1019 | 0 | add_named(result, name, &match_pair, start_offset == PCRE2_UNSET); |
1020 | 0 | } |
1021 | 0 | zend_hash_next_index_insert_new(result, &match_pair); |
1022 | 0 | } |
1023 | | /* }}} */ |
1024 | | |
1025 | | static void populate_subpat_array( |
1026 | | HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, |
1027 | 184 | uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { |
1028 | 184 | zend_long offset_capture = flags & PREG_OFFSET_CAPTURE; |
1029 | 184 | zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL; |
1030 | 184 | zval val; |
1031 | 184 | int i; |
1032 | 184 | if (subpat_names) { |
1033 | 0 | if (offset_capture) { |
1034 | 0 | for (i = 0; i < count; i++) { |
1035 | 0 | add_offset_pair( |
1036 | 0 | subpats_ht, subject, offsets[2*i], offsets[2*i+1], |
1037 | 0 | subpat_names[i], unmatched_as_null); |
1038 | 0 | } |
1039 | 0 | if (unmatched_as_null) { |
1040 | 0 | for (i = count; i < num_subpats; i++) { |
1041 | 0 | add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); |
1042 | 0 | } |
1043 | 0 | } |
1044 | 0 | } else { |
1045 | 0 | for (i = 0; i < count; i++) { |
1046 | 0 | populate_match_value( |
1047 | 0 | &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); |
1048 | 0 | if (subpat_names[i]) { |
1049 | 0 | add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET); |
1050 | 0 | } |
1051 | 0 | zend_hash_next_index_insert_new(subpats_ht, &val); |
1052 | 0 | } |
1053 | 0 | if (unmatched_as_null) { |
1054 | 0 | for (i = count; i < num_subpats; i++) { |
1055 | 0 | ZVAL_NULL(&val); |
1056 | 0 | if (subpat_names[i]) { |
1057 | 0 | zend_hash_add(subpats_ht, subpat_names[i], &val); |
1058 | 0 | } |
1059 | 0 | zend_hash_next_index_insert_new(subpats_ht, &val); |
1060 | 0 | } |
1061 | 0 | } |
1062 | 0 | } |
1063 | 184 | } else { |
1064 | 184 | if (offset_capture) { |
1065 | 0 | for (i = 0; i < count; i++) { |
1066 | 0 | add_offset_pair( |
1067 | 0 | subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); |
1068 | 0 | } |
1069 | 0 | if (unmatched_as_null) { |
1070 | 0 | for (i = count; i < num_subpats; i++) { |
1071 | 0 | add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); |
1072 | 0 | } |
1073 | 0 | } |
1074 | 184 | } else { |
1075 | 539 | for (i = 0; i < count; i++) { |
1076 | 355 | populate_match_value( |
1077 | 355 | &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); |
1078 | 355 | zend_hash_next_index_insert_new(subpats_ht, &val); |
1079 | 355 | } |
1080 | 184 | if (unmatched_as_null) { |
1081 | 0 | ZVAL_NULL(&val); |
1082 | 0 | for (i = count; i < num_subpats; i++) { |
1083 | 0 | zend_hash_next_index_insert_new(subpats_ht, &val); |
1084 | 0 | } |
1085 | 0 | } |
1086 | 184 | } |
1087 | 184 | } |
1088 | | /* Add MARK, if available */ |
1089 | 184 | if (mark) { |
1090 | 0 | ZVAL_STRING(&val, (char *)mark); |
1091 | 0 | zend_hash_str_add_new(subpats_ht, ZEND_STRL("MARK"), &val); |
1092 | 0 | } |
1093 | 184 | } |
1094 | | |
1095 | | static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */ |
1096 | 5.08k | { |
1097 | | /* parameters */ |
1098 | 5.08k | zend_string *regex; /* Regular expression */ |
1099 | 5.08k | zend_string *subject; /* String to match against */ |
1100 | 5.08k | pcre_cache_entry *pce; /* Compiled regular expression */ |
1101 | 5.08k | zval *subpats = NULL; /* Array for subpatterns */ |
1102 | 5.08k | zend_long flags = 0; /* Match control flags */ |
1103 | 5.08k | zend_long start_offset = 0; /* Where the new search starts */ |
1104 | | |
1105 | 15.2k | ZEND_PARSE_PARAMETERS_START(2, 5) |
1106 | 20.2k | Z_PARAM_STR(regex) |
1107 | 25.3k | Z_PARAM_STR(subject) |
1108 | 5.06k | Z_PARAM_OPTIONAL |
1109 | 10.1k | Z_PARAM_ZVAL(subpats) |
1110 | 10.1k | Z_PARAM_LONG(flags) |
1111 | 0 | Z_PARAM_LONG(start_offset) |
1112 | 5.08k | ZEND_PARSE_PARAMETERS_END(); |
1113 | | |
1114 | | /* Compile regex or get it from cache. */ |
1115 | 5.06k | if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { |
1116 | 1.33k | RETURN_FALSE; |
1117 | 1.33k | } |
1118 | | |
1119 | 3.72k | if (start_offset == ZEND_LONG_MIN) { |
1120 | 0 | zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN); |
1121 | 0 | RETURN_THROWS(); |
1122 | 0 | } |
1123 | | |
1124 | 3.72k | pce->refcount++; |
1125 | 3.72k | php_pcre_match_impl(pce, subject, return_value, subpats, |
1126 | 3.72k | global, flags, start_offset); |
1127 | 3.72k | pce->refcount--; |
1128 | 3.72k | } |
1129 | | /* }}} */ |
1130 | | |
1131 | | static zend_always_inline bool is_known_valid_utf8( |
1132 | 1.23k | zend_string *subject_str, PCRE2_SIZE start_offset) { |
1133 | 1.23k | if (!ZSTR_IS_VALID_UTF8(subject_str)) { |
1134 | | /* We don't know whether the string is valid UTF-8 or not. */ |
1135 | 1.23k | return 0; |
1136 | 1.23k | } |
1137 | | |
1138 | 0 | if (start_offset == ZSTR_LEN(subject_str)) { |
1139 | | /* Degenerate case: Offset points to end of string. */ |
1140 | 0 | return 1; |
1141 | 0 | } |
1142 | | |
1143 | | /* Check that the offset does not point to an UTF-8 continuation byte. */ |
1144 | 0 | return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80; |
1145 | 0 | } |
1146 | | |
1147 | | /* {{{ php_pcre_match_impl() */ |
1148 | | PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, |
1149 | | zval *subpats, bool global, zend_long flags, zend_off_t start_offset) |
1150 | 3.72k | { |
1151 | 3.72k | zval result_set; /* Holds a set of subpatterns after |
1152 | | a global match */ |
1153 | 3.72k | HashTable **match_sets = NULL; /* An array of sets of matches for each |
1154 | | subpattern after a global match */ |
1155 | 3.72k | uint32_t options; /* Execution options */ |
1156 | 3.72k | int count; /* Count of matched subpatterns */ |
1157 | 3.72k | uint32_t num_subpats; /* Number of captured subpatterns */ |
1158 | 3.72k | int matched; /* Has anything matched */ |
1159 | 3.72k | zend_string **subpat_names; /* Array for named subpatterns */ |
1160 | 3.72k | size_t i; |
1161 | 3.72k | uint32_t subpats_order; /* Order of subpattern matches */ |
1162 | 3.72k | uint32_t offset_capture; /* Capture match offsets: yes/no */ |
1163 | 3.72k | zend_long unmatched_as_null; /* Null non-matches: yes/no */ |
1164 | 3.72k | PCRE2_SPTR mark = NULL; /* Target for MARK name */ |
1165 | 3.72k | HashTable *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */ |
1166 | 3.72k | pcre2_match_data *match_data; |
1167 | 3.72k | PCRE2_SIZE start_offset2, orig_start_offset; |
1168 | | |
1169 | 3.72k | char *subject = ZSTR_VAL(subject_str); |
1170 | 3.72k | size_t subject_len = ZSTR_LEN(subject_str); |
1171 | | |
1172 | | /* Overwrite the passed-in value for subpatterns with an empty array. */ |
1173 | 3.72k | if (subpats != NULL) { |
1174 | 0 | subpats = zend_try_array_init(subpats); |
1175 | 0 | if (!subpats) { |
1176 | 0 | RETURN_THROWS(); |
1177 | 0 | } |
1178 | 0 | } |
1179 | | |
1180 | 3.72k | subpats_order = global ? PREG_PATTERN_ORDER : 0; |
1181 | | |
1182 | 3.72k | if (flags) { |
1183 | 0 | offset_capture = flags & PREG_OFFSET_CAPTURE; |
1184 | 0 | unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL; |
1185 | | |
1186 | | /* |
1187 | | * subpats_order is pre-set to pattern mode so we change it only if |
1188 | | * necessary. |
1189 | | */ |
1190 | 0 | if (flags & 0xff) { |
1191 | 0 | subpats_order = flags & 0xff; |
1192 | 0 | if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || |
1193 | 0 | (!global && subpats_order != 0)) { |
1194 | 0 | zend_argument_value_error(4, "must be a PREG_* constant"); |
1195 | 0 | RETURN_THROWS(); |
1196 | 0 | } |
1197 | 0 | } |
1198 | 3.72k | } else { |
1199 | 3.72k | offset_capture = 0; |
1200 | 3.72k | unmatched_as_null = 0; |
1201 | 3.72k | } |
1202 | | |
1203 | | /* Negative offset counts from the end of the string. */ |
1204 | 3.72k | if (start_offset < 0) { |
1205 | 0 | if ((PCRE2_SIZE)-start_offset <= subject_len) { |
1206 | 0 | start_offset2 = subject_len + start_offset; |
1207 | 0 | } else { |
1208 | 0 | start_offset2 = 0; |
1209 | 0 | } |
1210 | 3.72k | } else { |
1211 | 3.72k | start_offset2 = (PCRE2_SIZE)start_offset; |
1212 | 3.72k | } |
1213 | | |
1214 | 3.72k | if (start_offset2 > subject_len) { |
1215 | 0 | pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET); |
1216 | 0 | RETURN_FALSE; |
1217 | 0 | } |
1218 | | |
1219 | | /* Calculate the size of the offsets array, and allocate memory for it. */ |
1220 | 3.72k | num_subpats = pce->capture_count + 1; |
1221 | | |
1222 | | /* |
1223 | | * Build a mapping from subpattern numbers to their names. We will |
1224 | | * allocate the table only if there are any named subpatterns. |
1225 | | */ |
1226 | 3.72k | subpat_names = NULL; |
1227 | 3.72k | if (subpats && pce->name_count > 0) { |
1228 | 0 | subpat_names = ensure_subpats_table(pce->name_count, pce); |
1229 | 0 | if (UNEXPECTED(!subpat_names)) { |
1230 | 0 | RETURN_FALSE; |
1231 | 0 | } |
1232 | 0 | } |
1233 | | |
1234 | 3.72k | matched = 0; |
1235 | 3.72k | PCRE_G(error_code) = PHP_PCRE_NO_ERROR; |
1236 | | |
1237 | 3.72k | if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { |
1238 | 3.68k | match_data = mdata; |
1239 | 3.68k | } else { |
1240 | 37 | match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm)); |
1241 | 37 | if (!match_data) { |
1242 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
1243 | 0 | RETURN_FALSE; |
1244 | 0 | } |
1245 | 37 | } |
1246 | | |
1247 | | /* Allocate match sets array and initialize the values. */ |
1248 | 3.72k | if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { |
1249 | 0 | match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0); |
1250 | 0 | for (i=0; i<num_subpats; i++) { |
1251 | 0 | match_sets[i] = zend_new_array(0); |
1252 | 0 | } |
1253 | 0 | } |
1254 | | |
1255 | | /* Array of subpattern offsets */ |
1256 | 3.72k | PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); |
1257 | | |
1258 | 3.72k | orig_start_offset = start_offset2; |
1259 | 3.72k | options = |
1260 | 3.72k | (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset) |
1261 | 3.72k | ? 0 : PCRE2_NO_UTF_CHECK; |
1262 | | |
1263 | | /* Execute the regular expression. */ |
1264 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
1265 | | if ((pce->preg_options & PREG_JIT) && options) { |
1266 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2, |
1267 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1268 | | } else |
1269 | | #endif |
1270 | 3.72k | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2, |
1271 | 3.72k | options, match_data, mctx); |
1272 | | |
1273 | 3.72k | while (1) { |
1274 | | /* If something has matched */ |
1275 | 3.72k | if (count >= 0) { |
1276 | | /* Check for too many substrings condition. */ |
1277 | 238 | if (UNEXPECTED(count == 0)) { |
1278 | 0 | php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings"); |
1279 | 0 | count = num_subpats; |
1280 | 0 | } |
1281 | | |
1282 | 238 | matched: |
1283 | 238 | matched++; |
1284 | | |
1285 | | /* If subpatterns array has been passed, fill it in with values. */ |
1286 | 238 | if (subpats != NULL) { |
1287 | | /* Try to get the list of substrings and display a warning if failed. */ |
1288 | 0 | if (UNEXPECTED(offsets[1] < offsets[0])) { |
1289 | 0 | if (match_sets) efree(match_sets); |
1290 | 0 | php_error_docref(NULL, E_WARNING, "Get subpatterns list failed"); |
1291 | 0 | RETURN_FALSE; |
1292 | 0 | } |
1293 | | |
1294 | 0 | if (global) { /* global pattern matching */ |
1295 | 0 | if (subpats_order == PREG_PATTERN_ORDER) { |
1296 | | /* For each subpattern, insert it into the appropriate array. */ |
1297 | 0 | if (offset_capture) { |
1298 | 0 | for (i = 0; i < count; i++) { |
1299 | 0 | add_offset_pair( |
1300 | 0 | match_sets[i], subject, offsets[2*i], offsets[2*i+1], |
1301 | 0 | NULL, unmatched_as_null); |
1302 | 0 | } |
1303 | 0 | } else { |
1304 | 0 | for (i = 0; i < count; i++) { |
1305 | 0 | zval val; |
1306 | 0 | populate_match_value( |
1307 | 0 | &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); |
1308 | 0 | zend_hash_next_index_insert_new(match_sets[i], &val); |
1309 | 0 | } |
1310 | 0 | } |
1311 | 0 | mark = pcre2_get_mark(match_data); |
1312 | | /* Add MARK, if available */ |
1313 | 0 | if (mark) { |
1314 | 0 | if (!marks) { |
1315 | 0 | marks = zend_new_array(0); |
1316 | 0 | } |
1317 | 0 | zval tmp; |
1318 | 0 | ZVAL_STRING(&tmp, (char *) mark); |
1319 | 0 | zend_hash_index_add_new(marks, matched - 1, &tmp); |
1320 | 0 | } |
1321 | | /* |
1322 | | * If the number of captured subpatterns on this run is |
1323 | | * less than the total possible number, pad the result |
1324 | | * arrays with NULLs or empty strings. |
1325 | | */ |
1326 | 0 | if (count < num_subpats) { |
1327 | 0 | for (int i = count; i < num_subpats; i++) { |
1328 | 0 | if (offset_capture) { |
1329 | 0 | add_offset_pair( |
1330 | 0 | match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET, |
1331 | 0 | NULL, unmatched_as_null); |
1332 | 0 | } else if (unmatched_as_null) { |
1333 | 0 | zval tmp; |
1334 | 0 | ZVAL_NULL(&tmp); |
1335 | 0 | zend_hash_next_index_insert_new(match_sets[i], &tmp); |
1336 | 0 | } else { |
1337 | 0 | zval tmp; |
1338 | 0 | ZVAL_EMPTY_STRING(&tmp); |
1339 | 0 | zend_hash_next_index_insert_new(match_sets[i], &tmp); |
1340 | 0 | } |
1341 | 0 | } |
1342 | 0 | } |
1343 | 0 | } else { |
1344 | | /* Allocate and populate the result set array */ |
1345 | 0 | mark = pcre2_get_mark(match_data); |
1346 | 0 | array_init_size(&result_set, count + (mark ? 1 : 0)); |
1347 | 0 | populate_subpat_array( |
1348 | 0 | Z_ARRVAL(result_set), subject, offsets, subpat_names, |
1349 | 0 | num_subpats, count, mark, flags); |
1350 | | /* And add it to the output array */ |
1351 | 0 | zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set); |
1352 | 0 | } |
1353 | 0 | } else { /* single pattern matching */ |
1354 | | /* For each subpattern, insert it into the subpatterns array. */ |
1355 | 0 | mark = pcre2_get_mark(match_data); |
1356 | 0 | populate_subpat_array( |
1357 | 0 | Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags); |
1358 | 0 | break; |
1359 | 0 | } |
1360 | 0 | } |
1361 | | |
1362 | | /* Advance to the next piece. */ |
1363 | 238 | start_offset2 = offsets[1]; |
1364 | | |
1365 | | /* If we have matched an empty string, mimic what Perl's /g options does. |
1366 | | This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try |
1367 | | the match again at the same point. If this fails (picked up above) we |
1368 | | advance to the next character. */ |
1369 | 238 | if (start_offset2 == offsets[0]) { |
1370 | 106 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2, |
1371 | 106 | PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); |
1372 | 106 | if (count >= 0) { |
1373 | 17 | if (global) { |
1374 | 0 | goto matched; |
1375 | 17 | } else { |
1376 | 17 | break; |
1377 | 17 | } |
1378 | 89 | } else if (count == PCRE2_ERROR_NOMATCH) { |
1379 | | /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match, |
1380 | | this is not necessarily the end. We need to advance |
1381 | | the start offset, and continue. Fudge the offset values |
1382 | | to achieve this, unless we're already at the end of the string. */ |
1383 | 89 | if (start_offset2 < subject_len) { |
1384 | 81 | size_t unit_len = calculate_unit_length(pce, subject + start_offset2); |
1385 | | |
1386 | 81 | start_offset2 += unit_len; |
1387 | 81 | } else { |
1388 | 8 | break; |
1389 | 8 | } |
1390 | 89 | } else { |
1391 | 0 | goto error; |
1392 | 0 | } |
1393 | 106 | } |
1394 | 3.48k | } else if (count == PCRE2_ERROR_NOMATCH) { |
1395 | 3.31k | break; |
1396 | 3.31k | } else { |
1397 | 170 | error: |
1398 | 170 | pcre_handle_exec_error(count); |
1399 | 170 | break; |
1400 | 170 | } |
1401 | | |
1402 | 213 | if (!global) { |
1403 | 213 | break; |
1404 | 213 | } |
1405 | | |
1406 | | /* Execute the regular expression. */ |
1407 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
1408 | | if ((pce->preg_options & PREG_JIT)) { |
1409 | | if (start_offset2 > subject_len) { |
1410 | | pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET); |
1411 | | break; |
1412 | | } |
1413 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2, |
1414 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1415 | | } else |
1416 | | #endif |
1417 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2, |
1418 | 0 | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1419 | 0 | } |
1420 | 3.72k | if (match_data != mdata) { |
1421 | 37 | pcre2_match_data_free(match_data); |
1422 | 37 | } |
1423 | | |
1424 | | /* Add the match sets to the output array and clean up */ |
1425 | 3.72k | if (match_sets) { |
1426 | 0 | if (subpat_names) { |
1427 | 0 | for (i = 0; i < num_subpats; i++) { |
1428 | 0 | zval wrapper; |
1429 | 0 | ZVAL_ARR(&wrapper, match_sets[i]); |
1430 | 0 | if (subpat_names[i]) { |
1431 | 0 | zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper); |
1432 | 0 | GC_ADDREF(match_sets[i]); |
1433 | 0 | } |
1434 | 0 | zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper); |
1435 | 0 | } |
1436 | 0 | } else { |
1437 | 0 | for (i = 0; i < num_subpats; i++) { |
1438 | 0 | zval wrapper; |
1439 | 0 | ZVAL_ARR(&wrapper, match_sets[i]); |
1440 | 0 | zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper); |
1441 | 0 | } |
1442 | 0 | } |
1443 | 0 | efree(match_sets); |
1444 | |
|
1445 | 0 | if (marks) { |
1446 | 0 | zval tmp; |
1447 | 0 | ZVAL_ARR(&tmp, marks); |
1448 | 0 | zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp); |
1449 | 0 | } |
1450 | 0 | } |
1451 | | |
1452 | 3.72k | if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) { |
1453 | | /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */ |
1454 | 3.55k | if ((pce->compile_options & PCRE2_UTF) |
1455 | 3.55k | && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) { |
1456 | 176 | GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8); |
1457 | 176 | } |
1458 | | |
1459 | 3.55k | RETVAL_LONG(matched); |
1460 | 3.55k | } else { |
1461 | 170 | RETVAL_FALSE; |
1462 | 170 | } |
1463 | 3.72k | } |
1464 | | /* }}} */ |
1465 | | |
/* {{{ Perform a Perl-style regular expression match */
/* Userland preg_match(): runs the shared matcher with global == false, i.e. it stops after the first match. */
PHP_FUNCTION(preg_match)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
1472 | | |
1473 | | ZEND_FRAMELESS_FUNCTION(preg_match, 2) |
1474 | 0 | { |
1475 | 0 | zval regex_tmp, subject_tmp; |
1476 | 0 | zend_string *regex, *subject; |
1477 | |
|
1478 | 0 | Z_FLF_PARAM_STR(1, regex, regex_tmp); |
1479 | 0 | Z_FLF_PARAM_STR(2, subject, subject_tmp); |
1480 | | |
1481 | | /* Compile regex or get it from cache. */ |
1482 | 0 | pcre_cache_entry *pce; |
1483 | 0 | if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { |
1484 | 0 | RETURN_FALSE; |
1485 | 0 | } |
1486 | | |
1487 | 0 | pce->refcount++; |
1488 | 0 | php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL, |
1489 | | /* global */ false, /* flags */ 0, /* start_offset */ 0); |
1490 | 0 | pce->refcount--; |
1491 | |
|
1492 | 0 | flf_clean: |
1493 | 0 | Z_FLF_PARAM_FREE_STR(1, regex_tmp); |
1494 | 0 | Z_FLF_PARAM_FREE_STR(2, subject_tmp); |
1495 | 0 | } |
1496 | | |
/* {{{ Perform a Perl-style global regular expression match */
/* Userland preg_match_all(): runs the shared matcher with global == true, i.e. it keeps matching after the first match. */
PHP_FUNCTION(preg_match_all)
{
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
1503 | | |
/* {{{ preg_get_backref */
/*
 * Parse a one- or two-digit backreference at *str.
 *
 * *str must point at the introducing character (the caller passes '\\' or
 * '$'). Accepted forms are <intro>N, <intro>NN and, for '$' only, ${N} and
 * ${NN}.
 *
 * On success stores the number in *backref, advances *str past the reference
 * and returns 1. On failure returns 0 and leaves *str untouched; *backref may
 * already have been written when the failure is a missing closing brace.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced = 0;

	/* The introducer is the last character: nothing can follow it. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Only "${" opens the braced form. */
	if (cursor[0] == '$' && cursor[1] == '{') {
		braced = 1;
		cursor += 2;
	} else {
		cursor += 1;
	}

	/* First digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* Optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1541 | | |
1542 | | /* Return NULL if an exception has occurred */ |
1543 | | static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) |
1544 | 184 | { |
1545 | 184 | zend_string *result_str = NULL; |
1546 | 184 | zval retval; /* Function return value */ |
1547 | 184 | zval arg; /* Argument to pass to function */ |
1548 | | |
1549 | 184 | array_init_size(&arg, count + (mark ? 1 : 0)); |
1550 | 184 | populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags); |
1551 | | |
1552 | 184 | fci->retval = &retval; |
1553 | 184 | fci->param_count = 1; |
1554 | 184 | fci->params = &arg; |
1555 | 184 | zend_call_function(fci, fcc); |
1556 | 184 | zval_ptr_dtor(&arg); |
1557 | 184 | if (EXPECTED(Z_TYPE(retval) == IS_STRING)) { |
1558 | 34 | return Z_STR(retval); |
1559 | 34 | } |
1560 | | /* No Exception has occurred */ |
1561 | 150 | else if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) { |
1562 | 135 | result_str = zval_try_get_string_func(&retval); |
1563 | 135 | } |
1564 | 150 | zval_ptr_dtor(&retval); |
1565 | | |
1566 | 150 | return result_str; |
1567 | 184 | } |
1568 | | |
1569 | | /* {{{ php_pcre_replace */ |
1570 | | PHPAPI zend_string *php_pcre_replace(zend_string *regex, |
1571 | | zend_string *subject_str, |
1572 | | const char *subject, size_t subject_len, |
1573 | | zend_string *replace_str, |
1574 | | size_t limit, size_t *replace_count) |
1575 | 153 | { |
1576 | 153 | pcre_cache_entry *pce; /* Compiled regular expression */ |
1577 | 153 | zend_string *result; /* Function result */ |
1578 | | |
1579 | | /* Abort on pending exception, e.g. thrown from __toString(). */ |
1580 | 153 | if (UNEXPECTED(EG(exception))) { |
1581 | 0 | return NULL; |
1582 | 0 | } |
1583 | | |
1584 | | /* Compile regex or get it from cache. */ |
1585 | 153 | if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { |
1586 | 25 | return NULL; |
1587 | 25 | } |
1588 | 128 | pce->refcount++; |
1589 | 128 | result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str, |
1590 | 128 | limit, replace_count); |
1591 | 128 | pce->refcount--; |
1592 | | |
1593 | 128 | return result; |
1594 | 153 | } |
1595 | | /* }}} */ |
1596 | | |
1597 | | /* {{{ php_pcre_replace_impl() */ |
1598 | | PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count) |
1599 | 128 | { |
1600 | 128 | uint32_t options; /* Execution options */ |
1601 | 128 | int count; /* Count of matched subpatterns */ |
1602 | 128 | uint32_t num_subpats; /* Number of captured subpatterns */ |
1603 | 128 | size_t new_len; /* Length of needed storage */ |
1604 | 128 | size_t alloc_len; /* Actual allocated length */ |
1605 | 128 | size_t match_len; /* Length of the current match */ |
1606 | 128 | int backref; /* Backreference number */ |
1607 | 128 | PCRE2_SIZE start_offset; /* Where the new search starts */ |
1608 | 128 | size_t last_end_offset; /* Where the last search ended */ |
1609 | 128 | char *walkbuf, /* Location of current replacement in the result */ |
1610 | 128 | *walk, /* Used to walk the replacement string */ |
1611 | 128 | walk_last; /* Last walked character */ |
1612 | 128 | const char *match, /* The current match */ |
1613 | 128 | *piece, /* The current piece of subject */ |
1614 | 128 | *replace_end; /* End of replacement string */ |
1615 | 128 | size_t result_len; /* Length of result */ |
1616 | 128 | zend_string *result; /* Result of replacement */ |
1617 | 128 | pcre2_match_data *match_data; |
1618 | | |
1619 | | /* Calculate the size of the offsets array, and allocate memory for it. */ |
1620 | 128 | num_subpats = pce->capture_count + 1; |
1621 | 128 | alloc_len = 0; |
1622 | 128 | result = NULL; |
1623 | | |
1624 | | /* Initialize */ |
1625 | 128 | match = NULL; |
1626 | 128 | start_offset = 0; |
1627 | 128 | last_end_offset = 0; |
1628 | 128 | result_len = 0; |
1629 | 128 | PCRE_G(error_code) = PHP_PCRE_NO_ERROR; |
1630 | | |
1631 | 128 | if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { |
1632 | 13 | match_data = mdata; |
1633 | 115 | } else { |
1634 | 115 | match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm)); |
1635 | 115 | if (!match_data) { |
1636 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
1637 | 0 | return NULL; |
1638 | 0 | } |
1639 | 115 | } |
1640 | | |
1641 | 128 | options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; |
1642 | | |
1643 | | /* Array of subpattern offsets */ |
1644 | 128 | PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); |
1645 | | |
1646 | | /* Execute the regular expression. */ |
1647 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
1648 | | if ((pce->preg_options & PREG_JIT) && options) { |
1649 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, |
1650 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1651 | | } else |
1652 | | #endif |
1653 | 128 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, |
1654 | 128 | options, match_data, mctx); |
1655 | | |
1656 | 182 | while (1) { |
1657 | 182 | piece = subject + last_end_offset; |
1658 | | |
1659 | 182 | if (count >= 0 && limit > 0) { |
1660 | 54 | bool simple_string; |
1661 | | |
1662 | | /* Check for too many substrings condition. */ |
1663 | 54 | if (UNEXPECTED(count == 0)) { |
1664 | 0 | php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings"); |
1665 | 0 | count = num_subpats; |
1666 | 0 | } |
1667 | | |
1668 | 54 | matched: |
1669 | 54 | if (UNEXPECTED(offsets[1] < offsets[0])) { |
1670 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
1671 | 0 | if (result) { |
1672 | 0 | zend_string_release_ex(result, 0); |
1673 | 0 | result = NULL; |
1674 | 0 | } |
1675 | 0 | break; |
1676 | 0 | } |
1677 | | |
1678 | 54 | if (replace_count) { |
1679 | 54 | ++*replace_count; |
1680 | 54 | } |
1681 | | |
1682 | | /* Set the match location in subject */ |
1683 | 54 | match = subject + offsets[0]; |
1684 | | |
1685 | 54 | new_len = result_len + offsets[0] - last_end_offset; /* part before the match */ |
1686 | | |
1687 | 54 | walk = ZSTR_VAL(replace_str); |
1688 | 54 | replace_end = walk + ZSTR_LEN(replace_str); |
1689 | 54 | walk_last = 0; |
1690 | 54 | simple_string = 1; |
1691 | 108 | while (walk < replace_end) { |
1692 | 54 | if ('\\' == *walk || '$' == *walk) { |
1693 | 0 | simple_string = 0; |
1694 | 0 | if (walk_last == '\\') { |
1695 | 0 | walk++; |
1696 | 0 | walk_last = 0; |
1697 | 0 | continue; |
1698 | 0 | } |
1699 | 0 | if (preg_get_backref(&walk, &backref)) { |
1700 | 0 | if (backref < count) |
1701 | 0 | new_len += offsets[(backref<<1)+1] - offsets[backref<<1]; |
1702 | 0 | continue; |
1703 | 0 | } |
1704 | 0 | } |
1705 | 54 | new_len++; |
1706 | 54 | walk++; |
1707 | 54 | walk_last = walk[-1]; |
1708 | 54 | } |
1709 | | |
1710 | 54 | if (new_len >= alloc_len) { |
1711 | 54 | alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD; |
1712 | 54 | if (result == NULL) { |
1713 | 52 | result = zend_string_alloc(alloc_len, 0); |
1714 | 52 | } else { |
1715 | 2 | result = zend_string_extend(result, alloc_len, 0); |
1716 | 2 | } |
1717 | 54 | } |
1718 | | |
1719 | 54 | if (match-piece > 0) { |
1720 | | /* copy the part of the string before the match */ |
1721 | 54 | memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece); |
1722 | 54 | result_len += (match-piece); |
1723 | 54 | } |
1724 | | |
1725 | 54 | if (simple_string) { |
1726 | | /* copy replacement */ |
1727 | 54 | memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1); |
1728 | 54 | result_len += ZSTR_LEN(replace_str); |
1729 | 54 | } else { |
1730 | | /* copy replacement and backrefs */ |
1731 | 0 | walkbuf = ZSTR_VAL(result) + result_len; |
1732 | |
|
1733 | 0 | walk = ZSTR_VAL(replace_str); |
1734 | 0 | walk_last = 0; |
1735 | 0 | while (walk < replace_end) { |
1736 | 0 | if ('\\' == *walk || '$' == *walk) { |
1737 | 0 | if (walk_last == '\\') { |
1738 | 0 | *(walkbuf-1) = *walk++; |
1739 | 0 | walk_last = 0; |
1740 | 0 | continue; |
1741 | 0 | } |
1742 | 0 | if (preg_get_backref(&walk, &backref)) { |
1743 | 0 | if (backref < count) { |
1744 | 0 | if (offsets[backref<<1] < SIZE_MAX) { |
1745 | 0 | match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; |
1746 | 0 | walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len); |
1747 | 0 | } |
1748 | 0 | } |
1749 | 0 | continue; |
1750 | 0 | } |
1751 | 0 | } |
1752 | 0 | *walkbuf++ = *walk++; |
1753 | 0 | walk_last = walk[-1]; |
1754 | 0 | } |
1755 | 0 | *walkbuf = '\0'; |
1756 | | /* increment the result length by how much we've added to the string */ |
1757 | 0 | result_len += (walkbuf - (ZSTR_VAL(result) + result_len)); |
1758 | 0 | } |
1759 | | |
1760 | 54 | limit--; |
1761 | | |
1762 | | /* Advance to the next piece. */ |
1763 | 54 | start_offset = last_end_offset = offsets[1]; |
1764 | | |
1765 | | /* If we have matched an empty string, mimic what Perl's /g options does. |
1766 | | This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try |
1767 | | the match again at the same point. If this fails (picked up above) we |
1768 | | advance to the next character. */ |
1769 | 54 | if (start_offset == offsets[0]) { |
1770 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, |
1771 | 0 | PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); |
1772 | |
|
1773 | 0 | piece = subject + start_offset; |
1774 | 0 | if (count >= 0 && limit > 0) { |
1775 | 0 | goto matched; |
1776 | 0 | } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) { |
1777 | | /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match, |
1778 | | this is not necessarily the end. We need to advance |
1779 | | the start offset, and continue. Fudge the offset values |
1780 | | to achieve this, unless we're already at the end of the string. */ |
1781 | 0 | if (start_offset < subject_len) { |
1782 | 0 | size_t unit_len = calculate_unit_length(pce, piece); |
1783 | 0 | start_offset += unit_len; |
1784 | 0 | } else { |
1785 | 0 | goto not_matched; |
1786 | 0 | } |
1787 | 0 | } else { |
1788 | 0 | goto error; |
1789 | 0 | } |
1790 | 0 | } |
1791 | | |
1792 | 128 | } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) { |
1793 | 128 | not_matched: |
1794 | 128 | if (!result && subject_str) { |
1795 | 76 | result = zend_string_copy(subject_str); |
1796 | 76 | break; |
1797 | 76 | } |
1798 | | /* now we know exactly how long it is */ |
1799 | 52 | alloc_len = result_len + subject_len - last_end_offset; |
1800 | 52 | if (NULL != result) { |
1801 | 52 | result = zend_string_realloc(result, alloc_len, 0); |
1802 | 52 | } else { |
1803 | 0 | result = zend_string_alloc(alloc_len, 0); |
1804 | 0 | } |
1805 | | /* stick that last bit of string on our output */ |
1806 | 52 | memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset); |
1807 | 52 | result_len += subject_len - last_end_offset; |
1808 | 52 | ZSTR_VAL(result)[result_len] = '\0'; |
1809 | 52 | ZSTR_LEN(result) = result_len; |
1810 | 52 | break; |
1811 | 128 | } else { |
1812 | 0 | error: |
1813 | 0 | pcre_handle_exec_error(count); |
1814 | 0 | if (result) { |
1815 | 0 | zend_string_release_ex(result, 0); |
1816 | 0 | result = NULL; |
1817 | 0 | } |
1818 | 0 | break; |
1819 | 0 | } |
1820 | | |
1821 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
1822 | | if (pce->preg_options & PREG_JIT) { |
1823 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, |
1824 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1825 | | } else |
1826 | | #endif |
1827 | 54 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset, |
1828 | 54 | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1829 | 54 | } |
1830 | 128 | if (match_data != mdata) { |
1831 | 115 | pcre2_match_data_free(match_data); |
1832 | 115 | } |
1833 | | |
1834 | 128 | return result; |
1835 | 128 | } |
1836 | | /* }}} */ |
1837 | | |
1838 | | static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, |
1839 | | zend_fcall_info *fci, zend_fcall_info_cache *fcc, |
1840 | | size_t limit, size_t *replace_count, zend_long flags |
1841 | 142 | ) { /* Replaces up to `limit` matches of pce in subject_str with the strings produced by preg_do_repl_func(fci, fcc, ...). Returns the rebuilt string, zend_string_copy(subject_str) when nothing was replaced, or NULL on error. *replace_count (if non-NULL) is incremented once per match. */ |
1842 | 142 | uint32_t options; /* Execution options */ |
1843 | 142 | int count; /* Count of matched subpatterns */ |
1844 | 142 | zend_string **subpat_names; /* Array for named subpatterns */ |
1845 | 142 | uint32_t num_subpats; /* Capture group count plus one */ |
1846 | 142 | size_t alloc_len; /* Actual allocated length */ |
1847 | 142 | PCRE2_SIZE start_offset; /* Where the new search starts */ |
1848 | 142 | size_t last_end_offset; /* Where the last search ended */ |
1849 | 142 | const char *match, /* The current match */ |
1850 | 142 | *piece; /* The current piece of subject */ |
1851 | 142 | size_t result_len; /* Length of result */ |
1852 | 142 | zend_string *result; /* Result of replacement */ |
1853 | 142 | pcre2_match_data *match_data; /* Match data block that owns the offsets vector */ |
1854 | 142 | bool old_mdata_used; /* Value of mdata_used on entry, restored before returning */ |
1855 | | |
1856 | | /* Derive the subpattern count from the compiled pattern and, when the pattern has named groups, fetch the table of their names. */ |
1857 | 142 | num_subpats = pce->capture_count + 1; |
1858 | 142 | if (pce->name_count > 0) { |
1859 | 0 | subpat_names = ensure_subpats_table(pce->name_count, pce); |
1860 | 0 | if (UNEXPECTED(!subpat_names)) { |
1861 | 0 | return NULL; |
1862 | 0 | } |
1863 | 142 | } else { |
1864 | 142 | subpat_names = NULL; |
1865 | 142 | } |
1866 | | |
1867 | 142 | alloc_len = 0; |
1868 | 142 | result = NULL; |
1869 | | |
1870 | | /* Initialize */ |
1871 | 142 | match = NULL; |
1872 | 142 | start_offset = 0; |
1873 | 142 | last_end_offset = 0; |
1874 | 142 | result_len = 0; |
1875 | 142 | PCRE_G(error_code) = PHP_PCRE_NO_ERROR; |
1876 | | |
1877 | 142 | old_mdata_used = mdata_used; /* saved so the flag can be put back on every exit taken after this point */ |
1878 | 142 | if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { /* preallocated mdata is free and large enough */ |
1879 | 142 | mdata_used = 1; /* claim it; presumably this makes a nested call (e.g. from the callback) allocate its own block - confirm */ |
1880 | 142 | match_data = mdata; |
1881 | 142 | } else { |
1882 | 0 | match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm)); |
1883 | 0 | if (!match_data) { |
1884 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
1885 | 0 | mdata_used = old_mdata_used; |
1886 | 0 | return NULL; |
1887 | 0 | } |
1888 | 0 | } |
1889 | | |
1890 | 142 | options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; /* only PCRE2_UTF patterns get a UTF check, and only on the first match below */ |
1891 | | |
1892 | | /* Array of subpattern offsets */ |
1893 | 142 | PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); |
1894 | | |
1895 | | /* Execute the regular expression. The JIT call is taken only when options is non-zero, i.e. when no UTF check is needed. */ |
1896 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
1897 | | if ((pce->preg_options & PREG_JIT) && options) { |
1898 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, |
1899 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
1900 | | } else |
1901 | | #endif |
1902 | 142 | count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, |
1903 | 142 | options, match_data, mctx); |
1904 | | |
1905 | 311 | while (1) { |
1906 | 311 | piece = ZSTR_VAL(subject_str) + last_end_offset; /* unconsumed tail of the subject */ |
1907 | | |
1908 | 311 | if (count >= 0 && limit) { /* a match was found and replacements are still allowed */ |
1909 | | /* Check for too many substrings condition. */ |
1910 | 184 | if (UNEXPECTED(count == 0)) { |
1911 | 0 | php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings"); |
1912 | 0 | count = num_subpats; |
1913 | 0 | } |
1914 | | |
1915 | 184 | matched: /* also entered by goto from the empty-match retry further down */ |
1916 | 184 | if (UNEXPECTED(offsets[1] < offsets[0])) { /* match end precedes match start: flag an internal error and discard the partial result */ |
1917 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
1918 | 0 | if (result) { |
1919 | 0 | zend_string_release_ex(result, 0); |
1920 | 0 | result = NULL; |
1921 | 0 | } |
1922 | 0 | break; |
1923 | 0 | } |
1924 | | |
1925 | 184 | if (replace_count) { |
1926 | 184 | ++*replace_count; |
1927 | 184 | } |
1928 | | |
1929 | | /* Set the match location in subject */ |
1930 | 184 | match = ZSTR_VAL(subject_str) + offsets[0]; |
1931 | | |
1932 | | /* Length of needed storage */ |
1933 | 184 | size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */ |
1934 | | |
1935 | | /* Use custom function to get replacement string and its length. */ |
1936 | 184 | zend_string *eval_result = preg_do_repl_func( |
1937 | 184 | fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count, |
1938 | 184 | pcre2_get_mark(match_data), flags); |
1939 | | |
1940 | 184 | if (UNEXPECTED(eval_result == NULL)) { /* no replacement string was produced: leave through the common error path */ |
1941 | 15 | goto error; |
1942 | 15 | } |
1943 | 169 | new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD; /* new_len += replacement length, computed through the overflow-guarded helper */ |
1944 | 169 | if (new_len >= alloc_len) { |
1945 | 153 | alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD; /* grow to twice what is needed right now */ |
1946 | 153 | if (result == NULL) { |
1947 | 95 | result = zend_string_alloc(alloc_len, 0); |
1948 | 95 | } else { |
1949 | 58 | result = zend_string_extend(result, alloc_len, 0); |
1950 | 58 | } |
1951 | 153 | } |
1952 | | |
1953 | 169 | if (match-piece > 0) { |
1954 | | /* copy the part of the string before the match */ |
1955 | 169 | memcpy(ZSTR_VAL(result) + result_len, piece, match-piece); |
1956 | 169 | result_len += (match-piece); |
1957 | 169 | } |
1958 | | |
1959 | | /* Append the callback's replacement to the buffer and drop our reference to it. */ |
1960 | 169 | memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result)); |
1961 | 169 | result_len += ZSTR_LEN(eval_result); |
1962 | 169 | zend_string_release_ex(eval_result, 0); |
1963 | | |
1964 | 169 | limit--; |
1965 | | |
1966 | | /* Advance to the next piece. */ |
1967 | 169 | start_offset = last_end_offset = offsets[1]; |
1968 | | |
1969 | | /* If we have matched an empty string, mimic what Perl's /g options does. |
1970 | | This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try |
1971 | | the match again at the same point. If this fails (picked up above) we |
1972 | | advance to the next character. */ |
1973 | 169 | if (start_offset == offsets[0]) { |
1974 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, |
1975 | 0 | PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); |
1976 |

1977 | 0 | piece = ZSTR_VAL(subject_str) + start_offset; |
1978 | 0 | if (count >= 0 && limit) { |
1979 | 0 | goto matched; |
1980 | 0 | } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) { |
1981 | | /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match, |
1982 | | this is not necessarily the end. We need to advance |
1983 | | the start offset, and continue. Fudge the offset values |
1984 | | to achieve this, unless we're already at the end of the string. */ |
1985 | 0 | if (start_offset < ZSTR_LEN(subject_str)) { |
1986 | 0 | size_t unit_len = calculate_unit_length(pce, piece); /* presumably the length of the character at piece - confirm against the helper */ |
1987 | 0 | start_offset += unit_len; |
1988 | 0 | } else { |
1989 | 0 | goto not_matched; |
1990 | 0 | } |
1991 | 0 | } else { |
1992 | 0 | goto error; |
1993 | 0 | } |
1994 | 0 | } |
1995 | | |
1996 | 169 | } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) { /* no further match, or the replacement budget is used up */ |
1997 | 127 | not_matched: |
1998 | 127 | if (result == NULL) { /* nothing was replaced: hand back the subject string itself */ |
1999 | 32 | result = zend_string_copy(subject_str); |
2000 | 32 | break; |
2001 | 32 | } |
2002 | | /* now we know exactly how long it is */ |
2003 | 95 | size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset; |
2004 | 95 | alloc_len = result_len + segment_len; |
2005 | 95 | result = zend_string_realloc(result, alloc_len, 0); |
2006 | | /* stick that last bit of string on our output */ |
2007 | 95 | memcpy(ZSTR_VAL(result) + result_len, piece, segment_len); |
2008 | 95 | result_len += segment_len; |
2009 | 95 | ZSTR_VAL(result)[result_len] = '\0'; |
2010 | 95 | ZSTR_LEN(result) = result_len; |
2011 | 95 | break; |
2012 | 127 | } else { |
2013 | 15 | error: /* NOTE(review): also reached with count >= 0 when the callback yields no string; what pcre_handle_exec_error records then is not visible here */ |
2014 | 15 | pcre_handle_exec_error(count); |
2015 | 15 | if (result) { |
2016 | 0 | zend_string_release_ex(result, 0); |
2017 | 0 | result = NULL; |
2018 | 0 | } |
2019 | 15 | break; |
2020 | 0 | } |
2021 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
2022 | | if ((pce->preg_options & PREG_JIT)) { /* follow-up searches always pass PCRE2_NO_UTF_CHECK, so JIT needs no extra condition here */ |
2023 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, |
2024 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
2025 | | } else |
2026 | | #endif |
2027 | 169 | count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, |
2028 | 169 | PCRE2_NO_UTF_CHECK, match_data, mctx); |
2029 | 169 | } |
2030 | 142 | if (match_data != mdata) { /* free only a block created by this call */ |
2031 | 0 | pcre2_match_data_free(match_data); |
2032 | 0 | } |
2033 | 142 | mdata_used = old_mdata_used; /* put back the flag saved on entry */ |
2034 | | |
2035 | 142 | return result; |
2036 | 142 | } |
2037 | | |
2038 | | static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, |
2039 | | zend_string *subject_str, |
2040 | | zend_fcall_info *fci, zend_fcall_info_cache *fcc, |
2041 | | size_t limit, size_t *replace_count, zend_long flags) |
2042 | 154 | { /* Looks up the compiled form of regex and runs the callback-based replacement on subject_str. Returns whatever php_pcre_replace_func_impl returns, or NULL when no compiled regex could be obtained. */ |
2043 | 154 | pcre_cache_entry *pce; /* Compiled regular expression */ |
2044 | 154 | zend_string *result; /* Function result */ |
2045 | | |
2046 | | /* Compile regex or get it from cache. */ |
2047 | 154 | if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { |
2048 | 12 | return NULL; |
2049 | 12 | } |
2050 | 142 | pce->refcount++; /* held for the duration of the call; presumably keeps the cache entry alive while the callback runs - confirm */ |
2051 | 142 | result = php_pcre_replace_func_impl(pce, subject_str, fci, fcc, limit, replace_count, flags); |
2052 | 142 | pce->refcount--; |
2053 | | |
2054 | 142 | return result; |
2055 | 154 | } |
2056 | | |
2057 | | /* {{{ php_pcre_replace_array */ /* Applies every regex in the `regex` array to the subject in turn, feeding each result into the next replacement. With replace_ht the replacements are taken from that array in order; otherwise replace_str is used for every regex. Returns the final string, or NULL as soon as one replacement fails. */ |
2058 | | static zend_string *php_pcre_replace_array(HashTable *regex, |
2059 | | zend_string *replace_str, HashTable *replace_ht, |
2060 | | zend_string *subject_str, size_t limit, size_t *replace_count) |
2061 | 0 | { |
2062 | 0 | zval *regex_entry; |
2063 | 0 | zend_string *result; |
2064 |

2065 | 0 | zend_string_addref(subject_str); /* take a reference so each iteration can release the previous subject uniformly */ |
2066 |

2067 | 0 | if (replace_ht) { |
2068 | 0 | uint32_t replace_idx = 0; /* slot cursor into replace_ht; persists across regex entries */ |
2069 | | |
2070 | | /* For each entry in the regex array, get the entry */ |
2071 | 0 | ZEND_HASH_FOREACH_VAL(regex, regex_entry) { |
2072 | | /* Make sure we're dealing with strings. */ |
2073 | 0 | zend_string *tmp_regex_str; |
2074 | 0 | zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str); |
2075 | 0 | zend_string *replace_entry_str, *tmp_replace_entry_str; |
2076 | 0 | zval *zv; |
2077 | | |
2078 | | /* Get the next replacement: skip IS_UNDEF slots, and fall back to the empty string once replace_ht is exhausted. */ |
2079 | 0 | while (1) { |
2080 | 0 | if (replace_idx == replace_ht->nNumUsed) { |
2081 | 0 | replace_entry_str = ZSTR_EMPTY_ALLOC(); |
2082 | 0 | tmp_replace_entry_str = NULL; |
2083 | 0 | break; |
2084 | 0 | } |
2085 | 0 | zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx); |
2086 | 0 | replace_idx++; |
2087 | 0 | if (Z_TYPE_P(zv) != IS_UNDEF) { |
2088 | 0 | replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str); |
2089 | 0 | break; |
2090 | 0 | } |
2091 | 0 | } |
2092 | | |
2093 | | /* Do the actual replacement and put the result back into subject_str |
2094 | | for further replacements. */ |
2095 | 0 | result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str), |
2096 | 0 | ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count); |
2097 | 0 | zend_tmp_string_release(tmp_replace_entry_str); |
2098 | 0 | zend_tmp_string_release(tmp_regex_str); |
2099 | 0 | zend_string_release_ex(subject_str, 0); |
2100 | 0 | subject_str = result; |
2101 | 0 | if (UNEXPECTED(result == NULL)) { /* a failed replacement ends the chain; NULL is what gets returned */ |
2102 | 0 | break; |
2103 | 0 | } |
2104 | 0 | } ZEND_HASH_FOREACH_END(); |
2105 |

2106 | 0 | } else { |
2107 | 0 | ZEND_ASSERT(replace_str != NULL); |
2108 | | |
2109 | | /* For each entry in the regex array, get the entry */ |
2110 | 0 | ZEND_HASH_FOREACH_VAL(regex, regex_entry) { |
2111 | | /* Make sure we're dealing with strings. */ |
2112 | 0 | zend_string *tmp_regex_str; |
2113 | 0 | zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str); |
2114 | | |
2115 | | /* Do the actual replacement and put the result back into subject_str |
2116 | | for further replacements. */ |
2117 | 0 | result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str), |
2118 | 0 | ZSTR_LEN(subject_str), replace_str, limit, replace_count); |
2119 | 0 | zend_tmp_string_release(tmp_regex_str); |
2120 | 0 | zend_string_release_ex(subject_str, 0); |
2121 | 0 | subject_str = result; |
2122 |

2123 | 0 | if (UNEXPECTED(result == NULL)) { /* same early exit as in the replace_ht branch */ |
2124 | 0 | break; |
2125 | 0 | } |
2126 | 0 | } ZEND_HASH_FOREACH_END(); |
2127 | 0 | } |
2128 | | |
2129 | 0 | return subject_str; /* last replacement result; NULL if the loop broke on a failure; the addref'd input if the regex array was empty */ |
2130 | 0 | } |
2131 | | /* }}} */ |
2132 | | |
2133 | | /* {{{ php_replace_in_subject */ /* Runs a string-replacement on one subject: a single regex goes straight to php_pcre_replace, an array of regexes goes through php_pcre_replace_array. Exactly one of regex_str / regex_ht is expected to be set. */ |
2134 | | static zend_always_inline zend_string *php_replace_in_subject( |
2135 | | zend_string *regex_str, HashTable *regex_ht, |
2136 | | zend_string *replace_str, HashTable *replace_ht, |
2137 | | zend_string *subject, size_t limit, size_t *replace_count) |
2138 | 153 | { |
2139 | 153 | zend_string *result; |
2140 | | |
2141 | 153 | if (regex_str) { |
2142 | 153 | ZEND_ASSERT(replace_str != NULL); /* a single regex is only ever paired with a string replacement */ |
2143 | 153 | result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject), |
2144 | 153 | replace_str, limit, replace_count); |
2145 | 153 | } else { |
2146 | 0 | ZEND_ASSERT(regex_ht != NULL); |
2147 | 0 | result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject, |
2148 | 0 | limit, replace_count); |
2149 | 0 | } |
2150 | 153 | return result; /* passed through unchanged from the callee; may be NULL */ |
2151 | 153 | } |
2152 | | /* }}} */ |
2153 | | |
2154 | | static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht, |
2155 | | zend_fcall_info *fci, zend_fcall_info_cache *fcc, |
2156 | | zend_string *subject, size_t limit, size_t *replace_count, zend_long flags) |
2157 | 154 | { /* Callback-based replacement on one subject, using either a single regex or each regex of regex_ht in turn. Returns the resulting string or NULL on failure. */ |
2158 | 154 | zend_string *result; |
2159 | | |
2160 | 154 | if (regex_str) { |
2161 | 154 | result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags); |
2162 | 154 | return result; |
2163 | 154 | } else { |
2164 | | /* If regex is an array */ |
2165 | 0 | zval *regex_entry; |
2166 |

2167 | 0 | ZEND_ASSERT(regex_ht != NULL); |
2168 | | |
2169 | 0 | zend_string_addref(subject); /* take a reference so each iteration can release the previous subject uniformly */ |
2170 | | |
2171 | | /* For each entry in the regex array, get the entry */ |
2172 | 0 | ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) { |
2173 | | /* Make sure we're dealing with strings. */ |
2174 | 0 | zend_string *tmp_regex_entry_str; |
2175 | 0 | zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str); |
2176 | 0 | if (UNEXPECTED(regex_entry_str == NULL)) { /* string conversion failed: stop here; the subject as it stands so far is what gets returned */ |
2177 | 0 | break; |
2178 | 0 | } |
2179 | | |
2180 | | /* Do the actual replacement and put the result back into subject |
2181 | | for further replacements. */ |
2182 | 0 | result = php_pcre_replace_func( |
2183 | 0 | regex_entry_str, subject, fci, fcc, limit, replace_count, flags); |
2184 | 0 | zend_tmp_string_release(tmp_regex_entry_str); |
2185 | 0 | zend_string_release(subject); |
2186 | 0 | subject = result; |
2187 | 0 | if (UNEXPECTED(result == NULL)) { /* a failed replacement ends the chain; NULL is returned */ |
2188 | 0 | break; |
2189 | 0 | } |
2190 | 0 | } ZEND_HASH_FOREACH_END(); |
2191 |

2192 | 0 | return subject; |
2193 | 0 | } |
2194 | 154 | } |
2195 | | |
2196 | | static size_t php_preg_replace_func_impl(zval *return_value, |
2197 | | zend_string *regex_str, const HashTable *regex_ht, |
2198 | | zend_fcall_info *fci, zend_fcall_info_cache *fcc, |
2199 | | zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags) |
2200 | 154 | { /* Callback-based replacement over a string subject or an array of subjects. Writes the outcome into return_value (string, NULL, or an array using the subjects' keys) and returns the total number of replacements made. */ |
2201 | 154 | zend_string *result; |
2202 | 154 | size_t replace_count = 0; |
2203 | | |
2204 | 154 | if (subject_str) { |
2205 | 154 | result = php_replace_in_subject_func( |
2206 | 154 | regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags); /* limit_val is converted to the callee's size_t limit, so a negative value (callers in this file default to -1) becomes a very large count */ |
2207 | 154 | if (result != NULL) { |
2208 | 127 | RETVAL_STR(result); |
2209 | 127 | } else { |
2210 | 27 | RETVAL_NULL(); |
2211 | 27 | } |
2212 | 154 | } else { |
2213 | | /* if subject is an array */ |
2214 | 0 | zval *subject_entry, zv; |
2215 | 0 | zend_string *string_key; |
2216 | 0 | zend_ulong num_key; |
2217 |

2218 | 0 | ZEND_ASSERT(subject_ht != NULL); |
2219 | | |
2220 | 0 | array_init_size(return_value, zend_hash_num_elements(subject_ht)); |
2221 | 0 | HashTable *return_value_ht = Z_ARRVAL_P(return_value); |
2222 | | |
2223 | | /* For each subject entry, convert it to string, then perform replacement |
2224 | | and add the result to the return_value array. */ |
2225 | 0 | ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) { |
2226 | 0 | zend_string *tmp_subject_entry_str; |
2227 | 0 | zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str); |
2228 | 0 | if (UNEXPECTED(subject_entry_str == NULL)) { /* string conversion failed: stop processing further subjects */ |
2229 | 0 | break; |
2230 | 0 | } |
2231 | | |
2232 | 0 | result = php_replace_in_subject_func( |
2233 | 0 | regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags); |
2234 | 0 | if (result != NULL) { /* subjects whose replacement failed are simply left out of the array */ |
2235 | | /* Add to return array */ |
2236 | 0 | ZVAL_STR(&zv, result); |
2237 | 0 | if (string_key) { |
2238 | 0 | zend_hash_add_new(return_value_ht, string_key, &zv); |
2239 | 0 | } else { |
2240 | 0 | zend_hash_index_add_new(return_value_ht, num_key, &zv); |
2241 | 0 | } |
2242 | 0 | } |
2243 | 0 | zend_tmp_string_release(tmp_subject_entry_str); |
2244 | 0 | } ZEND_HASH_FOREACH_END(); |
2245 | 0 | } |
2246 | | |
2247 | 154 | return replace_count; |
2248 | 154 | } |
2249 | | |
2250 | | static void _preg_replace_common( |
2251 | | zval *return_value, |
2252 | | HashTable *regex_ht, zend_string *regex_str, |
2253 | | HashTable *replace_ht, zend_string *replace_str, |
2254 | | HashTable *subject_ht, zend_string *subject_str, |
2255 | | zend_long limit, |
2256 | | zval *zcount, |
2257 | | bool is_filter |
2258 | 153 | ) { /* Shared body of the string-replacement functions. Each regex/replace/subject argument arrives as either a HashTable or a string. The outcome goes into return_value; if zcount is given it receives the replacement count. With is_filter set, only subjects that had at least one replacement are kept. */ |
2259 | 153 | size_t replace_count = 0; |
2260 | 153 | zend_string *result; |
2261 | 153 | size_t old_replace_count; /* count before the current subject, used to detect whether it was changed */ |
2262 | | |
2263 | | /* If replace is an array then the regex argument needs to also be an array */ |
2264 | 153 | if (replace_ht && !regex_ht) { |
2265 | 0 | zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given"); |
2266 | 0 | RETURN_THROWS(); |
2267 | 0 | } |
2268 | | |
2269 | 153 | if (subject_str) { |
2270 | 153 | old_replace_count = replace_count; |
2271 | 153 | result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht, |
2272 | 153 | subject_str, limit, &replace_count); /* limit is converted to the callee's size_t parameter, so a negative value becomes a very large count */ |
2273 | 153 | if (result != NULL) { |
2274 | 128 | if (!is_filter || replace_count > old_replace_count) { |
2275 | 128 | RETVAL_STR(result); |
2276 | 128 | } else { /* filter mode and nothing was replaced: discard the string and return NULL */ |
2277 | 0 | zend_string_release_ex(result, 0); |
2278 | 0 | RETVAL_NULL(); |
2279 | 0 | } |
2280 | 128 | } else { |
2281 | 25 | RETVAL_NULL(); |
2282 | 25 | } |
2283 | 153 | } else { |
2284 | | /* if subject is an array */ |
2285 | 0 | zval *subject_entry, zv; |
2286 | 0 | zend_string *string_key; |
2287 | 0 | zend_ulong num_key; |
2288 |

2289 | 0 | ZEND_ASSERT(subject_ht != NULL); |
2290 | | |
2291 | 0 | array_init_size(return_value, zend_hash_num_elements(subject_ht)); |
2292 | 0 | HashTable *return_value_ht = Z_ARRVAL_P(return_value); |
2293 | | |
2294 | | /* For each subject entry, convert it to string, then perform replacement |
2295 | | and add the result to the return_value array. */ |
2296 | 0 | ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) { |
2297 | 0 | old_replace_count = replace_count; |
2298 | 0 | zend_string *tmp_subject_entry_str; |
2299 | 0 | zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str); |
2300 | 0 | result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht, |
2301 | 0 | subject_entry_str, limit, &replace_count); |
2302 |

2303 | 0 | if (result != NULL) { /* a NULL result leaves this key out of the returned array */ |
2304 | 0 | if (!is_filter || replace_count > old_replace_count) { |
2305 | | /* Add to return array */ |
2306 | 0 | ZVAL_STR(&zv, result); |
2307 | 0 | if (string_key) { |
2308 | 0 | zend_hash_add_new(return_value_ht, string_key, &zv); |
2309 | 0 | } else { |
2310 | 0 | zend_hash_index_add_new(return_value_ht, num_key, &zv); |
2311 | 0 | } |
2312 | 0 | } else { /* filter mode and this subject was not changed: drop it */ |
2313 | 0 | zend_string_release_ex(result, 0); |
2314 | 0 | } |
2315 | 0 | } |
2316 | 0 | zend_tmp_string_release(tmp_subject_entry_str); |
2317 | 0 | } ZEND_HASH_FOREACH_END(); |
2318 | 0 | } |
2319 | | |
2320 | 153 | if (zcount) { |
2321 | 0 | ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); |
2322 | 0 | } |
2323 | 153 | } |
2324 | | |
2325 | | /* {{{ preg_replace_common */ /* Parses the userland arguments (regex, replacement, subject, optional limit and count) and forwards them to _preg_replace_common together with is_filter. */ |
2326 | | static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter) |
2327 | 153 | { |
2328 | 153 | zend_string *regex_str, *replace_str, *subject_str; |
2329 | 153 | HashTable *regex_ht, *replace_ht, *subject_ht; |
2330 | 153 | zend_long limit = -1; /* default when the caller passes no limit */ |
2331 | 153 | zval *zcount = NULL; /* stays NULL when no count argument is passed */ |
2332 | | |
2333 | | /* Get function parameters and do error-checking. */ |
2334 | 459 | ZEND_PARSE_PARAMETERS_START(3, 5) |
2335 | 765 | Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str) |
2336 | 765 | Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str) |
2337 | 765 | Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str) |
2338 | 765 | Z_PARAM_OPTIONAL |
2339 | 765 | Z_PARAM_LONG(limit) |
2340 | 429 | Z_PARAM_ZVAL(zcount) |
2341 | 429 | ZEND_PARSE_PARAMETERS_END(); |
2342 | | |
2343 | 153 | _preg_replace_common( |
2344 | 153 | return_value, |
2345 | 153 | regex_ht, regex_str, |
2346 | 153 | replace_ht, replace_str, |
2347 | 153 | subject_ht, subject_str, |
2348 | 153 | limit, zcount, is_filter); |
2349 | 153 | } |
2350 | | /* }}} */ |
2351 | | |
2352 | | /* {{{ Perform Perl-style regular expression replacement. */ |
2353 | | PHP_FUNCTION(preg_replace) |
2354 | 153 | { |
2355 | 153 | preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false); /* false = not filter mode: unchanged subjects are returned too */ |
2356 | 153 | } |
2357 | | /* }}} */ |
2358 | | |
2359 | | ZEND_FRAMELESS_FUNCTION(preg_replace, 3) |
2360 | 0 | { /* Three-argument frameless variant of preg_replace: same work as the regular entry point, with the default limit, no count output and filter mode off. */ |
2361 | 0 | zend_string *regex_str, *replace_str, *subject_str; |
2362 | 0 | HashTable *regex_ht, *replace_ht, *subject_ht; |
2363 | 0 | zval regex_tmp, replace_tmp, subject_tmp; /* scratch zvals handed to the parameter macros and freed at flf_clean */ |
2364 |

2365 | 0 | Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp); /* NOTE(review): these macros presumably jump to flf_clean on a bad argument - confirm against their definition */ |
2366 | 0 | Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp); |
2367 | 0 | Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp); |
2368 |

2369 | 0 | _preg_replace_common( |
2370 | 0 | return_value, |
2371 | 0 | regex_ht, regex_str, |
2372 | 0 | replace_ht, replace_str, |
2373 | 0 | subject_ht, subject_str, |
2374 | 0 | /* limit */ -1, /* zcount */ NULL, /* is_filter */ false); |
2375 |

2376 | 0 | flf_clean:; /* common cleanup point for the parameter temporaries */ |
2377 | 0 | Z_FLF_PARAM_FREE_STR(1, regex_tmp); |
2378 | 0 | Z_FLF_PARAM_FREE_STR(2, replace_tmp); |
2379 | 0 | Z_FLF_PARAM_FREE_STR(3, subject_tmp); |
2380 | 0 | } |
2381 | | |
2382 | | /* {{{ Perform Perl-style regular expression replacement using replacement callback. */ |
2383 | | PHP_FUNCTION(preg_replace_callback) |
2384 | 156 | { |
2385 | 156 | zval *zcount = NULL; /* stays NULL when no count argument is passed */ |
2386 | 156 | zend_string *regex_str; |
2387 | 156 | HashTable *regex_ht; |
2388 | 156 | zend_string *subject_str; |
2389 | 156 | HashTable *subject_ht; |
2390 | 156 | zend_long limit = -1, flags = 0; /* defaults used when the optional arguments are omitted */ |
2391 | 156 | size_t replace_count; |
2392 | 156 | zend_fcall_info fci = empty_fcall_info; |
2393 | 156 | zend_fcall_info_cache fcc = empty_fcall_info_cache; |
2394 | | |
2395 | | /* Get function parameters and do error-checking. */ |
2396 | 468 | ZEND_PARSE_PARAMETERS_START(3, 6) |
2397 | 780 | Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str) |
2398 | 780 | Z_PARAM_FUNC(fci, fcc) |
2399 | 924 | Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str) |
2400 | 924 | Z_PARAM_OPTIONAL |
2401 | 924 | Z_PARAM_LONG(limit) |
2402 | 0 | Z_PARAM_ZVAL(zcount) |
2403 | 0 | Z_PARAM_LONG(flags) |
2404 | 156 | ZEND_PARSE_PARAMETERS_END(); |
2405 | | |
2406 | 154 | replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht, |
2407 | 154 | &fci, &fcc, |
2408 | 154 | subject_str, subject_ht, limit, flags); /* fills return_value and reports how many replacements were made */ |
2409 | 154 | if (zcount) { |
2410 | 0 | ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); |
2411 | 0 | } |
2412 | 154 | } |
2413 | | /* }}} */ |
2414 | | |
2415 | | /* {{{ Perform Perl-style regular expression replacement using an array whose string keys are the patterns and whose values are the replacement callbacks. Each pattern is applied to the result produced by the previous one. */ |
2416 | | PHP_FUNCTION(preg_replace_callback_array) |
2417 | 0 | { |
2418 | 0 | zval *replace, *zcount = NULL; |
2419 | 0 | HashTable *pattern, *subject_ht; |
2420 | 0 | zend_string *subject_str, *str_idx_regex; |
2421 | 0 | zend_long limit = -1, flags = 0; |
2422 | 0 | size_t replace_count = 0; /* running total across all patterns */ |
2423 | | |
2424 | | /* Get function parameters and do error-checking. */ |
2425 | 0 | ZEND_PARSE_PARAMETERS_START(2, 5) |
2426 | 0 | Z_PARAM_ARRAY_HT(pattern) |
2427 | 0 | Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str) |
2428 | 0 | Z_PARAM_OPTIONAL |
2429 | 0 | Z_PARAM_LONG(limit) |
2430 | 0 | Z_PARAM_ZVAL(zcount) |
2431 | 0 | Z_PARAM_LONG(flags) |
2432 | 0 | ZEND_PARSE_PARAMETERS_END(); |
2433 | | /* Take a reference on the subject: the loop below releases the current subject each time a replacement result takes its place, and the error path releases it as well. */ |
2434 | 0 | if (subject_ht) { |
2435 | 0 | GC_TRY_ADDREF(subject_ht); |
2436 | 0 | } else { |
2437 | 0 | GC_TRY_ADDREF(subject_str); |
2438 | 0 | } |
2439 |

2440 | 0 | ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) { |
2441 | 0 | if (!str_idx_regex) { /* only string keys are accepted as patterns */ |
2442 | 0 | zend_argument_type_error(1, "must contain only string patterns as keys"); |
2443 | 0 | goto error; |
2444 | 0 | } |
2445 | | |
2446 | 0 | zend_fcall_info_cache fcc = empty_fcall_info_cache; |
2447 | 0 | zend_fcall_info fci = empty_fcall_info; |
2448 | 0 | fci.size = sizeof(zend_fcall_info); |
2449 | | /* Copy potential trampoline */ |
2450 | 0 | ZVAL_COPY_VALUE(&fci.function_name, replace); |
2451 |

2452 | 0 | if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) { /* also fills fcc for the call below */ |
2453 | 0 | zend_argument_type_error(1, "must contain only valid callbacks"); |
2454 | 0 | goto error; |
2455 | 0 | } |
2456 | | /* Run this pattern over the current subject; the result is written to retval and the count is added to the total. */ |
2457 | 0 | zval retval; |
2458 | 0 | replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc, |
2459 | 0 | subject_str, subject_ht, limit, flags); |
2460 | 0 | zend_release_fcall_info_cache(&fcc); |
2461 |

2462 | 0 | switch (Z_TYPE(retval)) { /* the result becomes the subject for the next pattern */ |
2463 | 0 | case IS_ARRAY: |
2464 | 0 | ZEND_ASSERT(subject_ht); |
2465 | 0 | zend_array_release(subject_ht); |
2466 | 0 | subject_ht = Z_ARR(retval); |
2467 | 0 | break; |
2468 | 0 | case IS_STRING: |
2469 | 0 | ZEND_ASSERT(subject_str); |
2470 | 0 | zend_string_release(subject_str); |
2471 | 0 | subject_str = Z_STR(retval); |
2472 | 0 | break; |
2473 | 0 | case IS_NULL: /* a NULL result ends the loop: return NULL and release the subject */ |
2474 | 0 | RETVAL_NULL(); |
2475 | 0 | goto error; |
2476 | 0 | EMPTY_SWITCH_DEFAULT_CASE() |
2477 | 0 | } |
2478 | | /* Stop if an exception is pending. */ |
2479 | 0 | if (EG(exception)) { |
2480 | 0 | goto error; |
2481 | 0 | } |
2482 | 0 | } ZEND_HASH_FOREACH_END(); |
2483 | | |
2484 | 0 | if (zcount) { |
2485 | 0 | ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); |
2486 | 0 | } |
2487 | | /* Success: return the final subject. Both branches return before reaching the release under the error label. */ |
2488 | 0 | if (subject_ht) { |
2489 | 0 | RETVAL_ARR(subject_ht); |
2490 | | // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting |
2491 | 0 | if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) { |
2492 | 0 | Z_TYPE_FLAGS_P(return_value) = 0; |
2493 | 0 | } |
2494 | 0 | return; |
2495 | 0 | } else { |
2496 | 0 | RETURN_STR(subject_str); |
2497 | 0 | } |
2498 | | /* Failure path: release the reference held on the current subject. */ |
2499 | 0 | error: |
2500 | 0 | if (subject_ht) { |
2501 | 0 | zend_array_release(subject_ht); |
2502 | 0 | } else { |
2503 | 0 | zend_string_release(subject_str); |
2504 | 0 | } |
2505 | 0 | } |
2506 | | /* }}} */ |
2507 | | |
2508 | | /* {{{ Perform Perl-style regular expression replacement and only return matches. */ |
2509 | | PHP_FUNCTION(preg_filter) |
2510 | 0 | { |
2511 | 0 | preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true); /* NOTE(review): the true argument presumably selects filter behaviour - confirm against preg_replace_common() */ |
2512 | 0 | } |
2513 | | /* }}} */ |
2514 | | |
2515 | | /* {{{ Split string into an array using a perl-style regular expression as a delimiter. Takes the regex and the subject, then optionally a limit and flags; returns false when no compiled regex can be obtained. */ |
2516 | | PHP_FUNCTION(preg_split) |
2517 | 0 | { |
2518 | 0 | zend_string *regex; /* Regular expression */ |
2519 | 0 | zend_string *subject; /* String to match against */ |
2520 | 0 | zend_long limit_val = -1;/* Integer value of limit */ |
2521 | 0 | zend_long flags = 0; /* Match control flags */ |
2522 | 0 | pcre_cache_entry *pce; /* Compiled regular expression */ |
2523 | | |
2524 | | /* Get function parameters and do error checking */ |
2525 | 0 | ZEND_PARSE_PARAMETERS_START(2, 4) |
2526 | 0 | Z_PARAM_STR(regex) |
2527 | 0 | Z_PARAM_STR(subject) |
2528 | 0 | Z_PARAM_OPTIONAL |
2529 | 0 | Z_PARAM_LONG(limit_val) |
2530 | 0 | Z_PARAM_LONG(flags) |
2531 | 0 | ZEND_PARSE_PARAMETERS_END(); |
2532 | | |
2533 | | /* Compile regex or get it from cache. */ |
2534 | 0 | if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { |
2535 | 0 | RETURN_FALSE; |
2536 | 0 | } |
2537 | | /* Raise the cache entry's refcount for the duration of the split and drop it again afterwards. */ |
2538 | 0 | pce->refcount++; |
2539 | 0 | php_pcre_split_impl(pce, subject, return_value, limit_val, flags); |
2540 | 0 | pce->refcount--; |
2541 | 0 | } |
2542 | | /* }}} */ |
2543 | | |
2544 | | /* {{{ php_pcre_split_impl: split subject_str around the matches of pce and store the pieces in return_value as an array. flags is tested against PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and PREG_SPLIT_OFFSET_CAPTURE. If a PCRE error is recorded, the array is destroyed and false is returned instead. */ |
2545 | | PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, |
2546 | | zend_long limit_val, zend_long flags) |
2547 | 0 | { |
2548 | 0 | uint32_t options; /* Execution options */ |
2549 | 0 | int count; /* Count of matched subpatterns */ |
2550 | 0 | PCRE2_SIZE start_offset; /* Where the new search starts */ |
2551 | 0 | PCRE2_SIZE last_match_offset; /* Location of last match */ |
2552 | 0 | uint32_t no_empty; /* If NO_EMPTY flag is set */ |
2553 | 0 | uint32_t delim_capture; /* If delimiters should be captured */ |
2554 | 0 | uint32_t offset_capture; /* If offsets should be captured */ |
2555 | 0 | uint32_t num_subpats; /* Number of captured subpatterns */ |
2556 | 0 | zval tmp; |
2557 | 0 | pcre2_match_data *match_data; |
2558 | 0 | char *subject = ZSTR_VAL(subject_str); |
2559 |

2560 | 0 | no_empty = flags & PREG_SPLIT_NO_EMPTY; |
2561 | 0 | delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; |
2562 | 0 | offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE; |
2563 | | |
2564 | | /* Initialize return value */ |
2565 | 0 | array_init(return_value); |
2566 | 0 | HashTable *return_value_ht = Z_ARRVAL_P(return_value); |
2567 | | |
2568 | | /* Number of subpatterns: the pattern's capture count plus one. */ |
2569 | 0 | num_subpats = pce->capture_count + 1; |
2570 | | |
2571 | | /* Start at the beginning of the string */ |
2572 | 0 | start_offset = 0; |
2573 | 0 | last_match_offset = 0; |
2574 | 0 | PCRE_G(error_code) = PHP_PCRE_NO_ERROR; |
2575 |

2576 | 0 | if (limit_val == -1) { |
2577 | | /* pass: -1 is the "no limit" value tested for below */ |
2578 | 0 | } else if (limit_val == 0) { /* 0 is treated like -1 */ |
2579 | 0 | limit_val = -1; |
2580 | 0 | } else if (limit_val <= 1) { /* 1, or a negative value other than -1: skip matching entirely */ |
2581 | 0 | goto last; |
2582 | 0 | } |
2583 | | /* Use the preallocated match data (mdata) when mdata_used is clear and num_subpats fits in it; otherwise create match data for this pattern. */ |
2584 | 0 | if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { |
2585 | 0 | match_data = mdata; |
2586 | 0 | } else { |
2587 | 0 | match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm)); |
2588 | 0 | if (!match_data) { |
2589 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
2590 | 0 | zval_ptr_dtor(return_value); |
2591 | 0 | RETURN_FALSE; |
2592 | 0 | } |
2593 | 0 | } |
2594 | | /* The first match passes PCRE2_NO_UTF_CHECK unless the pattern was compiled with PCRE2_UTF. */ |
2595 | 0 | options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; |
2596 | | |
2597 | | /* Array of subpattern offsets */ |
2598 | 0 | PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data); |
2599 |

2600 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
2601 | | if ((pce->preg_options & PREG_JIT) && options) { /* JIT path only when options is non-zero, i.e. PCRE2_NO_UTF_CHECK */ |
2602 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, |
2603 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
2604 | | } else |
2605 | | #endif |
2606 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, |
2607 | 0 | options, match_data, mctx); |
2608 |

2609 | 0 | while (1) { |
2610 | | /* If something matched */ |
2611 | 0 | if (count >= 0) { |
2612 | | /* Check for too many substrings condition. */ |
2613 | 0 | if (UNEXPECTED(count == 0)) { |
2614 | 0 | php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings"); |
2615 | 0 | count = num_subpats; |
2616 | 0 | } |
2617 |

2618 | 0 | matched: |
2619 | 0 | if (UNEXPECTED(offsets[1] < offsets[0])) { /* match end lies before match start: report an internal error */ |
2620 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
2621 | 0 | break; |
2622 | 0 | } |
2623 | | /* Emit the piece between the end of the previous match and the start of this one. */ |
2624 | 0 | if (!no_empty || offsets[0] != last_match_offset) { |
2625 | 0 | if (offset_capture) { |
2626 | | /* Add (match, offset) pair to the return value */ |
2627 | 0 | add_offset_pair( |
2628 | 0 | return_value_ht, subject, last_match_offset, offsets[0], |
2629 | 0 | NULL, 0); |
2630 | 0 | } else { |
2631 | | /* Add the piece to the return value */ |
2632 | 0 | populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]); |
2633 | 0 | zend_hash_next_index_insert_new(return_value_ht, &tmp); |
2634 | 0 | } |
2635 | | |
2636 | | /* One less left to do */ |
2637 | 0 | if (limit_val != -1) |
2638 | 0 | limit_val--; |
2639 | 0 | } |
2640 |

2641 | 0 | if (delim_capture) { /* also emit the captured groups 1..count-1 */ |
2642 | 0 | size_t i; |
2643 | 0 | for (i = 1; i < count; i++) { |
2644 | | /* If we have matched a delimiter */ |
2645 | 0 | if (!no_empty || offsets[2*i] != offsets[2*i+1]) { |
2646 | 0 | if (offset_capture) { |
2647 | 0 | add_offset_pair( |
2648 | 0 | return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0); |
2649 | 0 | } else { |
2650 | 0 | populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]); |
2651 | 0 | zend_hash_next_index_insert_new(return_value_ht, &tmp); |
2652 | 0 | } |
2653 | 0 | } |
2654 | 0 | } |
2655 | 0 | } |
2656 | | |
2657 | | /* Advance to the position right after the last full match */ |
2658 | 0 | start_offset = last_match_offset = offsets[1]; |
2659 | | |
2660 | | /* If we have matched an empty string, mimic what Perl's /g option does. |
2661 | | This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try |
2662 | | the match again at the same point. If this fails (picked up above) we |
2663 | | advance to the next character. */ |
2664 | 0 | if (start_offset == offsets[0]) { |
2665 | | /* Get next piece if no limit or limit not yet reached and something matched */ |
2666 | 0 | if (limit_val != -1 && limit_val <= 1) { |
2667 | 0 | break; |
2668 | 0 | } |
2669 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, |
2670 | 0 | PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); |
2671 | 0 | if (count >= 0) { |
2672 | 0 | goto matched; |
2673 | 0 | } else if (count == PCRE2_ERROR_NOMATCH) { |
2674 | | /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match, |
2675 | | this is not necessarily the end. We need to advance |
2676 | | the start offset, and continue. Fudge the offset values |
2677 | | to achieve this, unless we're already at the end of the string. */ |
2678 | 0 | if (start_offset < ZSTR_LEN(subject_str)) { |
2679 | 0 | start_offset += calculate_unit_length(pce, subject + start_offset); |
2680 | 0 | } else { |
2681 | 0 | break; |
2682 | 0 | } |
2683 | 0 | } else { |
2684 | 0 | goto error; |
2685 | 0 | } |
2686 | 0 | } |
2687 |

2688 | 0 | } else if (count == PCRE2_ERROR_NOMATCH) { |
2689 | 0 | break; |
2690 | 0 | } else { |
2691 | 0 | error: |
2692 | 0 | pcre_handle_exec_error(count); |
2693 | 0 | break; |
2694 | 0 | } |
2695 | | |
2696 | | /* Get next piece if no limit or limit not yet reached and something matched */ |
2697 | 0 | if (limit_val != -1 && limit_val <= 1) { |
2698 | 0 | break; |
2699 | 0 | } |
2700 | | /* Every match after the first passes PCRE2_NO_UTF_CHECK, whatever options was for the first call. */ |
2701 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
2702 | | if (pce->preg_options & PREG_JIT) { |
2703 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, |
2704 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
2705 | | } else |
2706 | | #endif |
2707 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, |
2708 | 0 | PCRE2_NO_UTF_CHECK, match_data, mctx); |
2709 | 0 | } |
2710 | 0 | if (match_data != mdata) { /* free only match data created by this call */ |
2711 | 0 | pcre2_match_data_free(match_data); |
2712 | 0 | } |
2713 |

2714 | 0 | if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) { |
2715 | 0 | zval_ptr_dtor(return_value); |
2716 | 0 | RETURN_FALSE; |
2717 | 0 | } |
2718 | | |
2719 | 0 | last: |
2720 | 0 | start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */ |
2721 |

2722 | 0 | if (!no_empty || start_offset < ZSTR_LEN(subject_str)) { |
2723 | 0 | if (offset_capture) { |
2724 | | /* Add the last (match, offset) pair to the return value */ |
2725 | 0 | add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); |
2726 | 0 | } else { |
2727 | | /* Add the last piece to the return value */ |
2728 | 0 | if (start_offset == 0) { /* the last piece is the whole subject: reuse the subject string itself */ |
2729 | 0 | ZVAL_STR_COPY(&tmp, subject_str); |
2730 | 0 | } else { |
2731 | 0 | populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str)); |
2732 | 0 | } |
2733 | 0 | zend_hash_next_index_insert_new(return_value_ht, &tmp); |
2734 | 0 | } |
2735 | 0 | } |
2736 | 0 | } |
2737 | | /* }}} */ |
2738 | | |
2739 | | /* {{{ Quote regular expression characters plus an optional character. Each special character gets a backslash in front of it, a NUL byte is written as \000, and only the first byte of the optional delimiter argument is used. */ |
2740 | | PHP_FUNCTION(preg_quote) |
2741 | 3 | { |
2742 | 3 | zend_string *str; /* Input string argument */ |
2743 | 3 | zend_string *delim = NULL; /* Additional delimiter argument */ |
2744 | 3 | char *in_str; /* Input string */ |
2745 | 3 | char *in_str_end; /* End of the input string */ |
2746 | 3 | zend_string *out_str; /* Output string with quoted characters */ |
2747 | 3 | size_t extra_len; /* Number of additional characters */ |
2748 | 3 | char *p, /* Iterator for input string */ |
2749 | 3 | *q, /* Iterator for output string */ |
2750 | 3 | delim_char = '\0', /* Delimiter character to be quoted */ |
2751 | 3 | c; /* Current character */ |
2752 | | |
2753 | | /* Get the arguments and check for errors */ |
2754 | 9 | ZEND_PARSE_PARAMETERS_START(1, 2) |
2755 | 12 | Z_PARAM_STR(str) |
2756 | 3 | Z_PARAM_OPTIONAL |
2757 | 6 | Z_PARAM_STR_OR_NULL(delim) |
2758 | 3 | ZEND_PARSE_PARAMETERS_END(); |
2759 | | |
2760 | | /* Nothing to do if we got an empty string */ |
2761 | 3 | if (ZSTR_LEN(str) == 0) { |
2762 | 0 | RETURN_EMPTY_STRING(); |
2763 | 0 | } |
2764 | | |
2765 | 3 | in_str = ZSTR_VAL(str); |
2766 | 3 | in_str_end = in_str + ZSTR_LEN(str); |
2767 | | |
2768 | 3 | if (delim) { |
2769 | 0 | delim_char = ZSTR_VAL(delim)[0]; /* only the first byte of the delimiter is considered */ |
2770 | 0 | } |
2771 | | |
2772 | | /* First pass: count how many extra bytes the quoting will add */ |
2773 | 3 | extra_len = 0; |
2774 | 3 | p = in_str; |
2775 | 16.2k | do { |
2776 | 16.2k | c = *p; |
2777 | 16.2k | switch(c) { |
2778 | 51 | case '.': |
2779 | 58 | case '\\': |
2780 | 96 | case '+': |
2781 | 286 | case '*': |
2782 | 386 | case '?': |
2783 | 432 | case '[': |
2784 | 470 | case '^': |
2785 | 518 | case ']': |
2786 | 562 | case '$': |
2787 | 695 | case '(': |
2788 | 776 | case ')': |
2789 | 849 | case '{': |
2790 | 993 | case '}': |
2791 | 1.07k | case '=': |
2792 | 1.09k | case '!': |
2793 | 1.19k | case '>': |
2794 | 1.20k | case '<': |
2795 | 1.26k | case '|': |
2796 | 1.28k | case ':': |
2797 | 1.55k | case '-': |
2798 | 1.55k | case '#': |
2799 | 1.55k | extra_len++; |
2800 | 1.55k | break; |
2801 | | |
2802 | 3.16k | case '\0': |
2803 | 3.16k | extra_len+=3; /* one input byte becomes the four bytes \000 in the second pass */ |
2804 | 3.16k | break; |
2805 | | |
2806 | 11.5k | default: |
2807 | 11.5k | if (c == delim_char) { |
2808 | 0 | extra_len++; |
2809 | 0 | } |
2810 | 11.5k | break; |
2811 | 16.2k | } |
2812 | 16.2k | p++; |
2813 | 16.2k | } while (p != in_str_end); |
2814 | | /* Nothing needs quoting: return the input string itself. */ |
2815 | 3 | if (extra_len == 0) { |
2816 | 0 | RETURN_STR_COPY(str); |
2817 | 0 | } |
2818 | | |
2819 | | /* Allocate the output string, sized from the input length |
2820 | | and the extra_len counted in the first pass */ |
2821 | 3 | out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0); |
2822 | 3 | q = ZSTR_VAL(out_str); |
2823 | 3 | p = in_str; |
2824 | | /* Second pass: copy the input, inserting the escapes. */ |
2825 | 16.2k | do { |
2826 | 16.2k | c = *p; |
2827 | 16.2k | switch(c) { |
2828 | 51 | case '.': |
2829 | 58 | case '\\': |
2830 | 96 | case '+': |
2831 | 286 | case '*': |
2832 | 386 | case '?': |
2833 | 432 | case '[': |
2834 | 470 | case '^': |
2835 | 518 | case ']': |
2836 | 562 | case '$': |
2837 | 695 | case '(': |
2838 | 776 | case ')': |
2839 | 849 | case '{': |
2840 | 993 | case '}': |
2841 | 1.07k | case '=': |
2842 | 1.09k | case '!': |
2843 | 1.19k | case '>': |
2844 | 1.20k | case '<': |
2845 | 1.26k | case '|': |
2846 | 1.28k | case ':': |
2847 | 1.55k | case '-': |
2848 | 1.55k | case '#': |
2849 | 1.55k | *q++ = '\\'; |
2850 | 1.55k | *q++ = c; |
2851 | 1.55k | break; |
2852 | | |
2853 | 3.16k | case '\0': |
2854 | 3.16k | *q++ = '\\'; |
2855 | 3.16k | *q++ = '0'; |
2856 | 3.16k | *q++ = '0'; |
2857 | 3.16k | *q++ = '0'; |
2858 | 3.16k | break; |
2859 | | |
2860 | 11.5k | default: |
2861 | 11.5k | if (c == delim_char) { |
2862 | 0 | *q++ = '\\'; |
2863 | 0 | } |
2864 | 11.5k | *q++ = c; |
2865 | 11.5k | break; |
2866 | 16.2k | } |
2867 | 16.2k | p++; |
2868 | 16.2k | } while (p != in_str_end); |
2869 | 3 | *q = '\0'; |
2870 | | |
2871 | 3 | RETURN_NEW_STR(out_str); |
2872 | 3 | } |
2873 | | /* }}} */ |
2874 | | |
2875 | | /* {{{ Searches array and returns entries which match regex. Takes the regex and the input array, then optionally flags; returns false when no compiled regex can be obtained. */ |
2876 | | PHP_FUNCTION(preg_grep) |
2877 | 0 | { |
2878 | 0 | zend_string *regex; /* Regular expression */ |
2879 | 0 | zval *input; /* Input array */ |
2880 | 0 | zend_long flags = 0; /* Match control flags */ |
2881 | 0 | pcre_cache_entry *pce; /* Compiled regular expression */ |
2882 | | |
2883 | | /* Get arguments and do error checking */ |
2884 | 0 | ZEND_PARSE_PARAMETERS_START(2, 3) |
2885 | 0 | Z_PARAM_STR(regex) |
2886 | 0 | Z_PARAM_ARRAY(input) |
2887 | 0 | Z_PARAM_OPTIONAL |
2888 | 0 | Z_PARAM_LONG(flags) |
2889 | 0 | ZEND_PARSE_PARAMETERS_END(); |
2890 | | |
2891 | | /* Compile regex or get it from cache. */ |
2892 | 0 | if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { |
2893 | 0 | RETURN_FALSE; |
2894 | 0 | } |
2895 | | /* Raise the cache entry's refcount for the duration of the grep and drop it again afterwards. */ |
2896 | 0 | pce->refcount++; |
2897 | 0 | php_pcre_grep_impl(pce, input, return_value, flags); |
2898 | 0 | pce->refcount--; |
2899 | 0 | } |
2900 | | /* }}} */ |
2901 | | |
2902 | | PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ Fill return_value with the entries of input whose string value matches pce, or with those that do not match when PREG_GREP_INVERT is set in flags; entries keep their original keys. */ |
2903 | 0 | { |
2904 | 0 | zval *entry; /* An entry in the input array */ |
2905 | 0 | uint32_t num_subpats; /* Number of captured subpatterns */ |
2906 | 0 | int count; /* Count of matched subpatterns */ |
2907 | 0 | uint32_t options; /* Execution options */ |
2908 | 0 | zend_string *string_key; |
2909 | 0 | zend_ulong num_key; |
2910 | 0 | bool invert; /* Whether to return non-matching |
2911 | | entries */ |
2912 | 0 | pcre2_match_data *match_data; |
2913 | 0 | invert = flags & PREG_GREP_INVERT ? 1 : 0; |
2914 | | |
2915 | | /* Number of subpatterns: the pattern's capture count plus one. */ |
2916 | 0 | num_subpats = pce->capture_count + 1; |
2917 | | |
2918 | | /* Initialize return array */ |
2919 | 0 | array_init(return_value); |
2920 | 0 | HashTable *return_value_ht = Z_ARRVAL_P(return_value); |
2921 |

2922 | 0 | PCRE_G(error_code) = PHP_PCRE_NO_ERROR; |
2923 |

2924 | 0 | if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) { /* reuse the preallocated match data when mdata_used is clear and it is large enough */ |
2925 | 0 | match_data = mdata; |
2926 | 0 | } else { |
2927 | 0 | match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm)); |
2928 | 0 | if (!match_data) { |
2929 | 0 | PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; |
2930 | 0 | return; /* return_value is left as the empty array initialised above */ |
2931 | 0 | } |
2932 | 0 | } |
2933 | | /* PCRE2_NO_UTF_CHECK is passed unless the pattern was compiled with PCRE2_UTF. */ |
2934 | 0 | options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; |
2935 | | |
2936 | | /* Go through the input array */ |
2937 | 0 | ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) { |
2938 | 0 | zend_string *tmp_subject_str; |
2939 | 0 | zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str); |
2940 | | |
2941 | | /* Perform the match */ |
2942 | | #ifdef HAVE_PCRE_JIT_SUPPORT |
2943 | | if ((pce->preg_options & PREG_JIT) && options) { /* JIT path only when options is non-zero, i.e. PCRE2_NO_UTF_CHECK */ |
2944 | | count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0, |
2945 | | PCRE2_NO_UTF_CHECK, match_data, mctx); |
2946 | | } else |
2947 | | #endif |
2948 | 0 | count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0, |
2949 | 0 | options, match_data, mctx); |
2950 | | |
2951 | | /* If the entry fits our requirements */ |
2952 | 0 | if (count >= 0) { |
2953 | | /* Check for too many substrings condition. */ |
2954 | 0 | if (UNEXPECTED(count == 0)) { |
2955 | 0 | php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings"); |
2956 | 0 | } |
2957 | 0 | if (!invert) { |
2958 | 0 | Z_TRY_ADDREF_P(entry); |
2959 | | |
2960 | | /* Add to return array */ |
2961 | 0 | if (string_key) { |
2962 | 0 | zend_hash_update(return_value_ht, string_key, entry); |
2963 | 0 | } else { |
2964 | 0 | zend_hash_index_update(return_value_ht, num_key, entry); |
2965 | 0 | } |
2966 | 0 | } |
2967 | 0 | } else if (count == PCRE2_ERROR_NOMATCH) { |
2968 | 0 | if (invert) { |
2969 | 0 | Z_TRY_ADDREF_P(entry); |
2970 | | |
2971 | | /* Add to return array */ |
2972 | 0 | if (string_key) { |
2973 | 0 | zend_hash_update(return_value_ht, string_key, entry); |
2974 | 0 | } else { |
2975 | 0 | zend_hash_index_update(return_value_ht, num_key, entry); |
2976 | 0 | } |
2977 | 0 | } |
2978 | 0 | } else { /* any other negative count: report the error and stop; entries added so far stay in the array */ |
2979 | 0 | pcre_handle_exec_error(count); |
2980 | 0 | zend_tmp_string_release(tmp_subject_str); |
2981 | 0 | break; |
2982 | 0 | } |
2983 | | |
2984 | 0 | zend_tmp_string_release(tmp_subject_str); |
2985 | 0 | } ZEND_HASH_FOREACH_END(); |
2986 | 0 | if (match_data != mdata) { /* free only match data created by this call */ |
2987 | 0 | pcre2_match_data_free(match_data); |
2988 | 0 | } |
2989 | 0 | } |
2990 | | /* }}} */ |
2991 | | |
2992 | | /* {{{ Returns the error code of the last regexp execution. Takes no arguments; the value is read from PCRE_G(error_code). */ |
2993 | | PHP_FUNCTION(preg_last_error) |
2994 | 0 | { |
2995 | 0 | ZEND_PARSE_PARAMETERS_NONE(); |
2996 | | |
2997 | 0 | RETURN_LONG(PCRE_G(error_code)); |
2998 | 0 | } |
2999 | | /* }}} */ |
3000 | | |
3001 | | /* {{{ Returns the error message of the last regexp execution. Takes no arguments; the message is looked up from PCRE_G(error_code) with php_pcre_get_error_msg(). */ |
3002 | | PHP_FUNCTION(preg_last_error_msg) |
3003 | 0 | { |
3004 | 0 | ZEND_PARSE_PARAMETERS_NONE(); |
3005 | | |
3006 | 0 | RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code))); |
3007 | 0 | } |
3008 | | /* }}} */ |
3009 | | |
3010 | | /* {{{ module definition structures: the zend_module_entry for "pcre" and, when COMPILE_DL_PCRE is defined, ZEND_GET_MODULE(pcre) */ |
3011 | | |
3012 | | zend_module_entry pcre_module_entry = { |
3013 | | STANDARD_MODULE_HEADER, |
3014 | | "pcre", |
3015 | | ext_functions, |
3016 | | PHP_MINIT(pcre), |
3017 | | PHP_MSHUTDOWN(pcre), |
3018 | | PHP_RINIT(pcre), |
3019 | | PHP_RSHUTDOWN(pcre), |
3020 | | PHP_MINFO(pcre), |
3021 | | PHP_PCRE_VERSION, |
3022 | | PHP_MODULE_GLOBALS(pcre), |
3023 | | PHP_GINIT(pcre), |
3024 | | PHP_GSHUTDOWN(pcre), |
3025 | | NULL, |
3026 | | STANDARD_MODULE_PROPERTIES_EX |
3027 | | }; |
3028 | | |
3029 | | #ifdef COMPILE_DL_PCRE |
3030 | | ZEND_GET_MODULE(pcre) |
3031 | | #endif |
3032 | | |
3033 | | /* }}} */ |
3034 | | |
3035 | | PHPAPI pcre2_match_context *php_pcre_mctx(void) |
3036 | 5 | {/*{{{ Exported accessor: returns the mctx pointer used by the match calls in this file. */ |
3037 | 5 | return mctx; |
3038 | 5 | }/*}}}*/ |
3039 | | |
3040 | | PHPAPI pcre2_general_context *php_pcre_gctx(void) |
3041 | 0 | {/*{{{ Exported accessor: returns the gctx pointer. */ |
3042 | 0 | return gctx; |
3043 | 0 | }/*}}}*/ |
3044 | | |
3045 | | PHPAPI pcre2_compile_context *php_pcre_cctx(void) |
3046 | 0 | {/*{{{ Exported accessor: returns the cctx pointer. */ |
3047 | 0 | return cctx; |
3048 | 0 | }/*}}}*/ |
3049 | | |
3050 | | PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce) |
3051 | 0 | {/*{{{ Increments the refcount of a cache entry; pce must not be NULL (asserted). */ |
3052 | 0 | assert(NULL != pce); |
3053 | 0 | pce->refcount++; |
3054 | 0 | }/*}}}*/ |
3055 | | |
3056 | | PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce) |
3057 | 0 | {/*{{{ Decrements the refcount of a cache entry; pce must not be NULL and its refcount must be non-zero (both asserted). */ |
3058 | 0 | assert(NULL != pce); |
3059 | 0 | assert(0 != pce->refcount); |
3060 | 0 | pce->refcount--; |
3061 | 0 | }/*}}}*/ |
3062 | | |
3063 | | PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce) |
3064 | 0 | {/*{{{ Returns the re member (compiled pattern) of a cache entry; pce must not be NULL (asserted). */ |
3065 | 0 | assert(NULL != pce); |
3066 | 0 | return pce->re; |
3067 | 0 | }/*}}}*/ |