Coverage Report

Created: 2026-06-13 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/php_pcre.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Andrei Zmievski <andrei@php.net>                             |
12
   +----------------------------------------------------------------------+
13
 */
14
15
#include "php.h"
16
#include "php_ini.h"
17
#include "php_pcre.h"
18
#include "ext/standard/info.h"
19
#include "ext/standard/basic_functions.h"
20
#include "zend_smart_str.h"
21
#include "SAPI.h"
22
23
0
/* Result ordering flags. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2

/* Match result shaping flags. */
#define PREG_OFFSET_CAPTURE         (1 << 8)
#define PREG_UNMATCHED_AS_NULL      (1 << 9)

/* Split flags. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Grep flags. */
#define PREG_GREP_INVERT            (1 << 0)

/* Stored in a cache entry's preg_options when the pattern was JIT-compiled. */
#define PREG_JIT                    (1 << 3)

/* Number of cached patterns at which eviction of unused entries is attempted. */
#define PCRE_CACHE_SIZE 4096

/* Compile-time 0/1 view of HAVE_PCRE_JIT_SUPPORT. */
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PHP_PCRE_JIT_SUPPORT 1
#else
#define PHP_PCRE_JIT_SUPPORT 0
#endif
43
44
char *php_pcre_version;
45
46
#include "php_pcre_arginfo.h"
47
48
struct _pcre_cache_entry {
49
  pcre2_code *re;
50
  /* Pointer is not NULL (during request) when there are named captures.
51
   * Length is equal to capture_count + 1 to account for capture group 0.
52
   * This table cache is only valid during request.
53
   * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
54
   * (see GH-17122 and GH-17132). */
55
  zend_string **subpats_table;
56
  uint32_t preg_options;
57
  uint32_t name_count;
58
  uint32_t capture_count;
59
  uint32_t compile_options;
60
  uint32_t refcount;
61
};
62
63
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65
#ifdef HAVE_PCRE_JIT_SUPPORT
66
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
67
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
68
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
69
#endif
70
/* General context using (infallible) system allocator. */
71
ZEND_TLS pcre2_general_context *gctx = NULL;
72
/* These two are global per thread for now. Though it is possible to use these
73
  per pattern. Either one can copy it and use in pce, or one does no global
74
  contexts at all, but creates for every pce. */
75
ZEND_TLS pcre2_compile_context *cctx = NULL;
76
ZEND_TLS pcre2_match_context   *mctx = NULL;
77
ZEND_TLS pcre2_match_data      *mdata = NULL;
78
ZEND_TLS bool              mdata_used = 0;
79
ZEND_TLS uint8_t pcre2_init_ok = 0;
80
/* Mutex helpers used around PCRE2 (re)initialisation.
 * They only do something in thread-safe builds with JIT support; otherwise
 * they expand to nothing. The multi-statement forms are wrapped in
 * do { } while (0) so each macro behaves as a single statement, and none of
 * them carries its own trailing semicolon: always invoke them as
 * `php_pcre_mutex_xxx();`. */
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
  do { \
    if (tsrm_is_main_thread() && !pcre_mt) { \
      pcre_mt = tsrm_mutex_alloc(); \
    } \
  } while (0)
#define php_pcre_mutex_free() \
  do { \
    if (tsrm_is_main_thread() && pcre_mt) { \
      tsrm_mutex_free(pcre_mt); \
      pcre_mt = NULL; \
    } \
  } while (0)
#define php_pcre_mutex_lock() do { tsrm_mutex_lock(pcre_mt); } while (0)
#define php_pcre_mutex_unlock() do { tsrm_mutex_unlock(pcre_mt); } while (0)
#else
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif
94
95
ZEND_TLS HashTable char_tables;
96
97
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
98
99
/* Hash destructor for char_tables: releases one persistently allocated table. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
  pefree(Z_PTR_P(data), 1);
}/*}}}*/
104
105
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
106
1.44k
{
107
1.44k
  int preg_code = 0;
108
109
1.44k
  switch (pcre_code) {
110
38
    case PCRE2_ERROR_MATCHLIMIT:
111
38
      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
112
38
      break;
113
114
0
    case PCRE2_ERROR_RECURSIONLIMIT:
115
0
      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
116
0
      break;
117
118
0
    case PCRE2_ERROR_BADUTFOFFSET:
119
0
      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
120
0
      break;
121
122
#ifdef HAVE_PCRE_JIT_SUPPORT
123
    case PCRE2_ERROR_JIT_STACKLIMIT:
124
      preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
125
      break;
126
#endif
127
128
1.40k
    default:
129
1.40k
      if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
130
165
        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
131
1.24k
      } else  {
132
1.24k
        preg_code = PHP_PCRE_INTERNAL_ERROR;
133
1.24k
      }
134
1.40k
      break;
135
1.44k
  }
136
137
1.44k
  PCRE_G(error_code) = preg_code;
138
1.44k
}
139
/* }}} */
140
141
/* Returns a static, human readable message for a PHP_PCRE_* error code;
 * unrecognised codes yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
  const char *msg;

  switch (error_code) {
    case PHP_PCRE_NO_ERROR:
      msg = "No error";
      break;
    case PHP_PCRE_INTERNAL_ERROR:
      msg = "Internal error";
      break;
    case PHP_PCRE_BAD_UTF8_ERROR:
      msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
      break;
    case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
      msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
      break;
    case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
      msg = "Backtrack limit exhausted";
      break;
    case PHP_PCRE_RECURSION_LIMIT_ERROR:
      msg = "Recursion limit exhausted";
      break;

#ifdef HAVE_PCRE_JIT_SUPPORT
    case PHP_PCRE_JIT_STACKLIMIT_ERROR:
      msg = "JIT stack limit exhausted";
      break;
#endif

    default:
      msg = "Unknown error";
      break;
  }

  return msg;
}
166
/* }}} */
167
168
/* Hash destructor for the pattern cache: frees the name table (if one is
 * still attached), the compiled pattern and the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
  pcre_cache_entry *entry = Z_PTR_P(data);

  if (entry == NULL) {
    return;
  }

  if (entry->subpats_table != NULL) {
    /* The table has one slot per capture group plus one for group 0. */
    free_subpats_table(entry->subpats_table, entry->capture_count + 1);
  }
  pcre2_code_free(entry->re);
  free(entry);
}
178
/* }}} */
179
180
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
  void *block = pemalloc(size, 1);

  return block;
}
184
185
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
  pefree(block, 1);
  return;
}
189
190
/* PCRE2 allocation callback backed by emalloc(); `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
  void *block = emalloc(size);

  return block;
}
194
195
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
  efree(block);
  return;
}
199
200
3.41k
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
201
202
/* Creates whichever of the thread-local PCRE2 objects (general, compile and
 * match contexts, optional JIT stack, preallocated match data) do not exist
 * yet. pcre2_init_ok ends up 1 only when all of them are available, and 0 as
 * soon as one creation fails. Objects created before a failure are kept, so
 * a later retry picks up where this call stopped.
 *
 * jit: non-zero to also create the JIT stack (only meaningful when built
 *      with HAVE_PCRE_JIT_SUPPORT). */
static void php_pcre_init_pcre2(uint8_t jit)
{/*{{{*/
  /* Assume failure until every object is known to exist. */
  pcre2_init_ok = 0;

  if (gctx == NULL
      && (gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL)) == NULL) {
    return;
  }

  if (cctx == NULL && (cctx = pcre2_compile_context_create(gctx)) == NULL) {
    return;
  }

  if (mctx == NULL && (mctx = pcre2_match_context_create(gctx)) == NULL) {
    return;
  }

#ifdef HAVE_PCRE_JIT_SUPPORT
  if (jit && jit_stack == NULL
      && (jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx)) == NULL) {
    return;
  }
#endif

  if (mdata == NULL
      && (mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx)) == NULL) {
    return;
  }

  pcre2_init_ok = 1;
}/*}}}*/
248
249
static void php_pcre_shutdown_pcre2(void)
250
0
{/*{{{*/
251
0
  if (gctx) {
252
0
    pcre2_general_context_free(gctx);
253
0
    gctx = NULL;
254
0
  }
255
256
0
  if (cctx) {
257
0
    pcre2_compile_context_free(cctx);
258
0
    cctx = NULL;
259
0
  }
260
261
0
  if (mctx) {
262
0
    pcre2_match_context_free(mctx);
263
0
    mctx = NULL;
264
0
  }
265
266
#ifdef HAVE_PCRE_JIT_SUPPORT
267
  /* Stack may only be destroyed when no cached patterns
268
    possibly associated with it do exist. */
269
  if (jit_stack) {
270
    pcre2_jit_stack_free(jit_stack);
271
    jit_stack = NULL;
272
  }
273
#endif
274
275
0
  if (mdata) {
276
0
    pcre2_match_data_free(mdata);
277
0
    mdata = NULL;
278
0
  }
279
280
0
  pcre2_init_ok = 0;
281
0
}/*}}}*/
282
283
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
284
16
{
285
16
  php_pcre_mutex_alloc();
286
287
16
  zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
288
289
16
  pcre_globals->backtrack_limit = 0;
290
16
  pcre_globals->recursion_limit = 0;
291
16
  pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
292
16
  ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
293
16
  ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
294
#ifdef HAVE_PCRE_JIT_SUPPORT
295
  pcre_globals->jit = 1;
296
#endif
297
298
16
  php_pcre_init_pcre2(1);
299
16
  zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
300
16
}
301
/* }}} */
302
303
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
304
0
{
305
0
  zend_hash_destroy(&pcre_globals->pcre_cache);
306
307
0
  php_pcre_shutdown_pcre2();
308
0
  zend_hash_destroy(&char_tables);
309
0
  php_pcre_mutex_free();
310
0
}
311
/* }}} */
312
313
static PHP_INI_MH(OnUpdateBacktrackLimit)
314
16
{/*{{{*/
315
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
316
16
  if (mctx) {
317
16
    pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
318
16
  }
319
320
16
  return SUCCESS;
321
16
}/*}}}*/
322
323
static PHP_INI_MH(OnUpdateRecursionLimit)
324
16
{/*{{{*/
325
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
326
16
  if (mctx) {
327
16
    pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
328
16
  }
329
330
16
  return SUCCESS;
331
16
}/*}}}*/
332
333
#ifdef HAVE_PCRE_JIT_SUPPORT
334
static PHP_INI_MH(OnUpdateJit)
335
{/*{{{*/
336
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
337
  if (PCRE_G(jit) && jit_stack) {
338
    pcre2_jit_stack_assign(mctx, NULL, jit_stack);
339
  } else {
340
    pcre2_jit_stack_assign(mctx, NULL, NULL);
341
  }
342
343
  return SUCCESS;
344
}/*}}}*/
345
#endif
346
347
PHP_INI_BEGIN()
348
  STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
349
  STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
350
#ifdef HAVE_PCRE_JIT_SUPPORT
351
  STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
352
#endif
353
PHP_INI_END()
354
355
/* Fetches a string-valued PCRE2 build configuration item.
 *
 * what: one of the PCRE2_CONFIG_* selectors that yield a string.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL when the item is unavailable (pcre2_config() reports an error or an
 * empty result) or when memory could not be allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
  /* First call with a NULL buffer only asks for the required length. */
  int len = pcre2_config(what, NULL);
  if (len <= 0) {
    /* A negative value is a PCRE2 error code; never turn it into a size. */
    return NULL;
  }

  char *ret = malloc((size_t)len + 1);
  if (ret == NULL) {
    return NULL;
  }

  len = pcre2_config(what, ret);
  if (len <= 0) {
    free(ret);
    return NULL;
  }

  return ret;
}/*}}}*/
368
369
/* {{{ PHP_MINFO_FUNCTION(pcre) */
370
static PHP_MINFO_FUNCTION(pcre)
371
4
{
372
#ifdef HAVE_PCRE_JIT_SUPPORT
373
  uint32_t flag = 0;
374
  char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
375
#endif
376
4
  char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
377
4
  char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
378
379
4
  php_info_print_table_start();
380
4
  php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
381
4
  php_info_print_table_row(2, "PCRE Library Version", version);
382
4
  free(version);
383
4
  php_info_print_table_row(2, "PCRE Unicode Version", unicode);
384
4
  free(unicode);
385
386
#ifdef HAVE_PCRE_JIT_SUPPORT
387
  if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
388
    php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
389
  } else {
390
    php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
391
  }
392
  if (jit_target) {
393
    php_info_print_table_row(2, "PCRE JIT Target", jit_target);
394
  }
395
  free(jit_target);
396
#else
397
4
  php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
398
4
#endif
399
400
#ifdef HAVE_PCRE_VALGRIND_SUPPORT
401
  php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
402
#endif
403
404
4
  php_info_print_table_end();
405
406
4
  DISPLAY_INI_ENTRIES();
407
4
}
408
/* }}} */
409
410
/* {{{ PHP_MINIT_FUNCTION(pcre) */
411
static PHP_MINIT_FUNCTION(pcre)
412
16
{
413
#ifdef HAVE_PCRE_JIT_SUPPORT
414
  if (UNEXPECTED(!pcre2_init_ok)) {
415
    /* Retry. */
416
    php_pcre_init_pcre2(PCRE_G(jit));
417
    if (!pcre2_init_ok) {
418
      return FAILURE;
419
    }
420
  }
421
#endif
422
423
16
  REGISTER_INI_ENTRIES();
424
425
16
  php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
426
427
16
  register_php_pcre_symbols(module_number);
428
429
16
  return SUCCESS;
430
16
}
431
/* }}} */
432
433
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
434
static PHP_MSHUTDOWN_FUNCTION(pcre)
435
0
{
436
0
  UNREGISTER_INI_ENTRIES();
437
438
0
  free(php_pcre_version);
439
440
0
  return SUCCESS;
441
0
}
442
/* }}} */
443
444
/* {{{ PHP_RINIT_FUNCTION(pcre) */
445
static PHP_RINIT_FUNCTION(pcre)
446
229k
{
447
#ifdef HAVE_PCRE_JIT_SUPPORT
448
  if (UNEXPECTED(!pcre2_init_ok)) {
449
    /* Retry. */
450
    php_pcre_mutex_lock();
451
    php_pcre_init_pcre2(PCRE_G(jit));
452
    if (!pcre2_init_ok) {
453
      php_pcre_mutex_unlock();
454
      return FAILURE;
455
    }
456
    php_pcre_mutex_unlock();
457
  }
458
459
  mdata_used = 0;
460
#endif
461
462
229k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
463
229k
  PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
464
229k
  if (!PCRE_G(gctx_zmm)) {
465
0
    return FAILURE;
466
0
  }
467
468
229k
  return SUCCESS;
469
229k
}
470
/* }}} */
471
472
static PHP_RSHUTDOWN_FUNCTION(pcre)
473
229k
{
474
229k
  pcre_cache_entry *pce;
475
75.4M
  ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
476
75.4M
    if (pce->subpats_table) {
477
0
      free_subpats_table(pce->subpats_table, pce->capture_count + 1);
478
0
      pce->subpats_table = NULL;
479
0
    }
480
75.4M
  } ZEND_HASH_FOREACH_END();
481
482
229k
  pcre2_general_context_free(PCRE_G(gctx_zmm));
483
229k
  PCRE_G(gctx_zmm) = NULL;
484
485
229k
  zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
486
229k
  zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
487
229k
  ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
488
229k
  ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
489
229k
  return SUCCESS;
490
229k
}
491
492
/* {{{ static pcre_clean_cache */
493
/* Hash apply callback used for cache eviction.
 *
 * data: cache slot holding a pcre_cache_entry pointer.
 * arg:  pointer to an int holding the number of entries still to remove.
 *
 * Entries with a non-zero refcount are kept. Every other entry is removed,
 * and the walk is stopped once the counter reaches zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
  const pcre_cache_entry *entry = (const pcre_cache_entry *) Z_PTR_P(data);
  int *remaining = (int *) arg;
  int verdict;

  if (entry->refcount) {
    return ZEND_HASH_APPLY_KEEP;
  }

  verdict = ZEND_HASH_APPLY_REMOVE;
  if (--(*remaining) == 0) {
    verdict |= ZEND_HASH_APPLY_STOP;
  }
  return verdict;
}
507
/* }}} */
508
509
0
/* Releases every name stored in a capture name table of num_subpats slots
 * (unused slots are NULL) and then the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
  zend_string **slot = subpat_names;
  zend_string **const stop = subpat_names + num_subpats;

  for (; slot != stop; slot++) {
    if (*slot != NULL) {
      zend_string_release_ex(*slot, false);
    }
  }
  efree(subpat_names);
}
518
519
/* {{{ static make_subpats_table */
520
/* Builds the capture-group-number -> name table of a compiled pattern.
 *
 * name_cnt: number of entries to read from the pattern's name table.
 * pce:      cache entry providing the compiled pattern and capture count.
 *
 * Returns a request-allocated array of capture_count + 1 slots (NULL for
 * unnamed groups), or NULL after emitting a warning when the name table
 * cannot be queried. */
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
  const uint32_t slot_count = pce->capture_count + 1;
  uint32_t entry_size;
  char *entry;
  zend_string **names;
  int rc_table, rc_size;

  rc_table = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &entry);
  rc_size = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &entry_size);
  if (rc_table < 0 || rc_size < 0) {
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc_table < 0 ? rc_table : rc_size);
    return NULL;
  }

  names = ecalloc(slot_count, sizeof(zend_string *));
  for (uint32_t seen = 0; seen < name_cnt; seen++, entry += entry_size) {
    /* Each entry starts with the group number as two bytes, most
     * significant first, followed by the NUL-terminated name. */
    unsigned short group = 0x100 * (unsigned char)entry[0] + (unsigned char)entry[1];
    const char *group_name = entry + 2;

    names[group] = zend_string_init(group_name, strlen(group_name), false);
  }
  return names;
}
544
/* }}} */
545
546
/* Returns the entry's capture name table, building and attaching it on
 * first use. The result is NULL if building the table failed. */
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
  if (pce->subpats_table != NULL) {
    return pce->subpats_table;
  }

  pce->subpats_table = make_subpats_table(name_cnt, pce);
  return pce->subpats_table;
}
553
554
/* {{{ static calculate_unit_length */
555
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
556
/* Byte length of the character starting at `start`: always 1 for non-UTF
 * patterns; for PCRE2_UTF patterns the lead byte plus all continuation
 * bytes (10xxxxxx) that follow it. The subject is assumed to be valid
 * UTF-8 in that case. */
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
{
  const char *cursor;

  if (!(pce->compile_options & PCRE2_UTF)) {
    return 1;
  }

  /* Step over the lead byte, then over every continuation byte. */
  cursor = start + 1;
  while ((*cursor & 0xC0) == 0x80) {
    cursor++;
  }
  return cursor - start;
}
571
/* }}} */
572
573
/* {{{ pcre_get_compiled_regex_cache */
574
/* Looks up, or compiles and caches, the pattern described by `regex`
 * (delimiter + pattern + delimiter + modifiers).
 *
 * regex:        full regex string including delimiters and modifiers.
 * locale_aware: when true and a ctype locale string is set, the cache key is
 *               prefixed with that locale and locale specific character
 *               tables are used for compilation.
 *
 * Returns the cache entry, or NULL on failure. Failures emit an E_WARNING
 * and record an error through pcre_handle_exec_error(). */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
{
  pcre2_code      *re = NULL;
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
  uint32_t       coptions = PCRE2_NO_START_OPTIMIZE;
#else
  uint32_t       coptions = 0;
#endif
  uint32_t       eoptions = 0;
  PCRE2_UCHAR           error[128];
  PCRE2_SIZE           erroffset;
  int                  errnumber;
  char         delimiter;
  char         start_delimiter;
  char         end_delimiter;
  char        *p, *pp;
  char        *pattern;
  size_t         pattern_len;
  uint32_t       poptions = 0;
  const uint8_t       *tables = NULL;
  zval                *zv;
  pcre_cache_entry   new_entry;
  int          rc;
  zend_string     *key;
  pcre_cache_entry  *ret;

  /* `key` is either `regex` itself or a freshly allocated locale-prefixed
     copy; in the latter case every exit path below has to release it. */
  if (locale_aware && BG(ctype_string)) {
    key = zend_string_concat2(
      ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
      ZSTR_VAL(regex), ZSTR_LEN(regex));
  } else {
    key = regex;
  }

  /* Try to lookup the cached regex entry, and if successful, just pass
     back the compiled pattern, otherwise go on and compile it. */
  zv = zend_hash_find(&PCRE_G(pcre_cache), key);
  if (zv) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    return (pcre_cache_entry*)Z_PTR_P(zv);
  }

  p = ZSTR_VAL(regex);
  const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);

  /* Parse through the leading whitespace, and display a warning if we
     get to the end without encountering a delimiter. */
  while (isspace((unsigned char)*p)) p++;
  if (p >= end_p) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    php_error_docref(NULL, E_WARNING, "Empty regular expression");
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /* Get the delimiter and display a warning if it is alphanumeric
     or a backslash. */
  delimiter = *p++;
  if (isalnum((unsigned char)delimiter) || delimiter == '\\' || delimiter == '\0') {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /* For bracket-style delimiters the closing character sits five positions
     after the opening one in the lookup string; closing characters map to
     themselves. */
  start_delimiter = delimiter;
  if ((pp = strchr("([{< )]}> )]}>", delimiter)))
    delimiter = pp[5];
  end_delimiter = delimiter;

  pp = p;

  if (start_delimiter == end_delimiter) {
    /* We need to iterate through the pattern, searching for the ending delimiter,
       but skipping the backslashed delimiters.  If the ending delimiter is not
       found, display a warning. */
    while (pp < end_p) {
      if (*pp == '\\' && pp + 1 < end_p) pp++;
      else if (*pp == delimiter)
        break;
      pp++;
    }
  } else {
    /* We iterate through the pattern, searching for the matching ending
     * delimiter. For each matching starting delimiter, we increment nesting
     * level, and decrement it for each matching ending delimiter. If we
     * reach the end of the pattern without matching, display a warning.
     */
    int brackets = 1;   /* brackets nesting level */
    while (pp < end_p) {
      if (*pp == '\\' && pp + 1 < end_p) pp++;
      else if (*pp == end_delimiter && --brackets <= 0)
        break;
      else if (*pp == start_delimiter)
        brackets++;
      pp++;
    }
  }

  if (pp >= end_p) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    if (start_delimiter == end_delimiter) {
      php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
    } else {
      php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
    }
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /* Make a copy of the actual pattern. */
  pattern_len = pp - p;
  pattern = estrndup(p, pattern_len);

  /* Move on to the options */
  pp++;

  /* Parse through the options, setting appropriate flags.  Display
     a warning if we encounter an unknown modifier. */
  while (pp < end_p) {
    switch (*pp++) {
      /* Perl compatible options */
      case 'i': coptions |= PCRE2_CASELESS;   break;
      case 'm': coptions |= PCRE2_MULTILINE;   break;
      case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
      case 's': coptions |= PCRE2_DOTALL;   break;
      case 'x': coptions |= PCRE2_EXTENDED;   break;

      /* PCRE specific options */
      case 'A': coptions |= PCRE2_ANCHORED;   break;
      case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
#ifdef PCRE2_EXTRA_CASELESS_RESTRICT
      case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
#endif
      case 'S': /* Pass. */         break;
      case 'X': /* Pass. */         break;
      case 'U': coptions |= PCRE2_UNGREEDY;   break;
      case 'u': coptions |= PCRE2_UTF;
  /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
     characters, even in UTF-8 mode. However, this can be changed by setting
     the PCRE2_UCP option. */
#ifdef PCRE2_UCP
            coptions |= PCRE2_UCP;
#endif
        break;
      case 'J': coptions |= PCRE2_DUPNAMES;   break;

      case ' ':
      case '\n':
      case '\r':
        break;

      case 'e': /* legacy eval */
      default:
        if (pp[-1]) {
          php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
        } else {
          php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
        }
        pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
        efree(pattern);
        if (key != regex) {
          zend_string_release_ex(key, 0);
        }
        return NULL;
    }
  }

  /* A locale-prefixed key means locale specific character tables are needed;
     they are generated once per locale and kept in char_tables. */
  if (key != regex) {
    zv = zend_hash_str_lookup(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
    if (Z_ISNULL_P(zv)) {
      tables = pcre2_maketables(gctx);
      if (UNEXPECTED(!tables)) {
        /* Remove the placeholder entry created by zend_hash_str_lookup(),
         * set ptr to NULL first so the destructor (pefree) is safe. */
        ZVAL_PTR(zv, NULL);
        zend_hash_str_del(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
        php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
        pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
        zend_string_release_ex(key, 0);
        efree(pattern);
        return NULL;
      }
      ZVAL_PTR(zv, (void *)tables);
    } else {
      tables = Z_PTR_P(zv);
    }
  }
  pcre2_set_character_tables(cctx, tables);

  pcre2_set_compile_extra_options(cctx, eoptions);

  /* Compile pattern and display a warning if compilation failed. */
  re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);

  if (re == NULL) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    pcre2_get_error_message(errnumber, error, sizeof(error));
    php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    efree(pattern);
    return NULL;
  }

#ifdef HAVE_PCRE_JIT_SUPPORT
  if (PCRE_G(jit)) {
    /* Enable PCRE JIT compiler */
    rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
    if (EXPECTED(rc >= 0)) {
      size_t jit_size = 0;
      if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
        poptions |= PREG_JIT;
      }
    } else if (rc == PCRE2_ERROR_NOMEMORY) {
      php_error_docref(NULL, E_WARNING,
        "Allocation of JIT memory failed, PCRE JIT will be disabled. "
        "This is likely caused by security restrictions. "
        "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
      PCRE_G(jit) = 0;
    } else {
      pcre2_get_error_message(rc, error, sizeof(error));
      php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
      pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    }
  }
#endif
  efree(pattern);

  /*
   * If we reached cache limit, clean out the items from the head of the list;
   * these are supposedly the oldest ones (but not necessarily the least used
   * ones).
   *
   * The comparison is ">=" rather than "==": a cleaning pass skips entries
   * that are still referenced, so it may remove nothing and let the cache
   * grow past the limit. With "==" no further pass would ever be triggered
   * after that.
   */
  if (zend_hash_num_elements(&PCRE_G(pcre_cache)) >= PCRE_CACHE_SIZE) {
    int num_clean = PCRE_CACHE_SIZE / 8;
    zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
  }

  /* Store the compiled pattern and extra info in the cache. */
  new_entry.re = re;
  new_entry.preg_options = poptions;
  new_entry.compile_options = coptions;
  new_entry.refcount = 0;
  new_entry.subpats_table = NULL;

  if ((rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count)) < 0 ||
      (rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count)) < 0) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    pcre2_code_free(new_entry.re);
    php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /*
   * Interned strings are not duplicated when stored in HashTable,
   * but all the interned strings created during HTTP request are removed
   * at end of request. However PCRE_G(pcre_cache) must be consistent
   * on the next request as well. So we disable usage of interned strings
   * as hash keys especially for this table.
   * See bug #63180
   */
  if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
    zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
    GC_MAKE_PERSISTENT_LOCAL(str);

    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
    zend_string_release(str);
  } else {
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
  }

  if (key != regex) {
    zend_string_release_ex(key, 0);
  }

  return ret;
}
864
/* }}} */
865
866
/* {{{ pcre_get_compiled_regex_cache */
867
/* Convenience wrapper: locale-aware lookup/compilation of `regex`. */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
  const bool locale_aware = true;

  return pcre_get_compiled_regex_cache_ex(regex, locale_aware);
}
871
/* }}} */
872
873
/* {{{ pcre_get_compiled_regex */
874
/* Returns the compiled pattern for `regex`, or NULL if it could not be
 * obtained. When capture_count is non-NULL it receives the pattern's
 * capture count, or 0 on failure. */
PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
{
  pcre_cache_entry *entry = pcre_get_compiled_regex_cache(regex);

  if (entry == NULL) {
    if (capture_count) {
      *capture_count = 0;
    }
    return NULL;
  }

  if (capture_count) {
    *capture_count = entry->capture_count;
  }
  return entry->re;
}
884
/* }}} */
885
886
/* XXX For the cases where it's only about match yes/no and no capture
887
    required, perhaps just a minimum sized data would suffice. */
888
/* Provides a match data block suitable for `re`.
 *
 * capture_count: capture count of the pattern, or 0 to have it looked up.
 * re:            compiled pattern; must not be NULL.
 *
 * The preallocated thread-local block is handed out when it is free and
 * large enough; otherwise a new block is created from the pattern. Release
 * the result with php_pcre_free_match_data(). */
PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
{/*{{{*/
  int info_rc = 0;

  assert(NULL != re);

  if (UNEXPECTED(mdata_used)) {
    return pcre2_match_data_create_from_pattern(re, gctx);
  }

  if (capture_count == 0) {
    /* As we deal with a non cached pattern, no other way to gather this info. */
    info_rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
  }

  if (info_rc < 0 || capture_count + 1 > PHP_PCRE_PREALLOC_MDATA_SIZE) {
    return pcre2_match_data_create_from_pattern(re, gctx);
  }

  mdata_used = 1;
  return mdata;
}/*}}}*/
909
910
/* Counterpart of php_pcre_create_match_data(): marks the shared block as
 * free again, or frees a block that was allocated separately. */
PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
{/*{{{*/
  if (EXPECTED(match_data == mdata)) {
    mdata_used = 0;
    return;
  }

  pcre2_match_data_free(match_data);
}/*}}}*/
918
919
0
/* Stores a new two-element array (NULL, -1) in `pair`. */
static void init_unmatched_null_pair(zval *pair) {
  zval match_part, offset_part;

  ZVAL_NULL(&match_part);
  ZVAL_LONG(&offset_part, -1);
  ZVAL_ARR(pair, zend_new_pair(&match_part, &offset_part));
}
925
926
0
/* Stores a new two-element array ("", -1) in `pair`. */
static void init_unmatched_empty_pair(zval *pair) {
  zval match_part, offset_part;

  ZVAL_EMPTY_STRING(&match_part);
  ZVAL_LONG(&offset_part, -1);
  ZVAL_ARR(pair, zend_new_pair(&match_part, &offset_part));
}
932
933
static zend_always_inline void populate_match_value_str(
934
688
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
935
688
  ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
936
688
}
937
938
static zend_always_inline void populate_match_value(
939
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
940
688
    bool unmatched_as_null) {
941
688
  if (PCRE2_UNSET == start_offset) {
942
0
    if (unmatched_as_null) {
943
0
      ZVAL_NULL(val);
944
0
    } else {
945
0
      ZVAL_EMPTY_STRING(val);
946
0
    }
947
688
  } else {
948
688
    populate_match_value_str(val, subject, start_offset, end_offset);
949
688
  }
950
688
}
951
952
static inline void add_named(
953
0
    HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
954
0
  ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
955
956
  /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
957
   * In this case we want to preserve the one that actually has a value. */
958
0
  if (!unmatched) {
959
0
    zend_hash_update(subpats, name, val);
960
0
  } else {
961
0
    if (!zend_hash_add(subpats, name, val)) {
962
0
      return;
963
0
    }
964
0
  }
965
0
  Z_TRY_ADDREF_P(val);
966
0
}
967
968
/* {{{ add_offset_pair */
969
static inline void add_offset_pair(
970
    HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
971
    zend_string *name, zend_long unmatched_as_null)
972
0
{
973
0
  zval match_pair;
974
975
  /* Add (match, offset) to the return value */
976
0
  if (PCRE2_UNSET == start_offset) {
977
0
    if (unmatched_as_null) {
978
0
      do {
979
0
        if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
980
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
981
0
            init_unmatched_null_pair(&match_pair);
982
0
            break;
983
0
          } else {
984
0
            init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
985
0
          }
986
0
        }
987
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
988
0
      } while (0);
989
0
    } else {
990
0
      do {
991
0
        if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
992
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
993
0
            init_unmatched_empty_pair(&match_pair);
994
0
            break;
995
0
          } else {
996
0
            init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
997
0
          }
998
0
        }
999
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1000
0
      } while (0);
1001
0
    }
1002
0
  } else {
1003
0
    zval val1, val2;
1004
0
    populate_match_value_str(&val1, subject, start_offset, end_offset);
1005
0
    ZVAL_LONG(&val2, start_offset);
1006
0
    ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1007
0
  }
1008
1009
0
  if (name) {
1010
0
    add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1011
0
  }
1012
0
  zend_hash_next_index_insert_new(result, &match_pair);
1013
0
}
1014
/* }}} */
1015
1016
static void populate_subpat_array(
1017
    HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1018
349
    uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1019
349
  zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1020
349
  zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1021
349
  zval val;
1022
349
  int i;
1023
349
  if (subpat_names) {
1024
0
    if (offset_capture) {
1025
0
      for (i = 0; i < count; i++) {
1026
0
        add_offset_pair(
1027
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1],
1028
0
          subpat_names[i], unmatched_as_null);
1029
0
      }
1030
0
      if (unmatched_as_null) {
1031
0
        for (i = count; i < num_subpats; i++) {
1032
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1033
0
        }
1034
0
      }
1035
0
    } else {
1036
0
      for (i = 0; i < count; i++) {
1037
0
        populate_match_value(
1038
0
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1039
0
        if (subpat_names[i]) {
1040
0
          add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
1041
0
        }
1042
0
        zend_hash_next_index_insert_new(subpats_ht, &val);
1043
0
      }
1044
0
      if (unmatched_as_null) {
1045
0
        for (i = count; i < num_subpats; i++) {
1046
0
          ZVAL_NULL(&val);
1047
0
          if (subpat_names[i]) {
1048
0
            zend_hash_add(subpats_ht, subpat_names[i], &val);
1049
0
          }
1050
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1051
0
        }
1052
0
      }
1053
0
    }
1054
349
  } else {
1055
349
    if (offset_capture) {
1056
0
      for (i = 0; i < count; i++) {
1057
0
        add_offset_pair(
1058
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1059
0
      }
1060
0
      if (unmatched_as_null) {
1061
0
        for (i = count; i < num_subpats; i++) {
1062
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1063
0
        }
1064
0
      }
1065
349
    } else {
1066
1.03k
      for (i = 0; i < count; i++) {
1067
688
        populate_match_value(
1068
688
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1069
688
        zend_hash_next_index_insert_new(subpats_ht, &val);
1070
688
      }
1071
349
      if (unmatched_as_null) {
1072
0
        ZVAL_NULL(&val);
1073
0
        for (i = count; i < num_subpats; i++) {
1074
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1075
0
        }
1076
0
      }
1077
349
    }
1078
349
  }
1079
  /* Add MARK, if available */
1080
349
  if (mark) {
1081
0
    ZVAL_STRING(&val, (char *)mark);
1082
0
    zend_hash_str_update(subpats_ht, ZEND_STRL("MARK"), &val);
1083
0
  }
1084
349
}
1085
1086
/* Shared implementation behind preg_match() and preg_match_all(): parses the
 * userland arguments, resolves the compiled pattern and delegates the actual
 * matching to php_pcre_match_impl(). `global` selects match-all behaviour. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
  zend_string *regex;             /* Regular expression */
  zend_string *subject;           /* String to match against */
  zval        *subpats = NULL;    /* Array for subpatterns */
  zend_long    flags = 0;         /* Match control flags */
  zend_long    start_offset = 0;  /* Where the new search starts */

  ZEND_PARSE_PARAMETERS_START(2, 5)
    Z_PARAM_STR(regex)
    Z_PARAM_STR(subject)
    Z_PARAM_OPTIONAL
    Z_PARAM_ZVAL(subpats)
    Z_PARAM_LONG(flags)
    Z_PARAM_LONG(start_offset)
  ZEND_PARSE_PARAMETERS_END();

  /* Compile regex or get it from cache. */
  pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    RETURN_FALSE;
  }

  /* php_pcre_match_impl() negates negative offsets, which is not possible
   * for ZEND_LONG_MIN, so that value is rejected up front. */
  if (start_offset == ZEND_LONG_MIN) {
    zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
    RETURN_THROWS();
  }

  /* Keep the entry's refcount raised for the duration of the match. */
  pce->refcount++;
  php_pcre_match_impl(pce, subject, return_value, subpats,
    global, flags, start_offset);
  pce->refcount--;
}
1120
/* }}} */
1121
1122
static zend_always_inline bool is_known_valid_utf8(
1123
1.31k
    zend_string *subject_str, PCRE2_SIZE start_offset) {
1124
1.31k
  if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1125
    /* We don't know whether the string is valid UTF-8 or not. */
1126
1.31k
    return false;
1127
1.31k
  }
1128
1129
1
  if (start_offset == ZSTR_LEN(subject_str)) {
1130
    /* Degenerate case: Offset points to end of string. */
1131
1
    return true;
1132
1
  }
1133
1134
  /* Check that the offset does not point to an UTF-8 continuation byte. */
1135
0
  return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1136
1
}
1137
1138
/* {{{ php_pcre_match_impl() */
1139
/* Run a (single or global) match of `pce` against `subject_str`.
 *
 * pce           compiled pattern cache entry.
 * subject_str   string to match against.
 * return_value  receives the number of matches found, or false when an error
 *               was recorded in PCRE_G(error_code).
 * subpats       optional output zval; when non-NULL it is reset to an empty
 *               array and filled with the captured groups.
 * global        true for preg_match_all() semantics, false for preg_match().
 * flags         PREG_OFFSET_CAPTURE / PREG_UNMATCHED_AS_NULL plus, in the low
 *               byte, the result ordering (PREG_PATTERN_ORDER / PREG_SET_ORDER,
 *               only valid when `global` is set).
 * start_offset  where to start matching; a negative value counts from the end
 *               of the subject.
 *
 * The shared preallocated match data (`mdata`) is borrowed when it is free and
 * large enough; every exit taken after that point must restore `mdata_used`. */
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
  zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
{
  zval       result_set;    /* Holds a set of subpatterns after
                       a global match */
  HashTable    **match_sets = NULL; /* An array of sets of matches for each
                       subpattern after a global match */
  uint32_t     options;     /* Execution options */
  int        count;       /* Count of matched subpatterns */
  uint32_t     num_subpats;   /* Number of captured subpatterns */
  int        matched;     /* Has anything matched */
  zend_string    **subpat_names;    /* Array for named subpatterns */
  size_t       i;
  uint32_t     subpats_order;   /* Order of subpattern matches */
  uint32_t     offset_capture;  /* Capture match offsets: yes/no */
  zend_long    unmatched_as_null; /* Null non-matches: yes/no */
  PCRE2_SPTR       mark = NULL;   /* Target for MARK name */
  HashTable   *marks = NULL;   /* Array of marks for PREG_PATTERN_ORDER */
  pcre2_match_data *match_data;
  PCRE2_SIZE     start_offset2, orig_start_offset;
  bool old_mdata_used;

  char *subject = ZSTR_VAL(subject_str);
  size_t subject_len = ZSTR_LEN(subject_str);

  /* Overwrite the passed-in value for subpatterns with an empty array. */
  if (subpats != NULL) {
    subpats = zend_try_array_init(subpats);
    if (!subpats) {
      RETURN_THROWS();
    }
  }

  subpats_order = global ? PREG_PATTERN_ORDER : 0;

  if (flags) {
    offset_capture = flags & PREG_OFFSET_CAPTURE;
    unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;

    /*
     * subpats_order is pre-set to pattern mode so we change it only if
     * necessary.
     */
    if (flags & 0xff) {
      subpats_order = flags & 0xff;
      if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
        (!global && subpats_order != 0)) {
        zend_argument_value_error(4, "must be a PREG_* constant");
        RETURN_THROWS();
      }
    }
  } else {
    offset_capture = 0;
    unmatched_as_null = 0;
  }

  /* Negative offset counts from the end of the string. */
  if (start_offset < 0) {
    if ((PCRE2_SIZE)-start_offset <= subject_len) {
      start_offset2 = subject_len + start_offset;
    } else {
      start_offset2 = 0;
    }
  } else {
    start_offset2 = (PCRE2_SIZE)start_offset;
  }

  if (start_offset2 > subject_len) {
    pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
    RETURN_FALSE;
  }

  /* Number of offset pairs needed: one per capture group plus the whole match. */
  num_subpats = pce->capture_count + 1;

  /*
   * Build a mapping from subpattern numbers to their names. We will
   * allocate the table only if there are any named subpatterns.
   */
  subpat_names = NULL;
  if (subpats && pce->name_count > 0) {
    subpat_names = ensure_subpats_table(pce->name_count, pce);
    if (UNEXPECTED(!subpat_names)) {
      RETURN_FALSE;
    }
  }

  matched = 0;
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Borrow the shared match data when possible, remembering the previous
   * state of the flag so it can be restored on the way out. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      RETURN_FALSE;
    }
  }

  /* Allocate match sets array and initialize the values. */
  if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
    match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
    for (i=0; i<num_subpats; i++) {
      match_sets[i] = zend_new_array(0);
    }
  }

  /* Array of subpattern offsets */
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

  /* Skip PCRE2's UTF-8 validation unless the pattern is in UTF mode and the
   * subject is not already known to be valid from this offset. */
  orig_start_offset = start_offset2;
  options =
    (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
      ? 0 : PCRE2_NO_UTF_CHECK;

  /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
  if ((pce->preg_options & PREG_JIT) && options) {
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  } else
#endif
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
      options, match_data, mctx);

  while (1) {
    /* If something has matched */
    if (count >= 0) {
      /* Check for too many substrings condition. */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
        count = num_subpats;
      }

matched:
      matched++;

      /* If subpatterns array has been passed, fill it in with values. */
      if (subpats != NULL) {
        /* Try to get the list of substrings and display a warning if failed. */
        if (UNEXPECTED(offsets[1] < offsets[0])) {
          if (match_sets) {
            for (i = 0; i < num_subpats; i++) {
              zend_array_destroy(match_sets[i]);
            }
            efree(match_sets);
          }
          if (marks) {
            zend_array_destroy(marks);
          }
          if (match_data != mdata) {
            pcre2_match_data_free(match_data);
          }
          /* Give the shared match data back; without this the flag would
           * stay set and every later match would have to allocate. */
          mdata_used = old_mdata_used;
          php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
          RETURN_FALSE;
        }

        if (global) { /* global pattern matching */
          if (subpats_order == PREG_PATTERN_ORDER) {
            /* For each subpattern, insert it into the appropriate array. */
            if (offset_capture) {
              for (i = 0; i < count; i++) {
                add_offset_pair(
                  match_sets[i], subject, offsets[2*i], offsets[2*i+1],
                  NULL, unmatched_as_null);
              }
            } else {
              for (i = 0; i < count; i++) {
                zval val;
                populate_match_value(
                  &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
                zend_hash_next_index_insert_new(match_sets[i], &val);
              }
            }
            mark = pcre2_get_mark(match_data);
            /* Add MARK, if available */
            if (mark) {
              if (!marks) {
                marks = zend_new_array(0);
              }
              zval tmp;
              ZVAL_STRING(&tmp, (char *) mark);
              zend_hash_index_add_new(marks, matched - 1, &tmp);
            }
            /*
             * If the number of captured subpatterns on this run is
             * less than the total possible number, pad the result
             * arrays with NULLs or empty strings.
             */
            if (count < num_subpats) {
              for (i = count; i < num_subpats; i++) {
                if (offset_capture) {
                  add_offset_pair(
                    match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
                    NULL, unmatched_as_null);
                } else if (unmatched_as_null) {
                  zval tmp;
                  ZVAL_NULL(&tmp);
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
                } else {
                  zval tmp;
                  ZVAL_EMPTY_STRING(&tmp);
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
                }
              }
            }
          } else {
            /* Allocate and populate the result set array */
            mark = pcre2_get_mark(match_data);
            array_init_size(&result_set, count + (mark ? 1 : 0));
            populate_subpat_array(
              Z_ARRVAL(result_set), subject, offsets, subpat_names,
              num_subpats, count, mark, flags);
            /* And add it to the output array */
            zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
          }
        } else {     /* single pattern matching */
          /* For each subpattern, insert it into the subpatterns array. */
          mark = pcre2_get_mark(match_data);
          populate_subpat_array(
            Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags);
          break;
        }
      }

      /* Advance to the next piece. */
      start_offset2 = offsets[1];

      /* If we have matched an empty string, mimic what Perl's /g options does.
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
         the match again at the same point. If this fails (picked up above) we
         advance to the next character. */
      if (start_offset2 == offsets[0]) {
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
        if (count >= 0) {
          if (global) {
            goto matched;
          } else {
            break;
          }
        } else if (count == PCRE2_ERROR_NOMATCH) {
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
             this is not necessarily the end. We need to advance
             the start offset, and continue. Fudge the offset values
             to achieve this, unless we're already at the end of the string. */
          if (start_offset2 < subject_len) {
            size_t unit_len = calculate_unit_length(pce, subject + start_offset2);

            start_offset2 += unit_len;
          } else {
            break;
          }
        } else {
          goto error;
        }
      }
    } else if (count == PCRE2_ERROR_NOMATCH) {
      break;
    } else {
error:
      pcre_handle_exec_error(count);
      break;
    }

    if (!global) {
      break;
    }

    /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT)) {
      if (start_offset2 > subject_len) {
        pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
        break;
      }
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  }
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }
  mdata_used = old_mdata_used;

  /* Add the match sets to the output array and clean up */
  if (match_sets) {
    if (subpat_names) {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        if (subpat_names[i]) {
          /* The same array is stored under both the name and the index. */
          zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
          GC_ADDREF(match_sets[i]);
        }
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    } else {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    }
    efree(match_sets);

    if (marks) {
      zval tmp;
      ZVAL_ARR(&tmp, marks);
      zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
    }
  }

  if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
    /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
    if ((pce->compile_options & PCRE2_UTF)
        && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
      GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
    }

    RETVAL_LONG(matched);
  } else {
    RETVAL_FALSE;
  }
}
1470
/* }}} */
1471
1472
/* {{{ Perform a Perl-style regular expression match */
1473
PHP_FUNCTION(preg_match)
1474
4.89k
{
1475
4.89k
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1476
4.89k
}
1477
/* }}} */
1478
1479
/* Frameless two-argument variant of preg_match(): no subpattern array, no
 * flags, start offset 0. */
ZEND_FRAMELESS_FUNCTION(preg_match, 2)
{
  zval regex_tmp, subject_tmp;
  zend_string *regex, *subject;

  Z_FLF_PARAM_STR(1, regex, regex_tmp);
  Z_FLF_PARAM_STR(2, subject, subject_tmp);

  /* Compile regex or get it from cache. */
  pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
  if (pce != NULL) {
    /* Keep the entry's refcount raised for the duration of the match. */
    pce->refcount++;
    php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
      /* global */ false, /* flags */ 0, /* start_offset */ 0);
    pce->refcount--;
  } else {
    RETVAL_FALSE;
  }

  /* NOTE(review): the label is kept because the Z_FLF_PARAM_STR macros are
   * expected to jump here on conversion failure - confirm against
   * zend_frameless_function.h. */
flf_clean:
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
  Z_FLF_PARAM_FREE_STR(2, subject_tmp);
}
1503
1504
/* {{{ Perform a Perl-style global regular expression match */
1505
PHP_FUNCTION(preg_match_all)
1506
0
{
1507
0
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1508
0
}
1509
/* }}} */
1510
1511
/* {{{ preg_get_backref */
1512
/* Try to parse a back-reference at *str.
 *
 * *str points at the introducer character (the caller has already seen '\\'
 * or '$'). Accepted forms are <intro>N, <intro>NN and, for '$' only, ${N} /
 * ${NN}, where N is a decimal digit.
 *
 * On success returns 1, stores the reference number in *backref and advances
 * *str past the reference. On failure returns 0 and leaves *str untouched;
 * *backref may already have been written by then. */
static int preg_get_backref(char **str, int *backref)
{
  char *cursor = *str;
  bool braced = false;

  /* The introducer must be followed by at least one more character. */
  if (cursor[1] == '\0') {
    return 0;
  }

  /* "${" opens the braced form; step over the '$' here so the shared advance
   * below lands on the first digit in both forms. */
  if (cursor[0] == '$' && cursor[1] == '{') {
    braced = true;
    cursor++;
  }
  cursor++;

  /* First digit is mandatory. */
  if (*cursor < '0' || *cursor > '9') {
    return 0;
  }
  *backref = *cursor - '0';
  cursor++;

  /* Optional second digit. */
  if (*cursor >= '0' && *cursor <= '9') {
    *backref = *backref * 10 + *cursor - '0';
    cursor++;
  }

  /* The braced form must be closed right after the digits. */
  if (braced) {
    if (*cursor != '}') {
      return 0;
    }
    cursor++;
  }

  *str = cursor;
  return 1;
}
1547
/* }}} */
1548
1549
/* Return NULL if an exception has occurred */
1550
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1551
349
{
1552
349
  zend_string *result_str = NULL;
1553
349
  zval     retval;      /* Function return value */
1554
349
  zval       arg;       /* Argument to pass to function */
1555
1556
349
  array_init_size(&arg, count + (mark ? 1 : 0));
1557
349
  populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags);
1558
1559
349
  fci->retval = &retval;
1560
349
  fci->param_count = 1;
1561
349
  fci->params = &arg;
1562
349
  fci->consumed_args = zend_fci_consumed_arg(0);
1563
349
  zend_call_function(fci, fcc);
1564
349
  zval_ptr_dtor(&arg);
1565
349
  if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1566
58
    return Z_STR(retval);
1567
58
  }
1568
  /* No Exception has occurred */
1569
291
  else if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) {
1570
270
    result_str = zval_try_get_string_func(&retval);
1571
270
  }
1572
291
  zval_ptr_dtor(&retval);
1573
1574
291
  return result_str;
1575
349
}
1576
1577
/* {{{ php_pcre_replace */
1578
/* Replace matches of `regex` in the subject with `replace_str`.
 *
 * Resolves the compiled pattern and forwards every other argument unchanged
 * to php_pcre_replace_impl(). Returns that call's result, or NULL when an
 * exception is already pending or the pattern could not be obtained. */
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
                zend_string *subject_str,
                const char *subject, size_t subject_len,
                zend_string *replace_str,
                size_t limit, size_t *replace_count)
{
  /* Abort on pending exception, e.g. thrown from __toString(). */
  if (UNEXPECTED(EG(exception))) {
    return NULL;
  }

  /* Compile regex or get it from cache. */
  pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    return NULL;
  }

  /* Keep the entry's refcount raised for the duration of the replacement. */
  pce->refcount++;
  zend_string *replaced = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
    limit, replace_count);
  pce->refcount--;

  return replaced;
}
1603
/* }}} */
1604
1605
/* {{{ php_pcre_replace_impl() */
1606
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1607
230
{
1608
230
  uint32_t     options;     /* Execution options */
1609
230
  int        count;       /* Count of matched subpatterns */
1610
230
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1611
230
  size_t       new_len;     /* Length of needed storage */
1612
230
  size_t       alloc_len;     /* Actual allocated length */
1613
230
  size_t       match_len;     /* Length of the current match */
1614
230
  int        backref;     /* Backreference number */
1615
230
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1616
230
  size_t       last_end_offset; /* Where the last search ended */
1617
230
  char      *walkbuf,     /* Location of current replacement in the result */
1618
230
          *walk,        /* Used to walk the replacement string */
1619
230
           walk_last;     /* Last walked character */
1620
230
  const char    *match,       /* The current match */
1621
230
          *piece,       /* The current piece of subject */
1622
230
          *replace_end;   /* End of replacement string */
1623
230
  size_t      result_len;     /* Length of result */
1624
230
  zend_string   *result;      /* Result of replacement */
1625
230
  pcre2_match_data *match_data;
1626
230
  bool old_mdata_used;
1627
1628
  /* Calculate the size of the offsets array, and allocate memory for it. */
1629
230
  num_subpats = pce->capture_count + 1;
1630
230
  alloc_len = 0;
1631
230
  result = NULL;
1632
1633
  /* Initialize */
1634
230
  match = NULL;
1635
230
  start_offset = 0;
1636
230
  last_end_offset = 0;
1637
230
  result_len = 0;
1638
230
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1639
1640
230
  old_mdata_used = mdata_used;
1641
230
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1642
38
    mdata_used = true;
1643
38
    match_data = mdata;
1644
192
  } else {
1645
192
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1646
192
    if (!match_data) {
1647
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1648
0
      return NULL;
1649
0
    }
1650
192
  }
1651
1652
230
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1653
1654
  /* Array of subpattern offsets */
1655
230
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1656
1657
  /* Execute the regular expression. */
1658
#ifdef HAVE_PCRE_JIT_SUPPORT
1659
  if ((pce->preg_options & PREG_JIT) && options) {
1660
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1661
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1662
  } else
1663
#endif
1664
230
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1665
230
      options, match_data, mctx);
1666
1667
683
  while (1) {
1668
683
    piece = subject + last_end_offset;
1669
1670
683
    if (count >= 0 && limit > 0) {
1671
466
      bool simple_string;
1672
1673
      /* Check for too many substrings condition. */
1674
466
      if (UNEXPECTED(count == 0)) {
1675
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1676
0
        count = num_subpats;
1677
0
      }
1678
1679
482
matched:
1680
482
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1681
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1682
0
        if (result) {
1683
0
          zend_string_release_ex(result, 0);
1684
0
          result = NULL;
1685
0
        }
1686
0
        break;
1687
0
      }
1688
1689
482
      if (replace_count) {
1690
482
        ++*replace_count;
1691
482
      }
1692
1693
      /* Set the match location in subject */
1694
482
      match = subject + offsets[0];
1695
1696
482
      new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1697
1698
482
      walk = ZSTR_VAL(replace_str);
1699
482
      replace_end = walk + ZSTR_LEN(replace_str);
1700
482
      walk_last = 0;
1701
482
      simple_string = true;
1702
846
      while (walk < replace_end) {
1703
364
        if ('\\' == *walk || '$' == *walk) {
1704
5
          simple_string = false;
1705
5
          if (walk_last == '\\') {
1706
0
            walk++;
1707
0
            walk_last = 0;
1708
0
            continue;
1709
0
          }
1710
5
          if (preg_get_backref(&walk, &backref)) {
1711
0
            if (backref < count)
1712
0
              new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1713
0
            continue;
1714
0
          }
1715
5
        }
1716
364
        new_len++;
1717
364
        walk++;
1718
364
        walk_last = walk[-1];
1719
364
      }
1720
1721
482
      if (new_len >= alloc_len) {
1722
158
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1723
158
        if (result == NULL) {
1724
106
          result = zend_string_alloc(alloc_len, 0);
1725
106
        } else {
1726
52
          result = zend_string_extend(result, alloc_len, 0);
1727
52
        }
1728
158
      }
1729
1730
482
      if (match-piece > 0) {
1731
        /* copy the part of the string before the match */
1732
437
        memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1733
437
        result_len += (match-piece);
1734
437
      }
1735
1736
482
      if (simple_string) {
1737
        /* copy replacement */
1738
477
        memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1739
477
        result_len += ZSTR_LEN(replace_str);
1740
477
      } else {
1741
        /* copy replacement and backrefs */
1742
5
        walkbuf = ZSTR_VAL(result) + result_len;
1743
1744
5
        walk = ZSTR_VAL(replace_str);
1745
5
        walk_last = 0;
1746
102
        while (walk < replace_end) {
1747
97
          if ('\\' == *walk || '$' == *walk) {
1748
5
            if (walk_last == '\\') {
1749
0
              *(walkbuf-1) = *walk++;
1750
0
              walk_last = 0;
1751
0
              continue;
1752
0
            }
1753
5
            if (preg_get_backref(&walk, &backref)) {
1754
0
              if (backref < count) {
1755
0
                if (offsets[backref<<1] < SIZE_MAX) {
1756
0
                  match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1757
0
                  walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1758
0
                }
1759
0
              }
1760
0
              continue;
1761
0
            }
1762
5
          }
1763
97
          *walkbuf++ = *walk++;
1764
97
          walk_last = walk[-1];
1765
97
        }
1766
5
        *walkbuf = '\0';
1767
        /* increment the result length by how much we've added to the string */
1768
5
        result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1769
5
      }
1770
1771
482
      limit--;
1772
1773
      /* Advance to the next piece. */
1774
482
      start_offset = last_end_offset = offsets[1];
1775
1776
      /* If we have matched an empty string, mimic what Perl's /g options does.
1777
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1778
         the match again at the same point. If this fails (picked up above) we
1779
         advance to the next character. */
1780
482
      if (start_offset == offsets[0]) {
1781
372
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1782
372
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1783
1784
372
        piece = subject + start_offset;
1785
372
        if (count >= 0 && limit > 0) {
1786
16
          goto matched;
1787
356
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1788
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1789
             this is not necessarily the end. We need to advance
1790
             the start offset, and continue. Fudge the offset values
1791
             to achieve this, unless we're already at the end of the string. */
1792
356
          if (start_offset < subject_len) {
1793
343
            size_t unit_len = calculate_unit_length(pce, piece);
1794
343
            start_offset += unit_len;
1795
343
          } else {
1796
13
            goto not_matched;
1797
13
          }
1798
356
        } else {
1799
0
          goto error;
1800
0
        }
1801
372
      }
1802
1803
482
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1804
230
not_matched:
1805
230
      if (!result && subject_str) {
1806
124
        result = zend_string_copy(subject_str);
1807
124
        break;
1808
124
      }
1809
      /* now we know exactly how long it is */
1810
106
      alloc_len = result_len + subject_len - last_end_offset;
1811
106
      if (NULL != result) {
1812
106
        result = zend_string_realloc(result, alloc_len, 0);
1813
106
      } else {
1814
0
        result = zend_string_alloc(alloc_len, 0);
1815
0
      }
1816
      /* stick that last bit of string on our output */
1817
106
      memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1818
106
      result_len += subject_len - last_end_offset;
1819
106
      ZSTR_VAL(result)[result_len] = '\0';
1820
106
      ZSTR_LEN(result) = result_len;
1821
106
      break;
1822
230
    } else {
1823
0
error:
1824
0
      pcre_handle_exec_error(count);
1825
0
      if (result) {
1826
0
        zend_string_release_ex(result, 0);
1827
0
        result = NULL;
1828
0
      }
1829
0
      break;
1830
0
    }
1831
1832
#ifdef HAVE_PCRE_JIT_SUPPORT
1833
    if (pce->preg_options & PREG_JIT) {
1834
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1835
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1836
    } else
1837
#endif
1838
453
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1839
453
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1840
453
  }
1841
230
  if (match_data != mdata) {
1842
192
    pcre2_match_data_free(match_data);
1843
192
  }
1844
230
  mdata_used = old_mdata_used;
1845
1846
230
  return result;
1847
230
}
1848
/* }}} */
1849
1850
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str,
1851
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1852
  size_t limit, size_t *replace_count, zend_long flags
1853
205
) {
1854
205
  uint32_t     options;     /* Execution options */
1855
205
  int        count;       /* Count of matched subpatterns */
1856
205
  zend_string   **subpat_names;   /* Array for named subpatterns */
1857
205
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1858
205
  size_t       alloc_len;     /* Actual allocated length */
1859
205
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1860
205
  size_t       last_end_offset; /* Where the last search ended */
1861
205
  const char    *match,       /* The current match */
1862
205
          *piece;       /* The current piece of subject */
1863
205
  size_t      result_len;     /* Length of result */
1864
205
  zend_string   *result;      /* Result of replacement */
1865
205
  pcre2_match_data *match_data;
1866
205
  bool old_mdata_used;
1867
1868
  /* Calculate the size of the offsets array, and allocate memory for it. */
1869
205
  num_subpats = pce->capture_count + 1;
1870
205
  if (pce->name_count > 0) {
1871
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1872
0
    if (UNEXPECTED(!subpat_names)) {
1873
0
      return NULL;
1874
0
    }
1875
205
  } else {
1876
205
    subpat_names = NULL;
1877
205
  }
1878
1879
205
  alloc_len = 0;
1880
205
  result = NULL;
1881
1882
  /* Initialize */
1883
205
  match = NULL;
1884
205
  start_offset = 0;
1885
205
  last_end_offset = 0;
1886
205
  result_len = 0;
1887
205
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1888
1889
205
  old_mdata_used = mdata_used;
1890
205
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1891
169
    mdata_used = 1;
1892
169
    match_data = mdata;
1893
169
  } else {
1894
36
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1895
36
    if (!match_data) {
1896
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1897
0
      mdata_used = old_mdata_used;
1898
0
      return NULL;
1899
0
    }
1900
36
  }
1901
1902
205
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1903
1904
  /* Array of subpattern offsets */
1905
205
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1906
1907
  /* Execute the regular expression. */
1908
#ifdef HAVE_PCRE_JIT_SUPPORT
1909
  if ((pce->preg_options & PREG_JIT) && options) {
1910
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1911
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1912
  } else
1913
#endif
1914
205
  count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1915
205
      options, match_data, mctx);
1916
1917
531
  while (1) {
1918
530
    piece = ZSTR_VAL(subject_str) + last_end_offset;
1919
1920
530
    if (count >= 0 && limit) {
1921
      /* Check for too many substrings condition. */
1922
349
      if (UNEXPECTED(count == 0)) {
1923
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1924
0
        count = num_subpats;
1925
0
      }
1926
1927
349
matched:
1928
349
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1929
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1930
0
        if (result) {
1931
0
          zend_string_release_ex(result, 0);
1932
0
          result = NULL;
1933
0
        }
1934
0
        break;
1935
0
      }
1936
1937
349
      if (replace_count) {
1938
349
        ++*replace_count;
1939
349
      }
1940
1941
      /* Set the match location in subject */
1942
349
      match = ZSTR_VAL(subject_str) + offsets[0];
1943
1944
      /* Length of needed storage */
1945
349
      size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1946
1947
      /* Use custom function to get replacement string and its length. */
1948
349
      zend_string *eval_result = preg_do_repl_func(
1949
349
        fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count,
1950
349
        pcre2_get_mark(match_data), flags);
1951
1952
349
      if (UNEXPECTED(eval_result == NULL)) {
1953
20
        goto error;
1954
20
      }
1955
329
      new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1956
329
      if (new_len >= alloc_len) {
1957
228
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1958
228
        if (result == NULL) {
1959
133
          result = zend_string_alloc(alloc_len, 0);
1960
133
        } else {
1961
95
          result = zend_string_extend(result, alloc_len, 0);
1962
95
        }
1963
228
      }
1964
1965
329
      if (match-piece > 0) {
1966
        /* copy the part of the string before the match */
1967
324
        memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1968
324
        result_len += (match-piece);
1969
324
      }
1970
1971
      /* If using custom function, copy result to the buffer and clean up. */
1972
329
      memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1973
329
      result_len += ZSTR_LEN(eval_result);
1974
329
      zend_string_release_ex(eval_result, 0);
1975
1976
329
      limit--;
1977
1978
      /* Advance to the next piece. */
1979
329
      start_offset = last_end_offset = offsets[1];
1980
1981
      /* If we have matched an empty string, mimic what Perl's /g options does.
1982
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1983
         the match again at the same point. If this fails (picked up above) we
1984
         advance to the next character. */
1985
329
      if (start_offset == offsets[0]) {
1986
66
        count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1987
66
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1988
1989
66
        piece = ZSTR_VAL(subject_str) + start_offset;
1990
66
        if (count >= 0 && limit) {
1991
0
          goto matched;
1992
66
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1993
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1994
             this is not necessarily the end. We need to advance
1995
             the start offset, and continue. Fudge the offset values
1996
             to achieve this, unless we're already at the end of the string. */
1997
66
          if (start_offset < ZSTR_LEN(subject_str)) {
1998
63
            size_t unit_len = calculate_unit_length(pce, piece);
1999
63
            start_offset += unit_len;
2000
63
          } else {
2001
3
            goto not_matched;
2002
3
          }
2003
66
        } else {
2004
0
          goto error;
2005
0
        }
2006
66
      }
2007
2008
329
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2009
184
not_matched:
2010
184
      if (result == NULL) {
2011
52
        result = zend_string_copy(subject_str);
2012
52
        break;
2013
52
      }
2014
      /* now we know exactly how long it is */
2015
132
      size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset;
2016
132
      alloc_len = result_len + segment_len;
2017
132
      result = zend_string_realloc(result, alloc_len, 0);
2018
      /* stick that last bit of string on our output */
2019
132
      memcpy(ZSTR_VAL(result) + result_len, piece, segment_len);
2020
132
      result_len += segment_len;
2021
132
      ZSTR_VAL(result)[result_len] = '\0';
2022
132
      ZSTR_LEN(result) = result_len;
2023
132
      break;
2024
184
    } else {
2025
20
error:
2026
20
      pcre_handle_exec_error(count);
2027
20
      if (result) {
2028
0
        zend_string_release_ex(result, 0);
2029
0
        result = NULL;
2030
0
      }
2031
20
      break;
2032
0
    }
2033
#ifdef HAVE_PCRE_JIT_SUPPORT
2034
    if ((pce->preg_options & PREG_JIT)) {
2035
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2036
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2037
    } else
2038
#endif
2039
326
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2040
326
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2041
326
  }
2042
205
  if (match_data != mdata) {
2043
36
    pcre2_match_data_free(match_data);
2044
36
  }
2045
205
  mdata_used = old_mdata_used;
2046
2047
205
  return result;
2048
205
}
2049
2050
/*
 * Looks up (or compiles) `regex` through the pattern cache and runs the
 * callback-based replacement on subject_str. The cache entry's refcount is
 * raised for the duration of the call. Returns NULL when the pattern could
 * not be obtained, otherwise whatever php_pcre_replace_func_impl() returns.
 */
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
							  zend_string *subject_str,
							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
							  size_t limit, size_t *replace_count, zend_long flags)
{
	pcre_cache_entry *const pce = pcre_get_compiled_regex_cache(regex);

	if (pce == NULL) {
		return NULL;
	}

	pce->refcount++;
	zend_string *const replaced =
		php_pcre_replace_func_impl(pce, subject_str, fci, fcc, limit, replace_count, flags);
	pce->refcount--;

	return replaced;
}
2068
2069
/* {{{ php_pcre_replace_array */
2070
/*
 * Applies every pattern in `regex`, in order, to subject_str, feeding the
 * output of one replacement into the next.
 *
 * When replace_ht is given, each pattern consumes the next defined slot of
 * that table as its replacement (the empty string once the table is
 * exhausted); otherwise replace_str is used for every pattern.
 *
 * Returns the final string, or NULL as soon as one replacement fails.
 */
static zend_string *php_pcre_replace_array(HashTable *regex,
	zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject_str, size_t limit, size_t *replace_count)
{
	uint32_t next_replacement = 0;	/* Cursor into replace_ht's slots */
	zval *pattern_zv;

	/* Take a reference so every round can release its input uniformly. */
	zend_string_addref(subject_str);

	if (!replace_ht) {
		ZEND_ASSERT(replace_str != NULL);
	}

	ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
		/* Patterns are used in their string form. */
		zend_string *tmp_pattern;
		zend_string *pattern = zval_get_tmp_string(pattern_zv, &tmp_pattern);

		zend_string *tmp_replacement = NULL;
		zend_string *replacement = replace_str;

		if (replace_ht) {
			/* Default when no further replacement entry exists. */
			replacement = ZSTR_EMPTY_ALLOC();

			/* Advance to the next slot that is not IS_UNDEF. */
			while (next_replacement != replace_ht->nNumUsed) {
				zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_replacement);
				next_replacement++;
				if (Z_TYPE_P(slot) != IS_UNDEF) {
					replacement = zval_get_tmp_string(slot, &tmp_replacement);
					break;
				}
			}
		}

		/* Replace, then make the output the subject of the next round. */
		zend_string *replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
			ZSTR_LEN(subject_str), replacement, limit, replace_count);
		zend_tmp_string_release(tmp_replacement);
		zend_tmp_string_release(tmp_pattern);
		zend_string_release_ex(subject_str, 0);
		subject_str = replaced;

		if (UNEXPECTED(subject_str == NULL)) {
			break;
		}
	} ZEND_HASH_FOREACH_END();

	return subject_str;
}
2143
/* }}} */
2144
2145
/* {{{ php_replace_in_subject */
2146
/*
 * Runs a string-replacement on one subject, dispatching on the form of the
 * pattern argument: a single pattern string goes straight to
 * php_pcre_replace(), a table of patterns goes through
 * php_pcre_replace_array().
 */
static zend_always_inline zend_string *php_replace_in_subject(
	zend_string *regex_str, HashTable *regex_ht,
	zend_string *replace_str, HashTable *replace_ht,
	zend_string *subject, size_t limit, size_t *replace_count)
{
	if (!regex_str) {
		/* Pattern was supplied as an array. */
		ZEND_ASSERT(regex_ht != NULL);
		return php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
			limit, replace_count);
	}

	/* Single pattern: the replacement has to be a string as well. */
	ZEND_ASSERT(replace_str != NULL);
	return php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
		replace_str, limit, replace_count);
}
2164
/* }}} */
2165
2166
static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht,
2167
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2168
  zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2169
230
{
2170
230
  zend_string *result;
2171
2172
230
  if (regex_str) {
2173
230
    result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags);
2174
230
    return result;
2175
230
  } else {
2176
    /* If regex is an array */
2177
0
    zval    *regex_entry;
2178
2179
0
    ZEND_ASSERT(regex_ht != NULL);
2180
2181
0
    zend_string_addref(subject);
2182
2183
    /* For each entry in the regex array, get the entry */
2184
0
    ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2185
      /* Make sure we're dealing with strings. */
2186
0
      zend_string *tmp_regex_entry_str;
2187
0
      zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2188
0
      if (UNEXPECTED(regex_entry_str == NULL)) {
2189
0
        break;
2190
0
      }
2191
2192
      /* Do the actual replacement and put the result back into subject
2193
         for further replacements. */
2194
0
      result = php_pcre_replace_func(
2195
0
        regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2196
0
      zend_tmp_string_release(tmp_regex_entry_str);
2197
0
      zend_string_release(subject);
2198
0
      subject = result;
2199
0
      if (UNEXPECTED(result == NULL)) {
2200
0
        break;
2201
0
      }
2202
0
    } ZEND_HASH_FOREACH_END();
2203
2204
0
    return subject;
2205
0
  }
2206
230
}
2207
2208
static size_t php_preg_replace_func_impl(zval *return_value,
2209
  zend_string *regex_str, const HashTable *regex_ht,
2210
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2211
  zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags)
2212
230
{
2213
230
  zend_string *result;
2214
230
  size_t replace_count = 0;
2215
2216
230
  if (subject_str) {
2217
230
    result = php_replace_in_subject_func(
2218
230
      regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2219
230
    if (result != NULL) {
2220
184
      RETVAL_STR(result);
2221
184
    } else {
2222
46
      RETVAL_NULL();
2223
46
    }
2224
230
  } else {
2225
    /* if subject is an array */
2226
0
    zval    *subject_entry, zv;
2227
0
    zend_string *string_key;
2228
0
    zend_ulong   num_key;
2229
2230
0
    ZEND_ASSERT(subject_ht != NULL);
2231
2232
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2233
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2234
2235
    /* For each subject entry, convert it to string, then perform replacement
2236
       and add the result to the return_value array. */
2237
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2238
0
      zend_string *tmp_subject_entry_str;
2239
0
      zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2240
0
      if (UNEXPECTED(subject_entry_str == NULL)) {
2241
0
        break;
2242
0
      }
2243
2244
0
      result = php_replace_in_subject_func(
2245
0
        regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2246
0
      if (result != NULL) {
2247
        /* Add to return array */
2248
0
        ZVAL_STR(&zv, result);
2249
0
        if (string_key) {
2250
0
          zend_hash_add_new(return_value_ht, string_key, &zv);
2251
0
        } else {
2252
0
          zend_hash_index_add_new(return_value_ht, num_key, &zv);
2253
0
        }
2254
0
      }
2255
0
      zend_tmp_string_release(tmp_subject_entry_str);
2256
0
    } ZEND_HASH_FOREACH_END();
2257
0
  }
2258
2259
230
  return replace_count;
2260
230
}
2261
2262
static void _preg_replace_common(
2263
  zval *return_value,
2264
  HashTable *regex_ht, zend_string *regex_str,
2265
  HashTable *replace_ht, zend_string *replace_str,
2266
  HashTable *subject_ht, zend_string *subject_str,
2267
  zend_long limit,
2268
  zval *zcount,
2269
  bool is_filter
2270
332
) {
2271
332
  size_t replace_count = 0;
2272
332
  zend_string *result;
2273
332
  size_t old_replace_count;
2274
2275
  /* If replace is an array then the regex argument needs to also be an array */
2276
332
  if (replace_ht && !regex_ht) {
2277
0
    zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2278
0
    RETURN_THROWS();
2279
0
  }
2280
2281
332
  if (subject_str) {
2282
332
    old_replace_count = replace_count;
2283
332
    result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2284
332
      subject_str, limit, &replace_count);
2285
332
    if (result != NULL) {
2286
230
      if (!is_filter || replace_count > old_replace_count) {
2287
230
        RETVAL_STR(result);
2288
230
      } else {
2289
0
        zend_string_release_ex(result, 0);
2290
0
        RETVAL_NULL();
2291
0
      }
2292
230
    } else {
2293
102
      RETVAL_NULL();
2294
102
    }
2295
332
  } else {
2296
    /* if subject is an array */
2297
0
    zval    *subject_entry, zv;
2298
0
    zend_string *string_key;
2299
0
    zend_ulong   num_key;
2300
2301
0
    ZEND_ASSERT(subject_ht != NULL);
2302
2303
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2304
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2305
2306
    /* For each subject entry, convert it to string, then perform replacement
2307
       and add the result to the return_value array. */
2308
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2309
0
      old_replace_count = replace_count;
2310
0
      zend_string *tmp_subject_entry_str;
2311
0
      zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2312
0
      result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2313
0
        subject_entry_str, limit, &replace_count);
2314
2315
0
      if (result != NULL) {
2316
0
        if (!is_filter || replace_count > old_replace_count) {
2317
          /* Add to return array */
2318
0
          ZVAL_STR(&zv, result);
2319
0
          if (string_key) {
2320
0
            zend_hash_add_new(return_value_ht, string_key, &zv);
2321
0
          } else {
2322
0
            zend_hash_index_add_new(return_value_ht, num_key, &zv);
2323
0
          }
2324
0
        } else {
2325
0
          zend_string_release_ex(result, 0);
2326
0
        }
2327
0
      }
2328
0
      zend_tmp_string_release(tmp_subject_entry_str);
2329
0
    } ZEND_HASH_FOREACH_END();
2330
0
  }
2331
2332
332
  if (zcount) {
2333
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2334
0
  }
2335
332
}
2336
2337
/* {{{ preg_replace_common */
2338
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2339
338
{
2340
338
  zend_string *regex_str, *replace_str, *subject_str;
2341
338
  HashTable *regex_ht, *replace_ht, *subject_ht;
2342
338
  zend_long limit = -1;
2343
338
  zval *zcount = NULL;
2344
2345
  /* Get function parameters and do error-checking. */
2346
1.01k
  ZEND_PARSE_PARAMETERS_START(3, 5)
2347
1.68k
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2348
1.68k
    Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2349
1.68k
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2350
1.68k
    Z_PARAM_OPTIONAL
2351
1.68k
    Z_PARAM_LONG(limit)
2352
831
    Z_PARAM_ZVAL(zcount)
2353
831
  ZEND_PARSE_PARAMETERS_END();
2354
2355
332
  _preg_replace_common(
2356
332
    return_value,
2357
332
    regex_ht, regex_str,
2358
332
    replace_ht, replace_str,
2359
332
    subject_ht, subject_str,
2360
332
    limit, zcount, is_filter);
2361
332
}
2362
/* }}} */
2363
2364
/* {{{ Perform Perl-style regular expression replacement. */
2365
PHP_FUNCTION(preg_replace)
2366
338
{
2367
338
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2368
338
}
2369
/* }}} */
2370
2371
/* Frameless three-argument variant of preg_replace(): pattern, replacement
 * and subject only. The limit is fixed at -1, no count is reported and
 * filtering is off. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
{
  HashTable *pattern_ht, *replacement_ht, *subject_ht;
  zend_string *pattern_str, *replacement_str, *subject_str;
  zval pattern_scratch, replacement_scratch, subject_scratch;

  Z_FLF_PARAM_ARRAY_HT_OR_STR(1, pattern_ht, pattern_str, pattern_scratch);
  Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replacement_ht, replacement_str, replacement_scratch);
  Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_scratch);

  _preg_replace_common(return_value,
    pattern_ht, pattern_str,
    replacement_ht, replacement_str,
    subject_ht, subject_str,
    /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);

  /* NOTE(review): presumably the label the Z_FLF_PARAM_* macros jump to when
   * an argument is rejected - confirm against the macro definitions. */
flf_clean:;
  Z_FLF_PARAM_FREE_STR(1, pattern_scratch);
  Z_FLF_PARAM_FREE_STR(2, replacement_scratch);
  Z_FLF_PARAM_FREE_STR(3, subject_scratch);
}
2393
2394
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2395
PHP_FUNCTION(preg_replace_callback)
2396
233
{
2397
233
  zval *zcount = NULL;
2398
233
  zend_string *regex_str;
2399
233
  HashTable *regex_ht;
2400
233
  zend_string *subject_str;
2401
233
  HashTable *subject_ht;
2402
233
  zend_long limit = -1, flags = 0;
2403
233
  size_t replace_count;
2404
233
  zend_fcall_info fci = empty_fcall_info;
2405
233
  zend_fcall_info_cache fcc = empty_fcall_info_cache;
2406
2407
  /* Get function parameters and do error-checking. */
2408
698
  ZEND_PARSE_PARAMETERS_START(3, 6)
2409
1.16k
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2410
1.16k
    Z_PARAM_FUNC(fci, fcc)
2411
1.38k
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2412
1.38k
    Z_PARAM_OPTIONAL
2413
1.38k
    Z_PARAM_LONG(limit)
2414
0
    Z_PARAM_ZVAL(zcount)
2415
0
    Z_PARAM_LONG(flags)
2416
233
  ZEND_PARSE_PARAMETERS_END();
2417
2418
230
  replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht,
2419
230
    &fci, &fcc,
2420
230
    subject_str, subject_ht, limit, flags);
2421
230
  if (zcount) {
2422
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2423
0
  }
2424
230
}
2425
/* }}} */
2426
2427
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2428
PHP_FUNCTION(preg_replace_callback_array)
2429
0
{
2430
0
  zval *replace, *zcount = NULL;
2431
0
  HashTable *pattern, *subject_ht;
2432
0
  zend_string *subject_str, *str_idx_regex;
2433
0
  zend_long limit = -1, flags = 0;
2434
0
  size_t replace_count = 0;
2435
2436
  /* Get function parameters and do error-checking. */
2437
0
  ZEND_PARSE_PARAMETERS_START(2, 5)
2438
0
    Z_PARAM_ARRAY_HT(pattern)
2439
0
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2440
0
    Z_PARAM_OPTIONAL
2441
0
    Z_PARAM_LONG(limit)
2442
0
    Z_PARAM_ZVAL(zcount)
2443
0
    Z_PARAM_LONG(flags)
2444
0
  ZEND_PARSE_PARAMETERS_END();
2445
2446
0
  if (subject_ht) {
2447
0
    GC_TRY_ADDREF(subject_ht);
2448
0
  } else {
2449
0
    GC_TRY_ADDREF(subject_str);
2450
0
  }
2451
2452
0
  ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2453
0
    if (!str_idx_regex) {
2454
0
      zend_argument_type_error(1, "must contain only string patterns as keys");
2455
0
      goto error;
2456
0
    }
2457
2458
0
    zend_fcall_info_cache fcc = empty_fcall_info_cache;
2459
0
    zend_fcall_info fci = empty_fcall_info;
2460
0
    fci.size = sizeof(zend_fcall_info);
2461
    /* Copy potential trampoline */
2462
0
    ZVAL_COPY_VALUE(&fci.function_name, replace);
2463
2464
0
    if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2465
0
      zend_argument_type_error(1, "must contain only valid callbacks");
2466
0
      goto error;
2467
0
    }
2468
2469
0
    zval retval;
2470
0
    replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2471
0
      subject_str, subject_ht, limit, flags);
2472
0
    zend_release_fcall_info_cache(&fcc);
2473
2474
0
    switch (Z_TYPE(retval)) {
2475
0
      case IS_ARRAY:
2476
0
        ZEND_ASSERT(subject_ht);
2477
0
        zend_array_release(subject_ht);
2478
0
        subject_ht = Z_ARR(retval);
2479
0
        break;
2480
0
      case IS_STRING:
2481
0
        ZEND_ASSERT(subject_str);
2482
0
        zend_string_release(subject_str);
2483
0
        subject_str = Z_STR(retval);
2484
0
        break;
2485
0
      case IS_NULL:
2486
0
        RETVAL_NULL();
2487
0
        goto error;
2488
0
      default: ZEND_UNREACHABLE();
2489
0
    }
2490
2491
0
    if (EG(exception)) {
2492
0
      goto error;
2493
0
    }
2494
0
  } ZEND_HASH_FOREACH_END();
2495
2496
0
  if (zcount) {
2497
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2498
0
  }
2499
2500
0
  if (subject_ht) {
2501
0
    RETVAL_ARR(subject_ht);
2502
    // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2503
0
    if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2504
0
      Z_TYPE_FLAGS_P(return_value) = 0;
2505
0
    }
2506
0
    return;
2507
0
  } else {
2508
0
    RETURN_STR(subject_str);
2509
0
  }
2510
2511
0
error:
2512
0
  if (subject_ht) {
2513
0
    zend_array_release(subject_ht);
2514
0
  } else {
2515
0
    zend_string_release(subject_str);
2516
0
  }
2517
0
}
2518
/* }}} */
2519
2520
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
2521
PHP_FUNCTION(preg_filter)
2522
0
{
2523
0
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2524
0
}
2525
/* }}} */
2526
2527
/* {{{ Split string into an array using a perl-style regular expression as a delimiter */
2528
PHP_FUNCTION(preg_split)
2529
0
{
2530
0
  zend_string     *regex;     /* Regular expression */
2531
0
  zend_string     *subject;   /* String to match against */
2532
0
  zend_long      limit_val = -1;/* Integer value of limit */
2533
0
  zend_long      flags = 0;   /* Match control flags */
2534
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2535
2536
  /* Get function parameters and do error checking */
2537
0
  ZEND_PARSE_PARAMETERS_START(2, 4)
2538
0
    Z_PARAM_STR(regex)
2539
0
    Z_PARAM_STR(subject)
2540
0
    Z_PARAM_OPTIONAL
2541
0
    Z_PARAM_LONG(limit_val)
2542
0
    Z_PARAM_LONG(flags)
2543
0
  ZEND_PARSE_PARAMETERS_END();
2544
2545
  /* Compile regex or get it from cache. */
2546
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2547
0
    RETURN_FALSE;
2548
0
  }
2549
2550
0
  pce->refcount++;
2551
0
  php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2552
0
  pce->refcount--;
2553
0
}
2554
/* }}} */
2555
2556
/* {{{ php_pcre_split */
2557
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2558
  zend_long limit_val, zend_long flags)
2559
0
{
2560
0
  uint32_t     options;     /* Execution options */
2561
0
  int        count;       /* Count of matched subpatterns */
2562
0
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
2563
0
  PCRE2_SIZE     last_match_offset; /* Location of last match */
2564
0
  uint32_t     no_empty;      /* If NO_EMPTY flag is set */
2565
0
  uint32_t     delim_capture;   /* If delimiters should be captured */
2566
0
  uint32_t     offset_capture;  /* If offsets should be captured */
2567
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2568
0
  zval       tmp;
2569
0
  pcre2_match_data *match_data;
2570
0
  bool old_mdata_used;
2571
0
  char *subject = ZSTR_VAL(subject_str);
2572
2573
0
  no_empty = flags & PREG_SPLIT_NO_EMPTY;
2574
0
  delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2575
0
  offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2576
2577
  /* Initialize return value */
2578
0
  array_init(return_value);
2579
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2580
2581
  /* Calculate the size of the offsets array, and allocate memory for it. */
2582
0
  num_subpats = pce->capture_count + 1;
2583
2584
  /* Start at the beginning of the string */
2585
0
  start_offset = 0;
2586
0
  last_match_offset = 0;
2587
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2588
2589
0
  if (limit_val == -1) {
2590
    /* pass */
2591
0
  } else if (limit_val == 0) {
2592
0
    limit_val = -1;
2593
0
  } else if (limit_val <= 1) {
2594
0
    goto last;
2595
0
  }
2596
2597
0
  old_mdata_used = mdata_used;
2598
0
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2599
0
    mdata_used = true;
2600
0
    match_data = mdata;
2601
0
  } else {
2602
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2603
0
    if (!match_data) {
2604
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2605
0
      zval_ptr_dtor(return_value);
2606
0
      RETURN_FALSE;
2607
0
    }
2608
0
  }
2609
2610
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2611
2612
  /* Array of subpattern offsets */
2613
0
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2614
2615
#ifdef HAVE_PCRE_JIT_SUPPORT
2616
  if ((pce->preg_options & PREG_JIT) && options) {
2617
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2618
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2619
  } else
2620
#endif
2621
0
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2622
0
      options, match_data, mctx);
2623
2624
0
  while (1) {
2625
    /* If something matched */
2626
0
    if (count >= 0) {
2627
      /* Check for too many substrings condition. */
2628
0
      if (UNEXPECTED(count == 0)) {
2629
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2630
0
        count = num_subpats;
2631
0
      }
2632
2633
0
matched:
2634
0
      if (UNEXPECTED(offsets[1] < offsets[0])) {
2635
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2636
0
        break;
2637
0
      }
2638
2639
0
      if (!no_empty || offsets[0] != last_match_offset) {
2640
0
        if (offset_capture) {
2641
          /* Add (match, offset) pair to the return value */
2642
0
          add_offset_pair(
2643
0
            return_value_ht, subject, last_match_offset, offsets[0],
2644
0
            NULL, 0);
2645
0
        } else {
2646
          /* Add the piece to the return value */
2647
0
          populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2648
0
          zend_hash_next_index_insert_new(return_value_ht, &tmp);
2649
0
        }
2650
2651
        /* One less left to do */
2652
0
        if (limit_val != -1)
2653
0
          limit_val--;
2654
0
      }
2655
2656
0
      if (delim_capture) {
2657
0
        size_t i;
2658
0
        for (i = 1; i < count; i++) {
2659
          /* If we have matched a delimiter */
2660
0
          if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2661
0
            if (offset_capture) {
2662
0
              add_offset_pair(
2663
0
                return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2664
0
            } else {
2665
0
              populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2666
0
              zend_hash_next_index_insert_new(return_value_ht, &tmp);
2667
0
            }
2668
0
          }
2669
0
        }
2670
0
      }
2671
2672
      /* Advance to the position right after the last full match */
2673
0
      start_offset = last_match_offset = offsets[1];
2674
2675
      /* If we have matched an empty string, mimic what Perl's /g options does.
2676
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2677
         the match again at the same point. If this fails (picked up above) we
2678
         advance to the next character. */
2679
0
      if (start_offset == offsets[0]) {
2680
        /* Get next piece if no limit or limit not yet reached and something matched*/
2681
0
        if (limit_val != -1 && limit_val <= 1) {
2682
0
          break;
2683
0
        }
2684
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2685
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2686
0
        if (count >= 0) {
2687
0
          goto matched;
2688
0
        } else if (count == PCRE2_ERROR_NOMATCH) {
2689
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2690
             this is not necessarily the end. We need to advance
2691
             the start offset, and continue. Fudge the offset values
2692
             to achieve this, unless we're already at the end of the string. */
2693
0
          if (start_offset < ZSTR_LEN(subject_str)) {
2694
0
            start_offset += calculate_unit_length(pce, subject + start_offset);
2695
0
          } else {
2696
0
            break;
2697
0
          }
2698
0
        } else {
2699
0
          goto error;
2700
0
        }
2701
0
      }
2702
2703
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2704
0
      break;
2705
0
    } else {
2706
0
error:
2707
0
      pcre_handle_exec_error(count);
2708
0
      break;
2709
0
    }
2710
2711
    /* Get next piece if no limit or limit not yet reached and something matched*/
2712
0
    if (limit_val != -1 && limit_val <= 1) {
2713
0
      break;
2714
0
    }
2715
2716
#ifdef HAVE_PCRE_JIT_SUPPORT
2717
    if (pce->preg_options & PREG_JIT) {
2718
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2719
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2720
    } else
2721
#endif
2722
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2723
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2724
0
  }
2725
0
  if (match_data != mdata) {
2726
0
    pcre2_match_data_free(match_data);
2727
0
  }
2728
0
  mdata_used = old_mdata_used;
2729
2730
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2731
0
    zval_ptr_dtor(return_value);
2732
0
    RETURN_FALSE;
2733
0
  }
2734
2735
0
last:
2736
0
  start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2737
2738
0
  if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2739
0
    if (offset_capture) {
2740
      /* Add the last (match, offset) pair to the return value */
2741
0
      add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2742
0
    } else {
2743
      /* Add the last piece to the return value */
2744
0
      if (start_offset == 0) {
2745
0
        ZVAL_STR_COPY(&tmp, subject_str);
2746
0
      } else {
2747
0
        populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2748
0
      }
2749
0
      zend_hash_next_index_insert_new(return_value_ht, &tmp);
2750
0
    }
2751
0
  }
2752
0
}
2753
/* }}} */
2754
2755
/* {{{ Quote regular expression characters plus an optional character */
2756
PHP_FUNCTION(preg_quote)
2757
42
{
2758
42
  zend_string *str;           /* Input string argument */
2759
42
  zend_string *delim = NULL;   /* Additional delimiter argument */
2760
42
  char    *in_str;      /* Input string */
2761
42
  char    *in_str_end;      /* End of the input string */
2762
42
  zend_string *out_str;     /* Output string with quoted characters */
2763
42
  size_t       extra_len;         /* Number of additional characters */
2764
42
  char    *p,         /* Iterator for input string */
2765
42
        *q,         /* Iterator for output string */
2766
42
         delim_char = '\0', /* Delimiter character to be quoted */
2767
42
         c;         /* Current character */
2768
2769
  /* Get the arguments and check for errors */
2770
126
  ZEND_PARSE_PARAMETERS_START(1, 2)
2771
168
    Z_PARAM_STR(str)
2772
42
    Z_PARAM_OPTIONAL
2773
92
    Z_PARAM_STR_OR_NULL(delim)
2774
42
  ZEND_PARSE_PARAMETERS_END();
2775
2776
  /* Nothing to do if we got an empty string */
2777
42
  if (ZSTR_LEN(str) == 0) {
2778
0
    RETURN_EMPTY_STRING();
2779
0
  }
2780
2781
42
  in_str = ZSTR_VAL(str);
2782
42
  in_str_end = in_str + ZSTR_LEN(str);
2783
2784
42
  if (delim) {
2785
4
    delim_char = ZSTR_VAL(delim)[0];
2786
4
  }
2787
2788
  /* Go through the string and quote necessary characters */
2789
42
  extra_len = 0;
2790
42
  p = in_str;
2791
54.7k
  do {
2792
54.7k
    c = *p;
2793
54.7k
    switch(c) {
2794
737
      case '.':
2795
929
      case '\\':
2796
1.33k
      case '+':
2797
1.36k
      case '*':
2798
1.52k
      case '?':
2799
1.67k
      case '[':
2800
1.74k
      case '^':
2801
1.86k
      case ']':
2802
1.87k
      case '$':
2803
2.11k
      case '(':
2804
2.75k
      case ')':
2805
2.83k
      case '{':
2806
3.20k
      case '}':
2807
3.68k
      case '=':
2808
3.68k
      case '!':
2809
3.92k
      case '>':
2810
3.96k
      case '<':
2811
4.04k
      case '|':
2812
4.49k
      case ':':
2813
4.74k
      case '-':
2814
5.13k
      case '#':
2815
5.13k
        extra_len++;
2816
5.13k
        break;
2817
2818
2.10k
      case '\0':
2819
2.10k
        extra_len+=3;
2820
2.10k
        break;
2821
2822
47.5k
      default:
2823
47.5k
        if (c == delim_char) {
2824
8
          extra_len++;
2825
8
        }
2826
47.5k
        break;
2827
54.7k
    }
2828
54.7k
    p++;
2829
54.7k
  } while (p != in_str_end);
2830
2831
42
  if (extra_len == 0) {
2832
1
    RETURN_STR_COPY(str);
2833
1
  }
2834
2835
  /* Allocate enough memory so that even if each character
2836
     is quoted, we won't run out of room */
2837
41
  out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2838
41
  q = ZSTR_VAL(out_str);
2839
41
  p = in_str;
2840
2841
54.7k
  do {
2842
54.7k
    c = *p;
2843
54.7k
    switch(c) {
2844
737
      case '.':
2845
929
      case '\\':
2846
1.33k
      case '+':
2847
1.36k
      case '*':
2848
1.52k
      case '?':
2849
1.67k
      case '[':
2850
1.74k
      case '^':
2851
1.86k
      case ']':
2852
1.87k
      case '$':
2853
2.11k
      case '(':
2854
2.75k
      case ')':
2855
2.83k
      case '{':
2856
3.20k
      case '}':
2857
3.68k
      case '=':
2858
3.68k
      case '!':
2859
3.92k
      case '>':
2860
3.96k
      case '<':
2861
4.04k
      case '|':
2862
4.49k
      case ':':
2863
4.74k
      case '-':
2864
5.13k
      case '#':
2865
5.13k
        *q++ = '\\';
2866
5.13k
        *q++ = c;
2867
5.13k
        break;
2868
2869
2.10k
      case '\0':
2870
2.10k
        *q++ = '\\';
2871
2.10k
        *q++ = '0';
2872
2.10k
        *q++ = '0';
2873
2.10k
        *q++ = '0';
2874
2.10k
        break;
2875
2876
47.4k
      default:
2877
47.4k
        if (c == delim_char) {
2878
8
          *q++ = '\\';
2879
8
        }
2880
47.4k
        *q++ = c;
2881
47.4k
        break;
2882
54.7k
    }
2883
54.7k
    p++;
2884
54.7k
  } while (p != in_str_end);
2885
41
  *q = '\0';
2886
2887
41
  RETURN_NEW_STR(out_str);
2888
41
}
2889
/* }}} */
2890
2891
/* {{{ Searches array and returns entries which match regex */
2892
PHP_FUNCTION(preg_grep)
2893
0
{
2894
0
  zend_string     *regex;     /* Regular expression */
2895
0
  zval        *input;     /* Input array */
2896
0
  zend_long      flags = 0;   /* Match control flags */
2897
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2898
2899
  /* Get arguments and do error checking */
2900
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2901
0
    Z_PARAM_STR(regex)
2902
0
    Z_PARAM_ARRAY(input)
2903
0
    Z_PARAM_OPTIONAL
2904
0
    Z_PARAM_LONG(flags)
2905
0
  ZEND_PARSE_PARAMETERS_END();
2906
2907
  /* Compile regex or get it from cache. */
2908
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2909
0
    RETURN_FALSE;
2910
0
  }
2911
2912
0
  pce->refcount++;
2913
0
  php_pcre_grep_impl(pce, input, return_value, flags);
2914
0
  pce->refcount--;
2915
0
}
2916
/* }}} */
2917
2918
/* Filters the array `input` against the compiled pattern `pce` and stores the
 * selected entries in return_value, keeping their original keys.
 *
 * flags: with PREG_GREP_INVERT the entries that do NOT match are returned
 *        instead of the matching ones.
 *
 * On any failure (match data cannot be allocated, or a match reports an
 * error other than "no match") PCRE_G(error_code) is set, the result array
 * is destroyed and return_value is set to false. */
PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
{
  zval            *entry;             /* An entry in the input array */
  uint32_t     num_subpats;   /* Number of captured subpatterns */
  int        count;       /* Count of matched subpatterns */
  uint32_t     options;     /* Execution options */
  zend_string   *string_key;
  zend_ulong     num_key;
  bool     invert;      /* Whether to return non-matching
                       entries */
  bool old_mdata_used;
  pcre2_match_data *match_data;
  invert = flags & PREG_GREP_INVERT ? 1 : 0;

  /* Whole match plus every capture group. */
  num_subpats = pce->capture_count + 1;

  /* Initialize return array */
  array_init(return_value);
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);

  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Use the preallocated match data when it is free and large enough. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      /* Report the failure the same way as the error path at the end of
         this function: no half-initialized empty array is left behind. */
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      zend_array_destroy(Z_ARR_P(return_value));
      RETURN_FALSE;
    }
  }

  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

  /* Go through the input array */
  ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    zend_string *tmp_subject_str;
    zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
    bool keep_entry;

    /* Perform the match */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT) && options) {
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
        options, match_data, mctx);

    if (count >= 0) {
      /* Check for too many substrings condition. */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
      }
      keep_entry = !invert;
    } else if (count == PCRE2_ERROR_NOMATCH) {
      keep_entry = invert;
    } else {
      pcre_handle_exec_error(count);
      zend_tmp_string_release(tmp_subject_str);
      break;
    }

    /* If the entry fits our requirements, add it to the return array
       under its original key. */
    if (keep_entry) {
      Z_TRY_ADDREF_P(entry);

      if (string_key) {
        zend_hash_update(return_value_ht, string_key, entry);
      } else {
        zend_hash_index_update(return_value_ht, num_key, entry);
      }
    }

    zend_tmp_string_release(tmp_subject_str);
  } ZEND_HASH_FOREACH_END();
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }

  mdata_used = old_mdata_used;

  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
    zend_array_destroy(Z_ARR_P(return_value));
    RETURN_FALSE;
  }
}
/* }}} */
3017
3018
/* {{{ Returns the error code of the last regexp execution. */
3019
PHP_FUNCTION(preg_last_error)
3020
0
{
3021
0
  ZEND_PARSE_PARAMETERS_NONE();
3022
3023
0
  RETURN_LONG(PCRE_G(error_code));
3024
0
}
3025
/* }}} */
3026
3027
/* {{{ Returns the error message of the last regexp execution. */
3028
PHP_FUNCTION(preg_last_error_msg)
3029
0
{
3030
0
  ZEND_PARSE_PARAMETERS_NONE();
3031
3032
0
  RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3033
0
}
3034
/* }}} */
3035
3036
/* {{{ module definition structures */
3037
3038
zend_module_entry pcre_module_entry = {
3039
  STANDARD_MODULE_HEADER,
3040
  "pcre",
3041
  ext_functions,
3042
  PHP_MINIT(pcre),
3043
  PHP_MSHUTDOWN(pcre),
3044
  PHP_RINIT(pcre),
3045
  PHP_RSHUTDOWN(pcre),
3046
  PHP_MINFO(pcre),
3047
  PHP_PCRE_VERSION,
3048
  PHP_MODULE_GLOBALS(pcre),
3049
  PHP_GINIT(pcre),
3050
  PHP_GSHUTDOWN(pcre),
3051
  NULL,
3052
  STANDARD_MODULE_PROPERTIES_EX
3053
};
3054
3055
#ifdef COMPILE_DL_PCRE
3056
ZEND_GET_MODULE(pcre)
3057
#endif
3058
3059
/* }}} */
3060
3061
/* Accessor for the file-level PCRE2 match context `mctx`. */
PHPAPI pcre2_match_context *php_pcre_mctx(void)
{/*{{{*/
  pcre2_match_context *shared_mctx = mctx;

  return shared_mctx;
}/*}}}*/
3065
3066
/* Accessor for the file-level PCRE2 general context `gctx`. */
PHPAPI pcre2_general_context *php_pcre_gctx(void)
{/*{{{*/
  pcre2_general_context *shared_gctx = gctx;

  return shared_gctx;
}/*}}}*/
3070
3071
/* Accessor for the file-level PCRE2 compile context `cctx`. */
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
{/*{{{*/
  pcre2_compile_context *shared_cctx = cctx;

  return shared_cctx;
}/*}}}*/
3075
3076
/* Adds one reference to a pattern cache entry. `pce` must not be NULL. */
PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
{/*{{{*/
  assert(pce != NULL);

  pce->refcount++;
}/*}}}*/
3081
3082
/* Drops one reference from a pattern cache entry. `pce` must not be NULL
 * and its reference count must be non-zero on entry. */
PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
{/*{{{*/
  assert(pce != NULL);
  assert(pce->refcount != 0);

  pce->refcount--;
}/*}}}*/
3088
3089
/* Returns the compiled PCRE2 pattern stored in a cache entry. `pce` must
 * not be NULL. */
PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
{/*{{{*/
  assert(pce != NULL);

  pcre2_code *compiled = pce->re;
  return compiled;
}/*}}}*/