Coverage Report

Created: 2026-04-01 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/php_pcre.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | https://www.php.net/license/3_01.txt                                 |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Author: Andrei Zmievski <andrei@php.net>                             |
14
   +----------------------------------------------------------------------+
15
 */
16
17
#include "php.h"
18
#include "php_ini.h"
19
#include "php_pcre.h"
20
#include "ext/standard/info.h"
21
#include "ext/standard/basic_functions.h"
22
#include "zend_smart_str.h"
23
#include "SAPI.h"
24
25
0
/* Result-ordering selectors (plain values, not bit flags). */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Flag bits; each occupies a distinct bit so they can be OR-ed together. */
#define PREG_OFFSET_CAPTURE         (1 << 8)
#define PREG_UNMATCHED_AS_NULL      (1 << 9)

/* Flag bits for splitting. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Flag bit for grep. */
#define PREG_GREP_INVERT            (1 << 0)

/* Recorded in a cache entry's preg_options once JIT compilation produced code. */
#define PREG_JIT                    (1 << 3)

/* Number of cached patterns at which eviction of unreferenced entries kicks in. */
#define PCRE_CACHE_SIZE 4096
39
40
#ifdef HAVE_PCRE_JIT_SUPPORT
41
#define PHP_PCRE_JIT_SUPPORT 1
42
#else
43
#define PHP_PCRE_JIT_SUPPORT 0
44
#endif
45
46
char *php_pcre_version;
47
48
#include "php_pcre_arginfo.h"
49
50
struct _pcre_cache_entry {
51
  pcre2_code *re;
52
  /* Pointer is not NULL (during request) when there are named captures.
53
   * Length is equal to capture_count + 1 to account for capture group 0.
54
   * This table cache is only valid during request.
55
   * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
56
   * (see GH-17122 and GH-17132). */
57
  zend_string **subpats_table;
58
  uint32_t preg_options;
59
  uint32_t name_count;
60
  uint32_t capture_count;
61
  uint32_t compile_options;
62
  uint32_t refcount;
63
};
64
65
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
66
67
#ifdef HAVE_PCRE_JIT_SUPPORT
68
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
69
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
70
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
71
#endif
72
/* General context using (infallible) system allocator. */
73
ZEND_TLS pcre2_general_context *gctx = NULL;
74
/* These two are global per thread for now. Though it is possible to use these
75
  per pattern. Either one can copy it and use in pce, or one does no global
76
  contexts at all, but creates for every pce. */
77
ZEND_TLS pcre2_compile_context *cctx = NULL;
78
ZEND_TLS pcre2_match_context   *mctx = NULL;
79
ZEND_TLS pcre2_match_data      *mdata = NULL;
80
ZEND_TLS bool              mdata_used = 0;
81
ZEND_TLS uint8_t pcre2_init_ok = 0;
82
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Mutex taken around the PCRE2 re-initialisation retry. It is allocated and
 * freed only from the main thread. */
static MUTEX_T pcre_mt = NULL;
/* All four helpers expand to a single statement and must be followed by ';'
 * at the call site, so they are safe inside unbraced if/else bodies. */
#define php_pcre_mutex_alloc() \
  do { \
    if (tsrm_is_main_thread() && !pcre_mt) { \
      pcre_mt = tsrm_mutex_alloc(); \
    } \
  } while (0)
#define php_pcre_mutex_free() \
  do { \
    if (tsrm_is_main_thread() && pcre_mt) { \
      tsrm_mutex_free(pcre_mt); \
      pcre_mt = NULL; \
    } \
  } while (0)
#define php_pcre_mutex_lock() \
  do { \
    tsrm_mutex_lock(pcre_mt); \
  } while (0)
#define php_pcre_mutex_unlock() \
  do { \
    tsrm_mutex_unlock(pcre_mt); \
  } while (0)
#else
/* No locking is needed in this configuration; keep the helpers as no-op statements. */
#define php_pcre_mutex_alloc() do { } while (0)
#define php_pcre_mutex_free() do { } while (0)
#define php_pcre_mutex_lock() do { } while (0)
#define php_pcre_mutex_unlock() do { } while (0)
#endif
96
97
ZEND_TLS HashTable char_tables;
98
99
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
100
101
/* HashTable destructor for `char_tables`: releases one character table.
 * The stored pointer is freed with the persistent allocator. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
  pefree(Z_PTR_P(data), 1);
}/*}}}*/
106
107
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
108
1.45k
{
109
1.45k
  int preg_code = 0;
110
111
1.45k
  switch (pcre_code) {
112
28
    case PCRE2_ERROR_MATCHLIMIT:
113
28
      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
114
28
      break;
115
116
0
    case PCRE2_ERROR_RECURSIONLIMIT:
117
0
      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
118
0
      break;
119
120
0
    case PCRE2_ERROR_BADUTFOFFSET:
121
0
      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
122
0
      break;
123
124
#ifdef HAVE_PCRE_JIT_SUPPORT
125
    case PCRE2_ERROR_JIT_STACKLIMIT:
126
      preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
127
      break;
128
#endif
129
130
1.43k
    default:
131
1.43k
      if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
132
164
        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
133
1.26k
      } else  {
134
1.26k
        preg_code = PHP_PCRE_INTERNAL_ERROR;
135
1.26k
      }
136
1.43k
      break;
137
1.45k
  }
138
139
1.45k
  PCRE_G(error_code) = preg_code;
140
1.45k
}
141
/* }}} */
142
143
/* Return the static, human-readable message for a PHP_PCRE_* error code.
 * Codes without a dedicated message yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
  const char *msg;

  switch (error_code) {
    case PHP_PCRE_NO_ERROR:
      msg = "No error";
      break;
    case PHP_PCRE_INTERNAL_ERROR:
      msg = "Internal error";
      break;
    case PHP_PCRE_BAD_UTF8_ERROR:
      msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
      break;
    case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
      msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
      break;
    case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
      msg = "Backtrack limit exhausted";
      break;
    case PHP_PCRE_RECURSION_LIMIT_ERROR:
      msg = "Recursion limit exhausted";
      break;

#ifdef HAVE_PCRE_JIT_SUPPORT
    case PHP_PCRE_JIT_STACKLIMIT_ERROR:
      msg = "JIT stack limit exhausted";
      break;
#endif

    default:
      msg = "Unknown error";
      break;
  }

  return msg;
}
168
/* }}} */
169
170
/* HashTable destructor for the regex cache: frees the entry's subpattern-name
 * table (if any), the compiled pattern, and finally the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
  pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

  if (entry) {
    if (entry->subpats_table) {
      /* capture_count + 1 slots: the table also covers capture group 0. */
      free_subpats_table(entry->subpats_table, entry->capture_count + 1);
    }
    pcre2_code_free(entry->re);
    free(entry);
  }
}
180
/* }}} */
181
182
/* PCRE2 allocation callback backed by the persistent allocator.
 * The user-data pointer is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
  void *block = pemalloc(size, 1);

  return block;
}
186
187
/* PCRE2 release callback paired with php_pcre_malloc().
 * The user-data pointer is not used. */
static void php_pcre_free(void *block, void *data)
{
  pefree(block, 1);
}
191
192
/* PCRE2 allocation callback backed by emalloc().
 * The user-data pointer is not used. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
  void *block = emalloc(size);

  return block;
}
196
197
/* PCRE2 release callback paired with php_pcre_emalloc().
 * The user-data pointer is not used. */
static void php_pcre_efree(void *block, void *data)
{
  efree(block);
}
201
202
3.42k
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
203
204
static void php_pcre_init_pcre2(uint8_t jit)
205
16
{/*{{{*/
206
16
  if (!gctx) {
207
16
    gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
208
16
    if (!gctx) {
209
0
      pcre2_init_ok = 0;
210
0
      return;
211
0
    }
212
16
  }
213
214
16
  if (!cctx) {
215
16
    cctx = pcre2_compile_context_create(gctx);
216
16
    if (!cctx) {
217
0
      pcre2_init_ok = 0;
218
0
      return;
219
0
    }
220
16
  }
221
222
16
  if (!mctx) {
223
16
    mctx = pcre2_match_context_create(gctx);
224
16
    if (!mctx) {
225
0
      pcre2_init_ok = 0;
226
0
      return;
227
0
    }
228
16
  }
229
230
#ifdef HAVE_PCRE_JIT_SUPPORT
231
  if (jit && !jit_stack) {
232
    jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
233
    if (!jit_stack) {
234
      pcre2_init_ok = 0;
235
      return;
236
    }
237
  }
238
#endif
239
240
16
  if (!mdata) {
241
16
    mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
242
16
    if (!mdata) {
243
0
      pcre2_init_ok = 0;
244
0
      return;
245
0
    }
246
16
  }
247
248
16
  pcre2_init_ok = 1;
249
16
}/*}}}*/
250
251
static void php_pcre_shutdown_pcre2(void)
252
0
{/*{{{*/
253
0
  if (gctx) {
254
0
    pcre2_general_context_free(gctx);
255
0
    gctx = NULL;
256
0
  }
257
258
0
  if (cctx) {
259
0
    pcre2_compile_context_free(cctx);
260
0
    cctx = NULL;
261
0
  }
262
263
0
  if (mctx) {
264
0
    pcre2_match_context_free(mctx);
265
0
    mctx = NULL;
266
0
  }
267
268
#ifdef HAVE_PCRE_JIT_SUPPORT
269
  /* Stack may only be destroyed when no cached patterns
270
    possibly associated with it do exist. */
271
  if (jit_stack) {
272
    pcre2_jit_stack_free(jit_stack);
273
    jit_stack = NULL;
274
  }
275
#endif
276
277
0
  if (mdata) {
278
0
    pcre2_match_data_free(mdata);
279
0
    mdata = NULL;
280
0
  }
281
282
0
  pcre2_init_ok = 0;
283
0
}/*}}}*/
284
285
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
286
16
{
287
16
  php_pcre_mutex_alloc();
288
289
16
  zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
290
291
16
  pcre_globals->backtrack_limit = 0;
292
16
  pcre_globals->recursion_limit = 0;
293
16
  pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
294
16
  ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
295
16
  ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
296
#ifdef HAVE_PCRE_JIT_SUPPORT
297
  pcre_globals->jit = 1;
298
#endif
299
300
16
  php_pcre_init_pcre2(1);
301
16
  zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
302
16
}
303
/* }}} */
304
305
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
306
0
{
307
0
  zend_hash_destroy(&pcre_globals->pcre_cache);
308
309
0
  php_pcre_shutdown_pcre2();
310
0
  zend_hash_destroy(&char_tables);
311
0
  php_pcre_mutex_free();
312
0
}
313
/* }}} */
314
315
static PHP_INI_MH(OnUpdateBacktrackLimit)
316
16
{/*{{{*/
317
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
318
16
  if (mctx) {
319
16
    pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
320
16
  }
321
322
16
  return SUCCESS;
323
16
}/*}}}*/
324
325
static PHP_INI_MH(OnUpdateRecursionLimit)
326
16
{/*{{{*/
327
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
328
16
  if (mctx) {
329
16
    pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
330
16
  }
331
332
16
  return SUCCESS;
333
16
}/*}}}*/
334
335
#ifdef HAVE_PCRE_JIT_SUPPORT
/* INI handler for pcre.jit: stores the new value through OnUpdateBool(), then
 * attaches the JIT stack to the match context when JIT is enabled and a stack
 * exists, or detaches it otherwise. Always reports SUCCESS. */
static PHP_INI_MH(OnUpdateJit)
{/*{{{*/
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);

  /* A NULL stack argument removes any previously assigned stack. */
  pcre2_jit_stack_assign(mctx, NULL, (PCRE_G(jit) && jit_stack) ? jit_stack : NULL);

  return SUCCESS;
}/*}}}*/
#endif
348
349
PHP_INI_BEGIN()
350
  STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
351
  STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
352
#ifdef HAVE_PCRE_JIT_SUPPORT
353
  STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
354
#endif
355
PHP_INI_END()
356
357
/* Fetch a string-valued PCRE2 build-time setting.
 *
 * what: a PCRE2_CONFIG_* selector whose value is a string.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL when the setting is unavailable or memory could not be allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
  /* With a NULL buffer pcre2_config() only reports the required length; a
   * negative result is an error code and must not be used as a size. */
  int len = pcre2_config(what, NULL);
  if (len <= 0) {
    return NULL;
  }

  char *ret = (char *) malloc((size_t)len + 1);
  if (!ret) {
    return NULL;
  }

  len = pcre2_config(what, ret);
  if (len <= 0) {
    free(ret);
    return NULL;
  }

  return ret;
}/*}}}*/
370
371
/* {{{ PHP_MINFO_FUNCTION(pcre) */
372
static PHP_MINFO_FUNCTION(pcre)
373
4
{
374
#ifdef HAVE_PCRE_JIT_SUPPORT
375
  uint32_t flag = 0;
376
  char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
377
#endif
378
4
  char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
379
4
  char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
380
381
4
  php_info_print_table_start();
382
4
  php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
383
4
  php_info_print_table_row(2, "PCRE Library Version", version);
384
4
  free(version);
385
4
  php_info_print_table_row(2, "PCRE Unicode Version", unicode);
386
4
  free(unicode);
387
388
#ifdef HAVE_PCRE_JIT_SUPPORT
389
  if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
390
    php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
391
  } else {
392
    php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
393
  }
394
  if (jit_target) {
395
    php_info_print_table_row(2, "PCRE JIT Target", jit_target);
396
  }
397
  free(jit_target);
398
#else
399
4
  php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
400
4
#endif
401
402
#ifdef HAVE_PCRE_VALGRIND_SUPPORT
403
  php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
404
#endif
405
406
4
  php_info_print_table_end();
407
408
4
  DISPLAY_INI_ENTRIES();
409
4
}
410
/* }}} */
411
412
/* {{{ PHP_MINIT_FUNCTION(pcre) */
413
static PHP_MINIT_FUNCTION(pcre)
414
16
{
415
#ifdef HAVE_PCRE_JIT_SUPPORT
416
  if (UNEXPECTED(!pcre2_init_ok)) {
417
    /* Retry. */
418
    php_pcre_init_pcre2(PCRE_G(jit));
419
    if (!pcre2_init_ok) {
420
      return FAILURE;
421
    }
422
  }
423
#endif
424
425
16
  REGISTER_INI_ENTRIES();
426
427
16
  php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
428
429
16
  register_php_pcre_symbols(module_number);
430
431
16
  return SUCCESS;
432
16
}
433
/* }}} */
434
435
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
436
static PHP_MSHUTDOWN_FUNCTION(pcre)
437
0
{
438
0
  UNREGISTER_INI_ENTRIES();
439
440
0
  free(php_pcre_version);
441
442
0
  return SUCCESS;
443
0
}
444
/* }}} */
445
446
/* {{{ PHP_RINIT_FUNCTION(pcre) */
447
static PHP_RINIT_FUNCTION(pcre)
448
224k
{
449
#ifdef HAVE_PCRE_JIT_SUPPORT
450
  if (UNEXPECTED(!pcre2_init_ok)) {
451
    /* Retry. */
452
    php_pcre_mutex_lock();
453
    php_pcre_init_pcre2(PCRE_G(jit));
454
    if (!pcre2_init_ok) {
455
      php_pcre_mutex_unlock();
456
      return FAILURE;
457
    }
458
    php_pcre_mutex_unlock();
459
  }
460
461
  mdata_used = 0;
462
#endif
463
464
224k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
465
224k
  PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
466
224k
  if (!PCRE_G(gctx_zmm)) {
467
0
    return FAILURE;
468
0
  }
469
470
224k
  return SUCCESS;
471
224k
}
472
/* }}} */
473
474
static PHP_RSHUTDOWN_FUNCTION(pcre)
475
224k
{
476
224k
  pcre_cache_entry *pce;
477
68.8M
  ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
478
68.8M
    if (pce->subpats_table) {
479
0
      free_subpats_table(pce->subpats_table, pce->capture_count + 1);
480
0
      pce->subpats_table = NULL;
481
0
    }
482
68.8M
  } ZEND_HASH_FOREACH_END();
483
484
224k
  pcre2_general_context_free(PCRE_G(gctx_zmm));
485
224k
  PCRE_G(gctx_zmm) = NULL;
486
487
224k
  zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
488
224k
  zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
489
224k
  ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
490
224k
  ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
491
224k
  return SUCCESS;
492
224k
}
493
494
/* {{{ static pcre_clean_cache */
495
/* Hash apply callback used to evict cache entries.
 *
 * data: cache slot holding a pcre_cache_entry pointer.
 * arg:  pointer to an int holding how many entries are still to be removed.
 *
 * Entries with a non-zero refcount are kept. Every other entry is removed
 * and the counter decremented; iteration stops when it reaches zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
  pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);
  int *remaining = (int *)arg;

  if (entry->refcount) {
    return ZEND_HASH_APPLY_KEEP;
  }

  *remaining -= 1;
  if (*remaining == 0) {
    return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
  }
  return ZEND_HASH_APPLY_REMOVE;
}
509
/* }}} */
510
511
0
/* Release every non-NULL name in a subpattern-name table of `num_subpats`
 * slots, then free the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
  zend_string **slot = subpat_names;
  zend_string **const end = subpat_names + num_subpats;

  while (slot < end) {
    if (*slot) {
      zend_string_release_ex(*slot, false);
    }
    slot++;
  }
  efree(subpat_names);
}
520
521
/* {{{ static make_subpats_table */
522
/* Build a table mapping capture-group numbers to their names.
 *
 * name_cnt: number of entries to read from the pattern's name table.
 * pce:      cache entry supplying the compiled pattern and capture_count.
 *
 * Returns a zero-initialised array of capture_count + 1 slots in which the
 * slot of each named group holds a newly created zend_string, or NULL (after
 * an E_WARNING) when the name table cannot be queried. */
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
  char *entry_ptr;
  uint32_t entry_size;
  int rc_table = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &entry_ptr);
  int rc_size = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &entry_size);

  if (rc_table < 0 || rc_size < 0) {
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc_table < 0 ? rc_table : rc_size);
    return NULL;
  }

  zend_string **names = ecalloc(pce->capture_count + 1, sizeof(zend_string *));

  for (uint32_t i = 0; i < name_cnt; i++) {
    /* Each entry starts with the group number as two bytes, most significant
     * first, followed by the NUL-terminated name. */
    unsigned short group = 0x100 * (unsigned char)entry_ptr[0] + (unsigned char)entry_ptr[1];
    const char *group_name = entry_ptr + 2;

    names[group] = zend_string_init(group_name, strlen(group_name), false);
    entry_ptr += entry_size;
  }

  return names;
}
546
/* }}} */
547
548
/* Return the entry's subpattern-name table, building and storing it first if
 * the entry does not have one yet. The result is NULL when building failed. */
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
  if (pce->subpats_table) {
    return pce->subpats_table;
  }

  pce->subpats_table = make_subpats_table(name_cnt, pce);
  return pce->subpats_table;
}
555
556
/* {{{ static calculate_unit_length */
557
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
558
/* Length in bytes of the character beginning at `start`: always 1 unless the
 * pattern was compiled with PCRE2_UTF, in which case the lead byte plus all
 * continuation bytes (10xxxxxx) that follow it are counted. */
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
{
  if (!(pce->compile_options & PCRE2_UTF)) {
    return 1;
  }

  /* Step over the lead byte, then over every continuation byte. */
  const char *cursor = start + 1;
  while ((*cursor & 0xC0) == 0x80) {
    cursor++;
  }

  return cursor - start;
}
573
/* }}} */
574
575
/* {{{ pcre_get_compiled_regex_cache_ex */
576
/* Return the cache entry for `regex`, compiling and caching the pattern on a
 * cache miss.
 *
 * regex:        full expression: optional leading whitespace, opening
 *               delimiter, pattern, closing delimiter, modifier letters.
 * locale_aware: when true and BG(ctype_string) is set, the cache key is the
 *               ctype string followed by the regex, and the pattern is
 *               compiled with character tables cached per ctype string in
 *               `char_tables`.
 *
 * Returns NULL when the expression cannot be parsed or compiled; in that case
 * an E_WARNING has been raised and an error code has been recorded through
 * pcre_handle_exec_error(). */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
{
  pcre2_code      *re = NULL;
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
  uint32_t       coptions = PCRE2_NO_START_OPTIMIZE;
#else
  uint32_t       coptions = 0;
#endif
  uint32_t       eoptions = 0;
  PCRE2_UCHAR           error[128];
  PCRE2_SIZE           erroffset;
  int                  errnumber;
  char         delimiter;
  char         start_delimiter;
  char         end_delimiter;
  char        *p, *pp;
  char        *pattern;
  size_t         pattern_len;
  uint32_t       poptions = 0;
  const uint8_t       *tables = NULL;
  zval                *zv;
  pcre_cache_entry   new_entry;
  int          rc;
  zend_string     *key;
  pcre_cache_entry  *ret;

  /* `key` is a separately owned string only in the locale-aware case; every
     exit path below releases it when key != regex. */
  if (locale_aware && BG(ctype_string)) {
    key = zend_string_concat2(
      ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
      ZSTR_VAL(regex), ZSTR_LEN(regex));
  } else {
    key = regex;
  }

  /* Try to lookup the cached regex entry, and if successful, just pass
     back the compiled pattern, otherwise go on and compile it. */
  zv = zend_hash_find(&PCRE_G(pcre_cache), key);
  if (zv) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    return (pcre_cache_entry*)Z_PTR_P(zv);
  }

  p = ZSTR_VAL(regex);
  const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);

  /* Parse through the leading whitespace, and display a warning if we
     get to the end without encountering a delimiter. */
  while (isspace((int)*(unsigned char *)p)) p++;
  if (p >= end_p) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    php_error_docref(NULL, E_WARNING, "Empty regular expression");
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /* Get the delimiter and display a warning if it is alphanumeric
     or a backslash. */
  delimiter = *p++;
  if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /* Bracket-style delimiters: the lookup string is laid out so that the
     character five positions after a match is the delimiter that closes it. */
  start_delimiter = delimiter;
  if ((pp = strchr("([{< )]}> )]}>", delimiter)))
    delimiter = pp[5];
  end_delimiter = delimiter;

  pp = p;

  if (start_delimiter == end_delimiter) {
    /* We need to iterate through the pattern, searching for the ending delimiter,
       but skipping the backslashed delimiters.  If the ending delimiter is not
       found, display a warning. */
    while (pp < end_p) {
      if (*pp == '\\' && pp + 1 < end_p) pp++;
      else if (*pp == delimiter)
        break;
      pp++;
    }
  } else {
    /* We iterate through the pattern, searching for the matching ending
     * delimiter. For each matching starting delimiter, we increment nesting
     * level, and decrement it for each matching ending delimiter. If we
     * reach the end of the pattern without matching, display a warning.
     */
    int brackets = 1;   /* brackets nesting level */
    while (pp < end_p) {
      if (*pp == '\\' && pp + 1 < end_p) pp++;
      else if (*pp == end_delimiter && --brackets <= 0)
        break;
      else if (*pp == start_delimiter)
        brackets++;
      pp++;
    }
  }

  if (pp >= end_p) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    if (start_delimiter == end_delimiter) {
      php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
    } else {
      php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
    }
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /* Make a copy of the actual pattern. */
  pattern_len = pp - p;
  pattern = estrndup(p, pattern_len);

  /* Move on to the options */
  pp++;

  /* Parse through the options, setting appropriate flags.  Display
     a warning if we encounter an unknown modifier. */
  while (pp < end_p) {
    switch (*pp++) {
      /* Perl compatible options */
      case 'i': coptions |= PCRE2_CASELESS;   break;
      case 'm': coptions |= PCRE2_MULTILINE;   break;
      case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
      case 's': coptions |= PCRE2_DOTALL;   break;
      case 'x': coptions |= PCRE2_EXTENDED;   break;

      /* PCRE specific options */
      case 'A': coptions |= PCRE2_ANCHORED;   break;
      case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
#ifdef PCRE2_EXTRA_CASELESS_RESTRICT
      case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
#endif
      case 'S': /* Pass. */         break;
      case 'X': /* Pass. */         break;
      case 'U': coptions |= PCRE2_UNGREEDY;   break;
      case 'u': coptions |= PCRE2_UTF;
  /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
     characters, even in UTF-8 mode. However, this can be changed by setting
     the PCRE2_UCP option. */
#ifdef PCRE2_UCP
            coptions |= PCRE2_UCP;
#endif
        break;
      case 'J': coptions |= PCRE2_DUPNAMES;   break;

      /* Whitespace between or after modifiers is ignored. */
      case ' ':
      case '\n':
      case '\r':
        break;

      case 'e': /* legacy eval */
        /* fallthrough: reported as an unknown modifier */
      default:
        if (pp[-1]) {
          php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
        } else {
          php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
        }
        pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
        efree(pattern);
        if (key != regex) {
          zend_string_release_ex(key, 0);
        }
        return NULL;
    }
  }

  /* key != regex means the locale-aware path is active: look up, or build
     and cache, the character tables for the current ctype string. */
  if (key != regex) {
    tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
    if (!tables) {
      zend_string *_k;
      tables = pcre2_maketables(gctx);
      if (UNEXPECTED(!tables)) {
        php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
        pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
        zend_string_release_ex(key, 0);
        efree(pattern);
        return NULL;
      }
      _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
      GC_MAKE_PERSISTENT_LOCAL(_k);
      zend_hash_add_ptr(&char_tables, _k, (void *)tables);
      zend_string_release(_k);
    }
  }
  /* The compile context is shared, so the tables (possibly NULL) and the
     extra options are set anew for every compilation. */
  pcre2_set_character_tables(cctx, tables);

  pcre2_set_compile_extra_options(cctx, eoptions);

  /* Compile pattern and display a warning if compilation failed. */
  re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);

  if (re == NULL) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    pcre2_get_error_message(errnumber, error, sizeof(error));
    php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    efree(pattern);
    return NULL;
  }

#ifdef HAVE_PCRE_JIT_SUPPORT
  if (PCRE_G(jit)) {
    /* Enable PCRE JIT compiler */
    rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
    if (EXPECTED(rc >= 0)) {
      size_t jit_size = 0;
      if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
        poptions |= PREG_JIT;
      }
    } else if (rc == PCRE2_ERROR_NOMEMORY) {
      php_error_docref(NULL, E_WARNING,
        "Allocation of JIT memory failed, PCRE JIT will be disabled. "
        "This is likely caused by security restrictions. "
        "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
      PCRE_G(jit) = 0;
    } else {
      /* The failure is reported, but the pattern is still cached and
         returned without PREG_JIT. */
      pcre2_get_error_message(rc, error, sizeof(error));
      php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
      pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    }
  }
#endif
  efree(pattern);

  /*
   * If we reached cache limit, clean out the items from the head of the list;
   * these are supposedly the oldest ones (but not necessarily the least used
   * ones).
   */
  if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
    int num_clean = PCRE_CACHE_SIZE / 8;
    zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
  }

  /* Store the compiled pattern and extra info in the cache. */
  new_entry.re = re;
  new_entry.preg_options = poptions;
  new_entry.compile_options = coptions;
  new_entry.refcount = 0;
  new_entry.subpats_table = NULL;

  rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
  if (rc < 0) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    pcre2_code_free(new_entry.re);
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
  if (rc < 0) {
    if (key != regex) {
      zend_string_release_ex(key, 0);
    }
    pcre2_code_free(new_entry.re);
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
    return NULL;
  }

  /*
   * Interned strings are not duplicated when stored in HashTable,
   * but all the interned strings created during HTTP request are removed
   * at end of request. However PCRE_G(pcre_cache) must be consistent
   * on the next request as well. So we disable usage of interned strings
   * as hash keys especially for this table.
   * See bug #63180
   */
  if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
    zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
    GC_MAKE_PERSISTENT_LOCAL(str);

    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
    zend_string_release(str);
  } else {
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
  }

  if (key != regex) {
    zend_string_release_ex(key, 0);
  }

  return ret;
}
875
/* }}} */
876
877
/* {{{ pcre_get_compiled_regex_cache */
878
/* Locale-aware convenience wrapper around pcre_get_compiled_regex_cache_ex(). */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
  pcre_cache_entry *entry = pcre_get_compiled_regex_cache_ex(regex, true);

  return entry;
}
882
/* }}} */
883
884
/* {{{ pcre_get_compiled_regex */
885
/* Return the compiled pattern for `regex`, or NULL when it cannot be obtained.
 * When `capture_count` is non-NULL it receives the pattern's capture count,
 * or 0 on failure. */
PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
{
  pcre_cache_entry *entry = pcre_get_compiled_regex_cache(regex);

  if (!entry) {
    if (capture_count) {
      *capture_count = 0;
    }
    return NULL;
  }

  if (capture_count) {
    *capture_count = entry->capture_count;
  }
  return entry->re;
}
895
/* }}} */
896
897
/* XXX For the cases where it's only about match yes/no and no capture
898
    required, perhaps just a minimum sized data would suffice. */
899
/* Obtain a match data block for `re`.
 *
 * capture_count: number of capture groups of `re`; pass 0 when unknown and it
 *                is queried from the pattern.
 * re:            compiled pattern, must not be NULL.
 *
 * The preallocated block is handed out when it is not already in use and has
 * room for capture_count + 1 pairs; otherwise a new block is created from the
 * pattern. Release the result with php_pcre_free_match_data(). */
PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
{/*{{{*/

  assert(NULL != re);

  if (EXPECTED(!mdata_used)) {
    int rc = 0;

    if (!capture_count) {
      /* As we deal with a non cached pattern, no other way to gather this info. */
      rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
    }

    /* Same test as capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE, written
     * so that it cannot wrap around for capture_count == UINT32_MAX. */
    if (rc >= 0 && capture_count < PHP_PCRE_PREALLOC_MDATA_SIZE) {
      mdata_used = 1;
      return mdata;
    }
  }

  return pcre2_match_data_create_from_pattern(re, gctx);
}/*}}}*/
920
921
/* Release a block obtained from php_pcre_create_match_data(): the
 * preallocated block is only marked as available again, any other block is
 * freed. */
PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
{/*{{{*/
  if (EXPECTED(match_data == mdata)) {
    mdata_used = 0;
    return;
  }

  pcre2_match_data_free(match_data);
}/*}}}*/
929
930
0
/* Store a new two-element array (NULL, -1) in `pair`. */
static void init_unmatched_null_pair(zval *pair) {
  zval match, offset;

  ZVAL_NULL(&match);
  ZVAL_LONG(&offset, -1);
  ZVAL_ARR(pair, zend_new_pair(&match, &offset));
}
936
937
0
/* Store a new two-element array ["", -1] in `pair`: the (value, offset)
 * representation of an unmatched group when unmatched groups are reported
 * as empty strings. */
static void init_unmatched_empty_pair(zval *pair) {
  zval match_value, match_offset;

  ZVAL_EMPTY_STRING(&match_value);
  ZVAL_LONG(&match_offset, -1);

  ZVAL_ARR(pair, zend_new_pair(&match_value, &match_offset));
}
943
944
static zend_always_inline void populate_match_value_str(
945
744
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
946
744
  ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
947
744
}
948
949
static zend_always_inline void populate_match_value(
950
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
951
744
    bool unmatched_as_null) {
952
744
  if (PCRE2_UNSET == start_offset) {
953
0
    if (unmatched_as_null) {
954
0
      ZVAL_NULL(val);
955
0
    } else {
956
0
      ZVAL_EMPTY_STRING(val);
957
0
    }
958
744
  } else {
959
744
    populate_match_value_str(val, subject, start_offset, end_offset);
960
744
  }
961
744
}
962
963
static inline void add_named(
964
0
    HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
965
0
  ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
966
967
  /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
968
   * In this case we want to preserve the one that actually has a value. */
969
0
  if (!unmatched) {
970
0
    zend_hash_update(subpats, name, val);
971
0
  } else {
972
0
    if (!zend_hash_add(subpats, name, val)) {
973
0
      return;
974
0
    }
975
0
  }
976
0
  Z_TRY_ADDREF_P(val);
977
0
}
978
979
/* {{{ add_offset_pair */
980
static inline void add_offset_pair(
981
    HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
982
    zend_string *name, zend_long unmatched_as_null)
983
0
{
984
0
  zval match_pair;
985
986
  /* Add (match, offset) to the return value */
987
0
  if (PCRE2_UNSET == start_offset) {
988
0
    if (unmatched_as_null) {
989
0
      do {
990
0
        if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
991
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
992
0
            init_unmatched_null_pair(&match_pair);
993
0
            break;
994
0
          } else {
995
0
            init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
996
0
          }
997
0
        }
998
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
999
0
      } while (0);
1000
0
    } else {
1001
0
      do {
1002
0
        if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1003
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
1004
0
            init_unmatched_empty_pair(&match_pair);
1005
0
            break;
1006
0
          } else {
1007
0
            init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
1008
0
          }
1009
0
        }
1010
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1011
0
      } while (0);
1012
0
    }
1013
0
  } else {
1014
0
    zval val1, val2;
1015
0
    populate_match_value_str(&val1, subject, start_offset, end_offset);
1016
0
    ZVAL_LONG(&val2, start_offset);
1017
0
    ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1018
0
  }
1019
1020
0
  if (name) {
1021
0
    add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1022
0
  }
1023
0
  zend_hash_next_index_insert_new(result, &match_pair);
1024
0
}
1025
/* }}} */
1026
1027
static void populate_subpat_array(
1028
    HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1029
377
    uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1030
377
  zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1031
377
  zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1032
377
  zval val;
1033
377
  int i;
1034
377
  if (subpat_names) {
1035
0
    if (offset_capture) {
1036
0
      for (i = 0; i < count; i++) {
1037
0
        add_offset_pair(
1038
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1],
1039
0
          subpat_names[i], unmatched_as_null);
1040
0
      }
1041
0
      if (unmatched_as_null) {
1042
0
        for (i = count; i < num_subpats; i++) {
1043
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1044
0
        }
1045
0
      }
1046
0
    } else {
1047
0
      for (i = 0; i < count; i++) {
1048
0
        populate_match_value(
1049
0
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1050
0
        if (subpat_names[i]) {
1051
0
          add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
1052
0
        }
1053
0
        zend_hash_next_index_insert_new(subpats_ht, &val);
1054
0
      }
1055
0
      if (unmatched_as_null) {
1056
0
        for (i = count; i < num_subpats; i++) {
1057
0
          ZVAL_NULL(&val);
1058
0
          if (subpat_names[i]) {
1059
0
            zend_hash_add(subpats_ht, subpat_names[i], &val);
1060
0
          }
1061
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1062
0
        }
1063
0
      }
1064
0
    }
1065
377
  } else {
1066
377
    if (offset_capture) {
1067
0
      for (i = 0; i < count; i++) {
1068
0
        add_offset_pair(
1069
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1070
0
      }
1071
0
      if (unmatched_as_null) {
1072
0
        for (i = count; i < num_subpats; i++) {
1073
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1074
0
        }
1075
0
      }
1076
377
    } else {
1077
1.12k
      for (i = 0; i < count; i++) {
1078
744
        populate_match_value(
1079
744
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1080
744
        zend_hash_next_index_insert_new(subpats_ht, &val);
1081
744
      }
1082
377
      if (unmatched_as_null) {
1083
0
        ZVAL_NULL(&val);
1084
0
        for (i = count; i < num_subpats; i++) {
1085
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1086
0
        }
1087
0
      }
1088
377
    }
1089
377
  }
1090
  /* Add MARK, if available */
1091
377
  if (mark) {
1092
0
    ZVAL_STRING(&val, (char *)mark);
1093
0
    zend_hash_str_add_new(subpats_ht, ZEND_STRL("MARK"), &val);
1094
0
  }
1095
377
}
1096
1097
/*
 * Shared implementation of the userland matching functions.
 *
 * Parses (regex, subject [, &subpats [, flags [, offset]]]), fetches the
 * compiled pattern and delegates to php_pcre_match_impl(). `global` selects
 * between single-match and match-all behaviour.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
  zend_string *regex;               /* Regular expression */
  zend_string *subject;             /* String to match against */
  zval *subpats = NULL;             /* Array for subpatterns */
  zend_long flags = 0;              /* Match control flags */
  zend_long start_offset = 0;       /* Where the new search starts */
  pcre_cache_entry *pce;            /* Compiled regular expression */

  ZEND_PARSE_PARAMETERS_START(2, 5)
    Z_PARAM_STR(regex)
    Z_PARAM_STR(subject)
    Z_PARAM_OPTIONAL
    Z_PARAM_ZVAL(subpats)
    Z_PARAM_LONG(flags)
    Z_PARAM_LONG(start_offset)
  ZEND_PARSE_PARAMETERS_END();

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    RETURN_FALSE;
  }

  /* php_pcre_match_impl() negates negative offsets, which is not possible
   * for the smallest representable value. */
  if (start_offset == ZEND_LONG_MIN) {
    zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
    RETURN_THROWS();
  }

  /* The entry's refcount is raised for the duration of the match
   * (presumably to pin it in the cache -- confirm against the cache code). */
  pce->refcount++;
  php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
  pce->refcount--;
}
1131
/* }}} */
1132
1133
static zend_always_inline bool is_known_valid_utf8(
1134
1.22k
    zend_string *subject_str, PCRE2_SIZE start_offset) {
1135
1.22k
  if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1136
    /* We don't know whether the string is valid UTF-8 or not. */
1137
1.21k
    return false;
1138
1.21k
  }
1139
1140
1
  if (start_offset == ZSTR_LEN(subject_str)) {
1141
    /* Degenerate case: Offset points to end of string. */
1142
1
    return true;
1143
1
  }
1144
1145
  /* Check that the offset does not point to an UTF-8 continuation byte. */
1146
0
  return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1147
1
}
1148
1149
/* {{{ php_pcre_match_impl() */
1150
/*
 * Run a compiled pattern against a subject and report the outcome.
 *
 * pce           Compiled pattern (cache entry).
 * subject_str   Subject string.
 * return_value  Receives the number of matches found, or false if an
 *               error occurred (PCRE_G(error_code) is set accordingly).
 * subpats       Optional output zval. When non-NULL it is overwritten with an
 *               array that receives the captured groups.
 * global        true: find all matches; false: stop after the first one.
 * flags         Bitmask: PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL, and in
 *               global mode PREG_PATTERN_ORDER or PREG_SET_ORDER in the low byte.
 * start_offset  Byte offset to start at; negative values count from the end
 *               of the subject.
 *
 * The function uses the shared preallocated match data (`mdata`) when it is
 * free and large enough, and restores `mdata_used` to its previous value on
 * every exit path taken after the match data has been acquired.
 */
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
  zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
{
  zval             result_set;          /* Holds a set of subpatterns after
                                           a global match */
  HashTable      **match_sets = NULL;   /* An array of sets of matches for each
                                           subpattern after a global match */
  uint32_t         options;             /* Execution options */
  int              count;               /* Count of matched subpatterns */
  uint32_t         num_subpats;         /* Number of captured subpatterns */
  int              matched;             /* Has anything matched */
  zend_string    **subpat_names;        /* Array for named subpatterns */
  size_t           i;
  uint32_t         subpats_order;       /* Order of subpattern matches */
  uint32_t         offset_capture;      /* Capture match offsets: yes/no */
  zend_long        unmatched_as_null;   /* Null non-matches: yes/no */
  PCRE2_SPTR       mark = NULL;         /* Target for MARK name */
  HashTable       *marks = NULL;        /* Array of marks for PREG_PATTERN_ORDER */
  pcre2_match_data *match_data;
  PCRE2_SIZE       start_offset2, orig_start_offset;
  bool             old_mdata_used;

  char *subject = ZSTR_VAL(subject_str);
  size_t subject_len = ZSTR_LEN(subject_str);

  /* Overwrite the passed-in value for subpatterns with an empty array. */
  if (subpats != NULL) {
    subpats = zend_try_array_init(subpats);
    if (!subpats) {
      RETURN_THROWS();
    }
  }

  subpats_order = global ? PREG_PATTERN_ORDER : 0;

  if (flags) {
    offset_capture = flags & PREG_OFFSET_CAPTURE;
    unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;

    /*
     * subpats_order is pre-set to pattern mode so we change it only if
     * necessary.
     */
    if (flags & 0xff) {
      subpats_order = flags & 0xff;
      if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
        (!global && subpats_order != 0)) {
        zend_argument_value_error(4, "must be a PREG_* constant");
        RETURN_THROWS();
      }
    }
  } else {
    offset_capture = 0;
    unmatched_as_null = 0;
  }

  /* Negative offset counts from the end of the string. */
  if (start_offset < 0) {
    if ((PCRE2_SIZE)-start_offset <= subject_len) {
      start_offset2 = subject_len + start_offset;
    } else {
      start_offset2 = 0;
    }
  } else {
    start_offset2 = (PCRE2_SIZE)start_offset;
  }

  if (start_offset2 > subject_len) {
    pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
    RETURN_FALSE;
  }

  /* Calculate the size of the offsets array, and allocate memory for it. */
  num_subpats = pce->capture_count + 1;

  /*
   * Build a mapping from subpattern numbers to their names. We will
   * allocate the table only if there are any named subpatterns.
   */
  subpat_names = NULL;
  if (subpats && pce->name_count > 0) {
    subpat_names = ensure_subpats_table(pce->name_count, pce);
    if (UNEXPECTED(!subpat_names)) {
      RETURN_FALSE;
    }
  }

  matched = 0;
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Take the shared match data if nobody else holds it and it is big enough;
   * remember the previous state so it can be restored on exit. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      RETURN_FALSE;
    }
  }

  /* Allocate match sets array and initialize the values. */
  if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
    match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
    for (i=0; i<num_subpats; i++) {
      match_sets[i] = zend_new_array(0);
    }
  }

  /* Array of subpattern offsets */
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

  /* The UTF check is only needed for /u patterns on subjects that are not
   * already known to be valid UTF-8. */
  orig_start_offset = start_offset2;
  options =
    (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
      ? 0 : PCRE2_NO_UTF_CHECK;

  /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
  if ((pce->preg_options & PREG_JIT) && options) {
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  } else
#endif
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
      options, match_data, mctx);

  while (1) {
    /* If something has matched */
    if (count >= 0) {
      /* Check for too many substrings condition. */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
        count = num_subpats;
      }

matched:
      matched++;

      /* If subpatterns array has been passed, fill it in with values. */
      if (subpats != NULL) {
        /* Try to get the list of substrings and display a warning if failed. */
        if (UNEXPECTED(offsets[1] < offsets[0])) {
          if (match_sets) {
            for (i = 0; i < num_subpats; i++) {
              zend_array_destroy(match_sets[i]);
            }
            efree(match_sets);
          }
          if (marks) {
            zend_array_destroy(marks);
          }
          if (match_data != mdata) {
            pcre2_match_data_free(match_data);
          }
          /* Hand the shared match data back as well, exactly like the
           * regular exit path does; otherwise it stays flagged as busy. */
          mdata_used = old_mdata_used;
          php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
          RETURN_FALSE;
        }

        if (global) { /* global pattern matching */
          if (subpats_order == PREG_PATTERN_ORDER) {
            /* For each subpattern, insert it into the appropriate array. */
            if (offset_capture) {
              for (i = 0; i < count; i++) {
                add_offset_pair(
                  match_sets[i], subject, offsets[2*i], offsets[2*i+1],
                  NULL, unmatched_as_null);
              }
            } else {
              for (i = 0; i < count; i++) {
                zval val;
                populate_match_value(
                  &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
                zend_hash_next_index_insert_new(match_sets[i], &val);
              }
            }
            mark = pcre2_get_mark(match_data);
            /* Add MARK, if available */
            if (mark) {
              if (!marks) {
                marks = zend_new_array(0);
              }
              zval tmp;
              ZVAL_STRING(&tmp, (char *) mark);
              zend_hash_index_add_new(marks, matched - 1, &tmp);
            }
            /*
             * If the number of captured subpatterns on this run is
             * less than the total possible number, pad the result
             * arrays with NULLs or empty strings.
             */
            if (count < num_subpats) {
              for (i = count; i < num_subpats; i++) {
                if (offset_capture) {
                  add_offset_pair(
                    match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
                    NULL, unmatched_as_null);
                } else if (unmatched_as_null) {
                  zval tmp;
                  ZVAL_NULL(&tmp);
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
                } else {
                  zval tmp;
                  ZVAL_EMPTY_STRING(&tmp);
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
                }
              }
            }
          } else {
            /* Allocate and populate the result set array */
            mark = pcre2_get_mark(match_data);
            array_init_size(&result_set, count + (mark ? 1 : 0));
            populate_subpat_array(
              Z_ARRVAL(result_set), subject, offsets, subpat_names,
              num_subpats, count, mark, flags);
            /* And add it to the output array */
            zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
          }
        } else {     /* single pattern matching */
          /* For each subpattern, insert it into the subpatterns array. */
          mark = pcre2_get_mark(match_data);
          populate_subpat_array(
            Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags);
          break;
        }
      }

      /* Advance to the next piece. */
      start_offset2 = offsets[1];

      /* If we have matched an empty string, mimic what Perl's /g options does.
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
         the match again at the same point. If this fails (picked up above) we
         advance to the next character. */
      /* NOTE(review): a non-global call without subpats also reaches this
         retry, and an error here turns an already successful match into a
         failure -- confirm whether that is intended before changing it. */
      if (start_offset2 == offsets[0]) {
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
        if (count >= 0) {
          if (global) {
            goto matched;
          } else {
            break;
          }
        } else if (count == PCRE2_ERROR_NOMATCH) {
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
             this is not necessarily the end. We need to advance
             the start offset, and continue. Fudge the offset values
             to achieve this, unless we're already at the end of the string. */
          if (start_offset2 < subject_len) {
            size_t unit_len = calculate_unit_length(pce, subject + start_offset2);

            start_offset2 += unit_len;
          } else {
            break;
          }
        } else {
          goto error;
        }
      }
    } else if (count == PCRE2_ERROR_NOMATCH) {
      break;
    } else {
error:
      pcre_handle_exec_error(count);
      break;
    }

    if (!global) {
      break;
    }

    /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT)) {
      if (start_offset2 > subject_len) {
        pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
        break;
      }
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  }
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }
  mdata_used = old_mdata_used;

  /* Add the match sets to the output array and clean up */
  if (match_sets) {
    if (subpat_names) {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        if (subpat_names[i]) {
          /* The same array is stored under the name and the index,
           * hence the extra reference. */
          zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
          GC_ADDREF(match_sets[i]);
        }
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    } else {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    }
    efree(match_sets);

    if (marks) {
      zval tmp;
      ZVAL_ARR(&tmp, marks);
      zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
    }
  }

  if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
    /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
    if ((pce->compile_options & PCRE2_UTF)
        && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
      GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
    }

    RETVAL_LONG(matched);
  } else {
    RETVAL_FALSE;
  }
}
1481
/* }}} */
1482
1483
/* {{{ Perform a Perl-style regular expression match */
1484
PHP_FUNCTION(preg_match)
1485
4.78k
{
1486
4.78k
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1487
4.78k
}
1488
/* }}} */
1489
1490
/*
 * Frameless two-argument variant of preg_match(): only regex and subject,
 * no subpattern array, no flags, offset 0.
 */
ZEND_FRAMELESS_FUNCTION(preg_match, 2)
{
  zval regex_tmp, subject_tmp;
  zend_string *regex, *subject;

  Z_FLF_PARAM_STR(1, regex, regex_tmp);
  Z_FLF_PARAM_STR(2, subject, subject_tmp);

  /* Compile regex or get it from cache. */
  pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);

  if (pce != NULL) {
    pce->refcount++;
    php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
      /* global */ false, /* flags */ 0, /* start_offset */ 0);
    pce->refcount--;
  } else {
    RETVAL_FALSE;
  }

  /* Common exit; the label is kept because the parameter macros above
   * presumably jump here when argument conversion fails -- verify against
   * their definition. */
flf_clean:
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
  Z_FLF_PARAM_FREE_STR(2, subject_tmp);
}
1514
1515
/* {{{ Perform a Perl-style global regular expression match */
1516
PHP_FUNCTION(preg_match_all)
1517
0
{
1518
0
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1519
0
}
1520
/* }}} */
1521
1522
/* {{{ preg_get_backref */
1523
/*
 * Try to parse a back-reference at *str.
 *
 * *str must point at the introducing character (a backslash or a dollar
 * sign). Accepted forms are <intro>N, <intro>NN and ${N} / ${NN}, where N is
 * a decimal digit; at most two digits are consumed.
 *
 * On success returns 1, stores the group number in *backref and advances
 * *str past the reference. On failure returns 0 and leaves *str untouched
 * (*backref may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
  char *cursor = *str;
  int braced = 0;

  /* Nothing follows the introducer: not a reference. */
  if (cursor[1] == '\0') {
    return 0;
  }

  if (cursor[0] == '$' && cursor[1] == '{') {
    braced = 1;
    cursor++;
  }
  cursor++;

  /* First digit is mandatory. */
  if (*cursor < '0' || *cursor > '9') {
    return 0;
  }
  *backref = *cursor - '0';
  cursor++;

  /* Second digit is optional. */
  if (*cursor >= '0' && *cursor <= '9') {
    *backref = *backref * 10 + *cursor - '0';
    cursor++;
  }

  if (braced) {
    if (*cursor != '}') {
      return 0;
    }
    cursor++;
  }

  *str = cursor;
  return 1;
}
1558
/* }}} */
1559
1560
/* Return NULL if an exception has occurred */
1561
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1562
377
{
1563
377
  zend_string *result_str = NULL;
1564
377
  zval     retval;      /* Function return value */
1565
377
  zval       arg;       /* Argument to pass to function */
1566
1567
377
  array_init_size(&arg, count + (mark ? 1 : 0));
1568
377
  populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags);
1569
1570
377
  fci->retval = &retval;
1571
377
  fci->param_count = 1;
1572
377
  fci->params = &arg;
1573
377
  zend_call_function(fci, fcc);
1574
377
  zval_ptr_dtor(&arg);
1575
377
  if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1576
42
    return Z_STR(retval);
1577
42
  }
1578
  /* No Exception has occurred */
1579
335
  else if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) {
1580
316
    result_str = zval_try_get_string_func(&retval);
1581
316
  }
1582
335
  zval_ptr_dtor(&retval);
1583
1584
335
  return result_str;
1585
377
}
1586
1587
/* {{{ php_pcre_replace */
1588
/*
 * Replace matches of `regex` in the subject with `replace_str`.
 *
 * Fetches the compiled pattern and delegates to php_pcre_replace_impl(),
 * forwarding `limit` and `replace_count` unchanged.
 *
 * Returns the value produced by php_pcre_replace_impl(), or NULL when an
 * exception is already pending or the pattern could not be obtained.
 */
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
                zend_string *subject_str,
                const char *subject, size_t subject_len,
                zend_string *replace_str,
                size_t limit, size_t *replace_count)
{
  pcre_cache_entry *pce;     /* Compiled regular expression */
  zend_string *result;       /* Function result */

  /* Abort on pending exception, e.g. thrown from __toString(). */
  if (UNEXPECTED(EG(exception))) {
    return NULL;
  }

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    return NULL;
  }

  pce->refcount++;
  result = php_pcre_replace_impl(
    pce, subject_str, subject, subject_len, replace_str, limit, replace_count);
  pce->refcount--;

  return result;
}
1613
/* }}} */
1614
1615
/* {{{ php_pcre_replace_impl() */
1616
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1617
195
{
1618
195
  uint32_t     options;     /* Execution options */
1619
195
  int        count;       /* Count of matched subpatterns */
1620
195
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1621
195
  size_t       new_len;     /* Length of needed storage */
1622
195
  size_t       alloc_len;     /* Actual allocated length */
1623
195
  size_t       match_len;     /* Length of the current match */
1624
195
  int        backref;     /* Backreference number */
1625
195
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1626
195
  size_t       last_end_offset; /* Where the last search ended */
1627
195
  char      *walkbuf,     /* Location of current replacement in the result */
1628
195
          *walk,        /* Used to walk the replacement string */
1629
195
           walk_last;     /* Last walked character */
1630
195
  const char    *match,       /* The current match */
1631
195
          *piece,       /* The current piece of subject */
1632
195
          *replace_end;   /* End of replacement string */
1633
195
  size_t      result_len;     /* Length of result */
1634
195
  zend_string   *result;      /* Result of replacement */
1635
195
  pcre2_match_data *match_data;
1636
195
  bool old_mdata_used;
1637
1638
  /* Calculate the size of the offsets array, and allocate memory for it. */
1639
195
  num_subpats = pce->capture_count + 1;
1640
195
  alloc_len = 0;
1641
195
  result = NULL;
1642
1643
  /* Initialize */
1644
195
  match = NULL;
1645
195
  start_offset = 0;
1646
195
  last_end_offset = 0;
1647
195
  result_len = 0;
1648
195
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1649
1650
195
  old_mdata_used = mdata_used;
1651
195
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1652
59
    mdata_used = true;
1653
59
    match_data = mdata;
1654
136
  } else {
1655
136
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1656
136
    if (!match_data) {
1657
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1658
0
      return NULL;
1659
0
    }
1660
136
  }
1661
1662
195
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1663
1664
  /* Array of subpattern offsets */
1665
195
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1666
1667
  /* Execute the regular expression. */
1668
#ifdef HAVE_PCRE_JIT_SUPPORT
1669
  if ((pce->preg_options & PREG_JIT) && options) {
1670
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1671
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1672
  } else
1673
#endif
1674
195
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1675
195
      options, match_data, mctx);
1676
1677
858
  while (1) {
1678
858
    piece = subject + last_end_offset;
1679
1680
858
    if (count >= 0 && limit > 0) {
1681
685
      bool simple_string;
1682
1683
      /* Check for too many substrings condition. */
1684
685
      if (UNEXPECTED(count == 0)) {
1685
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1686
0
        count = num_subpats;
1687
0
      }
1688
1689
717
matched:
1690
717
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1691
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1692
0
        if (result) {
1693
0
          zend_string_release_ex(result, 0);
1694
0
          result = NULL;
1695
0
        }
1696
0
        break;
1697
0
      }
1698
1699
717
      if (replace_count) {
1700
717
        ++*replace_count;
1701
717
      }
1702
1703
      /* Set the match location in subject */
1704
717
      match = subject + offsets[0];
1705
1706
717
      new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1707
1708
717
      walk = ZSTR_VAL(replace_str);
1709
717
      replace_end = walk + ZSTR_LEN(replace_str);
1710
717
      walk_last = 0;
1711
717
      simple_string = true;
1712
1.22k
      while (walk < replace_end) {
1713
509
        if ('\\' == *walk || '$' == *walk) {
1714
5
          simple_string = false;
1715
5
          if (walk_last == '\\') {
1716
0
            walk++;
1717
0
            walk_last = 0;
1718
0
            continue;
1719
0
          }
1720
5
          if (preg_get_backref(&walk, &backref)) {
1721
0
            if (backref < count)
1722
0
              new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1723
0
            continue;
1724
0
          }
1725
5
        }
1726
509
        new_len++;
1727
509
        walk++;
1728
509
        walk_last = walk[-1];
1729
509
      }
1730
1731
717
      if (new_len >= alloc_len) {
1732
203
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1733
203
        if (result == NULL) {
1734
96
          result = zend_string_alloc(alloc_len, 0);
1735
107
        } else {
1736
107
          result = zend_string_extend(result, alloc_len, 0);
1737
107
        }
1738
203
      }
1739
1740
717
      if (match-piece > 0) {
1741
        /* copy the part of the string before the match */
1742
623
        memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1743
623
        result_len += (match-piece);
1744
623
      }
1745
1746
717
      if (simple_string) {
1747
        /* copy replacement */
1748
712
        memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1749
712
        result_len += ZSTR_LEN(replace_str);
1750
712
      } else {
1751
        /* copy replacement and backrefs */
1752
5
        walkbuf = ZSTR_VAL(result) + result_len;
1753
1754
5
        walk = ZSTR_VAL(replace_str);
1755
5
        walk_last = 0;
1756
102
        while (walk < replace_end) {
1757
97
          if ('\\' == *walk || '$' == *walk) {
1758
5
            if (walk_last == '\\') {
1759
0
              *(walkbuf-1) = *walk++;
1760
0
              walk_last = 0;
1761
0
              continue;
1762
0
            }
1763
5
            if (preg_get_backref(&walk, &backref)) {
1764
0
              if (backref < count) {
1765
0
                if (offsets[backref<<1] < SIZE_MAX) {
1766
0
                  match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1767
0
                  walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1768
0
                }
1769
0
              }
1770
0
              continue;
1771
0
            }
1772
5
          }
1773
97
          *walkbuf++ = *walk++;
1774
97
          walk_last = walk[-1];
1775
97
        }
1776
5
        *walkbuf = '\0';
1777
        /* increment the result length by how much we've added to the string */
1778
5
        result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1779
5
      }
1780
1781
717
      limit--;
1782
1783
      /* Advance to the next piece. */
1784
717
      start_offset = last_end_offset = offsets[1];
1785
1786
      /* If we have matched an empty string, mimic what Perl's /g options does.
1787
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1788
         the match again at the same point. If this fails (picked up above) we
1789
         advance to the next character. */
1790
717
      if (start_offset == offsets[0]) {
1791
603
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1792
603
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1793
1794
603
        piece = subject + start_offset;
1795
603
        if (count >= 0 && limit > 0) {
1796
32
          goto matched;
1797
571
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1798
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1799
             this is not necessarily the end. We need to advance
1800
             the start offset, and continue. Fudge the offset values
1801
             to achieve this, unless we're already at the end of the string. */
1802
571
          if (start_offset < subject_len) {
1803
549
            size_t unit_len = calculate_unit_length(pce, piece);
1804
549
            start_offset += unit_len;
1805
549
          } else {
1806
22
            goto not_matched;
1807
22
          }
1808
571
        } else {
1809
0
          goto error;
1810
0
        }
1811
603
      }
1812
1813
717
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1814
195
not_matched:
1815
195
      if (!result && subject_str) {
1816
99
        result = zend_string_copy(subject_str);
1817
99
        break;
1818
99
      }
1819
      /* now we know exactly how long it is */
1820
96
      alloc_len = result_len + subject_len - last_end_offset;
1821
96
      if (NULL != result) {
1822
96
        result = zend_string_realloc(result, alloc_len, 0);
1823
96
      } else {
1824
0
        result = zend_string_alloc(alloc_len, 0);
1825
0
      }
1826
      /* stick that last bit of string on our output */
1827
96
      memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1828
96
      result_len += subject_len - last_end_offset;
1829
96
      ZSTR_VAL(result)[result_len] = '\0';
1830
96
      ZSTR_LEN(result) = result_len;
1831
96
      break;
1832
195
    } else {
1833
0
error:
1834
0
      pcre_handle_exec_error(count);
1835
0
      if (result) {
1836
0
        zend_string_release_ex(result, 0);
1837
0
        result = NULL;
1838
0
      }
1839
0
      break;
1840
0
    }
1841
1842
#ifdef HAVE_PCRE_JIT_SUPPORT
1843
    if (pce->preg_options & PREG_JIT) {
1844
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1845
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1846
    } else
1847
#endif
1848
663
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1849
663
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1850
663
  }
1851
195
  if (match_data != mdata) {
1852
136
    pcre2_match_data_free(match_data);
1853
136
  }
1854
195
  mdata_used = old_mdata_used;
1855
1856
195
  return result;
1857
195
}
1858
/* }}} */
1859
1860
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str,
1861
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1862
  size_t limit, size_t *replace_count, zend_long flags
1863
164
) {
1864
164
  uint32_t     options;     /* Execution options */
1865
164
  int        count;       /* Count of matched subpatterns */
1866
164
  zend_string   **subpat_names;   /* Array for named subpatterns */
1867
164
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1868
164
  size_t       alloc_len;     /* Actual allocated length */
1869
164
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1870
164
  size_t       last_end_offset; /* Where the last search ended */
1871
164
  const char    *match,       /* The current match */
1872
164
          *piece;       /* The current piece of subject */
1873
164
  size_t      result_len;     /* Length of result */
1874
164
  zend_string   *result;      /* Result of replacement */
1875
164
  pcre2_match_data *match_data;
1876
164
  bool old_mdata_used;
1877
1878
  /* Calculate the size of the offsets array, and allocate memory for it. */
1879
164
  num_subpats = pce->capture_count + 1;
1880
164
  if (pce->name_count > 0) {
1881
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1882
0
    if (UNEXPECTED(!subpat_names)) {
1883
0
      return NULL;
1884
0
    }
1885
164
  } else {
1886
164
    subpat_names = NULL;
1887
164
  }
1888
1889
164
  alloc_len = 0;
1890
164
  result = NULL;
1891
1892
  /* Initialize */
1893
164
  match = NULL;
1894
164
  start_offset = 0;
1895
164
  last_end_offset = 0;
1896
164
  result_len = 0;
1897
164
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1898
1899
164
  old_mdata_used = mdata_used;
1900
164
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1901
128
    mdata_used = 1;
1902
128
    match_data = mdata;
1903
128
  } else {
1904
36
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1905
36
    if (!match_data) {
1906
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1907
0
      mdata_used = old_mdata_used;
1908
0
      return NULL;
1909
0
    }
1910
36
  }
1911
1912
164
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1913
1914
  /* Array of subpattern offsets */
1915
164
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1916
1917
  /* Execute the regular expression. */
1918
#ifdef HAVE_PCRE_JIT_SUPPORT
1919
  if ((pce->preg_options & PREG_JIT) && options) {
1920
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1921
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1922
  } else
1923
#endif
1924
164
  count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1925
164
      options, match_data, mctx);
1926
1927
516
  while (1) {
1928
514
    piece = ZSTR_VAL(subject_str) + last_end_offset;
1929
1930
514
    if (count >= 0 && limit) {
1931
      /* Check for too many substrings condition. */
1932
377
      if (UNEXPECTED(count == 0)) {
1933
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1934
0
        count = num_subpats;
1935
0
      }
1936
1937
377
matched:
1938
377
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1939
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1940
0
        if (result) {
1941
0
          zend_string_release_ex(result, 0);
1942
0
          result = NULL;
1943
0
        }
1944
0
        break;
1945
0
      }
1946
1947
377
      if (replace_count) {
1948
377
        ++*replace_count;
1949
377
      }
1950
1951
      /* Set the match location in subject */
1952
377
      match = ZSTR_VAL(subject_str) + offsets[0];
1953
1954
      /* Length of needed storage */
1955
377
      size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1956
1957
      /* Use custom function to get replacement string and its length. */
1958
377
      zend_string *eval_result = preg_do_repl_func(
1959
377
        fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count,
1960
377
        pcre2_get_mark(match_data), flags);
1961
1962
377
      if (UNEXPECTED(eval_result == NULL)) {
1963
17
        goto error;
1964
17
      }
1965
360
      new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1966
360
      if (new_len >= alloc_len) {
1967
210
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1968
210
        if (result == NULL) {
1969
110
          result = zend_string_alloc(alloc_len, 0);
1970
110
        } else {
1971
100
          result = zend_string_extend(result, alloc_len, 0);
1972
100
        }
1973
210
      }
1974
1975
360
      if (match-piece > 0) {
1976
        /* copy the part of the string before the match */
1977
348
        memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1978
348
        result_len += (match-piece);
1979
348
      }
1980
1981
      /* If using custom function, copy result to the buffer and clean up. */
1982
360
      memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1983
360
      result_len += ZSTR_LEN(eval_result);
1984
360
      zend_string_release_ex(eval_result, 0);
1985
1986
360
      limit--;
1987
1988
      /* Advance to the next piece. */
1989
360
      start_offset = last_end_offset = offsets[1];
1990
1991
      /* If we have matched an empty string, mimic what Perl's /g options does.
1992
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1993
         the match again at the same point. If this fails (picked up above) we
1994
         advance to the next character. */
1995
360
      if (start_offset == offsets[0]) {
1996
156
        count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1997
156
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1998
1999
156
        piece = ZSTR_VAL(subject_str) + start_offset;
2000
156
        if (count >= 0 && limit) {
2001
0
          goto matched;
2002
156
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2003
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2004
             this is not necessarily the end. We need to advance
2005
             the start offset, and continue. Fudge the offset values
2006
             to achieve this, unless we're already at the end of the string. */
2007
156
          if (start_offset < ZSTR_LEN(subject_str)) {
2008
148
            size_t unit_len = calculate_unit_length(pce, piece);
2009
148
            start_offset += unit_len;
2010
148
          } else {
2011
8
            goto not_matched;
2012
8
          }
2013
156
        } else {
2014
0
          goto error;
2015
0
        }
2016
156
      }
2017
2018
360
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2019
145
not_matched:
2020
145
      if (result == NULL) {
2021
37
        result = zend_string_copy(subject_str);
2022
37
        break;
2023
37
      }
2024
      /* now we know exactly how long it is */
2025
108
      size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset;
2026
108
      alloc_len = result_len + segment_len;
2027
108
      result = zend_string_realloc(result, alloc_len, 0);
2028
      /* stick that last bit of string on our output */
2029
108
      memcpy(ZSTR_VAL(result) + result_len, piece, segment_len);
2030
108
      result_len += segment_len;
2031
108
      ZSTR_VAL(result)[result_len] = '\0';
2032
108
      ZSTR_LEN(result) = result_len;
2033
108
      break;
2034
145
    } else {
2035
17
error:
2036
17
      pcre_handle_exec_error(count);
2037
17
      if (result) {
2038
0
        zend_string_release_ex(result, 0);
2039
0
        result = NULL;
2040
0
      }
2041
17
      break;
2042
0
    }
2043
#ifdef HAVE_PCRE_JIT_SUPPORT
2044
    if ((pce->preg_options & PREG_JIT)) {
2045
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2046
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2047
    } else
2048
#endif
2049
352
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2050
352
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2051
352
  }
2052
164
  if (match_data != mdata) {
2053
35
    pcre2_match_data_free(match_data);
2054
35
  }
2055
164
  mdata_used = old_mdata_used;
2056
2057
164
  return result;
2058
164
}
2059
2060
/* Looks up (or compiles) `regex` through pcre_get_compiled_regex_cache() and
 * runs the callback-based replacement on `subject_str`.
 *
 * The cache entry's refcount is raised for the duration of the call to
 * php_pcre_replace_func_impl(). Returns NULL when the cache lookup returns
 * NULL, otherwise whatever php_pcre_replace_func_impl() returns. */
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
                zend_string *subject_str,
                zend_fcall_info *fci, zend_fcall_info_cache *fcc,
                size_t limit, size_t *replace_count, zend_long flags)
{
  pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    return NULL;
  }

  pce->refcount++;
  zend_string *result = php_pcre_replace_func_impl(
    pce, subject_str, fci, fcc, limit, replace_count, flags);
  pce->refcount--;

  return result;
}
2078
2079
/* {{{ php_pcre_replace_array */
2080
/* Applies every pattern in `regex` to the subject, one after another; the
 * output of each php_pcre_replace() call becomes the subject of the next.
 *
 * Replacement text:
 *  - replace_ht == NULL: `replace_str` is used for every pattern.
 *  - replace_ht != NULL: entries are consumed positionally (IS_UNDEF slots are
 *    skipped); once the table is exhausted ZSTR_EMPTY_ALLOC() is used.
 *
 * A reference is added to `subject_str` on entry, and the current subject is
 * released after each replacement. Returns the final string, or NULL as soon
 * as php_pcre_replace() returns NULL. */
static zend_string *php_pcre_replace_array(HashTable *regex,
  zend_string *replace_str, HashTable *replace_ht,
  zend_string *subject_str, size_t limit, size_t *replace_count)
{
  zval *pattern_zv;

  zend_string_addref(subject_str);

  if (!replace_ht) {
    ZEND_ASSERT(replace_str != NULL);

    /* One shared replacement string for all patterns. */
    ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
      /* Patterns are coerced to strings. */
      zend_string *pattern_tmp;
      zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);

      /* Replace, then make the output the subject of the next round. */
      zend_string *replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
        ZSTR_LEN(subject_str), replace_str, limit, replace_count);
      zend_tmp_string_release(pattern_tmp);
      zend_string_release_ex(subject_str, 0);
      subject_str = replaced;

      if (UNEXPECTED(subject_str == NULL)) {
        break;
      }
    } ZEND_HASH_FOREACH_END();

    return subject_str;
  }

  /* Position of the next candidate slot in replace_ht. */
  uint32_t next_slot = 0;

  ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
    /* Patterns are coerced to strings. */
    zend_string *pattern_tmp;
    zend_string *pattern = zval_get_tmp_string(pattern_zv, &pattern_tmp);
    zend_string *replacement;
    zend_string *replacement_tmp;

    /* Fetch the next defined replacement, or fall back to the empty string. */
    for (;;) {
      if (next_slot == replace_ht->nNumUsed) {
        replacement = ZSTR_EMPTY_ALLOC();
        replacement_tmp = NULL;
        break;
      }
      zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_slot);
      next_slot++;
      if (Z_TYPE_P(slot) != IS_UNDEF) {
        replacement = zval_get_tmp_string(slot, &replacement_tmp);
        break;
      }
    }

    /* Replace, then make the output the subject of the next round. */
    zend_string *replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
      ZSTR_LEN(subject_str), replacement, limit, replace_count);
    zend_tmp_string_release(replacement_tmp);
    zend_tmp_string_release(pattern_tmp);
    zend_string_release_ex(subject_str, 0);
    subject_str = replaced;

    if (UNEXPECTED(subject_str == NULL)) {
      break;
    }
  } ZEND_HASH_FOREACH_END();

  return subject_str;
}
2153
/* }}} */
2154
2155
/* {{{ php_replace_in_subject */
2156
/* Dispatches a string-replacement request for a single subject:
 *  - a string pattern (`regex_str`) goes straight to php_pcre_replace();
 *  - otherwise the pattern table `regex_ht` is handled by
 *    php_pcre_replace_array().
 * Returns whatever the selected helper returns. */
static zend_always_inline zend_string *php_replace_in_subject(
  zend_string *regex_str, HashTable *regex_ht,
  zend_string *replace_str, HashTable *replace_ht,
  zend_string *subject, size_t limit, size_t *replace_count)
{
  if (!regex_str) {
    ZEND_ASSERT(regex_ht != NULL);
    return php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
      limit, replace_count);
  }

  ZEND_ASSERT(replace_str != NULL);
  return php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
    replace_str, limit, replace_count);
}
2174
/* }}} */
2175
2176
static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht,
2177
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2178
  zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2179
175
{
2180
175
  zend_string *result;
2181
2182
175
  if (regex_str) {
2183
175
    result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags);
2184
175
    return result;
2185
175
  } else {
2186
    /* If regex is an array */
2187
0
    zval    *regex_entry;
2188
2189
0
    ZEND_ASSERT(regex_ht != NULL);
2190
2191
0
    zend_string_addref(subject);
2192
2193
    /* For each entry in the regex array, get the entry */
2194
0
    ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2195
      /* Make sure we're dealing with strings. */
2196
0
      zend_string *tmp_regex_entry_str;
2197
0
      zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2198
0
      if (UNEXPECTED(regex_entry_str == NULL)) {
2199
0
        break;
2200
0
      }
2201
2202
      /* Do the actual replacement and put the result back into subject
2203
         for further replacements. */
2204
0
      result = php_pcre_replace_func(
2205
0
        regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2206
0
      zend_tmp_string_release(tmp_regex_entry_str);
2207
0
      zend_string_release(subject);
2208
0
      subject = result;
2209
0
      if (UNEXPECTED(result == NULL)) {
2210
0
        break;
2211
0
      }
2212
0
    } ZEND_HASH_FOREACH_END();
2213
2214
0
    return subject;
2215
0
  }
2216
175
}
2217
2218
static size_t php_preg_replace_func_impl(zval *return_value,
2219
  zend_string *regex_str, const HashTable *regex_ht,
2220
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2221
  zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags)
2222
175
{
2223
175
  zend_string *result;
2224
175
  size_t replace_count = 0;
2225
2226
175
  if (subject_str) {
2227
175
    result = php_replace_in_subject_func(
2228
175
      regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2229
175
    if (result != NULL) {
2230
145
      RETVAL_STR(result);
2231
145
    } else {
2232
30
      RETVAL_NULL();
2233
30
    }
2234
175
  } else {
2235
    /* if subject is an array */
2236
0
    zval    *subject_entry, zv;
2237
0
    zend_string *string_key;
2238
0
    zend_ulong   num_key;
2239
2240
0
    ZEND_ASSERT(subject_ht != NULL);
2241
2242
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2243
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2244
2245
    /* For each subject entry, convert it to string, then perform replacement
2246
       and add the result to the return_value array. */
2247
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2248
0
      zend_string *tmp_subject_entry_str;
2249
0
      zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2250
0
      if (UNEXPECTED(subject_entry_str == NULL)) {
2251
0
        break;
2252
0
      }
2253
2254
0
      result = php_replace_in_subject_func(
2255
0
        regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2256
0
      if (result != NULL) {
2257
        /* Add to return array */
2258
0
        ZVAL_STR(&zv, result);
2259
0
        if (string_key) {
2260
0
          zend_hash_add_new(return_value_ht, string_key, &zv);
2261
0
        } else {
2262
0
          zend_hash_index_add_new(return_value_ht, num_key, &zv);
2263
0
        }
2264
0
      }
2265
0
      zend_tmp_string_release(tmp_subject_entry_str);
2266
0
    } ZEND_HASH_FOREACH_END();
2267
0
  }
2268
2269
175
  return replace_count;
2270
175
}
2271
2272
static void _preg_replace_common(
2273
  zval *return_value,
2274
  HashTable *regex_ht, zend_string *regex_str,
2275
  HashTable *replace_ht, zend_string *replace_str,
2276
  HashTable *subject_ht, zend_string *subject_str,
2277
  zend_long limit,
2278
  zval *zcount,
2279
  bool is_filter
2280
403
) {
2281
403
  size_t replace_count = 0;
2282
403
  zend_string *result;
2283
403
  size_t old_replace_count;
2284
2285
  /* If replace is an array then the regex argument needs to also be an array */
2286
403
  if (replace_ht && !regex_ht) {
2287
0
    zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2288
0
    RETURN_THROWS();
2289
0
  }
2290
2291
403
  if (subject_str) {
2292
403
    old_replace_count = replace_count;
2293
403
    result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2294
403
      subject_str, limit, &replace_count);
2295
403
    if (result != NULL) {
2296
195
      if (!is_filter || replace_count > old_replace_count) {
2297
195
        RETVAL_STR(result);
2298
195
      } else {
2299
0
        zend_string_release_ex(result, 0);
2300
0
        RETVAL_NULL();
2301
0
      }
2302
208
    } else {
2303
208
      RETVAL_NULL();
2304
208
    }
2305
403
  } else {
2306
    /* if subject is an array */
2307
0
    zval    *subject_entry, zv;
2308
0
    zend_string *string_key;
2309
0
    zend_ulong   num_key;
2310
2311
0
    ZEND_ASSERT(subject_ht != NULL);
2312
2313
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2314
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2315
2316
    /* For each subject entry, convert it to string, then perform replacement
2317
       and add the result to the return_value array. */
2318
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2319
0
      old_replace_count = replace_count;
2320
0
      zend_string *tmp_subject_entry_str;
2321
0
      zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2322
0
      result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2323
0
        subject_entry_str, limit, &replace_count);
2324
2325
0
      if (result != NULL) {
2326
0
        if (!is_filter || replace_count > old_replace_count) {
2327
          /* Add to return array */
2328
0
          ZVAL_STR(&zv, result);
2329
0
          if (string_key) {
2330
0
            zend_hash_add_new(return_value_ht, string_key, &zv);
2331
0
          } else {
2332
0
            zend_hash_index_add_new(return_value_ht, num_key, &zv);
2333
0
          }
2334
0
        } else {
2335
0
          zend_string_release_ex(result, 0);
2336
0
        }
2337
0
      }
2338
0
      zend_tmp_string_release(tmp_subject_entry_str);
2339
0
    } ZEND_HASH_FOREACH_END();
2340
0
  }
2341
2342
403
  if (zcount) {
2343
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2344
0
  }
2345
403
}
2346
2347
/* {{{ preg_replace_common */
2348
/* Shared argument parser for preg_replace() and preg_filter(): reads the
   userland arguments and forwards them, together with is_filter, to
   _preg_replace_common(). */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
  /* Pattern, replacement and subject are each accepted as either an array
     or a string (Z_PARAM_ARRAY_HT_OR_STR), hence one variable of each kind. */
  HashTable *regex_ht, *replace_ht, *subject_ht;
  zend_string *regex_str, *replace_str, *subject_str;
  zval *zcount = NULL;   /* stays NULL when the count argument is omitted */
  zend_long limit = -1;  /* value used when the limit argument is omitted */

  /* Three required arguments, up to two optional ones. */
  ZEND_PARSE_PARAMETERS_START(3, 5)
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
    Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
    Z_PARAM_OPTIONAL
    Z_PARAM_LONG(limit)
    Z_PARAM_ZVAL(zcount)
  ZEND_PARSE_PARAMETERS_END();

  _preg_replace_common(return_value,
    regex_ht, regex_str,
    replace_ht, replace_str,
    subject_ht, subject_str,
    limit, zcount, is_filter);
}
2372
/* }}} */
2373
2374
/* {{{ Perform Perl-style regular expression replacement. */
2375
PHP_FUNCTION(preg_replace)
2376
406
{
2377
406
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2378
406
}
2379
/* }}} */
2380
2381
/* Frameless three-argument variant of preg_replace(): no limit, no count
   output, filter mode off. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
{
  HashTable *regex_ht, *replace_ht, *subject_ht;
  zend_string *regex_str, *replace_str, *subject_str;
  /* Scratch zvals handed to the Z_FLF_PARAM_* macros; released below. */
  zval regex_tmp, replace_tmp, subject_tmp;

  Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
  Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
  Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);

  _preg_replace_common(return_value,
    regex_ht, regex_str,
    replace_ht, replace_str,
    subject_ht, subject_str,
    /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);

  /* NOTE(review): this label is presumably the bail-out target of the
     Z_FLF_PARAM_* macros above - confirm against their definitions. */
flf_clean:;
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
  Z_FLF_PARAM_FREE_STR(2, replace_tmp);
  Z_FLF_PARAM_FREE_STR(3, subject_tmp);
}
2403
2404
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2405
PHP_FUNCTION(preg_replace_callback)
2406
178
{
2407
178
  zval *zcount = NULL;
2408
178
  zend_string *regex_str;
2409
178
  HashTable *regex_ht;
2410
178
  zend_string *subject_str;
2411
178
  HashTable *subject_ht;
2412
178
  zend_long limit = -1, flags = 0;
2413
178
  size_t replace_count;
2414
178
  zend_fcall_info fci = empty_fcall_info;
2415
178
  zend_fcall_info_cache fcc = empty_fcall_info_cache;
2416
2417
  /* Get function parameters and do error-checking. */
2418
533
  ZEND_PARSE_PARAMETERS_START(3, 6)
2419
885
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2420
885
    Z_PARAM_FUNC(fci, fcc)
2421
1.05k
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2422
1.05k
    Z_PARAM_OPTIONAL
2423
1.05k
    Z_PARAM_LONG(limit)
2424
0
    Z_PARAM_ZVAL(zcount)
2425
0
    Z_PARAM_LONG(flags)
2426
178
  ZEND_PARSE_PARAMETERS_END();
2427
2428
175
  replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht,
2429
175
    &fci, &fcc,
2430
175
    subject_str, subject_ht, limit, flags);
2431
175
  if (zcount) {
2432
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2433
0
  }
2434
175
}
2435
/* }}} */
2436
2437
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2438
PHP_FUNCTION(preg_replace_callback_array)
2439
0
{
2440
0
  zval *replace, *zcount = NULL;
2441
0
  HashTable *pattern, *subject_ht;
2442
0
  zend_string *subject_str, *str_idx_regex;
2443
0
  zend_long limit = -1, flags = 0;
2444
0
  size_t replace_count = 0;
2445
2446
  /* Get function parameters and do error-checking. */
2447
0
  ZEND_PARSE_PARAMETERS_START(2, 5)
2448
0
    Z_PARAM_ARRAY_HT(pattern)
2449
0
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2450
0
    Z_PARAM_OPTIONAL
2451
0
    Z_PARAM_LONG(limit)
2452
0
    Z_PARAM_ZVAL(zcount)
2453
0
    Z_PARAM_LONG(flags)
2454
0
  ZEND_PARSE_PARAMETERS_END();
2455
2456
0
  if (subject_ht) {
2457
0
    GC_TRY_ADDREF(subject_ht);
2458
0
  } else {
2459
0
    GC_TRY_ADDREF(subject_str);
2460
0
  }
2461
2462
0
  ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2463
0
    if (!str_idx_regex) {
2464
0
      zend_argument_type_error(1, "must contain only string patterns as keys");
2465
0
      goto error;
2466
0
    }
2467
2468
0
    zend_fcall_info_cache fcc = empty_fcall_info_cache;
2469
0
    zend_fcall_info fci = empty_fcall_info;
2470
0
    fci.size = sizeof(zend_fcall_info);
2471
    /* Copy potential trampoline */
2472
0
    ZVAL_COPY_VALUE(&fci.function_name, replace);
2473
2474
0
    if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2475
0
      zend_argument_type_error(1, "must contain only valid callbacks");
2476
0
      goto error;
2477
0
    }
2478
2479
0
    zval retval;
2480
0
    replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2481
0
      subject_str, subject_ht, limit, flags);
2482
0
    zend_release_fcall_info_cache(&fcc);
2483
2484
0
    switch (Z_TYPE(retval)) {
2485
0
      case IS_ARRAY:
2486
0
        ZEND_ASSERT(subject_ht);
2487
0
        zend_array_release(subject_ht);
2488
0
        subject_ht = Z_ARR(retval);
2489
0
        break;
2490
0
      case IS_STRING:
2491
0
        ZEND_ASSERT(subject_str);
2492
0
        zend_string_release(subject_str);
2493
0
        subject_str = Z_STR(retval);
2494
0
        break;
2495
0
      case IS_NULL:
2496
0
        RETVAL_NULL();
2497
0
        goto error;
2498
0
      EMPTY_SWITCH_DEFAULT_CASE()
2499
0
    }
2500
2501
0
    if (EG(exception)) {
2502
0
      goto error;
2503
0
    }
2504
0
  } ZEND_HASH_FOREACH_END();
2505
2506
0
  if (zcount) {
2507
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2508
0
  }
2509
2510
0
  if (subject_ht) {
2511
0
    RETVAL_ARR(subject_ht);
2512
    // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2513
0
    if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2514
0
      Z_TYPE_FLAGS_P(return_value) = 0;
2515
0
    }
2516
0
    return;
2517
0
  } else {
2518
0
    RETURN_STR(subject_str);
2519
0
  }
2520
2521
0
error:
2522
0
  if (subject_ht) {
2523
0
    zend_array_release(subject_ht);
2524
0
  } else {
2525
0
    zend_string_release(subject_str);
2526
0
  }
2527
0
}
2528
/* }}} */
2529
2530
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
2531
PHP_FUNCTION(preg_filter)
2532
0
{
2533
0
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2534
0
}
2535
/* }}} */
2536
2537
/* {{{ Split string into an array using a perl-style regular expression as a delimiter */
2538
PHP_FUNCTION(preg_split)
2539
0
{
2540
0
  zend_string     *regex;     /* Regular expression */
2541
0
  zend_string     *subject;   /* String to match against */
2542
0
  zend_long      limit_val = -1;/* Integer value of limit */
2543
0
  zend_long      flags = 0;   /* Match control flags */
2544
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2545
2546
  /* Get function parameters and do error checking */
2547
0
  ZEND_PARSE_PARAMETERS_START(2, 4)
2548
0
    Z_PARAM_STR(regex)
2549
0
    Z_PARAM_STR(subject)
2550
0
    Z_PARAM_OPTIONAL
2551
0
    Z_PARAM_LONG(limit_val)
2552
0
    Z_PARAM_LONG(flags)
2553
0
  ZEND_PARSE_PARAMETERS_END();
2554
2555
  /* Compile regex or get it from cache. */
2556
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2557
0
    RETURN_FALSE;
2558
0
  }
2559
2560
0
  pce->refcount++;
2561
0
  php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2562
0
  pce->refcount--;
2563
0
}
2564
/* }}} */
2565
2566
/* {{{ php_pcre_split */
2567
/* Splits subject_str around matches of the compiled pattern pce and stores
   the pieces in return_value as an array.
   - limit_val: -1 and 0 both mean "no limit"; any other value <= 1 returns
     the subject as a single piece without matching at all.
   - flags: combination of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
     PREG_SPLIT_OFFSET_CAPTURE.
   When PCRE_G(error_code) ends up set, or match data cannot be allocated,
   the array is destroyed and return_value becomes false. */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
  zend_long limit_val, zend_long flags)
{
  char *subject = ZSTR_VAL(subject_str);
  const size_t subject_len = ZSTR_LEN(subject_str);
  const uint32_t no_empty = flags & PREG_SPLIT_NO_EMPTY;            /* Skip empty pieces */
  const uint32_t delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;   /* Also emit capture groups */
  const uint32_t offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE; /* Emit (piece, offset) pairs */
  const uint32_t num_subpats = pce->capture_count + 1;               /* Groups plus the whole match */
  PCRE2_SIZE start_offset = 0;       /* Where the next search starts */
  PCRE2_SIZE last_match_offset = 0;  /* End of the previous match */
  pcre2_match_data *match_data;
  bool old_mdata_used;
  uint32_t options;                  /* Options for the first match call */
  int count;                         /* Result of the latest match call */
  zval tmp;

  array_init(return_value);
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);

  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Normalise the limit: 0 is treated like -1; any other value <= 1 means
     there is nothing to split. */
  if (limit_val == 0) {
    limit_val = -1;
  } else if (limit_val != -1 && limit_val <= 1) {
    goto last;
  }

  /* Use the preallocated match data when it is free and large enough,
     otherwise allocate one sized for this pattern. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      zval_ptr_dtor(return_value);
      RETURN_FALSE;
    }
  }

  /* Only the first match call goes without PCRE2_NO_UTF_CHECK, and only
     for patterns compiled with PCRE2_UTF. */
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

  /* Array of subpattern offsets */
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

#ifdef HAVE_PCRE_JIT_SUPPORT
  if ((pce->preg_options & PREG_JIT) && options) {
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  } else
#endif
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
      options, match_data, mctx);

  for (;;) {
    if (count >= 0) {
      /* A zero count is reported as "too many substrings". */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
        count = num_subpats;
      }

matched:
      /* A match whose end precedes its start is treated as an internal error. */
      if (UNEXPECTED(offsets[1] < offsets[0])) {
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
        break;
      }

      /* Emit the piece between the previous match and this one. */
      if (!no_empty || offsets[0] != last_match_offset) {
        if (offset_capture) {
          add_offset_pair(
            return_value_ht, subject, last_match_offset, offsets[0],
            NULL, 0);
        } else {
          populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
          zend_hash_next_index_insert_new(return_value_ht, &tmp);
        }

        /* One piece less to produce when a limit is active. */
        if (limit_val != -1) {
          limit_val--;
        }
      }

      /* Optionally emit every capture group of the delimiter as well. */
      if (delim_capture) {
        size_t group;
        for (group = 1; group < count; group++) {
          const PCRE2_SIZE group_start = offsets[2*group];
          const PCRE2_SIZE group_end = offsets[2*group+1];

          if (!no_empty || group_start != group_end) {
            if (offset_capture) {
              add_offset_pair(
                return_value_ht, subject, group_start, group_end, NULL, 0);
            } else {
              populate_match_value_str(&tmp, subject, group_start, group_end);
              zend_hash_next_index_insert_new(return_value_ht, &tmp);
            }
          }
        }
      }

      /* Continue right after the end of this match. */
      start_offset = last_match_offset = offsets[1];

      /* Empty match: mimic Perl's /g behaviour. Retry at the same position
         with PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; if that finds nothing,
         step over one unit and resume normal matching. */
      if (start_offset == offsets[0]) {
        if (limit_val != -1 && limit_val <= 1) {
          break;
        }
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
        if (count >= 0) {
          goto matched;
        } else if (count == PCRE2_ERROR_NOMATCH) {
          /* Not necessarily the end: advance unless we are already at the
             end of the subject. */
          if (start_offset < subject_len) {
            start_offset += calculate_unit_length(pce, subject + start_offset);
          } else {
            break;
          }
        } else {
          goto error;
        }
      }

    } else if (count == PCRE2_ERROR_NOMATCH) {
      break;
    } else {
error:
      pcre_handle_exec_error(count);
      break;
    }

    /* Stop once only the final piece is left to produce. */
    if (limit_val != -1 && limit_val <= 1) {
      break;
    }

#ifdef HAVE_PCRE_JIT_SUPPORT
    if (pce->preg_options & PREG_JIT) {
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  }

  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }
  mdata_used = old_mdata_used;

  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
    zval_ptr_dtor(return_value);
    RETURN_FALSE;
  }

last:
  /* start_offset may have been advanced without a further successful
     match, so the tail starts at the end of the last real match. */
  start_offset = last_match_offset;

  if (!no_empty || start_offset < subject_len) {
    if (offset_capture) {
      /* Add the last (match, offset) pair to the return value */
      add_offset_pair(return_value_ht, subject, start_offset, subject_len, NULL, 0);
    } else {
      /* Add the last piece; with no match at all, reuse the subject string itself. */
      if (start_offset == 0) {
        ZVAL_STR_COPY(&tmp, subject_str);
      } else {
        populate_match_value_str(&tmp, subject, start_offset, subject_len);
      }
      zend_hash_next_index_insert_new(return_value_ht, &tmp);
    }
  }
}
2763
/* }}} */
2764
2765
/* {{{ Quote regular expression characters plus an optional character */
2766
PHP_FUNCTION(preg_quote)
2767
42
{
2768
42
  zend_string *str;           /* Input string argument */
2769
42
  zend_string *delim = NULL;   /* Additional delimiter argument */
2770
42
  char    *in_str;      /* Input string */
2771
42
  char    *in_str_end;      /* End of the input string */
2772
42
  zend_string *out_str;     /* Output string with quoted characters */
2773
42
  size_t       extra_len;         /* Number of additional characters */
2774
42
  char    *p,         /* Iterator for input string */
2775
42
        *q,         /* Iterator for output string */
2776
42
         delim_char = '\0', /* Delimiter character to be quoted */
2777
42
         c;         /* Current character */
2778
2779
  /* Get the arguments and check for errors */
2780
126
  ZEND_PARSE_PARAMETERS_START(1, 2)
2781
168
    Z_PARAM_STR(str)
2782
42
    Z_PARAM_OPTIONAL
2783
84
    Z_PARAM_STR_OR_NULL(delim)
2784
42
  ZEND_PARSE_PARAMETERS_END();
2785
2786
  /* Nothing to do if we got an empty string */
2787
42
  if (ZSTR_LEN(str) == 0) {
2788
0
    RETURN_EMPTY_STRING();
2789
0
  }
2790
2791
42
  in_str = ZSTR_VAL(str);
2792
42
  in_str_end = in_str + ZSTR_LEN(str);
2793
2794
42
  if (delim) {
2795
0
    delim_char = ZSTR_VAL(delim)[0];
2796
0
  }
2797
2798
  /* Go through the string and quote necessary characters */
2799
42
  extra_len = 0;
2800
42
  p = in_str;
2801
57.1k
  do {
2802
57.1k
    c = *p;
2803
57.1k
    switch(c) {
2804
733
      case '.':
2805
913
      case '\\':
2806
1.35k
      case '+':
2807
1.36k
      case '*':
2808
1.46k
      case '?':
2809
1.59k
      case '[':
2810
1.66k
      case '^':
2811
1.77k
      case ']':
2812
1.77k
      case '$':
2813
1.90k
      case '(':
2814
2.38k
      case ')':
2815
2.46k
      case '{':
2816
2.87k
      case '}':
2817
3.37k
      case '=':
2818
3.37k
      case '!':
2819
3.64k
      case '>':
2820
3.69k
      case '<':
2821
3.74k
      case '|':
2822
4.21k
      case ':':
2823
4.47k
      case '-':
2824
4.89k
      case '#':
2825
4.89k
        extra_len++;
2826
4.89k
        break;
2827
2828
2.60k
      case '\0':
2829
2.60k
        extra_len+=3;
2830
2.60k
        break;
2831
2832
49.6k
      default:
2833
49.6k
        if (c == delim_char) {
2834
0
          extra_len++;
2835
0
        }
2836
49.6k
        break;
2837
57.1k
    }
2838
57.1k
    p++;
2839
57.1k
  } while (p != in_str_end);
2840
2841
42
  if (extra_len == 0) {
2842
1
    RETURN_STR_COPY(str);
2843
1
  }
2844
2845
  /* Allocate enough memory so that even if each character
2846
     is quoted, we won't run out of room */
2847
41
  out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2848
41
  q = ZSTR_VAL(out_str);
2849
41
  p = in_str;
2850
2851
57.1k
  do {
2852
57.1k
    c = *p;
2853
57.1k
    switch(c) {
2854
733
      case '.':
2855
913
      case '\\':
2856
1.35k
      case '+':
2857
1.36k
      case '*':
2858
1.46k
      case '?':
2859
1.59k
      case '[':
2860
1.66k
      case '^':
2861
1.77k
      case ']':
2862
1.77k
      case '$':
2863
1.90k
      case '(':
2864
2.38k
      case ')':
2865
2.46k
      case '{':
2866
2.87k
      case '}':
2867
3.37k
      case '=':
2868
3.37k
      case '!':
2869
3.64k
      case '>':
2870
3.69k
      case '<':
2871
3.74k
      case '|':
2872
4.21k
      case ':':
2873
4.47k
      case '-':
2874
4.89k
      case '#':
2875
4.89k
        *q++ = '\\';
2876
4.89k
        *q++ = c;
2877
4.89k
        break;
2878
2879
2.60k
      case '\0':
2880
2.60k
        *q++ = '\\';
2881
2.60k
        *q++ = '0';
2882
2.60k
        *q++ = '0';
2883
2.60k
        *q++ = '0';
2884
2.60k
        break;
2885
2886
49.6k
      default:
2887
49.6k
        if (c == delim_char) {
2888
0
          *q++ = '\\';
2889
0
        }
2890
49.6k
        *q++ = c;
2891
49.6k
        break;
2892
57.1k
    }
2893
57.1k
    p++;
2894
57.1k
  } while (p != in_str_end);
2895
41
  *q = '\0';
2896
2897
41
  RETURN_NEW_STR(out_str);
2898
41
}
2899
/* }}} */
2900
2901
/* {{{ Searches array and returns entries which match regex */
2902
PHP_FUNCTION(preg_grep)
2903
0
{
2904
0
  zend_string     *regex;     /* Regular expression */
2905
0
  zval        *input;     /* Input array */
2906
0
  zend_long      flags = 0;   /* Match control flags */
2907
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2908
2909
  /* Get arguments and do error checking */
2910
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2911
0
    Z_PARAM_STR(regex)
2912
0
    Z_PARAM_ARRAY(input)
2913
0
    Z_PARAM_OPTIONAL
2914
0
    Z_PARAM_LONG(flags)
2915
0
  ZEND_PARSE_PARAMETERS_END();
2916
2917
  /* Compile regex or get it from cache. */
2918
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2919
0
    RETURN_FALSE;
2920
0
  }
2921
2922
0
  pce->refcount++;
2923
0
  php_pcre_grep_impl(pce, input, return_value, flags);
2924
0
  pce->refcount--;
2925
0
}
2926
/* }}} */
2927
2928
/* Copies into return_value (as an array, keys preserved) every entry of the
   array `input` whose string value matches the compiled pattern pce, or,
   with PREG_GREP_INVERT set in flags, every entry that does not match.
   On any failure (match data cannot be allocated, or a match call reports
   an error other than "no match") PCRE_G(error_code) is set, the partial
   array is destroyed and return_value becomes false. */
PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
{
  zval *entry;                   /* An entry in the input array */
  zend_string *string_key;
  zend_ulong num_key;
  int count;                     /* Result of the match call */
  uint32_t options;              /* Execution options */
  pcre2_match_data *match_data;
  bool old_mdata_used;
  /* Whether to return the non-matching entries instead */
  const bool invert = (flags & PREG_GREP_INVERT) != 0;
  /* Capture groups plus the whole match */
  const uint32_t num_subpats = pce->capture_count + 1;

  /* Initialize return array */
  array_init(return_value);
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);

  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Use the preallocated match data when it is free and large enough,
     otherwise allocate one sized for this pattern. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      /* Report failure the same way as the error path at the end of this
         function, instead of leaving an empty array as the result. */
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      zend_array_destroy(Z_ARR_P(return_value));
      RETURN_FALSE;
    }
  }

  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

  /* Go through the input array */
  ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    zend_string *tmp_subject_str;
    zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);

    /* Perform the match; the JIT path is only taken when options is
       non-zero, i.e. for patterns not compiled with PCRE2_UTF. */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT) && options) {
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
        options, match_data, mctx);

    /* Anything negative other than "no match" is an execution error. */
    if (count < 0 && count != PCRE2_ERROR_NOMATCH) {
      pcre_handle_exec_error(count);
      zend_tmp_string_release(tmp_subject_str);
      break;
    }

    /* Check for too many substrings condition. */
    if (UNEXPECTED(count == 0)) {
      php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
    }

    /* Keep the entry when "it matched" and "invert" disagree: matches in
       normal mode, non-matches in inverted mode. */
    if ((count >= 0) != invert) {
      Z_TRY_ADDREF_P(entry);

      /* Add to return array */
      if (string_key) {
        zend_hash_update(return_value_ht, string_key, entry);
      } else {
        zend_hash_index_update(return_value_ht, num_key, entry);
      }
    }

    zend_tmp_string_release(tmp_subject_str);
  } ZEND_HASH_FOREACH_END();

  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }

  mdata_used = old_mdata_used;

  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
    zend_array_destroy(Z_ARR_P(return_value));
    RETURN_FALSE;
  }
}
3026
/* }}} */
3027
3028
/* {{{ Returns the error code of the last regexp execution. */
3029
PHP_FUNCTION(preg_last_error)
3030
0
{
3031
0
  ZEND_PARSE_PARAMETERS_NONE();
3032
3033
0
  RETURN_LONG(PCRE_G(error_code));
3034
0
}
3035
/* }}} */
3036
3037
/* {{{ Returns the error message of the last regexp execution. */
3038
PHP_FUNCTION(preg_last_error_msg)
3039
0
{
3040
0
  ZEND_PARSE_PARAMETERS_NONE();
3041
3042
0
  RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3043
0
}
3044
/* }}} */
3045
3046
/* {{{ module definition structures */
3047
3048
zend_module_entry pcre_module_entry = {
3049
  STANDARD_MODULE_HEADER,
3050
  "pcre",
3051
  ext_functions,
3052
  PHP_MINIT(pcre),
3053
  PHP_MSHUTDOWN(pcre),
3054
  PHP_RINIT(pcre),
3055
  PHP_RSHUTDOWN(pcre),
3056
  PHP_MINFO(pcre),
3057
  PHP_PCRE_VERSION,
3058
  PHP_MODULE_GLOBALS(pcre),
3059
  PHP_GINIT(pcre),
3060
  PHP_GSHUTDOWN(pcre),
3061
  NULL,
3062
  STANDARD_MODULE_PROPERTIES_EX
3063
};
3064
3065
#ifdef COMPILE_DL_PCRE
3066
ZEND_GET_MODULE(pcre)
3067
#endif
3068
3069
/* }}} */
3070
3071
/* Accessor that returns this file's `mctx` match context. */
PHPAPI pcre2_match_context *php_pcre_mctx(void)
{/*{{{*/
  pcre2_match_context *const match_ctx = mctx;

  return match_ctx;
}/*}}}*/
3075
3076
/* Accessor that returns this file's `gctx` general context. */
PHPAPI pcre2_general_context *php_pcre_gctx(void)
{/*{{{*/
  pcre2_general_context *const general_ctx = gctx;

  return general_ctx;
}/*}}}*/
3080
3081
/* Accessor that returns this file's `cctx` compile context. */
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
{/*{{{*/
  pcre2_compile_context *const compile_ctx = cctx;

  return compile_ctx;
}/*}}}*/
3085
3086
/* Increments the refcount of a cache entry. pce must not be NULL
   (checked with assert()). */
PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
{/*{{{*/
  assert(pce != NULL);
  pce->refcount += 1;
}/*}}}*/
3091
3092
/* Decrements the refcount of a cache entry. pce must not be NULL and its
   refcount must not already be zero (both checked with assert()). */
PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
{/*{{{*/
  assert(pce != NULL);
  assert(pce->refcount != 0);
  pce->refcount -= 1;
}/*}}}*/
3098
3099
/* Returns the compiled pattern (`re`) stored in a cache entry. pce must
   not be NULL (checked with assert()). */
PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
{/*{{{*/
  assert(pce != NULL);

  pcre2_code *const compiled = pce->re;
  return compiled;
}/*}}}*/