Coverage Report

Created: 2026-06-02 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/php_pcre.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Andrei Zmievski <andrei@php.net>                             |
12
   +----------------------------------------------------------------------+
13
 */
14
15
#include "php.h"
16
#include "php_ini.h"
17
#include "php_pcre.h"
18
#include "ext/standard/info.h"
19
#include "ext/standard/basic_functions.h"
20
#include "zend_smart_str.h"
21
#include "SAPI.h"
22
23
0
#define PREG_PATTERN_ORDER      1
24
0
#define PREG_SET_ORDER        2
25
2
#define PREG_OFFSET_CAPTURE     (1<<8)
26
2
#define PREG_UNMATCHED_AS_NULL    (1<<9)
27
28
0
#define PREG_SPLIT_NO_EMPTY     (1<<0)
29
0
#define PREG_SPLIT_DELIM_CAPTURE  (1<<1)
30
0
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
31
32
0
#define PREG_GREP_INVERT      (1<<0)
33
34
#define PREG_JIT                    (1<<3)
35
36
298
#define PCRE_CACHE_SIZE 4096
37
38
#ifdef HAVE_PCRE_JIT_SUPPORT
39
#define PHP_PCRE_JIT_SUPPORT 1
40
#else
41
#define PHP_PCRE_JIT_SUPPORT 0
42
#endif
43
44
char *php_pcre_version;
45
46
#include "php_pcre_arginfo.h"
47
48
struct _pcre_cache_entry {
49
  pcre2_code *re;
50
  /* Pointer is not NULL (during request) when there are named captures.
51
   * Length is equal to capture_count + 1 to account for capture group 0.
52
   * This table cache is only valid during request.
53
   * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
54
   * (see GH-17122 and GH-17132). */
55
  zend_string **subpats_table;
56
  uint32_t preg_options;
57
  uint32_t name_count;
58
  uint32_t capture_count;
59
  uint32_t compile_options;
60
  uint32_t refcount;
61
};
62
63
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65
#ifdef HAVE_PCRE_JIT_SUPPORT
66
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
67
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
68
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
69
#endif
70
/* General context using (infallible) system allocator. */
71
ZEND_TLS pcre2_general_context *gctx = NULL;
72
/* These two are global per thread for now. Though it is possible to use these
73
  per pattern. Either one can copy it and use in pce, or one does no global
74
  contexts at all, but creates for every pce. */
75
ZEND_TLS pcre2_compile_context *cctx = NULL;
76
ZEND_TLS pcre2_match_context   *mctx = NULL;
77
ZEND_TLS pcre2_match_data      *mdata = NULL;
78
ZEND_TLS bool              mdata_used = 0;
79
ZEND_TLS uint8_t pcre2_init_ok = 0;
80
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
81
static MUTEX_T pcre_mt = NULL;
82
#define php_pcre_mutex_alloc() \
83
  if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
84
#define php_pcre_mutex_free() \
85
  if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
86
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
87
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
88
#else
89
#define php_pcre_mutex_alloc()
90
#define php_pcre_mutex_free()
91
#define php_pcre_mutex_lock()
92
#define php_pcre_mutex_unlock()
93
#endif
94
95
ZEND_TLS HashTable char_tables;
96
97
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
98
99
/* Destructor for entries of the char_tables hash: releases the persistent
 * block stored as the entry's pointer value. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
  pefree(Z_PTR_P(data), 1);
}/*}}}*/
104
105
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
106
75
{
107
75
  int preg_code = 0;
108
109
75
  switch (pcre_code) {
110
3
    case PCRE2_ERROR_MATCHLIMIT:
111
3
      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
112
3
      break;
113
114
0
    case PCRE2_ERROR_RECURSIONLIMIT:
115
0
      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
116
0
      break;
117
118
0
    case PCRE2_ERROR_BADUTFOFFSET:
119
0
      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
120
0
      break;
121
122
#ifdef HAVE_PCRE_JIT_SUPPORT
123
    case PCRE2_ERROR_JIT_STACKLIMIT:
124
      preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
125
      break;
126
#endif
127
128
72
    default:
129
72
      if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
130
14
        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
131
58
      } else  {
132
58
        preg_code = PHP_PCRE_INTERNAL_ERROR;
133
58
      }
134
72
      break;
135
75
  }
136
137
75
  PCRE_G(error_code) = preg_code;
138
75
}
139
/* }}} */
140
141
/* Returns the static, human-readable message for a PHP_PCRE_* error code.
 * Codes that are not recognised yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
  const char *msg;

  switch (error_code) {
    case PHP_PCRE_NO_ERROR:
      msg = "No error";
      break;
    case PHP_PCRE_INTERNAL_ERROR:
      msg = "Internal error";
      break;
    case PHP_PCRE_BAD_UTF8_ERROR:
      msg = "Malformed UTF-8 characters, possibly incorrectly encoded";
      break;
    case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
      msg = "The offset did not correspond to the beginning of a valid UTF-8 code point";
      break;
    case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
      msg = "Backtrack limit exhausted";
      break;
    case PHP_PCRE_RECURSION_LIMIT_ERROR:
      msg = "Recursion limit exhausted";
      break;
#ifdef HAVE_PCRE_JIT_SUPPORT
    case PHP_PCRE_JIT_STACKLIMIT_ERROR:
      msg = "JIT stack limit exhausted";
      break;
#endif
    default:
      msg = "Unknown error";
      break;
  }

  return msg;
}
166
/* }}} */
167
168
/* Destructor for pcre_cache entries: drops the subpattern-name table (if one
 * is attached), frees the compiled pattern and finally the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
  pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

  if (entry == NULL) {
    return;
  }

  if (entry->subpats_table != NULL) {
    /* The table has one slot per capture group plus one for group 0. */
    free_subpats_table(entry->subpats_table, entry->capture_count + 1);
  }

  pcre2_code_free(entry->re);
  free(entry);
}
178
/* }}} */
179
180
/* PCRE2 allocation callback backed by persistent memory; `data` is ignored. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
  void *block = pemalloc(size, 1);

  return block;
}
184
185
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is ignored. */
static void php_pcre_free(void *block, void *data)
{
  pefree(block, 1);
}
189
190
/* PCRE2 allocation callback backed by emalloc(); `data` is ignored. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
  void *block = emalloc(size);

  return block;
}
194
195
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is ignored. */
static void php_pcre_efree(void *block, void *data)
{
  efree(block);
}
199
200
324
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
201
202
static void php_pcre_init_pcre2(uint8_t jit)
203
2
{/*{{{*/
204
2
  if (!gctx) {
205
2
    gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
206
2
    if (!gctx) {
207
0
      pcre2_init_ok = 0;
208
0
      return;
209
0
    }
210
2
  }
211
212
2
  if (!cctx) {
213
2
    cctx = pcre2_compile_context_create(gctx);
214
2
    if (!cctx) {
215
0
      pcre2_init_ok = 0;
216
0
      return;
217
0
    }
218
2
  }
219
220
2
  if (!mctx) {
221
2
    mctx = pcre2_match_context_create(gctx);
222
2
    if (!mctx) {
223
0
      pcre2_init_ok = 0;
224
0
      return;
225
0
    }
226
2
  }
227
228
#ifdef HAVE_PCRE_JIT_SUPPORT
229
  if (jit && !jit_stack) {
230
    jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
231
    if (!jit_stack) {
232
      pcre2_init_ok = 0;
233
      return;
234
    }
235
  }
236
#endif
237
238
2
  if (!mdata) {
239
2
    mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
240
2
    if (!mdata) {
241
0
      pcre2_init_ok = 0;
242
0
      return;
243
0
    }
244
2
  }
245
246
2
  pcre2_init_ok = 1;
247
2
}/*}}}*/
248
249
static void php_pcre_shutdown_pcre2(void)
250
0
{/*{{{*/
251
0
  if (gctx) {
252
0
    pcre2_general_context_free(gctx);
253
0
    gctx = NULL;
254
0
  }
255
256
0
  if (cctx) {
257
0
    pcre2_compile_context_free(cctx);
258
0
    cctx = NULL;
259
0
  }
260
261
0
  if (mctx) {
262
0
    pcre2_match_context_free(mctx);
263
0
    mctx = NULL;
264
0
  }
265
266
#ifdef HAVE_PCRE_JIT_SUPPORT
267
  /* Stack may only be destroyed when no cached patterns
268
    possibly associated with it do exist. */
269
  if (jit_stack) {
270
    pcre2_jit_stack_free(jit_stack);
271
    jit_stack = NULL;
272
  }
273
#endif
274
275
0
  if (mdata) {
276
0
    pcre2_match_data_free(mdata);
277
0
    mdata = NULL;
278
0
  }
279
280
0
  pcre2_init_ok = 0;
281
0
}/*}}}*/
282
283
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
284
2
{
285
2
  php_pcre_mutex_alloc();
286
287
2
  zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
288
289
2
  pcre_globals->backtrack_limit = 0;
290
2
  pcre_globals->recursion_limit = 0;
291
2
  pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
292
2
  ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
293
2
  ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
294
#ifdef HAVE_PCRE_JIT_SUPPORT
295
  pcre_globals->jit = 1;
296
#endif
297
298
2
  php_pcre_init_pcre2(1);
299
2
  zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
300
2
}
301
/* }}} */
302
303
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
304
0
{
305
0
  zend_hash_destroy(&pcre_globals->pcre_cache);
306
307
0
  php_pcre_shutdown_pcre2();
308
0
  zend_hash_destroy(&char_tables);
309
0
  php_pcre_mutex_free();
310
0
}
311
/* }}} */
312
313
static PHP_INI_MH(OnUpdateBacktrackLimit)
314
2
{/*{{{*/
315
2
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
316
2
  if (mctx) {
317
2
    pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
318
2
  }
319
320
2
  return SUCCESS;
321
2
}/*}}}*/
322
323
static PHP_INI_MH(OnUpdateRecursionLimit)
324
2
{/*{{{*/
325
2
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
326
2
  if (mctx) {
327
2
    pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
328
2
  }
329
330
2
  return SUCCESS;
331
2
}/*}}}*/
332
333
#ifdef HAVE_PCRE_JIT_SUPPORT
334
static PHP_INI_MH(OnUpdateJit)
335
{/*{{{*/
336
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
337
  if (PCRE_G(jit) && jit_stack) {
338
    pcre2_jit_stack_assign(mctx, NULL, jit_stack);
339
  } else {
340
    pcre2_jit_stack_assign(mctx, NULL, NULL);
341
  }
342
343
  return SUCCESS;
344
}/*}}}*/
345
#endif
346
347
PHP_INI_BEGIN()
348
  STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
349
  STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
350
#ifdef HAVE_PCRE_JIT_SUPPORT
351
  STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
352
#endif
353
PHP_INI_END()
354
355
/* Fetches a string-valued pcre2_config() item (for example the library
 * version) into a freshly malloc()ed, NUL-terminated buffer.
 *
 * Returns the buffer, which the caller must release with free(), or NULL when
 * the item is unavailable or memory could not be allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
  /* With a NULL destination pcre2_config() only reports the required length;
   * a negative value is an error code and must not be used as a size. */
  int len = pcre2_config(what, NULL);
  if (len <= 0) {
    return NULL;
  }

  char *ret = malloc((size_t) len + 1);
  if (!ret) {
    return NULL;
  }

  /* The second call fills the buffer; anything but a positive length means
   * the buffer content cannot be trusted. */
  len = pcre2_config(what, ret);
  if (len <= 0) {
    free(ret);
    return NULL;
  }

  return ret;
}/*}}}*/
368
369
/* {{{ PHP_MINFO_FUNCTION(pcre) */
370
static PHP_MINFO_FUNCTION(pcre)
371
1
{
372
#ifdef HAVE_PCRE_JIT_SUPPORT
373
  uint32_t flag = 0;
374
  char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
375
#endif
376
1
  char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
377
1
  char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
378
379
1
  php_info_print_table_start();
380
1
  php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
381
1
  php_info_print_table_row(2, "PCRE Library Version", version);
382
1
  free(version);
383
1
  php_info_print_table_row(2, "PCRE Unicode Version", unicode);
384
1
  free(unicode);
385
386
#ifdef HAVE_PCRE_JIT_SUPPORT
387
  if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
388
    php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
389
  } else {
390
    php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
391
  }
392
  if (jit_target) {
393
    php_info_print_table_row(2, "PCRE JIT Target", jit_target);
394
  }
395
  free(jit_target);
396
#else
397
1
  php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
398
1
#endif
399
400
#ifdef HAVE_PCRE_VALGRIND_SUPPORT
401
  php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
402
#endif
403
404
1
  php_info_print_table_end();
405
406
1
  DISPLAY_INI_ENTRIES();
407
1
}
408
/* }}} */
409
410
/* {{{ PHP_MINIT_FUNCTION(pcre) */
411
static PHP_MINIT_FUNCTION(pcre)
412
2
{
413
#ifdef HAVE_PCRE_JIT_SUPPORT
414
  if (UNEXPECTED(!pcre2_init_ok)) {
415
    /* Retry. */
416
    php_pcre_init_pcre2(PCRE_G(jit));
417
    if (!pcre2_init_ok) {
418
      return FAILURE;
419
    }
420
  }
421
#endif
422
423
2
  REGISTER_INI_ENTRIES();
424
425
2
  php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
426
427
2
  register_php_pcre_symbols(module_number);
428
429
2
  return SUCCESS;
430
2
}
431
/* }}} */
432
433
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
434
static PHP_MSHUTDOWN_FUNCTION(pcre)
435
0
{
436
0
  UNREGISTER_INI_ENTRIES();
437
438
0
  free(php_pcre_version);
439
440
0
  return SUCCESS;
441
0
}
442
/* }}} */
443
444
/* {{{ PHP_RINIT_FUNCTION(pcre) */
445
static PHP_RINIT_FUNCTION(pcre)
446
33.5k
{
447
#ifdef HAVE_PCRE_JIT_SUPPORT
448
  if (UNEXPECTED(!pcre2_init_ok)) {
449
    /* Retry. */
450
    php_pcre_mutex_lock();
451
    php_pcre_init_pcre2(PCRE_G(jit));
452
    if (!pcre2_init_ok) {
453
      php_pcre_mutex_unlock();
454
      return FAILURE;
455
    }
456
    php_pcre_mutex_unlock();
457
  }
458
459
  mdata_used = 0;
460
#endif
461
462
33.5k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
463
33.5k
  PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
464
33.5k
  if (!PCRE_G(gctx_zmm)) {
465
0
    return FAILURE;
466
0
  }
467
468
33.5k
  return SUCCESS;
469
33.5k
}
470
/* }}} */
471
472
static PHP_RSHUTDOWN_FUNCTION(pcre)
473
33.5k
{
474
33.5k
  pcre_cache_entry *pce;
475
17.1M
  ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
476
17.1M
    if (pce->subpats_table) {
477
0
      free_subpats_table(pce->subpats_table, pce->capture_count + 1);
478
0
      pce->subpats_table = NULL;
479
0
    }
480
17.1M
  } ZEND_HASH_FOREACH_END();
481
482
33.5k
  pcre2_general_context_free(PCRE_G(gctx_zmm));
483
33.5k
  PCRE_G(gctx_zmm) = NULL;
484
485
33.5k
  zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
486
33.5k
  zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
487
33.5k
  ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
488
33.5k
  ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
489
33.5k
  return SUCCESS;
490
33.5k
}
491
492
/* {{{ static pcre_clean_cache */
493
/* Hash-apply callback used to evict cache entries. `arg` points to an int
 * holding the number of entries still to remove. Entries with a non-zero
 * refcount are kept; every other entry is removed, and the walk stops once
 * the counter reaches zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
  pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);
  int *remaining = (int *)arg;

  if (entry->refcount) {
    return ZEND_HASH_APPLY_KEEP;
  }

  *remaining -= 1;
  if (*remaining == 0) {
    return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
  }

  return ZEND_HASH_APPLY_REMOVE;
}
507
/* }}} */
508
509
0
/* Releases every non-NULL name in a subpattern-name table of `num_subpats`
 * slots and then frees the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
  for (uint32_t slot = 0; slot < num_subpats; slot++) {
    zend_string *name = subpat_names[slot];

    if (name != NULL) {
      zend_string_release_ex(name, false);
    }
  }

  efree(subpat_names);
}
518
519
/* {{{ static make_subpats_table */
520
/* Builds a table that maps capture-group numbers to their names.
 *
 * The table has capture_count + 1 slots (group 0 included); slots of unnamed
 * groups stay NULL. `name_cnt` is the number of entries to read from the
 * pattern's name table. Returns NULL, after emitting a warning, when the name
 * table cannot be queried. */
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
  char *name_table;
  uint32_t name_size;
  int table_rc = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
  int size_rc = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);

  if (table_rc < 0 || size_rc < 0) {
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", table_rc < 0 ? table_rc : size_rc);
    return NULL;
  }

  zend_string **subpat_names = ecalloc(pce->capture_count + 1, sizeof(zend_string *));

  for (uint32_t entry = 0; entry < name_cnt; entry++) {
    /* Each entry starts with the group number as two bytes, high byte
     * first, followed by the NUL-terminated group name. */
    const unsigned char *raw = (const unsigned char *)name_table;
    unsigned short group = (unsigned short)((raw[0] << 8) | raw[1]);
    const char *group_name = name_table + 2;

    subpat_names[group] = zend_string_init(group_name, strlen(group_name), false);
    name_table += name_size;
  }

  return subpat_names;
}
544
/* }}} */
545
546
/* Returns the entry's subpattern-name table, building and attaching it on
 * first use. The result is NULL when building the table failed. */
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
  if (pce->subpats_table != NULL) {
    return pce->subpats_table;
  }

  pce->subpats_table = make_subpats_table(name_cnt, pce);
  return pce->subpats_table;
}
553
554
/* {{{ static calculate_unit_length */
555
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
556
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
{
  /* Without PCRE2_UTF every character is exactly one byte. */
  if (!(pce->compile_options & PCRE2_UTF)) {
    return 1;
  }

  /* Step over the lead byte, then over every continuation byte (10xxxxxx)
   * that follows it. */
  const char *cursor = start + 1;
  while ((*cursor & 0xC0) == 0x80) {
    cursor++;
  }

  return cursor - start;
}
571
/* }}} */
572
573
/* {{{ pcre_get_compiled_regex_cache */
574
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
575
380
{
576
380
  pcre2_code      *re = NULL;
577
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
578
  uint32_t       coptions = PCRE2_NO_START_OPTIMIZE;
579
#else
580
380
  uint32_t       coptions = 0;
581
380
#endif
582
380
  uint32_t       eoptions = 0;
583
380
  PCRE2_UCHAR           error[128];
584
380
  PCRE2_SIZE           erroffset;
585
380
  int                  errnumber;
586
380
  char         delimiter;
587
380
  char         start_delimiter;
588
380
  char         end_delimiter;
589
380
  char        *p, *pp;
590
380
  char        *pattern;
591
380
  size_t         pattern_len;
592
380
  uint32_t       poptions = 0;
593
380
  const uint8_t       *tables = NULL;
594
380
  zval                *zv;
595
380
  pcre_cache_entry   new_entry;
596
380
  int          rc;
597
380
  zend_string     *key;
598
380
  pcre_cache_entry  *ret;
599
600
380
  if (locale_aware && BG(ctype_string)) {
601
0
    key = zend_string_concat2(
602
0
      ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
603
0
      ZSTR_VAL(regex), ZSTR_LEN(regex));
604
380
  } else {
605
380
    key = regex;
606
380
  }
607
608
  /* Try to lookup the cached regex entry, and if successful, just pass
609
     back the compiled pattern, otherwise go on and compile it. */
610
380
  zv = zend_hash_find(&PCRE_G(pcre_cache), key);
611
380
  if (zv) {
612
24
    if (key != regex) {
613
0
      zend_string_release_ex(key, 0);
614
0
    }
615
24
    return (pcre_cache_entry*)Z_PTR_P(zv);
616
24
  }
617
618
356
  p = ZSTR_VAL(regex);
619
356
  const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
620
621
  /* Parse through the leading whitespace, and display a warning if we
622
     get to the end without encountering a delimiter. */
623
356
  while (isspace((unsigned char)*p)) p++;
624
356
  if (p >= end_p) {
625
0
    if (key != regex) {
626
0
      zend_string_release_ex(key, 0);
627
0
    }
628
0
    php_error_docref(NULL, E_WARNING, "Empty regular expression");
629
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
630
0
    return NULL;
631
0
  }
632
633
  /* Get the delimiter and display a warning if it is alphanumeric
634
     or a backslash. */
635
356
  delimiter = *p++;
636
356
  if (isalnum((unsigned char)delimiter) || delimiter == '\\' || delimiter == '\0') {
637
1
    if (key != regex) {
638
0
      zend_string_release_ex(key, 0);
639
0
    }
640
1
    php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
641
1
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
642
1
    return NULL;
643
1
  }
644
645
355
  start_delimiter = delimiter;
646
355
  if ((pp = strchr("([{< )]}> )]}>", delimiter)))
647
4
    delimiter = pp[5];
648
355
  end_delimiter = delimiter;
649
650
355
  pp = p;
651
652
355
  if (start_delimiter == end_delimiter) {
653
    /* We need to iterate through the pattern, searching for the ending delimiter,
654
       but skipping the backslashed delimiters.  If the ending delimiter is not
655
       found, display a warning. */
656
16.4k
    while (pp < end_p) {
657
16.4k
      if (*pp == '\\' && pp + 1 < end_p) pp++;
658
15.3k
      else if (*pp == delimiter)
659
351
        break;
660
16.1k
      pp++;
661
16.1k
    }
662
354
  } else {
663
    /* We iterate through the pattern, searching for the matching ending
664
     * delimiter. For each matching starting delimiter, we increment nesting
665
     * level, and decrement it for each matching ending delimiter. If we
666
     * reach the end of the pattern without matching, display a warning.
667
     */
668
1
    int brackets = 1;   /* brackets nesting level */
669
67
    while (pp < end_p) {
670
66
      if (*pp == '\\' && pp + 1 < end_p) pp++;
671
62
      else if (*pp == end_delimiter && --brackets <= 0)
672
0
        break;
673
62
      else if (*pp == start_delimiter)
674
5
        brackets++;
675
66
      pp++;
676
66
    }
677
1
  }
678
679
355
  if (pp >= end_p) {
680
4
    if (key != regex) {
681
0
      zend_string_release_ex(key, 0);
682
0
    }
683
4
    if (start_delimiter == end_delimiter) {
684
3
      php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
685
3
    } else {
686
1
      php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
687
1
    }
688
4
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
689
4
    return NULL;
690
4
  }
691
692
  /* Make a copy of the actual pattern. */
693
351
  pattern_len = pp - p;
694
351
  pattern = estrndup(p, pattern_len);
695
696
  /* Move on to the options */
697
351
  pp++;
698
699
  /* Parse through the options, setting appropriate flags.  Display
700
     a warning if we encounter an unknown modifier. */
701
905
  while (pp < end_p) {
702
561
    switch (*pp++) {
703
      /* Perl compatible options */
704
209
      case 'i': coptions |= PCRE2_CASELESS;   break;
705
36
      case 'm': coptions |= PCRE2_MULTILINE;   break;
706
4
      case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
707
48
      case 's': coptions |= PCRE2_DOTALL;   break;
708
5
      case 'x': coptions |= PCRE2_EXTENDED;   break;
709
710
      /* PCRE specific options */
711
3
      case 'A': coptions |= PCRE2_ANCHORED;   break;
712
1
      case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
713
0
#ifdef PCRE2_EXTRA_CASELESS_RESTRICT
714
11
      case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
715
0
#endif
716
0
      case 'S': /* Pass. */         break;
717
1
      case 'X': /* Pass. */         break;
718
26
      case 'U': coptions |= PCRE2_UNGREEDY;   break;
719
172
      case 'u': coptions |= PCRE2_UTF;
720
  /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
721
     characters, even in UTF-8 mode. However, this can be changed by setting
722
     the PCRE2_UCP option. */
723
172
#ifdef PCRE2_UCP
724
172
            coptions |= PCRE2_UCP;
725
172
#endif
726
172
        break;
727
15
      case 'J': coptions |= PCRE2_DUPNAMES;   break;
728
729
6
      case ' ':
730
22
      case '\n':
731
23
      case '\r':
732
23
        break;
733
734
0
      case 'e': /* legacy eval */
735
7
      default:
736
7
        if (pp[-1]) {
737
7
          php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
738
7
        } else {
739
0
          php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
740
0
        }
741
7
        pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
742
7
        efree(pattern);
743
7
        if (key != regex) {
744
0
          zend_string_release_ex(key, 0);
745
0
        }
746
7
        return NULL;
747
561
    }
748
561
  }
749
750
344
  if (key != regex) {
751
0
    zv = zend_hash_str_lookup(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
752
0
    if (Z_ISNULL_P(zv)) {
753
0
      tables = pcre2_maketables(gctx);
754
0
      if (UNEXPECTED(!tables)) {
755
        /* Remove the placeholder entry created by zend_hash_str_lookup(),
756
         * set ptr to NULL first so the destructor (pefree) is safe. */
757
0
        ZVAL_PTR(zv, NULL);
758
0
        zend_hash_str_del(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
759
0
        php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
760
0
        pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
761
0
        zend_string_release_ex(key, 0);
762
0
        efree(pattern);
763
0
        return NULL;
764
0
      }
765
0
      ZVAL_PTR(zv, (void *)tables);
766
0
    } else {
767
0
      tables = Z_PTR_P(zv);
768
0
    }
769
0
  }
770
344
  pcre2_set_character_tables(cctx, tables);
771
772
344
  pcre2_set_compile_extra_options(cctx, eoptions);
773
774
  /* Compile pattern and display a warning if compilation failed. */
775
344
  re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
776
777
344
  if (re == NULL) {
778
46
    if (key != regex) {
779
0
      zend_string_release_ex(key, 0);
780
0
    }
781
46
    pcre2_get_error_message(errnumber, error, sizeof(error));
782
46
    php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
783
46
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
784
46
    efree(pattern);
785
46
    return NULL;
786
46
  }
787
788
#ifdef HAVE_PCRE_JIT_SUPPORT
789
  if (PCRE_G(jit)) {
790
    /* Enable PCRE JIT compiler */
791
    rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
792
    if (EXPECTED(rc >= 0)) {
793
      size_t jit_size = 0;
794
      if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
795
        poptions |= PREG_JIT;
796
      }
797
    } else if (rc == PCRE2_ERROR_NOMEMORY) {
798
      php_error_docref(NULL, E_WARNING,
799
        "Allocation of JIT memory failed, PCRE JIT will be disabled. "
800
        "This is likely caused by security restrictions. "
801
        "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
802
      PCRE_G(jit) = 0;
803
    } else {
804
      pcre2_get_error_message(rc, error, sizeof(error));
805
      php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
806
      pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
807
    }
808
  }
809
#endif
810
298
  efree(pattern);
811
812
  /*
813
   * If we reached cache limit, clean out the items from the head of the list;
814
   * these are supposedly the oldest ones (but not necessarily the least used
815
   * ones).
816
   */
817
298
  if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
818
0
    int num_clean = PCRE_CACHE_SIZE / 8;
819
0
    zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
820
0
  }
821
822
  /* Store the compiled pattern and extra info in the cache. */
823
298
  new_entry.re = re;
824
298
  new_entry.preg_options = poptions;
825
298
  new_entry.compile_options = coptions;
826
298
  new_entry.refcount = 0;
827
298
  new_entry.subpats_table = NULL;
828
829
298
  if ((rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count)) < 0 ||
830
298
      (rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count)) < 0) {
831
0
    if (key != regex) {
832
0
      zend_string_release_ex(key, 0);
833
0
    }
834
0
    pcre2_code_free(new_entry.re);
835
0
    php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
836
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
837
0
    return NULL;
838
0
  }
839
840
  /*
841
   * Interned strings are not duplicated when stored in HashTable,
842
   * but all the interned strings created during HTTP request are removed
843
   * at end of request. However PCRE_G(pcre_cache) must be consistent
844
   * on the next request as well. So we disable usage of interned strings
845
   * as hash keys especually for this table.
846
   * See bug #63180
847
   */
848
298
  if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
849
298
    zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
850
298
    GC_MAKE_PERSISTENT_LOCAL(str);
851
852
298
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
853
298
    zend_string_release(str);
854
298
  } else {
855
0
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
856
0
  }
857
858
298
  if (key != regex) {
859
0
    zend_string_release_ex(key, 0);
860
0
  }
861
862
298
  return ret;
863
298
}
864
/* }}} */
865
866
/* {{{ pcre_get_compiled_regex_cache */
867
/* Convenience wrapper: looks up or compiles `regex` with locale awareness
 * switched on. */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
  pcre_cache_entry *entry = pcre_get_compiled_regex_cache_ex(regex, true);

  return entry;
}
871
/* }}} */
872
873
/* {{{ pcre_get_compiled_regex */
874
/* Returns the compiled pattern for `regex`, or NULL when no cache entry
 * could be obtained. When `capture_count` is non-NULL it receives the
 * pattern's capture count, or 0 on failure. */
PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
{
  pcre_cache_entry *entry = pcre_get_compiled_regex_cache(regex);

  if (entry == NULL) {
    if (capture_count) {
      *capture_count = 0;
    }
    return NULL;
  }

  if (capture_count) {
    *capture_count = entry->capture_count;
  }
  return entry->re;
}
884
/* }}} */
885
886
/* XXX For the cases where it's only about match yes/no and no capture
887
    required, perhaps just a minimum sized data would suffice. */
888
/* Hands out a match-data block for `re`. The preallocated per-thread block
 * is returned (and marked as used) when it is free and large enough for
 * capture_count + 1 groups; otherwise a new block is created from the
 * pattern. A `capture_count` of 0 makes the function query the pattern. */
PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
{/*{{{*/

  assert(NULL != re);

  if (EXPECTED(!mdata_used)) {
    /* For a non-cached pattern the count has to be asked from PCRE2. */
    int rc = capture_count
      ? 0
      : pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);

    if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
      mdata_used = 1;
      return mdata;
    }
  }

  return pcre2_match_data_create_from_pattern(re, gctx);
}/*}}}*/
909
910
/* Counterpart of php_pcre_create_match_data(): the preallocated block is
 * only marked as free again, any other block is destroyed. */
PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
{/*{{{*/
  if (EXPECTED(match_data == mdata)) {
    mdata_used = 0;
    return;
  }

  pcre2_match_data_free(match_data);
}/*}}}*/
918
919
0
/* Stores a new two-element array [NULL, -1] in `pair`. */
static void init_unmatched_null_pair(zval *pair) {
  zval match, offset;

  ZVAL_NULL(&match);
  ZVAL_LONG(&offset, -1);

  ZVAL_ARR(pair, zend_new_pair(&match, &offset));
}
925
926
0
/* Stores a new two-element array ["", -1] in `pair`. */
static void init_unmatched_empty_pair(zval *pair) {
  zval match, offset;

  ZVAL_EMPTY_STRING(&match);
  ZVAL_LONG(&offset, -1);

  ZVAL_ARR(pair, zend_new_pair(&match, &offset));
}
932
933
static zend_always_inline void populate_match_value_str(
934
2
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
935
2
  ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
936
2
}
937
938
static zend_always_inline void populate_match_value(
939
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
940
2
    bool unmatched_as_null) {
941
2
  if (PCRE2_UNSET == start_offset) {
942
0
    if (unmatched_as_null) {
943
0
      ZVAL_NULL(val);
944
0
    } else {
945
0
      ZVAL_EMPTY_STRING(val);
946
0
    }
947
2
  } else {
948
2
    populate_match_value_str(val, subject, start_offset, end_offset);
949
2
  }
950
2
}
951
952
static inline void add_named(
953
0
    HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
954
0
  ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
955
956
  /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
957
   * In this case we want to preserve the one that actually has a value. */
958
0
  if (!unmatched) {
959
0
    zend_hash_update(subpats, name, val);
960
0
  } else {
961
0
    if (!zend_hash_add(subpats, name, val)) {
962
0
      return;
963
0
    }
964
0
  }
965
0
  Z_TRY_ADDREF_P(val);
966
0
}
967
968
/* {{{ add_offset_pair */
969
static inline void add_offset_pair(
970
    HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
971
    zend_string *name, zend_long unmatched_as_null)
972
0
{
973
0
  zval match_pair;
974
975
  /* Add (match, offset) to the return value */
976
0
  if (PCRE2_UNSET == start_offset) {
977
0
    if (unmatched_as_null) {
978
0
      do {
979
0
        if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
980
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
981
0
            init_unmatched_null_pair(&match_pair);
982
0
            break;
983
0
          } else {
984
0
            init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
985
0
          }
986
0
        }
987
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
988
0
      } while (0);
989
0
    } else {
990
0
      do {
991
0
        if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
992
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
993
0
            init_unmatched_empty_pair(&match_pair);
994
0
            break;
995
0
          } else {
996
0
            init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
997
0
          }
998
0
        }
999
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1000
0
      } while (0);
1001
0
    }
1002
0
  } else {
1003
0
    zval val1, val2;
1004
0
    populate_match_value_str(&val1, subject, start_offset, end_offset);
1005
0
    ZVAL_LONG(&val2, start_offset);
1006
0
    ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1007
0
  }
1008
1009
0
  if (name) {
1010
0
    add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1011
0
  }
1012
0
  zend_hash_next_index_insert_new(result, &match_pair);
1013
0
}
1014
/* }}} */
1015
1016
static void populate_subpat_array(
1017
    HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1018
2
    uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1019
2
  zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1020
2
  zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1021
2
  zval val;
1022
2
  int i;
1023
2
  if (subpat_names) {
1024
0
    if (offset_capture) {
1025
0
      for (i = 0; i < count; i++) {
1026
0
        add_offset_pair(
1027
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1],
1028
0
          subpat_names[i], unmatched_as_null);
1029
0
      }
1030
0
      if (unmatched_as_null) {
1031
0
        for (i = count; i < num_subpats; i++) {
1032
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1033
0
        }
1034
0
      }
1035
0
    } else {
1036
0
      for (i = 0; i < count; i++) {
1037
0
        populate_match_value(
1038
0
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1039
0
        if (subpat_names[i]) {
1040
0
          add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
1041
0
        }
1042
0
        zend_hash_next_index_insert_new(subpats_ht, &val);
1043
0
      }
1044
0
      if (unmatched_as_null) {
1045
0
        for (i = count; i < num_subpats; i++) {
1046
0
          ZVAL_NULL(&val);
1047
0
          if (subpat_names[i]) {
1048
0
            zend_hash_add(subpats_ht, subpat_names[i], &val);
1049
0
          }
1050
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1051
0
        }
1052
0
      }
1053
0
    }
1054
2
  } else {
1055
2
    if (offset_capture) {
1056
0
      for (i = 0; i < count; i++) {
1057
0
        add_offset_pair(
1058
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1059
0
      }
1060
0
      if (unmatched_as_null) {
1061
0
        for (i = count; i < num_subpats; i++) {
1062
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1063
0
        }
1064
0
      }
1065
2
    } else {
1066
4
      for (i = 0; i < count; i++) {
1067
2
        populate_match_value(
1068
2
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1069
2
        zend_hash_next_index_insert_new(subpats_ht, &val);
1070
2
      }
1071
2
      if (unmatched_as_null) {
1072
0
        ZVAL_NULL(&val);
1073
0
        for (i = count; i < num_subpats; i++) {
1074
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1075
0
        }
1076
0
      }
1077
2
    }
1078
2
  }
1079
  /* Add MARK, if available */
1080
2
  if (mark) {
1081
0
    ZVAL_STRING(&val, (char *)mark);
1082
0
    zend_hash_str_update(subpats_ht, ZEND_STRL("MARK"), &val);
1083
0
  }
1084
2
}
1085
1086
/*
 * Shared body of preg_match() and preg_match_all(): parses the userland
 * arguments (pattern, subject, optional by-reference matches array, flags
 * and start offset), fetches the compiled pattern and delegates to
 * php_pcre_match_impl(). `global` selects match-all behaviour.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
  zend_string *regex;              /* Regular expression */
  zend_string *subject;            /* String to match against */
  zval *subpats = NULL;            /* Array for subpatterns */
  zend_long flags = 0;             /* Match control flags */
  zend_long start_offset = 0;      /* Where the new search starts */
  pcre_cache_entry *pce;           /* Compiled regular expression */

  ZEND_PARSE_PARAMETERS_START(2, 5)
    Z_PARAM_STR(regex)
    Z_PARAM_STR(subject)
    Z_PARAM_OPTIONAL
    Z_PARAM_ZVAL(subpats)
    Z_PARAM_LONG(flags)
    Z_PARAM_LONG(start_offset)
  ZEND_PARSE_PARAMETERS_END();

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    RETURN_FALSE;
  }

  /* ZEND_LONG_MIN cannot be negated when the offset is counted from the end. */
  if (start_offset == ZEND_LONG_MIN) {
    zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
    RETURN_THROWS();
  }

  /* The entry's refcount is raised for the duration of the match. */
  pce->refcount++;
  php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
  pce->refcount--;
}
1120
/* }}} */
1121
1122
/*
 * Tell whether the UTF-8 validity check can be skipped for this subject:
 * the string must carry the "valid UTF-8" flag and `start_offset` must not
 * point at a continuation byte. An offset equal to the string length is
 * accepted without looking at any byte.
 */
static zend_always_inline bool is_known_valid_utf8(
    zend_string *subject_str, PCRE2_SIZE start_offset) {
  if (ZSTR_IS_VALID_UTF8(subject_str)) {
    /* Continuation bytes have the bit pattern 10xxxxxx. */
    return start_offset == ZSTR_LEN(subject_str)
      || (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
  }

  /* We don't know whether the string is valid UTF-8 or not. */
  return false;
}
1137
1138
/* {{{ php_pcre_match_impl() */
1139
/*
 * Run a compiled pattern against `subject_str`; implements preg_match()
 * (`global` false) and preg_match_all() (`global` true).
 *
 *  - pce:          compiled pattern cache entry
 *  - subject_str:  string to match against
 *  - return_value: receives the number of matches, or false on error
 *  - subpats:      optional; re-initialised through zend_try_array_init()
 *                  and filled with the captured subpatterns
 *  - flags:        PREG_OFFSET_CAPTURE / PREG_UNMATCHED_AS_NULL plus, in the
 *                  low byte, the result ordering (PREG_PATTERN_ORDER or
 *                  PREG_SET_ORDER, global mode only)
 *  - start_offset: byte offset to start at; negative values count from the
 *                  end of the subject
 *
 * The shared preallocated match data block `mdata` is used when it is free
 * and big enough. Every exit taken after the block has been claimed puts
 * `mdata_used` back to its previous value.
 */
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
  zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
{
  zval       result_set;    /* Holds a set of subpatterns after
                       a global match */
  HashTable    **match_sets = NULL; /* An array of sets of matches for each
                       subpattern after a global match */
  uint32_t     options;     /* Execution options */
  int        count;       /* Count of matched subpatterns */
  uint32_t     num_subpats;   /* Number of captured subpatterns */
  int        matched;     /* Has anything matched */
  zend_string    **subpat_names;    /* Array for named subpatterns */
  size_t       i;
  uint32_t     subpats_order;   /* Order of subpattern matches */
  uint32_t     offset_capture;  /* Capture match offsets: yes/no */
  zend_long    unmatched_as_null; /* Null non-matches: yes/no */
  PCRE2_SPTR       mark = NULL;   /* Target for MARK name */
  HashTable   *marks = NULL;   /* Array of marks for PREG_PATTERN_ORDER */
  pcre2_match_data *match_data;
  PCRE2_SIZE     start_offset2, orig_start_offset;
  bool old_mdata_used;

  char *subject = ZSTR_VAL(subject_str);
  size_t subject_len = ZSTR_LEN(subject_str);

  /* Overwrite the passed-in value for subpatterns with an empty array. */
  if (subpats != NULL) {
    subpats = zend_try_array_init(subpats);
    if (!subpats) {
      RETURN_THROWS();
    }
  }

  subpats_order = global ? PREG_PATTERN_ORDER : 0;

  if (flags) {
    offset_capture = flags & PREG_OFFSET_CAPTURE;
    unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;

    /*
     * subpats_order is pre-set to pattern mode so we change it only if
     * necessary.
     */
    if (flags & 0xff) {
      subpats_order = flags & 0xff;
      if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
        (!global && subpats_order != 0)) {
        zend_argument_value_error(4, "must be a PREG_* constant");
        RETURN_THROWS();
      }
    }
  } else {
    offset_capture = 0;
    unmatched_as_null = 0;
  }

  /* Negative offset counts from the end of the string. */
  if (start_offset < 0) {
    if ((PCRE2_SIZE)-start_offset <= subject_len) {
      start_offset2 = subject_len + start_offset;
    } else {
      start_offset2 = 0;
    }
  } else {
    start_offset2 = (PCRE2_SIZE)start_offset;
  }

  if (start_offset2 > subject_len) {
    pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
    RETURN_FALSE;
  }

  /* Number of offset pairs needed: the whole match plus one per group. */
  num_subpats = pce->capture_count + 1;

  /*
   * Build a mapping from subpattern numbers to their names. We will
   * allocate the table only if there are any named subpatterns.
   */
  subpat_names = NULL;
  if (subpats && pce->name_count > 0) {
    subpat_names = ensure_subpats_table(pce->name_count, pce);
    if (UNEXPECTED(!subpat_names)) {
      RETURN_FALSE;
    }
  }

  matched = 0;
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Claim the shared match data block if possible, else allocate one. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      RETURN_FALSE;
    }
  }

  /* Allocate match sets array and initialize the values. */
  if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
    match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
    for (i=0; i<num_subpats; i++) {
      match_sets[i] = zend_new_array(0);
    }
  }

  /* Array of subpattern offsets */
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

  orig_start_offset = start_offset2;
  /* Only ask PCRE2 for a UTF check when the pattern is /u and the subject
   * is not already known to be valid at this offset. */
  options =
    (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
      ? 0 : PCRE2_NO_UTF_CHECK;

  /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
  if ((pce->preg_options & PREG_JIT) && options) {
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  } else
#endif
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
      options, match_data, mctx);

  while (1) {
    /* If something has matched */
    if (count >= 0) {
      /* Check for too many substrings condition. */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
        count = num_subpats;
      }

matched:
      matched++;

      /* If subpatterns array has been passed, fill it in with values. */
      if (subpats != NULL) {
        /* A match that ends before it starts cannot be turned into
         * substrings: release everything acquired so far and fail. */
        if (UNEXPECTED(offsets[1] < offsets[0])) {
          if (match_sets) {
            for (i = 0; i < num_subpats; i++) {
              zend_array_destroy(match_sets[i]);
            }
            efree(match_sets);
          }
          if (marks) {
            zend_array_destroy(marks);
          }
          if (match_data != mdata) {
            pcre2_match_data_free(match_data);
          }
          /* Hand the shared match data block back, exactly like the
           * regular exit below; otherwise it would stay marked as busy
           * and later calls would have to allocate their own block. */
          mdata_used = old_mdata_used;
          php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
          RETURN_FALSE;
        }

        if (global) { /* global pattern matching */
          if (subpats_order == PREG_PATTERN_ORDER) {
            /* For each subpattern, insert it into the appropriate array. */
            if (offset_capture) {
              for (i = 0; i < count; i++) {
                add_offset_pair(
                  match_sets[i], subject, offsets[2*i], offsets[2*i+1],
                  NULL, unmatched_as_null);
              }
            } else {
              for (i = 0; i < count; i++) {
                zval val;
                populate_match_value(
                  &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
                zend_hash_next_index_insert_new(match_sets[i], &val);
              }
            }
            mark = pcre2_get_mark(match_data);
            /* Add MARK, if available */
            if (mark) {
              if (!marks) {
                marks = zend_new_array(0);
              }
              zval tmp;
              ZVAL_STRING(&tmp, (char *) mark);
              zend_hash_index_add_new(marks, matched - 1, &tmp);
            }
            /*
             * If the number of captured subpatterns on this run is
             * less than the total possible number, pad the result
             * arrays with NULLs or empty strings.
             */
            if (count < num_subpats) {
              for (int pad = count; pad < num_subpats; pad++) {
                if (offset_capture) {
                  add_offset_pair(
                    match_sets[pad], NULL, PCRE2_UNSET, PCRE2_UNSET,
                    NULL, unmatched_as_null);
                } else if (unmatched_as_null) {
                  zval tmp;
                  ZVAL_NULL(&tmp);
                  zend_hash_next_index_insert_new(match_sets[pad], &tmp);
                } else {
                  zval tmp;
                  ZVAL_EMPTY_STRING(&tmp);
                  zend_hash_next_index_insert_new(match_sets[pad], &tmp);
                }
              }
            }
          } else {
            /* Allocate and populate the result set array */
            mark = pcre2_get_mark(match_data);
            array_init_size(&result_set, count + (mark ? 1 : 0));
            populate_subpat_array(
              Z_ARRVAL(result_set), subject, offsets, subpat_names,
              num_subpats, count, mark, flags);
            /* And add it to the output array */
            zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
          }
        } else {     /* single pattern matching */
          /* For each subpattern, insert it into the subpatterns array. */
          mark = pcre2_get_mark(match_data);
          populate_subpat_array(
            Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags);
          break;
        }
      }

      /* Advance to the next piece. */
      start_offset2 = offsets[1];

      /* If we have matched an empty string, mimic what Perl's /g options does.
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
         the match again at the same point. If this fails (picked up above) we
         advance to the next character. */
      if (start_offset2 == offsets[0]) {
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
        if (count >= 0) {
          if (global) {
            goto matched;
          } else {
            break;
          }
        } else if (count == PCRE2_ERROR_NOMATCH) {
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
             this is not necessarily the end. We need to advance
             the start offset, and continue. Fudge the offset values
             to achieve this, unless we're already at the end of the string. */
          if (start_offset2 < subject_len) {
            size_t unit_len = calculate_unit_length(pce, subject + start_offset2);

            start_offset2 += unit_len;
          } else {
            break;
          }
        } else {
          goto error;
        }
      }
    } else if (count == PCRE2_ERROR_NOMATCH) {
      break;
    } else {
error:
      pcre_handle_exec_error(count);
      break;
    }

    if (!global) {
      break;
    }

    /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT)) {
      if (start_offset2 > subject_len) {
        pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
        break;
      }
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  }
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }
  mdata_used = old_mdata_used;

  /* Add the match sets to the output array and clean up */
  if (match_sets) {
    if (subpat_names) {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        if (subpat_names[i]) {
          /* The same array is stored twice (by name and by index),
           * hence the additional reference. */
          zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
          GC_ADDREF(match_sets[i]);
        }
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    } else {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    }
    efree(match_sets);

    if (marks) {
      zval tmp;
      ZVAL_ARR(&tmp, marks);
      zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
    }
  }

  if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
    /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
    if ((pce->compile_options & PCRE2_UTF)
        && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
      GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
    }

    RETVAL_LONG(matched);
  } else {
    RETVAL_FALSE;
  }
}
1470
/* }}} */
1471
1472
/* {{{ Perform a Perl-style regular expression match */
1473
PHP_FUNCTION(preg_match)
1474
353
{
1475
353
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1476
353
}
1477
/* }}} */
1478
1479
/*
 * Frameless two-argument variant of preg_match(): pattern and subject only,
 * no subpatterns array, no flags, start offset 0.
 */
ZEND_FRAMELESS_FUNCTION(preg_match, 2)
{
  zval regex_tmp, subject_tmp;
  zend_string *regex, *subject;
  pcre_cache_entry *pce;

  Z_FLF_PARAM_STR(1, regex, regex_tmp);
  Z_FLF_PARAM_STR(2, subject, subject_tmp);

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (pce != NULL) {
    pce->refcount++;
    php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
      /* global */ false, /* flags */ 0, /* start_offset */ 0);
    pce->refcount--;
  } else {
    RETVAL_FALSE;
  }

  /* Common exit: free the temporaries of both string parameters. */
flf_clean:
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
  Z_FLF_PARAM_FREE_STR(2, subject_tmp);
}
1503
1504
/* {{{ Perform a Perl-style global regular expression match */
1505
PHP_FUNCTION(preg_match_all)
1506
0
{
1507
0
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1508
0
}
1509
/* }}} */
1510
1511
/* {{{ preg_get_backref */
1512
/*
 * Parse a backreference at *str, which points at the introducing '\' or '$'.
 * Accepted forms are \N, \NN, $N, $NN, ${N} and ${NN} (N being a decimal
 * digit).
 *
 * Returns 1 on success: *backref holds the group number and *str is moved
 * just past the reference. Returns 0 when no reference starts here; *str is
 * then left alone (*backref may already have been written to).
 */
static int preg_get_backref(char **str, int *backref)
{
  char *cursor = *str;
  int braced;

  /* The introducer is the last character: nothing can follow it. */
  if (cursor[1] == '\0') {
    return 0;
  }

  /* Only '$' may be followed by a brace; skip the introducer (and brace). */
  braced = (cursor[0] == '$' && cursor[1] == '{');
  cursor += braced ? 2 : 1;

  /* First digit is mandatory. */
  if (*cursor < '0' || *cursor > '9') {
    return 0;
  }
  *backref = *cursor - '0';
  cursor++;

  /* Optional second digit. */
  if (*cursor >= '0' && *cursor <= '9') {
    *backref = *backref * 10 + *cursor - '0';
    cursor++;
  }

  if (braced) {
    if (*cursor != '}') {
      return 0;
    }
    cursor++;
  }

  *str = cursor;
  return 1;
}
1547
/* }}} */
1548
1549
/* Return NULL if an exception has occurred */
1550
/*
 * Invoke the replacement callback for one match and return its result as a
 * string. The callback gets a single argument: the array of subpatterns
 * built by populate_subpat_array().
 *
 * Return NULL if an exception has occurred.
 */
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
  zval callback_result;   /* Function return value */
  zval matches;           /* Argument to pass to function */
  zend_string *converted = NULL;

  array_init_size(&matches, count + (mark ? 1 : 0));
  populate_subpat_array(Z_ARRVAL(matches), subject, offsets, subpat_names, num_subpats, count, mark, flags);

  fci->retval = &callback_result;
  fci->param_count = 1;
  fci->params = &matches;
  fci->consumed_args = zend_fci_consumed_arg(0);
  zend_call_function(fci, fcc);
  zval_ptr_dtor(&matches);

  /* Fast path: a string result is handed to the caller as is. */
  if (EXPECTED(Z_TYPE(callback_result) == IS_STRING)) {
    return Z_STR(callback_result);
  }

  /* A defined non-string result is converted; an undefined result leaves
   * the return value at NULL. */
  if (EXPECTED(Z_TYPE(callback_result) != IS_UNDEF)) {
    converted = zval_try_get_string_func(&callback_result);
  }
  zval_ptr_dtor(&callback_result);

  return converted;
}
1576
1577
/* {{{ php_pcre_replace */
1578
/*
 * Replace matches of `regex` in the subject with `replace_str`: fetches the
 * compiled pattern and delegates to php_pcre_replace_impl().
 *
 * Returns the resulting string, or NULL when an exception is already
 * pending, the pattern could not be obtained, or the replacement failed.
 */
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
                zend_string *subject_str,
                const char *subject, size_t subject_len,
                zend_string *replace_str,
                size_t limit, size_t *replace_count)
{
  pcre_cache_entry *entry;   /* Compiled regular expression */
  zend_string *replaced;     /* Function result */

  /* Abort on pending exception, e.g. thrown from __toString(). */
  if (UNEXPECTED(EG(exception))) {
    return NULL;
  }

  /* Compile regex or get it from cache. */
  entry = pcre_get_compiled_regex_cache(regex);
  if (entry == NULL) {
    return NULL;
  }

  /* The entry's refcount is raised for the duration of the replacement. */
  entry->refcount++;
  replaced = php_pcre_replace_impl(
    entry, subject_str, subject, subject_len, replace_str, limit, replace_count);
  entry->refcount--;

  return replaced;
}
1603
/* }}} */
1604
1605
/* {{{ php_pcre_replace_impl() */
1606
/*
 * Replace up to `limit` matches of the compiled pattern `pce` in
 * subject[0, subject_len) with `replace_str`, expanding the backreferences
 * (\N, $N, ${N}) found in the replacement.
 *
 *  - subject_str:   optional zend_string for the subject; when given and
 *                   nothing was replaced, a copy of it (zend_string_copy)
 *                   is returned instead of a newly built string
 *  - replace_count: optional; incremented once per replacement performed
 *
 * Returns the resulting string, or NULL on error (PCRE_G(error_code) is set
 * directly or through pcre_handle_exec_error()).
 *
 * Each match is processed in two passes over the replacement string: the
 * first computes the needed length so that the result buffer can be grown,
 * the second copies literal text and referenced groups.
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
  uint32_t     options;     /* Execution options */
  int        count;       /* Count of matched subpatterns */
  uint32_t     num_subpats;   /* Number of captured subpatterns */
  size_t       new_len;     /* Length of needed storage */
  size_t       alloc_len;     /* Actual allocated length */
  size_t       match_len;     /* Length of the current match */
  int        backref;     /* Backreference number */
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
  size_t       last_end_offset; /* Where the last search ended */
  char      *walkbuf,     /* Location of current replacement in the result */
          *walk,        /* Used to walk the replacement string */
           walk_last;     /* Last walked character */
  const char    *match,       /* The current match */
          *piece,       /* The current piece of subject */
          *replace_end;   /* End of replacement string */
  size_t      result_len;     /* Length of result */
  zend_string   *result;      /* Result of replacement */
  pcre2_match_data *match_data;
  bool old_mdata_used;

  /* Number of offset pairs needed: the whole match plus one per group. */
  num_subpats = pce->capture_count + 1;
  alloc_len = 0;
  result = NULL;

  /* Initialize */
  match = NULL;
  start_offset = 0;
  last_end_offset = 0;
  result_len = 0;
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Claim the shared match data block if possible, else allocate one. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      return NULL;
    }
  }

  /* Only /u patterns get a UTF validity check, and only on the first match. */
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

  /* Array of subpattern offsets */
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

  /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
  if ((pce->preg_options & PREG_JIT) && options) {
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  } else
#endif
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
      options, match_data, mctx);

  while (1) {
    piece = subject + last_end_offset;

    if (count >= 0 && limit > 0) {
      bool simple_string;

      /* Check for too many substrings condition. */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
        count = num_subpats;
      }

matched:
      /* A match that ends before it starts cannot be replaced. */
      if (UNEXPECTED(offsets[1] < offsets[0])) {
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
        if (result) {
          zend_string_release_ex(result, 0);
          result = NULL;
        }
        break;
      }

      if (replace_count) {
        ++*replace_count;
      }

      /* Set the match location in subject */
      match = subject + offsets[0];

      new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

      /* Pass 1: compute the length of the expanded replacement. */
      walk = ZSTR_VAL(replace_str);
      replace_end = walk + ZSTR_LEN(replace_str);
      walk_last = 0;
      simple_string = true;
      while (walk < replace_end) {
        if ('\\' == *walk || '$' == *walk) {
          simple_string = false;
          if (walk_last == '\\') {
            /* Escaped introducer: it will overwrite the backslash that
             * was already counted, so the length does not change. */
            walk++;
            walk_last = 0;
            continue;
          }
          if (preg_get_backref(&walk, &backref)) {
            if (backref < count)
              new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
            continue;
          }
        }
        new_len++;
        walk++;
        walk_last = walk[-1];
      }

      if (new_len >= alloc_len) {
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
        if (result == NULL) {
          result = zend_string_alloc(alloc_len, 0);
        } else {
          result = zend_string_extend(result, alloc_len, 0);
        }
      }

      if (match-piece > 0) {
        /* copy the part of the string before the match */
        memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
        result_len += (match-piece);
      }

      if (simple_string) {
        /* copy replacement */
        memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
        result_len += ZSTR_LEN(replace_str);
      } else {
        /* Pass 2: copy replacement and backrefs */
        walkbuf = ZSTR_VAL(result) + result_len;

        walk = ZSTR_VAL(replace_str);
        walk_last = 0;
        while (walk < replace_end) {
          if ('\\' == *walk || '$' == *walk) {
            if (walk_last == '\\') {
              /* Replace the already copied backslash by the escaped
               * character. */
              *(walkbuf-1) = *walk++;
              walk_last = 0;
              continue;
            }
            if (preg_get_backref(&walk, &backref)) {
              if (backref < count) {
                /* Groups that did not take part in the match are
                 * reported as PCRE2_UNSET and expand to nothing. */
                if (offsets[backref<<1] != PCRE2_UNSET) {
                  match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
                  walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
                }
              }
              continue;
            }
          }
          *walkbuf++ = *walk++;
          walk_last = walk[-1];
        }
        *walkbuf = '\0';
        /* increment the result length by how much we've added to the string */
        result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
      }

      limit--;

      /* Advance to the next piece. */
      start_offset = last_end_offset = offsets[1];

      /* If we have matched an empty string, mimic what Perl's /g options does.
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
         the match again at the same point. If this fails (picked up above) we
         advance to the next character. */
      if (start_offset == offsets[0]) {
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

        piece = subject + start_offset;
        if (count >= 0 && limit > 0) {
          goto matched;
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
             this is not necessarily the end. We need to advance
             the start offset, and continue. Fudge the offset values
             to achieve this, unless we're already at the end of the string. */
          if (start_offset < subject_len) {
            size_t unit_len = calculate_unit_length(pce, piece);
            start_offset += unit_len;
          } else {
            goto not_matched;
          }
        } else {
          goto error;
        }
      }

    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
      /* Nothing was replaced at all: share the caller's string. */
      if (!result && subject_str) {
        result = zend_string_copy(subject_str);
        break;
      }
      /* now we know exactly how long it is */
      alloc_len = result_len + subject_len - last_end_offset;
      if (NULL != result) {
        result = zend_string_realloc(result, alloc_len, 0);
      } else {
        result = zend_string_alloc(alloc_len, 0);
      }
      /* stick that last bit of string on our output */
      memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
      result_len += subject_len - last_end_offset;
      ZSTR_VAL(result)[result_len] = '\0';
      ZSTR_LEN(result) = result_len;
      break;
    } else {
error:
      pcre_handle_exec_error(count);
      if (result) {
        zend_string_release_ex(result, 0);
        result = NULL;
      }
      break;
    }

#ifdef HAVE_PCRE_JIT_SUPPORT
    if (pce->preg_options & PREG_JIT) {
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
  }
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }
  /* Hand the shared match data block back (or keep it marked as busy for
   * an outer user). */
  mdata_used = old_mdata_used;

  return result;
}
1848
/* }}} */
1849
1850
/* Replace matches of the compiled pattern `pce` in `subject_str`, taking each
 * replacement string from preg_do_repl_func() (driven by fci/fcc).
 *
 *   limit          number of replacements still allowed; decremented once per
 *                  replacement, and no further replacement is made once it is 0
 *   replace_count  if non-NULL, incremented once per replacement
 *   flags          forwarded unchanged to preg_do_repl_func()
 *
 * Returns the newly built string, `subject_str` itself (via zend_string_copy())
 * when nothing was replaced, or NULL on failure. */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str,
1851
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1852
  size_t limit, size_t *replace_count, zend_long flags
1853
2
) {
1854
2
  uint32_t     options;     /* Execution options */
1855
2
  int        count;       /* Count of matched subpatterns */
1856
2
  zend_string   **subpat_names;   /* Array for named subpatterns */
1857
2
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1858
2
  size_t       alloc_len;     /* Actual allocated length */
1859
2
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1860
2
  size_t       last_end_offset; /* Where the last search ended */
1861
2
  const char    *match,       /* The current match */
1862
2
          *piece;       /* The current piece of subject */
1863
2
  size_t      result_len;     /* Length of result */
1864
2
  zend_string   *result;      /* Result of replacement */
1865
2
  pcre2_match_data *match_data;
1866
2
  bool old_mdata_used;
1867
1868
  /* Calculate the size of the offsets array, and allocate memory for it. */
1869
2
  num_subpats = pce->capture_count + 1;
1870
2
  if (pce->name_count > 0) {
1871
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1872
0
    if (UNEXPECTED(!subpat_names)) {
1873
0
      return NULL;
1874
0
    }
1875
2
  } else {
1876
2
    subpat_names = NULL;
1877
2
  }
1878
1879
2
  alloc_len = 0;
1880
2
  result = NULL;
1881
1882
  /* Initialize */
1883
2
  match = NULL;
1884
2
  start_offset = 0;
1885
2
  last_end_offset = 0;
1886
2
  result_len = 0;
1887
2
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1888
1889
2
  /* Borrow the shared match data block (mdata) unless it is already marked
     in use or num_subpats exceeds PHP_PCRE_PREALLOC_MDATA_SIZE; otherwise
     create a private block, which is freed before returning. */
  old_mdata_used = mdata_used;
1890
2
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1891
2
    mdata_used = 1;
1892
2
    match_data = mdata;
1893
2
  } else {
1894
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1895
0
    if (!match_data) {
1896
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1897
0
      mdata_used = old_mdata_used;
1898
0
      return NULL;
1899
0
    }
1900
0
  }
1901
1902
2
  /* options is 0 only for PCRE2_UTF patterns, so only the first match below
     can run without PCRE2_NO_UTF_CHECK; every later match in this function
     passes PCRE2_NO_UTF_CHECK explicitly. */
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1903
1904
  /* Array of subpattern offsets */
1905
2
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1906
1907
  /* Execute the regular expression. */
1908
#ifdef HAVE_PCRE_JIT_SUPPORT
1909
  /* For this first match the JIT entry point is taken only when options is
     non-zero, i.e. when PCRE2_NO_UTF_CHECK is in effect. */
  if ((pce->preg_options & PREG_JIT) && options) {
1910
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1911
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1912
  } else
1913
#endif
1914
2
  count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1915
2
      options, match_data, mctx);
1916
1917
2
  while (1) {
1918
2
    /* piece: start of the subject text that has not been copied to result */
    piece = ZSTR_VAL(subject_str) + last_end_offset;
1919
1920
2
    if (count >= 0 && limit) {
1921
      /* Check for too many substrings condition. */
1922
2
      if (UNEXPECTED(count == 0)) {
1923
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1924
0
        count = num_subpats;
1925
0
      }
1926
1927
2
/* Also entered by goto from the empty-match retry further down. */
matched:
1928
2
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1929
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1930
0
        if (result) {
1931
0
          zend_string_release_ex(result, 0);
1932
0
          result = NULL;
1933
0
        }
1934
0
        break;
1935
0
      }
1936
1937
2
      if (replace_count) {
1938
2
        ++*replace_count;
1939
2
      }
1940
1941
      /* Set the match location in subject */
1942
2
      match = ZSTR_VAL(subject_str) + offsets[0];
1943
1944
      /* Length of needed storage */
1945
2
      size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1946
1947
      /* Use custom function to get replacement string and its length. */
1948
2
      zend_string *eval_result = preg_do_repl_func(
1949
2
        fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count,
1950
2
        pcre2_get_mark(match_data), flags);
1951
1952
2
      if (UNEXPECTED(eval_result == NULL)) {
1953
0
        /* No replacement string was produced: leave through the shared
           error exit, which releases any partial result. */
        goto error;
1954
0
      }
1955
2
      /* Add the replacement's length to the prefix length computed above. */
      new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1956
2
      if (new_len >= alloc_len) {
1957
2
        /* Grow the buffer to twice the needed length — presumably to limit
           the number of reallocations across later matches. */
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1958
2
        if (result == NULL) {
1959
2
          result = zend_string_alloc(alloc_len, 0);
1960
2
        } else {
1961
0
          result = zend_string_extend(result, alloc_len, 0);
1962
0
        }
1963
2
      }
1964
1965
2
      if (match-piece > 0) {
1966
        /* copy the part of the string before the match */
1967
0
        memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1968
0
        result_len += (match-piece);
1969
0
      }
1970
1971
      /* If using custom function, copy result to the buffer and clean up. */
1972
2
      memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1973
2
      result_len += ZSTR_LEN(eval_result);
1974
2
      zend_string_release_ex(eval_result, 0);
1975
1976
2
      limit--;
1977
1978
      /* Advance to the next piece. */
1979
2
      start_offset = last_end_offset = offsets[1];
1980
1981
      /* If we have matched an empty string, mimic what Perl's /g options does.
1982
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1983
         the match again at the same point. If this fails (picked up above) we
1984
         advance to the next character. */
1985
2
      if (start_offset == offsets[0]) {
1986
2
        count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1987
2
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1988
1989
2
        piece = ZSTR_VAL(subject_str) + start_offset;
1990
2
        if (count >= 0 && limit) {
1991
0
          goto matched;
1992
2
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1993
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1994
             this is not necessarily the end. We need to advance
1995
             the start offset, and continue. Fudge the offset values
1996
             to achieve this, unless we're already at the end of the string. */
1997
2
          if (start_offset < ZSTR_LEN(subject_str)) {
1998
0
            size_t unit_len = calculate_unit_length(pce, piece);
1999
0
            start_offset += unit_len;
2000
2
          } else {
2001
2
            goto not_matched;
2002
2
          }
2003
2
        } else {
2004
0
          goto error;
2005
0
        }
2006
2
      }
2007
2008
2
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2009
2
/* Also entered by goto when the empty-match retry ends at the end of the
   subject. */
not_matched:
2010
2
      if (result == NULL) {
2011
0
        /* No output buffer was ever allocated: return the subject itself. */
        result = zend_string_copy(subject_str);
2012
0
        break;
2013
0
      }
2014
      /* now we know exactly how long it is */
2015
2
      size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset;
2016
2
      alloc_len = result_len + segment_len;
2017
2
      result = zend_string_realloc(result, alloc_len, 0);
2018
      /* stick that last bit of string on our output */
2019
2
      memcpy(ZSTR_VAL(result) + result_len, piece, segment_len);
2020
2
      result_len += segment_len;
2021
2
      ZSTR_VAL(result)[result_len] = '\0';
2022
2
      ZSTR_LEN(result) = result_len;
2023
2
      break;
2024
2
    } else {
2025
0
/* Reached for any other negative match code, and by goto from the two
   sites above (missing replacement string, failed anchored retry). */
error:
2026
0
      pcre_handle_exec_error(count);
2027
0
      if (result) {
2028
0
        zend_string_release_ex(result, 0);
2029
0
        result = NULL;
2030
0
      }
2031
0
      break;
2032
0
    }
2033
#ifdef HAVE_PCRE_JIT_SUPPORT
2034
    /* Look for the next match starting at start_offset. */
    if ((pce->preg_options & PREG_JIT)) {
2035
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2036
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2037
    } else
2038
#endif
2039
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2040
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2041
0
  }
2042
2
  /* Free a privately created match data block, then restore the shared
     block's in-use flag to what it was on entry. */
  if (match_data != mdata) {
2043
0
    pcre2_match_data_free(match_data);
2044
0
  }
2045
2
  mdata_used = old_mdata_used;
2046
2047
2
  return result;
2048
2
}
2049
2050
/* Look up (or compile) `regex` and run the callback-based replacement on
 * `subject_str`. Returns whatever php_pcre_replace_func_impl() returns, or
 * NULL when no compiled pattern could be obtained. */
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
                zend_string *subject_str,
                zend_fcall_info *fci, zend_fcall_info_cache *fcc,
                size_t limit, size_t *replace_count, zend_long flags)
{
  pcre_cache_entry *cache_entry = pcre_get_compiled_regex_cache(regex);

  if (cache_entry == NULL) {
    return NULL;
  }

  /* The entry's refcount is raised for the duration of the call —
     presumably so the cache does not drop it while the callback runs. */
  cache_entry->refcount++;
  zend_string *replaced = php_pcre_replace_func_impl(
    cache_entry, subject_str, fci, fcc, limit, replace_count, flags);
  cache_entry->refcount--;

  return replaced;
}
2068
2069
/* {{{ php_pcre_replace_array */
2070
/* Apply each pattern in `regex` to the subject in turn, feeding the output of
 * one replacement into the next.
 *
 * When `replace_ht` is given, patterns are paired with its elements in slot
 * order (unused slots are skipped); once it runs out, the empty string is used
 * as the replacement. Otherwise `replace_str` is used for every pattern.
 *
 * Returns the final string, or NULL as soon as one replacement fails. */
static zend_string *php_pcre_replace_array(HashTable *regex,
  zend_string *replace_str, HashTable *replace_ht,
  zend_string *subject_str, size_t limit, size_t *replace_count)
{
  zval *pattern_zv;
  uint32_t next_slot = 0; /* cursor into replace_ht's element slots */

  /* The loop releases the current subject after every step, so take a
     reference of our own on the caller's string first. */
  zend_string_addref(subject_str);

  ZEND_ASSERT(replace_ht != NULL || replace_str != NULL);

  ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
    /* Make sure we're dealing with strings. */
    zend_string *tmp_pattern;
    zend_string *pattern = zval_get_tmp_string(pattern_zv, &tmp_pattern);
    zend_string *tmp_replacement = NULL;
    zend_string *replacement;

    if (replace_ht == NULL) {
      replacement = replace_str;
    } else {
      /* Default for an exhausted replacement array. */
      replacement = ZSTR_EMPTY_ALLOC();
      while (next_slot != replace_ht->nNumUsed) {
        zval *slot = ZEND_HASH_ELEMENT(replace_ht, next_slot);

        next_slot++;
        if (Z_TYPE_P(slot) != IS_UNDEF) {
          replacement = zval_get_tmp_string(slot, &tmp_replacement);
          break;
        }
      }
    }

    /* Do the replacement; its output becomes the subject of the next
       pattern. */
    zend_string *replaced = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
      ZSTR_LEN(subject_str), replacement, limit, replace_count);

    zend_tmp_string_release(tmp_replacement);
    zend_tmp_string_release(tmp_pattern);
    zend_string_release_ex(subject_str, 0);
    subject_str = replaced;

    if (UNEXPECTED(replaced == NULL)) {
      break;
    }
  } ZEND_HASH_FOREACH_END();

  return subject_str;
}
2143
/* }}} */
2144
2145
/* {{{ php_replace_in_subject */
2146
/* Run a string-replacement on one subject, dispatching on the pattern form:
 * a single pattern string goes to php_pcre_replace(), an array of patterns to
 * php_pcre_replace_array(). Returns the callee's result unchanged. */
static zend_always_inline zend_string *php_replace_in_subject(
  zend_string *regex_str, HashTable *regex_ht,
  zend_string *replace_str, HashTable *replace_ht,
  zend_string *subject, size_t limit, size_t *replace_count)
{
  if (!regex_str) {
    ZEND_ASSERT(regex_ht != NULL);
    return php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
      limit, replace_count);
  }

  /* A single pattern is only ever paired with a single replacement string. */
  ZEND_ASSERT(replace_str != NULL);
  return php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
    replace_str, limit, replace_count);
}
2164
/* }}} */
2165
2166
static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht,
2167
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2168
  zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2169
2
{
2170
2
  zend_string *result;
2171
2172
2
  if (regex_str) {
2173
2
    result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags);
2174
2
    return result;
2175
2
  } else {
2176
    /* If regex is an array */
2177
0
    zval    *regex_entry;
2178
2179
0
    ZEND_ASSERT(regex_ht != NULL);
2180
2181
0
    zend_string_addref(subject);
2182
2183
    /* For each entry in the regex array, get the entry */
2184
0
    ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2185
      /* Make sure we're dealing with strings. */
2186
0
      zend_string *tmp_regex_entry_str;
2187
0
      zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2188
0
      if (UNEXPECTED(regex_entry_str == NULL)) {
2189
0
        break;
2190
0
      }
2191
2192
      /* Do the actual replacement and put the result back into subject
2193
         for further replacements. */
2194
0
      result = php_pcre_replace_func(
2195
0
        regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2196
0
      zend_tmp_string_release(tmp_regex_entry_str);
2197
0
      zend_string_release(subject);
2198
0
      subject = result;
2199
0
      if (UNEXPECTED(result == NULL)) {
2200
0
        break;
2201
0
      }
2202
0
    } ZEND_HASH_FOREACH_END();
2203
2204
0
    return subject;
2205
0
  }
2206
2
}
2207
2208
static size_t php_preg_replace_func_impl(zval *return_value,
2209
  zend_string *regex_str, const HashTable *regex_ht,
2210
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2211
  zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags)
2212
2
{
2213
2
  zend_string *result;
2214
2
  size_t replace_count = 0;
2215
2216
2
  if (subject_str) {
2217
2
    result = php_replace_in_subject_func(
2218
2
      regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2219
2
    if (result != NULL) {
2220
2
      RETVAL_STR(result);
2221
2
    } else {
2222
0
      RETVAL_NULL();
2223
0
    }
2224
2
  } else {
2225
    /* if subject is an array */
2226
0
    zval    *subject_entry, zv;
2227
0
    zend_string *string_key;
2228
0
    zend_ulong   num_key;
2229
2230
0
    ZEND_ASSERT(subject_ht != NULL);
2231
2232
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2233
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2234
2235
    /* For each subject entry, convert it to string, then perform replacement
2236
       and add the result to the return_value array. */
2237
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2238
0
      zend_string *tmp_subject_entry_str;
2239
0
      zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2240
0
      if (UNEXPECTED(subject_entry_str == NULL)) {
2241
0
        break;
2242
0
      }
2243
2244
0
      result = php_replace_in_subject_func(
2245
0
        regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2246
0
      if (result != NULL) {
2247
        /* Add to return array */
2248
0
        ZVAL_STR(&zv, result);
2249
0
        if (string_key) {
2250
0
          zend_hash_add_new(return_value_ht, string_key, &zv);
2251
0
        } else {
2252
0
          zend_hash_index_add_new(return_value_ht, num_key, &zv);
2253
0
        }
2254
0
      }
2255
0
      zend_tmp_string_release(tmp_subject_entry_str);
2256
0
    } ZEND_HASH_FOREACH_END();
2257
0
  }
2258
2259
2
  return replace_count;
2260
2
}
2261
2262
static void _preg_replace_common(
2263
  zval *return_value,
2264
  HashTable *regex_ht, zend_string *regex_str,
2265
  HashTable *replace_ht, zend_string *replace_str,
2266
  HashTable *subject_ht, zend_string *subject_str,
2267
  zend_long limit,
2268
  zval *zcount,
2269
  bool is_filter
2270
26
) {
2271
26
  size_t replace_count = 0;
2272
26
  zend_string *result;
2273
26
  size_t old_replace_count;
2274
2275
  /* If replace is an array then the regex argument needs to also be an array */
2276
26
  if (replace_ht && !regex_ht) {
2277
0
    zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2278
0
    RETURN_THROWS();
2279
0
  }
2280
2281
26
  if (subject_str) {
2282
26
    old_replace_count = replace_count;
2283
26
    result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2284
26
      subject_str, limit, &replace_count);
2285
26
    if (result != NULL) {
2286
14
      if (!is_filter || replace_count > old_replace_count) {
2287
14
        RETVAL_STR(result);
2288
14
      } else {
2289
0
        zend_string_release_ex(result, 0);
2290
0
        RETVAL_NULL();
2291
0
      }
2292
14
    } else {
2293
12
      RETVAL_NULL();
2294
12
    }
2295
26
  } else {
2296
    /* if subject is an array */
2297
0
    zval    *subject_entry, zv;
2298
0
    zend_string *string_key;
2299
0
    zend_ulong   num_key;
2300
2301
0
    ZEND_ASSERT(subject_ht != NULL);
2302
2303
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2304
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2305
2306
    /* For each subject entry, convert it to string, then perform replacement
2307
       and add the result to the return_value array. */
2308
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2309
0
      old_replace_count = replace_count;
2310
0
      zend_string *tmp_subject_entry_str;
2311
0
      zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2312
0
      result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2313
0
        subject_entry_str, limit, &replace_count);
2314
2315
0
      if (result != NULL) {
2316
0
        if (!is_filter || replace_count > old_replace_count) {
2317
          /* Add to return array */
2318
0
          ZVAL_STR(&zv, result);
2319
0
          if (string_key) {
2320
0
            zend_hash_add_new(return_value_ht, string_key, &zv);
2321
0
          } else {
2322
0
            zend_hash_index_add_new(return_value_ht, num_key, &zv);
2323
0
          }
2324
0
        } else {
2325
0
          zend_string_release_ex(result, 0);
2326
0
        }
2327
0
      }
2328
0
      zend_tmp_string_release(tmp_subject_entry_str);
2329
0
    } ZEND_HASH_FOREACH_END();
2330
0
  }
2331
2332
26
  if (zcount) {
2333
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2334
0
  }
2335
26
}
2336
2337
/* {{{ preg_replace_common */
2338
/* Parse the (pattern, replacement, subject[, limit[, &count]]) arguments and
 * forward them to _preg_replace_common(). Pattern, replacement and subject
 * may each be a string or an array; limit defaults to -1. */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
  zend_string *pattern_str, *replacement_str, *subject_str;
  HashTable *pattern_ht, *replacement_ht, *subject_ht;
  zval *count_ref = NULL;
  zend_long max_replacements = -1;

  /* Get function parameters and do error-checking. */
  ZEND_PARSE_PARAMETERS_START(3, 5)
    Z_PARAM_ARRAY_HT_OR_STR(pattern_ht, pattern_str)
    Z_PARAM_ARRAY_HT_OR_STR(replacement_ht, replacement_str)
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
    Z_PARAM_OPTIONAL
    Z_PARAM_LONG(max_replacements)
    Z_PARAM_ZVAL(count_ref)
  ZEND_PARSE_PARAMETERS_END();

  _preg_replace_common(
    return_value,
    pattern_ht, pattern_str,
    replacement_ht, replacement_str,
    subject_ht, subject_str,
    max_replacements, count_ref, is_filter);
}
2362
/* }}} */
2363
2364
/* {{{ Perform Perl-style regular expression replacement. */
2365
/* Delegates to preg_replace_common() with is_filter = false, so the result is
   returned whether or not anything was replaced. */
PHP_FUNCTION(preg_replace)
2366
27
{
2367
27
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2368
27
}
2369
/* }}} */
2370
2371
/* Frameless three-argument variant of preg_replace: fetches pattern,
   replacement and subject (each a string or an array) through the Z_FLF_*
   macros and calls _preg_replace_common() with limit -1, no count output and
   is_filter = false. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
2372
0
{
2373
0
  zend_string *regex_str, *replace_str, *subject_str;
2374
0
  HashTable *regex_ht, *replace_ht, *subject_ht;
2375
0
  zval regex_tmp, replace_tmp, subject_tmp;
2376
2377
0
  /* NOTE(review): these macros presumably jump to flf_clean when an argument
     has the wrong type — nothing else in this block uses the label; confirm
     against the macro definitions. */
  Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
2378
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
2379
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
2380
2381
0
  _preg_replace_common(
2382
0
    return_value,
2383
0
    regex_ht, regex_str,
2384
0
    replace_ht, replace_str,
2385
0
    subject_ht, subject_str,
2386
0
    /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2387
2388
0
/* Common exit: hand the three *_tmp zvals to Z_FLF_PARAM_FREE_STR. */
flf_clean:;
2389
0
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
2390
0
  Z_FLF_PARAM_FREE_STR(2, replace_tmp);
2391
0
  Z_FLF_PARAM_FREE_STR(3, subject_tmp);
2392
0
}
2393
2394
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2395
PHP_FUNCTION(preg_replace_callback)
2396
2
{
2397
2
  zval *zcount = NULL;
2398
2
  zend_string *regex_str;
2399
2
  HashTable *regex_ht;
2400
2
  zend_string *subject_str;
2401
2
  HashTable *subject_ht;
2402
2
  zend_long limit = -1, flags = 0;
2403
2
  size_t replace_count;
2404
2
  zend_fcall_info fci = empty_fcall_info;
2405
2
  zend_fcall_info_cache fcc = empty_fcall_info_cache;
2406
2407
  /* Get function parameters and do error-checking. */
2408
6
  ZEND_PARSE_PARAMETERS_START(3, 6)
2409
10
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2410
10
    Z_PARAM_FUNC(fci, fcc)
2411
12
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2412
12
    Z_PARAM_OPTIONAL
2413
12
    Z_PARAM_LONG(limit)
2414
0
    Z_PARAM_ZVAL(zcount)
2415
0
    Z_PARAM_LONG(flags)
2416
2
  ZEND_PARSE_PARAMETERS_END();
2417
2418
2
  replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht,
2419
2
    &fci, &fcc,
2420
2
    subject_str, subject_ht, limit, flags);
2421
2
  if (zcount) {
2422
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2423
0
  }
2424
2
}
2425
/* }}} */
2426
2427
/* {{{ Perform Perl-style regular expression replacement using an array of pattern => callback pairs. */
2428
PHP_FUNCTION(preg_replace_callback_array)
2429
0
{
2430
0
  zval *replace, *zcount = NULL;
2431
0
  HashTable *pattern, *subject_ht;
2432
0
  zend_string *subject_str, *str_idx_regex;
2433
0
  zend_long limit = -1, flags = 0;
2434
0
  size_t replace_count = 0;
2435
2436
  /* Get function parameters and do error-checking. */
2437
0
  ZEND_PARSE_PARAMETERS_START(2, 5)
2438
0
    Z_PARAM_ARRAY_HT(pattern)
2439
0
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2440
0
    Z_PARAM_OPTIONAL
2441
0
    Z_PARAM_LONG(limit)
2442
0
    Z_PARAM_ZVAL(zcount)
2443
0
    Z_PARAM_LONG(flags)
2444
0
  ZEND_PARSE_PARAMETERS_END();
2445
2446
0
  if (subject_ht) {
2447
0
    GC_TRY_ADDREF(subject_ht);
2448
0
  } else {
2449
0
    GC_TRY_ADDREF(subject_str);
2450
0
  }
2451
2452
0
  ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2453
0
    if (!str_idx_regex) {
2454
0
      zend_argument_type_error(1, "must contain only string patterns as keys");
2455
0
      goto error;
2456
0
    }
2457
2458
0
    zend_fcall_info_cache fcc = empty_fcall_info_cache;
2459
0
    zend_fcall_info fci = empty_fcall_info;
2460
0
    fci.size = sizeof(zend_fcall_info);
2461
    /* Copy potential trampoline */
2462
0
    ZVAL_COPY_VALUE(&fci.function_name, replace);
2463
2464
0
    if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2465
0
      zend_argument_type_error(1, "must contain only valid callbacks");
2466
0
      goto error;
2467
0
    }
2468
2469
0
    zval retval;
2470
0
    replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2471
0
      subject_str, subject_ht, limit, flags);
2472
0
    zend_release_fcall_info_cache(&fcc);
2473
2474
0
    switch (Z_TYPE(retval)) {
2475
0
      case IS_ARRAY:
2476
0
        ZEND_ASSERT(subject_ht);
2477
0
        zend_array_release(subject_ht);
2478
0
        subject_ht = Z_ARR(retval);
2479
0
        break;
2480
0
      case IS_STRING:
2481
0
        ZEND_ASSERT(subject_str);
2482
0
        zend_string_release(subject_str);
2483
0
        subject_str = Z_STR(retval);
2484
0
        break;
2485
0
      case IS_NULL:
2486
0
        RETVAL_NULL();
2487
0
        goto error;
2488
0
      default: ZEND_UNREACHABLE();
2489
0
    }
2490
2491
0
    if (EG(exception)) {
2492
0
      goto error;
2493
0
    }
2494
0
  } ZEND_HASH_FOREACH_END();
2495
2496
0
  if (zcount) {
2497
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2498
0
  }
2499
2500
0
  if (subject_ht) {
2501
0
    RETVAL_ARR(subject_ht);
2502
    // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2503
0
    if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2504
0
      Z_TYPE_FLAGS_P(return_value) = 0;
2505
0
    }
2506
0
    return;
2507
0
  } else {
2508
0
    RETURN_STR(subject_str);
2509
0
  }
2510
2511
0
error:
2512
0
  if (subject_ht) {
2513
0
    zend_array_release(subject_ht);
2514
0
  } else {
2515
0
    zend_string_release(subject_str);
2516
0
  }
2517
0
}
2518
/* }}} */
2519
2520
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
2521
/* Delegates to preg_replace_common() with is_filter = true, so only subjects
   in which at least one replacement happened are returned. */
PHP_FUNCTION(preg_filter)
2522
0
{
2523
0
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2524
0
}
2525
/* }}} */
2526
2527
/* {{{ Split string into an array using a perl-style regular expression as a delimiter */
2528
PHP_FUNCTION(preg_split)
2529
0
{
2530
0
  zend_string     *regex;     /* Regular expression */
2531
0
  zend_string     *subject;   /* String to match against */
2532
0
  zend_long      limit_val = -1;/* Integer value of limit */
2533
0
  zend_long      flags = 0;   /* Match control flags */
2534
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2535
2536
  /* Get function parameters and do error checking */
2537
0
  ZEND_PARSE_PARAMETERS_START(2, 4)
2538
0
    Z_PARAM_STR(regex)
2539
0
    Z_PARAM_STR(subject)
2540
0
    Z_PARAM_OPTIONAL
2541
0
    Z_PARAM_LONG(limit_val)
2542
0
    Z_PARAM_LONG(flags)
2543
0
  ZEND_PARSE_PARAMETERS_END();
2544
2545
  /* Compile regex or get it from cache. */
2546
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2547
0
    RETURN_FALSE;
2548
0
  }
2549
2550
0
  pce->refcount++;
2551
0
  php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2552
0
  pce->refcount--;
2553
0
}
2554
/* }}} */
2555
2556
/* {{{ php_pcre_split_impl */
2557
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2558
  zend_long limit_val, zend_long flags)
2559
0
{
2560
0
  uint32_t     options;     /* Execution options */
2561
0
  int        count;       /* Count of matched subpatterns */
2562
0
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
2563
0
  PCRE2_SIZE     last_match_offset; /* Location of last match */
2564
0
  uint32_t     no_empty;      /* If NO_EMPTY flag is set */
2565
0
  uint32_t     delim_capture;   /* If delimiters should be captured */
2566
0
  uint32_t     offset_capture;  /* If offsets should be captured */
2567
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2568
0
  zval       tmp;
2569
0
  pcre2_match_data *match_data;
2570
0
  bool old_mdata_used;
2571
0
  char *subject = ZSTR_VAL(subject_str);
2572
2573
0
  no_empty = flags & PREG_SPLIT_NO_EMPTY;
2574
0
  delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2575
0
  offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2576
2577
  /* Initialize return value */
2578
0
  array_init(return_value);
2579
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2580
2581
  /* Calculate the size of the offsets array, and allocate memory for it. */
2582
0
  num_subpats = pce->capture_count + 1;
2583
2584
  /* Start at the beginning of the string */
2585
0
  start_offset = 0;
2586
0
  last_match_offset = 0;
2587
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2588
2589
0
  if (limit_val == -1) {
2590
    /* pass */
2591
0
  } else if (limit_val == 0) {
2592
0
    limit_val = -1;
2593
0
  } else if (limit_val <= 1) {
2594
0
    goto last;
2595
0
  }
2596
2597
0
  old_mdata_used = mdata_used;
2598
0
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2599
0
    mdata_used = true;
2600
0
    match_data = mdata;
2601
0
  } else {
2602
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2603
0
    if (!match_data) {
2604
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2605
0
      zval_ptr_dtor(return_value);
2606
0
      RETURN_FALSE;
2607
0
    }
2608
0
  }
2609
2610
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2611
2612
  /* Array of subpattern offsets */
2613
0
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2614
2615
#ifdef HAVE_PCRE_JIT_SUPPORT
2616
  if ((pce->preg_options & PREG_JIT) && options) {
2617
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2618
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2619
  } else
2620
#endif
2621
0
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2622
0
      options, match_data, mctx);
2623
2624
0
  while (1) {
2625
    /* If something matched */
2626
0
    if (count >= 0) {
2627
      /* Check for too many substrings condition. */
2628
0
      if (UNEXPECTED(count == 0)) {
2629
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2630
0
        count = num_subpats;
2631
0
      }
2632
2633
0
matched:
2634
0
      if (UNEXPECTED(offsets[1] < offsets[0])) {
2635
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2636
0
        break;
2637
0
      }
2638
2639
0
      if (!no_empty || offsets[0] != last_match_offset) {
2640
0
        if (offset_capture) {
2641
          /* Add (match, offset) pair to the return value */
2642
0
          add_offset_pair(
2643
0
            return_value_ht, subject, last_match_offset, offsets[0],
2644
0
            NULL, 0);
2645
0
        } else {
2646
          /* Add the piece to the return value */
2647
0
          populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2648
0
          zend_hash_next_index_insert_new(return_value_ht, &tmp);
2649
0
        }
2650
2651
        /* One less left to do */
2652
0
        if (limit_val != -1)
2653
0
          limit_val--;
2654
0
      }
2655
2656
0
      if (delim_capture) {
2657
0
        size_t i;
2658
0
        for (i = 1; i < count; i++) {
2659
          /* If we have matched a delimiter */
2660
0
          if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2661
0
            if (offset_capture) {
2662
0
              add_offset_pair(
2663
0
                return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2664
0
            } else {
2665
0
              populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2666
0
              zend_hash_next_index_insert_new(return_value_ht, &tmp);
2667
0
            }
2668
0
          }
2669
0
        }
2670
0
      }
2671
2672
      /* Advance to the position right after the last full match */
2673
0
      start_offset = last_match_offset = offsets[1];
2674
2675
      /* If we have matched an empty string, mimic what Perl's /g options does.
2676
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2677
         the match again at the same point. If this fails (picked up above) we
2678
         advance to the next character. */
2679
0
      if (start_offset == offsets[0]) {
2680
        /* Get next piece if no limit or limit not yet reached and something matched*/
2681
0
        if (limit_val != -1 && limit_val <= 1) {
2682
0
          break;
2683
0
        }
2684
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2685
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2686
0
        if (count >= 0) {
2687
0
          goto matched;
2688
0
        } else if (count == PCRE2_ERROR_NOMATCH) {
2689
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2690
             this is not necessarily the end. We need to advance
2691
             the start offset, and continue. Fudge the offset values
2692
             to achieve this, unless we're already at the end of the string. */
2693
0
          if (start_offset < ZSTR_LEN(subject_str)) {
2694
0
            start_offset += calculate_unit_length(pce, subject + start_offset);
2695
0
          } else {
2696
0
            break;
2697
0
          }
2698
0
        } else {
2699
0
          goto error;
2700
0
        }
2701
0
      }
2702
2703
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2704
0
      break;
2705
0
    } else {
2706
0
error:
2707
0
      pcre_handle_exec_error(count);
2708
0
      break;
2709
0
    }
2710
2711
    /* Get next piece if no limit or limit not yet reached and something matched*/
2712
0
    if (limit_val != -1 && limit_val <= 1) {
2713
0
      break;
2714
0
    }
2715
2716
#ifdef HAVE_PCRE_JIT_SUPPORT
2717
    if (pce->preg_options & PREG_JIT) {
2718
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2719
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2720
    } else
2721
#endif
2722
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2723
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2724
0
  }
2725
0
  if (match_data != mdata) {
2726
0
    pcre2_match_data_free(match_data);
2727
0
  }
2728
0
  mdata_used = old_mdata_used;
2729
2730
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2731
0
    zval_ptr_dtor(return_value);
2732
0
    RETURN_FALSE;
2733
0
  }
2734
2735
0
last:
2736
0
  start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2737
2738
0
  if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2739
0
    if (offset_capture) {
2740
      /* Add the last (match, offset) pair to the return value */
2741
0
      add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2742
0
    } else {
2743
      /* Add the last piece to the return value */
2744
0
      if (start_offset == 0) {
2745
0
        ZVAL_STR_COPY(&tmp, subject_str);
2746
0
      } else {
2747
0
        populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2748
0
      }
2749
0
      zend_hash_next_index_insert_new(return_value_ht, &tmp);
2750
0
    }
2751
0
  }
2752
0
}
2753
/* }}} */
2754
2755
/* {{{ Quote regular expression characters plus an optional character: preg_quote(str [, delim]) returns str with each character listed in the switch below (and the first byte of delim, if given) preceded by a backslash; a NUL byte is written as a backslash followed by "000" */
2756
PHP_FUNCTION(preg_quote)
2757
5
{
2758
5
  zend_string *str;           /* Input string argument */
2759
5
  zend_string *delim = NULL;   /* Optional delimiter argument; still NULL when it was not supplied */
2760
5
  char    *in_str;      /* Start of the input string */
2761
5
  char    *in_str_end;      /* One past the last byte of the input string */
2762
5
  zend_string *out_str;     /* Output string with quoted characters */
2763
5
  size_t       extra_len;         /* Number of bytes the escaping adds on top of the input length */
2764
5
  char    *p,         /* Iterator for input string */
2765
5
        *q,         /* Iterator for output string */
2766
5
         delim_char = '\0', /* Delimiter character to be quoted; '\0' means no delimiter (NUL input bytes never reach the delimiter comparison) */
2767
5
         c;         /* Current character */
2768
2769
  /* Get the arguments and check for errors: one required string, one optional string-or-null */
2770
15
  ZEND_PARSE_PARAMETERS_START(1, 2)
2771
20
    Z_PARAM_STR(str)
2772
5
    Z_PARAM_OPTIONAL
2773
10
    Z_PARAM_STR_OR_NULL(delim)
2774
5
  ZEND_PARSE_PARAMETERS_END();
2775
2776
  /* Nothing to do if we got an empty string; this also ensures the do/while loops below always have at least one byte to read */
2777
5
  if (ZSTR_LEN(str) == 0) {
2778
0
    RETURN_EMPTY_STRING();
2779
0
  }
2780
2781
5
  in_str = ZSTR_VAL(str);
2782
5
  in_str_end = in_str + ZSTR_LEN(str);
2783
2784
5
  if (delim) {
2785
0
    delim_char = ZSTR_VAL(delim)[0]; /* only the first byte of delim is considered */
2786
0
  }
2787
2788
  /* First pass: go through the string and count the extra bytes the quoting will need */
2789
5
  extra_len = 0;
2790
5
  p = in_str;
2791
353
  do {
2792
353
    c = *p;
2793
353
    switch(c) {
2794
1
      case '.':
2795
4
      case '\\':
2796
15
      case '+':
2797
15
      case '*':
2798
25
      case '?':
2799
28
      case '[':
2800
29
      case '^':
2801
32
      case ']':
2802
33
      case '$':
2803
38
      case '(':
2804
39
      case ')':
2805
42
      case '{':
2806
44
      case '}':
2807
44
      case '=':
2808
44
      case '!':
2809
46
      case '>':
2810
47
      case '<':
2811
48
      case '|':
2812
61
      case ':':
2813
65
      case '-':
2814
66
      case '#':
2815
66
        extra_len++; /* one backslash in front of the character */
2816
66
        break;
2817
2818
19
      case '\0':
2819
19
        extra_len+=3; /* a NUL byte is written as four bytes (backslash plus "000") instead of one */
2820
19
        break;
2821
2822
268
      default:
2823
268
        if (c == delim_char) {
2824
0
          extra_len++;
2825
0
        }
2826
268
        break;
2827
353
    }
2828
353
    p++;
2829
353
  } while (p != in_str_end);
2830
2831
5
  if (extra_len == 0) { /* nothing needs quoting: hand back the input string itself */
2832
1
    RETURN_STR_COPY(str);
2833
1
  }
2834
2835
  /* Allocate the output string, sized from the input length plus
2836
     the extra_len counted in the first pass */
2837
4
  out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2838
4
  q = ZSTR_VAL(out_str);
2839
4
  p = in_str; /* second pass: copy the input, inserting the escapes counted above */
2840
2841
344
  do {
2842
344
    c = *p;
2843
344
    switch(c) {
2844
1
      case '.':
2845
4
      case '\\':
2846
15
      case '+':
2847
15
      case '*':
2848
25
      case '?':
2849
28
      case '[':
2850
29
      case '^':
2851
32
      case ']':
2852
33
      case '$':
2853
38
      case '(':
2854
39
      case ')':
2855
42
      case '{':
2856
44
      case '}':
2857
44
      case '=':
2858
44
      case '!':
2859
46
      case '>':
2860
47
      case '<':
2861
48
      case '|':
2862
61
      case ':':
2863
65
      case '-':
2864
66
      case '#':
2865
66
        *q++ = '\\';
2866
66
        *q++ = c;
2867
66
        break;
2868
2869
19
      case '\0':
2870
19
        *q++ = '\\';
2871
19
        *q++ = '0';
2872
19
        *q++ = '0';
2873
19
        *q++ = '0';
2874
19
        break;
2875
2876
259
      default:
2877
259
        if (c == delim_char) {
2878
0
          *q++ = '\\';
2879
0
        }
2880
259
        *q++ = c;
2881
259
        break;
2882
344
    }
2883
344
    p++;
2884
344
  } while (p != in_str_end);
2885
4
  *q = '\0'; /* terminate the output buffer */
2886
2887
4
  RETURN_NEW_STR(out_str);
2888
4
}
2889
/* }}} */
2890
2891
/* {{{ Searches array and returns entries which match regex: preg_grep(regex, input [, flags]) compiles regex (or fetches it from the cache) and delegates the filtering to php_pcre_grep_impl(); returns false if the regex cannot be obtained */
2892
PHP_FUNCTION(preg_grep)
2893
0
{
2894
0
  zend_string     *regex;     /* Regular expression */
2895
0
  zval        *input;     /* Input array */
2896
0
  zend_long      flags = 0;   /* Match control flags; 0 when the argument is omitted */
2897
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2898
2899
  /* Get arguments and do error checking: regex string, input array, optional integer flags */
2900
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2901
0
    Z_PARAM_STR(regex)
2902
0
    Z_PARAM_ARRAY(input)
2903
0
    Z_PARAM_OPTIONAL
2904
0
    Z_PARAM_LONG(flags)
2905
0
  ZEND_PARSE_PARAMETERS_END();
2906
2907
  /* Compile regex or get it from cache. */
2908
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2909
0
    RETURN_FALSE;
2910
0
  }
2911
2912
0
  pce->refcount++; /* hold a reference on the cache entry for the duration of the call below */
2913
0
  php_pcre_grep_impl(pce, input, return_value, flags);
2914
0
  pce->refcount--;
2915
0
}
2916
/* }}} */
2917
2918
PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2919
0
{
2920
0
  zval            *entry;             /* An entry in the input array */
2921
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2922
0
  int        count;       /* Count of matched subpatterns */
2923
0
  uint32_t     options;     /* Execution options */
2924
0
  zend_string   *string_key;
2925
0
  zend_ulong     num_key;
2926
0
  bool     invert;      /* Whether to return non-matching
2927
                       entries */
2928
0
  bool old_mdata_used;
2929
0
  pcre2_match_data *match_data;
2930
0
  invert = flags & PREG_GREP_INVERT ? 1 : 0;
2931
2932
  /* Calculate the size of the offsets array, and allocate memory for it. */
2933
0
  num_subpats = pce->capture_count + 1;
2934
2935
  /* Initialize return array */
2936
0
  array_init(return_value);
2937
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2938
2939
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2940
2941
0
  old_mdata_used = mdata_used;
2942
0
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2943
0
    mdata_used = true;
2944
0
    match_data = mdata;
2945
0
  } else {
2946
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2947
0
    if (!match_data) {
2948
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2949
0
      return;
2950
0
    }
2951
0
  }
2952
2953
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2954
2955
  /* Go through the input array */
2956
0
  ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2957
0
    zend_string *tmp_subject_str;
2958
0
    zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2959
2960
    /* Perform the match */
2961
#ifdef HAVE_PCRE_JIT_SUPPORT
2962
    if ((pce->preg_options & PREG_JIT) && options) {
2963
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2964
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2965
    } else
2966
#endif
2967
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2968
0
        options, match_data, mctx);
2969
2970
    /* If the entry fits our requirements */
2971
0
    if (count >= 0) {
2972
      /* Check for too many substrings condition. */
2973
0
      if (UNEXPECTED(count == 0)) {
2974
0
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2975
0
      }
2976
0
      if (!invert) {
2977
0
        Z_TRY_ADDREF_P(entry);
2978
2979
        /* Add to return array */
2980
0
        if (string_key) {
2981
0
          zend_hash_update(return_value_ht, string_key, entry);
2982
0
        } else {
2983
0
          zend_hash_index_update(return_value_ht, num_key, entry);
2984
0
        }
2985
0
      }
2986
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2987
0
      if (invert) {
2988
0
        Z_TRY_ADDREF_P(entry);
2989
2990
        /* Add to return array */
2991
0
        if (string_key) {
2992
0
          zend_hash_update(return_value_ht, string_key, entry);
2993
0
        } else {
2994
0
          zend_hash_index_update(return_value_ht, num_key, entry);
2995
0
        }
2996
0
      }
2997
0
    } else {
2998
0
      pcre_handle_exec_error(count);
2999
0
      zend_tmp_string_release(tmp_subject_str);
3000
0
      break;
3001
0
    }
3002
3003
0
    zend_tmp_string_release(tmp_subject_str);
3004
0
  } ZEND_HASH_FOREACH_END();
3005
0
  if (match_data != mdata) {
3006
0
    pcre2_match_data_free(match_data);
3007
0
  }
3008
3009
0
  mdata_used = old_mdata_used;
3010
3011
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
3012
0
    zend_array_destroy(Z_ARR_P(return_value));
3013
0
    RETURN_FALSE;
3014
0
  }
3015
0
}
3016
/* }}} */
3017
3018
/* {{{ Returns the error code of the last regexp execution: takes no arguments and returns, as an integer, the value the matching functions store in PCRE_G(error_code). */
3019
PHP_FUNCTION(preg_last_error)
3020
0
{
3021
0
  ZEND_PARSE_PARAMETERS_NONE();
3022
3023
0
  RETURN_LONG(PCRE_G(error_code));
3024
0
}
3025
/* }}} */
3026
3027
/* {{{ Returns the error message of the last regexp execution: takes no arguments and returns the string php_pcre_get_error_msg() yields for the code stored in PCRE_G(error_code). */
3028
PHP_FUNCTION(preg_last_error_msg)
3029
0
{
3030
0
  ZEND_PARSE_PARAMETERS_NONE();
3031
3032
0
  RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3033
0
}
3034
/* }}} */
3035
3036
/* {{{ module definition structures: the zend_module_entry describing the "pcre" extension, plus the ZEND_GET_MODULE export used when COMPILE_DL_PCRE is defined */
3037
3038
zend_module_entry pcre_module_entry = {
3039
  STANDARD_MODULE_HEADER,
3040
  "pcre", /* extension name */
3041
  ext_functions, /* function table */
3042
  PHP_MINIT(pcre), /* module startup hook */
3043
  PHP_MSHUTDOWN(pcre), /* module shutdown hook */
3044
  PHP_RINIT(pcre), /* request startup hook */
3045
  PHP_RSHUTDOWN(pcre), /* request shutdown hook */
3046
  PHP_MINFO(pcre), /* module info hook */
3047
  PHP_PCRE_VERSION, /* extension version */
3048
  PHP_MODULE_GLOBALS(pcre), /* module globals */
3049
  PHP_GINIT(pcre), /* globals constructor */
3050
  PHP_GSHUTDOWN(pcre), /* globals destructor */
3051
  NULL, /* presumably the post-deactivate handler slot, left unset -- TODO confirm against zend_module_entry */
3052
  STANDARD_MODULE_PROPERTIES_EX
3053
};
3054
3055
#ifdef COMPILE_DL_PCRE
3056
ZEND_GET_MODULE(pcre)
3057
#endif
3058
3059
/* }}} */
3060
3061
PHPAPI pcre2_match_context *php_pcre_mctx(void)
3062
3
{/*{{{ Accessor: returns mctx, the match context this file passes to pcre2_match() / pcre2_jit_match() */
3063
3
  return mctx;
3064
3
}/*}}}*/
3065
3066
PHPAPI pcre2_general_context *php_pcre_gctx(void)
3067
0
{/*{{{ Accessor: returns the general context pointer gctx, which is defined outside this function */
3068
0
  return gctx;
3069
0
}/*}}}*/
3070
3071
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3072
0
{/*{{{ Accessor: returns the compile context pointer cctx, which is defined outside this function */
3073
0
  return cctx;
3074
0
}/*}}}*/
3075
3076
PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3077
0
{/*{{{ Takes one reference on the cache entry by incrementing pce->refcount; pce must not be NULL */
3078
0
  assert(NULL != pce);
3079
0
  pce->refcount++;
3080
0
}/*}}}*/
3081
3082
PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3083
0
{/*{{{ Drops one reference on the cache entry by decrementing pce->refcount; pce must not be NULL */
3084
0
  assert(NULL != pce);
3085
0
  assert(0 != pce->refcount); /* the count must be non-zero, otherwise the decrement below would underflow */
3086
0
  pce->refcount--;
3087
0
}/*}}}*/
3088
3089
PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3090
0
{/*{{{ Accessor: returns the compiled pattern (pce->re) held by the cache entry; pce must not be NULL */
3091
0
  assert(NULL != pce);
3092
0
  return pce->re;
3093
0
}/*}}}*/