Coverage Report

Created: 2025-07-23 06:33

/src/php-src/ext/pcre/php_pcre.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | https://www.php.net/license/3_01.txt                                 |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Author: Andrei Zmievski <andrei@php.net>                             |
14
   +----------------------------------------------------------------------+
15
 */
16
17
#include "php.h"
18
#include "php_ini.h"
19
#include "php_pcre.h"
20
#include "ext/standard/info.h"
21
#include "ext/standard/basic_functions.h"
22
#include "zend_smart_str.h"
23
#include "SAPI.h"
24
25
0
#define PREG_PATTERN_ORDER      1
26
0
#define PREG_SET_ORDER        2
27
196
#define PREG_OFFSET_CAPTURE     (1<<8)
28
196
#define PREG_UNMATCHED_AS_NULL    (1<<9)
29
30
0
#define PREG_SPLIT_NO_EMPTY     (1<<0)
31
0
#define PREG_SPLIT_DELIM_CAPTURE  (1<<1)
32
0
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
33
34
0
#define PREG_GREP_INVERT      (1<<0)
35
36
#define PREG_JIT                    (1<<3)
37
38
1.06k
#define PCRE_CACHE_SIZE 4096
39
40
#ifdef HAVE_PCRE_JIT_SUPPORT
41
#define PHP_PCRE_JIT_SUPPORT 1
42
#else
43
#define PHP_PCRE_JIT_SUPPORT 0
44
#endif
45
46
char *php_pcre_version;
47
48
#include "php_pcre_arginfo.h"
49
50
struct _pcre_cache_entry {
51
  pcre2_code *re;
52
  /* Pointer is not NULL (during request) when there are named captures.
53
   * Length is equal to capture_count + 1 to account for capture group 0.
54
   * This table cache is only valid during request.
55
   * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
56
   * (see GH-17122 and GH-17132). */
57
  zend_string **subpats_table;
58
  uint32_t preg_options;
59
  uint32_t name_count;
60
  uint32_t capture_count;
61
  uint32_t compile_options;
62
  uint32_t refcount;
63
};
64
65
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
66
67
#ifdef HAVE_PCRE_JIT_SUPPORT
68
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
69
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
70
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
71
#endif
72
/* General context using (infallible) system allocator. */
73
ZEND_TLS pcre2_general_context *gctx = NULL;
74
/* These two are global per thread for now. Though it is possible to use these
75
  per pattern. Either one can copy it and use in pce, or one does no global
76
  contexts at all, but creates for every pce. */
77
ZEND_TLS pcre2_compile_context *cctx = NULL;
78
ZEND_TLS pcre2_match_context   *mctx = NULL;
79
ZEND_TLS pcre2_match_data      *mdata = NULL;
80
ZEND_TLS bool              mdata_used = 0;
81
ZEND_TLS uint8_t pcre2_init_ok = 0;
82
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
83
static MUTEX_T pcre_mt = NULL;
84
#define php_pcre_mutex_alloc() \
85
  if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
86
#define php_pcre_mutex_free() \
87
  if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
88
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
89
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
90
#else
91
#define php_pcre_mutex_alloc()
92
#define php_pcre_mutex_free()
93
#define php_pcre_mutex_lock()
94
#define php_pcre_mutex_unlock()
95
#endif
96
97
ZEND_TLS HashTable char_tables;
98
99
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
100
101
static void php_pcre_free_char_table(zval *data)
102
0
{/*{{{*/
103
0
  void *ptr = Z_PTR_P(data);
104
0
  pefree(ptr, 1);
105
0
}/*}}}*/
106
107
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
108
1.33k
{
109
1.33k
  int preg_code = 0;
110
111
1.33k
  switch (pcre_code) {
112
13
    case PCRE2_ERROR_MATCHLIMIT:
113
13
      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
114
13
      break;
115
116
0
    case PCRE2_ERROR_RECURSIONLIMIT:
117
0
      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
118
0
      break;
119
120
0
    case PCRE2_ERROR_BADUTFOFFSET:
121
0
      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
122
0
      break;
123
124
#ifdef HAVE_PCRE_JIT_SUPPORT
125
    case PCRE2_ERROR_JIT_STACKLIMIT:
126
      preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
127
      break;
128
#endif
129
130
1.32k
    default:
131
1.32k
      if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
132
129
        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
133
1.19k
      } else  {
134
1.19k
        preg_code = PHP_PCRE_INTERNAL_ERROR;
135
1.19k
      }
136
1.32k
      break;
137
1.33k
  }
138
139
1.33k
  PCRE_G(error_code) = preg_code;
140
1.33k
}
141
/* }}} */
142
143
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
144
0
{
145
0
  switch (error_code) {
146
0
    case PHP_PCRE_NO_ERROR:
147
0
      return "No error";
148
0
    case PHP_PCRE_INTERNAL_ERROR:
149
0
      return "Internal error";
150
0
    case PHP_PCRE_BAD_UTF8_ERROR:
151
0
      return "Malformed UTF-8 characters, possibly incorrectly encoded";
152
0
    case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
153
0
      return "The offset did not correspond to the beginning of a valid UTF-8 code point";
154
0
    case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
155
0
      return "Backtrack limit exhausted";
156
0
    case PHP_PCRE_RECURSION_LIMIT_ERROR:
157
0
      return "Recursion limit exhausted";
158
159
#ifdef HAVE_PCRE_JIT_SUPPORT
160
    case PHP_PCRE_JIT_STACKLIMIT_ERROR:
161
      return "JIT stack limit exhausted";
162
#endif
163
164
0
    default:
165
0
      return "Unknown error";
166
0
  }
167
0
}
168
/* }}} */
169
170
static void php_free_pcre_cache(zval *data) /* {{{ */
171
0
{
172
0
  pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
173
0
  if (!pce) return;
174
0
  if (pce->subpats_table) {
175
0
    free_subpats_table(pce->subpats_table, pce->capture_count + 1);
176
0
  }
177
0
  pcre2_code_free(pce->re);
178
0
  free(pce);
179
0
}
180
/* }}} */
181
182
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
183
4.36k
{
184
4.36k
  return pemalloc(size, 1);
185
4.36k
}
186
187
static void php_pcre_free(void *block, void *data)
188
3.23k
{
189
3.23k
  pefree(block, 1);
190
3.23k
}
191
192
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
193
268k
{
194
268k
  return emalloc(size);
195
268k
}
196
197
static void php_pcre_efree(void *block, void *data)
198
268k
{
199
268k
  efree(block);
200
268k
}
201
202
3.27k
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
203
204
static void php_pcre_init_pcre2(uint8_t jit)
205
16
{/*{{{*/
206
16
  if (!gctx) {
207
16
    gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
208
16
    if (!gctx) {
209
0
      pcre2_init_ok = 0;
210
0
      return;
211
0
    }
212
16
  }
213
214
16
  if (!cctx) {
215
16
    cctx = pcre2_compile_context_create(gctx);
216
16
    if (!cctx) {
217
0
      pcre2_init_ok = 0;
218
0
      return;
219
0
    }
220
16
  }
221
222
16
  if (!mctx) {
223
16
    mctx = pcre2_match_context_create(gctx);
224
16
    if (!mctx) {
225
0
      pcre2_init_ok = 0;
226
0
      return;
227
0
    }
228
16
  }
229
230
#ifdef HAVE_PCRE_JIT_SUPPORT
231
  if (jit && !jit_stack) {
232
    jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
233
    if (!jit_stack) {
234
      pcre2_init_ok = 0;
235
      return;
236
    }
237
  }
238
#endif
239
240
16
  if (!mdata) {
241
16
    mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
242
16
    if (!mdata) {
243
0
      pcre2_init_ok = 0;
244
0
      return;
245
0
    }
246
16
  }
247
248
16
  pcre2_init_ok = 1;
249
16
}/*}}}*/
250
251
static void php_pcre_shutdown_pcre2(void)
252
0
{/*{{{*/
253
0
  if (gctx) {
254
0
    pcre2_general_context_free(gctx);
255
0
    gctx = NULL;
256
0
  }
257
258
0
  if (cctx) {
259
0
    pcre2_compile_context_free(cctx);
260
0
    cctx = NULL;
261
0
  }
262
263
0
  if (mctx) {
264
0
    pcre2_match_context_free(mctx);
265
0
    mctx = NULL;
266
0
  }
267
268
#ifdef HAVE_PCRE_JIT_SUPPORT
269
  /* Stack may only be destroyed when no cached patterns
270
    possibly associated with it do exist. */
271
  if (jit_stack) {
272
    pcre2_jit_stack_free(jit_stack);
273
    jit_stack = NULL;
274
  }
275
#endif
276
277
0
  if (mdata) {
278
0
    pcre2_match_data_free(mdata);
279
0
    mdata = NULL;
280
0
  }
281
282
0
  pcre2_init_ok = 0;
283
0
}/*}}}*/
284
285
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
286
16
{
287
16
  php_pcre_mutex_alloc();
288
289
16
  zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
290
291
16
  pcre_globals->backtrack_limit = 0;
292
16
  pcre_globals->recursion_limit = 0;
293
16
  pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
294
16
  ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
295
16
  ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
296
#ifdef HAVE_PCRE_JIT_SUPPORT
297
  pcre_globals->jit = 1;
298
#endif
299
300
16
  php_pcre_init_pcre2(1);
301
16
  zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
302
16
}
303
/* }}} */
304
305
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
306
0
{
307
0
  zend_hash_destroy(&pcre_globals->pcre_cache);
308
309
0
  php_pcre_shutdown_pcre2();
310
0
  zend_hash_destroy(&char_tables);
311
0
  php_pcre_mutex_free();
312
0
}
313
/* }}} */
314
315
static PHP_INI_MH(OnUpdateBacktrackLimit)
316
16
{/*{{{*/
317
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
318
16
  if (mctx) {
319
16
    pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
320
16
  }
321
322
16
  return SUCCESS;
323
16
}/*}}}*/
324
325
static PHP_INI_MH(OnUpdateRecursionLimit)
326
16
{/*{{{*/
327
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
328
16
  if (mctx) {
329
16
    pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
330
16
  }
331
332
16
  return SUCCESS;
333
16
}/*}}}*/
334
335
#ifdef HAVE_PCRE_JIT_SUPPORT
336
static PHP_INI_MH(OnUpdateJit)
337
{/*{{{*/
338
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
339
  if (PCRE_G(jit) && jit_stack) {
340
    pcre2_jit_stack_assign(mctx, NULL, jit_stack);
341
  } else {
342
    pcre2_jit_stack_assign(mctx, NULL, NULL);
343
  }
344
345
  return SUCCESS;
346
}/*}}}*/
347
#endif
348
349
PHP_INI_BEGIN()
350
  STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
351
  STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
352
#ifdef HAVE_PCRE_JIT_SUPPORT
353
  STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
354
#endif
355
PHP_INI_END()
356
357
static char *_pcre2_config_str(uint32_t what)
358
38
{/*{{{*/
359
38
  int len = pcre2_config(what, NULL);
360
38
  char *ret = (char *) malloc(len + 1);
361
362
38
  len = pcre2_config(what, ret);
363
38
  if (!len) {
364
0
    free(ret);
365
0
    return NULL;
366
0
  }
367
368
38
  return ret;
369
38
}/*}}}*/
370
371
/* {{{ PHP_MINFO_FUNCTION(pcre) */
372
static PHP_MINFO_FUNCTION(pcre)
373
11
{
374
#ifdef HAVE_PCRE_JIT_SUPPORT
375
  uint32_t flag = 0;
376
  char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
377
#endif
378
11
  char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
379
11
  char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
380
381
11
  php_info_print_table_start();
382
11
  php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
383
11
  php_info_print_table_row(2, "PCRE Library Version", version);
384
11
  free(version);
385
11
  php_info_print_table_row(2, "PCRE Unicode Version", unicode);
386
11
  free(unicode);
387
388
#ifdef HAVE_PCRE_JIT_SUPPORT
389
  if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
390
    php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
391
  } else {
392
    php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
393
  }
394
  if (jit_target) {
395
    php_info_print_table_row(2, "PCRE JIT Target", jit_target);
396
  }
397
  free(jit_target);
398
#else
399
11
  php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
400
11
#endif
401
402
#ifdef HAVE_PCRE_VALGRIND_SUPPORT
403
  php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
404
#endif
405
406
11
  php_info_print_table_end();
407
408
11
  DISPLAY_INI_ENTRIES();
409
11
}
410
/* }}} */
411
412
/* {{{ PHP_MINIT_FUNCTION(pcre) */
413
static PHP_MINIT_FUNCTION(pcre)
414
16
{
415
#ifdef HAVE_PCRE_JIT_SUPPORT
416
  if (UNEXPECTED(!pcre2_init_ok)) {
417
    /* Retry. */
418
    php_pcre_init_pcre2(PCRE_G(jit));
419
    if (!pcre2_init_ok) {
420
      return FAILURE;
421
    }
422
  }
423
#endif
424
425
16
  REGISTER_INI_ENTRIES();
426
427
16
  php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
428
429
16
  register_php_pcre_symbols(module_number);
430
431
16
  return SUCCESS;
432
16
}
433
/* }}} */
434
435
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
436
static PHP_MSHUTDOWN_FUNCTION(pcre)
437
0
{
438
0
  UNREGISTER_INI_ENTRIES();
439
440
0
  free(php_pcre_version);
441
442
0
  return SUCCESS;
443
0
}
444
/* }}} */
445
446
/* {{{ PHP_RINIT_FUNCTION(pcre) */
447
static PHP_RINIT_FUNCTION(pcre)
448
268k
{
449
#ifdef HAVE_PCRE_JIT_SUPPORT
450
  if (UNEXPECTED(!pcre2_init_ok)) {
451
    /* Retry. */
452
    php_pcre_mutex_lock();
453
    php_pcre_init_pcre2(PCRE_G(jit));
454
    if (!pcre2_init_ok) {
455
      php_pcre_mutex_unlock();
456
      return FAILURE;
457
    }
458
    php_pcre_mutex_unlock();
459
  }
460
461
  mdata_used = 0;
462
#endif
463
464
268k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
465
268k
  PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
466
268k
  if (!PCRE_G(gctx_zmm)) {
467
0
    return FAILURE;
468
0
  }
469
470
268k
  return SUCCESS;
471
268k
}
472
/* }}} */
473
474
static PHP_RSHUTDOWN_FUNCTION(pcre)
475
268k
{
476
268k
  pcre_cache_entry *pce;
477
49.8M
  ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
478
49.8M
    if (pce->subpats_table) {
479
0
      free_subpats_table(pce->subpats_table, pce->capture_count + 1);
480
0
      pce->subpats_table = NULL;
481
0
    }
482
49.8M
  } ZEND_HASH_FOREACH_END();
483
484
268k
  pcre2_general_context_free(PCRE_G(gctx_zmm));
485
268k
  PCRE_G(gctx_zmm) = NULL;
486
487
268k
  zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
488
268k
  zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
489
268k
  ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
490
268k
  ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
491
268k
  return SUCCESS;
492
268k
}
493
494
/* {{{ static pcre_clean_cache */
495
static int pcre_clean_cache(zval *data, void *arg)
496
0
{
497
0
  pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
498
0
  int *num_clean = (int *)arg;
499
500
0
  if (!pce->refcount) {
501
0
    if (--(*num_clean) == 0) {
502
0
      return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
503
0
    }
504
0
    return ZEND_HASH_APPLY_REMOVE;
505
0
  } else {
506
0
    return ZEND_HASH_APPLY_KEEP;
507
0
  }
508
0
}
509
/* }}} */
510
511
0
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
512
0
  uint32_t i;
513
0
  for (i = 0; i < num_subpats; i++) {
514
0
    if (subpat_names[i]) {
515
0
      zend_string_release_ex(subpat_names[i], false);
516
0
    }
517
0
  }
518
0
  efree(subpat_names);
519
0
}
520
521
/* {{{ static make_subpats_table */
522
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
523
0
{
524
0
  uint32_t num_subpats = pce->capture_count + 1;
525
0
  uint32_t name_size, ni = 0;
526
0
  char *name_table;
527
0
  zend_string **subpat_names;
528
0
  int rc1, rc2;
529
530
0
  rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
531
0
  rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
532
0
  if (rc1 < 0 || rc2 < 0) {
533
0
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
534
0
    return NULL;
535
0
  }
536
537
0
  subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
538
0
  while (ni++ < name_cnt) {
539
0
    unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
540
0
    const char *name = name_table + 2;
541
0
    subpat_names[name_idx] = zend_string_init(name, strlen(name), false);
542
0
    name_table += name_size;
543
0
  }
544
0
  return subpat_names;
545
0
}
546
/* }}} */
547
548
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
549
0
{
550
0
  if (!pce->subpats_table) {
551
0
    pce->subpats_table = make_subpats_table(name_cnt, pce);
552
0
  }
553
0
  return pce->subpats_table;
554
0
}
555
556
/* {{{ static calculate_unit_length */
557
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
558
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
559
69.5k
{
560
69.5k
  size_t unit_len;
561
562
69.5k
  if (pce->compile_options & PCRE2_UTF) {
563
6
    const char *end = start;
564
565
    /* skip continuation bytes */
566
6
    while ((*++end & 0xC0) == 0x80);
567
6
    unit_len = end - start;
568
69.5k
  } else {
569
69.5k
    unit_len = 1;
570
69.5k
  }
571
69.5k
  return unit_len;
572
69.5k
}
573
/* }}} */
574
575
/* {{{ pcre_get_compiled_regex_cache */
576
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
577
4.57k
{
578
4.57k
  pcre2_code      *re = NULL;
579
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
580
  uint32_t       coptions = PCRE2_NO_START_OPTIMIZE;
581
#else
582
4.57k
  uint32_t       coptions = 0;
583
4.57k
#endif
584
4.57k
  uint32_t       eoptions = 0;
585
4.57k
  PCRE2_UCHAR           error[128];
586
4.57k
  PCRE2_SIZE           erroffset;
587
4.57k
  int                  errnumber;
588
4.57k
  char         delimiter;
589
4.57k
  char         start_delimiter;
590
4.57k
  char         end_delimiter;
591
4.57k
  char        *p, *pp;
592
4.57k
  char        *pattern;
593
4.57k
  size_t         pattern_len;
594
4.57k
  uint32_t       poptions = 0;
595
4.57k
  const uint8_t       *tables = NULL;
596
4.57k
  zval                *zv;
597
4.57k
  pcre_cache_entry   new_entry;
598
4.57k
  int          rc;
599
4.57k
  zend_string     *key;
600
4.57k
  pcre_cache_entry  *ret;
601
602
4.57k
  if (locale_aware && BG(ctype_string)) {
603
0
    key = zend_string_concat2(
604
0
      ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
605
0
      ZSTR_VAL(regex), ZSTR_LEN(regex));
606
4.57k
  } else {
607
4.57k
    key = regex;
608
4.57k
  }
609
610
  /* Try to lookup the cached regex entry, and if successful, just pass
611
     back the compiled pattern, otherwise go on and compile it. */
612
4.57k
  zv = zend_hash_find(&PCRE_G(pcre_cache), key);
613
4.57k
  if (zv) {
614
2.32k
    if (key != regex) {
615
0
      zend_string_release_ex(key, 0);
616
0
    }
617
2.32k
    return (pcre_cache_entry*)Z_PTR_P(zv);
618
2.32k
  }
619
620
2.24k
  p = ZSTR_VAL(regex);
621
2.24k
  const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
622
623
  /* Parse through the leading whitespace, and display a warning if we
624
     get to the end without encountering a delimiter. */
625
2.24k
  while (isspace((int)*(unsigned char *)p)) p++;
626
2.24k
  if (p >= end_p) {
627
0
    if (key != regex) {
628
0
      zend_string_release_ex(key, 0);
629
0
    }
630
0
    php_error_docref(NULL, E_WARNING, "Empty regular expression");
631
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
632
0
    return NULL;
633
0
  }
634
635
  /* Get the delimiter and display a warning if it is alphanumeric
636
     or a backslash. */
637
2.24k
  delimiter = *p++;
638
2.24k
  if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
639
14
    if (key != regex) {
640
0
      zend_string_release_ex(key, 0);
641
0
    }
642
14
    php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
643
14
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
644
14
    return NULL;
645
14
  }
646
647
2.23k
  start_delimiter = delimiter;
648
2.23k
  if ((pp = strchr("([{< )]}> )]}>", delimiter)))
649
57
    delimiter = pp[5];
650
2.23k
  end_delimiter = delimiter;
651
652
2.23k
  pp = p;
653
654
2.23k
  if (start_delimiter == end_delimiter) {
655
    /* We need to iterate through the pattern, searching for the ending delimiter,
656
       but skipping the backslashed delimiters.  If the ending delimiter is not
657
       found, display a warning. */
658
1.67M
    while (pp < end_p) {
659
1.67M
      if (*pp == '\\' && pp + 1 < end_p) pp++;
660
1.60M
      else if (*pp == delimiter)
661
2.15k
        break;
662
1.67M
      pp++;
663
1.67M
    }
664
2.17k
  } else {
665
    /* We iterate through the pattern, searching for the matching ending
666
     * delimiter. For each matching starting delimiter, we increment nesting
667
     * level, and decrement it for each matching ending delimiter. If we
668
     * reach the end of the pattern without matching, display a warning.
669
     */
670
51
    int brackets = 1;   /* brackets nesting level */
671
33.6k
    while (pp < end_p) {
672
33.5k
      if (*pp == '\\' && pp + 1 < end_p) pp++;
673
33.0k
      else if (*pp == end_delimiter && --brackets <= 0)
674
2
        break;
675
33.0k
      else if (*pp == start_delimiter)
676
1.93k
        brackets++;
677
33.5k
      pp++;
678
33.5k
    }
679
51
  }
680
681
2.23k
  if (pp >= end_p) {
682
75
    if (key != regex) {
683
0
      zend_string_release_ex(key, 0);
684
0
    }
685
75
    if (start_delimiter == end_delimiter) {
686
26
      php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
687
49
    } else {
688
49
      php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
689
49
    }
690
75
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
691
75
    return NULL;
692
75
  }
693
694
  /* Make a copy of the actual pattern. */
695
2.15k
  pattern_len = pp - p;
696
2.15k
  pattern = estrndup(p, pattern_len);
697
698
  /* Move on to the options */
699
2.15k
  pp++;
700
701
  /* Parse through the options, setting appropriate flags.  Display
702
     a warning if we encounter an unknown modifier. */
703
4.62k
  while (pp < end_p) {
704
2.58k
    switch (*pp++) {
705
      /* Perl compatible options */
706
924
      case 'i': coptions |= PCRE2_CASELESS;   break;
707
99
      case 'm': coptions |= PCRE2_MULTILINE;   break;
708
42
      case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
709
159
      case 's': coptions |= PCRE2_DOTALL;   break;
710
0
      case 'x': coptions |= PCRE2_EXTENDED;   break;
711
712
      /* PCRE specific options */
713
336
      case 'A': coptions |= PCRE2_ANCHORED;   break;
714
99
      case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
715
0
#ifdef PCRE2_EXTRA_CASELESS_RESTRICT
716
6
      case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
717
0
#endif
718
1
      case 'S': /* Pass. */         break;
719
1
      case 'X': /* Pass. */         break;
720
195
      case 'U': coptions |= PCRE2_UNGREEDY;   break;
721
343
      case 'u': coptions |= PCRE2_UTF;
722
  /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
723
     characters, even in UTF-8 mode. However, this can be changed by setting
724
     the PCRE2_UCP option. */
725
343
#ifdef PCRE2_UCP
726
343
            coptions |= PCRE2_UCP;
727
343
#endif
728
343
        break;
729
0
      case 'J': coptions |= PCRE2_DUPNAMES;   break;
730
731
37
      case ' ':
732
259
      case '\n':
733
268
      case '\r':
734
268
        break;
735
736
5
      case 'e': /* legacy eval */
737
108
      default:
738
108
        if (pp[-1]) {
739
82
          php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
740
82
        } else {
741
26
          php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
742
26
        }
743
108
        pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
744
108
        efree(pattern);
745
108
        if (key != regex) {
746
0
          zend_string_release_ex(key, 0);
747
0
        }
748
108
        return NULL;
749
2.58k
    }
750
2.58k
  }
751
752
2.04k
  if (key != regex) {
753
0
    tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(ctype_string));
754
0
    if (!tables) {
755
0
      zend_string *_k;
756
0
      tables = pcre2_maketables(gctx);
757
0
      if (UNEXPECTED(!tables)) {
758
0
        php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
759
0
        pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
760
0
        zend_string_release_ex(key, 0);
761
0
        efree(pattern);
762
0
        return NULL;
763
0
      }
764
0
      _k = zend_string_init(ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)), 1);
765
0
      GC_MAKE_PERSISTENT_LOCAL(_k);
766
0
      zend_hash_add_ptr(&char_tables, _k, (void *)tables);
767
0
      zend_string_release(_k);
768
0
    }
769
0
  }
770
2.04k
  pcre2_set_character_tables(cctx, tables);
771
772
2.04k
  pcre2_set_compile_extra_options(cctx, eoptions);
773
774
  /* Compile pattern and display a warning if compilation failed. */
775
2.04k
  re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
776
777
2.04k
  if (re == NULL) {
778
985
    if (key != regex) {
779
0
      zend_string_release_ex(key, 0);
780
0
    }
781
985
    pcre2_get_error_message(errnumber, error, sizeof(error));
782
985
    php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
783
985
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
784
985
    efree(pattern);
785
985
    return NULL;
786
985
  }
787
788
#ifdef HAVE_PCRE_JIT_SUPPORT
789
  if (PCRE_G(jit)) {
790
    /* Enable PCRE JIT compiler */
791
    rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
792
    if (EXPECTED(rc >= 0)) {
793
      size_t jit_size = 0;
794
      if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
795
        poptions |= PREG_JIT;
796
      }
797
    } else if (rc == PCRE2_ERROR_NOMEMORY) {
798
      php_error_docref(NULL, E_WARNING,
799
        "Allocation of JIT memory failed, PCRE JIT will be disabled. "
800
        "This is likely caused by security restrictions. "
801
        "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
802
      PCRE_G(jit) = 0;
803
    } else {
804
      pcre2_get_error_message(rc, error, sizeof(error));
805
      php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
806
      pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
807
    }
808
  }
809
#endif
810
1.06k
  efree(pattern);
811
812
  /*
813
   * If we reached cache limit, clean out the items from the head of the list;
814
   * these are supposedly the oldest ones (but not necessarily the least used
815
   * ones).
816
   */
817
1.06k
  if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
818
0
    int num_clean = PCRE_CACHE_SIZE / 8;
819
0
    zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
820
0
  }
821
822
  /* Store the compiled pattern and extra info in the cache. */
823
1.06k
  new_entry.re = re;
824
1.06k
  new_entry.preg_options = poptions;
825
1.06k
  new_entry.compile_options = coptions;
826
1.06k
  new_entry.refcount = 0;
827
1.06k
  new_entry.subpats_table = NULL;
828
829
1.06k
  rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
830
1.06k
  if (rc < 0) {
831
0
    if (key != regex) {
832
0
      zend_string_release_ex(key, 0);
833
0
    }
834
0
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
835
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
836
0
    return NULL;
837
0
  }
838
839
1.06k
  rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
840
1.06k
  if (rc < 0) {
841
0
    if (key != regex) {
842
0
      zend_string_release_ex(key, 0);
843
0
    }
844
0
    php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
845
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
846
0
    return NULL;
847
0
  }
848
849
  /*
850
   * Interned strings are not duplicated when stored in HashTable,
851
   * but all the interned strings created during HTTP request are removed
852
   * at end of request. However PCRE_G(pcre_cache) must be consistent
853
   * on the next request as well. So we disable usage of interned strings
854
   * as hash keys especually for this table.
855
   * See bug #63180
856
   */
857
1.06k
  if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
858
354
    zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
859
354
    GC_MAKE_PERSISTENT_LOCAL(str);
860
861
354
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
862
354
    zend_string_release(str);
863
708
  } else {
864
708
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
865
708
  }
866
867
1.06k
  if (key != regex) {
868
0
    zend_string_release_ex(key, 0);
869
0
  }
870
871
1.06k
  return ret;
872
1.06k
}
873
/* }}} */
874
875
/* {{{ pcre_get_compiled_regex_cache */
876
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
877
4.57k
{
878
4.57k
  return pcre_get_compiled_regex_cache_ex(regex, true);
879
4.57k
}
880
/* }}} */
881
882
/* {{{ pcre_get_compiled_regex */
883
PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
884
0
{
885
0
  pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
886
887
0
  if (capture_count) {
888
0
    *capture_count = pce ? pce->capture_count : 0;
889
0
  }
890
891
0
  return pce ? pce->re : NULL;
892
0
}
893
/* }}} */
894
895
/* XXX For the cases where it's only about match yes/no and no capture
896
    required, perhaps just a minimum sized data would suffice. */
897
PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
898
0
{/*{{{*/
899
900
0
  assert(NULL != re);
901
902
0
  if (EXPECTED(!mdata_used)) {
903
0
    int rc = 0;
904
905
0
    if (!capture_count) {
906
      /* As we deal with a non cached pattern, no other way to gather this info. */
907
0
      rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
908
0
    }
909
910
0
    if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
911
0
      mdata_used = 1;
912
0
      return mdata;
913
0
    }
914
0
  }
915
916
0
  return pcre2_match_data_create_from_pattern(re, gctx);
917
0
}/*}}}*/
918
919
PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
920
0
{/*{{{*/
921
0
  if (UNEXPECTED(match_data != mdata)) {
922
0
    pcre2_match_data_free(match_data);
923
0
  } else {
924
0
    mdata_used = 0;
925
0
  }
926
0
}/*}}}*/
927
928
0
static void init_unmatched_null_pair(zval *pair) {
929
0
  zval val1, val2;
930
0
  ZVAL_NULL(&val1);
931
0
  ZVAL_LONG(&val2, -1);
932
0
  ZVAL_ARR(pair, zend_new_pair(&val1, &val2));
933
0
}
934
935
0
static void init_unmatched_empty_pair(zval *pair) {
936
0
  zval val1, val2;
937
0
  ZVAL_EMPTY_STRING(&val1);
938
0
  ZVAL_LONG(&val2, -1);
939
0
  ZVAL_ARR(pair, zend_new_pair(&val1, &val2));
940
0
}
941
942
static zend_always_inline void populate_match_value_str(
943
382
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
944
382
  ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
945
382
}
946
947
static zend_always_inline void populate_match_value(
948
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
949
382
    bool unmatched_as_null) {
950
382
  if (PCRE2_UNSET == start_offset) {
951
0
    if (unmatched_as_null) {
952
0
      ZVAL_NULL(val);
953
0
    } else {
954
0
      ZVAL_EMPTY_STRING(val);
955
0
    }
956
382
  } else {
957
382
    populate_match_value_str(val, subject, start_offset, end_offset);
958
382
  }
959
382
}
960
961
static inline void add_named(
962
0
    HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
963
0
  ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
964
965
  /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
966
   * In this case we want to preserve the one that actually has a value. */
967
0
  if (!unmatched) {
968
0
    zend_hash_update(subpats, name, val);
969
0
  } else {
970
0
    if (!zend_hash_add(subpats, name, val)) {
971
0
      return;
972
0
    }
973
0
  }
974
0
  Z_TRY_ADDREF_P(val);
975
0
}
976
977
/* {{{ add_offset_pair */
978
static inline void add_offset_pair(
979
    HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
980
    zend_string *name, zend_long unmatched_as_null)
981
0
{
982
0
  zval match_pair;
983
984
  /* Add (match, offset) to the return value */
985
0
  if (PCRE2_UNSET == start_offset) {
986
0
    if (unmatched_as_null) {
987
0
      do {
988
0
        if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
989
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
990
0
            init_unmatched_null_pair(&match_pair);
991
0
            break;
992
0
          } else {
993
0
            init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
994
0
          }
995
0
        }
996
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
997
0
      } while (0);
998
0
    } else {
999
0
      do {
1000
0
        if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1001
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
1002
0
            init_unmatched_empty_pair(&match_pair);
1003
0
            break;
1004
0
          } else {
1005
0
            init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
1006
0
          }
1007
0
        }
1008
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1009
0
      } while (0);
1010
0
    }
1011
0
  } else {
1012
0
    zval val1, val2;
1013
0
    populate_match_value_str(&val1, subject, start_offset, end_offset);
1014
0
    ZVAL_LONG(&val2, start_offset);
1015
0
    ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1016
0
  }
1017
1018
0
  if (name) {
1019
0
    add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1020
0
  }
1021
0
  zend_hash_next_index_insert_new(result, &match_pair);
1022
0
}
1023
/* }}} */
1024
1025
static void populate_subpat_array(
1026
    HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1027
196
    uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1028
196
  zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1029
196
  zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1030
196
  zval val;
1031
196
  int i;
1032
196
  if (subpat_names) {
1033
0
    if (offset_capture) {
1034
0
      for (i = 0; i < count; i++) {
1035
0
        add_offset_pair(
1036
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1],
1037
0
          subpat_names[i], unmatched_as_null);
1038
0
      }
1039
0
      if (unmatched_as_null) {
1040
0
        for (i = count; i < num_subpats; i++) {
1041
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1042
0
        }
1043
0
      }
1044
0
    } else {
1045
0
      for (i = 0; i < count; i++) {
1046
0
        populate_match_value(
1047
0
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1048
0
        if (subpat_names[i]) {
1049
0
          add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
1050
0
        }
1051
0
        zend_hash_next_index_insert_new(subpats_ht, &val);
1052
0
      }
1053
0
      if (unmatched_as_null) {
1054
0
        for (i = count; i < num_subpats; i++) {
1055
0
          ZVAL_NULL(&val);
1056
0
          if (subpat_names[i]) {
1057
0
            zend_hash_add(subpats_ht, subpat_names[i], &val);
1058
0
          }
1059
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1060
0
        }
1061
0
      }
1062
0
    }
1063
196
  } else {
1064
196
    if (offset_capture) {
1065
0
      for (i = 0; i < count; i++) {
1066
0
        add_offset_pair(
1067
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1068
0
      }
1069
0
      if (unmatched_as_null) {
1070
0
        for (i = count; i < num_subpats; i++) {
1071
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1072
0
        }
1073
0
      }
1074
196
    } else {
1075
578
      for (i = 0; i < count; i++) {
1076
382
        populate_match_value(
1077
382
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1078
382
        zend_hash_next_index_insert_new(subpats_ht, &val);
1079
382
      }
1080
196
      if (unmatched_as_null) {
1081
0
        ZVAL_NULL(&val);
1082
0
        for (i = count; i < num_subpats; i++) {
1083
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1084
0
        }
1085
0
      }
1086
196
    }
1087
196
  }
1088
  /* Add MARK, if available */
1089
196
  if (mark) {
1090
0
    ZVAL_STRING(&val, (char *)mark);
1091
0
    zend_hash_str_add_new(subpats_ht, ZEND_STRL("MARK"), &val);
1092
0
  }
1093
196
}
1094
1095
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
1096
4.09k
{
1097
  /* parameters */
1098
4.09k
  zend_string    *regex;      /* Regular expression */
1099
4.09k
  zend_string    *subject;      /* String to match against */
1100
4.09k
  pcre_cache_entry *pce;        /* Compiled regular expression */
1101
4.09k
  zval       *subpats = NULL; /* Array for subpatterns */
1102
4.09k
  zend_long     flags = 0;    /* Match control flags */
1103
4.09k
  zend_long     start_offset = 0; /* Where the new search starts */
1104
1105
12.2k
  ZEND_PARSE_PARAMETERS_START(2, 5)
1106
16.3k
    Z_PARAM_STR(regex)
1107
20.4k
    Z_PARAM_STR(subject)
1108
4.08k
    Z_PARAM_OPTIONAL
1109
8.17k
    Z_PARAM_ZVAL(subpats)
1110
8.17k
    Z_PARAM_LONG(flags)
1111
0
    Z_PARAM_LONG(start_offset)
1112
4.09k
  ZEND_PARSE_PARAMETERS_END();
1113
1114
  /* Compile regex or get it from cache. */
1115
4.08k
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1116
1.10k
    RETURN_FALSE;
1117
1.10k
  }
1118
1119
2.98k
  if (start_offset == ZEND_LONG_MIN) {
1120
0
    zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
1121
0
    RETURN_THROWS();
1122
0
  }
1123
1124
2.98k
  pce->refcount++;
1125
2.98k
  php_pcre_match_impl(pce, subject, return_value, subpats,
1126
2.98k
    global, flags, start_offset);
1127
2.98k
  pce->refcount--;
1128
2.98k
}
1129
/* }}} */
1130
1131
static zend_always_inline bool is_known_valid_utf8(
1132
840
    zend_string *subject_str, PCRE2_SIZE start_offset) {
1133
840
  if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1134
    /* We don't know whether the string is valid UTF-8 or not. */
1135
838
    return 0;
1136
838
  }
1137
1138
2
  if (start_offset == ZSTR_LEN(subject_str)) {
1139
    /* Degenerate case: Offset points to end of string. */
1140
2
    return 1;
1141
2
  }
1142
1143
  /* Check that the offset does not point to an UTF-8 continuation byte. */
1144
0
  return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1145
2
}
1146
1147
/* {{{ php_pcre_match_impl() */
1148
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1149
  zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1150
2.98k
{
1151
2.98k
  zval       result_set;    /* Holds a set of subpatterns after
1152
                       a global match */
1153
2.98k
  HashTable    **match_sets = NULL; /* An array of sets of matches for each
1154
                       subpattern after a global match */
1155
2.98k
  uint32_t     options;     /* Execution options */
1156
2.98k
  int        count;       /* Count of matched subpatterns */
1157
2.98k
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1158
2.98k
  int        matched;     /* Has anything matched */
1159
2.98k
  zend_string    **subpat_names;    /* Array for named subpatterns */
1160
2.98k
  size_t       i;
1161
2.98k
  uint32_t     subpats_order;   /* Order of subpattern matches */
1162
2.98k
  uint32_t     offset_capture;  /* Capture match offsets: yes/no */
1163
2.98k
  zend_long    unmatched_as_null; /* Null non-matches: yes/no */
1164
2.98k
  PCRE2_SPTR       mark = NULL;   /* Target for MARK name */
1165
2.98k
  HashTable   *marks = NULL;    /* Array of marks for PREG_PATTERN_ORDER */
1166
2.98k
  pcre2_match_data *match_data;
1167
2.98k
  PCRE2_SIZE     start_offset2, orig_start_offset;
1168
1169
2.98k
  char *subject = ZSTR_VAL(subject_str);
1170
2.98k
  size_t subject_len = ZSTR_LEN(subject_str);
1171
1172
  /* Overwrite the passed-in value for subpatterns with an empty array. */
1173
2.98k
  if (subpats != NULL) {
1174
0
    subpats = zend_try_array_init(subpats);
1175
0
    if (!subpats) {
1176
0
      RETURN_THROWS();
1177
0
    }
1178
0
  }
1179
1180
2.98k
  subpats_order = global ? PREG_PATTERN_ORDER : 0;
1181
1182
2.98k
  if (flags) {
1183
0
    offset_capture = flags & PREG_OFFSET_CAPTURE;
1184
0
    unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1185
1186
    /*
1187
     * subpats_order is pre-set to pattern mode so we change it only if
1188
     * necessary.
1189
     */
1190
0
    if (flags & 0xff) {
1191
0
      subpats_order = flags & 0xff;
1192
0
      if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1193
0
        (!global && subpats_order != 0)) {
1194
0
        zend_argument_value_error(4, "must be a PREG_* constant");
1195
0
        RETURN_THROWS();
1196
0
      }
1197
0
    }
1198
2.98k
  } else {
1199
2.98k
    offset_capture = 0;
1200
2.98k
    unmatched_as_null = 0;
1201
2.98k
  }
1202
1203
  /* Negative offset counts from the end of the string. */
1204
2.98k
  if (start_offset < 0) {
1205
0
    if ((PCRE2_SIZE)-start_offset <= subject_len) {
1206
0
      start_offset2 = subject_len + start_offset;
1207
0
    } else {
1208
0
      start_offset2 = 0;
1209
0
    }
1210
2.98k
  } else {
1211
2.98k
    start_offset2 = (PCRE2_SIZE)start_offset;
1212
2.98k
  }
1213
1214
2.98k
  if (start_offset2 > subject_len) {
1215
0
    pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1216
0
    RETURN_FALSE;
1217
0
  }
1218
1219
  /* Calculate the size of the offsets array, and allocate memory for it. */
1220
2.98k
  num_subpats = pce->capture_count + 1;
1221
1222
  /*
1223
   * Build a mapping from subpattern numbers to their names. We will
1224
   * allocate the table only if there are any named subpatterns.
1225
   */
1226
2.98k
  subpat_names = NULL;
1227
2.98k
  if (subpats && pce->name_count > 0) {
1228
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1229
0
    if (UNEXPECTED(!subpat_names)) {
1230
0
      RETURN_FALSE;
1231
0
    }
1232
0
  }
1233
1234
2.98k
  matched = 0;
1235
2.98k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1236
1237
2.98k
  if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1238
2.95k
    match_data = mdata;
1239
2.95k
  } else {
1240
29
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1241
29
    if (!match_data) {
1242
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1243
0
      RETURN_FALSE;
1244
0
    }
1245
29
  }
1246
1247
  /* Allocate match sets array and initialize the values. */
1248
2.98k
  if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1249
0
    match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1250
0
    for (i=0; i<num_subpats; i++) {
1251
0
      match_sets[i] = zend_new_array(0);
1252
0
    }
1253
0
  }
1254
1255
  /* Array of subpattern offsets */
1256
2.98k
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1257
1258
2.98k
  orig_start_offset = start_offset2;
1259
2.98k
  options =
1260
2.98k
    (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1261
2.98k
      ? 0 : PCRE2_NO_UTF_CHECK;
1262
1263
  /* Execute the regular expression. */
1264
#ifdef HAVE_PCRE_JIT_SUPPORT
1265
  if ((pce->preg_options & PREG_JIT) && options) {
1266
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1267
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1268
  } else
1269
#endif
1270
2.98k
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1271
2.98k
      options, match_data, mctx);
1272
1273
2.98k
  while (1) {
1274
    /* If something has matched */
1275
2.98k
    if (count >= 0) {
1276
      /* Check for too many substrings condition. */
1277
200
      if (UNEXPECTED(count == 0)) {
1278
0
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1279
0
        count = num_subpats;
1280
0
      }
1281
1282
200
matched:
1283
200
      matched++;
1284
1285
      /* If subpatterns array has been passed, fill it in with values. */
1286
200
      if (subpats != NULL) {
1287
        /* Try to get the list of substrings and display a warning if failed. */
1288
0
        if (UNEXPECTED(offsets[1] < offsets[0])) {
1289
0
          if (match_sets) efree(match_sets);
1290
0
          php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1291
0
          RETURN_FALSE;
1292
0
        }
1293
1294
0
        if (global) { /* global pattern matching */
1295
0
          if (subpats_order == PREG_PATTERN_ORDER) {
1296
            /* For each subpattern, insert it into the appropriate array. */
1297
0
            if (offset_capture) {
1298
0
              for (i = 0; i < count; i++) {
1299
0
                add_offset_pair(
1300
0
                  match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1301
0
                  NULL, unmatched_as_null);
1302
0
              }
1303
0
            } else {
1304
0
              for (i = 0; i < count; i++) {
1305
0
                zval val;
1306
0
                populate_match_value(
1307
0
                  &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1308
0
                zend_hash_next_index_insert_new(match_sets[i], &val);
1309
0
              }
1310
0
            }
1311
0
            mark = pcre2_get_mark(match_data);
1312
            /* Add MARK, if available */
1313
0
            if (mark) {
1314
0
              if (!marks) {
1315
0
                marks = zend_new_array(0);
1316
0
              }
1317
0
              zval tmp;
1318
0
              ZVAL_STRING(&tmp, (char *) mark);
1319
0
              zend_hash_index_add_new(marks, matched - 1, &tmp);
1320
0
            }
1321
            /*
1322
             * If the number of captured subpatterns on this run is
1323
             * less than the total possible number, pad the result
1324
             * arrays with NULLs or empty strings.
1325
             */
1326
0
            if (count < num_subpats) {
1327
0
              for (int i = count; i < num_subpats; i++) {
1328
0
                if (offset_capture) {
1329
0
                  add_offset_pair(
1330
0
                    match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1331
0
                    NULL, unmatched_as_null);
1332
0
                } else if (unmatched_as_null) {
1333
0
                  zval tmp;
1334
0
                  ZVAL_NULL(&tmp);
1335
0
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
1336
0
                } else {
1337
0
                  zval tmp;
1338
0
                  ZVAL_EMPTY_STRING(&tmp);
1339
0
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
1340
0
                }
1341
0
              }
1342
0
            }
1343
0
          } else {
1344
            /* Allocate and populate the result set array */
1345
0
            mark = pcre2_get_mark(match_data);
1346
0
            array_init_size(&result_set, count + (mark ? 1 : 0));
1347
0
            populate_subpat_array(
1348
0
              Z_ARRVAL(result_set), subject, offsets, subpat_names,
1349
0
              num_subpats, count, mark, flags);
1350
            /* And add it to the output array */
1351
0
            zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1352
0
          }
1353
0
        } else {     /* single pattern matching */
1354
          /* For each subpattern, insert it into the subpatterns array. */
1355
0
          mark = pcre2_get_mark(match_data);
1356
0
          populate_subpat_array(
1357
0
            Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags);
1358
0
          break;
1359
0
        }
1360
0
      }
1361
1362
      /* Advance to the next piece. */
1363
200
      start_offset2 = offsets[1];
1364
1365
      /* If we have matched an empty string, mimic what Perl's /g options does.
1366
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1367
         the match again at the same point. If this fails (picked up above) we
1368
         advance to the next character. */
1369
200
      if (start_offset2 == offsets[0]) {
1370
71
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1371
71
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1372
71
        if (count >= 0) {
1373
12
          if (global) {
1374
0
            goto matched;
1375
12
          } else {
1376
12
            break;
1377
12
          }
1378
59
        } else if (count == PCRE2_ERROR_NOMATCH) {
1379
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1380
             this is not necessarily the end. We need to advance
1381
             the start offset, and continue. Fudge the offset values
1382
             to achieve this, unless we're already at the end of the string. */
1383
59
          if (start_offset2 < subject_len) {
1384
47
            size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1385
1386
47
            start_offset2 += unit_len;
1387
47
          } else {
1388
12
            break;
1389
12
          }
1390
59
        } else {
1391
0
          goto error;
1392
0
        }
1393
71
      }
1394
2.78k
    } else if (count == PCRE2_ERROR_NOMATCH) {
1395
2.64k
      break;
1396
2.64k
    } else {
1397
144
error:
1398
144
      pcre_handle_exec_error(count);
1399
144
      break;
1400
144
    }
1401
1402
176
    if (!global) {
1403
176
      break;
1404
176
    }
1405
1406
    /* Execute the regular expression. */
1407
#ifdef HAVE_PCRE_JIT_SUPPORT
1408
    if ((pce->preg_options & PREG_JIT)) {
1409
      if (start_offset2 > subject_len) {
1410
        pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1411
        break;
1412
      }
1413
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1414
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1415
    } else
1416
#endif
1417
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1418
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1419
0
  }
1420
2.98k
  if (match_data != mdata) {
1421
29
    pcre2_match_data_free(match_data);
1422
29
  }
1423
1424
  /* Add the match sets to the output array and clean up */
1425
2.98k
  if (match_sets) {
1426
0
    if (subpat_names) {
1427
0
      for (i = 0; i < num_subpats; i++) {
1428
0
        zval wrapper;
1429
0
        ZVAL_ARR(&wrapper, match_sets[i]);
1430
0
        if (subpat_names[i]) {
1431
0
          zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1432
0
          GC_ADDREF(match_sets[i]);
1433
0
        }
1434
0
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1435
0
      }
1436
0
    } else {
1437
0
      for (i = 0; i < num_subpats; i++) {
1438
0
        zval wrapper;
1439
0
        ZVAL_ARR(&wrapper, match_sets[i]);
1440
0
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1441
0
      }
1442
0
    }
1443
0
    efree(match_sets);
1444
1445
0
    if (marks) {
1446
0
      zval tmp;
1447
0
      ZVAL_ARR(&tmp, marks);
1448
0
      zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1449
0
    }
1450
0
  }
1451
1452
2.98k
  if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1453
    /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1454
2.84k
    if ((pce->compile_options & PCRE2_UTF)
1455
2.84k
        && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1456
190
      GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1457
190
    }
1458
1459
2.84k
    RETVAL_LONG(matched);
1460
2.84k
  } else {
1461
144
    RETVAL_FALSE;
1462
144
  }
1463
2.98k
}
1464
/* }}} */
1465
1466
/* {{{ Perform a Perl-style regular expression match */
1467
PHP_FUNCTION(preg_match)
1468
4.09k
{
1469
4.09k
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1470
4.09k
}
1471
/* }}} */
1472
1473
ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1474
0
{
1475
0
  zval regex_tmp, subject_tmp;
1476
0
  zend_string *regex, *subject;
1477
1478
0
  Z_FLF_PARAM_STR(1, regex, regex_tmp);
1479
0
  Z_FLF_PARAM_STR(2, subject, subject_tmp);
1480
1481
  /* Compile regex or get it from cache. */
1482
0
  pcre_cache_entry *pce;
1483
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1484
0
    RETURN_FALSE;
1485
0
  }
1486
1487
0
  pce->refcount++;
1488
0
  php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1489
    /* global */ false, /* flags */ 0, /* start_offset */ 0);
1490
0
  pce->refcount--;
1491
1492
0
flf_clean:
1493
0
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1494
0
  Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1495
0
}
1496
1497
/* {{{ Perform a Perl-style global regular expression match */
1498
PHP_FUNCTION(preg_match_all)
1499
0
{
1500
0
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1501
0
}
1502
/* }}} */
1503
1504
/* {{{ preg_get_backref */
1505
static int preg_get_backref(char **str, int *backref)
1506
576k
{
1507
576k
  char in_brace = 0;
1508
576k
  char *walk = *str;
1509
1510
576k
  if (walk[1] == 0)
1511
2.93k
    return 0;
1512
1513
573k
  if (*walk == '$' && walk[1] == '{') {
1514
32
    in_brace = 1;
1515
32
    walk++;
1516
32
  }
1517
573k
  walk++;
1518
1519
573k
  if (*walk >= '0' && *walk <= '9') {
1520
25.3k
    *backref = *walk - '0';
1521
25.3k
    walk++;
1522
25.3k
  } else
1523
547k
    return 0;
1524
1525
25.3k
  if (*walk && *walk >= '0' && *walk <= '9') {
1526
25.2k
    *backref = *backref * 10 + *walk - '0';
1527
25.2k
    walk++;
1528
25.2k
  }
1529
1530
25.3k
  if (in_brace) {
1531
0
    if (*walk != '}')
1532
0
      return 0;
1533
0
    else
1534
0
      walk++;
1535
0
  }
1536
1537
25.3k
  *str = walk;
1538
25.3k
  return 1;
1539
25.3k
}
1540
/* }}} */
1541
1542
/* Return NULL if an exception has occurred */
1543
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1544
196
{
1545
196
  zend_string *result_str = NULL;
1546
196
  zval     retval;      /* Function return value */
1547
196
  zval       arg;       /* Argument to pass to function */
1548
1549
196
  array_init_size(&arg, count + (mark ? 1 : 0));
1550
196
  populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags);
1551
1552
196
  fci->retval = &retval;
1553
196
  fci->param_count = 1;
1554
196
  fci->params = &arg;
1555
196
  zend_call_function(fci, fcc);
1556
196
  zval_ptr_dtor(&arg);
1557
196
  if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1558
28
    return Z_STR(retval);
1559
28
  }
1560
  /* No Exception has occurred */
1561
168
  else if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) {
1562
156
    result_str = zval_try_get_string_func(&retval);
1563
156
  }
1564
168
  zval_ptr_dtor(&retval);
1565
1566
168
  return result_str;
1567
196
}
1568
1569
/* {{{ php_pcre_replace */
1570
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1571
                zend_string *subject_str,
1572
                const char *subject, size_t subject_len,
1573
                zend_string *replace_str,
1574
                size_t limit, size_t *replace_count)
1575
333
{
1576
333
  pcre_cache_entry  *pce;         /* Compiled regular expression */
1577
333
  zend_string     *result;      /* Function result */
1578
1579
  /* Abort on pending exception, e.g. thrown from __toString(). */
1580
333
  if (UNEXPECTED(EG(exception))) {
1581
0
    return NULL;
1582
0
  }
1583
1584
  /* Compile regex or get it from cache. */
1585
333
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1586
64
    return NULL;
1587
64
  }
1588
269
  pce->refcount++;
1589
269
  result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1590
269
    limit, replace_count);
1591
269
  pce->refcount--;
1592
1593
269
  return result;
1594
333
}
1595
/* }}} */
1596
1597
/* {{{ php_pcre_replace_impl() */
1598
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1599
269
{
1600
269
  uint32_t     options;     /* Execution options */
1601
269
  int        count;       /* Count of matched subpatterns */
1602
269
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1603
269
  size_t       new_len;     /* Length of needed storage */
1604
269
  size_t       alloc_len;     /* Actual allocated length */
1605
269
  size_t       match_len;     /* Length of the current match */
1606
269
  int        backref;     /* Backreference number */
1607
269
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1608
269
  size_t       last_end_offset; /* Where the last search ended */
1609
269
  char      *walkbuf,     /* Location of current replacement in the result */
1610
269
          *walk,        /* Used to walk the replacement string */
1611
269
           walk_last;     /* Last walked character */
1612
269
  const char    *match,       /* The current match */
1613
269
          *piece,       /* The current piece of subject */
1614
269
          *replace_end;   /* End of replacement string */
1615
269
  size_t      result_len;     /* Length of result */
1616
269
  zend_string   *result;      /* Result of replacement */
1617
269
  pcre2_match_data *match_data;
1618
1619
  /* Calculate the size of the offsets array, and allocate memory for it. */
1620
269
  num_subpats = pce->capture_count + 1;
1621
269
  alloc_len = 0;
1622
269
  result = NULL;
1623
1624
  /* Initialize */
1625
269
  match = NULL;
1626
269
  start_offset = 0;
1627
269
  last_end_offset = 0;
1628
269
  result_len = 0;
1629
269
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1630
1631
269
  if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1632
124
    match_data = mdata;
1633
145
  } else {
1634
145
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1635
145
    if (!match_data) {
1636
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1637
0
      return NULL;
1638
0
    }
1639
145
  }
1640
1641
269
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1642
1643
  /* Array of subpattern offsets */
1644
269
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1645
1646
  /* Execute the regular expression. */
1647
#ifdef HAVE_PCRE_JIT_SUPPORT
1648
  if ((pce->preg_options & PREG_JIT) && options) {
1649
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1650
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1651
  } else
1652
#endif
1653
269
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1654
269
      options, match_data, mctx);
1655
1656
71.8k
  while (1) {
1657
71.8k
    piece = subject + last_end_offset;
1658
1659
71.8k
    if (count >= 0 && limit > 0) {
1660
71.6k
      bool simple_string;
1661
1662
      /* Check for too many substrings condition. */
1663
71.6k
      if (UNEXPECTED(count == 0)) {
1664
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1665
0
        count = num_subpats;
1666
0
      }
1667
1668
71.8k
matched:
1669
71.8k
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1670
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1671
0
        if (result) {
1672
0
          zend_string_release_ex(result, 0);
1673
0
          result = NULL;
1674
0
        }
1675
0
        break;
1676
0
      }
1677
1678
71.8k
      if (replace_count) {
1679
71.8k
        ++*replace_count;
1680
71.8k
      }
1681
1682
      /* Set the match location in subject */
1683
71.8k
      match = subject + offsets[0];
1684
1685
71.8k
      new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1686
1687
71.8k
      walk = ZSTR_VAL(replace_str);
1688
71.8k
      replace_end = walk + ZSTR_LEN(replace_str);
1689
71.8k
      walk_last = 0;
1690
71.8k
      simple_string = 1;
1691
19.6M
      while (walk < replace_end) {
1692
19.5M
        if ('\\' == *walk || '$' == *walk) {
1693
288k
          simple_string = 0;
1694
288k
          if (walk_last == '\\') {
1695
22
            walk++;
1696
22
            walk_last = 0;
1697
22
            continue;
1698
22
          }
1699
288k
          if (preg_get_backref(&walk, &backref)) {
1700
12.6k
            if (backref < count)
1701
1
              new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1702
12.6k
            continue;
1703
12.6k
          }
1704
288k
        }
1705
19.5M
        new_len++;
1706
19.5M
        walk++;
1707
19.5M
        walk_last = walk[-1];
1708
19.5M
      }
1709
1710
71.8k
      if (new_len >= alloc_len) {
1711
452
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1712
452
        if (result == NULL) {
1713
136
          result = zend_string_alloc(alloc_len, 0);
1714
316
        } else {
1715
316
          result = zend_string_extend(result, alloc_len, 0);
1716
316
        }
1717
452
      }
1718
1719
71.8k
      if (match-piece > 0) {
1720
        /* copy the part of the string before the match */
1721
71.3k
        memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1722
71.3k
        result_len += (match-piece);
1723
71.3k
      }
1724
1725
71.8k
      if (simple_string) {
1726
        /* copy replacement */
1727
54.2k
        memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1728
54.2k
        result_len += ZSTR_LEN(replace_str);
1729
54.2k
      } else {
1730
        /* copy replacement and backrefs */
1731
17.6k
        walkbuf = ZSTR_VAL(result) + result_len;
1732
1733
17.6k
        walk = ZSTR_VAL(replace_str);
1734
17.6k
        walk_last = 0;
1735
19.5M
        while (walk < replace_end) {
1736
19.5M
          if ('\\' == *walk || '$' == *walk) {
1737
288k
            if (walk_last == '\\') {
1738
22
              *(walkbuf-1) = *walk++;
1739
22
              walk_last = 0;
1740
22
              continue;
1741
22
            }
1742
288k
            if (preg_get_backref(&walk, &backref)) {
1743
12.6k
              if (backref < count) {
1744
1
                if (offsets[backref<<1] < SIZE_MAX) {
1745
1
                  match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1746
1
                  walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1747
1
                }
1748
1
              }
1749
12.6k
              continue;
1750
12.6k
            }
1751
288k
          }
1752
19.5M
          *walkbuf++ = *walk++;
1753
19.5M
          walk_last = walk[-1];
1754
19.5M
        }
1755
17.6k
        *walkbuf = '\0';
1756
        /* increment the result length by how much we've added to the string */
1757
17.6k
        result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1758
17.6k
      }
1759
1760
71.8k
      limit--;
1761
1762
      /* Advance to the next piece. */
1763
71.8k
      start_offset = last_end_offset = offsets[1];
1764
1765
      /* If we have matched an empty string, mimic what Perl's /g options does.
1766
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1767
         the match again at the same point. If this fails (picked up above) we
1768
         advance to the next character. */
1769
71.8k
      if (start_offset == offsets[0]) {
1770
69.7k
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1771
69.7k
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1772
1773
69.7k
        piece = subject + start_offset;
1774
69.7k
        if (count >= 0 && limit > 0) {
1775
243
          goto matched;
1776
69.5k
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1777
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1778
             this is not necessarily the end. We need to advance
1779
             the start offset, and continue. Fudge the offset values
1780
             to achieve this, unless we're already at the end of the string. */
1781
69.5k
          if (start_offset < subject_len) {
1782
69.5k
            size_t unit_len = calculate_unit_length(pce, piece);
1783
69.5k
            start_offset += unit_len;
1784
69.5k
          } else {
1785
45
            goto not_matched;
1786
45
          }
1787
69.5k
        } else {
1788
0
          goto error;
1789
0
        }
1790
69.7k
      }
1791
1792
71.8k
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1793
269
not_matched:
1794
269
      if (!result && subject_str) {
1795
133
        result = zend_string_copy(subject_str);
1796
133
        break;
1797
133
      }
1798
      /* now we know exactly how long it is */
1799
136
      alloc_len = result_len + subject_len - last_end_offset;
1800
136
      if (NULL != result) {
1801
136
        result = zend_string_realloc(result, alloc_len, 0);
1802
136
      } else {
1803
0
        result = zend_string_alloc(alloc_len, 0);
1804
0
      }
1805
      /* stick that last bit of string on our output */
1806
136
      memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1807
136
      result_len += subject_len - last_end_offset;
1808
136
      ZSTR_VAL(result)[result_len] = '\0';
1809
136
      ZSTR_LEN(result) = result_len;
1810
136
      break;
1811
269
    } else {
1812
0
error:
1813
0
      pcre_handle_exec_error(count);
1814
0
      if (result) {
1815
0
        zend_string_release_ex(result, 0);
1816
0
        result = NULL;
1817
0
      }
1818
0
      break;
1819
0
    }
1820
1821
#ifdef HAVE_PCRE_JIT_SUPPORT
1822
    if (pce->preg_options & PREG_JIT) {
1823
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1824
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1825
    } else
1826
#endif
1827
71.5k
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1828
71.5k
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1829
71.5k
  }
1830
269
  if (match_data != mdata) {
1831
145
    pcre2_match_data_free(match_data);
1832
145
  }
1833
1834
269
  return result;
1835
269
}
1836
/* }}} */
1837
1838
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str,
1839
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1840
  size_t limit, size_t *replace_count, zend_long flags
1841
135
) {
1842
135
  uint32_t     options;     /* Execution options */
1843
135
  int        count;       /* Count of matched subpatterns */
1844
135
  zend_string   **subpat_names;   /* Array for named subpatterns */
1845
135
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1846
135
  size_t       alloc_len;     /* Actual allocated length */
1847
135
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1848
135
  size_t       last_end_offset; /* Where the last search ended */
1849
135
  const char    *match,       /* The current match */
1850
135
          *piece;       /* The current piece of subject */
1851
135
  size_t      result_len;     /* Length of result */
1852
135
  zend_string   *result;      /* Result of replacement */
1853
135
  pcre2_match_data *match_data;
1854
135
  bool old_mdata_used;
1855
1856
  /* Calculate the size of the offsets array, and allocate memory for it. */
1857
135
  num_subpats = pce->capture_count + 1;
1858
135
  if (pce->name_count > 0) {
1859
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1860
0
    if (UNEXPECTED(!subpat_names)) {
1861
0
      return NULL;
1862
0
    }
1863
135
  } else {
1864
135
    subpat_names = NULL;
1865
135
  }
1866
1867
135
  alloc_len = 0;
1868
135
  result = NULL;
1869
1870
  /* Initialize */
1871
135
  match = NULL;
1872
135
  start_offset = 0;
1873
135
  last_end_offset = 0;
1874
135
  result_len = 0;
1875
135
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1876
1877
135
  old_mdata_used = mdata_used;
1878
135
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1879
135
    mdata_used = 1;
1880
135
    match_data = mdata;
1881
135
  } else {
1882
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1883
0
    if (!match_data) {
1884
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1885
0
      mdata_used = old_mdata_used;
1886
0
      return NULL;
1887
0
    }
1888
0
  }
1889
1890
135
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1891
1892
  /* Array of subpattern offsets */
1893
135
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1894
1895
  /* Execute the regular expression. */
1896
#ifdef HAVE_PCRE_JIT_SUPPORT
1897
  if ((pce->preg_options & PREG_JIT) && options) {
1898
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1899
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1900
  } else
1901
#endif
1902
135
  count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1903
135
      options, match_data, mctx);
1904
1905
318
  while (1) {
1906
318
    piece = ZSTR_VAL(subject_str) + last_end_offset;
1907
1908
318
    if (count >= 0 && limit) {
1909
      /* Check for too many substrings condition. */
1910
196
      if (UNEXPECTED(count == 0)) {
1911
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1912
0
        count = num_subpats;
1913
0
      }
1914
1915
196
matched:
1916
196
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1917
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1918
0
        if (result) {
1919
0
          zend_string_release_ex(result, 0);
1920
0
          result = NULL;
1921
0
        }
1922
0
        break;
1923
0
      }
1924
1925
196
      if (replace_count) {
1926
196
        ++*replace_count;
1927
196
      }
1928
1929
      /* Set the match location in subject */
1930
196
      match = ZSTR_VAL(subject_str) + offsets[0];
1931
1932
      /* Length of needed storage */
1933
196
      size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1934
1935
      /* Use custom function to get replacement string and its length. */
1936
196
      zend_string *eval_result = preg_do_repl_func(
1937
196
        fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count,
1938
196
        pcre2_get_mark(match_data), flags);
1939
1940
196
      if (UNEXPECTED(eval_result == NULL)) {
1941
12
        goto error;
1942
12
      }
1943
184
      new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1944
184
      if (new_len >= alloc_len) {
1945
161
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1946
161
        if (result == NULL) {
1947
98
          result = zend_string_alloc(alloc_len, 0);
1948
98
        } else {
1949
63
          result = zend_string_extend(result, alloc_len, 0);
1950
63
        }
1951
161
      }
1952
1953
184
      if (match-piece > 0) {
1954
        /* copy the part of the string before the match */
1955
183
        memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1956
183
        result_len += (match-piece);
1957
183
      }
1958
1959
      /* If using custom function, copy result to the buffer and clean up. */
1960
184
      memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1961
184
      result_len += ZSTR_LEN(eval_result);
1962
184
      zend_string_release_ex(eval_result, 0);
1963
1964
184
      limit--;
1965
1966
      /* Advance to the next piece. */
1967
184
      start_offset = last_end_offset = offsets[1];
1968
1969
      /* If we have matched an empty string, mimic what Perl's /g options does.
1970
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1971
         the match again at the same point. If this fails (picked up above) we
1972
         advance to the next character. */
1973
184
      if (start_offset == offsets[0]) {
1974
1
        count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1975
1
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1976
1977
1
        piece = ZSTR_VAL(subject_str) + start_offset;
1978
1
        if (count >= 0 && limit) {
1979
0
          goto matched;
1980
1
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1981
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1982
             this is not necessarily the end. We need to advance
1983
             the start offset, and continue. Fudge the offset values
1984
             to achieve this, unless we're already at the end of the string. */
1985
1
          if (start_offset < ZSTR_LEN(subject_str)) {
1986
0
            size_t unit_len = calculate_unit_length(pce, piece);
1987
0
            start_offset += unit_len;
1988
1
          } else {
1989
1
            goto not_matched;
1990
1
          }
1991
1
        } else {
1992
0
          goto error;
1993
0
        }
1994
1
      }
1995
1996
184
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1997
123
not_matched:
1998
123
      if (result == NULL) {
1999
25
        result = zend_string_copy(subject_str);
2000
25
        break;
2001
25
      }
2002
      /* now we know exactly how long it is */
2003
98
      size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset;
2004
98
      alloc_len = result_len + segment_len;
2005
98
      result = zend_string_realloc(result, alloc_len, 0);
2006
      /* stick that last bit of string on our output */
2007
98
      memcpy(ZSTR_VAL(result) + result_len, piece, segment_len);
2008
98
      result_len += segment_len;
2009
98
      ZSTR_VAL(result)[result_len] = '\0';
2010
98
      ZSTR_LEN(result) = result_len;
2011
98
      break;
2012
123
    } else {
2013
12
error:
2014
12
      pcre_handle_exec_error(count);
2015
12
      if (result) {
2016
0
        zend_string_release_ex(result, 0);
2017
0
        result = NULL;
2018
0
      }
2019
12
      break;
2020
0
    }
2021
#ifdef HAVE_PCRE_JIT_SUPPORT
2022
    if ((pce->preg_options & PREG_JIT)) {
2023
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2024
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2025
    } else
2026
#endif
2027
183
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2028
183
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2029
183
  }
2030
135
  if (match_data != mdata) {
2031
0
    pcre2_match_data_free(match_data);
2032
0
  }
2033
135
  mdata_used = old_mdata_used;
2034
2035
135
  return result;
2036
135
}
2037
2038
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2039
                zend_string *subject_str,
2040
                zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2041
                size_t limit, size_t *replace_count, zend_long flags)
2042
153
{
2043
153
  pcre_cache_entry  *pce;         /* Compiled regular expression */
2044
153
  zend_string     *result;      /* Function result */
2045
2046
  /* Compile regex or get it from cache. */
2047
153
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2048
18
    return NULL;
2049
18
  }
2050
135
  pce->refcount++;
2051
135
  result = php_pcre_replace_func_impl(pce, subject_str, fci, fcc, limit, replace_count, flags);
2052
135
  pce->refcount--;
2053
2054
135
  return result;
2055
153
}
2056
2057
/* {{{ php_pcre_replace_array */
2058
static zend_string *php_pcre_replace_array(HashTable *regex,
2059
  zend_string *replace_str, HashTable *replace_ht,
2060
  zend_string *subject_str, size_t limit, size_t *replace_count)
2061
0
{
2062
0
  zval    *regex_entry;
2063
0
  zend_string *result;
2064
2065
0
  zend_string_addref(subject_str);
2066
2067
0
  if (replace_ht) {
2068
0
    uint32_t replace_idx = 0;
2069
2070
    /* For each entry in the regex array, get the entry */
2071
0
    ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2072
      /* Make sure we're dealing with strings. */
2073
0
      zend_string *tmp_regex_str;
2074
0
      zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2075
0
      zend_string *replace_entry_str, *tmp_replace_entry_str;
2076
0
      zval *zv;
2077
2078
      /* Get current entry */
2079
0
      while (1) {
2080
0
        if (replace_idx == replace_ht->nNumUsed) {
2081
0
          replace_entry_str = ZSTR_EMPTY_ALLOC();
2082
0
          tmp_replace_entry_str = NULL;
2083
0
          break;
2084
0
        }
2085
0
        zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
2086
0
        replace_idx++;
2087
0
        if (Z_TYPE_P(zv) != IS_UNDEF) {
2088
0
          replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2089
0
          break;
2090
0
        }
2091
0
      }
2092
2093
      /* Do the actual replacement and put the result back into subject_str
2094
         for further replacements. */
2095
0
      result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2096
0
        ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2097
0
      zend_tmp_string_release(tmp_replace_entry_str);
2098
0
      zend_tmp_string_release(tmp_regex_str);
2099
0
      zend_string_release_ex(subject_str, 0);
2100
0
      subject_str = result;
2101
0
      if (UNEXPECTED(result == NULL)) {
2102
0
        break;
2103
0
      }
2104
0
    } ZEND_HASH_FOREACH_END();
2105
2106
0
  } else {
2107
0
    ZEND_ASSERT(replace_str != NULL);
2108
2109
    /* For each entry in the regex array, get the entry */
2110
0
    ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2111
      /* Make sure we're dealing with strings. */
2112
0
      zend_string *tmp_regex_str;
2113
0
      zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2114
2115
      /* Do the actual replacement and put the result back into subject_str
2116
         for further replacements. */
2117
0
      result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2118
0
        ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2119
0
      zend_tmp_string_release(tmp_regex_str);
2120
0
      zend_string_release_ex(subject_str, 0);
2121
0
      subject_str = result;
2122
2123
0
      if (UNEXPECTED(result == NULL)) {
2124
0
        break;
2125
0
      }
2126
0
    } ZEND_HASH_FOREACH_END();
2127
0
  }
2128
2129
0
  return subject_str;
2130
0
}
2131
/* }}} */
2132
2133
/* {{{ php_replace_in_subject */
2134
static zend_always_inline zend_string *php_replace_in_subject(
2135
  zend_string *regex_str, HashTable *regex_ht,
2136
  zend_string *replace_str, HashTable *replace_ht,
2137
  zend_string *subject, size_t limit, size_t *replace_count)
2138
333
{
2139
333
  zend_string *result;
2140
2141
333
  if (regex_str) {
2142
333
    ZEND_ASSERT(replace_str != NULL);
2143
333
    result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2144
333
      replace_str, limit, replace_count);
2145
333
  } else {
2146
0
    ZEND_ASSERT(regex_ht != NULL);
2147
0
    result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2148
0
      limit, replace_count);
2149
0
  }
2150
333
  return result;
2151
333
}
2152
/* }}} */
2153
2154
static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht,
2155
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2156
  zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2157
153
{
2158
153
  zend_string *result;
2159
2160
153
  if (regex_str) {
2161
153
    result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags);
2162
153
    return result;
2163
153
  } else {
2164
    /* If regex is an array */
2165
0
    zval    *regex_entry;
2166
2167
0
    ZEND_ASSERT(regex_ht != NULL);
2168
2169
0
    zend_string_addref(subject);
2170
2171
    /* For each entry in the regex array, get the entry */
2172
0
    ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2173
      /* Make sure we're dealing with strings. */
2174
0
      zend_string *tmp_regex_entry_str;
2175
0
      zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2176
0
      if (UNEXPECTED(regex_entry_str == NULL)) {
2177
0
        break;
2178
0
      }
2179
2180
      /* Do the actual replacement and put the result back into subject
2181
         for further replacements. */
2182
0
      result = php_pcre_replace_func(
2183
0
        regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2184
0
      zend_tmp_string_release(tmp_regex_entry_str);
2185
0
      zend_string_release(subject);
2186
0
      subject = result;
2187
0
      if (UNEXPECTED(result == NULL)) {
2188
0
        break;
2189
0
      }
2190
0
    } ZEND_HASH_FOREACH_END();
2191
2192
0
    return subject;
2193
0
  }
2194
153
}
2195
2196
static size_t php_preg_replace_func_impl(zval *return_value,
2197
  zend_string *regex_str, const HashTable *regex_ht,
2198
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2199
  zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags)
2200
153
{
2201
153
  zend_string *result;
2202
153
  size_t replace_count = 0;
2203
2204
153
  if (subject_str) {
2205
153
    result = php_replace_in_subject_func(
2206
153
      regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2207
153
    if (result != NULL) {
2208
123
      RETVAL_STR(result);
2209
123
    } else {
2210
30
      RETVAL_NULL();
2211
30
    }
2212
153
  } else {
2213
    /* if subject is an array */
2214
0
    zval    *subject_entry, zv;
2215
0
    zend_string *string_key;
2216
0
    zend_ulong   num_key;
2217
2218
0
    ZEND_ASSERT(subject_ht != NULL);
2219
2220
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2221
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2222
2223
    /* For each subject entry, convert it to string, then perform replacement
2224
       and add the result to the return_value array. */
2225
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2226
0
      zend_string *tmp_subject_entry_str;
2227
0
      zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2228
0
      if (UNEXPECTED(subject_entry_str == NULL)) {
2229
0
        break;
2230
0
      }
2231
2232
0
      result = php_replace_in_subject_func(
2233
0
        regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2234
0
      if (result != NULL) {
2235
        /* Add to return array */
2236
0
        ZVAL_STR(&zv, result);
2237
0
        if (string_key) {
2238
0
          zend_hash_add_new(return_value_ht, string_key, &zv);
2239
0
        } else {
2240
0
          zend_hash_index_add_new(return_value_ht, num_key, &zv);
2241
0
        }
2242
0
      }
2243
0
      zend_tmp_string_release(tmp_subject_entry_str);
2244
0
    } ZEND_HASH_FOREACH_END();
2245
0
  }
2246
2247
153
  return replace_count;
2248
153
}
2249
2250
static void _preg_replace_common(
2251
  zval *return_value,
2252
  HashTable *regex_ht, zend_string *regex_str,
2253
  HashTable *replace_ht, zend_string *replace_str,
2254
  HashTable *subject_ht, zend_string *subject_str,
2255
  zend_long limit,
2256
  zval *zcount,
2257
  bool is_filter
2258
333
) {
2259
333
  size_t replace_count = 0;
2260
333
  zend_string *result;
2261
333
  size_t old_replace_count;
2262
2263
  /* If replace is an array then the regex argument needs to also be an array */
2264
333
  if (replace_ht && !regex_ht) {
2265
0
    zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2266
0
    RETURN_THROWS();
2267
0
  }
2268
2269
333
  if (subject_str) {
2270
333
    old_replace_count = replace_count;
2271
333
    result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2272
333
      subject_str, limit, &replace_count);
2273
333
    if (result != NULL) {
2274
269
      if (!is_filter || replace_count > old_replace_count) {
2275
269
        RETVAL_STR(result);
2276
269
      } else {
2277
0
        zend_string_release_ex(result, 0);
2278
0
        RETVAL_NULL();
2279
0
      }
2280
269
    } else {
2281
64
      RETVAL_NULL();
2282
64
    }
2283
333
  } else {
2284
    /* if subject is an array */
2285
0
    zval    *subject_entry, zv;
2286
0
    zend_string *string_key;
2287
0
    zend_ulong   num_key;
2288
2289
0
    ZEND_ASSERT(subject_ht != NULL);
2290
2291
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2292
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2293
2294
    /* For each subject entry, convert it to string, then perform replacement
2295
       and add the result to the return_value array. */
2296
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2297
0
      old_replace_count = replace_count;
2298
0
      zend_string *tmp_subject_entry_str;
2299
0
      zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2300
0
      result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2301
0
        subject_entry_str, limit, &replace_count);
2302
2303
0
      if (result != NULL) {
2304
0
        if (!is_filter || replace_count > old_replace_count) {
2305
          /* Add to return array */
2306
0
          ZVAL_STR(&zv, result);
2307
0
          if (string_key) {
2308
0
            zend_hash_add_new(return_value_ht, string_key, &zv);
2309
0
          } else {
2310
0
            zend_hash_index_add_new(return_value_ht, num_key, &zv);
2311
0
          }
2312
0
        } else {
2313
0
          zend_string_release_ex(result, 0);
2314
0
        }
2315
0
      }
2316
0
      zend_tmp_string_release(tmp_subject_entry_str);
2317
0
    } ZEND_HASH_FOREACH_END();
2318
0
  }
2319
2320
333
  if (zcount) {
2321
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2322
0
  }
2323
333
}
2324
2325
/* {{{ preg_replace_common */
2326
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2327
337
{
2328
337
  zend_string *regex_str, *replace_str, *subject_str;
2329
337
  HashTable *regex_ht, *replace_ht, *subject_ht;
2330
337
  zend_long limit = -1;
2331
337
  zval *zcount = NULL;
2332
2333
  /* Get function parameters and do error-checking. */
2334
1.00k
  ZEND_PARSE_PARAMETERS_START(3, 5)
2335
1.66k
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2336
1.66k
    Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2337
1.66k
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2338
1.66k
    Z_PARAM_OPTIONAL
2339
1.66k
    Z_PARAM_LONG(limit)
2340
510
    Z_PARAM_ZVAL(zcount)
2341
510
  ZEND_PARSE_PARAMETERS_END();
2342
2343
333
  _preg_replace_common(
2344
333
    return_value,
2345
333
    regex_ht, regex_str,
2346
333
    replace_ht, replace_str,
2347
333
    subject_ht, subject_str,
2348
333
    limit, zcount, is_filter);
2349
333
}
2350
/* }}} */
2351
2352
/* {{{ Perform Perl-style regular expression replacement. */
2353
PHP_FUNCTION(preg_replace)
2354
337
{
2355
337
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2356
337
}
2357
/* }}} */
2358
2359
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
2360
0
{
2361
0
  zend_string *regex_str, *replace_str, *subject_str;
2362
0
  HashTable *regex_ht, *replace_ht, *subject_ht;
2363
0
  zval regex_tmp, replace_tmp, subject_tmp;
2364
2365
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
2366
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
2367
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
2368
2369
0
  _preg_replace_common(
2370
0
    return_value,
2371
0
    regex_ht, regex_str,
2372
0
    replace_ht, replace_str,
2373
0
    subject_ht, subject_str,
2374
0
    /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2375
2376
0
flf_clean:;
2377
0
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
2378
0
  Z_FLF_PARAM_FREE_STR(2, replace_tmp);
2379
0
  Z_FLF_PARAM_FREE_STR(3, subject_tmp);
2380
0
}
2381
2382
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2383
PHP_FUNCTION(preg_replace_callback)
2384
155
{
2385
155
  zval *zcount = NULL;
2386
155
  zend_string *regex_str;
2387
155
  HashTable *regex_ht;
2388
155
  zend_string *subject_str;
2389
155
  HashTable *subject_ht;
2390
155
  zend_long limit = -1, flags = 0;
2391
155
  size_t replace_count;
2392
155
  zend_fcall_info fci = empty_fcall_info;
2393
155
  zend_fcall_info_cache fcc = empty_fcall_info_cache;
2394
2395
  /* Get function parameters and do error-checking. */
2396
465
  ZEND_PARSE_PARAMETERS_START(3, 6)
2397
775
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2398
775
    Z_PARAM_FUNC(fci, fcc)
2399
918
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2400
918
    Z_PARAM_OPTIONAL
2401
918
    Z_PARAM_LONG(limit)
2402
0
    Z_PARAM_ZVAL(zcount)
2403
0
    Z_PARAM_LONG(flags)
2404
155
  ZEND_PARSE_PARAMETERS_END();
2405
2406
153
  replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht,
2407
153
    &fci, &fcc,
2408
153
    subject_str, subject_ht, limit, flags);
2409
153
  if (zcount) {
2410
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2411
0
  }
2412
153
}
2413
/* }}} */
2414
2415
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2416
PHP_FUNCTION(preg_replace_callback_array)
2417
0
{
2418
0
  zval *replace, *zcount = NULL;
2419
0
  HashTable *pattern, *subject_ht;
2420
0
  zend_string *subject_str, *str_idx_regex;
2421
0
  zend_long limit = -1, flags = 0;
2422
0
  size_t replace_count = 0;
2423
2424
  /* Get function parameters and do error-checking. */
2425
0
  ZEND_PARSE_PARAMETERS_START(2, 5)
2426
0
    Z_PARAM_ARRAY_HT(pattern)
2427
0
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2428
0
    Z_PARAM_OPTIONAL
2429
0
    Z_PARAM_LONG(limit)
2430
0
    Z_PARAM_ZVAL(zcount)
2431
0
    Z_PARAM_LONG(flags)
2432
0
  ZEND_PARSE_PARAMETERS_END();
2433
2434
0
  if (subject_ht) {
2435
0
    GC_TRY_ADDREF(subject_ht);
2436
0
  } else {
2437
0
    GC_TRY_ADDREF(subject_str);
2438
0
  }
2439
2440
0
  ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2441
0
    if (!str_idx_regex) {
2442
0
      zend_argument_type_error(1, "must contain only string patterns as keys");
2443
0
      goto error;
2444
0
    }
2445
2446
0
    zend_fcall_info_cache fcc = empty_fcall_info_cache;
2447
0
    zend_fcall_info fci = empty_fcall_info;
2448
0
    fci.size = sizeof(zend_fcall_info);
2449
    /* Copy potential trampoline */
2450
0
    ZVAL_COPY_VALUE(&fci.function_name, replace);
2451
2452
0
    if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2453
0
      zend_argument_type_error(1, "must contain only valid callbacks");
2454
0
      goto error;
2455
0
    }
2456
2457
0
    zval retval;
2458
0
    replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2459
0
      subject_str, subject_ht, limit, flags);
2460
0
    zend_release_fcall_info_cache(&fcc);
2461
2462
0
    switch (Z_TYPE(retval)) {
2463
0
      case IS_ARRAY:
2464
0
        ZEND_ASSERT(subject_ht);
2465
0
        zend_array_release(subject_ht);
2466
0
        subject_ht = Z_ARR(retval);
2467
0
        break;
2468
0
      case IS_STRING:
2469
0
        ZEND_ASSERT(subject_str);
2470
0
        zend_string_release(subject_str);
2471
0
        subject_str = Z_STR(retval);
2472
0
        break;
2473
0
      case IS_NULL:
2474
0
        RETVAL_NULL();
2475
0
        goto error;
2476
0
      EMPTY_SWITCH_DEFAULT_CASE()
2477
0
    }
2478
2479
0
    if (EG(exception)) {
2480
0
      goto error;
2481
0
    }
2482
0
  } ZEND_HASH_FOREACH_END();
2483
2484
0
  if (zcount) {
2485
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2486
0
  }
2487
2488
0
  if (subject_ht) {
2489
0
    RETVAL_ARR(subject_ht);
2490
    // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2491
0
    if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2492
0
      Z_TYPE_FLAGS_P(return_value) = 0;
2493
0
    }
2494
0
    return;
2495
0
  } else {
2496
0
    RETURN_STR(subject_str);
2497
0
  }
2498
2499
0
error:
2500
0
  if (subject_ht) {
2501
0
    zend_array_release(subject_ht);
2502
0
  } else {
2503
0
    zend_string_release(subject_str);
2504
0
  }
2505
0
}
2506
/* }}} */
2507
2508
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
2509
PHP_FUNCTION(preg_filter)
2510
0
{
2511
0
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2512
0
}
2513
/* }}} */
2514
2515
/* {{{ Split string into an array using a perl-style regular expression as a delimiter */
2516
PHP_FUNCTION(preg_split)
2517
0
{
2518
0
  zend_string     *regex;     /* Regular expression */
2519
0
  zend_string     *subject;   /* String to match against */
2520
0
  zend_long      limit_val = -1;/* Integer value of limit */
2521
0
  zend_long      flags = 0;   /* Match control flags */
2522
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2523
2524
  /* Get function parameters and do error checking */
2525
0
  ZEND_PARSE_PARAMETERS_START(2, 4)
2526
0
    Z_PARAM_STR(regex)
2527
0
    Z_PARAM_STR(subject)
2528
0
    Z_PARAM_OPTIONAL
2529
0
    Z_PARAM_LONG(limit_val)
2530
0
    Z_PARAM_LONG(flags)
2531
0
  ZEND_PARSE_PARAMETERS_END();
2532
2533
  /* Compile regex or get it from cache. */
2534
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2535
0
    RETURN_FALSE;
2536
0
  }
2537
2538
0
  pce->refcount++;
2539
0
  php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2540
0
  pce->refcount--;
2541
0
}
2542
/* }}} */
2543
2544
/* {{{ php_pcre_split */
2545
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2546
  zend_long limit_val, zend_long flags)
2547
0
{
2548
0
  uint32_t     options;     /* Execution options */
2549
0
  int        count;       /* Count of matched subpatterns */
2550
0
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
2551
0
  PCRE2_SIZE     last_match_offset; /* Location of last match */
2552
0
  uint32_t     no_empty;      /* If NO_EMPTY flag is set */
2553
0
  uint32_t     delim_capture;   /* If delimiters should be captured */
2554
0
  uint32_t     offset_capture;  /* If offsets should be captured */
2555
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2556
0
  zval       tmp;
2557
0
  pcre2_match_data *match_data;
2558
0
  char *subject = ZSTR_VAL(subject_str);
2559
2560
0
  no_empty = flags & PREG_SPLIT_NO_EMPTY;
2561
0
  delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2562
0
  offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2563
2564
  /* Initialize return value */
2565
0
  array_init(return_value);
2566
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2567
2568
  /* Calculate the size of the offsets array, and allocate memory for it. */
2569
0
  num_subpats = pce->capture_count + 1;
2570
2571
  /* Start at the beginning of the string */
2572
0
  start_offset = 0;
2573
0
  last_match_offset = 0;
2574
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2575
2576
0
  if (limit_val == -1) {
2577
    /* pass */
2578
0
  } else if (limit_val == 0) {
2579
0
    limit_val = -1;
2580
0
  } else if (limit_val <= 1) {
2581
0
    goto last;
2582
0
  }
2583
2584
0
  if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2585
0
    match_data = mdata;
2586
0
  } else {
2587
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2588
0
    if (!match_data) {
2589
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2590
0
      zval_ptr_dtor(return_value);
2591
0
      RETURN_FALSE;
2592
0
    }
2593
0
  }
2594
2595
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2596
2597
  /* Array of subpattern offsets */
2598
0
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2599
2600
#ifdef HAVE_PCRE_JIT_SUPPORT
2601
  if ((pce->preg_options & PREG_JIT) && options) {
2602
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2603
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2604
  } else
2605
#endif
2606
0
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2607
0
      options, match_data, mctx);
2608
2609
0
  while (1) {
2610
    /* If something matched */
2611
0
    if (count >= 0) {
2612
      /* Check for too many substrings condition. */
2613
0
      if (UNEXPECTED(count == 0)) {
2614
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2615
0
        count = num_subpats;
2616
0
      }
2617
2618
0
matched:
2619
0
      if (UNEXPECTED(offsets[1] < offsets[0])) {
2620
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2621
0
        break;
2622
0
      }
2623
2624
0
      if (!no_empty || offsets[0] != last_match_offset) {
2625
0
        if (offset_capture) {
2626
          /* Add (match, offset) pair to the return value */
2627
0
          add_offset_pair(
2628
0
            return_value_ht, subject, last_match_offset, offsets[0],
2629
0
            NULL, 0);
2630
0
        } else {
2631
          /* Add the piece to the return value */
2632
0
          populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2633
0
          zend_hash_next_index_insert_new(return_value_ht, &tmp);
2634
0
        }
2635
2636
        /* One less left to do */
2637
0
        if (limit_val != -1)
2638
0
          limit_val--;
2639
0
      }
2640
2641
0
      if (delim_capture) {
2642
0
        size_t i;
2643
0
        for (i = 1; i < count; i++) {
2644
          /* If we have matched a delimiter */
2645
0
          if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2646
0
            if (offset_capture) {
2647
0
              add_offset_pair(
2648
0
                return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2649
0
            } else {
2650
0
              populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2651
0
              zend_hash_next_index_insert_new(return_value_ht, &tmp);
2652
0
            }
2653
0
          }
2654
0
        }
2655
0
      }
2656
2657
      /* Advance to the position right after the last full match */
2658
0
      start_offset = last_match_offset = offsets[1];
2659
2660
      /* If we have matched an empty string, mimic what Perl's /g options does.
2661
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2662
         the match again at the same point. If this fails (picked up above) we
2663
         advance to the next character. */
2664
0
      if (start_offset == offsets[0]) {
2665
        /* Get next piece if no limit or limit not yet reached and something matched*/
2666
0
        if (limit_val != -1 && limit_val <= 1) {
2667
0
          break;
2668
0
        }
2669
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2670
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2671
0
        if (count >= 0) {
2672
0
          goto matched;
2673
0
        } else if (count == PCRE2_ERROR_NOMATCH) {
2674
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2675
             this is not necessarily the end. We need to advance
2676
             the start offset, and continue. Fudge the offset values
2677
             to achieve this, unless we're already at the end of the string. */
2678
0
          if (start_offset < ZSTR_LEN(subject_str)) {
2679
0
            start_offset += calculate_unit_length(pce, subject + start_offset);
2680
0
          } else {
2681
0
            break;
2682
0
          }
2683
0
        } else {
2684
0
          goto error;
2685
0
        }
2686
0
      }
2687
2688
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2689
0
      break;
2690
0
    } else {
2691
0
error:
2692
0
      pcre_handle_exec_error(count);
2693
0
      break;
2694
0
    }
2695
2696
    /* Get next piece if no limit or limit not yet reached and something matched*/
2697
0
    if (limit_val != -1 && limit_val <= 1) {
2698
0
      break;
2699
0
    }
2700
2701
#ifdef HAVE_PCRE_JIT_SUPPORT
2702
    if (pce->preg_options & PREG_JIT) {
2703
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2704
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2705
    } else
2706
#endif
2707
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2708
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2709
0
  }
2710
0
  if (match_data != mdata) {
2711
0
    pcre2_match_data_free(match_data);
2712
0
  }
2713
2714
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2715
0
    zval_ptr_dtor(return_value);
2716
0
    RETURN_FALSE;
2717
0
  }
2718
2719
0
last:
2720
0
  start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2721
2722
0
  if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2723
0
    if (offset_capture) {
2724
      /* Add the last (match, offset) pair to the return value */
2725
0
      add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2726
0
    } else {
2727
      /* Add the last piece to the return value */
2728
0
      if (start_offset == 0) {
2729
0
        ZVAL_STR_COPY(&tmp, subject_str);
2730
0
      } else {
2731
0
        populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2732
0
      }
2733
0
      zend_hash_next_index_insert_new(return_value_ht, &tmp);
2734
0
    }
2735
0
  }
2736
0
}
2737
/* }}} */
2738
2739
/* {{{ Quote regular expression characters plus an optional character */
2740
PHP_FUNCTION(preg_quote)
2741
3
{
2742
3
  zend_string *str;           /* Input string argument */
2743
3
  zend_string *delim = NULL;    /* Additional delimiter argument */
2744
3
  char    *in_str;      /* Input string */
2745
3
  char    *in_str_end;      /* End of the input string */
2746
3
  zend_string *out_str;     /* Output string with quoted characters */
2747
3
  size_t       extra_len;         /* Number of additional characters */
2748
3
  char    *p,         /* Iterator for input string */
2749
3
        *q,         /* Iterator for output string */
2750
3
         delim_char = '\0', /* Delimiter character to be quoted */
2751
3
         c;         /* Current character */
2752
2753
  /* Get the arguments and check for errors */
2754
9
  ZEND_PARSE_PARAMETERS_START(1, 2)
2755
12
    Z_PARAM_STR(str)
2756
3
    Z_PARAM_OPTIONAL
2757
6
    Z_PARAM_STR_OR_NULL(delim)
2758
3
  ZEND_PARSE_PARAMETERS_END();
2759
2760
  /* Nothing to do if we got an empty string */
2761
3
  if (ZSTR_LEN(str) == 0) {
2762
0
    RETURN_EMPTY_STRING();
2763
0
  }
2764
2765
3
  in_str = ZSTR_VAL(str);
2766
3
  in_str_end = in_str + ZSTR_LEN(str);
2767
2768
3
  if (delim) {
2769
0
    delim_char = ZSTR_VAL(delim)[0];
2770
0
  }
2771
2772
  /* Go through the string and quote necessary characters */
2773
3
  extra_len = 0;
2774
3
  p = in_str;
2775
9.83k
  do {
2776
9.83k
    c = *p;
2777
9.83k
    switch(c) {
2778
14
      case '.':
2779
18
      case '\\':
2780
28
      case '+':
2781
372
      case '*':
2782
518
      case '?':
2783
580
      case '[':
2784
635
      case '^':
2785
705
      case ']':
2786
709
      case '$':
2787
743
      case '(':
2788
838
      case ')':
2789
877
      case '{':
2790
984
      case '}':
2791
1.01k
      case '=':
2792
1.01k
      case '!':
2793
1.06k
      case '>':
2794
1.06k
      case '<':
2795
1.09k
      case '|':
2796
1.11k
      case ':':
2797
1.23k
      case '-':
2798
1.23k
      case '#':
2799
1.23k
        extra_len++;
2800
1.23k
        break;
2801
2802
1.87k
      case '\0':
2803
1.87k
        extra_len+=3;
2804
1.87k
        break;
2805
2806
6.73k
      default:
2807
6.73k
        if (c == delim_char) {
2808
0
          extra_len++;
2809
0
        }
2810
6.73k
        break;
2811
9.83k
    }
2812
9.83k
    p++;
2813
9.83k
  } while (p != in_str_end);
2814
2815
3
  if (extra_len == 0) {
2816
0
    RETURN_STR_COPY(str);
2817
0
  }
2818
2819
  /* Allocate enough memory so that even if each character
2820
     is quoted, we won't run out of room */
2821
3
  out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2822
3
  q = ZSTR_VAL(out_str);
2823
3
  p = in_str;
2824
2825
9.83k
  do {
2826
9.83k
    c = *p;
2827
9.83k
    switch(c) {
2828
14
      case '.':
2829
18
      case '\\':
2830
28
      case '+':
2831
372
      case '*':
2832
518
      case '?':
2833
580
      case '[':
2834
635
      case '^':
2835
705
      case ']':
2836
709
      case '$':
2837
743
      case '(':
2838
838
      case ')':
2839
877
      case '{':
2840
984
      case '}':
2841
1.01k
      case '=':
2842
1.01k
      case '!':
2843
1.06k
      case '>':
2844
1.06k
      case '<':
2845
1.09k
      case '|':
2846
1.11k
      case ':':
2847
1.23k
      case '-':
2848
1.23k
      case '#':
2849
1.23k
        *q++ = '\\';
2850
1.23k
        *q++ = c;
2851
1.23k
        break;
2852
2853
1.87k
      case '\0':
2854
1.87k
        *q++ = '\\';
2855
1.87k
        *q++ = '0';
2856
1.87k
        *q++ = '0';
2857
1.87k
        *q++ = '0';
2858
1.87k
        break;
2859
2860
6.73k
      default:
2861
6.73k
        if (c == delim_char) {
2862
0
          *q++ = '\\';
2863
0
        }
2864
6.73k
        *q++ = c;
2865
6.73k
        break;
2866
9.83k
    }
2867
9.83k
    p++;
2868
9.83k
  } while (p != in_str_end);
2869
3
  *q = '\0';
2870
2871
3
  RETURN_NEW_STR(out_str);
2872
3
}
2873
/* }}} */
2874
2875
/* {{{ Searches array and returns entries which match regex */
2876
PHP_FUNCTION(preg_grep)
2877
0
{
2878
0
  zend_string     *regex;     /* Regular expression */
2879
0
  zval        *input;     /* Input array */
2880
0
  zend_long      flags = 0;   /* Match control flags */
2881
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2882
2883
  /* Get arguments and do error checking */
2884
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2885
0
    Z_PARAM_STR(regex)
2886
0
    Z_PARAM_ARRAY(input)
2887
0
    Z_PARAM_OPTIONAL
2888
0
    Z_PARAM_LONG(flags)
2889
0
  ZEND_PARSE_PARAMETERS_END();
2890
2891
  /* Compile regex or get it from cache. */
2892
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2893
0
    RETURN_FALSE;
2894
0
  }
2895
2896
0
  pce->refcount++;
2897
0
  php_pcre_grep_impl(pce, input, return_value, flags);
2898
0
  pce->refcount--;
2899
0
}
2900
/* }}} */
2901
2902
PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2903
0
{
2904
0
  zval            *entry;             /* An entry in the input array */
2905
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2906
0
  int        count;       /* Count of matched subpatterns */
2907
0
  uint32_t     options;     /* Execution options */
2908
0
  zend_string   *string_key;
2909
0
  zend_ulong     num_key;
2910
0
  bool     invert;      /* Whether to return non-matching
2911
                       entries */
2912
0
  pcre2_match_data *match_data;
2913
0
  invert = flags & PREG_GREP_INVERT ? 1 : 0;
2914
2915
  /* Calculate the size of the offsets array, and allocate memory for it. */
2916
0
  num_subpats = pce->capture_count + 1;
2917
2918
  /* Initialize return array */
2919
0
  array_init(return_value);
2920
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2921
2922
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2923
2924
0
  if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2925
0
    match_data = mdata;
2926
0
  } else {
2927
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2928
0
    if (!match_data) {
2929
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2930
0
      return;
2931
0
    }
2932
0
  }
2933
2934
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2935
2936
  /* Go through the input array */
2937
0
  ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2938
0
    zend_string *tmp_subject_str;
2939
0
    zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2940
2941
    /* Perform the match */
2942
#ifdef HAVE_PCRE_JIT_SUPPORT
2943
    if ((pce->preg_options & PREG_JIT) && options) {
2944
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2945
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2946
    } else
2947
#endif
2948
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2949
0
        options, match_data, mctx);
2950
2951
    /* If the entry fits our requirements */
2952
0
    if (count >= 0) {
2953
      /* Check for too many substrings condition. */
2954
0
      if (UNEXPECTED(count == 0)) {
2955
0
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2956
0
      }
2957
0
      if (!invert) {
2958
0
        Z_TRY_ADDREF_P(entry);
2959
2960
        /* Add to return array */
2961
0
        if (string_key) {
2962
0
          zend_hash_update(return_value_ht, string_key, entry);
2963
0
        } else {
2964
0
          zend_hash_index_update(return_value_ht, num_key, entry);
2965
0
        }
2966
0
      }
2967
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2968
0
      if (invert) {
2969
0
        Z_TRY_ADDREF_P(entry);
2970
2971
        /* Add to return array */
2972
0
        if (string_key) {
2973
0
          zend_hash_update(return_value_ht, string_key, entry);
2974
0
        } else {
2975
0
          zend_hash_index_update(return_value_ht, num_key, entry);
2976
0
        }
2977
0
      }
2978
0
    } else {
2979
0
      pcre_handle_exec_error(count);
2980
0
      zend_tmp_string_release(tmp_subject_str);
2981
0
      break;
2982
0
    }
2983
2984
0
    zend_tmp_string_release(tmp_subject_str);
2985
0
  } ZEND_HASH_FOREACH_END();
2986
0
  if (match_data != mdata) {
2987
0
    pcre2_match_data_free(match_data);
2988
0
  }
2989
0
}
2990
/* }}} */
2991
2992
/* {{{ Returns the error code of the last regexp execution. */
2993
PHP_FUNCTION(preg_last_error)
2994
0
{
2995
0
  ZEND_PARSE_PARAMETERS_NONE();
2996
2997
0
  RETURN_LONG(PCRE_G(error_code));
2998
0
}
2999
/* }}} */
3000
3001
/* {{{ Returns the error message of the last regexp execution. */
3002
PHP_FUNCTION(preg_last_error_msg)
3003
0
{
3004
0
  ZEND_PARSE_PARAMETERS_NONE();
3005
3006
0
  RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3007
0
}
3008
/* }}} */
3009
3010
/* {{{ module definition structures */
3011
3012
zend_module_entry pcre_module_entry = {
3013
  STANDARD_MODULE_HEADER,
3014
  "pcre",
3015
  ext_functions,
3016
  PHP_MINIT(pcre),
3017
  PHP_MSHUTDOWN(pcre),
3018
  PHP_RINIT(pcre),
3019
  PHP_RSHUTDOWN(pcre),
3020
  PHP_MINFO(pcre),
3021
  PHP_PCRE_VERSION,
3022
  PHP_MODULE_GLOBALS(pcre),
3023
  PHP_GINIT(pcre),
3024
  PHP_GSHUTDOWN(pcre),
3025
  NULL,
3026
  STANDARD_MODULE_PROPERTIES_EX
3027
};
3028
3029
#ifdef COMPILE_DL_PCRE
3030
ZEND_GET_MODULE(pcre)
3031
#endif
3032
3033
/* }}} */
3034
3035
PHPAPI pcre2_match_context *php_pcre_mctx(void)
3036
5
{/*{{{*/
3037
5
  return mctx;
3038
5
}/*}}}*/
3039
3040
PHPAPI pcre2_general_context *php_pcre_gctx(void)
3041
0
{/*{{{*/
3042
0
  return gctx;
3043
0
}/*}}}*/
3044
3045
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3046
0
{/*{{{*/
3047
0
  return cctx;
3048
0
}/*}}}*/
3049
3050
PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3051
0
{/*{{{*/
3052
0
  assert(NULL != pce);
3053
0
  pce->refcount++;
3054
0
}/*}}}*/
3055
3056
PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3057
0
{/*{{{*/
3058
0
  assert(NULL != pce);
3059
0
  assert(0 != pce->refcount);
3060
0
  pce->refcount--;
3061
0
}/*}}}*/
3062
3063
PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3064
0
{/*{{{*/
3065
0
  assert(NULL != pce);
3066
0
  return pce->re;
3067
0
}/*}}}*/