Coverage Report

Created: 2026-06-02 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/php_pcre.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Andrei Zmievski <andrei@php.net>                             |
12
   +----------------------------------------------------------------------+
13
 */
14
15
#include "php.h"
16
#include "php_ini.h"
17
#include "php_pcre.h"
18
#include "ext/standard/info.h"
19
#include "ext/standard/basic_functions.h"
20
#include "zend_smart_str.h"
21
#include "SAPI.h"
22
23
0
/* Result ordering selectors. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2

/* Match flag bits. */
#define PREG_OFFSET_CAPTURE			(1 << 8)
#define PREG_UNMATCHED_AS_NULL		(1 << 9)

/* Split flag bits. */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* Grep flag bits. */
#define PREG_GREP_INVERT			(1 << 0)

/* Recorded in a cache entry's preg_options once JIT compilation produced code for the pattern. */
#define PREG_JIT					(1 << 3)

/* Number of cached patterns at which unused entries are evicted before a new one is stored. */
#define PCRE_CACHE_SIZE 4096

/* PHP_PCRE_JIT_SUPPORT is always defined: 1 when JIT support is compiled in, 0 otherwise. */
#if defined(HAVE_PCRE_JIT_SUPPORT)
# define PHP_PCRE_JIT_SUPPORT 1
#else
# define PHP_PCRE_JIT_SUPPORT 0
#endif

/* PCRE2 version string; assigned during module startup and freed during module shutdown. */
char *php_pcre_version;
45
46
#include "php_pcre_arginfo.h"
47
48
struct _pcre_cache_entry {
49
  pcre2_code *re;
50
  /* Pointer is not NULL (during request) when there are named captures.
51
   * Length is equal to capture_count + 1 to account for capture group 0.
52
   * This table cache is only valid during request.
53
   * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
54
   * (see GH-17122 and GH-17132). */
55
  zend_string **subpats_table;
56
  uint32_t preg_options;
57
  uint32_t name_count;
58
  uint32_t capture_count;
59
  uint32_t compile_options;
60
  uint32_t refcount;
61
};
62
63
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65
#ifdef HAVE_PCRE_JIT_SUPPORT
66
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
67
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
68
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
69
#endif
70
/* General context using (infallible) system allocator. */
71
ZEND_TLS pcre2_general_context *gctx = NULL;
72
/* These two are global per thread for now. Though it is possible to use these
73
  per pattern. Either one can copy it and use in pce, or one does no global
74
  contexts at all, but creates for every pce. */
75
ZEND_TLS pcre2_compile_context *cctx = NULL;
76
ZEND_TLS pcre2_match_context   *mctx = NULL;
77
ZEND_TLS pcre2_match_data      *mdata = NULL;
78
ZEND_TLS bool              mdata_used = 0;
79
ZEND_TLS uint8_t pcre2_init_ok = 0;
80
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
81
static MUTEX_T pcre_mt = NULL;
82
#define php_pcre_mutex_alloc() \
83
  if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
84
#define php_pcre_mutex_free() \
85
  if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
86
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
87
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
88
#else
89
#define php_pcre_mutex_alloc()
90
#define php_pcre_mutex_free()
91
#define php_pcre_mutex_lock()
92
#define php_pcre_mutex_unlock()
93
#endif
94
95
ZEND_TLS HashTable char_tables;
96
97
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
98
99
/* Destructor for char_tables entries: releases the persistently allocated table. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
104
105
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
106
1.35k
{
107
1.35k
  int preg_code = 0;
108
109
1.35k
  switch (pcre_code) {
110
9
    case PCRE2_ERROR_MATCHLIMIT:
111
9
      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
112
9
      break;
113
114
0
    case PCRE2_ERROR_RECURSIONLIMIT:
115
0
      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
116
0
      break;
117
118
0
    case PCRE2_ERROR_BADUTFOFFSET:
119
0
      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
120
0
      break;
121
122
#ifdef HAVE_PCRE_JIT_SUPPORT
123
    case PCRE2_ERROR_JIT_STACKLIMIT:
124
      preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
125
      break;
126
#endif
127
128
1.34k
    default:
129
1.34k
      if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
130
147
        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
131
1.19k
      } else  {
132
1.19k
        preg_code = PHP_PCRE_INTERNAL_ERROR;
133
1.19k
      }
134
1.34k
      break;
135
1.35k
  }
136
137
1.35k
  PCRE_G(error_code) = preg_code;
138
1.35k
}
139
/* }}} */
140
141
/* Returns the static, human readable message for a PHP_PCRE_* error code;
 * codes without a message yield "Unknown error". */
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
{
	const char *message = "Unknown error";

	switch (error_code) {
		case PHP_PCRE_NO_ERROR:
			message = "No error";
			break;
		case PHP_PCRE_INTERNAL_ERROR:
			message = "Internal error";
			break;
		case PHP_PCRE_BAD_UTF8_ERROR:
			message = "Malformed UTF-8 characters, possibly incorrectly encoded";
			break;
		case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
			message = "The offset did not correspond to the beginning of a valid UTF-8 code point";
			break;
		case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
			message = "Backtrack limit exhausted";
			break;
		case PHP_PCRE_RECURSION_LIMIT_ERROR:
			message = "Recursion limit exhausted";
			break;

#ifdef HAVE_PCRE_JIT_SUPPORT
		case PHP_PCRE_JIT_STACKLIMIT_ERROR:
			message = "JIT stack limit exhausted";
			break;
#endif

		default:
			break;
	}

	return message;
}
166
/* }}} */
167
168
/* Destructor for pattern cache entries: drops the names table (if any), the
 * compiled pattern, and finally the entry itself. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry == NULL) {
		return;
	}

	if (entry->subpats_table != NULL) {
		/* The table has one slot per capture group plus one for group 0. */
		free_subpats_table(entry->subpats_table, entry->capture_count + 1);
	}
	pcre2_code_free(entry->re);
	free(entry);
}
178
/* }}} */
179
180
/* PCRE2 allocation callback backed by the persistent allocator; `data` is unused. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return pemalloc(size, 1);
}
184
185
/* PCRE2 release callback paired with php_pcre_malloc(); `data` is unused. */
static void php_pcre_free(void *block, void *data)
{
	(void) data;

	pefree(block, 1);
}
189
190
/* PCRE2 allocation callback backed by emalloc(); `data` is unused. */
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
{
	(void) data;

	return emalloc(size);
}
194
195
/* PCRE2 release callback paired with php_pcre_emalloc(); `data` is unused. */
static void php_pcre_efree(void *block, void *data)
{
	(void) data;

	efree(block);
}
199
200
3.03k
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
201
202
static void php_pcre_init_pcre2(uint8_t jit)
203
16
{/*{{{*/
204
16
  if (!gctx) {
205
16
    gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
206
16
    if (!gctx) {
207
0
      pcre2_init_ok = 0;
208
0
      return;
209
0
    }
210
16
  }
211
212
16
  if (!cctx) {
213
16
    cctx = pcre2_compile_context_create(gctx);
214
16
    if (!cctx) {
215
0
      pcre2_init_ok = 0;
216
0
      return;
217
0
    }
218
16
  }
219
220
16
  if (!mctx) {
221
16
    mctx = pcre2_match_context_create(gctx);
222
16
    if (!mctx) {
223
0
      pcre2_init_ok = 0;
224
0
      return;
225
0
    }
226
16
  }
227
228
#ifdef HAVE_PCRE_JIT_SUPPORT
229
  if (jit && !jit_stack) {
230
    jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
231
    if (!jit_stack) {
232
      pcre2_init_ok = 0;
233
      return;
234
    }
235
  }
236
#endif
237
238
16
  if (!mdata) {
239
16
    mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
240
16
    if (!mdata) {
241
0
      pcre2_init_ok = 0;
242
0
      return;
243
0
    }
244
16
  }
245
246
16
  pcre2_init_ok = 1;
247
16
}/*}}}*/
248
249
static void php_pcre_shutdown_pcre2(void)
250
0
{/*{{{*/
251
0
  if (gctx) {
252
0
    pcre2_general_context_free(gctx);
253
0
    gctx = NULL;
254
0
  }
255
256
0
  if (cctx) {
257
0
    pcre2_compile_context_free(cctx);
258
0
    cctx = NULL;
259
0
  }
260
261
0
  if (mctx) {
262
0
    pcre2_match_context_free(mctx);
263
0
    mctx = NULL;
264
0
  }
265
266
#ifdef HAVE_PCRE_JIT_SUPPORT
267
  /* Stack may only be destroyed when no cached patterns
268
    possibly associated with it do exist. */
269
  if (jit_stack) {
270
    pcre2_jit_stack_free(jit_stack);
271
    jit_stack = NULL;
272
  }
273
#endif
274
275
0
  if (mdata) {
276
0
    pcre2_match_data_free(mdata);
277
0
    mdata = NULL;
278
0
  }
279
280
0
  pcre2_init_ok = 0;
281
0
}/*}}}*/
282
283
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
284
16
{
285
16
  php_pcre_mutex_alloc();
286
287
16
  zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
288
289
16
  pcre_globals->backtrack_limit = 0;
290
16
  pcre_globals->recursion_limit = 0;
291
16
  pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
292
16
  ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
293
16
  ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
294
#ifdef HAVE_PCRE_JIT_SUPPORT
295
  pcre_globals->jit = 1;
296
#endif
297
298
16
  php_pcre_init_pcre2(1);
299
16
  zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
300
16
}
301
/* }}} */
302
303
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
304
0
{
305
0
  zend_hash_destroy(&pcre_globals->pcre_cache);
306
307
0
  php_pcre_shutdown_pcre2();
308
0
  zend_hash_destroy(&char_tables);
309
0
  php_pcre_mutex_free();
310
0
}
311
/* }}} */
312
313
static PHP_INI_MH(OnUpdateBacktrackLimit)
314
16
{/*{{{*/
315
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
316
16
  if (mctx) {
317
16
    pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
318
16
  }
319
320
16
  return SUCCESS;
321
16
}/*}}}*/
322
323
static PHP_INI_MH(OnUpdateRecursionLimit)
324
16
{/*{{{*/
325
16
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
326
16
  if (mctx) {
327
16
    pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
328
16
  }
329
330
16
  return SUCCESS;
331
16
}/*}}}*/
332
333
#ifdef HAVE_PCRE_JIT_SUPPORT
334
static PHP_INI_MH(OnUpdateJit)
335
{/*{{{*/
336
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
337
  if (PCRE_G(jit) && jit_stack) {
338
    pcre2_jit_stack_assign(mctx, NULL, jit_stack);
339
  } else {
340
    pcre2_jit_stack_assign(mctx, NULL, NULL);
341
  }
342
343
  return SUCCESS;
344
}/*}}}*/
345
#endif
346
347
PHP_INI_BEGIN()
348
  STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
349
  STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
350
#ifdef HAVE_PCRE_JIT_SUPPORT
351
  STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
352
#endif
353
PHP_INI_END()
354
355
/* Fetches a string-valued PCRE2 build-time setting.
 *
 * what: PCRE2_CONFIG_* selector of a string option.
 *
 * Returns a malloc()ed, NUL-terminated copy that the caller must free(), or
 * NULL when the option cannot be queried or memory is exhausted.
 *
 * The first pcre2_config() call (NULL buffer) asks for the required length.
 * A non-positive answer (pcre2_config() reports errors as negative values) is
 * treated as failure rather than being turned into an allocation size. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	int len = pcre2_config(what, NULL);
	char *ret;

	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		/* Nothing usable was written into the buffer. */
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
368
369
/* {{{ PHP_MINFO_FUNCTION(pcre) */
370
static PHP_MINFO_FUNCTION(pcre)
371
4
{
372
#ifdef HAVE_PCRE_JIT_SUPPORT
373
  uint32_t flag = 0;
374
  char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
375
#endif
376
4
  char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
377
4
  char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
378
379
4
  php_info_print_table_start();
380
4
  php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
381
4
  php_info_print_table_row(2, "PCRE Library Version", version);
382
4
  free(version);
383
4
  php_info_print_table_row(2, "PCRE Unicode Version", unicode);
384
4
  free(unicode);
385
386
#ifdef HAVE_PCRE_JIT_SUPPORT
387
  if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
388
    php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
389
  } else {
390
    php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
391
  }
392
  if (jit_target) {
393
    php_info_print_table_row(2, "PCRE JIT Target", jit_target);
394
  }
395
  free(jit_target);
396
#else
397
4
  php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
398
4
#endif
399
400
#ifdef HAVE_PCRE_VALGRIND_SUPPORT
401
  php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
402
#endif
403
404
4
  php_info_print_table_end();
405
406
4
  DISPLAY_INI_ENTRIES();
407
4
}
408
/* }}} */
409
410
/* {{{ PHP_MINIT_FUNCTION(pcre) */
411
static PHP_MINIT_FUNCTION(pcre)
412
16
{
413
#ifdef HAVE_PCRE_JIT_SUPPORT
414
  if (UNEXPECTED(!pcre2_init_ok)) {
415
    /* Retry. */
416
    php_pcre_init_pcre2(PCRE_G(jit));
417
    if (!pcre2_init_ok) {
418
      return FAILURE;
419
    }
420
  }
421
#endif
422
423
16
  REGISTER_INI_ENTRIES();
424
425
16
  php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
426
427
16
  register_php_pcre_symbols(module_number);
428
429
16
  return SUCCESS;
430
16
}
431
/* }}} */
432
433
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
434
static PHP_MSHUTDOWN_FUNCTION(pcre)
435
0
{
436
0
  UNREGISTER_INI_ENTRIES();
437
438
0
  free(php_pcre_version);
439
440
0
  return SUCCESS;
441
0
}
442
/* }}} */
443
444
/* {{{ PHP_RINIT_FUNCTION(pcre) */
445
static PHP_RINIT_FUNCTION(pcre)
446
228k
{
447
#ifdef HAVE_PCRE_JIT_SUPPORT
448
  if (UNEXPECTED(!pcre2_init_ok)) {
449
    /* Retry. */
450
    php_pcre_mutex_lock();
451
    php_pcre_init_pcre2(PCRE_G(jit));
452
    if (!pcre2_init_ok) {
453
      php_pcre_mutex_unlock();
454
      return FAILURE;
455
    }
456
    php_pcre_mutex_unlock();
457
  }
458
459
  mdata_used = 0;
460
#endif
461
462
228k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
463
228k
  PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
464
228k
  if (!PCRE_G(gctx_zmm)) {
465
0
    return FAILURE;
466
0
  }
467
468
228k
  return SUCCESS;
469
228k
}
470
/* }}} */
471
472
static PHP_RSHUTDOWN_FUNCTION(pcre)
473
228k
{
474
228k
  pcre_cache_entry *pce;
475
56.8M
  ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
476
56.8M
    if (pce->subpats_table) {
477
0
      free_subpats_table(pce->subpats_table, pce->capture_count + 1);
478
0
      pce->subpats_table = NULL;
479
0
    }
480
56.8M
  } ZEND_HASH_FOREACH_END();
481
482
228k
  pcre2_general_context_free(PCRE_G(gctx_zmm));
483
228k
  PCRE_G(gctx_zmm) = NULL;
484
485
228k
  zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
486
228k
  zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
487
228k
  ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
488
228k
  ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
489
228k
  return SUCCESS;
490
228k
}
491
492
/* {{{ static pcre_clean_cache */
493
/* Hash apply callback used to evict cache entries.
 *
 * data: cache slot holding a pcre_cache_entry pointer.
 * arg:  pointer to an int holding the number of entries still to remove.
 *
 * Entries with a non-zero refcount are kept. Every other entry is removed and
 * the counter decremented; iteration stops once the counter reaches zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
	const pcre_cache_entry *entry = (const pcre_cache_entry *) Z_PTR_P(data);
	int *remaining = (int *) arg;

	if (entry->refcount != 0) {
		return ZEND_HASH_APPLY_KEEP;
	}

	*remaining -= 1;
	if (*remaining == 0) {
		return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
	}
	return ZEND_HASH_APPLY_REMOVE;
}
507
/* }}} */
508
509
0
/* Releases every name stored in a capture name table and then the table
 * itself. num_subpats is the number of slots; empty (NULL) slots are skipped. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	for (uint32_t slot = 0; slot < num_subpats; slot++) {
		zend_string *name = subpat_names[slot];

		if (name != NULL) {
			zend_string_release_ex(name, false);
		}
	}
	efree(subpat_names);
}
518
519
/* {{{ static make_subpats_table */
520
/* Builds the capture name table of a pattern.
 *
 * name_cnt: number of entries to read from the pattern's name table.
 * pce:      cache entry providing the compiled pattern and its capture count.
 *
 * Returns an ecalloc()ed array of capture_count + 1 slots, indexed by group
 * number, in which named groups hold a non-persistent zend_string and all
 * other slots are NULL. Returns NULL (after emitting a warning) when the name
 * table cannot be queried. */
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
	char *entry;
	uint32_t entry_size;
	zend_string **names;

	int rc_table = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &entry);
	int rc_size = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &entry_size);
	if (rc_table < 0 || rc_size < 0) {
		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc_table < 0 ? rc_table : rc_size);
		return NULL;
	}

	names = ecalloc(pce->capture_count + 1, sizeof(zend_string *));

	for (uint32_t i = 0; i < name_cnt; i++, entry += entry_size) {
		/* Each entry starts with the group number as two bytes, most
		 * significant first, followed by the NUL-terminated name. */
		unsigned short group = 0x100 * (unsigned char)entry[0] + (unsigned char)entry[1];
		const char *group_name = entry + 2;

		names[group] = zend_string_init(group_name, strlen(group_name), false);
	}

	return names;
}
544
/* }}} */
545
546
/* Returns the entry's capture name table, building and caching it on the
 * entry first when it does not exist yet. The result is NULL when building
 * the table failed. */
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
{
	if (pce->subpats_table != NULL) {
		return pce->subpats_table;
	}

	pce->subpats_table = make_subpats_table(name_cnt, pce);
	return pce->subpats_table;
}
553
554
/* {{{ static calculate_unit_length */
555
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
556
/* Returns the byte length of the character at `start`: always 1 for patterns
 * compiled without PCRE2_UTF, otherwise the lead byte plus all continuation
 * bytes (10xxxxxx) that follow it. The UTF case assumes valid UTF-8. */
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
{
	const char *cursor;

	if (!(pce->compile_options & PCRE2_UTF)) {
		return 1;
	}

	/* The first byte always belongs to the character; scan from the second. */
	cursor = start + 1;
	while ((*cursor & 0xC0) == 0x80) {
		cursor++;
	}

	return cursor - start;
}
571
/* }}} */
572
573
/* {{{ pcre_get_compiled_regex_cache */
574
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
575
4.98k
{
576
4.98k
  pcre2_code      *re = NULL;
577
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
578
  uint32_t       coptions = PCRE2_NO_START_OPTIMIZE;
579
#else
580
4.98k
  uint32_t       coptions = 0;
581
4.98k
#endif
582
4.98k
  uint32_t       eoptions = 0;
583
4.98k
  PCRE2_UCHAR           error[128];
584
4.98k
  PCRE2_SIZE           erroffset;
585
4.98k
  int                  errnumber;
586
4.98k
  char         delimiter;
587
4.98k
  char         start_delimiter;
588
4.98k
  char         end_delimiter;
589
4.98k
  char        *p, *pp;
590
4.98k
  char        *pattern;
591
4.98k
  size_t         pattern_len;
592
4.98k
  uint32_t       poptions = 0;
593
4.98k
  const uint8_t       *tables = NULL;
594
4.98k
  zval                *zv;
595
4.98k
  pcre_cache_entry   new_entry;
596
4.98k
  int          rc;
597
4.98k
  zend_string     *key;
598
4.98k
  pcre_cache_entry  *ret;
599
600
4.98k
  if (locale_aware && BG(ctype_string)) {
601
0
    key = zend_string_concat2(
602
0
      ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
603
0
      ZSTR_VAL(regex), ZSTR_LEN(regex));
604
4.98k
  } else {
605
4.98k
    key = regex;
606
4.98k
  }
607
608
  /* Try to lookup the cached regex entry, and if successful, just pass
609
     back the compiled pattern, otherwise go on and compile it. */
610
4.98k
  zv = zend_hash_find(&PCRE_G(pcre_cache), key);
611
4.98k
  if (zv) {
612
2.47k
    if (key != regex) {
613
0
      zend_string_release_ex(key, 0);
614
0
    }
615
2.47k
    return (pcre_cache_entry*)Z_PTR_P(zv);
616
2.47k
  }
617
618
2.51k
  p = ZSTR_VAL(regex);
619
2.51k
  const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
620
621
  /* Parse through the leading whitespace, and display a warning if we
622
     get to the end without encountering a delimiter. */
623
2.51k
  while (isspace((unsigned char)*p)) p++;
624
2.51k
  if (p >= end_p) {
625
3
    if (key != regex) {
626
0
      zend_string_release_ex(key, 0);
627
0
    }
628
3
    php_error_docref(NULL, E_WARNING, "Empty regular expression");
629
3
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
630
3
    return NULL;
631
3
  }
632
633
  /* Get the delimiter and display a warning if it is alphanumeric
634
     or a backslash. */
635
2.50k
  delimiter = *p++;
636
2.50k
  if (isalnum((unsigned char)delimiter) || delimiter == '\\' || delimiter == '\0') {
637
24
    if (key != regex) {
638
0
      zend_string_release_ex(key, 0);
639
0
    }
640
24
    php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
641
24
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
642
24
    return NULL;
643
24
  }
644
645
2.48k
  start_delimiter = delimiter;
646
2.48k
  if ((pp = strchr("([{< )]}> )]}>", delimiter)))
647
64
    delimiter = pp[5];
648
2.48k
  end_delimiter = delimiter;
649
650
2.48k
  pp = p;
651
652
2.48k
  if (start_delimiter == end_delimiter) {
653
    /* We need to iterate through the pattern, searching for the ending delimiter,
654
       but skipping the backslashed delimiters.  If the ending delimiter is not
655
       found, display a warning. */
656
929k
    while (pp < end_p) {
657
929k
      if (*pp == '\\' && pp + 1 < end_p) pp++;
658
888k
      else if (*pp == delimiter)
659
2.40k
        break;
660
926k
      pp++;
661
926k
    }
662
2.43k
  } else {
663
    /* We iterate through the pattern, searching for the matching ending
664
     * delimiter. For each matching starting delimiter, we increment nesting
665
     * level, and decrement it for each matching ending delimiter. If we
666
     * reach the end of the pattern without matching, display a warning.
667
     */
668
52
    int brackets = 1;   /* brackets nesting level */
669
28.9k
    while (pp < end_p) {
670
28.8k
      if (*pp == '\\' && pp + 1 < end_p) pp++;
671
28.2k
      else if (*pp == end_delimiter && --brackets <= 0)
672
4
        break;
673
28.2k
      else if (*pp == start_delimiter)
674
1.37k
        brackets++;
675
28.8k
      pp++;
676
28.8k
    }
677
52
  }
678
679
2.48k
  if (pp >= end_p) {
680
71
    if (key != regex) {
681
0
      zend_string_release_ex(key, 0);
682
0
    }
683
71
    if (start_delimiter == end_delimiter) {
684
23
      php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
685
48
    } else {
686
48
      php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
687
48
    }
688
71
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
689
71
    return NULL;
690
71
  }
691
692
  /* Make a copy of the actual pattern. */
693
2.41k
  pattern_len = pp - p;
694
2.41k
  pattern = estrndup(p, pattern_len);
695
696
  /* Move on to the options */
697
2.41k
  pp++;
698
699
  /* Parse through the options, setting appropriate flags.  Display
700
     a warning if we encounter an unknown modifier. */
701
5.37k
  while (pp < end_p) {
702
3.16k
    switch (*pp++) {
703
      /* Perl compatible options */
704
1.01k
      case 'i': coptions |= PCRE2_CASELESS;   break;
705
149
      case 'm': coptions |= PCRE2_MULTILINE;   break;
706
42
      case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
707
189
      case 's': coptions |= PCRE2_DOTALL;   break;
708
27
      case 'x': coptions |= PCRE2_EXTENDED;   break;
709
710
      /* PCRE specific options */
711
312
      case 'A': coptions |= PCRE2_ANCHORED;   break;
712
5
      case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
713
0
#ifdef PCRE2_EXTRA_CASELESS_RESTRICT
714
13
      case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
715
0
#endif
716
2
      case 'S': /* Pass. */         break;
717
1
      case 'X': /* Pass. */         break;
718
262
      case 'U': coptions |= PCRE2_UNGREEDY;   break;
719
598
      case 'u': coptions |= PCRE2_UTF;
720
  /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
721
     characters, even in UTF-8 mode. However, this can be changed by setting
722
     the PCRE2_UCP option. */
723
598
#ifdef PCRE2_UCP
724
598
            coptions |= PCRE2_UCP;
725
598
#endif
726
598
        break;
727
17
      case 'J': coptions |= PCRE2_DUPNAMES;   break;
728
729
15
      case ' ':
730
313
      case '\n':
731
332
      case '\r':
732
332
        break;
733
734
0
      case 'e': /* legacy eval */
735
196
      default:
736
196
        if (pp[-1]) {
737
173
          php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
738
173
        } else {
739
23
          php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
740
23
        }
741
196
        pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
742
196
        efree(pattern);
743
196
        if (key != regex) {
744
0
          zend_string_release_ex(key, 0);
745
0
        }
746
196
        return NULL;
747
3.16k
    }
748
3.16k
  }
749
750
2.21k
  if (key != regex) {
751
0
    zv = zend_hash_str_lookup(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
752
0
    if (Z_ISNULL_P(zv)) {
753
0
      tables = pcre2_maketables(gctx);
754
0
      if (UNEXPECTED(!tables)) {
755
        /* Remove the placeholder entry created by zend_hash_str_lookup(),
756
         * set ptr to NULL first so the destructor (pefree) is safe. */
757
0
        ZVAL_PTR(zv, NULL);
758
0
        zend_hash_str_del(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
759
0
        php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
760
0
        pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
761
0
        zend_string_release_ex(key, 0);
762
0
        efree(pattern);
763
0
        return NULL;
764
0
      }
765
0
      ZVAL_PTR(zv, (void *)tables);
766
0
    } else {
767
0
      tables = Z_PTR_P(zv);
768
0
    }
769
0
  }
770
2.21k
  pcre2_set_character_tables(cctx, tables);
771
772
2.21k
  pcre2_set_compile_extra_options(cctx, eoptions);
773
774
  /* Compile pattern and display a warning if compilation failed. */
775
2.21k
  re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
776
777
2.21k
  if (re == NULL) {
778
879
    if (key != regex) {
779
0
      zend_string_release_ex(key, 0);
780
0
    }
781
879
    pcre2_get_error_message(errnumber, error, sizeof(error));
782
879
    php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
783
879
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
784
879
    efree(pattern);
785
879
    return NULL;
786
879
  }
787
788
#ifdef HAVE_PCRE_JIT_SUPPORT
789
  if (PCRE_G(jit)) {
790
    /* Enable PCRE JIT compiler */
791
    rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
792
    if (EXPECTED(rc >= 0)) {
793
      size_t jit_size = 0;
794
      if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
795
        poptions |= PREG_JIT;
796
      }
797
    } else if (rc == PCRE2_ERROR_NOMEMORY) {
798
      php_error_docref(NULL, E_WARNING,
799
        "Allocation of JIT memory failed, PCRE JIT will be disabled. "
800
        "This is likely caused by security restrictions. "
801
        "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
802
      PCRE_G(jit) = 0;
803
    } else {
804
      pcre2_get_error_message(rc, error, sizeof(error));
805
      php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
806
      pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
807
    }
808
  }
809
#endif
810
1.33k
  efree(pattern);
811
812
  /*
813
   * If we reached cache limit, clean out the items from the head of the list;
814
   * these are supposedly the oldest ones (but not necessarily the least used
815
   * ones).
816
   */
817
1.33k
  if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
818
0
    int num_clean = PCRE_CACHE_SIZE / 8;
819
0
    zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
820
0
  }
821
822
  /* Store the compiled pattern and extra info in the cache. */
823
1.33k
  new_entry.re = re;
824
1.33k
  new_entry.preg_options = poptions;
825
1.33k
  new_entry.compile_options = coptions;
826
1.33k
  new_entry.refcount = 0;
827
1.33k
  new_entry.subpats_table = NULL;
828
829
1.33k
  if ((rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count)) < 0 ||
830
1.33k
      (rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count)) < 0) {
831
0
    if (key != regex) {
832
0
      zend_string_release_ex(key, 0);
833
0
    }
834
0
    pcre2_code_free(new_entry.re);
835
0
    php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
836
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
837
0
    return NULL;
838
0
  }
839
840
  /*
841
   * Interned strings are not duplicated when stored in HashTable,
842
   * but all the interned strings created during HTTP request are removed
843
   * at end of request. However PCRE_G(pcre_cache) must be consistent
844
   * on the next request as well. So we disable usage of interned strings
845
   * as hash keys especually for this table.
846
   * See bug #63180
847
   */
848
1.33k
  if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
849
577
    zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
850
577
    GC_MAKE_PERSISTENT_LOCAL(str);
851
852
577
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
853
577
    zend_string_release(str);
854
760
  } else {
855
760
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
856
760
  }
857
858
1.33k
  if (key != regex) {
859
0
    zend_string_release_ex(key, 0);
860
0
  }
861
862
1.33k
  return ret;
863
1.33k
}
864
/* }}} */
865
866
/* {{{ pcre_get_compiled_regex_cache */
867
/* Convenience wrapper: looks up or compiles `regex` with locale awareness
 * switched on. */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
	const bool locale_aware = true;

	return pcre_get_compiled_regex_cache_ex(regex, locale_aware);
}
871
/* }}} */
872
873
/* {{{ pcre_get_compiled_regex */
874
/* Returns the compiled pattern for `regex`, or NULL when it cannot be
 * obtained. When `capture_count` is not NULL it receives the pattern's
 * capture count, or 0 on failure. */
PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
{
	pcre_cache_entry *entry = pcre_get_compiled_regex_cache(regex);

	if (entry == NULL) {
		if (capture_count != NULL) {
			*capture_count = 0;
		}
		return NULL;
	}

	if (capture_count != NULL) {
		*capture_count = entry->capture_count;
	}
	return entry->re;
}
884
/* }}} */
885
886
/* XXX For the cases where it's only about match yes/no and no capture
887
    required, perhaps just a minimum sized data would suffice. */
888
/* Provides a match data block for `re`.
 *
 * capture_count: number of capture groups of the pattern, or 0 to have it
 *                queried from `re`.
 * re:            compiled pattern; must not be NULL.
 *
 * The preallocated block is handed out (and marked as in use) when it is
 * currently free and large enough for capture_count + 1 pairs. In every other
 * case a new block sized from the pattern is created. */
PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
{/*{{{*/

	assert(NULL != re);

	if (EXPECTED(!mdata_used)) {
		/* As we deal with a non cached pattern, no other way to gather this info. */
		int rc = capture_count
			? 0
			: pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);

		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
			mdata_used = 1;
			return mdata;
		}
	}

	return pcre2_match_data_create_from_pattern(re, gctx);
}/*}}}*/
909
910
/* Gives a match data block back: the preallocated block is only marked as
 * free again, any other block is released. */
PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
{/*{{{*/
	if (EXPECTED(match_data == mdata)) {
		mdata_used = 0;
		return;
	}

	pcre2_match_data_free(match_data);
}/*}}}*/
918
919
0
/* Stores a new two-element array [NULL, -1] in `pair`. */
static void init_unmatched_null_pair(zval *pair) {
	zval value, offset;

	ZVAL_LONG(&offset, -1);
	ZVAL_NULL(&value);
	ZVAL_ARR(pair, zend_new_pair(&value, &offset));
}
925
926
0
/* Stores a new two-element array ["", -1] in `pair`. */
static void init_unmatched_empty_pair(zval *pair) {
	zval value, offset;

	ZVAL_LONG(&offset, -1);
	ZVAL_EMPTY_STRING(&value);
	ZVAL_ARR(pair, zend_new_pair(&value, &offset));
}
932
933
static zend_always_inline void populate_match_value_str(
934
740
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
935
740
  ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
936
740
}
937
938
static zend_always_inline void populate_match_value(
939
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
940
740
    bool unmatched_as_null) {
941
740
  if (PCRE2_UNSET == start_offset) {
942
0
    if (unmatched_as_null) {
943
0
      ZVAL_NULL(val);
944
0
    } else {
945
0
      ZVAL_EMPTY_STRING(val);
946
0
    }
947
740
  } else {
948
740
    populate_match_value_str(val, subject, start_offset, end_offset);
949
740
  }
950
740
}
951
952
static inline void add_named(
953
0
    HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
954
0
  ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
955
956
  /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
957
   * In this case we want to preserve the one that actually has a value. */
958
0
  if (!unmatched) {
959
0
    zend_hash_update(subpats, name, val);
960
0
  } else {
961
0
    if (!zend_hash_add(subpats, name, val)) {
962
0
      return;
963
0
    }
964
0
  }
965
0
  Z_TRY_ADDREF_P(val);
966
0
}
967
968
/* {{{ add_offset_pair */
969
static inline void add_offset_pair(
970
    HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
971
    zend_string *name, zend_long unmatched_as_null)
972
0
{
973
0
  zval match_pair;
974
975
  /* Add (match, offset) to the return value */
976
0
  if (PCRE2_UNSET == start_offset) {
977
0
    if (unmatched_as_null) {
978
0
      do {
979
0
        if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
980
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
981
0
            init_unmatched_null_pair(&match_pair);
982
0
            break;
983
0
          } else {
984
0
            init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
985
0
          }
986
0
        }
987
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
988
0
      } while (0);
989
0
    } else {
990
0
      do {
991
0
        if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
992
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
993
0
            init_unmatched_empty_pair(&match_pair);
994
0
            break;
995
0
          } else {
996
0
            init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
997
0
          }
998
0
        }
999
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1000
0
      } while (0);
1001
0
    }
1002
0
  } else {
1003
0
    zval val1, val2;
1004
0
    populate_match_value_str(&val1, subject, start_offset, end_offset);
1005
0
    ZVAL_LONG(&val2, start_offset);
1006
0
    ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1007
0
  }
1008
1009
0
  if (name) {
1010
0
    add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1011
0
  }
1012
0
  zend_hash_next_index_insert_new(result, &match_pair);
1013
0
}
1014
/* }}} */
1015
1016
static void populate_subpat_array(
1017
    HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1018
375
    uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1019
375
  zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1020
375
  zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1021
375
  zval val;
1022
375
  int i;
1023
375
  if (subpat_names) {
1024
0
    if (offset_capture) {
1025
0
      for (i = 0; i < count; i++) {
1026
0
        add_offset_pair(
1027
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1],
1028
0
          subpat_names[i], unmatched_as_null);
1029
0
      }
1030
0
      if (unmatched_as_null) {
1031
0
        for (i = count; i < num_subpats; i++) {
1032
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1033
0
        }
1034
0
      }
1035
0
    } else {
1036
0
      for (i = 0; i < count; i++) {
1037
0
        populate_match_value(
1038
0
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1039
0
        if (subpat_names[i]) {
1040
0
          add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
1041
0
        }
1042
0
        zend_hash_next_index_insert_new(subpats_ht, &val);
1043
0
      }
1044
0
      if (unmatched_as_null) {
1045
0
        for (i = count; i < num_subpats; i++) {
1046
0
          ZVAL_NULL(&val);
1047
0
          if (subpat_names[i]) {
1048
0
            zend_hash_add(subpats_ht, subpat_names[i], &val);
1049
0
          }
1050
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1051
0
        }
1052
0
      }
1053
0
    }
1054
375
  } else {
1055
375
    if (offset_capture) {
1056
0
      for (i = 0; i < count; i++) {
1057
0
        add_offset_pair(
1058
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1059
0
      }
1060
0
      if (unmatched_as_null) {
1061
0
        for (i = count; i < num_subpats; i++) {
1062
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1063
0
        }
1064
0
      }
1065
375
    } else {
1066
1.11k
      for (i = 0; i < count; i++) {
1067
740
        populate_match_value(
1068
740
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1069
740
        zend_hash_next_index_insert_new(subpats_ht, &val);
1070
740
      }
1071
375
      if (unmatched_as_null) {
1072
0
        ZVAL_NULL(&val);
1073
0
        for (i = count; i < num_subpats; i++) {
1074
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1075
0
        }
1076
0
      }
1077
375
    }
1078
375
  }
1079
  /* Add MARK, if available */
1080
375
  if (mark) {
1081
0
    ZVAL_STRING(&val, (char *)mark);
1082
0
    zend_hash_str_update(subpats_ht, ZEND_STRL("MARK"), &val);
1083
0
  }
1084
375
}
1085
1086
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
{
  /*
   * Shared body of preg_match() and preg_match_all(): parses
   * (regex, subject [, &subpats [, flags [, offset]]]), resolves the
   * compiled pattern and delegates to php_pcre_match_impl().
   */
  zend_string *regex;                /* Regular expression */
  zend_string *subject;              /* String to match against */
  zval *subpats = NULL;              /* Array for subpatterns */
  zend_long flags = 0;               /* Match control flags */
  zend_long start_offset = 0;        /* Where the new search starts */
  pcre_cache_entry *pce;             /* Compiled regular expression */

  ZEND_PARSE_PARAMETERS_START(2, 5)
    Z_PARAM_STR(regex)
    Z_PARAM_STR(subject)
    Z_PARAM_OPTIONAL
    Z_PARAM_ZVAL(subpats)
    Z_PARAM_LONG(flags)
    Z_PARAM_LONG(start_offset)
  ZEND_PARSE_PARAMETERS_END();

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (NULL == pce) {
    RETURN_FALSE;
  }

  /* php_pcre_match_impl() negates negative offsets, which cannot be done
   * for the minimum value. */
  if (ZEND_LONG_MIN == start_offset) {
    zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
    RETURN_THROWS();
  }

  /* The entry's refcount is raised for the duration of the match. */
  pce->refcount++;
  php_pcre_match_impl(pce, subject, return_value, subpats, global, flags, start_offset);
  pce->refcount--;
}
1120
/* }}} */
1121
1122
static zend_always_inline bool is_known_valid_utf8(
    zend_string *subject_str, PCRE2_SIZE start_offset) {
  /*
   * True only when the string is already flagged as valid UTF-8 AND
   * start_offset does not land inside a multi-byte sequence.
   */
  if (ZSTR_IS_VALID_UTF8(subject_str)) {
    /* Degenerate case: Offset points to end of string. */
    if (ZSTR_LEN(subject_str) == start_offset) {
      return true;
    }

    /* Continuation bytes look like 10xxxxxx; anything else starts a character. */
    return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
  }

  /* We don't know whether the string is valid UTF-8 or not. */
  return false;
}
1137
1138
/* {{{ php_pcre_match_impl() */
1139
/*
 * Match a compiled pattern against subject_str.
 *
 * pce          - compiled pattern (cache entry)
 * subject_str  - subject string
 * return_value - receives the number of matches, or false on error
 * subpats      - optional by-reference zval; it is replaced by a fresh array
 *                and filled with the captured groups
 * global       - false: stop after the first match (preg_match);
 *                true: collect every match (preg_match_all)
 * flags        - PREG_OFFSET_CAPTURE / PREG_UNMATCHED_AS_NULL, plus, in the
 *                low byte and only for global matching, PREG_PATTERN_ORDER or
 *                PREG_SET_ORDER
 * start_offset - byte offset to start at; negative values count from the end
 *                of the subject
 *
 * The preallocated match data block is borrowed when it is free and large
 * enough. Every exit path taken after it has been borrowed restores
 * mdata_used to the value it had on entry.
 */
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
  zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
{
  zval       result_set;    /* Holds a set of subpatterns after
                       a global match */
  HashTable    **match_sets = NULL; /* An array of sets of matches for each
                       subpattern after a global match */
  uint32_t     options;     /* Execution options */
  int        count;       /* Count of matched subpatterns */
  uint32_t     num_subpats;   /* Number of captured subpatterns */
  int        matched;     /* Has anything matched */
  zend_string    **subpat_names;    /* Array for named subpatterns */
  size_t       i;
  uint32_t     subpats_order;   /* Order of subpattern matches */
  uint32_t     offset_capture;  /* Capture match offsets: yes/no */
  zend_long    unmatched_as_null; /* Null non-matches: yes/no */
  PCRE2_SPTR       mark = NULL;   /* Target for MARK name */
  HashTable   *marks = NULL;   /* Array of marks for PREG_PATTERN_ORDER */
  pcre2_match_data *match_data;
  PCRE2_SIZE     start_offset2, orig_start_offset;
  bool old_mdata_used;

  char *subject = ZSTR_VAL(subject_str);
  size_t subject_len = ZSTR_LEN(subject_str);

  /* Overwrite the passed-in value for subpatterns with an empty array. */
  if (subpats != NULL) {
    subpats = zend_try_array_init(subpats);
    if (!subpats) {
      RETURN_THROWS();
    }
  }

  subpats_order = global ? PREG_PATTERN_ORDER : 0;

  if (flags) {
    offset_capture = flags & PREG_OFFSET_CAPTURE;
    unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;

    /*
     * subpats_order is pre-set to pattern mode so we change it only if
     * necessary.
     */
    if (flags & 0xff) {
      subpats_order = flags & 0xff;
      if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
        (!global && subpats_order != 0)) {
        zend_argument_value_error(4, "must be a PREG_* constant");
        RETURN_THROWS();
      }
    }
  } else {
    offset_capture = 0;
    unmatched_as_null = 0;
  }

  /* Negative offset counts from the end of the string. */
  if (start_offset < 0) {
    if ((PCRE2_SIZE)-start_offset <= subject_len) {
      start_offset2 = subject_len + start_offset;
    } else {
      start_offset2 = 0;
    }
  } else {
    start_offset2 = (PCRE2_SIZE)start_offset;
  }

  if (start_offset2 > subject_len) {
    pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
    RETURN_FALSE;
  }

  /* Calculate the size of the offsets array, and allocate memory for it. */
  num_subpats = pce->capture_count + 1;

  /*
   * Build a mapping from subpattern numbers to their names. We will
   * allocate the table only if there are any named subpatterns.
   */
  subpat_names = NULL;
  if (subpats && pce->name_count > 0) {
    subpat_names = ensure_subpats_table(pce->name_count, pce);
    if (UNEXPECTED(!subpat_names)) {
      RETURN_FALSE;
    }
  }

  matched = 0;
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Borrow the preallocated match data when possible; remember the previous
   * state of the flag so it can be put back on the way out. */
  old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      RETURN_FALSE;
    }
  }

  /* Allocate match sets array and initialize the values. */
  if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
    match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
    for (i=0; i<num_subpats; i++) {
      match_sets[i] = zend_new_array(0);
    }
  }

  /* Array of subpattern offsets */
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);

  orig_start_offset = start_offset2;
  /* Only ask PCRE2 to validate UTF-8 for /u patterns whose subject is not
   * already known to be valid. */
  options =
    (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
      ? 0 : PCRE2_NO_UTF_CHECK;

  /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
  if ((pce->preg_options & PREG_JIT) && options) {
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  } else
#endif
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
      options, match_data, mctx);

  while (1) {
    /* If something has matched */
    if (count >= 0) {
      /* Check for too many substrings condition. */
      if (UNEXPECTED(count == 0)) {
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
        count = num_subpats;
      }

matched:
      matched++;

      /* If subpatterns array has been passed, fill it in with values. */
      if (subpats != NULL) {
        /* Try to get the list of substrings and display a warning if failed. */
        if (UNEXPECTED(offsets[1] < offsets[0])) {
          if (match_sets) {
            for (i = 0; i < num_subpats; i++) {
              zend_array_destroy(match_sets[i]);
            }
            efree(match_sets);
          }
          if (marks) {
            zend_array_destroy(marks);
          }
          if (match_data != mdata) {
            pcre2_match_data_free(match_data);
          }
          /* Give the preallocated match data back before bailing out, so
           * later calls are not locked out of it. Done before the warning
           * is raised because match_data is no longer used past this point. */
          mdata_used = old_mdata_used;
          php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
          RETURN_FALSE;
        }

        if (global) { /* global pattern matching */
          if (subpats_order == PREG_PATTERN_ORDER) {
            /* For each subpattern, insert it into the appropriate array. */
            if (offset_capture) {
              for (i = 0; i < count; i++) {
                add_offset_pair(
                  match_sets[i], subject, offsets[2*i], offsets[2*i+1],
                  NULL, unmatched_as_null);
              }
            } else {
              for (i = 0; i < count; i++) {
                zval val;
                populate_match_value(
                  &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
                zend_hash_next_index_insert_new(match_sets[i], &val);
              }
            }
            mark = pcre2_get_mark(match_data);
            /* Add MARK, if available */
            if (mark) {
              if (!marks) {
                marks = zend_new_array(0);
              }
              zval tmp;
              ZVAL_STRING(&tmp, (char *) mark);
              zend_hash_index_add_new(marks, matched - 1, &tmp);
            }
            /*
             * If the number of captured subpatterns on this run is
             * less than the total possible number, pad the result
             * arrays with NULLs or empty strings.
             */
            if (count < num_subpats) {
              /* count is positive here, so the conversion is lossless. */
              for (i = (size_t)count; i < num_subpats; i++) {
                if (offset_capture) {
                  add_offset_pair(
                    match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
                    NULL, unmatched_as_null);
                } else if (unmatched_as_null) {
                  zval tmp;
                  ZVAL_NULL(&tmp);
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
                } else {
                  zval tmp;
                  ZVAL_EMPTY_STRING(&tmp);
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
                }
              }
            }
          } else {
            /* Allocate and populate the result set array */
            mark = pcre2_get_mark(match_data);
            array_init_size(&result_set, count + (mark ? 1 : 0));
            populate_subpat_array(
              Z_ARRVAL(result_set), subject, offsets, subpat_names,
              num_subpats, count, mark, flags);
            /* And add it to the output array */
            zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
          }
        } else {     /* single pattern matching */
          /* For each subpattern, insert it into the subpatterns array. */
          mark = pcre2_get_mark(match_data);
          populate_subpat_array(
            Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags);
          break;
        }
      }

      /* Advance to the next piece. */
      start_offset2 = offsets[1];

      /* If we have matched an empty string, mimic what Perl's /g options does.
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
         the match again at the same point. If this fails (picked up above) we
         advance to the next character. */
      if (start_offset2 == offsets[0]) {
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
        if (count >= 0) {
          if (global) {
            goto matched;
          } else {
            break;
          }
        } else if (count == PCRE2_ERROR_NOMATCH) {
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
             this is not necessarily the end. We need to advance
             the start offset, and continue. Fudge the offset values
             to achieve this, unless we're already at the end of the string. */
          if (start_offset2 < subject_len) {
            size_t unit_len = calculate_unit_length(pce, subject + start_offset2);

            start_offset2 += unit_len;
          } else {
            break;
          }
        } else {
          goto error;
        }
      }
    } else if (count == PCRE2_ERROR_NOMATCH) {
      break;
    } else {
error:
      pcre_handle_exec_error(count);
      break;
    }

    if (!global) {
      break;
    }

    /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT)) {
      if (start_offset2 > subject_len) {
        pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
        break;
      }
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
        PCRE2_NO_UTF_CHECK, match_data, mctx);
  }
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }
  mdata_used = old_mdata_used;

  /* Add the match sets to the output array and clean up */
  if (match_sets) {
    if (subpat_names) {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        if (subpat_names[i]) {
          /* The same array is stored under the name and the index, hence
           * the extra reference. */
          zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
          GC_ADDREF(match_sets[i]);
        }
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    } else {
      for (i = 0; i < num_subpats; i++) {
        zval wrapper;
        ZVAL_ARR(&wrapper, match_sets[i]);
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
      }
    }
    efree(match_sets);

    if (marks) {
      zval tmp;
      ZVAL_ARR(&tmp, marks);
      zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
    }
  }

  if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
    /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
    if ((pce->compile_options & PCRE2_UTF)
        && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
      GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
    }

    RETVAL_LONG(matched);
  } else {
    RETVAL_FALSE;
  }
}
1470
/* }}} */
1471
1472
/* {{{ Perform a Perl-style regular expression match */
1473
PHP_FUNCTION(preg_match)
1474
4.43k
{
1475
4.43k
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1476
4.43k
}
1477
/* }}} */
1478
1479
ZEND_FRAMELESS_FUNCTION(preg_match, 2)
{
  /*
   * Two-argument frameless variant of preg_match(): no subpatterns array,
   * no flags, start offset 0.
   */
  zend_string *regex, *subject;
  zval regex_tmp, subject_tmp;
  pcre_cache_entry *pce;

  Z_FLF_PARAM_STR(1, regex, regex_tmp);
  Z_FLF_PARAM_STR(2, subject, subject_tmp);

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (pce != NULL) {
    pce->refcount++;
    php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
      /* global */ false, /* flags */ 0, /* start_offset */ 0);
    pce->refcount--;
  } else {
    RETVAL_FALSE;
  }

  /* Label name kept as-is: presumably the Z_FLF_PARAM_* macros jump here
   * on failure -- verify against their definition. */
flf_clean:
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
  Z_FLF_PARAM_FREE_STR(2, subject_tmp);
}
1503
1504
/* {{{ Perform a Perl-style global regular expression match */
1505
PHP_FUNCTION(preg_match_all)
1506
0
{
1507
0
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1508
0
}
1509
/* }}} */
1510
1511
/* {{{ preg_get_backref */
1512
static int preg_get_backref(char **str, int *backref)
{
  /*
   * Try to read a back-reference at *str, which points at the introducing
   * character. Accepted forms are <intro>N, <intro>NN and ${N} / ${NN},
   * where N is a decimal digit.
   *
   * On success returns 1, stores the number in *backref and moves *str past
   * the reference. On failure returns 0 and leaves *str untouched (*backref
   * may already have been written).
   */
  char *cursor = *str;
  int braced = 0;

  /* Nothing follows the introducing character. */
  if (cursor[1] == '\0') {
    return 0;
  }

  /* "${" opens the braced form: step over '$' so the next step skips '{'. */
  if (cursor[0] == '$' && cursor[1] == '{') {
    braced = 1;
    cursor++;
  }
  cursor++;

  /* First digit is mandatory. */
  if (*cursor < '0' || *cursor > '9') {
    return 0;
  }
  *backref = *cursor - '0';
  cursor++;

  /* Optional second digit; at most two digits are consumed. */
  if (*cursor >= '0' && *cursor <= '9') {
    *backref = *backref * 10 + *cursor - '0';
    cursor++;
  }

  /* The braced form must be closed right after the digits. */
  if (braced) {
    if (*cursor != '}') {
      return 0;
    }
    cursor++;
  }

  *str = cursor;
  return 1;
}
1547
/* }}} */
1548
1549
/* Return NULL if an exception has occurred */
1550
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
  /*
   * Invoke the user callback with the array of groups for the current
   * match and return its result as a string. Returns NULL when the
   * callback left the return value undefined; zval_try_get_string_func()
   * presumably also yields NULL when the conversion fails.
   */
  zval callback_arg;          /* Argument to pass to function */
  zval callback_ret;          /* Function return value */
  zend_string *replacement;

  array_init_size(&callback_arg, count + (mark ? 1 : 0));
  populate_subpat_array(Z_ARRVAL(callback_arg), subject, offsets, subpat_names, num_subpats, count, mark, flags);

  fci->params = &callback_arg;
  fci->param_count = 1;
  fci->retval = &callback_ret;
  fci->consumed_args = zend_fci_consumed_arg(0);
  zend_call_function(fci, fcc);
  zval_ptr_dtor(&callback_arg);

  /* Fast path: the callback returned a string, hand it over as is. */
  if (EXPECTED(Z_TYPE(callback_ret) == IS_STRING)) {
    return Z_STR(callback_ret);
  }

  replacement = NULL;
  if (EXPECTED(Z_TYPE(callback_ret) != IS_UNDEF)) {
    /* No Exception has occurred */
    replacement = zval_try_get_string_func(&callback_ret);
  }
  zval_ptr_dtor(&callback_ret);

  return replacement;
}
1576
1577
/* {{{ php_pcre_replace */
1578
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
                zend_string *subject_str,
                const char *subject, size_t subject_len,
                zend_string *replace_str,
                size_t limit, size_t *replace_count)
{
  /*
   * Resolve `regex` to a compiled cache entry and run the replacement
   * through php_pcre_replace_impl(). Returns NULL when an exception is
   * already pending or when no cache entry could be obtained; otherwise
   * returns whatever the implementation returns.
   */
  pcre_cache_entry *pce;    /* Compiled regular expression */
  zend_string *replaced;    /* Function result */

  /* Abort on pending exception, e.g. thrown from __toString(). */
  if (UNEXPECTED(EG(exception))) {
    return NULL;
  }

  /* Compile regex or get it from cache. */
  pce = pcre_get_compiled_regex_cache(regex);
  if (NULL == pce) {
    return NULL;
  }

  /* The entry's refcount is raised for the duration of the replacement. */
  pce->refcount++;
  replaced = php_pcre_replace_impl(
    pce, subject_str, subject, subject_len, replace_str, limit, replace_count);
  pce->refcount--;

  return replaced;
}
1603
/* }}} */
1604
1605
/* {{{ php_pcre_replace_impl() */
1606
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1607
195
{
1608
195
  uint32_t     options;     /* Execution options */
1609
195
  int        count;       /* Count of matched subpatterns */
1610
195
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1611
195
  size_t       new_len;     /* Length of needed storage */
1612
195
  size_t       alloc_len;     /* Actual allocated length */
1613
195
  size_t       match_len;     /* Length of the current match */
1614
195
  int        backref;     /* Backreference number */
1615
195
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1616
195
  size_t       last_end_offset; /* Where the last search ended */
1617
195
  char      *walkbuf,     /* Location of current replacement in the result */
1618
195
          *walk,        /* Used to walk the replacement string */
1619
195
           walk_last;     /* Last walked character */
1620
195
  const char    *match,       /* The current match */
1621
195
          *piece,       /* The current piece of subject */
1622
195
          *replace_end;   /* End of replacement string */
1623
195
  size_t      result_len;     /* Length of result */
1624
195
  zend_string   *result;      /* Result of replacement */
1625
195
  pcre2_match_data *match_data;
1626
195
  bool old_mdata_used;
1627
1628
  /* Calculate the size of the offsets array, and allocate memory for it. */
1629
195
  num_subpats = pce->capture_count + 1;
1630
195
  alloc_len = 0;
1631
195
  result = NULL;
1632
1633
  /* Initialize */
1634
195
  match = NULL;
1635
195
  start_offset = 0;
1636
195
  last_end_offset = 0;
1637
195
  result_len = 0;
1638
195
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1639
1640
195
  old_mdata_used = mdata_used;
1641
195
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1642
23
    mdata_used = true;
1643
23
    match_data = mdata;
1644
172
  } else {
1645
172
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1646
172
    if (!match_data) {
1647
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1648
0
      return NULL;
1649
0
    }
1650
172
  }
1651
1652
195
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1653
1654
  /* Array of subpattern offsets */
1655
195
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1656
1657
  /* Execute the regular expression. */
1658
#ifdef HAVE_PCRE_JIT_SUPPORT
1659
  if ((pce->preg_options & PREG_JIT) && options) {
1660
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1661
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1662
  } else
1663
#endif
1664
195
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1665
195
      options, match_data, mctx);
1666
1667
410
  while (1) {
1668
410
    piece = subject + last_end_offset;
1669
1670
410
    if (count >= 0 && limit > 0) {
1671
222
      bool simple_string;
1672
1673
      /* Check for too many substrings condition. */
1674
222
      if (UNEXPECTED(count == 0)) {
1675
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1676
0
        count = num_subpats;
1677
0
      }
1678
1679
225
matched:
1680
225
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1681
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1682
0
        if (result) {
1683
0
          zend_string_release_ex(result, 0);
1684
0
          result = NULL;
1685
0
        }
1686
0
        break;
1687
0
      }
1688
1689
225
      if (replace_count) {
1690
225
        ++*replace_count;
1691
225
      }
1692
1693
      /* Set the match location in subject */
1694
225
      match = subject + offsets[0];
1695
1696
225
      new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1697
1698
225
      walk = ZSTR_VAL(replace_str);
1699
225
      replace_end = walk + ZSTR_LEN(replace_str);
1700
225
      walk_last = 0;
1701
225
      simple_string = true;
1702
512
      while (walk < replace_end) {
1703
287
        if ('\\' == *walk || '$' == *walk) {
1704
2
          simple_string = false;
1705
2
          if (walk_last == '\\') {
1706
0
            walk++;
1707
0
            walk_last = 0;
1708
0
            continue;
1709
0
          }
1710
2
          if (preg_get_backref(&walk, &backref)) {
1711
0
            if (backref < count)
1712
0
              new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1713
0
            continue;
1714
0
          }
1715
2
        }
1716
287
        new_len++;
1717
287
        walk++;
1718
287
        walk_last = walk[-1];
1719
287
      }
1720
1721
225
      if (new_len >= alloc_len) {
1722
121
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1723
121
        if (result == NULL) {
1724
97
          result = zend_string_alloc(alloc_len, 0);
1725
97
        } else {
1726
24
          result = zend_string_extend(result, alloc_len, 0);
1727
24
        }
1728
121
      }
1729
1730
225
      if (match-piece > 0) {
1731
        /* copy the part of the string before the match */
1732
212
        memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1733
212
        result_len += (match-piece);
1734
212
      }
1735
1736
225
      if (simple_string) {
1737
        /* copy replacement */
1738
223
        memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1739
223
        result_len += ZSTR_LEN(replace_str);
1740
223
      } else {
1741
        /* copy replacement and backrefs */
1742
2
        walkbuf = ZSTR_VAL(result) + result_len;
1743
1744
2
        walk = ZSTR_VAL(replace_str);
1745
2
        walk_last = 0;
1746
48
        while (walk < replace_end) {
1747
46
          if ('\\' == *walk || '$' == *walk) {
1748
2
            if (walk_last == '\\') {
1749
0
              *(walkbuf-1) = *walk++;
1750
0
              walk_last = 0;
1751
0
              continue;
1752
0
            }
1753
2
            if (preg_get_backref(&walk, &backref)) {
1754
0
              if (backref < count) {
1755
0
                if (offsets[backref<<1] < SIZE_MAX) {
1756
0
                  match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1757
0
                  walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1758
0
                }
1759
0
              }
1760
0
              continue;
1761
0
            }
1762
2
          }
1763
46
          *walkbuf++ = *walk++;
1764
46
          walk_last = walk[-1];
1765
46
        }
1766
2
        *walkbuf = '\0';
1767
        /* increment the result length by how much we've added to the string */
1768
2
        result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1769
2
      }
1770
1771
225
      limit--;
1772
1773
      /* Advance to the next piece. */
1774
225
      start_offset = last_end_offset = offsets[1];
1775
1776
      /* If we have matched an empty string, mimic what Perl's /g options does.
1777
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1778
         the match again at the same point. If this fails (picked up above) we
1779
         advance to the next character. */
1780
225
      if (start_offset == offsets[0]) {
1781
132
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1782
132
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1783
1784
132
        piece = subject + start_offset;
1785
132
        if (count >= 0 && limit > 0) {
1786
3
          goto matched;
1787
129
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1788
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1789
             this is not necessarily the end. We need to advance
1790
             the start offset, and continue. Fudge the offset values
1791
             to achieve this, unless we're already at the end of the string. */
1792
129
          if (start_offset < subject_len) {
1793
122
            size_t unit_len = calculate_unit_length(pce, piece);
1794
122
            start_offset += unit_len;
1795
122
          } else {
1796
7
            goto not_matched;
1797
7
          }
1798
129
        } else {
1799
0
          goto error;
1800
0
        }
1801
132
      }
1802
1803
225
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1804
195
not_matched:
1805
195
      if (!result && subject_str) {
1806
98
        result = zend_string_copy(subject_str);
1807
98
        break;
1808
98
      }
1809
      /* now we know exactly how long it is */
1810
97
      alloc_len = result_len + subject_len - last_end_offset;
1811
97
      if (NULL != result) {
1812
97
        result = zend_string_realloc(result, alloc_len, 0);
1813
97
      } else {
1814
0
        result = zend_string_alloc(alloc_len, 0);
1815
0
      }
1816
      /* stick that last bit of string on our output */
1817
97
      memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1818
97
      result_len += subject_len - last_end_offset;
1819
97
      ZSTR_VAL(result)[result_len] = '\0';
1820
97
      ZSTR_LEN(result) = result_len;
1821
97
      break;
1822
195
    } else {
1823
0
error:
1824
0
      pcre_handle_exec_error(count);
1825
0
      if (result) {
1826
0
        zend_string_release_ex(result, 0);
1827
0
        result = NULL;
1828
0
      }
1829
0
      break;
1830
0
    }
1831
1832
#ifdef HAVE_PCRE_JIT_SUPPORT
1833
    if (pce->preg_options & PREG_JIT) {
1834
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1835
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1836
    } else
1837
#endif
1838
215
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1839
215
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1840
215
  }
1841
195
  if (match_data != mdata) {
1842
172
    pcre2_match_data_free(match_data);
1843
172
  }
1844
195
  mdata_used = old_mdata_used;
1845
1846
195
  return result;
1847
195
}
1848
/* }}} */
1849
1850
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str,
1851
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1852
  size_t limit, size_t *replace_count, zend_long flags
1853
184
) {
1854
184
  uint32_t     options;     /* Execution options */
1855
184
  int        count;       /* Count of matched subpatterns */
1856
184
  zend_string   **subpat_names;   /* Array for named subpatterns */
1857
184
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1858
184
  size_t       alloc_len;     /* Actual allocated length */
1859
184
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1860
184
  size_t       last_end_offset; /* Where the last search ended */
1861
184
  const char    *match,       /* The current match */
1862
184
          *piece;       /* The current piece of subject */
1863
184
  size_t      result_len;     /* Length of result */
1864
184
  zend_string   *result;      /* Result of replacement */
1865
184
  pcre2_match_data *match_data;
1866
184
  bool old_mdata_used;
1867
1868
  /* Calculate the size of the offsets array, and allocate memory for it. */
1869
184
  num_subpats = pce->capture_count + 1;
1870
184
  if (pce->name_count > 0) {
1871
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1872
0
    if (UNEXPECTED(!subpat_names)) {
1873
0
      return NULL;
1874
0
    }
1875
184
  } else {
1876
184
    subpat_names = NULL;
1877
184
  }
1878
1879
184
  alloc_len = 0;
1880
184
  result = NULL;
1881
1882
  /* Initialize */
1883
184
  match = NULL;
1884
184
  start_offset = 0;
1885
184
  last_end_offset = 0;
1886
184
  result_len = 0;
1887
184
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1888
1889
184
  old_mdata_used = mdata_used;
1890
184
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1891
152
    mdata_used = 1;
1892
152
    match_data = mdata;
1893
152
  } else {
1894
32
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1895
32
    if (!match_data) {
1896
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1897
0
      mdata_used = old_mdata_used;
1898
0
      return NULL;
1899
0
    }
1900
32
  }
1901
1902
184
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1903
1904
  /* Array of subpattern offsets */
1905
184
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1906
1907
  /* Execute the regular expression. */
1908
#ifdef HAVE_PCRE_JIT_SUPPORT
1909
  if ((pce->preg_options & PREG_JIT) && options) {
1910
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1911
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1912
  } else
1913
#endif
1914
184
  count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1915
184
      options, match_data, mctx);
1916
1917
536
  while (1) {
1918
534
    piece = ZSTR_VAL(subject_str) + last_end_offset;
1919
1920
534
    if (count >= 0 && limit) {
1921
      /* Check for too many substrings condition. */
1922
375
      if (UNEXPECTED(count == 0)) {
1923
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1924
0
        count = num_subpats;
1925
0
      }
1926
1927
375
matched:
1928
375
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1929
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1930
0
        if (result) {
1931
0
          zend_string_release_ex(result, 0);
1932
0
          result = NULL;
1933
0
        }
1934
0
        break;
1935
0
      }
1936
1937
375
      if (replace_count) {
1938
375
        ++*replace_count;
1939
375
      }
1940
1941
      /* Set the match location in subject */
1942
375
      match = ZSTR_VAL(subject_str) + offsets[0];
1943
1944
      /* Length of needed storage */
1945
375
      size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1946
1947
      /* Use custom function to get replacement string and its length. */
1948
375
      zend_string *eval_result = preg_do_repl_func(
1949
375
        fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count,
1950
375
        pcre2_get_mark(match_data), flags);
1951
1952
375
      if (UNEXPECTED(eval_result == NULL)) {
1953
19
        goto error;
1954
19
      }
1955
356
      new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1956
356
      if (new_len >= alloc_len) {
1957
211
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1958
211
        if (result == NULL) {
1959
118
          result = zend_string_alloc(alloc_len, 0);
1960
118
        } else {
1961
93
          result = zend_string_extend(result, alloc_len, 0);
1962
93
        }
1963
211
      }
1964
1965
356
      if (match-piece > 0) {
1966
        /* copy the part of the string before the match */
1967
348
        memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1968
348
        result_len += (match-piece);
1969
348
      }
1970
1971
      /* If using custom function, copy result to the buffer and clean up. */
1972
356
      memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1973
356
      result_len += ZSTR_LEN(eval_result);
1974
356
      zend_string_release_ex(eval_result, 0);
1975
1976
356
      limit--;
1977
1978
      /* Advance to the next piece. */
1979
356
      start_offset = last_end_offset = offsets[1];
1980
1981
      /* If we have matched an empty string, mimic what Perl's /g options does.
1982
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1983
         the match again at the same point. If this fails (picked up above) we
1984
         advance to the next character. */
1985
356
      if (start_offset == offsets[0]) {
1986
130
        count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1987
130
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1988
1989
130
        piece = ZSTR_VAL(subject_str) + start_offset;
1990
130
        if (count >= 0 && limit) {
1991
0
          goto matched;
1992
130
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1993
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1994
             this is not necessarily the end. We need to advance
1995
             the start offset, and continue. Fudge the offset values
1996
             to achieve this, unless we're already at the end of the string. */
1997
130
          if (start_offset < ZSTR_LEN(subject_str)) {
1998
126
            size_t unit_len = calculate_unit_length(pce, piece);
1999
126
            start_offset += unit_len;
2000
126
          } else {
2001
4
            goto not_matched;
2002
4
          }
2003
130
        } else {
2004
0
          goto error;
2005
0
        }
2006
130
      }
2007
2008
356
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2009
163
not_matched:
2010
163
      if (result == NULL) {
2011
47
        result = zend_string_copy(subject_str);
2012
47
        break;
2013
47
      }
2014
      /* now we know exactly how long it is */
2015
116
      size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset;
2016
116
      alloc_len = result_len + segment_len;
2017
116
      result = zend_string_realloc(result, alloc_len, 0);
2018
      /* stick that last bit of string on our output */
2019
116
      memcpy(ZSTR_VAL(result) + result_len, piece, segment_len);
2020
116
      result_len += segment_len;
2021
116
      ZSTR_VAL(result)[result_len] = '\0';
2022
116
      ZSTR_LEN(result) = result_len;
2023
116
      break;
2024
163
    } else {
2025
19
error:
2026
19
      pcre_handle_exec_error(count);
2027
19
      if (result) {
2028
0
        zend_string_release_ex(result, 0);
2029
0
        result = NULL;
2030
0
      }
2031
19
      break;
2032
0
    }
2033
#ifdef HAVE_PCRE_JIT_SUPPORT
2034
    if ((pce->preg_options & PREG_JIT)) {
2035
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2036
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2037
    } else
2038
#endif
2039
352
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2040
352
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2041
352
  }
2042
184
  if (match_data != mdata) {
2043
31
    pcre2_match_data_free(match_data);
2044
31
  }
2045
184
  mdata_used = old_mdata_used;
2046
2047
184
  return result;
2048
184
}
2049
2050
/*
 * Look up (or compile) `regex` through pcre_get_compiled_regex_cache() and run
 * the callback-based replacement on `subject_str`.
 *
 * Returns NULL when no compiled pattern is available; otherwise returns
 * whatever php_pcre_replace_func_impl() returns. The cache entry's refcount is
 * raised for the duration of that call.
 */
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
                zend_string *subject_str,
                zend_fcall_info *fci, zend_fcall_info_cache *fcc,
                size_t limit, size_t *replace_count, zend_long flags)
{
  /* Compile regex or get it from cache. */
  pcre_cache_entry *pce = pcre_get_compiled_regex_cache(regex);
  if (pce == NULL) {
    return NULL;
  }

  pce->refcount++;
  zend_string *replaced = php_pcre_replace_func_impl(
    pce, subject_str, fci, fcc, limit, replace_count, flags);
  pce->refcount--;

  return replaced;
}
2068
2069
/* {{{ php_pcre_replace_array */
2070
/*
 * Apply every pattern in the `regex` array to `subject_str`, one after another,
 * feeding the output of each replacement into the next one.
 *
 * When `replace_ht` is given, each pattern is paired with the next defined slot
 * of that array; once the array is exhausted an empty replacement string is
 * used. Otherwise `replace_str` is the replacement for every pattern.
 *
 * Returns the final string, or NULL as soon as one php_pcre_replace() call
 * returns NULL. A reference is added to `subject_str` on entry, and each
 * intermediate subject is released once it has been replaced.
 */
static zend_string *php_pcre_replace_array(HashTable *regex,
  zend_string *replace_str, HashTable *replace_ht,
  zend_string *subject_str, size_t limit, size_t *replace_count)
{
  zval *pattern_zv;
  uint32_t replace_idx = 0;   /* Next slot of replace_ht to look at */

  zend_string_addref(subject_str);

  if (!replace_ht) {
    ZEND_ASSERT(replace_str != NULL);
  }

  /* For each entry in the regex array, get the entry */
  ZEND_HASH_FOREACH_VAL(regex, pattern_zv) {
    /* Make sure we're dealing with strings. */
    zend_string *tmp_pattern;
    zend_string *pattern = zval_get_tmp_string(pattern_zv, &tmp_pattern);

    /* Default: the shared replacement string, with nothing temporary to release. */
    zend_string *replacement = replace_str;
    zend_string *tmp_replacement = NULL;

    if (replace_ht) {
      /* Get the current replacement entry, skipping undefined slots. */
      for (;;) {
        if (replace_idx == replace_ht->nNumUsed) {
          replacement = ZSTR_EMPTY_ALLOC();
          tmp_replacement = NULL;
          break;
        }
        zval *candidate = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
        replace_idx++;
        if (Z_TYPE_P(candidate) != IS_UNDEF) {
          replacement = zval_get_tmp_string(candidate, &tmp_replacement);
          break;
        }
      }
    }

    /* Do the actual replacement and put the result back into subject_str
       for further replacements. */
    zend_string *next_subject = php_pcre_replace(pattern, subject_str, ZSTR_VAL(subject_str),
      ZSTR_LEN(subject_str), replacement, limit, replace_count);
    zend_tmp_string_release(tmp_replacement);
    zend_tmp_string_release(tmp_pattern);
    zend_string_release_ex(subject_str, 0);
    subject_str = next_subject;

    if (UNEXPECTED(next_subject == NULL)) {
      break;
    }
  } ZEND_HASH_FOREACH_END();

  return subject_str;
}
2143
/* }}} */
2144
2145
/* {{{ php_replace_in_subject */
2146
/*
 * Run a string-replacement on one subject. A single pattern (`regex_str`) goes
 * straight to php_pcre_replace(); an array of patterns (`regex_ht`) is handed to
 * php_pcre_replace_array(). Returns whatever the chosen function returns.
 */
static zend_always_inline zend_string *php_replace_in_subject(
  zend_string *regex_str, HashTable *regex_ht,
  zend_string *replace_str, HashTable *replace_ht,
  zend_string *subject, size_t limit, size_t *replace_count)
{
  if (!regex_str) {
    ZEND_ASSERT(regex_ht != NULL);
    return php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
      limit, replace_count);
  }

  ZEND_ASSERT(replace_str != NULL);
  return php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
    replace_str, limit, replace_count);
}
2164
/* }}} */
2165
2166
static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht,
2167
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2168
  zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2169
205
{
2170
205
  zend_string *result;
2171
2172
205
  if (regex_str) {
2173
205
    result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags);
2174
205
    return result;
2175
205
  } else {
2176
    /* If regex is an array */
2177
0
    zval    *regex_entry;
2178
2179
0
    ZEND_ASSERT(regex_ht != NULL);
2180
2181
0
    zend_string_addref(subject);
2182
2183
    /* For each entry in the regex array, get the entry */
2184
0
    ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2185
      /* Make sure we're dealing with strings. */
2186
0
      zend_string *tmp_regex_entry_str;
2187
0
      zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2188
0
      if (UNEXPECTED(regex_entry_str == NULL)) {
2189
0
        break;
2190
0
      }
2191
2192
      /* Do the actual replacement and put the result back into subject
2193
         for further replacements. */
2194
0
      result = php_pcre_replace_func(
2195
0
        regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2196
0
      zend_tmp_string_release(tmp_regex_entry_str);
2197
0
      zend_string_release(subject);
2198
0
      subject = result;
2199
0
      if (UNEXPECTED(result == NULL)) {
2200
0
        break;
2201
0
      }
2202
0
    } ZEND_HASH_FOREACH_END();
2203
2204
0
    return subject;
2205
0
  }
2206
205
}
2207
2208
static size_t php_preg_replace_func_impl(zval *return_value,
2209
  zend_string *regex_str, const HashTable *regex_ht,
2210
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2211
  zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags)
2212
205
{
2213
205
  zend_string *result;
2214
205
  size_t replace_count = 0;
2215
2216
205
  if (subject_str) {
2217
205
    result = php_replace_in_subject_func(
2218
205
      regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2219
205
    if (result != NULL) {
2220
163
      RETVAL_STR(result);
2221
163
    } else {
2222
42
      RETVAL_NULL();
2223
42
    }
2224
205
  } else {
2225
    /* if subject is an array */
2226
0
    zval    *subject_entry, zv;
2227
0
    zend_string *string_key;
2228
0
    zend_ulong   num_key;
2229
2230
0
    ZEND_ASSERT(subject_ht != NULL);
2231
2232
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2233
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2234
2235
    /* For each subject entry, convert it to string, then perform replacement
2236
       and add the result to the return_value array. */
2237
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2238
0
      zend_string *tmp_subject_entry_str;
2239
0
      zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2240
0
      if (UNEXPECTED(subject_entry_str == NULL)) {
2241
0
        break;
2242
0
      }
2243
2244
0
      result = php_replace_in_subject_func(
2245
0
        regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2246
0
      if (result != NULL) {
2247
        /* Add to return array */
2248
0
        ZVAL_STR(&zv, result);
2249
0
        if (string_key) {
2250
0
          zend_hash_add_new(return_value_ht, string_key, &zv);
2251
0
        } else {
2252
0
          zend_hash_index_add_new(return_value_ht, num_key, &zv);
2253
0
        }
2254
0
      }
2255
0
      zend_tmp_string_release(tmp_subject_entry_str);
2256
0
    } ZEND_HASH_FOREACH_END();
2257
0
  }
2258
2259
205
  return replace_count;
2260
205
}
2261
2262
static void _preg_replace_common(
2263
  zval *return_value,
2264
  HashTable *regex_ht, zend_string *regex_str,
2265
  HashTable *replace_ht, zend_string *replace_str,
2266
  HashTable *subject_ht, zend_string *subject_str,
2267
  zend_long limit,
2268
  zval *zcount,
2269
  bool is_filter
2270
347
) {
2271
347
  size_t replace_count = 0;
2272
347
  zend_string *result;
2273
347
  size_t old_replace_count;
2274
2275
  /* If replace is an array then the regex argument needs to also be an array */
2276
347
  if (replace_ht && !regex_ht) {
2277
0
    zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2278
0
    RETURN_THROWS();
2279
0
  }
2280
2281
347
  if (subject_str) {
2282
347
    old_replace_count = replace_count;
2283
347
    result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2284
347
      subject_str, limit, &replace_count);
2285
347
    if (result != NULL) {
2286
195
      if (!is_filter || replace_count > old_replace_count) {
2287
195
        RETVAL_STR(result);
2288
195
      } else {
2289
0
        zend_string_release_ex(result, 0);
2290
0
        RETVAL_NULL();
2291
0
      }
2292
195
    } else {
2293
152
      RETVAL_NULL();
2294
152
    }
2295
347
  } else {
2296
    /* if subject is an array */
2297
0
    zval    *subject_entry, zv;
2298
0
    zend_string *string_key;
2299
0
    zend_ulong   num_key;
2300
2301
0
    ZEND_ASSERT(subject_ht != NULL);
2302
2303
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2304
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2305
2306
    /* For each subject entry, convert it to string, then perform replacement
2307
       and add the result to the return_value array. */
2308
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2309
0
      old_replace_count = replace_count;
2310
0
      zend_string *tmp_subject_entry_str;
2311
0
      zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2312
0
      result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2313
0
        subject_entry_str, limit, &replace_count);
2314
2315
0
      if (result != NULL) {
2316
0
        if (!is_filter || replace_count > old_replace_count) {
2317
          /* Add to return array */
2318
0
          ZVAL_STR(&zv, result);
2319
0
          if (string_key) {
2320
0
            zend_hash_add_new(return_value_ht, string_key, &zv);
2321
0
          } else {
2322
0
            zend_hash_index_add_new(return_value_ht, num_key, &zv);
2323
0
          }
2324
0
        } else {
2325
0
          zend_string_release_ex(result, 0);
2326
0
        }
2327
0
      }
2328
0
      zend_tmp_string_release(tmp_subject_entry_str);
2329
0
    } ZEND_HASH_FOREACH_END();
2330
0
  }
2331
2332
347
  if (zcount) {
2333
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2334
0
  }
2335
347
}
2336
2337
/* {{{ preg_replace_common */
2338
/* Shared argument-parsing front end used by PHP_FUNCTION(preg_replace)
   (is_filter = false) and PHP_FUNCTION(preg_filter) (is_filter = true).
   Parses (pattern, replacement, subject[, limit[, count]]), where the first
   three may each be a string or an array, and forwards everything to
   _preg_replace_common(), which fills in return_value. */
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
{
  HashTable *regex_ht, *replace_ht, *subject_ht;
  zend_string *regex_str, *replace_str, *subject_str;
  zval *zcount = NULL;   /* stays NULL when no count argument is passed */
  zend_long limit = -1;  /* default when no limit argument is passed */

  /* Three mandatory string-or-array arguments, then optional limit and count. */
  ZEND_PARSE_PARAMETERS_START(3, 5)
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
    Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
    Z_PARAM_OPTIONAL
    Z_PARAM_LONG(limit)
    Z_PARAM_ZVAL(zcount)
  ZEND_PARSE_PARAMETERS_END();

  _preg_replace_common(return_value,
    regex_ht, regex_str,
    replace_ht, replace_str,
    subject_ht, subject_str,
    limit, zcount, is_filter);
}
2362
/* }}} */
2363
2364
/* {{{ Perform Perl-style regular expression replacement. */
2365
/* Userland preg_replace(): forwards the call unchanged to
   preg_replace_common() with is_filter = false. */
PHP_FUNCTION(preg_replace)
2366
353
{
2367
353
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2368
353
}
2369
/* }}} */
2370
2371
/* Frameless three-argument variant of preg_replace(). It fetches the
   pattern, replacement and subject (each string or array) and calls
   _preg_replace_common() with no limit (-1), no count out-parameter (NULL)
   and is_filter = false, then frees the temporaries at flf_clean.
   NOTE(review): the Z_FLF_PARAM_* macros presumably jump to flf_clean when
   an argument cannot be converted -- confirm against their definition. */
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
2372
0
{
2373
0
  zend_string *regex_str, *replace_str, *subject_str;
2374
0
  HashTable *regex_ht, *replace_ht, *subject_ht;
2375
0
  zval regex_tmp, replace_tmp, subject_tmp;
2376
2377
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
2378
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
2379
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
2380
2381
0
  _preg_replace_common(
2382
0
    return_value,
2383
0
    regex_ht, regex_str,
2384
0
    replace_ht, replace_str,
2385
0
    subject_ht, subject_str,
2386
0
    /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2387
2388
0
/* Common exit: release the per-argument temporaries. */
flf_clean:;
2389
0
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
2390
0
  Z_FLF_PARAM_FREE_STR(2, replace_tmp);
2391
0
  Z_FLF_PARAM_FREE_STR(3, subject_tmp);
2392
0
}
2393
2394
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2395
PHP_FUNCTION(preg_replace_callback)
2396
207
{
2397
207
  zval *zcount = NULL;
2398
207
  zend_string *regex_str;
2399
207
  HashTable *regex_ht;
2400
207
  zend_string *subject_str;
2401
207
  HashTable *subject_ht;
2402
207
  zend_long limit = -1, flags = 0;
2403
207
  size_t replace_count;
2404
207
  zend_fcall_info fci = empty_fcall_info;
2405
207
  zend_fcall_info_cache fcc = empty_fcall_info_cache;
2406
2407
  /* Get function parameters and do error-checking. */
2408
621
  ZEND_PARSE_PARAMETERS_START(3, 6)
2409
1.03k
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2410
1.03k
    Z_PARAM_FUNC(fci, fcc)
2411
1.23k
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2412
1.23k
    Z_PARAM_OPTIONAL
2413
1.23k
    Z_PARAM_LONG(limit)
2414
0
    Z_PARAM_ZVAL(zcount)
2415
0
    Z_PARAM_LONG(flags)
2416
207
  ZEND_PARSE_PARAMETERS_END();
2417
2418
205
  replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht,
2419
205
    &fci, &fcc,
2420
205
    subject_str, subject_ht, limit, flags);
2421
205
  if (zcount) {
2422
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2423
0
  }
2424
205
}
2425
/* }}} */
2426
2427
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2428
PHP_FUNCTION(preg_replace_callback_array)
2429
0
{
2430
0
  zval *replace, *zcount = NULL;
2431
0
  HashTable *pattern, *subject_ht;
2432
0
  zend_string *subject_str, *str_idx_regex;
2433
0
  zend_long limit = -1, flags = 0;
2434
0
  size_t replace_count = 0;
2435
2436
  /* Get function parameters and do error-checking. */
2437
0
  ZEND_PARSE_PARAMETERS_START(2, 5)
2438
0
    Z_PARAM_ARRAY_HT(pattern)
2439
0
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2440
0
    Z_PARAM_OPTIONAL
2441
0
    Z_PARAM_LONG(limit)
2442
0
    Z_PARAM_ZVAL(zcount)
2443
0
    Z_PARAM_LONG(flags)
2444
0
  ZEND_PARSE_PARAMETERS_END();
2445
2446
0
  if (subject_ht) {
2447
0
    GC_TRY_ADDREF(subject_ht);
2448
0
  } else {
2449
0
    GC_TRY_ADDREF(subject_str);
2450
0
  }
2451
2452
0
  ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2453
0
    if (!str_idx_regex) {
2454
0
      zend_argument_type_error(1, "must contain only string patterns as keys");
2455
0
      goto error;
2456
0
    }
2457
2458
0
    zend_fcall_info_cache fcc = empty_fcall_info_cache;
2459
0
    zend_fcall_info fci = empty_fcall_info;
2460
0
    fci.size = sizeof(zend_fcall_info);
2461
    /* Copy potential trampoline */
2462
0
    ZVAL_COPY_VALUE(&fci.function_name, replace);
2463
2464
0
    if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2465
0
      zend_argument_type_error(1, "must contain only valid callbacks");
2466
0
      goto error;
2467
0
    }
2468
2469
0
    zval retval;
2470
0
    replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2471
0
      subject_str, subject_ht, limit, flags);
2472
0
    zend_release_fcall_info_cache(&fcc);
2473
2474
0
    switch (Z_TYPE(retval)) {
2475
0
      case IS_ARRAY:
2476
0
        ZEND_ASSERT(subject_ht);
2477
0
        zend_array_release(subject_ht);
2478
0
        subject_ht = Z_ARR(retval);
2479
0
        break;
2480
0
      case IS_STRING:
2481
0
        ZEND_ASSERT(subject_str);
2482
0
        zend_string_release(subject_str);
2483
0
        subject_str = Z_STR(retval);
2484
0
        break;
2485
0
      case IS_NULL:
2486
0
        RETVAL_NULL();
2487
0
        goto error;
2488
0
      default: ZEND_UNREACHABLE();
2489
0
    }
2490
2491
0
    if (EG(exception)) {
2492
0
      goto error;
2493
0
    }
2494
0
  } ZEND_HASH_FOREACH_END();
2495
2496
0
  if (zcount) {
2497
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2498
0
  }
2499
2500
0
  if (subject_ht) {
2501
0
    RETVAL_ARR(subject_ht);
2502
    // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2503
0
    if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2504
0
      Z_TYPE_FLAGS_P(return_value) = 0;
2505
0
    }
2506
0
    return;
2507
0
  } else {
2508
0
    RETURN_STR(subject_str);
2509
0
  }
2510
2511
0
error:
2512
0
  if (subject_ht) {
2513
0
    zend_array_release(subject_ht);
2514
0
  } else {
2515
0
    zend_string_release(subject_str);
2516
0
  }
2517
0
}
2518
/* }}} */
2519
2520
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
2521
/* Userland preg_filter(): forwards the call unchanged to
   preg_replace_common() with is_filter = true. */
PHP_FUNCTION(preg_filter)
2522
0
{
2523
0
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2524
0
}
2525
/* }}} */
2526
2527
/* {{{ Split string into an array using a perl-style regular expression as a delimiter */
2528
PHP_FUNCTION(preg_split)
2529
0
{
2530
0
  zend_string     *regex;     /* Regular expression */
2531
0
  zend_string     *subject;   /* String to match against */
2532
0
  zend_long      limit_val = -1;/* Integer value of limit */
2533
0
  zend_long      flags = 0;   /* Match control flags */
2534
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2535
2536
  /* Get function parameters and do error checking */
2537
0
  ZEND_PARSE_PARAMETERS_START(2, 4)
2538
0
    Z_PARAM_STR(regex)
2539
0
    Z_PARAM_STR(subject)
2540
0
    Z_PARAM_OPTIONAL
2541
0
    Z_PARAM_LONG(limit_val)
2542
0
    Z_PARAM_LONG(flags)
2543
0
  ZEND_PARSE_PARAMETERS_END();
2544
2545
  /* Compile regex or get it from cache. */
2546
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2547
0
    RETURN_FALSE;
2548
0
  }
2549
2550
0
  pce->refcount++;
2551
0
  php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2552
0
  pce->refcount--;
2553
0
}
2554
/* }}} */
2555
2556
/* {{{ php_pcre_split */
2557
/* Split subject_str on every match of the compiled pattern pce.
 *
 * return_value is initialised as an array and receives the pieces; on an
 * execution error (or if match data cannot be allocated) the array is
 * destroyed and false is returned instead.
 *
 * limit_val: -1 means unlimited, 0 is treated like -1, and any other value
 *            <= 1 skips matching entirely so the whole subject is emitted as
 *            a single piece.
 * flags:     bitmask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 *            PREG_SPLIT_OFFSET_CAPTURE.
 *
 * PCRE_G(error_code) is reset to PHP_PCRE_NO_ERROR before matching starts.
 */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2558
  zend_long limit_val, zend_long flags)
2559
0
{
2560
0
  uint32_t     options;     /* Execution options */
2561
0
  int        count;       /* Count of matched subpatterns */
2562
0
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
2563
0
  PCRE2_SIZE     last_match_offset; /* Location of last match */
2564
0
  uint32_t     no_empty;      /* If NO_EMPTY flag is set */
2565
0
  uint32_t     delim_capture;   /* If delimiters should be captured */
2566
0
  uint32_t     offset_capture;  /* If offsets should be captured */
2567
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2568
0
  zval       tmp;
2569
0
  pcre2_match_data *match_data;
2570
0
  bool old_mdata_used;
2571
0
  char *subject = ZSTR_VAL(subject_str);
2572
2573
0
  no_empty = flags & PREG_SPLIT_NO_EMPTY;
2574
0
  delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2575
0
  offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2576
2577
  /* Initialize return value */
2578
0
  array_init(return_value);
2579
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2580
2581
  /* Number of capture groups in the pattern, plus one for the whole match. */
2582
0
  num_subpats = pce->capture_count + 1;
2583
2584
  /* Start at the beginning of the string */
2585
0
  start_offset = 0;
2586
0
  last_match_offset = 0;
2587
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2588
2589
  /* Normalise the limit: 0 becomes "unlimited" (-1); any other value <= 1
     means no splitting at all, so jump straight to emitting the subject. */
0
  if (limit_val == -1) {
2590
    /* pass */
2591
0
  } else if (limit_val == 0) {
2592
0
    limit_val = -1;
2593
0
  } else if (limit_val <= 1) {
2594
0
    goto last;
2595
0
  }
2596
2597
  /* Use the shared mdata block when it is not already in use and
     num_subpats fits PHP_PCRE_PREALLOC_MDATA_SIZE; otherwise create match
     data from the pattern. */
0
  old_mdata_used = mdata_used;
2598
0
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2599
0
    mdata_used = true;
2600
0
    match_data = mdata;
2601
0
  } else {
2602
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2603
0
    if (!match_data) {
2604
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2605
0
      zval_ptr_dtor(return_value);
2606
0
      RETURN_FALSE;
2607
0
    }
2608
0
  }
2609
2610
  /* PCRE2_NO_UTF_CHECK is set for the first match unless the pattern was
     compiled with PCRE2_UTF. */
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2611
2612
  /* Array of subpattern offsets */
2613
0
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2614
2615
#ifdef HAVE_PCRE_JIT_SUPPORT
2616
  if ((pce->preg_options & PREG_JIT) && options) {
2617
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2618
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2619
  } else
2620
#endif
2621
0
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2622
0
      options, match_data, mctx);
2623
2624
0
  while (1) {
2625
    /* If something matched */
2626
0
    if (count >= 0) {
2627
      /* Check for too many substrings condition. */
2628
0
      if (UNEXPECTED(count == 0)) {
2629
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2630
0
        count = num_subpats;
2631
0
      }
2632
2633
0
matched:
2634
      /* A match that ends before it starts is treated as an internal error. */
0
      if (UNEXPECTED(offsets[1] < offsets[0])) {
2635
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2636
0
        break;
2637
0
      }
2638
2639
0
      if (!no_empty || offsets[0] != last_match_offset) {
2640
0
        if (offset_capture) {
2641
          /* Add (match, offset) pair to the return value */
2642
0
          add_offset_pair(
2643
0
            return_value_ht, subject, last_match_offset, offsets[0],
2644
0
            NULL, 0);
2645
0
        } else {
2646
          /* Add the piece to the return value */
2647
0
          populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2648
0
          zend_hash_next_index_insert_new(return_value_ht, &tmp);
2649
0
        }
2650
2651
        /* One less left to do */
2652
0
        if (limit_val != -1)
2653
0
          limit_val--;
2654
0
      }
2655
2656
0
      if (delim_capture) {
2657
0
        size_t i;
2658
0
        for (i = 1; i < count; i++) {
2659
          /* If we have matched a delimiter */
2660
0
          if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2661
0
            if (offset_capture) {
2662
0
              add_offset_pair(
2663
0
                return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2664
0
            } else {
2665
0
              populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2666
0
              zend_hash_next_index_insert_new(return_value_ht, &tmp);
2667
0
            }
2668
0
          }
2669
0
        }
2670
0
      }
2671
2672
      /* Advance to the position right after the last full match */
2673
0
      start_offset = last_match_offset = offsets[1];
2674
2675
      /* If we have matched an empty string, mimic what Perl's /g options does.
2676
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2677
         the match again at the same point. If this fails (picked up above) we
2678
         advance to the next character. */
2679
0
      if (start_offset == offsets[0]) {
2680
        /* Get next piece if no limit or limit not yet reached and something matched */
2681
0
        if (limit_val != -1 && limit_val <= 1) {
2682
0
          break;
2683
0
        }
2684
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2685
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2686
0
        if (count >= 0) {
2687
0
          goto matched;
2688
0
        } else if (count == PCRE2_ERROR_NOMATCH) {
2689
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2690
             this is not necessarily the end. We need to advance
2691
             the start offset, and continue. Fudge the offset values
2692
             to achieve this, unless we're already at the end of the string. */
2693
0
          if (start_offset < ZSTR_LEN(subject_str)) {
2694
0
            start_offset += calculate_unit_length(pce, subject + start_offset);
2695
0
          } else {
2696
0
            break;
2697
0
          }
2698
0
        } else {
2699
0
          goto error;
2700
0
        }
2701
0
      }
2702
2703
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2704
0
      break;
2705
0
    } else {
2706
0
error:
2707
0
      pcre_handle_exec_error(count);
2708
0
      break;
2709
0
    }
2710
2711
    /* Get next piece if no limit or limit not yet reached and something matched */
2712
0
    if (limit_val != -1 && limit_val <= 1) {
2713
0
      break;
2714
0
    }
2715
2716
#ifdef HAVE_PCRE_JIT_SUPPORT
2717
    if (pce->preg_options & PREG_JIT) {
2718
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2719
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2720
    } else
2721
#endif
2722
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2723
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2724
0
  }
2725
  /* Free match data only if it is not the shared mdata block, then restore
     the saved mdata_used value. */
0
  if (match_data != mdata) {
2726
0
    pcre2_match_data_free(match_data);
2727
0
  }
2728
0
  mdata_used = old_mdata_used;
2729
2730
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2731
0
    zval_ptr_dtor(return_value);
2732
0
    RETURN_FALSE;
2733
0
  }
2734
2735
0
last:
2736
0
  start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2737
2738
0
  if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2739
0
    if (offset_capture) {
2740
      /* Add the last (match, offset) pair to the return value */
2741
0
      add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2742
0
    } else {
2743
      /* Add the last piece to the return value */
2744
0
      if (start_offset == 0) {
2745
        /* Nothing was consumed: reuse the subject string itself. */
0
        ZVAL_STR_COPY(&tmp, subject_str);
2746
0
      } else {
2747
0
        populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2748
0
      }
2749
0
      zend_hash_next_index_insert_new(return_value_ht, &tmp);
2750
0
    }
2751
0
  }
2752
0
}
2753
/* }}} */
2754
2755
/* {{{ Quote regular expression characters plus an optional character */
2756
PHP_FUNCTION(preg_quote)
2757
35
{
2758
35
  zend_string *str;           /* Input string argument */
2759
35
  zend_string *delim = NULL;   /* Additional delimiter argument */
2760
35
  char    *in_str;      /* Input string */
2761
35
  char    *in_str_end;      /* End of the input string */
2762
35
  zend_string *out_str;     /* Output string with quoted characters */
2763
35
  size_t       extra_len;         /* Number of additional characters */
2764
35
  char    *p,         /* Iterator for input string */
2765
35
        *q,         /* Iterator for output string */
2766
35
         delim_char = '\0', /* Delimiter character to be quoted */
2767
35
         c;         /* Current character */
2768
2769
  /* Get the arguments and check for errors */
2770
105
  ZEND_PARSE_PARAMETERS_START(1, 2)
2771
140
    Z_PARAM_STR(str)
2772
35
    Z_PARAM_OPTIONAL
2773
76
    Z_PARAM_STR_OR_NULL(delim)
2774
35
  ZEND_PARSE_PARAMETERS_END();
2775
2776
  /* Nothing to do if we got an empty string */
2777
35
  if (ZSTR_LEN(str) == 0) {
2778
0
    RETURN_EMPTY_STRING();
2779
0
  }
2780
2781
35
  in_str = ZSTR_VAL(str);
2782
35
  in_str_end = in_str + ZSTR_LEN(str);
2783
2784
35
  if (delim) {
2785
3
    delim_char = ZSTR_VAL(delim)[0];
2786
3
  }
2787
2788
  /* Go through the string and quote necessary characters */
2789
35
  extra_len = 0;
2790
35
  p = in_str;
2791
50.8k
  do {
2792
50.8k
    c = *p;
2793
50.8k
    switch(c) {
2794
736
      case '.':
2795
925
      case '\\':
2796
1.29k
      case '+':
2797
1.31k
      case '*':
2798
1.45k
      case '?':
2799
1.60k
      case '[':
2800
1.66k
      case '^':
2801
1.78k
      case ']':
2802
1.78k
      case '$':
2803
2.01k
      case '(':
2804
2.64k
      case ')':
2805
2.72k
      case '{':
2806
3.08k
      case '}':
2807
3.52k
      case '=':
2808
3.53k
      case '!':
2809
3.75k
      case '>':
2810
3.78k
      case '<':
2811
3.86k
      case '|':
2812
4.29k
      case ':':
2813
4.53k
      case '-':
2814
4.87k
      case '#':
2815
4.87k
        extra_len++;
2816
4.87k
        break;
2817
2818
1.93k
      case '\0':
2819
1.93k
        extra_len+=3;
2820
1.93k
        break;
2821
2822
44.0k
      default:
2823
44.0k
        if (c == delim_char) {
2824
0
          extra_len++;
2825
0
        }
2826
44.0k
        break;
2827
50.8k
    }
2828
50.8k
    p++;
2829
50.8k
  } while (p != in_str_end);
2830
2831
35
  if (extra_len == 0) {
2832
1
    RETURN_STR_COPY(str);
2833
1
  }
2834
2835
  /* Allocate enough memory so that even if each character
2836
     is quoted, we won't run out of room */
2837
34
  out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2838
34
  q = ZSTR_VAL(out_str);
2839
34
  p = in_str;
2840
2841
50.8k
  do {
2842
50.8k
    c = *p;
2843
50.8k
    switch(c) {
2844
736
      case '.':
2845
925
      case '\\':
2846
1.29k
      case '+':
2847
1.31k
      case '*':
2848
1.45k
      case '?':
2849
1.60k
      case '[':
2850
1.66k
      case '^':
2851
1.78k
      case ']':
2852
1.78k
      case '$':
2853
2.01k
      case '(':
2854
2.64k
      case ')':
2855
2.72k
      case '{':
2856
3.08k
      case '}':
2857
3.52k
      case '=':
2858
3.53k
      case '!':
2859
3.75k
      case '>':
2860
3.78k
      case '<':
2861
3.86k
      case '|':
2862
4.29k
      case ':':
2863
4.53k
      case '-':
2864
4.87k
      case '#':
2865
4.87k
        *q++ = '\\';
2866
4.87k
        *q++ = c;
2867
4.87k
        break;
2868
2869
1.93k
      case '\0':
2870
1.93k
        *q++ = '\\';
2871
1.93k
        *q++ = '0';
2872
1.93k
        *q++ = '0';
2873
1.93k
        *q++ = '0';
2874
1.93k
        break;
2875
2876
44.0k
      default:
2877
44.0k
        if (c == delim_char) {
2878
0
          *q++ = '\\';
2879
0
        }
2880
44.0k
        *q++ = c;
2881
44.0k
        break;
2882
50.8k
    }
2883
50.8k
    p++;
2884
50.8k
  } while (p != in_str_end);
2885
34
  *q = '\0';
2886
2887
34
  RETURN_NEW_STR(out_str);
2888
34
}
2889
/* }}} */
2890
2891
/* {{{ Searches array and returns entries which match regex */
2892
PHP_FUNCTION(preg_grep)
2893
0
{
2894
0
  zend_string     *regex;     /* Regular expression */
2895
0
  zval        *input;     /* Input array */
2896
0
  zend_long      flags = 0;   /* Match control flags */
2897
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2898
2899
  /* Get arguments and do error checking */
2900
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2901
0
    Z_PARAM_STR(regex)
2902
0
    Z_PARAM_ARRAY(input)
2903
0
    Z_PARAM_OPTIONAL
2904
0
    Z_PARAM_LONG(flags)
2905
0
  ZEND_PARSE_PARAMETERS_END();
2906
2907
  /* Compile regex or get it from cache. */
2908
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2909
0
    RETURN_FALSE;
2910
0
  }
2911
2912
0
  pce->refcount++;
2913
0
  php_pcre_grep_impl(pce, input, return_value, flags);
2914
0
  pce->refcount--;
2915
0
}
2916
/* }}} */
2917
2918
/* Filter the array `input` by the compiled pattern `pce`.
 *
 * return_value receives an array holding every entry of `input` (under its
 * original string or integer key) whose string form matches the pattern, or,
 * when PREG_GREP_INVERT is set in `flags`, every entry that does not match.
 *
 * On any failure (a PCRE execution error on some entry, or match data that
 * cannot be allocated) PCRE_G(error_code) is set, the partially built array
 * is destroyed and false is returned. PCRE_G(error_code) is reset to
 * PHP_PCRE_NO_ERROR before matching starts.
 */
PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
{
  zval *entry;                   /* An entry in the input array */
  zend_string *string_key;
  zend_ulong num_key;
  int count;                     /* Count of matched subpatterns */
  pcre2_match_data *match_data;
  /* Whether to return non-matching entries */
  const bool invert = (flags & PREG_GREP_INVERT) != 0;
  /* Number of capture groups in the pattern, plus one for the whole match. */
  const uint32_t num_subpats = pce->capture_count + 1;

  /* Initialize return array */
  array_init(return_value);
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);

  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

  /* Use the shared mdata block when it is not already in use and
     num_subpats fits PHP_PCRE_PREALLOC_MDATA_SIZE; otherwise create match
     data from the pattern. */
  const bool old_mdata_used = mdata_used;
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
    mdata_used = true;
    match_data = mdata;
  } else {
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
    if (!match_data) {
      /* Report the failure the same way as the error path at the end of
         this function: drop the array and return false rather than leaving
         an empty array behind. mdata_used was not modified on this path. */
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
      zend_array_destroy(Z_ARR_P(return_value));
      RETURN_FALSE;
    }
  }

  /* PCRE2_NO_UTF_CHECK is set unless the pattern was compiled with PCRE2_UTF. */
  const uint32_t options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

  /* Go through the input array */
  ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
    zend_string *tmp_subject_str;
    zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);

    /* Perform the match */
#ifdef HAVE_PCRE_JIT_SUPPORT
    if ((pce->preg_options & PREG_JIT) && options) {
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
          PCRE2_NO_UTF_CHECK, match_data, mctx);
    } else
#endif
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
        options, match_data, mctx);

    /* Anything negative other than "no match" is an execution error:
       record it and stop scanning. */
    if (UNEXPECTED(count < 0 && count != PCRE2_ERROR_NOMATCH)) {
      pcre_handle_exec_error(count);
      zend_tmp_string_release(tmp_subject_str);
      break;
    }

    const bool matched = count >= 0;

    /* Check for too many substrings condition. */
    if (UNEXPECTED(count == 0)) {
      php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
    }

    /* Keep matching entries normally, non-matching ones when inverted. */
    if (matched != invert) {
      Z_TRY_ADDREF_P(entry);

      /* Add to return array */
      if (string_key) {
        zend_hash_update(return_value_ht, string_key, entry);
      } else {
        zend_hash_index_update(return_value_ht, num_key, entry);
      }
    }

    zend_tmp_string_release(tmp_subject_str);
  } ZEND_HASH_FOREACH_END();

  /* Free match data only if it is not the shared mdata block, then restore
     the saved mdata_used value. */
  if (match_data != mdata) {
    pcre2_match_data_free(match_data);
  }

  mdata_used = old_mdata_used;

  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
    zend_array_destroy(Z_ARR_P(return_value));
    RETURN_FALSE;
  }
}
3016
/* }}} */
3017
3018
/* {{{ Returns the error code of the last regexp execution. */
3019
/* Userland preg_last_error(): takes no arguments and returns the current
   value of PCRE_G(error_code) as an integer. */
PHP_FUNCTION(preg_last_error)
3020
0
{
3021
0
  ZEND_PARSE_PARAMETERS_NONE();
3022
3023
0
  RETURN_LONG(PCRE_G(error_code));
3024
0
}
3025
/* }}} */
3026
3027
/* {{{ Returns the error message of the last regexp execution. */
3028
/* Userland preg_last_error_msg(): takes no arguments and returns, as a
   string, the message php_pcre_get_error_msg() yields for the current
   PCRE_G(error_code). */
PHP_FUNCTION(preg_last_error_msg)
3029
0
{
3030
0
  ZEND_PARSE_PARAMETERS_NONE();
3031
3032
0
  RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3033
0
}
3034
/* }}} */
3035
3036
/* {{{ module definition structures */
3037
3038
/* Module descriptor for the "pcre" extension. It registers the function
   table (ext_functions), the MINIT/MSHUTDOWN/RINIT/RSHUTDOWN/MINFO hooks,
   the module version (PHP_PCRE_VERSION) and the module globals together
   with their GINIT/GSHUTDOWN hooks.
   NOTE(review): the bare NULL after PHP_GSHUTDOWN fills a slot whose meaning
   is defined by zend_module_entry, not visible here -- confirm before
   changing it. */
zend_module_entry pcre_module_entry = {
3039
  STANDARD_MODULE_HEADER,
3040
  "pcre",
3041
  ext_functions,
3042
  PHP_MINIT(pcre),
3043
  PHP_MSHUTDOWN(pcre),
3044
  PHP_RINIT(pcre),
3045
  PHP_RSHUTDOWN(pcre),
3046
  PHP_MINFO(pcre),
3047
  PHP_PCRE_VERSION,
3048
  PHP_MODULE_GLOBALS(pcre),
3049
  PHP_GINIT(pcre),
3050
  PHP_GSHUTDOWN(pcre),
3051
  NULL,
3052
  STANDARD_MODULE_PROPERTIES_EX
3053
};
3054
3055
#ifdef COMPILE_DL_PCRE
3056
ZEND_GET_MODULE(pcre)
3057
#endif
3058
3059
/* }}} */
3060
3061
/* Exported accessor: returns the match context pointer `mctx` that the
   matching calls in this file pass to pcre2_match()/pcre2_jit_match(). */
PHPAPI pcre2_match_context *php_pcre_mctx(void)
3062
9
{/*{{{*/
3063
9
  return mctx;
3064
9
}/*}}}*/
3065
3066
/* Exported accessor: returns the general context pointer `gctx`
   (declared outside this block). */
PHPAPI pcre2_general_context *php_pcre_gctx(void)
3067
0
{/*{{{*/
3068
0
  return gctx;
3069
0
}/*}}}*/
3070
3071
/* Exported accessor: returns the compile context pointer `cctx`
   (declared outside this block). */
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3072
0
{/*{{{*/
3073
0
  return cctx;
3074
0
}/*}}}*/
3075
3076
/* Increment the reference count of a cache entry.
   pce must not be NULL (checked with assert()). */
PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3077
0
{/*{{{*/
3078
0
  assert(NULL != pce);
3079
0
  pce->refcount++;
3080
0
}/*}}}*/
3081
3082
/* Decrement the reference count of a cache entry.
   pce must not be NULL and its refcount must be non-zero (both checked with
   assert()). Nothing is freed here; only the counter changes. */
PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3083
0
{/*{{{*/
3084
0
  assert(NULL != pce);
3085
0
  assert(0 != pce->refcount);
3086
0
  pce->refcount--;
3087
0
}/*}}}*/
3088
3089
/* Return the compiled pattern (pce->re) stored in a cache entry.
   pce must not be NULL (checked with assert()). */
PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3090
0
{/*{{{*/
3091
0
  assert(NULL != pce);
3092
0
  return pce->re;
3093
0
}/*}}}*/