Coverage Report

Created: 2026-06-02 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/php_pcre.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Andrei Zmievski <andrei@php.net>                             |
12
   +----------------------------------------------------------------------+
13
 */
14
15
#include "php.h"
16
#include "php_ini.h"
17
#include "php_pcre.h"
18
#include "ext/standard/info.h"
19
#include "ext/standard/basic_functions.h"
20
#include "zend_smart_str.h"
21
#include "SAPI.h"
22
23
0
#define PREG_PATTERN_ORDER      1
24
0
#define PREG_SET_ORDER        2
25
129
#define PREG_OFFSET_CAPTURE     (1<<8)
26
129
#define PREG_UNMATCHED_AS_NULL    (1<<9)
27
28
0
#define PREG_SPLIT_NO_EMPTY     (1<<0)
29
0
#define PREG_SPLIT_DELIM_CAPTURE  (1<<1)
30
0
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
31
32
0
#define PREG_GREP_INVERT      (1<<0)
33
34
#define PREG_JIT                    (1<<3)
35
36
517
#define PCRE_CACHE_SIZE 4096
37
38
#ifdef HAVE_PCRE_JIT_SUPPORT
39
#define PHP_PCRE_JIT_SUPPORT 1
40
#else
41
#define PHP_PCRE_JIT_SUPPORT 0
42
#endif
43
44
char *php_pcre_version;
45
46
#include "php_pcre_arginfo.h"
47
48
struct _pcre_cache_entry {
49
  pcre2_code *re;
50
  /* Pointer is not NULL (during request) when there are named captures.
51
   * Length is equal to capture_count + 1 to account for capture group 0.
52
   * This table cache is only valid during request.
53
   * Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
54
   * (see GH-17122 and GH-17132). */
55
  zend_string **subpats_table;
56
  uint32_t preg_options;
57
  uint32_t name_count;
58
  uint32_t capture_count;
59
  uint32_t compile_options;
60
  uint32_t refcount;
61
};
62
63
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65
#ifdef HAVE_PCRE_JIT_SUPPORT
66
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
67
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
68
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
69
#endif
70
/* General context using (infallible) system allocator. */
71
ZEND_TLS pcre2_general_context *gctx = NULL;
72
/* These two are global per thread for now. Though it is possible to use these
73
  per pattern. Either one can copy it and use in pce, or one does no global
74
  contexts at all, but creates for every pce. */
75
ZEND_TLS pcre2_compile_context *cctx = NULL;
76
ZEND_TLS pcre2_match_context   *mctx = NULL;
77
ZEND_TLS pcre2_match_data      *mdata = NULL;
78
ZEND_TLS bool              mdata_used = 0;
79
ZEND_TLS uint8_t pcre2_init_ok = 0;
80
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
81
static MUTEX_T pcre_mt = NULL;
82
#define php_pcre_mutex_alloc() \
83
  if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
84
#define php_pcre_mutex_free() \
85
  if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
86
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
87
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
88
#else
89
#define php_pcre_mutex_alloc()
90
#define php_pcre_mutex_free()
91
#define php_pcre_mutex_lock()
92
#define php_pcre_mutex_unlock()
93
#endif
94
95
ZEND_TLS HashTable char_tables;
96
97
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats);
98
99
static void php_pcre_free_char_table(zval *data)
100
0
{/*{{{*/
101
0
  void *ptr = Z_PTR_P(data);
102
0
  pefree(ptr, 1);
103
0
}/*}}}*/
104
105
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
106
796
{
107
796
  int preg_code = 0;
108
109
796
  switch (pcre_code) {
110
0
    case PCRE2_ERROR_MATCHLIMIT:
111
0
      preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
112
0
      break;
113
114
0
    case PCRE2_ERROR_RECURSIONLIMIT:
115
0
      preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
116
0
      break;
117
118
0
    case PCRE2_ERROR_BADUTFOFFSET:
119
0
      preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
120
0
      break;
121
122
#ifdef HAVE_PCRE_JIT_SUPPORT
123
    case PCRE2_ERROR_JIT_STACKLIMIT:
124
      preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
125
      break;
126
#endif
127
128
796
    default:
129
796
      if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
130
99
        preg_code = PHP_PCRE_BAD_UTF8_ERROR;
131
697
      } else  {
132
697
        preg_code = PHP_PCRE_INTERNAL_ERROR;
133
697
      }
134
796
      break;
135
796
  }
136
137
796
  PCRE_G(error_code) = preg_code;
138
796
}
139
/* }}} */
140
141
static const char *php_pcre_get_error_msg(php_pcre_error_code error_code) /* {{{ */
142
0
{
143
0
  switch (error_code) {
144
0
    case PHP_PCRE_NO_ERROR:
145
0
      return "No error";
146
0
    case PHP_PCRE_INTERNAL_ERROR:
147
0
      return "Internal error";
148
0
    case PHP_PCRE_BAD_UTF8_ERROR:
149
0
      return "Malformed UTF-8 characters, possibly incorrectly encoded";
150
0
    case PHP_PCRE_BAD_UTF8_OFFSET_ERROR:
151
0
      return "The offset did not correspond to the beginning of a valid UTF-8 code point";
152
0
    case PHP_PCRE_BACKTRACK_LIMIT_ERROR:
153
0
      return "Backtrack limit exhausted";
154
0
    case PHP_PCRE_RECURSION_LIMIT_ERROR:
155
0
      return "Recursion limit exhausted";
156
157
#ifdef HAVE_PCRE_JIT_SUPPORT
158
    case PHP_PCRE_JIT_STACKLIMIT_ERROR:
159
      return "JIT stack limit exhausted";
160
#endif
161
162
0
    default:
163
0
      return "Unknown error";
164
0
  }
165
0
}
166
/* }}} */
167
168
static void php_free_pcre_cache(zval *data) /* {{{ */
169
0
{
170
0
  pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
171
0
  if (!pce) return;
172
0
  if (pce->subpats_table) {
173
0
    free_subpats_table(pce->subpats_table, pce->capture_count + 1);
174
0
  }
175
0
  pcre2_code_free(pce->re);
176
0
  free(pce);
177
0
}
178
/* }}} */
179
180
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
181
602
{
182
602
  return pemalloc(size, 1);
183
602
}
184
185
static void php_pcre_free(void *block, void *data)
186
77
{
187
77
  pefree(block, 1);
188
77
}
189
190
static void *php_pcre_emalloc(PCRE2_SIZE size, void *data)
191
44.6k
{
192
44.6k
  return emalloc(size);
193
44.6k
}
194
195
static void php_pcre_efree(void *block, void *data)
196
44.8k
{
197
44.8k
  efree(block);
198
44.8k
}
199
200
1.97k
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
201
202
static void php_pcre_init_pcre2(uint8_t jit)
203
2
{/*{{{*/
204
2
  if (!gctx) {
205
2
    gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
206
2
    if (!gctx) {
207
0
      pcre2_init_ok = 0;
208
0
      return;
209
0
    }
210
2
  }
211
212
2
  if (!cctx) {
213
2
    cctx = pcre2_compile_context_create(gctx);
214
2
    if (!cctx) {
215
0
      pcre2_init_ok = 0;
216
0
      return;
217
0
    }
218
2
  }
219
220
2
  if (!mctx) {
221
2
    mctx = pcre2_match_context_create(gctx);
222
2
    if (!mctx) {
223
0
      pcre2_init_ok = 0;
224
0
      return;
225
0
    }
226
2
  }
227
228
#ifdef HAVE_PCRE_JIT_SUPPORT
229
  if (jit && !jit_stack) {
230
    jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
231
    if (!jit_stack) {
232
      pcre2_init_ok = 0;
233
      return;
234
    }
235
  }
236
#endif
237
238
2
  if (!mdata) {
239
2
    mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
240
2
    if (!mdata) {
241
0
      pcre2_init_ok = 0;
242
0
      return;
243
0
    }
244
2
  }
245
246
2
  pcre2_init_ok = 1;
247
2
}/*}}}*/
248
249
static void php_pcre_shutdown_pcre2(void)
250
0
{/*{{{*/
251
0
  if (gctx) {
252
0
    pcre2_general_context_free(gctx);
253
0
    gctx = NULL;
254
0
  }
255
256
0
  if (cctx) {
257
0
    pcre2_compile_context_free(cctx);
258
0
    cctx = NULL;
259
0
  }
260
261
0
  if (mctx) {
262
0
    pcre2_match_context_free(mctx);
263
0
    mctx = NULL;
264
0
  }
265
266
#ifdef HAVE_PCRE_JIT_SUPPORT
267
  /* Stack may only be destroyed when no cached patterns
268
    possibly associated with it do exist. */
269
  if (jit_stack) {
270
    pcre2_jit_stack_free(jit_stack);
271
    jit_stack = NULL;
272
  }
273
#endif
274
275
0
  if (mdata) {
276
0
    pcre2_match_data_free(mdata);
277
0
    mdata = NULL;
278
0
  }
279
280
0
  pcre2_init_ok = 0;
281
0
}/*}}}*/
282
283
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
284
2
{
285
2
  php_pcre_mutex_alloc();
286
287
2
  zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
288
289
2
  pcre_globals->backtrack_limit = 0;
290
2
  pcre_globals->recursion_limit = 0;
291
2
  pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
292
2
  ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
293
2
  ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
294
#ifdef HAVE_PCRE_JIT_SUPPORT
295
  pcre_globals->jit = 1;
296
#endif
297
298
2
  php_pcre_init_pcre2(1);
299
2
  zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
300
2
}
301
/* }}} */
302
303
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
304
0
{
305
0
  zend_hash_destroy(&pcre_globals->pcre_cache);
306
307
0
  php_pcre_shutdown_pcre2();
308
0
  zend_hash_destroy(&char_tables);
309
0
  php_pcre_mutex_free();
310
0
}
311
/* }}} */
312
313
static PHP_INI_MH(OnUpdateBacktrackLimit)
314
2
{/*{{{*/
315
2
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
316
2
  if (mctx) {
317
2
    pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
318
2
  }
319
320
2
  return SUCCESS;
321
2
}/*}}}*/
322
323
static PHP_INI_MH(OnUpdateRecursionLimit)
324
2
{/*{{{*/
325
2
  OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
326
2
  if (mctx) {
327
2
    pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
328
2
  }
329
330
2
  return SUCCESS;
331
2
}/*}}}*/
332
333
#ifdef HAVE_PCRE_JIT_SUPPORT
334
static PHP_INI_MH(OnUpdateJit)
335
{/*{{{*/
336
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
337
  if (PCRE_G(jit) && jit_stack) {
338
    pcre2_jit_stack_assign(mctx, NULL, jit_stack);
339
  } else {
340
    pcre2_jit_stack_assign(mctx, NULL, NULL);
341
  }
342
343
  return SUCCESS;
344
}/*}}}*/
345
#endif
346
347
PHP_INI_BEGIN()
348
  STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
349
  STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
350
#ifdef HAVE_PCRE_JIT_SUPPORT
351
  STD_PHP_INI_BOOLEAN("pcre.jit",           "1",       PHP_INI_ALL, OnUpdateJit,            jit,             zend_pcre_globals, pcre_globals)
352
#endif
353
PHP_INI_END()
354
355
static char *_pcre2_config_str(uint32_t what)
356
8
{/*{{{*/
357
8
  int len = pcre2_config(what, NULL);
358
8
  char *ret = (char *) malloc(len + 1);
359
360
8
  len = pcre2_config(what, ret);
361
8
  if (!len) {
362
0
    free(ret);
363
0
    return NULL;
364
0
  }
365
366
8
  return ret;
367
8
}/*}}}*/
368
369
/* {{{ PHP_MINFO_FUNCTION(pcre) */
370
static PHP_MINFO_FUNCTION(pcre)
371
3
{
372
#ifdef HAVE_PCRE_JIT_SUPPORT
373
  uint32_t flag = 0;
374
  char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
375
#endif
376
3
  char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
377
3
  char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
378
379
3
  php_info_print_table_start();
380
3
  php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
381
3
  php_info_print_table_row(2, "PCRE Library Version", version);
382
3
  free(version);
383
3
  php_info_print_table_row(2, "PCRE Unicode Version", unicode);
384
3
  free(unicode);
385
386
#ifdef HAVE_PCRE_JIT_SUPPORT
387
  if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
388
    php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
389
  } else {
390
    php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
391
  }
392
  if (jit_target) {
393
    php_info_print_table_row(2, "PCRE JIT Target", jit_target);
394
  }
395
  free(jit_target);
396
#else
397
3
  php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
398
3
#endif
399
400
#ifdef HAVE_PCRE_VALGRIND_SUPPORT
401
  php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
402
#endif
403
404
3
  php_info_print_table_end();
405
406
3
  DISPLAY_INI_ENTRIES();
407
3
}
408
/* }}} */
409
410
/* {{{ PHP_MINIT_FUNCTION(pcre) */
411
static PHP_MINIT_FUNCTION(pcre)
412
2
{
413
#ifdef HAVE_PCRE_JIT_SUPPORT
414
  if (UNEXPECTED(!pcre2_init_ok)) {
415
    /* Retry. */
416
    php_pcre_init_pcre2(PCRE_G(jit));
417
    if (!pcre2_init_ok) {
418
      return FAILURE;
419
    }
420
  }
421
#endif
422
423
2
  REGISTER_INI_ENTRIES();
424
425
2
  php_pcre_version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
426
427
2
  register_php_pcre_symbols(module_number);
428
429
2
  return SUCCESS;
430
2
}
431
/* }}} */
432
433
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
434
static PHP_MSHUTDOWN_FUNCTION(pcre)
435
0
{
436
0
  UNREGISTER_INI_ENTRIES();
437
438
0
  free(php_pcre_version);
439
440
0
  return SUCCESS;
441
0
}
442
/* }}} */
443
444
/* {{{ PHP_RINIT_FUNCTION(pcre) */
445
static PHP_RINIT_FUNCTION(pcre)
446
44.4k
{
447
#ifdef HAVE_PCRE_JIT_SUPPORT
448
  if (UNEXPECTED(!pcre2_init_ok)) {
449
    /* Retry. */
450
    php_pcre_mutex_lock();
451
    php_pcre_init_pcre2(PCRE_G(jit));
452
    if (!pcre2_init_ok) {
453
      php_pcre_mutex_unlock();
454
      return FAILURE;
455
    }
456
    php_pcre_mutex_unlock();
457
  }
458
459
  mdata_used = 0;
460
#endif
461
462
44.4k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
463
44.4k
  PCRE_G(gctx_zmm) = pcre2_general_context_create(php_pcre_emalloc, php_pcre_efree, NULL);
464
44.4k
  if (!PCRE_G(gctx_zmm)) {
465
0
    return FAILURE;
466
0
  }
467
468
44.4k
  return SUCCESS;
469
44.4k
}
470
/* }}} */
471
472
static PHP_RSHUTDOWN_FUNCTION(pcre)
473
44.4k
{
474
44.4k
  pcre_cache_entry *pce;
475
23.0M
  ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
476
23.0M
    if (pce->subpats_table) {
477
0
      free_subpats_table(pce->subpats_table, pce->capture_count + 1);
478
0
      pce->subpats_table = NULL;
479
0
    }
480
23.0M
  } ZEND_HASH_FOREACH_END();
481
482
44.4k
  pcre2_general_context_free(PCRE_G(gctx_zmm));
483
44.4k
  PCRE_G(gctx_zmm) = NULL;
484
485
44.4k
  zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
486
44.4k
  zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
487
44.4k
  ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
488
44.4k
  ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
489
44.4k
  return SUCCESS;
490
44.4k
}
491
492
/* {{{ static pcre_clean_cache */
493
static int pcre_clean_cache(zval *data, void *arg)
494
0
{
495
0
  pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
496
0
  int *num_clean = (int *)arg;
497
498
0
  if (!pce->refcount) {
499
0
    if (--(*num_clean) == 0) {
500
0
      return ZEND_HASH_APPLY_REMOVE|ZEND_HASH_APPLY_STOP;
501
0
    }
502
0
    return ZEND_HASH_APPLY_REMOVE;
503
0
  } else {
504
0
    return ZEND_HASH_APPLY_KEEP;
505
0
  }
506
0
}
507
/* }}} */
508
509
0
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
510
0
  uint32_t i;
511
0
  for (i = 0; i < num_subpats; i++) {
512
0
    if (subpat_names[i]) {
513
0
      zend_string_release_ex(subpat_names[i], false);
514
0
    }
515
0
  }
516
0
  efree(subpat_names);
517
0
}
518
519
/* {{{ static make_subpats_table */
520
static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
521
0
{
522
0
  uint32_t num_subpats = pce->capture_count + 1;
523
0
  uint32_t name_size, ni = 0;
524
0
  char *name_table;
525
0
  zend_string **subpat_names;
526
0
  int rc1, rc2;
527
528
0
  rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
529
0
  rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
530
0
  if (rc1 < 0 || rc2 < 0) {
531
0
    php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
532
0
    return NULL;
533
0
  }
534
535
0
  subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
536
0
  while (ni++ < name_cnt) {
537
0
    unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
538
0
    const char *name = name_table + 2;
539
0
    subpat_names[name_idx] = zend_string_init(name, strlen(name), false);
540
0
    name_table += name_size;
541
0
  }
542
0
  return subpat_names;
543
0
}
544
/* }}} */
545
546
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
547
0
{
548
0
  if (!pce->subpats_table) {
549
0
    pce->subpats_table = make_subpats_table(name_cnt, pce);
550
0
  }
551
0
  return pce->subpats_table;
552
0
}
553
554
/* {{{ static calculate_unit_length */
555
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
556
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
557
57
{
558
57
  size_t unit_len;
559
560
57
  if (pce->compile_options & PCRE2_UTF) {
561
18
    const char *end = start;
562
563
    /* skip continuation bytes */
564
18
    while ((*++end & 0xC0) == 0x80);
565
18
    unit_len = end - start;
566
39
  } else {
567
39
    unit_len = 1;
568
39
  }
569
57
  return unit_len;
570
57
}
571
/* }}} */
572
573
/* {{{ pcre_get_compiled_regex_cache */
574
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
575
2.75k
{
576
2.75k
  pcre2_code      *re = NULL;
577
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !defined(HAVE_BUNDLED_PCRE)
578
  uint32_t       coptions = PCRE2_NO_START_OPTIMIZE;
579
#else
580
2.75k
  uint32_t       coptions = 0;
581
2.75k
#endif
582
2.75k
  uint32_t       eoptions = 0;
583
2.75k
  PCRE2_UCHAR           error[128];
584
2.75k
  PCRE2_SIZE           erroffset;
585
2.75k
  int                  errnumber;
586
2.75k
  char         delimiter;
587
2.75k
  char         start_delimiter;
588
2.75k
  char         end_delimiter;
589
2.75k
  char        *p, *pp;
590
2.75k
  char        *pattern;
591
2.75k
  size_t         pattern_len;
592
2.75k
  uint32_t       poptions = 0;
593
2.75k
  const uint8_t       *tables = NULL;
594
2.75k
  zval                *zv;
595
2.75k
  pcre_cache_entry   new_entry;
596
2.75k
  int          rc;
597
2.75k
  zend_string     *key;
598
2.75k
  pcre_cache_entry  *ret;
599
600
2.75k
  if (locale_aware && BG(ctype_string)) {
601
0
    key = zend_string_concat2(
602
0
      ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)),
603
0
      ZSTR_VAL(regex), ZSTR_LEN(regex));
604
2.75k
  } else {
605
2.75k
    key = regex;
606
2.75k
  }
607
608
  /* Try to lookup the cached regex entry, and if successful, just pass
609
     back the compiled pattern, otherwise go on and compile it. */
610
2.75k
  zv = zend_hash_find(&PCRE_G(pcre_cache), key);
611
2.75k
  if (zv) {
612
1.55k
    if (key != regex) {
613
0
      zend_string_release_ex(key, 0);
614
0
    }
615
1.55k
    return (pcre_cache_entry*)Z_PTR_P(zv);
616
1.55k
  }
617
618
1.19k
  p = ZSTR_VAL(regex);
619
1.19k
  const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
620
621
  /* Parse through the leading whitespace, and display a warning if we
622
     get to the end without encountering a delimiter. */
623
1.19k
  while (isspace((unsigned char)*p)) p++;
624
1.19k
  if (p >= end_p) {
625
3
    if (key != regex) {
626
0
      zend_string_release_ex(key, 0);
627
0
    }
628
3
    php_error_docref(NULL, E_WARNING, "Empty regular expression");
629
3
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
630
3
    return NULL;
631
3
  }
632
633
  /* Get the delimiter and display a warning if it is alphanumeric
634
     or a backslash. */
635
1.19k
  delimiter = *p++;
636
1.19k
  if (isalnum((unsigned char)delimiter) || delimiter == '\\' || delimiter == '\0') {
637
9
    if (key != regex) {
638
0
      zend_string_release_ex(key, 0);
639
0
    }
640
9
    php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL byte");
641
9
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
642
9
    return NULL;
643
9
  }
644
645
1.18k
  start_delimiter = delimiter;
646
1.18k
  if ((pp = strchr("([{< )]}> )]}>", delimiter)))
647
48
    delimiter = pp[5];
648
1.18k
  end_delimiter = delimiter;
649
650
1.18k
  pp = p;
651
652
1.18k
  if (start_delimiter == end_delimiter) {
653
    /* We need to iterate through the pattern, searching for the ending delimiter,
654
       but skipping the backslashed delimiters.  If the ending delimiter is not
655
       found, display a warning. */
656
454k
    while (pp < end_p) {
657
454k
      if (*pp == '\\' && pp + 1 < end_p) pp++;
658
439k
      else if (*pp == delimiter)
659
1.13k
        break;
660
453k
      pp++;
661
453k
    }
662
1.14k
  } else {
663
    /* We iterate through the pattern, searching for the matching ending
664
     * delimiter. For each matching starting delimiter, we increment nesting
665
     * level, and decrement it for each matching ending delimiter. If we
666
     * reach the end of the pattern without matching, display a warning.
667
     */
668
39
    int brackets = 1;   /* brackets nesting level */
669
25.4k
    while (pp < end_p) {
670
25.3k
      if (*pp == '\\' && pp + 1 < end_p) pp++;
671
25.0k
      else if (*pp == end_delimiter && --brackets <= 0)
672
0
        break;
673
25.0k
      else if (*pp == start_delimiter)
674
1.36k
        brackets++;
675
25.3k
      pp++;
676
25.3k
    }
677
39
  }
678
679
1.18k
  if (pp >= end_p) {
680
51
    if (key != regex) {
681
0
      zend_string_release_ex(key, 0);
682
0
    }
683
51
    if (start_delimiter == end_delimiter) {
684
12
      php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
685
39
    } else {
686
39
      php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
687
39
    }
688
51
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
689
51
    return NULL;
690
51
  }
691
692
  /* Make a copy of the actual pattern. */
693
1.13k
  pattern_len = pp - p;
694
1.13k
  pattern = estrndup(p, pattern_len);
695
696
  /* Move on to the options */
697
1.13k
  pp++;
698
699
  /* Parse through the options, setting appropriate flags.  Display
700
     a warning if we encounter an unknown modifier. */
701
2.17k
  while (pp < end_p) {
702
1.07k
    switch (*pp++) {
703
      /* Perl compatible options */
704
561
      case 'i': coptions |= PCRE2_CASELESS;   break;
705
34
      case 'm': coptions |= PCRE2_MULTILINE;   break;
706
8
      case 'n': coptions |= PCRE2_NO_AUTO_CAPTURE; break;
707
1
      case 's': coptions |= PCRE2_DOTALL;   break;
708
12
      case 'x': coptions |= PCRE2_EXTENDED;   break;
709
710
      /* PCRE specific options */
711
11
      case 'A': coptions |= PCRE2_ANCHORED;   break;
712
0
      case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
713
0
#ifdef PCRE2_EXTRA_CASELESS_RESTRICT
714
0
      case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
715
0
#endif
716
0
      case 'S': /* Pass. */         break;
717
0
      case 'X': /* Pass. */         break;
718
156
      case 'U': coptions |= PCRE2_UNGREEDY;   break;
719
239
      case 'u': coptions |= PCRE2_UTF;
720
  /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
721
     characters, even in UTF-8 mode. However, this can be changed by setting
722
     the PCRE2_UCP option. */
723
239
#ifdef PCRE2_UCP
724
239
            coptions |= PCRE2_UCP;
725
239
#endif
726
239
        break;
727
0
      case 'J': coptions |= PCRE2_DUPNAMES;   break;
728
729
3
      case ' ':
730
3
      case '\n':
731
18
      case '\r':
732
18
        break;
733
734
0
      case 'e': /* legacy eval */
735
39
      default:
736
39
        if (pp[-1]) {
737
18
          php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
738
21
        } else {
739
21
          php_error_docref(NULL, E_WARNING, "NUL byte is not a valid modifier");
740
21
        }
741
39
        pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
742
39
        efree(pattern);
743
39
        if (key != regex) {
744
0
          zend_string_release_ex(key, 0);
745
0
        }
746
39
        return NULL;
747
1.07k
    }
748
1.07k
  }
749
750
1.09k
  if (key != regex) {
751
0
    zv = zend_hash_str_lookup(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
752
0
    if (Z_ISNULL_P(zv)) {
753
0
      tables = pcre2_maketables(gctx);
754
0
      if (UNEXPECTED(!tables)) {
755
        /* Remove the placeholder entry created by zend_hash_str_lookup(),
756
         * set ptr to NULL first so the destructor (pefree) is safe. */
757
0
        ZVAL_PTR(zv, NULL);
758
0
        zend_hash_str_del(&char_tables, ZSTR_VAL(BG(ctype_string)), ZSTR_LEN(BG(ctype_string)));
759
0
        php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
760
0
        pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
761
0
        zend_string_release_ex(key, 0);
762
0
        efree(pattern);
763
0
        return NULL;
764
0
      }
765
0
      ZVAL_PTR(zv, (void *)tables);
766
0
    } else {
767
0
      tables = Z_PTR_P(zv);
768
0
    }
769
0
  }
770
1.09k
  pcre2_set_character_tables(cctx, tables);
771
772
1.09k
  pcre2_set_compile_extra_options(cctx, eoptions);
773
774
  /* Compile pattern and display a warning if compilation failed. */
775
1.09k
  re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
776
777
1.09k
  if (re == NULL) {
778
580
    if (key != regex) {
779
0
      zend_string_release_ex(key, 0);
780
0
    }
781
580
    pcre2_get_error_message(errnumber, error, sizeof(error));
782
580
    php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
783
580
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
784
580
    efree(pattern);
785
580
    return NULL;
786
580
  }
787
788
#ifdef HAVE_PCRE_JIT_SUPPORT
789
  if (PCRE_G(jit)) {
790
    /* Enable PCRE JIT compiler */
791
    rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
792
    if (EXPECTED(rc >= 0)) {
793
      size_t jit_size = 0;
794
      if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
795
        poptions |= PREG_JIT;
796
      }
797
    } else if (rc == PCRE2_ERROR_NOMEMORY) {
798
      php_error_docref(NULL, E_WARNING,
799
        "Allocation of JIT memory failed, PCRE JIT will be disabled. "
800
        "This is likely caused by security restrictions. "
801
        "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
802
      PCRE_G(jit) = 0;
803
    } else {
804
      pcre2_get_error_message(rc, error, sizeof(error));
805
      php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
806
      pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
807
    }
808
  }
809
#endif
810
517
  efree(pattern);
811
812
  /*
813
   * If we reached cache limit, clean out the items from the head of the list;
814
   * these are supposedly the oldest ones (but not necessarily the least used
815
   * ones).
816
   */
817
517
  if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
818
0
    int num_clean = PCRE_CACHE_SIZE / 8;
819
0
    zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
820
0
  }
821
822
  /* Store the compiled pattern and extra info in the cache. */
823
517
  new_entry.re = re;
824
517
  new_entry.preg_options = poptions;
825
517
  new_entry.compile_options = coptions;
826
517
  new_entry.refcount = 0;
827
517
  new_entry.subpats_table = NULL;
828
829
517
  if ((rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count)) < 0 ||
830
517
      (rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count)) < 0) {
831
0
    if (key != regex) {
832
0
      zend_string_release_ex(key, 0);
833
0
    }
834
0
    pcre2_code_free(new_entry.re);
835
0
    php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
836
0
    pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
837
0
    return NULL;
838
0
  }
839
840
  /*
841
   * Interned strings are not duplicated when stored in HashTable,
842
   * but all the interned strings created during HTTP request are removed
843
   * at end of request. However PCRE_G(pcre_cache) must be consistent
844
   * on the next request as well. So we disable usage of interned strings
845
   * as hash keys especually for this table.
846
   * See bug #63180
847
   */
848
517
  if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
849
117
    zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
850
117
    GC_MAKE_PERSISTENT_LOCAL(str);
851
852
117
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
853
117
    zend_string_release(str);
854
400
  } else {
855
400
    ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
856
400
  }
857
858
517
  if (key != regex) {
859
0
    zend_string_release_ex(key, 0);
860
0
  }
861
862
517
  return ret;
863
517
}
864
/* }}} */
865
866
/* {{{ pcre_get_compiled_regex_cache */
867
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
868
2.75k
{
869
2.75k
  return pcre_get_compiled_regex_cache_ex(regex, true);
870
2.75k
}
871
/* }}} */
872
873
/* {{{ pcre_get_compiled_regex */
874
PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
875
0
{
876
0
  pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
877
878
0
  if (capture_count) {
879
0
    *capture_count = pce ? pce->capture_count : 0;
880
0
  }
881
882
0
  return pce ? pce->re : NULL;
883
0
}
884
/* }}} */
885
886
/* XXX For the cases where it's only about match yes/no and no capture
887
    required, perhaps just a minimum sized data would suffice. */
888
PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
889
0
{/*{{{*/
890
891
0
  assert(NULL != re);
892
893
0
  if (EXPECTED(!mdata_used)) {
894
0
    int rc = 0;
895
896
0
    if (!capture_count) {
897
      /* As we deal with a non cached pattern, no other way to gather this info. */
898
0
      rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
899
0
    }
900
901
0
    if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
902
0
      mdata_used = 1;
903
0
      return mdata;
904
0
    }
905
0
  }
906
907
0
  return pcre2_match_data_create_from_pattern(re, gctx);
908
0
}/*}}}*/
909
910
PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
911
0
{/*{{{*/
912
0
  if (UNEXPECTED(match_data != mdata)) {
913
0
    pcre2_match_data_free(match_data);
914
0
  } else {
915
0
    mdata_used = 0;
916
0
  }
917
0
}/*}}}*/
918
919
0
static void init_unmatched_null_pair(zval *pair) {
920
0
  zval val1, val2;
921
0
  ZVAL_NULL(&val1);
922
0
  ZVAL_LONG(&val2, -1);
923
0
  ZVAL_ARR(pair, zend_new_pair(&val1, &val2));
924
0
}
925
926
0
static void init_unmatched_empty_pair(zval *pair) {
927
0
  zval val1, val2;
928
0
  ZVAL_EMPTY_STRING(&val1);
929
0
  ZVAL_LONG(&val2, -1);
930
0
  ZVAL_ARR(pair, zend_new_pair(&val1, &val2));
931
0
}
932
933
static zend_always_inline void populate_match_value_str(
934
252
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
935
252
  ZVAL_STRINGL_FAST(val, subject + start_offset, end_offset - start_offset);
936
252
}
937
938
static zend_always_inline void populate_match_value(
939
    zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
940
252
    bool unmatched_as_null) {
941
252
  if (PCRE2_UNSET == start_offset) {
942
0
    if (unmatched_as_null) {
943
0
      ZVAL_NULL(val);
944
0
    } else {
945
0
      ZVAL_EMPTY_STRING(val);
946
0
    }
947
252
  } else {
948
252
    populate_match_value_str(val, subject, start_offset, end_offset);
949
252
  }
950
252
}
951
952
static inline void add_named(
953
0
    HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
954
0
  ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
955
956
  /* If the DUPNAMES option is used, multiple subpatterns might have the same name.
957
   * In this case we want to preserve the one that actually has a value. */
958
0
  if (!unmatched) {
959
0
    zend_hash_update(subpats, name, val);
960
0
  } else {
961
0
    if (!zend_hash_add(subpats, name, val)) {
962
0
      return;
963
0
    }
964
0
  }
965
0
  Z_TRY_ADDREF_P(val);
966
0
}
967
968
/* {{{ add_offset_pair */
969
static inline void add_offset_pair(
970
    HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
971
    zend_string *name, zend_long unmatched_as_null)
972
0
{
973
0
  zval match_pair;
974
975
  /* Add (match, offset) to the return value */
976
0
  if (PCRE2_UNSET == start_offset) {
977
0
    if (unmatched_as_null) {
978
0
      do {
979
0
        if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
980
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
981
0
            init_unmatched_null_pair(&match_pair);
982
0
            break;
983
0
          } else {
984
0
            init_unmatched_null_pair(&PCRE_G(unmatched_null_pair));
985
0
          }
986
0
        }
987
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
988
0
      } while (0);
989
0
    } else {
990
0
      do {
991
0
        if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
992
0
          if (UNEXPECTED(EG(flags) & EG_FLAGS_IN_SHUTDOWN)) {
993
0
            init_unmatched_empty_pair(&match_pair);
994
0
            break;
995
0
          } else {
996
0
            init_unmatched_empty_pair(&PCRE_G(unmatched_empty_pair));
997
0
          }
998
0
        }
999
0
        ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1000
0
      } while (0);
1001
0
    }
1002
0
  } else {
1003
0
    zval val1, val2;
1004
0
    populate_match_value_str(&val1, subject, start_offset, end_offset);
1005
0
    ZVAL_LONG(&val2, start_offset);
1006
0
    ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1007
0
  }
1008
1009
0
  if (name) {
1010
0
    add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1011
0
  }
1012
0
  zend_hash_next_index_insert_new(result, &match_pair);
1013
0
}
1014
/* }}} */
1015
1016
static void populate_subpat_array(
1017
    HashTable *subpats_ht, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
1018
129
    uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1019
129
  zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1020
129
  zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1021
129
  zval val;
1022
129
  int i;
1023
129
  if (subpat_names) {
1024
0
    if (offset_capture) {
1025
0
      for (i = 0; i < count; i++) {
1026
0
        add_offset_pair(
1027
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1],
1028
0
          subpat_names[i], unmatched_as_null);
1029
0
      }
1030
0
      if (unmatched_as_null) {
1031
0
        for (i = count; i < num_subpats; i++) {
1032
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
1033
0
        }
1034
0
      }
1035
0
    } else {
1036
0
      for (i = 0; i < count; i++) {
1037
0
        populate_match_value(
1038
0
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1039
0
        if (subpat_names[i]) {
1040
0
          add_named(subpats_ht, subpat_names[i], &val, offsets[2*i] == PCRE2_UNSET);
1041
0
        }
1042
0
        zend_hash_next_index_insert_new(subpats_ht, &val);
1043
0
      }
1044
0
      if (unmatched_as_null) {
1045
0
        for (i = count; i < num_subpats; i++) {
1046
0
          ZVAL_NULL(&val);
1047
0
          if (subpat_names[i]) {
1048
0
            zend_hash_add(subpats_ht, subpat_names[i], &val);
1049
0
          }
1050
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1051
0
        }
1052
0
      }
1053
0
    }
1054
129
  } else {
1055
129
    if (offset_capture) {
1056
0
      for (i = 0; i < count; i++) {
1057
0
        add_offset_pair(
1058
0
          subpats_ht, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
1059
0
      }
1060
0
      if (unmatched_as_null) {
1061
0
        for (i = count; i < num_subpats; i++) {
1062
0
          add_offset_pair(subpats_ht, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
1063
0
        }
1064
0
      }
1065
129
    } else {
1066
381
      for (i = 0; i < count; i++) {
1067
252
        populate_match_value(
1068
252
          &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1069
252
        zend_hash_next_index_insert_new(subpats_ht, &val);
1070
252
      }
1071
129
      if (unmatched_as_null) {
1072
0
        ZVAL_NULL(&val);
1073
0
        for (i = count; i < num_subpats; i++) {
1074
0
          zend_hash_next_index_insert_new(subpats_ht, &val);
1075
0
        }
1076
0
      }
1077
129
    }
1078
129
  }
1079
  /* Add MARK, if available */
1080
129
  if (mark) {
1081
0
    ZVAL_STRING(&val, (char *)mark);
1082
0
    zend_hash_str_update(subpats_ht, ZEND_STRL("MARK"), &val);
1083
0
  }
1084
129
}
1085
1086
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
1087
2.54k
{
1088
  /* parameters */
1089
2.54k
  zend_string    *regex;      /* Regular expression */
1090
2.54k
  zend_string    *subject;      /* String to match against */
1091
2.54k
  pcre_cache_entry *pce;        /* Compiled regular expression */
1092
2.54k
  zval       *subpats = NULL; /* Array for subpatterns */
1093
2.54k
  zend_long     flags = 0;    /* Match control flags */
1094
2.54k
  zend_long     start_offset = 0; /* Where the new search starts */
1095
1096
7.63k
  ZEND_PARSE_PARAMETERS_START(2, 5)
1097
10.1k
    Z_PARAM_STR(regex)
1098
12.7k
    Z_PARAM_STR(subject)
1099
2.54k
    Z_PARAM_OPTIONAL
1100
5.08k
    Z_PARAM_ZVAL(subpats)
1101
5.08k
    Z_PARAM_LONG(flags)
1102
0
    Z_PARAM_LONG(start_offset)
1103
2.54k
  ZEND_PARSE_PARAMETERS_END();
1104
1105
  /* Compile regex or get it from cache. */
1106
2.54k
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1107
667
    RETURN_FALSE;
1108
667
  }
1109
1110
1.87k
  if (start_offset == ZEND_LONG_MIN) {
1111
0
    zend_argument_value_error(5, "must be greater than " ZEND_LONG_FMT, ZEND_LONG_MIN);
1112
0
    RETURN_THROWS();
1113
0
  }
1114
1115
1.87k
  pce->refcount++;
1116
1.87k
  php_pcre_match_impl(pce, subject, return_value, subpats,
1117
1.87k
    global, flags, start_offset);
1118
1.87k
  pce->refcount--;
1119
1.87k
}
1120
/* }}} */
1121
1122
static zend_always_inline bool is_known_valid_utf8(
1123
561
    zend_string *subject_str, PCRE2_SIZE start_offset) {
1124
561
  if (!ZSTR_IS_VALID_UTF8(subject_str)) {
1125
    /* We don't know whether the string is valid UTF-8 or not. */
1126
561
    return false;
1127
561
  }
1128
1129
0
  if (start_offset == ZSTR_LEN(subject_str)) {
1130
    /* Degenerate case: Offset points to end of string. */
1131
0
    return true;
1132
0
  }
1133
1134
  /* Check that the offset does not point to an UTF-8 continuation byte. */
1135
0
  return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1136
0
}
1137
1138
/* {{{ php_pcre_match_impl() */
1139
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1140
  zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
1141
1.87k
{
1142
1.87k
  zval       result_set;    /* Holds a set of subpatterns after
1143
                       a global match */
1144
1.87k
  HashTable    **match_sets = NULL; /* An array of sets of matches for each
1145
                       subpattern after a global match */
1146
1.87k
  uint32_t     options;     /* Execution options */
1147
1.87k
  int        count;       /* Count of matched subpatterns */
1148
1.87k
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1149
1.87k
  int        matched;     /* Has anything matched */
1150
1.87k
  zend_string    **subpat_names;    /* Array for named subpatterns */
1151
1.87k
  size_t       i;
1152
1.87k
  uint32_t     subpats_order;   /* Order of subpattern matches */
1153
1.87k
  uint32_t     offset_capture;  /* Capture match offsets: yes/no */
1154
1.87k
  zend_long    unmatched_as_null; /* Null non-matches: yes/no */
1155
1.87k
  PCRE2_SPTR       mark = NULL;   /* Target for MARK name */
1156
1.87k
  HashTable   *marks = NULL;   /* Array of marks for PREG_PATTERN_ORDER */
1157
1.87k
  pcre2_match_data *match_data;
1158
1.87k
  PCRE2_SIZE     start_offset2, orig_start_offset;
1159
1.87k
  bool old_mdata_used;
1160
1161
1.87k
  char *subject = ZSTR_VAL(subject_str);
1162
1.87k
  size_t subject_len = ZSTR_LEN(subject_str);
1163
1164
  /* Overwrite the passed-in value for subpatterns with an empty array. */
1165
1.87k
  if (subpats != NULL) {
1166
0
    subpats = zend_try_array_init(subpats);
1167
0
    if (!subpats) {
1168
0
      RETURN_THROWS();
1169
0
    }
1170
0
  }
1171
1172
1.87k
  subpats_order = global ? PREG_PATTERN_ORDER : 0;
1173
1174
1.87k
  if (flags) {
1175
0
    offset_capture = flags & PREG_OFFSET_CAPTURE;
1176
0
    unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1177
1178
    /*
1179
     * subpats_order is pre-set to pattern mode so we change it only if
1180
     * necessary.
1181
     */
1182
0
    if (flags & 0xff) {
1183
0
      subpats_order = flags & 0xff;
1184
0
      if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1185
0
        (!global && subpats_order != 0)) {
1186
0
        zend_argument_value_error(4, "must be a PREG_* constant");
1187
0
        RETURN_THROWS();
1188
0
      }
1189
0
    }
1190
1.87k
  } else {
1191
1.87k
    offset_capture = 0;
1192
1.87k
    unmatched_as_null = 0;
1193
1.87k
  }
1194
1195
  /* Negative offset counts from the end of the string. */
1196
1.87k
  if (start_offset < 0) {
1197
0
    if ((PCRE2_SIZE)-start_offset <= subject_len) {
1198
0
      start_offset2 = subject_len + start_offset;
1199
0
    } else {
1200
0
      start_offset2 = 0;
1201
0
    }
1202
1.87k
  } else {
1203
1.87k
    start_offset2 = (PCRE2_SIZE)start_offset;
1204
1.87k
  }
1205
1206
1.87k
  if (start_offset2 > subject_len) {
1207
0
    pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1208
0
    RETURN_FALSE;
1209
0
  }
1210
1211
  /* Calculate the size of the offsets array, and allocate memory for it. */
1212
1.87k
  num_subpats = pce->capture_count + 1;
1213
1214
  /*
1215
   * Build a mapping from subpattern numbers to their names. We will
1216
   * allocate the table only if there are any named subpatterns.
1217
   */
1218
1.87k
  subpat_names = NULL;
1219
1.87k
  if (subpats && pce->name_count > 0) {
1220
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1221
0
    if (UNEXPECTED(!subpat_names)) {
1222
0
      RETURN_FALSE;
1223
0
    }
1224
0
  }
1225
1226
1.87k
  matched = 0;
1227
1.87k
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1228
1229
1.87k
  old_mdata_used = mdata_used;
1230
1.87k
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1231
1.84k
    mdata_used = true;
1232
1.84k
    match_data = mdata;
1233
1.84k
  } else {
1234
30
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1235
30
    if (!match_data) {
1236
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1237
0
      RETURN_FALSE;
1238
0
    }
1239
30
  }
1240
1241
  /* Allocate match sets array and initialize the values. */
1242
1.87k
  if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1243
0
    match_sets = safe_emalloc(num_subpats, sizeof(HashTable *), 0);
1244
0
    for (i=0; i<num_subpats; i++) {
1245
0
      match_sets[i] = zend_new_array(0);
1246
0
    }
1247
0
  }
1248
1249
  /* Array of subpattern offsets */
1250
1.87k
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1251
1252
1.87k
  orig_start_offset = start_offset2;
1253
1.87k
  options =
1254
1.87k
    (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1255
1.87k
      ? 0 : PCRE2_NO_UTF_CHECK;
1256
1257
  /* Execute the regular expression. */
1258
#ifdef HAVE_PCRE_JIT_SUPPORT
1259
  if ((pce->preg_options & PREG_JIT) && options) {
1260
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1261
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1262
  } else
1263
#endif
1264
1.87k
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1265
1.87k
      options, match_data, mctx);
1266
1267
1.87k
  while (1) {
1268
    /* If something has matched */
1269
1.87k
    if (count >= 0) {
1270
      /* Check for too many substrings condition. */
1271
126
      if (UNEXPECTED(count == 0)) {
1272
0
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1273
0
        count = num_subpats;
1274
0
      }
1275
1276
126
matched:
1277
126
      matched++;
1278
1279
      /* If subpatterns array has been passed, fill it in with values. */
1280
126
      if (subpats != NULL) {
1281
        /* Try to get the list of substrings and display a warning if failed. */
1282
0
        if (UNEXPECTED(offsets[1] < offsets[0])) {
1283
0
          if (match_sets) {
1284
0
            for (i = 0; i < num_subpats; i++) {
1285
0
              zend_array_destroy(match_sets[i]);
1286
0
            }
1287
0
            efree(match_sets);
1288
0
          }
1289
0
          if (marks) {
1290
0
            zend_array_destroy(marks);
1291
0
          }
1292
0
          if (match_data != mdata) {
1293
0
            pcre2_match_data_free(match_data);
1294
0
          }
1295
0
          php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1296
0
          RETURN_FALSE;
1297
0
        }
1298
1299
0
        if (global) { /* global pattern matching */
1300
0
          if (subpats_order == PREG_PATTERN_ORDER) {
1301
            /* For each subpattern, insert it into the appropriate array. */
1302
0
            if (offset_capture) {
1303
0
              for (i = 0; i < count; i++) {
1304
0
                add_offset_pair(
1305
0
                  match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1306
0
                  NULL, unmatched_as_null);
1307
0
              }
1308
0
            } else {
1309
0
              for (i = 0; i < count; i++) {
1310
0
                zval val;
1311
0
                populate_match_value(
1312
0
                  &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1313
0
                zend_hash_next_index_insert_new(match_sets[i], &val);
1314
0
              }
1315
0
            }
1316
0
            mark = pcre2_get_mark(match_data);
1317
            /* Add MARK, if available */
1318
0
            if (mark) {
1319
0
              if (!marks) {
1320
0
                marks = zend_new_array(0);
1321
0
              }
1322
0
              zval tmp;
1323
0
              ZVAL_STRING(&tmp, (char *) mark);
1324
0
              zend_hash_index_add_new(marks, matched - 1, &tmp);
1325
0
            }
1326
            /*
1327
             * If the number of captured subpatterns on this run is
1328
             * less than the total possible number, pad the result
1329
             * arrays with NULLs or empty strings.
1330
             */
1331
0
            if (count < num_subpats) {
1332
0
              for (int i = count; i < num_subpats; i++) {
1333
0
                if (offset_capture) {
1334
0
                  add_offset_pair(
1335
0
                    match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1336
0
                    NULL, unmatched_as_null);
1337
0
                } else if (unmatched_as_null) {
1338
0
                  zval tmp;
1339
0
                  ZVAL_NULL(&tmp);
1340
0
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
1341
0
                } else {
1342
0
                  zval tmp;
1343
0
                  ZVAL_EMPTY_STRING(&tmp);
1344
0
                  zend_hash_next_index_insert_new(match_sets[i], &tmp);
1345
0
                }
1346
0
              }
1347
0
            }
1348
0
          } else {
1349
            /* Allocate and populate the result set array */
1350
0
            mark = pcre2_get_mark(match_data);
1351
0
            array_init_size(&result_set, count + (mark ? 1 : 0));
1352
0
            populate_subpat_array(
1353
0
              Z_ARRVAL(result_set), subject, offsets, subpat_names,
1354
0
              num_subpats, count, mark, flags);
1355
            /* And add it to the output array */
1356
0
            zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &result_set);
1357
0
          }
1358
0
        } else {     /* single pattern matching */
1359
          /* For each subpattern, insert it into the subpatterns array. */
1360
0
          mark = pcre2_get_mark(match_data);
1361
0
          populate_subpat_array(
1362
0
            Z_ARRVAL_P(subpats), subject, offsets, subpat_names, num_subpats, count, mark, flags);
1363
0
          break;
1364
0
        }
1365
0
      }
1366
1367
      /* Advance to the next piece. */
1368
126
      start_offset2 = offsets[1];
1369
1370
      /* If we have matched an empty string, mimic what Perl's /g options does.
1371
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1372
         the match again at the same point. If this fails (picked up above) we
1373
         advance to the next character. */
1374
126
      if (start_offset2 == offsets[0]) {
1375
66
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1376
66
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1377
66
        if (count >= 0) {
1378
0
          if (global) {
1379
0
            goto matched;
1380
0
          } else {
1381
0
            break;
1382
0
          }
1383
66
        } else if (count == PCRE2_ERROR_NOMATCH) {
1384
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1385
             this is not necessarily the end. We need to advance
1386
             the start offset, and continue. Fudge the offset values
1387
             to achieve this, unless we're already at the end of the string. */
1388
60
          if (start_offset2 < subject_len) {
1389
57
            size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1390
1391
57
            start_offset2 += unit_len;
1392
57
          } else {
1393
3
            break;
1394
3
          }
1395
60
        } else {
1396
6
          goto error;
1397
6
        }
1398
66
      }
1399
1.74k
    } else if (count == PCRE2_ERROR_NOMATCH) {
1400
1.65k
      break;
1401
1.65k
    } else {
1402
105
error:
1403
105
      pcre_handle_exec_error(count);
1404
105
      break;
1405
99
    }
1406
1407
117
    if (!global) {
1408
117
      break;
1409
117
    }
1410
1411
    /* Execute the regular expression. */
1412
#ifdef HAVE_PCRE_JIT_SUPPORT
1413
    if ((pce->preg_options & PREG_JIT)) {
1414
      if (start_offset2 > subject_len) {
1415
        pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1416
        break;
1417
      }
1418
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1419
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1420
    } else
1421
#endif
1422
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1423
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1424
0
  }
1425
1.87k
  if (match_data != mdata) {
1426
30
    pcre2_match_data_free(match_data);
1427
30
  }
1428
1.87k
  mdata_used = old_mdata_used;
1429
1430
  /* Add the match sets to the output array and clean up */
1431
1.87k
  if (match_sets) {
1432
0
    if (subpat_names) {
1433
0
      for (i = 0; i < num_subpats; i++) {
1434
0
        zval wrapper;
1435
0
        ZVAL_ARR(&wrapper, match_sets[i]);
1436
0
        if (subpat_names[i]) {
1437
0
          zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &wrapper);
1438
0
          GC_ADDREF(match_sets[i]);
1439
0
        }
1440
0
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1441
0
      }
1442
0
    } else {
1443
0
      for (i = 0; i < num_subpats; i++) {
1444
0
        zval wrapper;
1445
0
        ZVAL_ARR(&wrapper, match_sets[i]);
1446
0
        zend_hash_next_index_insert_new(Z_ARRVAL_P(subpats), &wrapper);
1447
0
      }
1448
0
    }
1449
0
    efree(match_sets);
1450
1451
0
    if (marks) {
1452
0
      zval tmp;
1453
0
      ZVAL_ARR(&tmp, marks);
1454
0
      zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
1455
0
    }
1456
0
  }
1457
1458
1.87k
  if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1459
    /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1460
1.77k
    if ((pce->compile_options & PCRE2_UTF)
1461
462
        && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1462
72
      GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1463
72
    }
1464
1465
1.77k
    RETVAL_LONG(matched);
1466
1.77k
  } else {
1467
105
    RETVAL_FALSE;
1468
105
  }
1469
1.87k
}
1470
/* }}} */
1471
1472
/* {{{ Perform a Perl-style regular expression match */
1473
PHP_FUNCTION(preg_match)
1474
2.54k
{
1475
2.54k
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1476
2.54k
}
1477
/* }}} */
1478
1479
ZEND_FRAMELESS_FUNCTION(preg_match, 2)
1480
0
{
1481
0
  zval regex_tmp, subject_tmp;
1482
0
  zend_string *regex, *subject;
1483
1484
0
  Z_FLF_PARAM_STR(1, regex, regex_tmp);
1485
0
  Z_FLF_PARAM_STR(2, subject, subject_tmp);
1486
1487
  /* Compile regex or get it from cache. */
1488
0
  pcre_cache_entry *pce;
1489
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1490
0
    RETVAL_FALSE;
1491
0
    goto flf_clean;
1492
0
  }
1493
1494
0
  pce->refcount++;
1495
0
  php_pcre_match_impl(pce, subject, return_value, /* subpats */ NULL,
1496
0
    /* global */ false, /* flags */ 0, /* start_offset */ 0);
1497
0
  pce->refcount--;
1498
1499
0
flf_clean:
1500
0
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
1501
0
  Z_FLF_PARAM_FREE_STR(2, subject_tmp);
1502
0
}
1503
1504
/* {{{ Perform a Perl-style global regular expression match */
1505
PHP_FUNCTION(preg_match_all)
1506
0
{
1507
0
  php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1508
0
}
1509
/* }}} */
1510
1511
/* {{{ preg_get_backref */
1512
static int preg_get_backref(char **str, int *backref)
1513
0
{
1514
0
  char in_brace = 0;
1515
0
  char *walk = *str;
1516
1517
0
  if (walk[1] == 0)
1518
0
    return 0;
1519
1520
0
  if (*walk == '$' && walk[1] == '{') {
1521
0
    in_brace = 1;
1522
0
    walk++;
1523
0
  }
1524
0
  walk++;
1525
1526
0
  if (*walk >= '0' && *walk <= '9') {
1527
0
    *backref = *walk - '0';
1528
0
    walk++;
1529
0
  } else
1530
0
    return 0;
1531
1532
0
  if (*walk && *walk >= '0' && *walk <= '9') {
1533
0
    *backref = *backref * 10 + *walk - '0';
1534
0
    walk++;
1535
0
  }
1536
1537
0
  if (in_brace) {
1538
0
    if (*walk != '}')
1539
0
      return 0;
1540
0
    else
1541
0
      walk++;
1542
0
  }
1543
1544
0
  *str = walk;
1545
0
  return 1;
1546
0
}
1547
/* }}} */
1548
1549
/* Return NULL if an exception has occurred */
1550
static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1551
129
{
1552
129
  zend_string *result_str = NULL;
1553
129
  zval     retval;      /* Function return value */
1554
129
  zval       arg;       /* Argument to pass to function */
1555
1556
129
  array_init_size(&arg, count + (mark ? 1 : 0));
1557
129
  populate_subpat_array(Z_ARRVAL(arg), subject, offsets, subpat_names, num_subpats, count, mark, flags);
1558
1559
129
  fci->retval = &retval;
1560
129
  fci->param_count = 1;
1561
129
  fci->params = &arg;
1562
129
  fci->consumed_args = zend_fci_consumed_arg(0);
1563
129
  zend_call_function(fci, fcc);
1564
129
  zval_ptr_dtor(&arg);
1565
129
  if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1566
12
    return Z_STR(retval);
1567
12
  }
1568
  /* No Exception has occurred */
1569
117
  else if (EXPECTED(Z_TYPE(retval) != IS_UNDEF)) {
1570
108
    result_str = zval_try_get_string_func(&retval);
1571
108
  }
1572
117
  zval_ptr_dtor(&retval);
1573
1574
117
  return result_str;
1575
129
}
1576
1577
/* {{{ php_pcre_replace */
1578
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1579
                zend_string *subject_str,
1580
                const char *subject, size_t subject_len,
1581
                zend_string *replace_str,
1582
                size_t limit, size_t *replace_count)
1583
117
{
1584
117
  pcre_cache_entry  *pce;         /* Compiled regular expression */
1585
117
  zend_string     *result;      /* Function result */
1586
1587
  /* Abort on pending exception, e.g. thrown from __toString(). */
1588
117
  if (UNEXPECTED(EG(exception))) {
1589
0
    return NULL;
1590
0
  }
1591
1592
  /* Compile regex or get it from cache. */
1593
117
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1594
0
    return NULL;
1595
0
  }
1596
117
  pce->refcount++;
1597
117
  result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1598
117
    limit, replace_count);
1599
117
  pce->refcount--;
1600
1601
117
  return result;
1602
117
}
1603
/* }}} */
1604
1605
/* {{{ php_pcre_replace_impl() */
1606
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
1607
117
{
1608
117
  uint32_t     options;     /* Execution options */
1609
117
  int        count;       /* Count of matched subpatterns */
1610
117
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1611
117
  size_t       new_len;     /* Length of needed storage */
1612
117
  size_t       alloc_len;     /* Actual allocated length */
1613
117
  size_t       match_len;     /* Length of the current match */
1614
117
  int        backref;     /* Backreference number */
1615
117
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1616
117
  size_t       last_end_offset; /* Where the last search ended */
1617
117
  char      *walkbuf,     /* Location of current replacement in the result */
1618
117
          *walk,        /* Used to walk the replacement string */
1619
117
           walk_last;     /* Last walked character */
1620
117
  const char    *match,       /* The current match */
1621
117
          *piece,       /* The current piece of subject */
1622
117
          *replace_end;   /* End of replacement string */
1623
117
  size_t      result_len;     /* Length of result */
1624
117
  zend_string   *result;      /* Result of replacement */
1625
117
  pcre2_match_data *match_data;
1626
117
  bool old_mdata_used;
1627
1628
  /* Calculate the size of the offsets array, and allocate memory for it. */
1629
117
  num_subpats = pce->capture_count + 1;
1630
117
  alloc_len = 0;
1631
117
  result = NULL;
1632
1633
  /* Initialize */
1634
117
  match = NULL;
1635
117
  start_offset = 0;
1636
117
  last_end_offset = 0;
1637
117
  result_len = 0;
1638
117
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1639
1640
117
  old_mdata_used = mdata_used;
1641
117
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1642
9
    mdata_used = true;
1643
9
    match_data = mdata;
1644
108
  } else {
1645
108
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1646
108
    if (!match_data) {
1647
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1648
0
      return NULL;
1649
0
    }
1650
108
  }
1651
1652
117
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1653
1654
  /* Array of subpattern offsets */
1655
117
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1656
1657
  /* Execute the regular expression. */
1658
#ifdef HAVE_PCRE_JIT_SUPPORT
1659
  if ((pce->preg_options & PREG_JIT) && options) {
1660
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1661
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1662
  } else
1663
#endif
1664
117
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1665
117
      options, match_data, mctx);
1666
1667
171
  while (1) {
1668
171
    piece = subject + last_end_offset;
1669
1670
171
    if (count >= 0 && limit > 0) {
1671
54
      bool simple_string;
1672
1673
      /* Check for too many substrings condition. */
1674
54
      if (UNEXPECTED(count == 0)) {
1675
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1676
0
        count = num_subpats;
1677
0
      }
1678
1679
54
matched:
1680
54
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1681
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1682
0
        if (result) {
1683
0
          zend_string_release_ex(result, 0);
1684
0
          result = NULL;
1685
0
        }
1686
0
        break;
1687
0
      }
1688
1689
54
      if (replace_count) {
1690
54
        ++*replace_count;
1691
54
      }
1692
1693
      /* Set the match location in subject */
1694
54
      match = subject + offsets[0];
1695
1696
54
      new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1697
1698
54
      walk = ZSTR_VAL(replace_str);
1699
54
      replace_end = walk + ZSTR_LEN(replace_str);
1700
54
      walk_last = 0;
1701
54
      simple_string = true;
1702
117
      while (walk < replace_end) {
1703
63
        if ('\\' == *walk || '$' == *walk) {
1704
0
          simple_string = false;
1705
0
          if (walk_last == '\\') {
1706
0
            walk++;
1707
0
            walk_last = 0;
1708
0
            continue;
1709
0
          }
1710
0
          if (preg_get_backref(&walk, &backref)) {
1711
0
            if (backref < count)
1712
0
              new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1713
0
            continue;
1714
0
          }
1715
0
        }
1716
63
        new_len++;
1717
63
        walk++;
1718
63
        walk_last = walk[-1];
1719
63
      }
1720
1721
54
      if (new_len >= alloc_len) {
1722
54
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1723
54
        if (result == NULL) {
1724
54
          result = zend_string_alloc(alloc_len, 0);
1725
54
        } else {
1726
0
          result = zend_string_extend(result, alloc_len, 0);
1727
0
        }
1728
54
      }
1729
1730
54
      if (match-piece > 0) {
1731
        /* copy the part of the string before the match */
1732
54
        memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1733
54
        result_len += (match-piece);
1734
54
      }
1735
1736
54
      if (simple_string) {
1737
        /* copy replacement */
1738
54
        memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
1739
54
        result_len += ZSTR_LEN(replace_str);
1740
54
      } else {
1741
        /* copy replacement and backrefs */
1742
0
        walkbuf = ZSTR_VAL(result) + result_len;
1743
1744
0
        walk = ZSTR_VAL(replace_str);
1745
0
        walk_last = 0;
1746
0
        while (walk < replace_end) {
1747
0
          if ('\\' == *walk || '$' == *walk) {
1748
0
            if (walk_last == '\\') {
1749
0
              *(walkbuf-1) = *walk++;
1750
0
              walk_last = 0;
1751
0
              continue;
1752
0
            }
1753
0
            if (preg_get_backref(&walk, &backref)) {
1754
0
              if (backref < count) {
1755
0
                if (offsets[backref<<1] < SIZE_MAX) {
1756
0
                  match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1757
0
                  walkbuf = zend_mempcpy(walkbuf, subject + offsets[backref << 1], match_len);
1758
0
                }
1759
0
              }
1760
0
              continue;
1761
0
            }
1762
0
          }
1763
0
          *walkbuf++ = *walk++;
1764
0
          walk_last = walk[-1];
1765
0
        }
1766
0
        *walkbuf = '\0';
1767
        /* increment the result length by how much we've added to the string */
1768
0
        result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
1769
0
      }
1770
1771
54
      limit--;
1772
1773
      /* Advance to the next piece. */
1774
54
      start_offset = last_end_offset = offsets[1];
1775
1776
      /* If we have matched an empty string, mimic what Perl's /g options does.
1777
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1778
         the match again at the same point. If this fails (picked up above) we
1779
         advance to the next character. */
1780
54
      if (start_offset == offsets[0]) {
1781
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1782
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1783
1784
0
        piece = subject + start_offset;
1785
0
        if (count >= 0 && limit > 0) {
1786
0
          goto matched;
1787
0
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1788
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1789
             this is not necessarily the end. We need to advance
1790
             the start offset, and continue. Fudge the offset values
1791
             to achieve this, unless we're already at the end of the string. */
1792
0
          if (start_offset < subject_len) {
1793
0
            size_t unit_len = calculate_unit_length(pce, piece);
1794
0
            start_offset += unit_len;
1795
0
          } else {
1796
0
            goto not_matched;
1797
0
          }
1798
0
        } else {
1799
0
          goto error;
1800
0
        }
1801
0
      }
1802
1803
117
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1804
117
not_matched:
1805
117
      if (!result && subject_str) {
1806
63
        result = zend_string_copy(subject_str);
1807
63
        break;
1808
63
      }
1809
      /* now we know exactly how long it is */
1810
54
      alloc_len = result_len + subject_len - last_end_offset;
1811
54
      if (NULL != result) {
1812
54
        result = zend_string_realloc(result, alloc_len, 0);
1813
54
      } else {
1814
0
        result = zend_string_alloc(alloc_len, 0);
1815
0
      }
1816
      /* stick that last bit of string on our output */
1817
54
      memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
1818
54
      result_len += subject_len - last_end_offset;
1819
54
      ZSTR_VAL(result)[result_len] = '\0';
1820
54
      ZSTR_LEN(result) = result_len;
1821
54
      break;
1822
117
    } else {
1823
0
error:
1824
0
      pcre_handle_exec_error(count);
1825
0
      if (result) {
1826
0
        zend_string_release_ex(result, 0);
1827
0
        result = NULL;
1828
0
      }
1829
0
      break;
1830
0
    }
1831
1832
#ifdef HAVE_PCRE_JIT_SUPPORT
1833
    if (pce->preg_options & PREG_JIT) {
1834
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1835
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1836
    } else
1837
#endif
1838
54
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1839
54
          PCRE2_NO_UTF_CHECK, match_data, mctx);
1840
54
  }
1841
117
  if (match_data != mdata) {
1842
108
    pcre2_match_data_free(match_data);
1843
108
  }
1844
117
  mdata_used = old_mdata_used;
1845
1846
117
  return result;
1847
117
}
1848
/* }}} */
1849
1850
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str,
1851
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
1852
  size_t limit, size_t *replace_count, zend_long flags
1853
84
) {
1854
84
  uint32_t     options;     /* Execution options */
1855
84
  int        count;       /* Count of matched subpatterns */
1856
84
  zend_string   **subpat_names;   /* Array for named subpatterns */
1857
84
  uint32_t     num_subpats;   /* Number of captured subpatterns */
1858
84
  size_t       alloc_len;     /* Actual allocated length */
1859
84
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
1860
84
  size_t       last_end_offset; /* Where the last search ended */
1861
84
  const char    *match,       /* The current match */
1862
84
          *piece;       /* The current piece of subject */
1863
84
  size_t      result_len;     /* Length of result */
1864
84
  zend_string   *result;      /* Result of replacement */
1865
84
  pcre2_match_data *match_data;
1866
84
  bool old_mdata_used;
1867
1868
  /* Calculate the size of the offsets array, and allocate memory for it. */
1869
84
  num_subpats = pce->capture_count + 1;
1870
84
  if (pce->name_count > 0) {
1871
0
    subpat_names = ensure_subpats_table(pce->name_count, pce);
1872
0
    if (UNEXPECTED(!subpat_names)) {
1873
0
      return NULL;
1874
0
    }
1875
84
  } else {
1876
84
    subpat_names = NULL;
1877
84
  }
1878
1879
84
  alloc_len = 0;
1880
84
  result = NULL;
1881
1882
  /* Initialize */
1883
84
  match = NULL;
1884
84
  start_offset = 0;
1885
84
  last_end_offset = 0;
1886
84
  result_len = 0;
1887
84
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1888
1889
84
  old_mdata_used = mdata_used;
1890
84
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1891
84
    mdata_used = 1;
1892
84
    match_data = mdata;
1893
84
  } else {
1894
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
1895
0
    if (!match_data) {
1896
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1897
0
      mdata_used = old_mdata_used;
1898
0
      return NULL;
1899
0
    }
1900
0
  }
1901
1902
84
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1903
1904
  /* Array of subpattern offsets */
1905
84
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
1906
1907
  /* Execute the regular expression. */
1908
#ifdef HAVE_PCRE_JIT_SUPPORT
1909
  if ((pce->preg_options & PREG_JIT) && options) {
1910
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1911
        PCRE2_NO_UTF_CHECK, match_data, mctx);
1912
  } else
1913
#endif
1914
84
  count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1915
84
      options, match_data, mctx);
1916
1917
204
  while (1) {
1918
204
    piece = ZSTR_VAL(subject_str) + last_end_offset;
1919
1920
204
    if (count >= 0 && limit) {
1921
      /* Check for too many substrings condition. */
1922
129
      if (UNEXPECTED(count == 0)) {
1923
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1924
0
        count = num_subpats;
1925
0
      }
1926
1927
129
matched:
1928
129
      if (UNEXPECTED(offsets[1] < offsets[0])) {
1929
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1930
0
        if (result) {
1931
0
          zend_string_release_ex(result, 0);
1932
0
          result = NULL;
1933
0
        }
1934
0
        break;
1935
0
      }
1936
1937
129
      if (replace_count) {
1938
129
        ++*replace_count;
1939
129
      }
1940
1941
      /* Set the match location in subject */
1942
129
      match = ZSTR_VAL(subject_str) + offsets[0];
1943
1944
      /* Length of needed storage */
1945
129
      size_t new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1946
1947
      /* Use custom function to get replacement string and its length. */
1948
129
      zend_string *eval_result = preg_do_repl_func(
1949
129
        fci, fcc, ZSTR_VAL(subject_str), offsets, subpat_names, num_subpats, count,
1950
129
        pcre2_get_mark(match_data), flags);
1951
1952
129
      if (UNEXPECTED(eval_result == NULL)) {
1953
9
        goto error;
1954
9
      }
1955
120
      new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result) + ZSTR_MAX_OVERHEAD, new_len) -ZSTR_MAX_OVERHEAD;
1956
120
      if (new_len >= alloc_len) {
1957
99
        alloc_len = zend_safe_address_guarded(2, new_len, ZSTR_MAX_OVERHEAD) - ZSTR_MAX_OVERHEAD;
1958
99
        if (result == NULL) {
1959
60
          result = zend_string_alloc(alloc_len, 0);
1960
60
        } else {
1961
39
          result = zend_string_extend(result, alloc_len, 0);
1962
39
        }
1963
99
      }
1964
1965
120
      if (match-piece > 0) {
1966
        /* copy the part of the string before the match */
1967
120
        memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1968
120
        result_len += (match-piece);
1969
120
      }
1970
1971
      /* If using custom function, copy result to the buffer and clean up. */
1972
120
      memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1973
120
      result_len += ZSTR_LEN(eval_result);
1974
120
      zend_string_release_ex(eval_result, 0);
1975
1976
120
      limit--;
1977
1978
      /* Advance to the next piece. */
1979
120
      start_offset = last_end_offset = offsets[1];
1980
1981
      /* If we have matched an empty string, mimic what Perl's /g options does.
1982
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1983
         the match again at the same point. If this fails (picked up above) we
1984
         advance to the next character. */
1985
120
      if (start_offset == offsets[0]) {
1986
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
1987
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1988
1989
0
        piece = ZSTR_VAL(subject_str) + start_offset;
1990
0
        if (count >= 0 && limit) {
1991
0
          goto matched;
1992
0
        } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1993
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1994
             this is not necessarily the end. We need to advance
1995
             the start offset, and continue. Fudge the offset values
1996
             to achieve this, unless we're already at the end of the string. */
1997
0
          if (start_offset < ZSTR_LEN(subject_str)) {
1998
0
            size_t unit_len = calculate_unit_length(pce, piece);
1999
0
            start_offset += unit_len;
2000
0
          } else {
2001
0
            goto not_matched;
2002
0
          }
2003
0
        } else {
2004
0
          goto error;
2005
0
        }
2006
0
      }
2007
2008
120
    } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2009
75
not_matched:
2010
75
      if (result == NULL) {
2011
15
        result = zend_string_copy(subject_str);
2012
15
        break;
2013
15
      }
2014
      /* now we know exactly how long it is */
2015
60
      size_t segment_len = ZSTR_LEN(subject_str) - last_end_offset;
2016
60
      alloc_len = result_len + segment_len;
2017
60
      result = zend_string_realloc(result, alloc_len, 0);
2018
      /* stick that last bit of string on our output */
2019
60
      memcpy(ZSTR_VAL(result) + result_len, piece, segment_len);
2020
60
      result_len += segment_len;
2021
60
      ZSTR_VAL(result)[result_len] = '\0';
2022
60
      ZSTR_LEN(result) = result_len;
2023
60
      break;
2024
75
    } else {
2025
9
error:
2026
9
      pcre_handle_exec_error(count);
2027
9
      if (result) {
2028
0
        zend_string_release_ex(result, 0);
2029
0
        result = NULL;
2030
0
      }
2031
9
      break;
2032
0
    }
2033
#ifdef HAVE_PCRE_JIT_SUPPORT
2034
    if ((pce->preg_options & PREG_JIT)) {
2035
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2036
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2037
    } else
2038
#endif
2039
120
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
2040
120
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2041
120
  }
2042
84
  if (match_data != mdata) {
2043
0
    pcre2_match_data_free(match_data);
2044
0
  }
2045
84
  mdata_used = old_mdata_used;
2046
2047
84
  return result;
2048
84
}
2049
2050
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2051
                zend_string *subject_str,
2052
                zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2053
                size_t limit, size_t *replace_count, zend_long flags)
2054
99
{
2055
99
  pcre_cache_entry  *pce;         /* Compiled regular expression */
2056
99
  zend_string     *result;      /* Function result */
2057
2058
  /* Compile regex or get it from cache. */
2059
99
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2060
15
    return NULL;
2061
15
  }
2062
84
  pce->refcount++;
2063
84
  result = php_pcre_replace_func_impl(pce, subject_str, fci, fcc, limit, replace_count, flags);
2064
84
  pce->refcount--;
2065
2066
84
  return result;
2067
99
}
2068
2069
/* {{{ php_pcre_replace_array */
2070
static zend_string *php_pcre_replace_array(HashTable *regex,
2071
  zend_string *replace_str, HashTable *replace_ht,
2072
  zend_string *subject_str, size_t limit, size_t *replace_count)
2073
0
{
2074
0
  zval    *regex_entry;
2075
0
  zend_string *result;
2076
2077
0
  zend_string_addref(subject_str);
2078
2079
0
  if (replace_ht) {
2080
0
    uint32_t replace_idx = 0;
2081
2082
    /* For each entry in the regex array, get the entry */
2083
0
    ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2084
      /* Make sure we're dealing with strings. */
2085
0
      zend_string *tmp_regex_str;
2086
0
      zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2087
0
      zend_string *replace_entry_str, *tmp_replace_entry_str;
2088
0
      zval *zv;
2089
2090
      /* Get current entry */
2091
0
      while (1) {
2092
0
        if (replace_idx == replace_ht->nNumUsed) {
2093
0
          replace_entry_str = ZSTR_EMPTY_ALLOC();
2094
0
          tmp_replace_entry_str = NULL;
2095
0
          break;
2096
0
        }
2097
0
        zv = ZEND_HASH_ELEMENT(replace_ht, replace_idx);
2098
0
        replace_idx++;
2099
0
        if (Z_TYPE_P(zv) != IS_UNDEF) {
2100
0
          replace_entry_str = zval_get_tmp_string(zv, &tmp_replace_entry_str);
2101
0
          break;
2102
0
        }
2103
0
      }
2104
2105
      /* Do the actual replacement and put the result back into subject_str
2106
         for further replacements. */
2107
0
      result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2108
0
        ZSTR_LEN(subject_str), replace_entry_str, limit, replace_count);
2109
0
      zend_tmp_string_release(tmp_replace_entry_str);
2110
0
      zend_tmp_string_release(tmp_regex_str);
2111
0
      zend_string_release_ex(subject_str, 0);
2112
0
      subject_str = result;
2113
0
      if (UNEXPECTED(result == NULL)) {
2114
0
        break;
2115
0
      }
2116
0
    } ZEND_HASH_FOREACH_END();
2117
2118
0
  } else {
2119
0
    ZEND_ASSERT(replace_str != NULL);
2120
2121
    /* For each entry in the regex array, get the entry */
2122
0
    ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2123
      /* Make sure we're dealing with strings. */
2124
0
      zend_string *tmp_regex_str;
2125
0
      zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2126
2127
      /* Do the actual replacement and put the result back into subject_str
2128
         for further replacements. */
2129
0
      result = php_pcre_replace(regex_str, subject_str, ZSTR_VAL(subject_str),
2130
0
        ZSTR_LEN(subject_str), replace_str, limit, replace_count);
2131
0
      zend_tmp_string_release(tmp_regex_str);
2132
0
      zend_string_release_ex(subject_str, 0);
2133
0
      subject_str = result;
2134
2135
0
      if (UNEXPECTED(result == NULL)) {
2136
0
        break;
2137
0
      }
2138
0
    } ZEND_HASH_FOREACH_END();
2139
0
  }
2140
2141
0
  return subject_str;
2142
0
}
2143
/* }}} */
2144
2145
/* {{{ php_replace_in_subject */
2146
static zend_always_inline zend_string *php_replace_in_subject(
2147
  zend_string *regex_str, HashTable *regex_ht,
2148
  zend_string *replace_str, HashTable *replace_ht,
2149
  zend_string *subject, size_t limit, size_t *replace_count)
2150
117
{
2151
117
  zend_string *result;
2152
2153
117
  if (regex_str) {
2154
117
    ZEND_ASSERT(replace_str != NULL);
2155
117
    result = php_pcre_replace(regex_str, subject, ZSTR_VAL(subject), ZSTR_LEN(subject),
2156
117
      replace_str, limit, replace_count);
2157
117
  } else {
2158
0
    ZEND_ASSERT(regex_ht != NULL);
2159
0
    result = php_pcre_replace_array(regex_ht, replace_str, replace_ht, subject,
2160
0
      limit, replace_count);
2161
0
  }
2162
117
  return result;
2163
117
}
2164
/* }}} */
2165
2166
static zend_string *php_replace_in_subject_func(zend_string *regex_str, const HashTable *regex_ht,
2167
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2168
  zend_string *subject, size_t limit, size_t *replace_count, zend_long flags)
2169
99
{
2170
99
  zend_string *result;
2171
2172
99
  if (regex_str) {
2173
99
    result = php_pcre_replace_func(regex_str, subject, fci, fcc, limit, replace_count, flags);
2174
99
    return result;
2175
99
  } else {
2176
    /* If regex is an array */
2177
0
    zval    *regex_entry;
2178
2179
0
    ZEND_ASSERT(regex_ht != NULL);
2180
2181
0
    zend_string_addref(subject);
2182
2183
    /* For each entry in the regex array, get the entry */
2184
0
    ZEND_HASH_FOREACH_VAL(regex_ht, regex_entry) {
2185
      /* Make sure we're dealing with strings. */
2186
0
      zend_string *tmp_regex_entry_str;
2187
0
      zend_string *regex_entry_str = zval_try_get_tmp_string(regex_entry, &tmp_regex_entry_str);
2188
0
      if (UNEXPECTED(regex_entry_str == NULL)) {
2189
0
        break;
2190
0
      }
2191
2192
      /* Do the actual replacement and put the result back into subject
2193
         for further replacements. */
2194
0
      result = php_pcre_replace_func(
2195
0
        regex_entry_str, subject, fci, fcc, limit, replace_count, flags);
2196
0
      zend_tmp_string_release(tmp_regex_entry_str);
2197
0
      zend_string_release(subject);
2198
0
      subject = result;
2199
0
      if (UNEXPECTED(result == NULL)) {
2200
0
        break;
2201
0
      }
2202
0
    } ZEND_HASH_FOREACH_END();
2203
2204
0
    return subject;
2205
0
  }
2206
99
}
2207
2208
static size_t php_preg_replace_func_impl(zval *return_value,
2209
  zend_string *regex_str, const HashTable *regex_ht,
2210
  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2211
  zend_string *subject_str, const HashTable *subject_ht, zend_long limit_val, zend_long flags)
2212
99
{
2213
99
  zend_string *result;
2214
99
  size_t replace_count = 0;
2215
2216
99
  if (subject_str) {
2217
99
    result = php_replace_in_subject_func(
2218
99
      regex_str, regex_ht, fci, fcc, subject_str, limit_val, &replace_count, flags);
2219
99
    if (result != NULL) {
2220
75
      RETVAL_STR(result);
2221
75
    } else {
2222
24
      RETVAL_NULL();
2223
24
    }
2224
99
  } else {
2225
    /* if subject is an array */
2226
0
    zval    *subject_entry, zv;
2227
0
    zend_string *string_key;
2228
0
    zend_ulong   num_key;
2229
2230
0
    ZEND_ASSERT(subject_ht != NULL);
2231
2232
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2233
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2234
2235
    /* For each subject entry, convert it to string, then perform replacement
2236
       and add the result to the return_value array. */
2237
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2238
0
      zend_string *tmp_subject_entry_str;
2239
0
      zend_string *subject_entry_str = zval_try_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2240
0
      if (UNEXPECTED(subject_entry_str == NULL)) {
2241
0
        break;
2242
0
      }
2243
2244
0
      result = php_replace_in_subject_func(
2245
0
        regex_str, regex_ht, fci, fcc, subject_entry_str, limit_val, &replace_count, flags);
2246
0
      if (result != NULL) {
2247
        /* Add to return array */
2248
0
        ZVAL_STR(&zv, result);
2249
0
        if (string_key) {
2250
0
          zend_hash_add_new(return_value_ht, string_key, &zv);
2251
0
        } else {
2252
0
          zend_hash_index_add_new(return_value_ht, num_key, &zv);
2253
0
        }
2254
0
      }
2255
0
      zend_tmp_string_release(tmp_subject_entry_str);
2256
0
    } ZEND_HASH_FOREACH_END();
2257
0
  }
2258
2259
99
  return replace_count;
2260
99
}
2261
2262
static void _preg_replace_common(
2263
  zval *return_value,
2264
  HashTable *regex_ht, zend_string *regex_str,
2265
  HashTable *replace_ht, zend_string *replace_str,
2266
  HashTable *subject_ht, zend_string *subject_str,
2267
  zend_long limit,
2268
  zval *zcount,
2269
  bool is_filter
2270
117
) {
2271
117
  size_t replace_count = 0;
2272
117
  zend_string *result;
2273
117
  size_t old_replace_count;
2274
2275
  /* If replace is an array then the regex argument needs to also be an array */
2276
117
  if (replace_ht && !regex_ht) {
2277
0
    zend_argument_type_error(1, "must be of type array when argument #2 ($replacement) is an array, string given");
2278
0
    RETURN_THROWS();
2279
0
  }
2280
2281
117
  if (subject_str) {
2282
117
    old_replace_count = replace_count;
2283
117
    result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2284
117
      subject_str, limit, &replace_count);
2285
117
    if (result != NULL) {
2286
117
      if (!is_filter || replace_count > old_replace_count) {
2287
117
        RETVAL_STR(result);
2288
117
      } else {
2289
0
        zend_string_release_ex(result, 0);
2290
0
        RETVAL_NULL();
2291
0
      }
2292
117
    } else {
2293
0
      RETVAL_NULL();
2294
0
    }
2295
117
  } else {
2296
    /* if subject is an array */
2297
0
    zval    *subject_entry, zv;
2298
0
    zend_string *string_key;
2299
0
    zend_ulong   num_key;
2300
2301
0
    ZEND_ASSERT(subject_ht != NULL);
2302
2303
0
    array_init_size(return_value, zend_hash_num_elements(subject_ht));
2304
0
    HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2305
2306
    /* For each subject entry, convert it to string, then perform replacement
2307
       and add the result to the return_value array. */
2308
0
    ZEND_HASH_FOREACH_KEY_VAL(subject_ht, num_key, string_key, subject_entry) {
2309
0
      old_replace_count = replace_count;
2310
0
      zend_string *tmp_subject_entry_str;
2311
0
      zend_string *subject_entry_str = zval_get_tmp_string(subject_entry, &tmp_subject_entry_str);
2312
0
      result = php_replace_in_subject(regex_str, regex_ht, replace_str, replace_ht,
2313
0
        subject_entry_str, limit, &replace_count);
2314
2315
0
      if (result != NULL) {
2316
0
        if (!is_filter || replace_count > old_replace_count) {
2317
          /* Add to return array */
2318
0
          ZVAL_STR(&zv, result);
2319
0
          if (string_key) {
2320
0
            zend_hash_add_new(return_value_ht, string_key, &zv);
2321
0
          } else {
2322
0
            zend_hash_index_add_new(return_value_ht, num_key, &zv);
2323
0
          }
2324
0
        } else {
2325
0
          zend_string_release_ex(result, 0);
2326
0
        }
2327
0
      }
2328
0
      zend_tmp_string_release(tmp_subject_entry_str);
2329
0
    } ZEND_HASH_FOREACH_END();
2330
0
  }
2331
2332
117
  if (zcount) {
2333
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2334
0
  }
2335
117
}
2336
2337
/* {{{ preg_replace_common */
2338
static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool is_filter)
2339
120
{
2340
120
  zend_string *regex_str, *replace_str, *subject_str;
2341
120
  HashTable *regex_ht, *replace_ht, *subject_ht;
2342
120
  zend_long limit = -1;
2343
120
  zval *zcount = NULL;
2344
2345
  /* Get function parameters and do error-checking. */
2346
360
  ZEND_PARSE_PARAMETERS_START(3, 5)
2347
600
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2348
600
    Z_PARAM_ARRAY_HT_OR_STR(replace_ht, replace_str)
2349
600
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2350
600
    Z_PARAM_OPTIONAL
2351
600
    Z_PARAM_LONG(limit)
2352
342
    Z_PARAM_ZVAL(zcount)
2353
342
  ZEND_PARSE_PARAMETERS_END();
2354
2355
117
  _preg_replace_common(
2356
117
    return_value,
2357
117
    regex_ht, regex_str,
2358
117
    replace_ht, replace_str,
2359
117
    subject_ht, subject_str,
2360
117
    limit, zcount, is_filter);
2361
117
}
2362
/* }}} */
2363
2364
/* {{{ Perform Perl-style regular expression replacement. */
2365
PHP_FUNCTION(preg_replace)
2366
120
{
2367
120
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
2368
120
}
2369
/* }}} */
2370
2371
ZEND_FRAMELESS_FUNCTION(preg_replace, 3)
2372
0
{
2373
0
  zend_string *regex_str, *replace_str, *subject_str;
2374
0
  HashTable *regex_ht, *replace_ht, *subject_ht;
2375
0
  zval regex_tmp, replace_tmp, subject_tmp;
2376
2377
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(1, regex_ht, regex_str, regex_tmp);
2378
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(2, replace_ht, replace_str, replace_tmp);
2379
0
  Z_FLF_PARAM_ARRAY_HT_OR_STR(3, subject_ht, subject_str, subject_tmp);
2380
2381
0
  _preg_replace_common(
2382
0
    return_value,
2383
0
    regex_ht, regex_str,
2384
0
    replace_ht, replace_str,
2385
0
    subject_ht, subject_str,
2386
0
    /* limit */ -1, /* zcount */ NULL, /* is_filter */ false);
2387
2388
0
flf_clean:;
2389
0
  Z_FLF_PARAM_FREE_STR(1, regex_tmp);
2390
0
  Z_FLF_PARAM_FREE_STR(2, replace_tmp);
2391
0
  Z_FLF_PARAM_FREE_STR(3, subject_tmp);
2392
0
}
2393
2394
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2395
PHP_FUNCTION(preg_replace_callback)
2396
99
{
2397
99
  zval *zcount = NULL;
2398
99
  zend_string *regex_str;
2399
99
  HashTable *regex_ht;
2400
99
  zend_string *subject_str;
2401
99
  HashTable *subject_ht;
2402
99
  zend_long limit = -1, flags = 0;
2403
99
  size_t replace_count;
2404
99
  zend_fcall_info fci = empty_fcall_info;
2405
99
  zend_fcall_info_cache fcc = empty_fcall_info_cache;
2406
2407
  /* Get function parameters and do error-checking. */
2408
297
  ZEND_PARSE_PARAMETERS_START(3, 6)
2409
495
    Z_PARAM_ARRAY_HT_OR_STR(regex_ht, regex_str)
2410
495
    Z_PARAM_FUNC(fci, fcc)
2411
594
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2412
594
    Z_PARAM_OPTIONAL
2413
594
    Z_PARAM_LONG(limit)
2414
0
    Z_PARAM_ZVAL(zcount)
2415
0
    Z_PARAM_LONG(flags)
2416
99
  ZEND_PARSE_PARAMETERS_END();
2417
2418
99
  replace_count = php_preg_replace_func_impl(return_value, regex_str, regex_ht,
2419
99
    &fci, &fcc,
2420
99
    subject_str, subject_ht, limit, flags);
2421
99
  if (zcount) {
2422
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2423
0
  }
2424
99
}
2425
/* }}} */
2426
2427
/* {{{ Perform Perl-style regular expression replacement using replacement callback. */
2428
PHP_FUNCTION(preg_replace_callback_array)
2429
0
{
2430
0
  zval *replace, *zcount = NULL;
2431
0
  HashTable *pattern, *subject_ht;
2432
0
  zend_string *subject_str, *str_idx_regex;
2433
0
  zend_long limit = -1, flags = 0;
2434
0
  size_t replace_count = 0;
2435
2436
  /* Get function parameters and do error-checking. */
2437
0
  ZEND_PARSE_PARAMETERS_START(2, 5)
2438
0
    Z_PARAM_ARRAY_HT(pattern)
2439
0
    Z_PARAM_ARRAY_HT_OR_STR(subject_ht, subject_str)
2440
0
    Z_PARAM_OPTIONAL
2441
0
    Z_PARAM_LONG(limit)
2442
0
    Z_PARAM_ZVAL(zcount)
2443
0
    Z_PARAM_LONG(flags)
2444
0
  ZEND_PARSE_PARAMETERS_END();
2445
2446
0
  if (subject_ht) {
2447
0
    GC_TRY_ADDREF(subject_ht);
2448
0
  } else {
2449
0
    GC_TRY_ADDREF(subject_str);
2450
0
  }
2451
2452
0
  ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
2453
0
    if (!str_idx_regex) {
2454
0
      zend_argument_type_error(1, "must contain only string patterns as keys");
2455
0
      goto error;
2456
0
    }
2457
2458
0
    zend_fcall_info_cache fcc = empty_fcall_info_cache;
2459
0
    zend_fcall_info fci = empty_fcall_info;
2460
0
    fci.size = sizeof(zend_fcall_info);
2461
    /* Copy potential trampoline */
2462
0
    ZVAL_COPY_VALUE(&fci.function_name, replace);
2463
2464
0
    if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2465
0
      zend_argument_type_error(1, "must contain only valid callbacks");
2466
0
      goto error;
2467
0
    }
2468
2469
0
    zval retval;
2470
0
    replace_count += php_preg_replace_func_impl(&retval, str_idx_regex, /* regex_ht */ NULL, &fci, &fcc,
2471
0
      subject_str, subject_ht, limit, flags);
2472
0
    zend_release_fcall_info_cache(&fcc);
2473
2474
0
    switch (Z_TYPE(retval)) {
2475
0
      case IS_ARRAY:
2476
0
        ZEND_ASSERT(subject_ht);
2477
0
        zend_array_release(subject_ht);
2478
0
        subject_ht = Z_ARR(retval);
2479
0
        break;
2480
0
      case IS_STRING:
2481
0
        ZEND_ASSERT(subject_str);
2482
0
        zend_string_release(subject_str);
2483
0
        subject_str = Z_STR(retval);
2484
0
        break;
2485
0
      case IS_NULL:
2486
0
        RETVAL_NULL();
2487
0
        goto error;
2488
0
      default: ZEND_UNREACHABLE();
2489
0
    }
2490
2491
0
    if (EG(exception)) {
2492
0
      goto error;
2493
0
    }
2494
0
  } ZEND_HASH_FOREACH_END();
2495
2496
0
  if (zcount) {
2497
0
    ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2498
0
  }
2499
2500
0
  if (subject_ht) {
2501
0
    RETVAL_ARR(subject_ht);
2502
    // Unset the type_flags of immutable arrays to prevent the VM from performing refcounting
2503
0
    if (GC_FLAGS(subject_ht) & IS_ARRAY_IMMUTABLE) {
2504
0
      Z_TYPE_FLAGS_P(return_value) = 0;
2505
0
    }
2506
0
    return;
2507
0
  } else {
2508
0
    RETURN_STR(subject_str);
2509
0
  }
2510
2511
0
error:
2512
0
  if (subject_ht) {
2513
0
    zend_array_release(subject_ht);
2514
0
  } else {
2515
0
    zend_string_release(subject_str);
2516
0
  }
2517
0
}
2518
/* }}} */
2519
2520
/* {{{ Perform Perl-style regular expression replacement and only return matches. */
2521
PHP_FUNCTION(preg_filter)
2522
0
{
2523
0
  preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
2524
0
}
2525
/* }}} */
2526
2527
/* {{{ Split string into an array using a perl-style regular expression as a delimiter */
2528
PHP_FUNCTION(preg_split)
2529
0
{
2530
0
  zend_string     *regex;     /* Regular expression */
2531
0
  zend_string     *subject;   /* String to match against */
2532
0
  zend_long      limit_val = -1;/* Integer value of limit */
2533
0
  zend_long      flags = 0;   /* Match control flags */
2534
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2535
2536
  /* Get function parameters and do error checking */
2537
0
  ZEND_PARSE_PARAMETERS_START(2, 4)
2538
0
    Z_PARAM_STR(regex)
2539
0
    Z_PARAM_STR(subject)
2540
0
    Z_PARAM_OPTIONAL
2541
0
    Z_PARAM_LONG(limit_val)
2542
0
    Z_PARAM_LONG(flags)
2543
0
  ZEND_PARSE_PARAMETERS_END();
2544
2545
  /* Compile regex or get it from cache. */
2546
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2547
0
    RETURN_FALSE;
2548
0
  }
2549
2550
0
  pce->refcount++;
2551
0
  php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2552
0
  pce->refcount--;
2553
0
}
2554
/* }}} */
2555
2556
/* {{{ php_pcre_split */
2557
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2558
  zend_long limit_val, zend_long flags)
2559
0
{
2560
0
  uint32_t     options;     /* Execution options */
2561
0
  int        count;       /* Count of matched subpatterns */
2562
0
  PCRE2_SIZE     start_offset;    /* Where the new search starts */
2563
0
  PCRE2_SIZE     last_match_offset; /* Location of last match */
2564
0
  uint32_t     no_empty;      /* If NO_EMPTY flag is set */
2565
0
  uint32_t     delim_capture;   /* If delimiters should be captured */
2566
0
  uint32_t     offset_capture;  /* If offsets should be captured */
2567
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2568
0
  zval       tmp;
2569
0
  pcre2_match_data *match_data;
2570
0
  bool old_mdata_used;
2571
0
  char *subject = ZSTR_VAL(subject_str);
2572
2573
0
  no_empty = flags & PREG_SPLIT_NO_EMPTY;
2574
0
  delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2575
0
  offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2576
2577
  /* Initialize return value */
2578
0
  array_init(return_value);
2579
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2580
2581
  /* Calculate the size of the offsets array, and allocate memory for it. */
2582
0
  num_subpats = pce->capture_count + 1;
2583
2584
  /* Start at the beginning of the string */
2585
0
  start_offset = 0;
2586
0
  last_match_offset = 0;
2587
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2588
2589
0
  if (limit_val == -1) {
2590
    /* pass */
2591
0
  } else if (limit_val == 0) {
2592
0
    limit_val = -1;
2593
0
  } else if (limit_val <= 1) {
2594
0
    goto last;
2595
0
  }
2596
2597
0
  old_mdata_used = mdata_used;
2598
0
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2599
0
    mdata_used = true;
2600
0
    match_data = mdata;
2601
0
  } else {
2602
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2603
0
    if (!match_data) {
2604
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2605
0
      zval_ptr_dtor(return_value);
2606
0
      RETURN_FALSE;
2607
0
    }
2608
0
  }
2609
2610
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2611
2612
  /* Array of subpattern offsets */
2613
0
  PCRE2_SIZE *const offsets = pcre2_get_ovector_pointer(match_data);
2614
2615
#ifdef HAVE_PCRE_JIT_SUPPORT
2616
  if ((pce->preg_options & PREG_JIT) && options) {
2617
    count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2618
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2619
  } else
2620
#endif
2621
0
  count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2622
0
      options, match_data, mctx);
2623
2624
0
  while (1) {
2625
    /* If something matched */
2626
0
    if (count >= 0) {
2627
      /* Check for too many substrings condition. */
2628
0
      if (UNEXPECTED(count == 0)) {
2629
0
        php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2630
0
        count = num_subpats;
2631
0
      }
2632
2633
0
matched:
2634
0
      if (UNEXPECTED(offsets[1] < offsets[0])) {
2635
0
        PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2636
0
        break;
2637
0
      }
2638
2639
0
      if (!no_empty || offsets[0] != last_match_offset) {
2640
0
        if (offset_capture) {
2641
          /* Add (match, offset) pair to the return value */
2642
0
          add_offset_pair(
2643
0
            return_value_ht, subject, last_match_offset, offsets[0],
2644
0
            NULL, 0);
2645
0
        } else {
2646
          /* Add the piece to the return value */
2647
0
          populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2648
0
          zend_hash_next_index_insert_new(return_value_ht, &tmp);
2649
0
        }
2650
2651
        /* One less left to do */
2652
0
        if (limit_val != -1)
2653
0
          limit_val--;
2654
0
      }
2655
2656
0
      if (delim_capture) {
2657
0
        size_t i;
2658
0
        for (i = 1; i < count; i++) {
2659
          /* If we have matched a delimiter */
2660
0
          if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2661
0
            if (offset_capture) {
2662
0
              add_offset_pair(
2663
0
                return_value_ht, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2664
0
            } else {
2665
0
              populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2666
0
              zend_hash_next_index_insert_new(return_value_ht, &tmp);
2667
0
            }
2668
0
          }
2669
0
        }
2670
0
      }
2671
2672
      /* Advance to the position right after the last full match */
2673
0
      start_offset = last_match_offset = offsets[1];
2674
2675
      /* If we have matched an empty string, mimic what Perl's /g options does.
2676
         This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2677
         the match again at the same point. If this fails (picked up above) we
2678
         advance to the next character. */
2679
0
      if (start_offset == offsets[0]) {
2680
        /* Get next piece if no limit or limit not yet reached and something matched*/
2681
0
        if (limit_val != -1 && limit_val <= 1) {
2682
0
          break;
2683
0
        }
2684
0
        count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2685
0
          PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2686
0
        if (count >= 0) {
2687
0
          goto matched;
2688
0
        } else if (count == PCRE2_ERROR_NOMATCH) {
2689
          /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2690
             this is not necessarily the end. We need to advance
2691
             the start offset, and continue. Fudge the offset values
2692
             to achieve this, unless we're already at the end of the string. */
2693
0
          if (start_offset < ZSTR_LEN(subject_str)) {
2694
0
            start_offset += calculate_unit_length(pce, subject + start_offset);
2695
0
          } else {
2696
0
            break;
2697
0
          }
2698
0
        } else {
2699
0
          goto error;
2700
0
        }
2701
0
      }
2702
2703
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2704
0
      break;
2705
0
    } else {
2706
0
error:
2707
0
      pcre_handle_exec_error(count);
2708
0
      break;
2709
0
    }
2710
2711
    /* Get next piece if no limit or limit not yet reached and something matched*/
2712
0
    if (limit_val != -1 && limit_val <= 1) {
2713
0
      break;
2714
0
    }
2715
2716
#ifdef HAVE_PCRE_JIT_SUPPORT
2717
    if (pce->preg_options & PREG_JIT) {
2718
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2719
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2720
    } else
2721
#endif
2722
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2723
0
        PCRE2_NO_UTF_CHECK, match_data, mctx);
2724
0
  }
2725
0
  if (match_data != mdata) {
2726
0
    pcre2_match_data_free(match_data);
2727
0
  }
2728
0
  mdata_used = old_mdata_used;
2729
2730
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2731
0
    zval_ptr_dtor(return_value);
2732
0
    RETURN_FALSE;
2733
0
  }
2734
2735
0
last:
2736
0
  start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2737
2738
0
  if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2739
0
    if (offset_capture) {
2740
      /* Add the last (match, offset) pair to the return value */
2741
0
      add_offset_pair(return_value_ht, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2742
0
    } else {
2743
      /* Add the last piece to the return value */
2744
0
      if (start_offset == 0) {
2745
0
        ZVAL_STR_COPY(&tmp, subject_str);
2746
0
      } else {
2747
0
        populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2748
0
      }
2749
0
      zend_hash_next_index_insert_new(return_value_ht, &tmp);
2750
0
    }
2751
0
  }
2752
0
}
2753
/* }}} */
2754
2755
/* {{{ Quote regular expression characters plus an optional character */
2756
PHP_FUNCTION(preg_quote)
2757
30
{
2758
30
  zend_string *str;           /* Input string argument */
2759
30
  zend_string *delim = NULL;   /* Additional delimiter argument */
2760
30
  char    *in_str;      /* Input string */
2761
30
  char    *in_str_end;      /* End of the input string */
2762
30
  zend_string *out_str;     /* Output string with quoted characters */
2763
30
  size_t       extra_len;         /* Number of additional characters */
2764
30
  char    *p,         /* Iterator for input string */
2765
30
        *q,         /* Iterator for output string */
2766
30
         delim_char = '\0', /* Delimiter character to be quoted */
2767
30
         c;         /* Current character */
2768
2769
  /* Get the arguments and check for errors */
2770
90
  ZEND_PARSE_PARAMETERS_START(1, 2)
2771
120
    Z_PARAM_STR(str)
2772
30
    Z_PARAM_OPTIONAL
2773
66
    Z_PARAM_STR_OR_NULL(delim)
2774
30
  ZEND_PARSE_PARAMETERS_END();
2775
2776
  /* Nothing to do if we got an empty string */
2777
30
  if (ZSTR_LEN(str) == 0) {
2778
0
    RETURN_EMPTY_STRING();
2779
0
  }
2780
2781
30
  in_str = ZSTR_VAL(str);
2782
30
  in_str_end = in_str + ZSTR_LEN(str);
2783
2784
30
  if (delim) {
2785
3
    delim_char = ZSTR_VAL(delim)[0];
2786
3
  }
2787
2788
  /* Go through the string and quote necessary characters */
2789
30
  extra_len = 0;
2790
30
  p = in_str;
2791
50.4k
  do {
2792
50.4k
    c = *p;
2793
50.4k
    switch(c) {
2794
735
      case '.':
2795
921
      case '\\':
2796
1.28k
      case '+':
2797
1.30k
      case '*':
2798
1.43k
      case '?':
2799
1.57k
      case '[':
2800
1.63k
      case '^':
2801
1.75k
      case ']':
2802
1.75k
      case '$':
2803
1.98k
      case '(':
2804
2.61k
      case ')':
2805
2.68k
      case '{':
2806
3.03k
      case '}':
2807
3.48k
      case '=':
2808
3.48k
      case '!':
2809
3.70k
      case '>':
2810
3.74k
      case '<':
2811
3.81k
      case '|':
2812
4.23k
      case ':':
2813
4.47k
      case '-':
2814
4.80k
      case '#':
2815
4.80k
        extra_len++;
2816
4.80k
        break;
2817
2818
1.91k
      case '\0':
2819
1.91k
        extra_len+=3;
2820
1.91k
        break;
2821
2822
43.7k
      default:
2823
43.7k
        if (c == delim_char) {
2824
0
          extra_len++;
2825
0
        }
2826
43.7k
        break;
2827
50.4k
    }
2828
50.4k
    p++;
2829
50.4k
  } while (p != in_str_end);
2830
2831
30
  if (extra_len == 0) {
2832
0
    RETURN_STR_COPY(str);
2833
0
  }
2834
2835
  /* Allocate enough memory so that even if each character
2836
     is quoted, we won't run out of room */
2837
30
  out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2838
30
  q = ZSTR_VAL(out_str);
2839
30
  p = in_str;
2840
2841
50.4k
  do {
2842
50.4k
    c = *p;
2843
50.4k
    switch(c) {
2844
735
      case '.':
2845
921
      case '\\':
2846
1.28k
      case '+':
2847
1.30k
      case '*':
2848
1.43k
      case '?':
2849
1.57k
      case '[':
2850
1.63k
      case '^':
2851
1.75k
      case ']':
2852
1.75k
      case '$':
2853
1.98k
      case '(':
2854
2.61k
      case ')':
2855
2.68k
      case '{':
2856
3.03k
      case '}':
2857
3.48k
      case '=':
2858
3.48k
      case '!':
2859
3.70k
      case '>':
2860
3.74k
      case '<':
2861
3.81k
      case '|':
2862
4.23k
      case ':':
2863
4.47k
      case '-':
2864
4.80k
      case '#':
2865
4.80k
        *q++ = '\\';
2866
4.80k
        *q++ = c;
2867
4.80k
        break;
2868
2869
1.91k
      case '\0':
2870
1.91k
        *q++ = '\\';
2871
1.91k
        *q++ = '0';
2872
1.91k
        *q++ = '0';
2873
1.91k
        *q++ = '0';
2874
1.91k
        break;
2875
2876
43.7k
      default:
2877
43.7k
        if (c == delim_char) {
2878
0
          *q++ = '\\';
2879
0
        }
2880
43.7k
        *q++ = c;
2881
43.7k
        break;
2882
50.4k
    }
2883
50.4k
    p++;
2884
50.4k
  } while (p != in_str_end);
2885
30
  *q = '\0';
2886
2887
30
  RETURN_NEW_STR(out_str);
2888
30
}
2889
/* }}} */
2890
2891
/* {{{ Searches array and returns entries which match regex */
2892
PHP_FUNCTION(preg_grep)
2893
0
{
2894
0
  zend_string     *regex;     /* Regular expression */
2895
0
  zval        *input;     /* Input array */
2896
0
  zend_long      flags = 0;   /* Match control flags */
2897
0
  pcre_cache_entry  *pce;     /* Compiled regular expression */
2898
2899
  /* Get arguments and do error checking */
2900
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2901
0
    Z_PARAM_STR(regex)
2902
0
    Z_PARAM_ARRAY(input)
2903
0
    Z_PARAM_OPTIONAL
2904
0
    Z_PARAM_LONG(flags)
2905
0
  ZEND_PARSE_PARAMETERS_END();
2906
2907
  /* Compile regex or get it from cache. */
2908
0
  if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2909
0
    RETURN_FALSE;
2910
0
  }
2911
2912
0
  pce->refcount++;
2913
0
  php_pcre_grep_impl(pce, input, return_value, flags);
2914
0
  pce->refcount--;
2915
0
}
2916
/* }}} */
2917
2918
PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2919
0
{
2920
0
  zval            *entry;             /* An entry in the input array */
2921
0
  uint32_t     num_subpats;   /* Number of captured subpatterns */
2922
0
  int        count;       /* Count of matched subpatterns */
2923
0
  uint32_t     options;     /* Execution options */
2924
0
  zend_string   *string_key;
2925
0
  zend_ulong     num_key;
2926
0
  bool     invert;      /* Whether to return non-matching
2927
                       entries */
2928
0
  bool old_mdata_used;
2929
0
  pcre2_match_data *match_data;
2930
0
  invert = flags & PREG_GREP_INVERT ? 1 : 0;
2931
2932
  /* Calculate the size of the offsets array, and allocate memory for it. */
2933
0
  num_subpats = pce->capture_count + 1;
2934
2935
  /* Initialize return array */
2936
0
  array_init(return_value);
2937
0
  HashTable *return_value_ht = Z_ARRVAL_P(return_value);
2938
2939
0
  PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2940
2941
0
  old_mdata_used = mdata_used;
2942
0
  if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2943
0
    mdata_used = true;
2944
0
    match_data = mdata;
2945
0
  } else {
2946
0
    match_data = pcre2_match_data_create_from_pattern(pce->re, PCRE_G(gctx_zmm));
2947
0
    if (!match_data) {
2948
0
      PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2949
0
      return;
2950
0
    }
2951
0
  }
2952
2953
0
  options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2954
2955
  /* Go through the input array */
2956
0
  ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2957
0
    zend_string *tmp_subject_str;
2958
0
    zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2959
2960
    /* Perform the match */
2961
#ifdef HAVE_PCRE_JIT_SUPPORT
2962
    if ((pce->preg_options & PREG_JIT) && options) {
2963
      count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2964
          PCRE2_NO_UTF_CHECK, match_data, mctx);
2965
    } else
2966
#endif
2967
0
    count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2968
0
        options, match_data, mctx);
2969
2970
    /* If the entry fits our requirements */
2971
0
    if (count >= 0) {
2972
      /* Check for too many substrings condition. */
2973
0
      if (UNEXPECTED(count == 0)) {
2974
0
        php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2975
0
      }
2976
0
      if (!invert) {
2977
0
        Z_TRY_ADDREF_P(entry);
2978
2979
        /* Add to return array */
2980
0
        if (string_key) {
2981
0
          zend_hash_update(return_value_ht, string_key, entry);
2982
0
        } else {
2983
0
          zend_hash_index_update(return_value_ht, num_key, entry);
2984
0
        }
2985
0
      }
2986
0
    } else if (count == PCRE2_ERROR_NOMATCH) {
2987
0
      if (invert) {
2988
0
        Z_TRY_ADDREF_P(entry);
2989
2990
        /* Add to return array */
2991
0
        if (string_key) {
2992
0
          zend_hash_update(return_value_ht, string_key, entry);
2993
0
        } else {
2994
0
          zend_hash_index_update(return_value_ht, num_key, entry);
2995
0
        }
2996
0
      }
2997
0
    } else {
2998
0
      pcre_handle_exec_error(count);
2999
0
      zend_tmp_string_release(tmp_subject_str);
3000
0
      break;
3001
0
    }
3002
3003
0
    zend_tmp_string_release(tmp_subject_str);
3004
0
  } ZEND_HASH_FOREACH_END();
3005
0
  if (match_data != mdata) {
3006
0
    pcre2_match_data_free(match_data);
3007
0
  }
3008
3009
0
  mdata_used = old_mdata_used;
3010
3011
0
  if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
3012
0
    zend_array_destroy(Z_ARR_P(return_value));
3013
0
    RETURN_FALSE;
3014
0
  }
3015
0
}
3016
/* }}} */
3017
3018
/* {{{ Returns the error code of the last regexp execution. */
3019
PHP_FUNCTION(preg_last_error)
3020
0
{
3021
0
  ZEND_PARSE_PARAMETERS_NONE();
3022
3023
0
  RETURN_LONG(PCRE_G(error_code));
3024
0
}
3025
/* }}} */
3026
3027
/* {{{ Returns the error message of the last regexp execution. */
3028
PHP_FUNCTION(preg_last_error_msg)
3029
0
{
3030
0
  ZEND_PARSE_PARAMETERS_NONE();
3031
3032
0
  RETURN_STRING(php_pcre_get_error_msg(PCRE_G(error_code)));
3033
0
}
3034
/* }}} */
3035
3036
/* {{{ module definition structures */
3037
3038
zend_module_entry pcre_module_entry = {
3039
  STANDARD_MODULE_HEADER,
3040
  "pcre",
3041
  ext_functions,
3042
  PHP_MINIT(pcre),
3043
  PHP_MSHUTDOWN(pcre),
3044
  PHP_RINIT(pcre),
3045
  PHP_RSHUTDOWN(pcre),
3046
  PHP_MINFO(pcre),
3047
  PHP_PCRE_VERSION,
3048
  PHP_MODULE_GLOBALS(pcre),
3049
  PHP_GINIT(pcre),
3050
  PHP_GSHUTDOWN(pcre),
3051
  NULL,
3052
  STANDARD_MODULE_PROPERTIES_EX
3053
};
3054
3055
#ifdef COMPILE_DL_PCRE
3056
ZEND_GET_MODULE(pcre)
3057
#endif
3058
3059
/* }}} */
3060
3061
PHPAPI pcre2_match_context *php_pcre_mctx(void)
3062
0
{/*{{{*/
3063
0
  return mctx;
3064
0
}/*}}}*/
3065
3066
PHPAPI pcre2_general_context *php_pcre_gctx(void)
3067
0
{/*{{{*/
3068
0
  return gctx;
3069
0
}/*}}}*/
3070
3071
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3072
0
{/*{{{*/
3073
0
  return cctx;
3074
0
}/*}}}*/
3075
3076
PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3077
0
{/*{{{*/
3078
0
  assert(NULL != pce);
3079
0
  pce->refcount++;
3080
0
}/*}}}*/
3081
3082
PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3083
0
{/*{{{*/
3084
0
  assert(NULL != pce);
3085
0
  assert(0 != pce->refcount);
3086
0
  pce->refcount--;
3087
0
}/*}}}*/
3088
3089
PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3090
0
{/*{{{*/
3091
0
  assert(NULL != pce);
3092
0
  return pce->re;
3093
0
}/*}}}*/