Coverage Report

Created: 2025-08-26 06:16

/src/njs/external/njs_regex.c
Line
Count
Source (jump to first uncovered line)
1
2
/*
3
 * Copyright (C) Igor Sysoev
4
 * Copyright (C) Dmitry Volyntsev
5
 * Copyright (C) NGINX, Inc.
6
 */
7
8
9
#include <njs_main.h>
10
11
#ifdef NJS_HAVE_PCRE2

/* PCRE2 build: the library is used with 8-bit code units. */
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>


static const u_char *njs_regex_pcre2_error(int errcode, u_char buffer[128]);


#else

/* Legacy PCRE build. */
#include <pcre.h>


static void *njs_pcre_malloc(size_t size);
static void njs_pcre_free(void *p);


/*
 * Context whose allocator callbacks are used by njs_pcre_malloc() and
 * njs_pcre_free().  njs_regex_compile() points it at the caller's context
 * before compiling and resets it to NULL before returning.
 */
static njs_regex_generic_ctx_t  *regex_context;


#endif
31
32
33
njs_regex_generic_ctx_t *
34
njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
35
    njs_pcre_free_t private_free, void *memory_data)
36
10.2k
{
37
10.2k
#ifdef NJS_HAVE_PCRE2
38
39
10.2k
    return pcre2_general_context_create(private_malloc, private_free,
40
10.2k
                                        memory_data);
41
#else
42
43
    njs_regex_generic_ctx_t  *ctx;
44
45
    ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data);
46
47
    if (njs_fast_path(ctx != NULL)) {
48
        ctx->private_malloc = private_malloc;
49
        ctx->private_free = private_free;
50
        ctx->memory_data = memory_data;
51
    }
52
53
    return ctx;
54
55
#endif
56
10.2k
}
57
58
59
njs_regex_compile_ctx_t *
60
njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
61
10.2k
{
62
10.2k
#ifdef NJS_HAVE_PCRE2
63
10.2k
    pcre2_compile_context  *cc;
64
65
10.2k
    cc = pcre2_compile_context_create(ctx);
66
67
10.2k
#ifdef PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
68
10.2k
    if (njs_fast_path(cc != NULL)) {
69
        /* Workaround for surrogate pairs in regular expressions
70
         *
71
         * This option is needed because njs, unlike the standard ECMAScript,
72
         * stores and processes strings in UTF-8 encoding.
73
         * PCRE2 does not support surrogate pairs by default when it
74
         * is compiled for UTF-8 only strings. But many polyfills
75
         * and transpilers use such surrogate pairs expressions.
76
         */
77
10.2k
        pcre2_set_compile_extra_options(cc,
78
10.2k
                                        PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES);
79
10.2k
    }
80
10.2k
#endif
81
82
10.2k
    return cc;
83
84
#else
85
86
    return ctx;
87
88
#endif
89
10.2k
}
90
91
92
93
njs_int_t
94
njs_regex_escape(njs_mp_t *mp, njs_str_t *text)
95
327k
{
96
327k
#ifdef NJS_HAVE_PCRE2
97
327k
    size_t  anychars, nomatches;
98
327k
    u_char  *p, *dst, *start, *end;
99
100
    /*
101
     * 1) [^] is a valid regexp expression in JavaScript, but PCRE2
102
     * rejects it as invalid, replacing it with equivalent PCRE2 [\s\S]
103
     * expression.
104
     * 2) [] is a valid regexp expression in JavaScript, but PCRE2
105
     * rejects it as invalid, replacing it with equivalent PCRE2 (?!)
106
     * expression which matches nothing.
107
     */
108
109
327k
    start = text->start;
110
327k
    end = text->start + text->length;
111
112
327k
    anychars = 0;
113
327k
    nomatches = 0;
114
115
40.8M
    for (p = start; p < end; p++) {
116
40.5M
        switch (*p) {
117
184k
        case '\\':
118
184k
            p += 1;
119
120
184k
            break;
121
122
258k
        case '[':
123
258k
            if (p + 1 < end && p[1] == ']') {
124
40.2k
                p += 1;
125
40.2k
                nomatches += 1;
126
127
217k
            } else if (p + 2 < end && p[1] == '^' && p[2] == ']') {
128
14.9k
                p += 2;
129
14.9k
                anychars += 1;
130
131
203k
            } else {
132
8.28M
                while (p < end && *p != ']') {
133
8.08M
                    p += 1;
134
8.08M
                }
135
203k
            }
136
137
258k
            break;
138
40.5M
        }
139
40.5M
    }
140
141
327k
    if (!anychars && !nomatches) {
142
293k
        return NJS_OK;
143
293k
    }
144
145
33.9k
    text->length = text->length
146
33.9k
                   + anychars * (njs_length("\\s\\S") - njs_length("^"))
147
33.9k
                   + nomatches * (njs_length("?!"));
148
149
33.9k
    text->start = njs_mp_alloc(mp, text->length);
150
33.9k
    if (njs_slow_path(text->start == NULL)) {
151
0
        return NJS_ERROR;
152
0
    }
153
154
33.9k
    dst = text->start;
155
156
3.75M
    for (p = start; p < end; p++) {
157
158
3.71M
        switch (*p) {
159
21.6k
        case '\\':
160
21.6k
            *dst++ = *p;
161
21.6k
            if (p + 1 < end) {
162
21.6k
                p += 1;
163
21.6k
                *dst++ = *p;
164
21.6k
            }
165
166
21.6k
            continue;
167
168
103k
        case '[':
169
103k
            if (p + 1 < end && p[1] == ']') {
170
40.2k
                p += 1;
171
40.2k
                dst = njs_cpymem(dst, "(?!)", 4);
172
40.2k
                continue;
173
174
63.3k
            } else if (p + 2 < end && p[1] == '^' && p[2] == ']') {
175
14.9k
                p += 2;
176
14.9k
                dst = njs_cpymem(dst, "[\\s\\S]", 6);
177
14.9k
                continue;
178
179
48.4k
            } else {
180
48.4k
                *dst++ = *p++; /* Copy '['. */
181
182
1.03M
                while (p < end && *p != ']') {
183
990k
                    *dst++ = *p++;
184
990k
                }
185
186
48.4k
                if (p < end) {
187
47.8k
                    *dst++ = *p; /* Copy ']'. */
188
47.8k
                }
189
190
48.4k
                continue;
191
48.4k
            }
192
3.71M
        }
193
194
3.59M
        *dst++ = *p;
195
3.59M
    }
196
197
33.9k
    njs_assert(dst == text->start + text->length);
198
199
33.9k
    return NJS_OK;
200
201
#else
202
203
    /*
204
     * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
205
     * lone closing square brackets as invalid.  Whereas according
206
     * to ES6: 11.8.5 it is a valid regexp expression.
207
     *
208
     * 2) escaping zero byte characters as "\u0000".
209
     *
210
     * Escaping it here as a workaround.
211
     */
212
213
    size_t      brackets, zeros;
214
    u_char      *p, *dst, *start, *end;
215
    njs_bool_t  in;
216
217
    start = text->start;
218
    end = text->start + text->length;
219
220
    in = 0;
221
    zeros = 0;
222
    brackets = 0;
223
224
    for (p = start; p < end; p++) {
225
226
        switch (*p) {
227
        case '[':
228
            in = 1;
229
            break;
230
231
        case ']':
232
            if (!in) {
233
                brackets++;
234
            }
235
236
            in = 0;
237
            break;
238
239
        case '\\':
240
            p++;
241
242
            if (p == end || *p != '\0') {
243
                break;
244
            }
245
246
            /* Fall through. */
247
248
        case '\0':
249
            zeros++;
250
            break;
251
        }
252
    }
253
254
    if (!brackets && !zeros) {
255
        return NJS_OK;
256
    }
257
258
    text->length = text->length + brackets + zeros * njs_length("\\u0000");
259
260
    text->start = njs_mp_alloc(mp, text->length);
261
    if (njs_slow_path(text->start == NULL)) {
262
        return NJS_ERROR;
263
    }
264
265
    in = 0;
266
    dst = text->start;
267
268
    for (p = start; p < end; p++) {
269
270
        switch (*p) {
271
        case '[':
272
            in = 1;
273
            break;
274
275
        case ']':
276
            if (!in) {
277
                *dst++ = '\\';
278
            }
279
280
            in = 0;
281
            break;
282
283
        case '\\':
284
            *dst++ = *p++;
285
286
            if (p == end) {
287
                goto done;
288
            }
289
290
            if (*p != '\0') {
291
                break;
292
            }
293
294
            /* Fall through. */
295
296
        case '\0':
297
            dst = njs_cpymem(dst, "\\u0000", 6);
298
            continue;
299
        }
300
301
        *dst++ = *p;
302
    }
303
304
done:
305
306
    text->length = dst - text->start;
307
308
    return NJS_OK;
309
310
#endif
311
33.9k
}
312
313
314
njs_int_t
315
njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len,
316
    njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace)
317
654k
{
318
654k
#ifdef NJS_HAVE_PCRE2
319
320
654k
    int         ret;
321
654k
    u_char      *error;
322
654k
    size_t      erroff;
323
654k
    njs_uint_t  options;
324
654k
    u_char      errstr[128];
325
326
654k
    options = PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF;
327
328
654k
    if ((flags & NJS_REGEX_IGNORE_CASE)) {
329
182k
         options |= PCRE2_CASELESS;
330
182k
    }
331
332
654k
    if ((flags & NJS_REGEX_MULTILINE)) {
333
3.54k
         options |= PCRE2_MULTILINE;
334
3.54k
    }
335
336
654k
    if ((flags & NJS_REGEX_STICKY)) {
337
200k
         options |= PCRE2_ANCHORED;
338
200k
    }
339
340
654k
    if ((flags & NJS_REGEX_UTF8)) {
341
327k
         options |= PCRE2_UTF;
342
327k
    }
343
344
654k
    regex->code = pcre2_compile(source, len, options, &ret, &erroff, cctx);
345
346
654k
    if (njs_slow_path(regex->code == NULL)) {
347
181k
        error = &source[erroff];
348
349
181k
        njs_alert(trace, NJS_LEVEL_ERROR,
350
181k
                  "pcre_compile2(\"%s\") failed: %s at \"%s\"",
351
181k
                  source, njs_regex_pcre2_error(ret, errstr), error);
352
353
181k
        return NJS_DECLINED;
354
181k
    }
355
356
472k
    ret = pcre2_pattern_info(regex->code, PCRE2_INFO_CAPTURECOUNT,
357
472k
                             &regex->ncaptures);
358
359
472k
    if (njs_slow_path(ret < 0)) {
360
0
        njs_alert(trace, NJS_LEVEL_ERROR,
361
0
               "pcre2_pattern_info(\"%s\", PCRE2_INFO_CAPTURECOUNT) failed: %s",
362
0
               source, njs_regex_pcre2_error(ret, errstr));
363
364
0
        return NJS_ERROR;
365
0
    }
366
367
472k
    ret = pcre2_pattern_info(regex->code, PCRE2_INFO_BACKREFMAX,
368
472k
                             &regex->backrefmax);
369
370
472k
    if (njs_slow_path(ret < 0)) {
371
0
        njs_alert(trace, NJS_LEVEL_ERROR,
372
0
                 "pcre2_pattern_info(\"%s\", PCRE2_INFO_BACKREFMAX) failed: %s",
373
0
                 source, njs_regex_pcre2_error(ret, errstr));
374
375
0
        return NJS_ERROR;
376
0
    }
377
378
    /* Reserve additional elements for the first "$0" capture. */
379
472k
    regex->ncaptures++;
380
381
472k
    if (regex->ncaptures > 1) {
382
100k
        ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMECOUNT,
383
100k
                                 &regex->nentries);
384
385
100k
        if (njs_slow_path(ret < 0)) {
386
0
            njs_alert(trace, NJS_LEVEL_ERROR,
387
0
                  "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMECOUNT) failed: %s",
388
0
                   source, njs_regex_pcre2_error(ret, errstr));
389
390
0
            return NJS_ERROR;
391
0
        }
392
393
100k
        if (regex->nentries != 0) {
394
21.9k
            ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMEENTRYSIZE,
395
21.9k
                                     &regex->entry_size);
396
397
21.9k
            if (njs_slow_path(ret < 0)) {
398
0
                njs_alert(trace, NJS_LEVEL_ERROR,
399
0
                          "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMEENTRYSIZE)"
400
0
                          " failed: %s", source,
401
0
                          njs_regex_pcre2_error(ret, errstr));
402
403
0
                return NJS_ERROR;
404
0
            }
405
406
21.9k
            ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMETABLE,
407
21.9k
                                     &regex->entries);
408
409
21.9k
            if (njs_slow_path(ret < 0)) {
410
0
                njs_alert(trace, NJS_LEVEL_ERROR,
411
0
                          "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMETABLE) "
412
0
                          "failed: %s", source,
413
0
                          njs_regex_pcre2_error(ret, errstr));
414
415
0
                return NJS_ERROR;
416
0
            }
417
21.9k
        }
418
100k
    }
419
420
472k
    return NJS_OK;
421
422
#else
423
424
    int                      ret, err, erroff;
425
    char                     *pattern, *error;
426
    void                     *(*saved_malloc)(size_t size);
427
    void                     (*saved_free)(void *p);
428
    njs_uint_t               options;
429
    const char               *errstr;
430
    njs_regex_generic_ctx_t  *ctx;
431
432
    ctx = cctx;
433
434
    ret = NJS_ERROR;
435
436
    saved_malloc = pcre_malloc;
437
    pcre_malloc = njs_pcre_malloc;
438
    saved_free = pcre_free;
439
    pcre_free = njs_pcre_free;
440
    regex_context = ctx;
441
442
#ifdef PCRE_JAVASCRIPT_COMPAT
443
    /* JavaScript compatibility has been introduced in PCRE-7.7. */
444
    options = PCRE_JAVASCRIPT_COMPAT;
445
#else
446
    options = 0;
447
#endif
448
449
    if ((flags & NJS_REGEX_IGNORE_CASE)) {
450
         options |= PCRE_CASELESS;
451
    }
452
453
    if ((flags & NJS_REGEX_MULTILINE)) {
454
         options |= PCRE_MULTILINE;
455
    }
456
457
    if ((flags & NJS_REGEX_STICKY)) {
458
         options |= PCRE_ANCHORED;
459
    }
460
461
    if ((flags & NJS_REGEX_UTF8)) {
462
         options |= PCRE_UTF8;
463
    }
464
465
    pattern = (char *) source;
466
467
    regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL);
468
469
    if (njs_slow_path(regex->code == NULL)) {
470
        error = pattern + erroff;
471
472
        if (*error != '\0') {
473
            njs_alert(trace, NJS_LEVEL_ERROR,
474
                      "pcre_compile(\"%s\") failed: %s at \"%s\"",
475
                      pattern, errstr, error);
476
477
        } else {
478
            njs_alert(trace, NJS_LEVEL_ERROR,
479
                      "pcre_compile(\"%s\") failed: %s", pattern, errstr);
480
        }
481
482
        ret = NJS_DECLINED;
483
484
        goto done;
485
    }
486
487
    regex->extra = pcre_study(regex->code, 0, &errstr);
488
489
    if (njs_slow_path(errstr != NULL)) {
490
        njs_alert(trace, NJS_LEVEL_WARN,
491
                  "pcre_study(\"%s\") failed: %s", pattern, errstr);
492
    }
493
494
    err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_CAPTURECOUNT,
495
                        &regex->ncaptures);
496
497
    if (njs_slow_path(err < 0)) {
498
        njs_alert(trace, NJS_LEVEL_ERROR,
499
                  "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d",
500
                  pattern, err);
501
502
        goto done;
503
    }
504
505
    err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_BACKREFMAX,
506
                        &regex->backrefmax);
507
508
    if (njs_slow_path(err < 0)) {
509
        njs_alert(trace, NJS_LEVEL_ERROR,
510
                  "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d",
511
                  pattern, err);
512
513
        goto done;
514
    }
515
516
    /* Reserve additional elements for the first "$0" capture. */
517
    regex->ncaptures++;
518
519
    if (regex->ncaptures > 1) {
520
        err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT,
521
                            &regex->nentries);
522
523
        if (njs_slow_path(err < 0)) {
524
            njs_alert(trace, NJS_LEVEL_ERROR,
525
                      "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
526
                      pattern, err);
527
528
            goto done;
529
        }
530
531
        if (regex->nentries != 0) {
532
            err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE,
533
                                &regex->entry_size);
534
535
            if (njs_slow_path(err < 0)) {
536
                njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
537
                          "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);
538
539
                goto done;
540
            }
541
542
            err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE,
543
                                &regex->entries);
544
545
            if (njs_slow_path(err < 0)) {
546
                njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
547
                          "PCRE_INFO_NAMETABLE) failed: %d", pattern, err);
548
549
                goto done;
550
            }
551
        }
552
    }
553
554
    ret = NJS_OK;
555
556
done:
557
558
    pcre_malloc = saved_malloc;
559
    pcre_free = saved_free;
560
    regex_context = NULL;
561
562
    return ret;
563
564
#endif
565
472k
}
566
567
568
njs_bool_t
569
njs_regex_is_valid(njs_regex_t *regex)
570
1.95M
{
571
1.95M
    return (regex->code != NULL);
572
1.95M
}
573
574
575
njs_int_t
576
njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n)
577
274k
{
578
274k
    char  *entry;
579
580
274k
    if (name == NULL) {
581
245k
        return regex->nentries;
582
245k
    }
583
584
28.5k
    if (n >= regex->nentries) {
585
0
        return NJS_ERROR;
586
0
    }
587
588
28.5k
    entry = regex->entries + regex->entry_size * n;
589
590
28.5k
    name->start = (u_char *) entry + 2;
591
28.5k
    name->length = njs_strlen(name->start);
592
593
28.5k
    return (entry[0] << 8) + entry[1];
594
28.5k
}
595
596
597
njs_regex_match_data_t *
598
njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx)
599
1.30M
{
600
1.30M
#ifdef NJS_HAVE_PCRE2
601
602
1.30M
    if (regex != NULL) {
603
1.29M
        return pcre2_match_data_create_from_pattern(regex->code, ctx);
604
1.29M
    }
605
606
10.2k
    return pcre2_match_data_create(0, ctx);
607
608
#else
609
610
    size_t                  size;
611
    njs_uint_t              ncaptures;
612
    njs_regex_match_data_t  *match_data;
613
614
    if (regex != NULL) {
615
        ncaptures = regex->ncaptures - 1;
616
617
    } else {
618
        ncaptures = 0;
619
    }
620
621
    /* Each capture is stored in 3 "int" vector elements. */
622
    ncaptures *= 3;
623
    size = sizeof(njs_regex_match_data_t) + ncaptures * sizeof(int);
624
625
    match_data = ctx->private_malloc(size, ctx->memory_data);
626
627
    if (njs_fast_path(match_data != NULL)) {
628
        match_data->ncaptures = ncaptures + 3;
629
    }
630
631
    return match_data;
632
633
#endif
634
1.30M
}
635
636
637
void
638
njs_regex_match_data_free(njs_regex_match_data_t *match_data,
639
    njs_regex_generic_ctx_t *ctx)
640
1.29M
{
641
1.29M
#ifdef NJS_HAVE_PCRE2
642
643
1.29M
    pcre2_match_data_free(match_data);
644
645
#else
646
647
    ctx->private_free(match_data, ctx->memory_data);
648
649
#endif
650
1.29M
}
651
652
653
njs_int_t
654
njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
655
    size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace)
656
1.84M
{
657
1.84M
#ifdef NJS_HAVE_PCRE2
658
659
1.84M
    int     ret;
660
1.84M
    u_char  errstr[128];
661
662
1.84M
    ret = pcre2_match(regex->code, subject, len, off, 0, match_data, NULL);
663
664
1.84M
    if (ret < 0) {
665
1.14M
        if (ret == PCRE2_ERROR_NOMATCH) {
666
1.14M
            return NJS_DECLINED;
667
1.14M
        }
668
669
0
        njs_alert(trace, NJS_LEVEL_ERROR, "pcre2_match() failed: %s",
670
0
                  njs_regex_pcre2_error(ret, errstr));
671
0
        return NJS_ERROR;
672
1.14M
    }
673
674
701k
    return ret;
675
676
#else
677
678
    int  ret;
679
680
    ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
681
                    off, 0, match_data->captures, match_data->ncaptures);
682
683
    if (ret <= PCRE_ERROR_NOMATCH) {
684
        if (ret == PCRE_ERROR_NOMATCH) {
685
            return NJS_DECLINED;
686
        }
687
688
        njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
689
        return NJS_ERROR;
690
    }
691
692
    return ret;
693
694
#endif
695
1.84M
}
696
697
698
size_t
699
njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n)
700
1.70M
{
701
1.70M
#ifdef NJS_HAVE_PCRE2
702
703
1.70M
    size_t  c;
704
705
1.70M
    c = pcre2_get_ovector_pointer(match_data)[n];
706
707
1.70M
    if (c == PCRE2_UNSET) {
708
10.9k
        return NJS_REGEX_UNSET;
709
10.9k
    }
710
711
1.69M
    return c;
712
713
#else
714
715
    return match_data->captures[n];
716
717
#endif
718
1.70M
}
719
720
#ifdef NJS_HAVE_PCRE2
721
722
static const u_char *
723
njs_regex_pcre2_error(int errcode, u_char buffer[128])
724
181k
{
725
181k
    pcre2_get_error_message(errcode, buffer, 128);
726
727
181k
    return buffer;
728
181k
}
729
730
#else
731
732
static void *
733
njs_pcre_malloc(size_t size)
734
{
735
    return regex_context->private_malloc(size, regex_context->memory_data);
736
}
737
738
739
static void
740
njs_pcre_free(void *p)
741
{
742
    regex_context->private_free(p, regex_context->memory_data);
743
}
744
745
#endif
746
747