Coverage Report

Created: 2025-07-12 06:23

/src/njs/external/njs_regex.c
Line
Count
Source (jump to first uncovered line)
1
2
/*
3
 * Copyright (C) Igor Sysoev
4
 * Copyright (C) Dmitry Volyntsev
5
 * Copyright (C) NGINX, Inc.
6
 */
7
8
9
#include <njs_main.h>
10
11
#ifdef NJS_HAVE_PCRE2
12
13
#define PCRE2_CODE_UNIT_WIDTH 8
14
#include <pcre2.h>
15
16
17
static const u_char* njs_regex_pcre2_error(int errcode, u_char buffer[128]);
18
19
#else
20
21
#include <pcre.h>
22
23
24
static void *njs_pcre_malloc(size_t size);
25
static void njs_pcre_free(void *p);
26
27
28
static njs_regex_generic_ctx_t  *regex_context;
29
30
#endif
31
32
33
njs_regex_generic_ctx_t *
34
njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc,
35
    njs_pcre_free_t private_free, void *memory_data)
36
14.8k
{
37
14.8k
#ifdef NJS_HAVE_PCRE2
38
39
14.8k
    return pcre2_general_context_create(private_malloc, private_free,
40
14.8k
                                        memory_data);
41
#else
42
43
    njs_regex_generic_ctx_t  *ctx;
44
45
    ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data);
46
47
    if (njs_fast_path(ctx != NULL)) {
48
        ctx->private_malloc = private_malloc;
49
        ctx->private_free = private_free;
50
        ctx->memory_data = memory_data;
51
    }
52
53
    return ctx;
54
55
#endif
56
14.8k
}
57
58
59
njs_regex_compile_ctx_t *
60
njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
61
14.8k
{
62
14.8k
#ifdef NJS_HAVE_PCRE2
63
14.8k
    pcre2_compile_context  *cc;
64
65
14.8k
    cc = pcre2_compile_context_create(ctx);
66
67
14.8k
#ifdef PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
68
14.8k
    if (njs_fast_path(cc != NULL)) {
69
        /* Workaround for surrogate pairs in regular expressions
70
         *
71
         * This option is needed because njs, unlike the standard ECMAScript,
72
         * stores and processes strings in UTF-8 encoding.
73
         * PCRE2 does not support surrogate pairs by default when it
74
         * is compiled for UTF-8 only strings. But many polyfills
75
         * and transpilers use such surrogate pairs expressions.
76
         */
77
14.8k
        pcre2_set_compile_extra_options(cc,
78
14.8k
                                        PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES);
79
14.8k
    }
80
14.8k
#endif
81
82
14.8k
    return cc;
83
84
#else
85
86
    return ctx;
87
88
#endif
89
14.8k
}
90
91
92
93
njs_int_t
94
njs_regex_escape(njs_mp_t *mp, njs_str_t *text)
95
328k
{
96
328k
#ifdef NJS_HAVE_PCRE2
97
328k
    size_t  anychars, nomatches;
98
328k
    u_char  *p, *dst, *start, *end;
99
100
    /*
101
     * 1) [^] is a valid regexp expression in JavaScript, but PCRE2
102
     * rejects it as invalid, replacing it with equivalent PCRE2 [\s\S]
103
     * expression.
104
     * 2) [] is a valid regexp expression in JavaScript, but PCRE2
105
     * rejects it as invalid, replacing it with equivalent PCRE2 (?!)
106
     * expression which matches nothing.
107
     */
108
109
328k
    start = text->start;
110
328k
    end = text->start + text->length;
111
112
328k
    anychars = 0;
113
328k
    nomatches = 0;
114
115
47.7M
    for (p = start; p < end; p++) {
116
47.4M
        switch (*p) {
117
234k
        case '\\':
118
234k
            p += 1;
119
120
234k
            break;
121
122
197k
        case '[':
123
197k
            if (p + 1 < end && p[1] == ']') {
124
31.5k
                p += 1;
125
31.5k
                nomatches += 1;
126
127
166k
            } else if (p + 2 < end && p[1] == '^' && p[2] == ']') {
128
2.08k
                p += 2;
129
2.08k
                anychars += 1;
130
131
164k
            } else {
132
5.99M
                while (p < end && *p != ']') {
133
5.83M
                    p += 1;
134
5.83M
                }
135
164k
            }
136
137
197k
            break;
138
47.4M
        }
139
47.4M
    }
140
141
328k
    if (!anychars && !nomatches) {
142
315k
        return NJS_OK;
143
315k
    }
144
145
12.4k
    text->length = text->length
146
12.4k
                   + anychars * (njs_length("\\s\\S") - njs_length("^"))
147
12.4k
                   + nomatches * (njs_length("?!"));
148
149
12.4k
    text->start = njs_mp_alloc(mp, text->length);
150
12.4k
    if (njs_slow_path(text->start == NULL)) {
151
0
        return NJS_ERROR;
152
0
    }
153
154
12.4k
    dst = text->start;
155
156
8.79M
    for (p = start; p < end; p++) {
157
158
8.78M
        switch (*p) {
159
17.6k
        case '\\':
160
17.6k
            *dst++ = *p;
161
17.6k
            if (p + 1 < end) {
162
17.5k
                p += 1;
163
17.5k
                *dst++ = *p;
164
17.5k
            }
165
166
17.6k
            continue;
167
168
51.8k
        case '[':
169
51.8k
            if (p + 1 < end && p[1] == ']') {
170
31.5k
                p += 1;
171
31.5k
                dst = njs_cpymem(dst, "(?!)", 4);
172
31.5k
                continue;
173
174
31.5k
            } else if (p + 2 < end && p[1] == '^' && p[2] == ']') {
175
2.08k
                p += 2;
176
2.08k
                dst = njs_cpymem(dst, "[\\s\\S]", 6);
177
2.08k
                continue;
178
179
18.2k
            } else {
180
18.2k
                *dst++ = *p++; /* Copy '['. */
181
182
575k
                while (p < end && *p != ']') {
183
557k
                    *dst++ = *p++;
184
557k
                }
185
186
18.2k
                if (p < end) {
187
16.9k
                    *dst++ = *p; /* Copy ']'. */
188
16.9k
                }
189
190
18.2k
                continue;
191
18.2k
            }
192
8.78M
        }
193
194
8.71M
        *dst++ = *p;
195
8.71M
    }
196
197
12.4k
    njs_assert(dst == text->start + text->length);
198
199
12.4k
    return NJS_OK;
200
201
#else
202
203
    /*
204
     * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with
205
     * lone closing square brackets as invalid.  Whereas according
206
     * to ES6: 11.8.5 it is a valid regexp expression.
207
     *
208
     * 2) escaping zero byte characters as "\u0000".
209
     *
210
     * Escaping it here as a workaround.
211
     */
212
213
    size_t      brackets, zeros;
214
    u_char      *p, *dst, *start, *end;
215
    njs_bool_t  in;
216
217
    start = text->start;
218
    end = text->start + text->length;
219
220
    in = 0;
221
    zeros = 0;
222
    brackets = 0;
223
224
    for (p = start; p < end; p++) {
225
226
        switch (*p) {
227
        case '[':
228
            in = 1;
229
            break;
230
231
        case ']':
232
            if (!in) {
233
                brackets++;
234
            }
235
236
            in = 0;
237
            break;
238
239
        case '\\':
240
            p++;
241
242
            if (p == end || *p != '\0') {
243
                break;
244
            }
245
246
            /* Fall through. */
247
248
        case '\0':
249
            zeros++;
250
            break;
251
        }
252
    }
253
254
    if (!brackets && !zeros) {
255
        return NJS_OK;
256
    }
257
258
    text->length = text->length + brackets + zeros * njs_length("\\u0000");
259
260
    text->start = njs_mp_alloc(mp, text->length);
261
    if (njs_slow_path(text->start == NULL)) {
262
        return NJS_ERROR;
263
    }
264
265
    in = 0;
266
    dst = text->start;
267
268
    for (p = start; p < end; p++) {
269
270
        switch (*p) {
271
        case '[':
272
            in = 1;
273
            break;
274
275
        case ']':
276
            if (!in) {
277
                *dst++ = '\\';
278
            }
279
280
            in = 0;
281
            break;
282
283
        case '\\':
284
            *dst++ = *p++;
285
286
            if (p == end) {
287
                goto done;
288
            }
289
290
            if (*p != '\0') {
291
                break;
292
            }
293
294
            /* Fall through. */
295
296
        case '\0':
297
            dst = njs_cpymem(dst, "\\u0000", 6);
298
            continue;
299
        }
300
301
        *dst++ = *p;
302
    }
303
304
done:
305
306
    text->length = dst - text->start;
307
308
    return NJS_OK;
309
310
#endif
311
12.4k
}
312
313
314
njs_int_t
315
njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len,
316
    njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace)
317
656k
{
318
656k
#ifdef NJS_HAVE_PCRE2
319
320
656k
    int         ret;
321
656k
    u_char      *error;
322
656k
    size_t      erroff;
323
656k
    njs_uint_t  options;
324
656k
    u_char      errstr[128];
325
326
656k
    options = PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF;
327
328
656k
    if ((flags & NJS_REGEX_IGNORE_CASE)) {
329
50.2k
         options |= PCRE2_CASELESS;
330
50.2k
    }
331
332
656k
    if ((flags & NJS_REGEX_MULTILINE)) {
333
20.1k
         options |= PCRE2_MULTILINE;
334
20.1k
    }
335
336
656k
    if ((flags & NJS_REGEX_STICKY)) {
337
62.8k
         options |= PCRE2_ANCHORED;
338
62.8k
    }
339
340
656k
    if ((flags & NJS_REGEX_UTF8)) {
341
328k
         options |= PCRE2_UTF;
342
328k
    }
343
344
656k
    regex->code = pcre2_compile(source, len, options, &ret, &erroff, cctx);
345
346
656k
    if (njs_slow_path(regex->code == NULL)) {
347
163k
        error = &source[erroff];
348
349
163k
        njs_alert(trace, NJS_LEVEL_ERROR,
350
163k
                  "pcre_compile2(\"%s\") failed: %s at \"%s\"",
351
163k
                  source, njs_regex_pcre2_error(ret, errstr), error);
352
353
163k
        return NJS_DECLINED;
354
163k
    }
355
356
492k
    ret = pcre2_pattern_info(regex->code, PCRE2_INFO_CAPTURECOUNT,
357
492k
                             &regex->ncaptures);
358
359
492k
    if (njs_slow_path(ret < 0)) {
360
0
        njs_alert(trace, NJS_LEVEL_ERROR,
361
0
               "pcre2_pattern_info(\"%s\", PCRE2_INFO_CAPTURECOUNT) failed: %s",
362
0
               source, njs_regex_pcre2_error(ret, errstr));
363
364
0
        return NJS_ERROR;
365
0
    }
366
367
492k
    ret = pcre2_pattern_info(regex->code, PCRE2_INFO_BACKREFMAX,
368
492k
                             &regex->backrefmax);
369
370
492k
    if (njs_slow_path(ret < 0)) {
371
0
        njs_alert(trace, NJS_LEVEL_ERROR,
372
0
                 "pcre2_pattern_info(\"%s\", PCRE2_INFO_BACKREFMAX) failed: %s",
373
0
                 source, njs_regex_pcre2_error(ret, errstr));
374
375
0
        return NJS_ERROR;
376
0
    }
377
378
    /* Reserve additional elements for the first "$0" capture. */
379
492k
    regex->ncaptures++;
380
381
492k
    if (regex->ncaptures > 1) {
382
101k
        ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMECOUNT,
383
101k
                                 &regex->nentries);
384
385
101k
        if (njs_slow_path(ret < 0)) {
386
0
            njs_alert(trace, NJS_LEVEL_ERROR,
387
0
                  "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMECOUNT) failed: %s",
388
0
                   source, njs_regex_pcre2_error(ret, errstr));
389
390
0
            return NJS_ERROR;
391
0
        }
392
393
101k
        if (regex->nentries != 0) {
394
34.1k
            ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMEENTRYSIZE,
395
34.1k
                                     &regex->entry_size);
396
397
34.1k
            if (njs_slow_path(ret < 0)) {
398
0
                njs_alert(trace, NJS_LEVEL_ERROR,
399
0
                          "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMEENTRYSIZE)"
400
0
                          " failed: %s", source,
401
0
                          njs_regex_pcre2_error(ret, errstr));
402
403
0
                return NJS_ERROR;
404
0
            }
405
406
34.1k
            ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMETABLE,
407
34.1k
                                     &regex->entries);
408
409
34.1k
            if (njs_slow_path(ret < 0)) {
410
0
                njs_alert(trace, NJS_LEVEL_ERROR,
411
0
                          "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMETABLE) "
412
0
                          "failed: %s", source,
413
0
                          njs_regex_pcre2_error(ret, errstr));
414
415
0
                return NJS_ERROR;
416
0
            }
417
34.1k
        }
418
101k
    }
419
420
492k
    return NJS_OK;
421
422
#else
423
424
    int                      ret, err, erroff;
425
    char                     *pattern, *error;
426
    void                     *(*saved_malloc)(size_t size);
427
    void                     (*saved_free)(void *p);
428
    njs_uint_t               options;
429
    const char               *errstr;
430
    njs_regex_generic_ctx_t  *ctx;
431
432
    ctx = cctx;
433
434
    ret = NJS_ERROR;
435
436
    saved_malloc = pcre_malloc;
437
    pcre_malloc = njs_pcre_malloc;
438
    saved_free = pcre_free;
439
    pcre_free = njs_pcre_free;
440
    regex_context = ctx;
441
442
#ifdef PCRE_JAVASCRIPT_COMPAT
443
    /* JavaScript compatibility has been introduced in PCRE-7.7. */
444
    options = PCRE_JAVASCRIPT_COMPAT;
445
#else
446
    options = 0;
447
#endif
448
449
    if ((flags & NJS_REGEX_IGNORE_CASE)) {
450
         options |= PCRE_CASELESS;
451
    }
452
453
    if ((flags & NJS_REGEX_MULTILINE)) {
454
         options |= PCRE_MULTILINE;
455
    }
456
457
    if ((flags & NJS_REGEX_STICKY)) {
458
         options |= PCRE_ANCHORED;
459
    }
460
461
    if ((flags & NJS_REGEX_UTF8)) {
462
         options |= PCRE_UTF8;
463
    }
464
465
    pattern = (char *) source;
466
467
    regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL);
468
469
    if (njs_slow_path(regex->code == NULL)) {
470
        error = pattern + erroff;
471
472
        if (*error != '\0') {
473
            njs_alert(trace, NJS_LEVEL_ERROR,
474
                      "pcre_compile(\"%s\") failed: %s at \"%s\"",
475
                      pattern, errstr, error);
476
477
        } else {
478
            njs_alert(trace, NJS_LEVEL_ERROR,
479
                      "pcre_compile(\"%s\") failed: %s", pattern, errstr);
480
        }
481
482
        ret = NJS_DECLINED;
483
484
        goto done;
485
    }
486
487
    regex->extra = pcre_study(regex->code, 0, &errstr);
488
489
    if (njs_slow_path(errstr != NULL)) {
490
        njs_alert(trace, NJS_LEVEL_WARN,
491
                  "pcre_study(\"%s\") failed: %s", pattern, errstr);
492
    }
493
494
    err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_CAPTURECOUNT,
495
                        &regex->ncaptures);
496
497
    if (njs_slow_path(err < 0)) {
498
        njs_alert(trace, NJS_LEVEL_ERROR,
499
                  "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d",
500
                  pattern, err);
501
502
        goto done;
503
    }
504
505
    err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_BACKREFMAX,
506
                        &regex->backrefmax);
507
508
    if (njs_slow_path(err < 0)) {
509
        njs_alert(trace, NJS_LEVEL_ERROR,
510
                  "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d",
511
                  pattern, err);
512
513
        goto done;
514
    }
515
516
    /* Reserve additional elements for the first "$0" capture. */
517
    regex->ncaptures++;
518
519
    if (regex->ncaptures > 1) {
520
        err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT,
521
                            &regex->nentries);
522
523
        if (njs_slow_path(err < 0)) {
524
            njs_alert(trace, NJS_LEVEL_ERROR,
525
                      "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d",
526
                      pattern, err);
527
528
            goto done;
529
        }
530
531
        if (regex->nentries != 0) {
532
            err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE,
533
                                &regex->entry_size);
534
535
            if (njs_slow_path(err < 0)) {
536
                njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
537
                          "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err);
538
539
                goto done;
540
            }
541
542
            err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE,
543
                                &regex->entries);
544
545
            if (njs_slow_path(err < 0)) {
546
                njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", "
547
                          "PCRE_INFO_NAMETABLE) failed: %d", pattern, err);
548
549
                goto done;
550
            }
551
        }
552
    }
553
554
    ret = NJS_OK;
555
556
done:
557
558
    pcre_malloc = saved_malloc;
559
    pcre_free = saved_free;
560
    regex_context = NULL;
561
562
    return ret;
563
564
#endif
565
492k
}
566
567
568
njs_bool_t
569
njs_regex_is_valid(njs_regex_t *regex)
570
1.54M
{
571
1.54M
    return (regex->code != NULL);
572
1.54M
}
573
574
575
njs_int_t
576
njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n)
577
339k
{
578
339k
    char  *entry;
579
580
339k
    if (name == NULL) {
581
266k
        return regex->nentries;
582
266k
    }
583
584
73.4k
    if (n >= regex->nentries) {
585
0
        return NJS_ERROR;
586
0
    }
587
588
73.4k
    entry = regex->entries + regex->entry_size * n;
589
590
73.4k
    name->start = (u_char *) entry + 2;
591
73.4k
    name->length = njs_strlen(name->start);
592
593
73.4k
    return (entry[0] << 8) + entry[1];
594
73.4k
}
595
596
597
njs_regex_match_data_t *
598
njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx)
599
892k
{
600
892k
#ifdef NJS_HAVE_PCRE2
601
602
892k
    if (regex != NULL) {
603
878k
        return pcre2_match_data_create_from_pattern(regex->code, ctx);
604
878k
    }
605
606
14.8k
    return pcre2_match_data_create(0, ctx);
607
608
#else
609
610
    size_t                  size;
611
    njs_uint_t              ncaptures;
612
    njs_regex_match_data_t  *match_data;
613
614
    if (regex != NULL) {
615
        ncaptures = regex->ncaptures - 1;
616
617
    } else {
618
        ncaptures = 0;
619
    }
620
621
    /* Each capture is stored in 3 "int" vector elements. */
622
    ncaptures *= 3;
623
    size = sizeof(njs_regex_match_data_t) + ncaptures * sizeof(int);
624
625
    match_data = ctx->private_malloc(size, ctx->memory_data);
626
627
    if (njs_fast_path(match_data != NULL)) {
628
        match_data->ncaptures = ncaptures + 3;
629
    }
630
631
    return match_data;
632
633
#endif
634
892k
}
635
636
637
void
638
njs_regex_match_data_free(njs_regex_match_data_t *match_data,
639
    njs_regex_generic_ctx_t *ctx)
640
878k
{
641
878k
#ifdef NJS_HAVE_PCRE2
642
643
878k
    pcre2_match_data_free(match_data);
644
645
#else
646
647
    ctx->private_free(match_data, ctx->memory_data);
648
649
#endif
650
878k
}
651
652
653
njs_int_t
654
njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off,
655
    size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace)
656
902k
{
657
902k
#ifdef NJS_HAVE_PCRE2
658
659
902k
    int     ret;
660
902k
    u_char  errstr[128];
661
662
902k
    ret = pcre2_match(regex->code, subject, len, off, 0, match_data, NULL);
663
664
902k
    if (ret < 0) {
665
621k
        if (ret == PCRE2_ERROR_NOMATCH) {
666
620k
            return NJS_DECLINED;
667
620k
        }
668
669
576
        njs_alert(trace, NJS_LEVEL_ERROR, "pcre2_match() failed: %s",
670
576
                  njs_regex_pcre2_error(ret, errstr));
671
576
        return NJS_ERROR;
672
621k
    }
673
674
281k
    return ret;
675
676
#else
677
678
    int  ret;
679
680
    ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len,
681
                    off, 0, match_data->captures, match_data->ncaptures);
682
683
    if (ret <= PCRE_ERROR_NOMATCH) {
684
        if (ret == PCRE_ERROR_NOMATCH) {
685
            return NJS_DECLINED;
686
        }
687
688
        njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret);
689
        return NJS_ERROR;
690
    }
691
692
    return ret;
693
694
#endif
695
902k
}
696
697
698
size_t
699
njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n)
700
1.13M
{
701
1.13M
#ifdef NJS_HAVE_PCRE2
702
703
1.13M
    size_t  c;
704
705
1.13M
    c = pcre2_get_ovector_pointer(match_data)[n];
706
707
1.13M
    if (c == PCRE2_UNSET) {
708
18.4k
        return NJS_REGEX_UNSET;
709
18.4k
    }
710
711
1.11M
    return c;
712
713
#else
714
715
    return match_data->captures[n];
716
717
#endif
718
1.13M
}
719
720
#ifdef NJS_HAVE_PCRE2
721
722
static const u_char *
723
njs_regex_pcre2_error(int errcode, u_char buffer[128])
724
164k
{
725
164k
    pcre2_get_error_message(errcode, buffer, 128);
726
727
164k
    return buffer;
728
164k
}
729
730
#else
731
732
static void *
733
njs_pcre_malloc(size_t size)
734
{
735
    return regex_context->private_malloc(size, regex_context->memory_data);
736
}
737
738
739
static void
740
njs_pcre_free(void *p)
741
{
742
    regex_context->private_free(p, regex_context->memory_data);
743
}
744
745
#endif
746
747