/src/njs/external/njs_regex.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | /* |
3 | | * Copyright (C) Igor Sysoev |
4 | | * Copyright (C) Dmitry Volyntsev |
5 | | * Copyright (C) NGINX, Inc. |
6 | | */ |
7 | | |
8 | | |
9 | | #include <njs_main.h> |
10 | | |
11 | | #ifdef NJS_HAVE_PCRE2 |
12 | | |
13 | | #define PCRE2_CODE_UNIT_WIDTH 8 |
14 | | #include <pcre2.h> |
15 | | |
16 | | |
17 | | static const u_char* njs_regex_pcre2_error(int errcode, u_char buffer[128]); |
18 | | |
19 | | #else |
20 | | |
21 | | #include <pcre.h> |
22 | | |
23 | | |
24 | | static void *njs_pcre_malloc(size_t size); |
25 | | static void njs_pcre_free(void *p); |
26 | | |
27 | | |
28 | | static njs_regex_generic_ctx_t *regex_context; |
29 | | |
30 | | #endif |
31 | | |
32 | | |
33 | | njs_regex_generic_ctx_t * |
34 | | njs_regex_generic_ctx_create(njs_pcre_malloc_t private_malloc, |
35 | | njs_pcre_free_t private_free, void *memory_data) |
36 | 14.8k | { |
37 | 14.8k | #ifdef NJS_HAVE_PCRE2 |
38 | | |
39 | 14.8k | return pcre2_general_context_create(private_malloc, private_free, |
40 | 14.8k | memory_data); |
41 | | #else |
42 | | |
43 | | njs_regex_generic_ctx_t *ctx; |
44 | | |
45 | | ctx = private_malloc(sizeof(njs_regex_generic_ctx_t), memory_data); |
46 | | |
47 | | if (njs_fast_path(ctx != NULL)) { |
48 | | ctx->private_malloc = private_malloc; |
49 | | ctx->private_free = private_free; |
50 | | ctx->memory_data = memory_data; |
51 | | } |
52 | | |
53 | | return ctx; |
54 | | |
55 | | #endif |
56 | 14.8k | } |
57 | | |
58 | | |
59 | | njs_regex_compile_ctx_t * |
60 | | njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx) |
61 | 14.8k | { |
62 | 14.8k | #ifdef NJS_HAVE_PCRE2 |
63 | 14.8k | pcre2_compile_context *cc; |
64 | | |
65 | 14.8k | cc = pcre2_compile_context_create(ctx); |
66 | | |
67 | 14.8k | #ifdef PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES |
68 | 14.8k | if (njs_fast_path(cc != NULL)) { |
69 | | /* Workaround for surrogate pairs in regular expressions |
70 | | * |
71 | | * This option is needed because njs, unlike the standard ECMAScript, |
72 | | * stores and processes strings in UTF-8 encoding. |
73 | | * PCRE2 does not support surrogate pairs by default when it |
74 | | * is compiled for UTF-8 only strings. But many polyfills |
75 | | * and transpilers use such surrogate pairs expressions. |
76 | | */ |
77 | 14.8k | pcre2_set_compile_extra_options(cc, |
78 | 14.8k | PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES); |
79 | 14.8k | } |
80 | 14.8k | #endif |
81 | | |
82 | 14.8k | return cc; |
83 | | |
84 | | #else |
85 | | |
86 | | return ctx; |
87 | | |
88 | | #endif |
89 | 14.8k | } |
90 | | |
91 | | |
92 | | |
93 | | njs_int_t |
94 | | njs_regex_escape(njs_mp_t *mp, njs_str_t *text) |
95 | 328k | { |
96 | 328k | #ifdef NJS_HAVE_PCRE2 |
97 | 328k | size_t anychars, nomatches; |
98 | 328k | u_char *p, *dst, *start, *end; |
99 | | |
100 | | /* |
101 | | * 1) [^] is a valid regexp expression in JavaScript, but PCRE2 |
102 | | * rejects it as invalid, replacing it with equivalent PCRE2 [\s\S] |
103 | | * expression. |
104 | | * 2) [] is a valid regexp expression in JavaScript, but PCRE2 |
105 | | * rejects it as invalid, replacing it with equivalent PCRE2 (?!) |
106 | | * expression which matches nothing. |
107 | | */ |
108 | | |
109 | 328k | start = text->start; |
110 | 328k | end = text->start + text->length; |
111 | | |
112 | 328k | anychars = 0; |
113 | 328k | nomatches = 0; |
114 | | |
115 | 47.7M | for (p = start; p < end; p++) { |
116 | 47.4M | switch (*p) { |
117 | 234k | case '\\': |
118 | 234k | p += 1; |
119 | | |
120 | 234k | break; |
121 | | |
122 | 197k | case '[': |
123 | 197k | if (p + 1 < end && p[1] == ']') { |
124 | 31.5k | p += 1; |
125 | 31.5k | nomatches += 1; |
126 | | |
127 | 166k | } else if (p + 2 < end && p[1] == '^' && p[2] == ']') { |
128 | 2.08k | p += 2; |
129 | 2.08k | anychars += 1; |
130 | | |
131 | 164k | } else { |
132 | 5.99M | while (p < end && *p != ']') { |
133 | 5.83M | p += 1; |
134 | 5.83M | } |
135 | 164k | } |
136 | | |
137 | 197k | break; |
138 | 47.4M | } |
139 | 47.4M | } |
140 | | |
141 | 328k | if (!anychars && !nomatches) { |
142 | 315k | return NJS_OK; |
143 | 315k | } |
144 | | |
145 | 12.4k | text->length = text->length |
146 | 12.4k | + anychars * (njs_length("\\s\\S") - njs_length("^")) |
147 | 12.4k | + nomatches * (njs_length("?!")); |
148 | | |
149 | 12.4k | text->start = njs_mp_alloc(mp, text->length); |
150 | 12.4k | if (njs_slow_path(text->start == NULL)) { |
151 | 0 | return NJS_ERROR; |
152 | 0 | } |
153 | | |
154 | 12.4k | dst = text->start; |
155 | | |
156 | 8.79M | for (p = start; p < end; p++) { |
157 | | |
158 | 8.78M | switch (*p) { |
159 | 17.6k | case '\\': |
160 | 17.6k | *dst++ = *p; |
161 | 17.6k | if (p + 1 < end) { |
162 | 17.5k | p += 1; |
163 | 17.5k | *dst++ = *p; |
164 | 17.5k | } |
165 | | |
166 | 17.6k | continue; |
167 | | |
168 | 51.8k | case '[': |
169 | 51.8k | if (p + 1 < end && p[1] == ']') { |
170 | 31.5k | p += 1; |
171 | 31.5k | dst = njs_cpymem(dst, "(?!)", 4); |
172 | 31.5k | continue; |
173 | | |
174 | 31.5k | } else if (p + 2 < end && p[1] == '^' && p[2] == ']') { |
175 | 2.08k | p += 2; |
176 | 2.08k | dst = njs_cpymem(dst, "[\\s\\S]", 6); |
177 | 2.08k | continue; |
178 | | |
179 | 18.2k | } else { |
180 | 18.2k | *dst++ = *p++; /* Copy '['. */ |
181 | | |
182 | 575k | while (p < end && *p != ']') { |
183 | 557k | *dst++ = *p++; |
184 | 557k | } |
185 | | |
186 | 18.2k | if (p < end) { |
187 | 16.9k | *dst++ = *p; /* Copy ']'. */ |
188 | 16.9k | } |
189 | | |
190 | 18.2k | continue; |
191 | 18.2k | } |
192 | 8.78M | } |
193 | | |
194 | 8.71M | *dst++ = *p; |
195 | 8.71M | } |
196 | | |
197 | 12.4k | njs_assert(dst == text->start + text->length); |
198 | | |
199 | 12.4k | return NJS_OK; |
200 | | |
201 | | #else |
202 | | |
203 | | /* |
204 | | * 1) PCRE with PCRE_JAVASCRIPT_COMPAT flag rejects regexps with |
205 | | * lone closing square brackets as invalid. Whereas according |
206 | | * to ES6: 11.8.5 it is a valid regexp expression. |
207 | | * |
208 | | * 2) escaping zero byte characters as "\u0000". |
209 | | * |
210 | | * Escaping it here as a workaround. |
211 | | */ |
212 | | |
213 | | size_t brackets, zeros; |
214 | | u_char *p, *dst, *start, *end; |
215 | | njs_bool_t in; |
216 | | |
217 | | start = text->start; |
218 | | end = text->start + text->length; |
219 | | |
220 | | in = 0; |
221 | | zeros = 0; |
222 | | brackets = 0; |
223 | | |
224 | | for (p = start; p < end; p++) { |
225 | | |
226 | | switch (*p) { |
227 | | case '[': |
228 | | in = 1; |
229 | | break; |
230 | | |
231 | | case ']': |
232 | | if (!in) { |
233 | | brackets++; |
234 | | } |
235 | | |
236 | | in = 0; |
237 | | break; |
238 | | |
239 | | case '\\': |
240 | | p++; |
241 | | |
242 | | if (p == end || *p != '\0') { |
243 | | break; |
244 | | } |
245 | | |
246 | | /* Fall through. */ |
247 | | |
248 | | case '\0': |
249 | | zeros++; |
250 | | break; |
251 | | } |
252 | | } |
253 | | |
254 | | if (!brackets && !zeros) { |
255 | | return NJS_OK; |
256 | | } |
257 | | |
258 | | text->length = text->length + brackets + zeros * njs_length("\\u0000"); |
259 | | |
260 | | text->start = njs_mp_alloc(mp, text->length); |
261 | | if (njs_slow_path(text->start == NULL)) { |
262 | | return NJS_ERROR; |
263 | | } |
264 | | |
265 | | in = 0; |
266 | | dst = text->start; |
267 | | |
268 | | for (p = start; p < end; p++) { |
269 | | |
270 | | switch (*p) { |
271 | | case '[': |
272 | | in = 1; |
273 | | break; |
274 | | |
275 | | case ']': |
276 | | if (!in) { |
277 | | *dst++ = '\\'; |
278 | | } |
279 | | |
280 | | in = 0; |
281 | | break; |
282 | | |
283 | | case '\\': |
284 | | *dst++ = *p++; |
285 | | |
286 | | if (p == end) { |
287 | | goto done; |
288 | | } |
289 | | |
290 | | if (*p != '\0') { |
291 | | break; |
292 | | } |
293 | | |
294 | | /* Fall through. */ |
295 | | |
296 | | case '\0': |
297 | | dst = njs_cpymem(dst, "\\u0000", 6); |
298 | | continue; |
299 | | } |
300 | | |
301 | | *dst++ = *p; |
302 | | } |
303 | | |
304 | | done: |
305 | | |
306 | | text->length = dst - text->start; |
307 | | |
308 | | return NJS_OK; |
309 | | |
310 | | #endif |
311 | 12.4k | } |
312 | | |
313 | | |
314 | | njs_int_t |
315 | | njs_regex_compile(njs_regex_t *regex, u_char *source, size_t len, |
316 | | njs_regex_flags_t flags, njs_regex_compile_ctx_t *cctx, njs_trace_t *trace) |
317 | 656k | { |
318 | 656k | #ifdef NJS_HAVE_PCRE2 |
319 | | |
320 | 656k | int ret; |
321 | 656k | u_char *error; |
322 | 656k | size_t erroff; |
323 | 656k | njs_uint_t options; |
324 | 656k | u_char errstr[128]; |
325 | | |
326 | 656k | options = PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF; |
327 | | |
328 | 656k | if ((flags & NJS_REGEX_IGNORE_CASE)) { |
329 | 50.2k | options |= PCRE2_CASELESS; |
330 | 50.2k | } |
331 | | |
332 | 656k | if ((flags & NJS_REGEX_MULTILINE)) { |
333 | 20.1k | options |= PCRE2_MULTILINE; |
334 | 20.1k | } |
335 | | |
336 | 656k | if ((flags & NJS_REGEX_STICKY)) { |
337 | 62.8k | options |= PCRE2_ANCHORED; |
338 | 62.8k | } |
339 | | |
340 | 656k | if ((flags & NJS_REGEX_UTF8)) { |
341 | 328k | options |= PCRE2_UTF; |
342 | 328k | } |
343 | | |
344 | 656k | regex->code = pcre2_compile(source, len, options, &ret, &erroff, cctx); |
345 | | |
346 | 656k | if (njs_slow_path(regex->code == NULL)) { |
347 | 163k | error = &source[erroff]; |
348 | | |
349 | 163k | njs_alert(trace, NJS_LEVEL_ERROR, |
350 | 163k | "pcre_compile2(\"%s\") failed: %s at \"%s\"", |
351 | 163k | source, njs_regex_pcre2_error(ret, errstr), error); |
352 | | |
353 | 163k | return NJS_DECLINED; |
354 | 163k | } |
355 | | |
356 | 492k | ret = pcre2_pattern_info(regex->code, PCRE2_INFO_CAPTURECOUNT, |
357 | 492k | ®ex->ncaptures); |
358 | | |
359 | 492k | if (njs_slow_path(ret < 0)) { |
360 | 0 | njs_alert(trace, NJS_LEVEL_ERROR, |
361 | 0 | "pcre2_pattern_info(\"%s\", PCRE2_INFO_CAPTURECOUNT) failed: %s", |
362 | 0 | source, njs_regex_pcre2_error(ret, errstr)); |
363 | |
|
364 | 0 | return NJS_ERROR; |
365 | 0 | } |
366 | | |
367 | 492k | ret = pcre2_pattern_info(regex->code, PCRE2_INFO_BACKREFMAX, |
368 | 492k | ®ex->backrefmax); |
369 | | |
370 | 492k | if (njs_slow_path(ret < 0)) { |
371 | 0 | njs_alert(trace, NJS_LEVEL_ERROR, |
372 | 0 | "pcre2_pattern_info(\"%s\", PCRE2_INFO_BACKREFMAX) failed: %s", |
373 | 0 | source, njs_regex_pcre2_error(ret, errstr)); |
374 | |
|
375 | 0 | return NJS_ERROR; |
376 | 0 | } |
377 | | |
378 | | /* Reserve additional elements for the first "$0" capture. */ |
379 | 492k | regex->ncaptures++; |
380 | | |
381 | 492k | if (regex->ncaptures > 1) { |
382 | 101k | ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMECOUNT, |
383 | 101k | ®ex->nentries); |
384 | | |
385 | 101k | if (njs_slow_path(ret < 0)) { |
386 | 0 | njs_alert(trace, NJS_LEVEL_ERROR, |
387 | 0 | "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMECOUNT) failed: %s", |
388 | 0 | source, njs_regex_pcre2_error(ret, errstr)); |
389 | |
|
390 | 0 | return NJS_ERROR; |
391 | 0 | } |
392 | | |
393 | 101k | if (regex->nentries != 0) { |
394 | 34.1k | ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMEENTRYSIZE, |
395 | 34.1k | ®ex->entry_size); |
396 | | |
397 | 34.1k | if (njs_slow_path(ret < 0)) { |
398 | 0 | njs_alert(trace, NJS_LEVEL_ERROR, |
399 | 0 | "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMEENTRYSIZE)" |
400 | 0 | " failed: %s", source, |
401 | 0 | njs_regex_pcre2_error(ret, errstr)); |
402 | |
|
403 | 0 | return NJS_ERROR; |
404 | 0 | } |
405 | | |
406 | 34.1k | ret = pcre2_pattern_info(regex->code, PCRE2_INFO_NAMETABLE, |
407 | 34.1k | ®ex->entries); |
408 | | |
409 | 34.1k | if (njs_slow_path(ret < 0)) { |
410 | 0 | njs_alert(trace, NJS_LEVEL_ERROR, |
411 | 0 | "pcre2_pattern_info(\"%s\", PCRE2_INFO_NAMETABLE) " |
412 | 0 | "failed: %s", source, |
413 | 0 | njs_regex_pcre2_error(ret, errstr)); |
414 | |
|
415 | 0 | return NJS_ERROR; |
416 | 0 | } |
417 | 34.1k | } |
418 | 101k | } |
419 | | |
420 | 492k | return NJS_OK; |
421 | | |
422 | | #else |
423 | | |
424 | | int ret, err, erroff; |
425 | | char *pattern, *error; |
426 | | void *(*saved_malloc)(size_t size); |
427 | | void (*saved_free)(void *p); |
428 | | njs_uint_t options; |
429 | | const char *errstr; |
430 | | njs_regex_generic_ctx_t *ctx; |
431 | | |
432 | | ctx = cctx; |
433 | | |
434 | | ret = NJS_ERROR; |
435 | | |
436 | | saved_malloc = pcre_malloc; |
437 | | pcre_malloc = njs_pcre_malloc; |
438 | | saved_free = pcre_free; |
439 | | pcre_free = njs_pcre_free; |
440 | | regex_context = ctx; |
441 | | |
442 | | #ifdef PCRE_JAVASCRIPT_COMPAT |
443 | | /* JavaScript compatibility has been introduced in PCRE-7.7. */ |
444 | | options = PCRE_JAVASCRIPT_COMPAT; |
445 | | #else |
446 | | options = 0; |
447 | | #endif |
448 | | |
449 | | if ((flags & NJS_REGEX_IGNORE_CASE)) { |
450 | | options |= PCRE_CASELESS; |
451 | | } |
452 | | |
453 | | if ((flags & NJS_REGEX_MULTILINE)) { |
454 | | options |= PCRE_MULTILINE; |
455 | | } |
456 | | |
457 | | if ((flags & NJS_REGEX_STICKY)) { |
458 | | options |= PCRE_ANCHORED; |
459 | | } |
460 | | |
461 | | if ((flags & NJS_REGEX_UTF8)) { |
462 | | options |= PCRE_UTF8; |
463 | | } |
464 | | |
465 | | pattern = (char *) source; |
466 | | |
467 | | regex->code = pcre_compile(pattern, options, &errstr, &erroff, NULL); |
468 | | |
469 | | if (njs_slow_path(regex->code == NULL)) { |
470 | | error = pattern + erroff; |
471 | | |
472 | | if (*error != '\0') { |
473 | | njs_alert(trace, NJS_LEVEL_ERROR, |
474 | | "pcre_compile(\"%s\") failed: %s at \"%s\"", |
475 | | pattern, errstr, error); |
476 | | |
477 | | } else { |
478 | | njs_alert(trace, NJS_LEVEL_ERROR, |
479 | | "pcre_compile(\"%s\") failed: %s", pattern, errstr); |
480 | | } |
481 | | |
482 | | ret = NJS_DECLINED; |
483 | | |
484 | | goto done; |
485 | | } |
486 | | |
487 | | regex->extra = pcre_study(regex->code, 0, &errstr); |
488 | | |
489 | | if (njs_slow_path(errstr != NULL)) { |
490 | | njs_alert(trace, NJS_LEVEL_WARN, |
491 | | "pcre_study(\"%s\") failed: %s", pattern, errstr); |
492 | | } |
493 | | |
494 | | err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_CAPTURECOUNT, |
495 | | ®ex->ncaptures); |
496 | | |
497 | | if (njs_slow_path(err < 0)) { |
498 | | njs_alert(trace, NJS_LEVEL_ERROR, |
499 | | "pcre_fullinfo(\"%s\", PCRE_INFO_CAPTURECOUNT) failed: %d", |
500 | | pattern, err); |
501 | | |
502 | | goto done; |
503 | | } |
504 | | |
505 | | err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_BACKREFMAX, |
506 | | ®ex->backrefmax); |
507 | | |
508 | | if (njs_slow_path(err < 0)) { |
509 | | njs_alert(trace, NJS_LEVEL_ERROR, |
510 | | "pcre_fullinfo(\"%s\", PCRE_INFO_BACKREFMAX) failed: %d", |
511 | | pattern, err); |
512 | | |
513 | | goto done; |
514 | | } |
515 | | |
516 | | /* Reserve additional elements for the first "$0" capture. */ |
517 | | regex->ncaptures++; |
518 | | |
519 | | if (regex->ncaptures > 1) { |
520 | | err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMECOUNT, |
521 | | ®ex->nentries); |
522 | | |
523 | | if (njs_slow_path(err < 0)) { |
524 | | njs_alert(trace, NJS_LEVEL_ERROR, |
525 | | "pcre_fullinfo(\"%s\", PCRE_INFO_NAMECOUNT) failed: %d", |
526 | | pattern, err); |
527 | | |
528 | | goto done; |
529 | | } |
530 | | |
531 | | if (regex->nentries != 0) { |
532 | | err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMEENTRYSIZE, |
533 | | ®ex->entry_size); |
534 | | |
535 | | if (njs_slow_path(err < 0)) { |
536 | | njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " |
537 | | "PCRE_INFO_NAMEENTRYSIZE) failed: %d", pattern, err); |
538 | | |
539 | | goto done; |
540 | | } |
541 | | |
542 | | err = pcre_fullinfo(regex->code, NULL, PCRE_INFO_NAMETABLE, |
543 | | ®ex->entries); |
544 | | |
545 | | if (njs_slow_path(err < 0)) { |
546 | | njs_alert(trace, NJS_LEVEL_ERROR, "pcre_fullinfo(\"%s\", " |
547 | | "PCRE_INFO_NAMETABLE) failed: %d", pattern, err); |
548 | | |
549 | | goto done; |
550 | | } |
551 | | } |
552 | | } |
553 | | |
554 | | ret = NJS_OK; |
555 | | |
556 | | done: |
557 | | |
558 | | pcre_malloc = saved_malloc; |
559 | | pcre_free = saved_free; |
560 | | regex_context = NULL; |
561 | | |
562 | | return ret; |
563 | | |
564 | | #endif |
565 | 492k | } |
566 | | |
567 | | |
568 | | njs_bool_t |
569 | | njs_regex_is_valid(njs_regex_t *regex) |
570 | 1.54M | { |
571 | 1.54M | return (regex->code != NULL); |
572 | 1.54M | } |
573 | | |
574 | | |
575 | | njs_int_t |
576 | | njs_regex_named_captures(njs_regex_t *regex, njs_str_t *name, int n) |
577 | 339k | { |
578 | 339k | char *entry; |
579 | | |
580 | 339k | if (name == NULL) { |
581 | 266k | return regex->nentries; |
582 | 266k | } |
583 | | |
584 | 73.4k | if (n >= regex->nentries) { |
585 | 0 | return NJS_ERROR; |
586 | 0 | } |
587 | | |
588 | 73.4k | entry = regex->entries + regex->entry_size * n; |
589 | | |
590 | 73.4k | name->start = (u_char *) entry + 2; |
591 | 73.4k | name->length = njs_strlen(name->start); |
592 | | |
593 | 73.4k | return (entry[0] << 8) + entry[1]; |
594 | 73.4k | } |
595 | | |
596 | | |
597 | | njs_regex_match_data_t * |
598 | | njs_regex_match_data(njs_regex_t *regex, njs_regex_generic_ctx_t *ctx) |
599 | 892k | { |
600 | 892k | #ifdef NJS_HAVE_PCRE2 |
601 | | |
602 | 892k | if (regex != NULL) { |
603 | 878k | return pcre2_match_data_create_from_pattern(regex->code, ctx); |
604 | 878k | } |
605 | | |
606 | 14.8k | return pcre2_match_data_create(0, ctx); |
607 | | |
608 | | #else |
609 | | |
610 | | size_t size; |
611 | | njs_uint_t ncaptures; |
612 | | njs_regex_match_data_t *match_data; |
613 | | |
614 | | if (regex != NULL) { |
615 | | ncaptures = regex->ncaptures - 1; |
616 | | |
617 | | } else { |
618 | | ncaptures = 0; |
619 | | } |
620 | | |
621 | | /* Each capture is stored in 3 "int" vector elements. */ |
622 | | ncaptures *= 3; |
623 | | size = sizeof(njs_regex_match_data_t) + ncaptures * sizeof(int); |
624 | | |
625 | | match_data = ctx->private_malloc(size, ctx->memory_data); |
626 | | |
627 | | if (njs_fast_path(match_data != NULL)) { |
628 | | match_data->ncaptures = ncaptures + 3; |
629 | | } |
630 | | |
631 | | return match_data; |
632 | | |
633 | | #endif |
634 | 892k | } |
635 | | |
636 | | |
637 | | void |
638 | | njs_regex_match_data_free(njs_regex_match_data_t *match_data, |
639 | | njs_regex_generic_ctx_t *ctx) |
640 | 878k | { |
641 | 878k | #ifdef NJS_HAVE_PCRE2 |
642 | | |
643 | 878k | pcre2_match_data_free(match_data); |
644 | | |
645 | | #else |
646 | | |
647 | | ctx->private_free(match_data, ctx->memory_data); |
648 | | |
649 | | #endif |
650 | 878k | } |
651 | | |
652 | | |
653 | | njs_int_t |
654 | | njs_regex_match(njs_regex_t *regex, const u_char *subject, size_t off, |
655 | | size_t len, njs_regex_match_data_t *match_data, njs_trace_t *trace) |
656 | 902k | { |
657 | 902k | #ifdef NJS_HAVE_PCRE2 |
658 | | |
659 | 902k | int ret; |
660 | 902k | u_char errstr[128]; |
661 | | |
662 | 902k | ret = pcre2_match(regex->code, subject, len, off, 0, match_data, NULL); |
663 | | |
664 | 902k | if (ret < 0) { |
665 | 621k | if (ret == PCRE2_ERROR_NOMATCH) { |
666 | 620k | return NJS_DECLINED; |
667 | 620k | } |
668 | | |
669 | 576 | njs_alert(trace, NJS_LEVEL_ERROR, "pcre2_match() failed: %s", |
670 | 576 | njs_regex_pcre2_error(ret, errstr)); |
671 | 576 | return NJS_ERROR; |
672 | 621k | } |
673 | | |
674 | 281k | return ret; |
675 | | |
676 | | #else |
677 | | |
678 | | int ret; |
679 | | |
680 | | ret = pcre_exec(regex->code, regex->extra, (const char *) subject, len, |
681 | | off, 0, match_data->captures, match_data->ncaptures); |
682 | | |
683 | | if (ret <= PCRE_ERROR_NOMATCH) { |
684 | | if (ret == PCRE_ERROR_NOMATCH) { |
685 | | return NJS_DECLINED; |
686 | | } |
687 | | |
688 | | njs_alert(trace, NJS_LEVEL_ERROR, "pcre_exec() failed: %d", ret); |
689 | | return NJS_ERROR; |
690 | | } |
691 | | |
692 | | return ret; |
693 | | |
694 | | #endif |
695 | 902k | } |
696 | | |
697 | | |
698 | | size_t |
699 | | njs_regex_capture(njs_regex_match_data_t *match_data, njs_uint_t n) |
700 | 1.13M | { |
701 | 1.13M | #ifdef NJS_HAVE_PCRE2 |
702 | | |
703 | 1.13M | size_t c; |
704 | | |
705 | 1.13M | c = pcre2_get_ovector_pointer(match_data)[n]; |
706 | | |
707 | 1.13M | if (c == PCRE2_UNSET) { |
708 | 18.4k | return NJS_REGEX_UNSET; |
709 | 18.4k | } |
710 | | |
711 | 1.11M | return c; |
712 | | |
713 | | #else |
714 | | |
715 | | return match_data->captures[n]; |
716 | | |
717 | | #endif |
718 | 1.13M | } |
719 | | |
720 | | #ifdef NJS_HAVE_PCRE2 |
721 | | |
722 | | static const u_char * |
723 | | njs_regex_pcre2_error(int errcode, u_char buffer[128]) |
724 | 164k | { |
725 | 164k | pcre2_get_error_message(errcode, buffer, 128); |
726 | | |
727 | 164k | return buffer; |
728 | 164k | } |
729 | | |
730 | | #else |
731 | | |
732 | | static void * |
733 | | njs_pcre_malloc(size_t size) |
734 | | { |
735 | | return regex_context->private_malloc(size, regex_context->memory_data); |
736 | | } |
737 | | |
738 | | |
739 | | static void |
740 | | njs_pcre_free(void *p) |
741 | | { |
742 | | regex_context->private_free(p, regex_context->memory_data); |
743 | | } |
744 | | |
745 | | #endif |
746 | | |
747 | | |