/src/hermes/include/hermes/Regex/Executor.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #ifndef HERMES_REGEX_EXECUTOR_H |
9 | | #define HERMES_REGEX_EXECUTOR_H |
10 | | |
11 | | #include "hermes/Regex/RegexBytecode.h" |
12 | | #include "hermes/Regex/RegexTypes.h" |
13 | | #include "hermes/Support/StackOverflowGuard.h" |
14 | | |
15 | | // This file contains the machinery for executing a regexp compiled to bytecode. |
16 | | |
17 | | namespace hermes { |
18 | | namespace regex { |
19 | | |
20 | | /// The result of trying to find a match, as returned by searchWithBytecode(). |
21 | | enum class MatchRuntimeResult { |
22 | | /// A match was found. |
23 | | Match, |
24 | | |
25 | | /// No match was found. |
26 | | NoMatch, |
27 | | |
28 | | /// A stack overflow occurred during the match attempt. |
29 | | StackOverflow, |
30 | | }; |
31 | | |
32 | | /// Sentinel value stored in CapturedRange::start to indicate that the capture |
33 | | /// group did not match. Equal to UINT32_MAX. |
34 | | constexpr uint32_t kNotMatched = UINT32_MAX; |
35 | | |
36 | | /// The maximum number of times we will backtrack (2^30). |
37 | | constexpr uint32_t kBacktrackLimit = 1u << 30; |
38 | | |
39 | | /// A CapturedRange represents a range of the input string captured by a capture |
40 | | /// group. A capture group may also not have matched, in which case its start is |
41 | | /// set to kNotMatched. Note that an unmatched capture group is different from a |
42 | | /// capture group that matched an empty string. |
43 | | struct CapturedRange { |
44 | | /// Index of the first captured character, or kNotMatched if not matched. |
45 | | uint32_t start; |
46 | | |
47 | | /// One past the index of the last captured character. NOTE(review): the value of end for an unmatched group is not specified here. |
48 | | uint32_t end; |
49 | | |
50 | | /// \return whether this range was a successful match, i.e. start is not kNotMatched; end is not consulted. |
51 | 0 | bool matched() const { |
52 | 0 | return start != kNotMatched; |
53 | 0 | } |
54 | | }; |
55 | | |
56 | | /// Given a string \p first with length \p length, look for regex matches |
57 | | /// starting at offset \p start. We must have 0 <= start <= length. |
58 | | /// Search using the compiled regex represented by \p bytecode with the flags \p |
59 | | /// matchFlags. If the search succeeds, populate \p captures with the capture |
60 | | /// groups. |
61 | | /// \param guard is used to implement stack overflow prevention; it defaults to nativeStackGuard(512 * 1024) when HERMES_CHECK_NATIVE_STACK is defined and to depthCounterGuard(128) otherwise. |
62 | | /// \return MatchRuntimeResult::Match if some portion of the string matched the regex |
63 | | /// represented by the bytecode, MatchRuntimeResult::NoMatch if none did, or MatchRuntimeResult::StackOverflow if a stack overflow occurred during the attempt. This is the char16_t overload. |
64 | | MatchRuntimeResult searchWithBytecode( |
65 | | llvh::ArrayRef<uint8_t> bytecode, |
66 | | const char16_t *first, |
67 | | uint32_t start, |
68 | | uint32_t length, |
69 | | std::vector<CapturedRange> *captures, |
70 | | constants::MatchFlagType matchFlags, |
71 | | StackOverflowGuard guard = |
72 | | #ifdef HERMES_CHECK_NATIVE_STACK |
73 | | StackOverflowGuard::nativeStackGuard( |
74 | | 512 * 1024) // this is a conservative gap that should work in |
75 | | // sanitizer builds |
76 | | #else |
77 | | StackOverflowGuard::depthCounterGuard(128) |
78 | | #endif |
79 | | ); |
80 | | |
81 | | /// This is the ASCII overload: it takes the same parameters as the char16_t overload above, except that \p first is a const char *. |
82 | | MatchRuntimeResult searchWithBytecode( |
83 | | llvh::ArrayRef<uint8_t> bytecode, |
84 | | const char *first, |
85 | | uint32_t start, |
86 | | uint32_t length, |
87 | | std::vector<CapturedRange> *captures, |
88 | | constants::MatchFlagType matchFlags, |
89 | | StackOverflowGuard guard = |
90 | | #ifdef HERMES_CHECK_NATIVE_STACK |
91 | | StackOverflowGuard::nativeStackGuard( |
92 | | 512 * 1024) // this is a conservative gap that should work in |
93 | | // sanitizer builds |
94 | | #else |
95 | | StackOverflowGuard::depthCounterGuard(128) |
96 | | #endif |
97 | | ); |
98 | | |
99 | | } // namespace regex |
100 | | } // namespace hermes |
101 | | |
102 | | #endif |