/src/hermes/include/hermes/Regex/Executor.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #ifndef HERMES_REGEX_EXECUTOR_H |
9 | | #define HERMES_REGEX_EXECUTOR_H |
10 | | |
11 | | #include "hermes/Regex/RegexBytecode.h" |
12 | | #include "hermes/Regex/RegexTypes.h" |
13 | | |
14 | | // This file contains the machinery for executing a regexp compiled to bytecode. |
15 | | |
16 | | namespace hermes { |
17 | | namespace regex { |
18 | | |
19 | | /// Outcome of a regex search; returned by the searchWithBytecode overloads. |
20 | | enum class MatchRuntimeResult { |
21 | | /// The search found a match. |
22 | | Match, |
23 | | |
24 | | /// The search did not find a match. |
25 | | NoMatch, |
26 | | |
27 | | /// A stack overflow occurred during the match attempt. |
28 | | StackOverflow, |
29 | | }; |
30 | | |
31 | | /// Sentinel stored in CapturedRange::start to indicate that the capture group |
32 | | /// did not match; CapturedRange::matched() compares start against it. |
33 | | constexpr uint32_t kNotMatched = UINT32_MAX; |
34 | | |
35 | | /// The maximum number of times we will backtrack (1u << 30, i.e. 2^30). |
36 | | constexpr uint32_t kBacktrackLimit = 1u << 30; |
37 | | |
38 | | /// A CapturedRange is the half-open range [start, end) of the input string |
39 | | /// captured by a capture group. A capture group may also not have matched, in |
40 | | /// which case its start is set to kNotMatched. Note that an unmatched capture |
41 | | /// group is different from a capture group that matched an empty string. |
42 | | struct CapturedRange { |
43 | | /// Index of the first captured character, or kNotMatched if not matched. |
44 | | uint32_t start; |
45 | | |
46 | | /// One past the index of the last captured character. |
47 | | uint32_t end; |
48 | | |
49 | | /// \return true unless start holds the kNotMatched sentinel; end is not checked. |
50 | 404k | bool matched() const { |
51 | 404k | return start != kNotMatched; |
52 | 404k | } |
53 | | }; |
54 | | |
55 | | /// Given a string \p first with length \p length, look for regex matches |
56 | | /// starting at offset \p start. We must have 0 <= start <= length. |
57 | | /// Search using the compiled regex represented by \p bytecode with the flags \p |
58 | | /// matchFlags. If the search succeeds, populate \p captures with the capture |
59 | | /// groups. |
60 | | /// \return Match if some portion of the string matched the regex represented by |
61 | | /// the bytecode, NoMatch if not, or StackOverflow on stack overflow. |
62 | | /// This is the char16_t overload. |
63 | | MatchRuntimeResult searchWithBytecode( |
64 | | llvh::ArrayRef<uint8_t> bytecode, |
65 | | const char16_t *first, |
66 | | uint32_t start, |
67 | | uint32_t length, |
68 | | std::vector<CapturedRange> *captures, |
69 | | constants::MatchFlagType matchFlags); |
70 | | |
71 | | /// This is the ASCII (const char *) overload; see the char16_t overload above. |
72 | | MatchRuntimeResult searchWithBytecode( |
73 | | llvh::ArrayRef<uint8_t> bytecode, |
74 | | const char *first, |
75 | | uint32_t start, |
76 | | uint32_t length, |
77 | | std::vector<CapturedRange> *captures, |
78 | | constants::MatchFlagType matchFlags); |
79 | | |
80 | | } // namespace regex |
81 | | } // namespace hermes |
82 | | |
83 | | #endif |