/src/hermes/include/hermes/Regex/RegexBytecode.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #ifndef HERMES_REGEX_REGEXBYTECODE_H |
9 | | #define HERMES_REGEX_REGEXBYTECODE_H |
10 | | |
11 | | #include "llvh/ADT/DenseMap.h" |
12 | | #include "llvh/Support/Casting.h" |
13 | | |
14 | | #include <cstdint> |
15 | | #include <vector> |
16 | | #pragma GCC diagnostic push |
17 | | |
18 | | #ifdef HERMES_COMPILER_SUPPORTS_WSHORTEN_64_TO_32 |
19 | | #pragma GCC diagnostic ignored "-Wshorten-64-to-32" |
20 | | #endif |
21 | | namespace hermes { |
22 | | namespace regex { |
23 | | |
24 | | /// Define the enum class of regex opcodes: each REOP(code) pulled in from RegexOpcodes.def expands to one uint8_t-backed enumerator. |
25 | | #define REOP(code) code, |
26 | | enum class Opcode : uint8_t { |
27 | | #include "hermes/Regex/RegexOpcodes.def" |
28 | | }; |
29 | | |
30 | | /// Type representing a jump location, as an unsigned 32 bit value. |
31 | | using JumpTarget32 = uint32_t; |
32 | | |
33 | | /// Type representing a set of MatchConstraint flags as an 8 bit bitmask. |
34 | | using MatchConstraintSet = uint8_t; |
35 | | |
36 | | /// The list of Instructions corresponding to our Opcodes. |
37 | | /// Our instructions are packed with byte alignment, because they need to be |
38 | | /// serializable directly. |
39 | | LLVM_PACKED_START |
40 | | |
41 | | /// Base instruction type. Every other instruction struct in this header derives from it. Insn and |
42 | | /// its subclasses support LLVM RTTI (isa, cast, dyn_cast, etc), keyed on the opcode field. |
43 | | struct Insn { |
/// Tag identifying the concrete instruction type; it is the only field of the
/// base struct.
44 | | Opcode opcode; |
45 | | }; |
46 | | |
/// Operand-free instructions: each adds no fields to Insn, so the inherited
/// opcode is the only data they carry.
47 | | struct GoalInsn : public Insn {}; |
48 | | struct LeftAnchorInsn : public Insn {}; |
49 | | struct RightAnchorInsn : public Insn {}; |
50 | | struct MatchAnyInsn : public Insn {}; |
51 | | struct U16MatchAnyInsn : public Insn {}; |
52 | | struct MatchAnyButNewlineInsn : public Insn {}; |
53 | | struct U16MatchAnyButNewlineInsn : public Insn {}; |
/// Carries a single char operand. NOTE(review): presumably the 8 bit
/// counterpart of MatchChar16Insn - confirm against the matcher.
54 | | struct MatchChar8Insn : public Insn { |
55 | | char c; |
56 | | }; |
57 | | |
58 | | // Matches a 16 bit character (a char16_t) without attempting to interpret surrogate pairs. |
59 | | struct MatchChar16Insn : public Insn { |
60 | | char16_t c; |
61 | | }; |
62 | | |
63 | | // Matches a code point (stored as a uint32_t), decoding a surrogate pair if necessary. |
64 | | struct U16MatchChar32Insn : public Insn { |
65 | | uint32_t c; |
66 | | }; |
67 | | |
68 | | // Instructions for case-insensitive matching. In each of them c is already case-folded. |
69 | | struct MatchCharICase8Insn : public Insn { |
70 | | char c; |
71 | | }; |
72 | | |
73 | | // Matches a 16 bit character (case insensitive) without attempting to interpret surrogate pairs. |
74 | | struct MatchCharICase16Insn : public Insn { |
75 | | char16_t c; |
76 | | }; |
77 | | |
78 | | // Matches a code point (case insensitive), decoding a surrogate pair if |
79 | | // necessary. The code point is stored as a uint32_t. |
80 | | struct U16MatchCharICase32Insn : public Insn { |
81 | | uint32_t c; |
82 | | }; |
83 | | |
/// Instruction that selects between two branches, each guarded by its own
/// constraint set.
84 | | struct AlternationInsn : public Insn { |
85 | | /// The primary branch is the Insn following the alternation, while the |
86 | | /// secondary branch is at the secondaryBranch jump target. Both branches have |
87 | | /// constraints (below) which determine whether they are viable. |
88 | | JumpTarget32 secondaryBranch; |
89 | | MatchConstraintSet primaryConstraints; |
90 | | MatchConstraintSet secondaryConstraints; |
91 | | }; |
/// Instruction whose only operand is a 32 bit jump target.
/// NOTE(review): presumably an unconditional jump - confirm against the
/// executor.
92 | | struct Jump32Insn : public Insn { |
93 | | JumpTarget32 target; |
94 | | }; |
95 | | |
/// Instruction whose only operand is a uint16_t named mexp.
/// NOTE(review): presumably the index of the marked subexpression being
/// back-referenced (same field name as in BeginMarkedSubexpressionInsn) -
/// confirm.
96 | | struct BackRefInsn : public Insn { |
97 | | uint16_t mexp; |
98 | | }; |
99 | | |
100 | | /// A BracketRange represents an inclusive range of characters in a bracket, |
101 | | /// such as /[a-z]/. Singletons like /[a]/ are represented as the range a-a. Both bounds are 32 bit values. |
102 | | struct BracketRange32 { |
103 | | uint32_t start; |
104 | | uint32_t end; |
105 | | }; |
106 | | |
107 | | /// BracketInsn is a variable-width instruction. Each BracketInsn is followed by |
108 | | /// a sequence of rangeCount BracketRange32 entries in the bytecode stream. |
109 | | struct BracketInsn : public Insn { |
110 | | /// Number of BracketRange32s following this instruction. |
111 | | uint32_t rangeCount; |
112 | | /// Whether the bracket is negated (leading ^). Stored as a 1 bit field. |
113 | | uint8_t negate : 1; |
114 | | |
115 | | /// A bitmask containing the three positive character classes \d \s \w, and a |
116 | | /// negative companion for their inverts \D \S \W. Each mask is a 3 bit field. |
117 | | /// See CharacterClass::Type for the flag values. |
118 | | uint8_t positiveCharClasses : 3; |
119 | | uint8_t negativeCharClasses : 3; |
120 | | |
121 | | /// \return the width in bytes of this instruction plus its rangeCount bracket ranges. |
122 | 0 | uint32_t totalWidth() const { |
123 | 0 | return sizeof(*this) + rangeCount * sizeof(BracketRange32); |
124 | 0 | } |
125 | | }; |
126 | | |
127 | | /// U16BracketInsn is a variant of BracketInsn used in Unicode regular |
128 | | /// expressions. It differs in that surrogate characters are decoded; it adds no fields of its own. |
129 | | struct U16BracketInsn : public BracketInsn {}; |
130 | | |
/// MatchNChar8Insn is a variable-width instruction: the fixed part is
/// followed by charCount chars in the bytecode stream.
131 | | struct MatchNChar8Insn : public Insn { |
132 | | // Number of 8-bit chars following this instruction (at most 255, as it is a uint8_t). |
133 | | uint8_t charCount; |
134 | | |
135 | | /// \return the width in bytes of this instruction plus its charCount characters. |
136 | 0 | uint32_t totalWidth() const { |
137 | 0 | return sizeof(*this) + charCount * sizeof(char); |
138 | 0 | } |
139 | | }; |
140 | | |
/// MatchNCharICase8Insn is a variable-width instruction: the fixed part is
/// followed by charCount chars in the bytecode stream.
141 | | struct MatchNCharICase8Insn : public Insn { |
142 | | // Number of 8-bit chars following this instruction (at most 255, as it is a uint8_t). |
143 | | uint8_t charCount; |
144 | | |
145 | | /// \return the width in bytes of this instruction plus its charCount characters. |
146 | 0 | uint32_t totalWidth() const { |
147 | 0 | return sizeof(*this) + charCount * sizeof(char); |
148 | 0 | } |
149 | | }; |
150 | | |
151 | | // Pins the packed size of BracketInsn at 6 bytes. See BytecodeFileFormatTest for details about bit field layouts. |
152 | | static_assert( |
153 | | sizeof(BracketInsn) == 6, |
154 | | "BracketInsn should take up 6 byte total"); |
155 | | |
/// Instruction for a word boundary assertion, in either its plain (\b) or
/// inverted (\B) form.
156 | | struct WordBoundaryInsn : public Insn { |
157 | | /// Whether the boundary is inverted (\B instead of \b). |
158 | | bool invert; |
159 | | }; |
160 | | |
161 | | /// Begin/EndMarkedSubexpression each store the index of the marked subexpression in mexp. |
162 | | /// Note that the first marked subexpression has index 1 (0 is reserved for |
163 | | /// the entire match). |
164 | | struct BeginMarkedSubexpressionInsn : public Insn { |
165 | | uint16_t mexp; |
166 | | }; |
167 | | struct EndMarkedSubexpressionInsn : public Insn { |
168 | | uint16_t mexp; |
169 | | }; |
170 | | |
171 | | /// A LookaroundInsn is immediately followed by bytecode for its contained |
172 | | /// expression. It has a jump target to its continuation. It covers both lookahead and lookbehind (see forwards). |
173 | | struct LookaroundInsn : public Insn { |
174 | | /// Whether we are inverted: (?!...) instead of (?=...). |
175 | | bool invert; |
176 | | /// Whether we are forwards: (?=...) instead of (?<=...). |
177 | | bool forwards; |
178 | | /// Constraints on what can match the contained expression. |
179 | | MatchConstraintSet constraints; |
180 | | // The subexpression marked regions we want to be able to backtrack. NOTE(review): presumably [mexpBegin, mexpEnd) as in BeginLoopInsn - confirm. |
181 | | uint16_t mexpBegin; |
182 | | uint16_t mexpEnd; |
183 | | /// Jump target of the continuation, used if the lookaround matches. |
184 | | JumpTarget32 continuation; |
185 | | }; |
186 | | |
187 | | /// An instruction for entering a loop. This supports all loop constructs |
188 | | /// available in regexp, and includes optional nongreedy support, min/max |
189 | | /// counts, and enclosed capture groups. The body of the loop is the Insn |
190 | | /// following the BeginLoopInsn, while the not-taken target (notTakenTarget) is stored following |
191 | | /// the loop body. |
192 | | struct BeginLoopInsn : public Insn { |
193 | | /// The LoopID is used to index into the state to count iterations and track |
194 | | /// the entry position. |
195 | | uint32_t loopId; |
196 | | |
197 | | /// Minimum and maximum iterations. |
198 | | /// For an unbounded loop (including Kleene star), max is UINT32_MAX. |
199 | | uint32_t min; |
200 | | uint32_t max; |
201 | | |
202 | | /// Range of marked subexpressions enclosed by the loop, as the half-open interval [begin, end). |
203 | | uint16_t mexpBegin; |
204 | | uint16_t mexpEnd; |
205 | | |
206 | | /// Whether the loop is greedy (i.e. * instead of *?). |
207 | | bool greedy; |
208 | | |
209 | | /// Set of constraints on what can match the loop body. |
210 | | MatchConstraintSet loopeeConstraints; |
211 | | |
212 | | /// The not-taken target for the loop. |
213 | | JumpTarget32 notTakenTarget; |
214 | | }; |
215 | | |
216 | | /// An instruction that closes a loop, appearing after the loop body. |
217 | | /// Its target is always a BeginLoopInsn. |
218 | | struct EndLoopInsn : public Insn { |
219 | | JumpTarget32 target; |
220 | | }; |
221 | | |
222 | | /// An instruction for entering a simple loop. This supports only loops that meet all of the following: |
223 | | /// - have no minimum or maximum, |
224 | | /// - do not contain any capture groups, |
225 | | /// - are greedy, and |
226 | | /// - have a body that cannot match the empty string. |
227 | | struct BeginSimpleLoopInsn : public Insn { |
228 | | /// We don't need a loop ID like BeginLoopInsn because we don't need to |
229 | | /// track the iteration count or the entry position. |
230 | | |
231 | | /// Set of constraints on what can match the loop body. |
232 | | MatchConstraintSet loopeeConstraints; |
233 | | |
234 | | /// The not-taken target for the loop. |
235 | | JumpTarget32 notTakenTarget; |
236 | | }; |
237 | | |
238 | | /// An instruction that closes a simple loop, appearing after the loop body. |
239 | | /// Its target is always a BeginSimpleLoopInsn. |
240 | | struct EndSimpleLoopInsn : public Insn { |
241 | | JumpTarget32 target; |
242 | | }; |
243 | | |
244 | | /// An instruction for entering a loop whose body always matches one character |
245 | | /// and does not contain any capture groups (hence no mexpBegin/mexpEnd fields, unlike BeginLoopInsn). |
246 | | struct Width1LoopInsn : public Insn { |
247 | | /// The LoopID is used to index into the state to count iterations and track |
248 | | /// the entry position. |
249 | | uint32_t loopId; |
250 | | |
251 | | /// Minimum and maximum iterations. |
252 | | /// For an unbounded loop (including Kleene star), max is UINT32_MAX. |
253 | | uint32_t min; |
254 | | uint32_t max; |
255 | | |
256 | | /// Whether the loop is greedy (i.e. * instead of *?). |
257 | | bool greedy; |
258 | | |
259 | | /// The not-taken target for the loop. |
260 | | JumpTarget32 notTakenTarget; |
261 | | }; |
262 | | |
263 | | /// A header that appears at the beginning of a bytecode stream. RegexBytecodeStream copies it in as raw bytes. |
264 | | struct RegexBytecodeHeader { |
265 | | /// Number of capture groups. |
266 | | uint16_t markedCount; |
267 | | |
268 | | /// Number of loops. |
269 | | uint16_t loopCount; |
270 | | |
271 | | /// Syntax flags used to construct the regex, stored in 8 bits. |
272 | | uint8_t syntaxFlags; |
273 | | |
274 | | /// Constraints on what strings can match this regex. |
275 | | MatchConstraintSet constraints; |
276 | | }; |
277 | | |
278 | | LLVM_PACKED_END; |
279 | | |
280 | | /// OpcodeFor<Instruction>::value is the opcode for the Instruction. |
/// The primary template is empty. The REOP macro below generates one explicit
/// specialization per REOP(Code) entry of RegexOpcodes.def, mapping the
/// struct CodeInsn to Opcode::Code.
281 | | template <typename Instruction> |
282 | | struct OpcodeFor {}; |
283 | | |
284 | | #define REOP(Code) \ |
285 | | template <> \ |
286 | | struct OpcodeFor<Code##Insn> { \ |
287 | | static constexpr Opcode value = Opcode::Code; \ |
288 | | }; |
289 | | #include "hermes/Regex/RegexOpcodes.def" |
290 | | |
291 | | /// A class representing a regex compiled to a bytecode stream. |
292 | | class RegexBytecodeStream { |
293 | | /// The stream of instructions encoded as bytes. |
294 | | std::vector<uint8_t> bytes_; |
295 | | |
296 | | /// Whether our bytecode has been acquired. |
297 | | bool acquired_ = false; |
298 | | |
299 | | public: |
300 | | /// Type acting as a reallocation-safe pointer to an instruction. |
301 | | /// This stores a pointer to the vector and an offset, rather than a pointer |
302 | | /// into the vector contents. |
303 | | template <typename Instruction> |
304 | | class InstructionWrapper { |
305 | | std::vector<uint8_t> *const bytes_; |
306 | | const uint32_t offset_; |
307 | | |
308 | | public: |
/// \return a pointer to the wrapped instruction, viewed as Instruction.
/// The address is recomputed from the vector and the stored offset on every
/// call, using the bounds-checked at(), and the result is passed through
/// llvh::cast.
309 | 1.63k | Instruction *operator->() { |
310 | 1.63k | Insn *base = reinterpret_cast<Insn *>(&bytes_->at(offset_)); |
311 | 1.63k | return llvh::cast<Instruction>(base); |
312 | 1.63k | } hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::Width1LoopInsn>::operator->() Line | Count | Source | 309 | 575 | Instruction *operator->() { | 310 | 575 | Insn *base = reinterpret_cast<Insn *>(&bytes_->at(offset_)); | 311 | 575 | return llvh::cast<Instruction>(base); | 312 | 575 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginSimpleLoopInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndSimpleLoopInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginLoopInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndLoopInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::AlternationInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::Jump32Insn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginMarkedSubexpressionInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndMarkedSubexpressionInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BackRefInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::WordBoundaryInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchNCharICase8Insn>::operator->() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchNChar8Insn>::operator->() Line | Count | Source | 309 | 513 | Instruction *operator->() { | 310 | 513 | Insn *base = reinterpret_cast<Insn *>(&bytes_->at(offset_)); | 311 | 513 | return llvh::cast<Instruction>(base); | 312 | 513 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchCharICase8Insn>::operator->() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchChar8Insn>::operator->() Line | Count | Source | 309 | 538 | Instruction *operator->() { | 310 | 538 | Insn *base = reinterpret_cast<Insn *>(&bytes_->at(offset_)); | 311 | 538 | return llvh::cast<Instruction>(base); | 312 | 538 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchCharICase32Insn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchChar32Insn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchCharICase16Insn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchChar16Insn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::LookaroundInsn>::operator->() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16BracketInsn>::operator->() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BracketInsn>::operator->() Line | Count | Source | 309 | 11 | Instruction *operator->() { | 310 | 11 | Insn *base = reinterpret_cast<Insn *>(&bytes_->at(offset_)); | 311 | 11 | return llvh::cast<Instruction>(base); | 312 | 11 | } |
|
313 | | |
/// Wrap the instruction that starts at index \p offset of the vector
/// \p bytes. Only the vector pointer and the offset are stored.
314 | | InstructionWrapper(std::vector<uint8_t> *bytes, uint32_t offset) |
315 | 1.49k | : bytes_(bytes), offset_(offset) {} hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::GoalInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 325 | : bytes_(bytes), offset_(offset) {} |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchNCharICase8Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchNChar8Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 513 | : bytes_(bytes), offset_(offset) {} |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchCharICase8Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchChar8Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 538 | : bytes_(bytes), offset_(offset) {} |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchCharICase32Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchChar32Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchCharICase16Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchChar16Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::AlternationInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::Jump32Insn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginMarkedSubexpressionInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndMarkedSubexpressionInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::LookaroundInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) 
hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::Width1LoopInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 115 | : bytes_(bytes), offset_(offset) {} |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginSimpleLoopInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndSimpleLoopInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginLoopInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndLoopInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::LeftAnchorInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 1 | : bytes_(bytes), offset_(offset) {} |
hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::RightAnchorInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 1 | : bytes_(bytes), offset_(offset) {} |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::WordBoundaryInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16BracketInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BracketInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Line | Count | Source | 315 | 4 | : bytes_(bytes), offset_(offset) {} |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BackRefInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchAnyInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchAnyButNewlineInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchAnyInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchAnyButNewlineInsn>::InstructionWrapper(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*, unsigned int) |
316 | | }; |
317 | | |
318 | | /// Emit an instruction: append sizeof(Instruction) zero bytes to the stream and set the opcode of the new instruction. |
319 | | /// \return a dereferenceable "pointer" to the instruction in the bytecode |
320 | | /// stream, holding the address of the vector and the index at which the instruction starts. |
321 | | template <typename Instruction> |
322 | 1.49k | InstructionWrapper<Instruction> emit() { |
323 | 1.49k | size_t startSize = bytes_.size(); |
// Zero-fill the new bytes, so every field other than the opcode starts at 0.
324 | 1.49k | bytes_.resize(startSize + sizeof(Instruction), 0); |
325 | 1.49k | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); |
326 | 1.49k | insn->opcode = OpcodeFor<Instruction>::value; |
327 | 1.49k | return InstructionWrapper<Instruction>(&bytes_, startSize); |
328 | 1.49k | } hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::GoalInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::GoalInsn>() Line | Count | Source | 322 | 325 | InstructionWrapper<Instruction> emit() { | 323 | 325 | size_t startSize = bytes_.size(); | 324 | 325 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 325 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 325 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 325 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 325 | } |
hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::Width1LoopInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::Width1LoopInsn>() Line | Count | Source | 322 | 115 | InstructionWrapper<Instruction> emit() { | 323 | 115 | size_t startSize = bytes_.size(); | 324 | 115 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 115 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 115 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 115 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 115 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginSimpleLoopInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::BeginSimpleLoopInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndSimpleLoopInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::EndSimpleLoopInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginLoopInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::BeginLoopInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndLoopInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::EndLoopInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::AlternationInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::AlternationInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::Jump32Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::Jump32Insn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BeginMarkedSubexpressionInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::BeginMarkedSubexpressionInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::EndMarkedSubexpressionInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::EndMarkedSubexpressionInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BackRefInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::BackRefInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::WordBoundaryInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::WordBoundaryInsn>() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::LeftAnchorInsn> 
hermes::regex::RegexBytecodeStream::emit<hermes::regex::LeftAnchorInsn>() Line | Count | Source | 322 | 1 | InstructionWrapper<Instruction> emit() { | 323 | 1 | size_t startSize = bytes_.size(); | 324 | 1 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 1 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 1 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 1 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 1 | } |
hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::RightAnchorInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::RightAnchorInsn>() Line | Count | Source | 322 | 1 | InstructionWrapper<Instruction> emit() { | 323 | 1 | size_t startSize = bytes_.size(); | 324 | 1 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 1 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 1 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 1 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 1 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchAnyInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::U16MatchAnyInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchAnyButNewlineInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::U16MatchAnyButNewlineInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchAnyInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchAnyInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchAnyButNewlineInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchAnyButNewlineInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchNCharICase8Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchNCharICase8Insn>() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchNChar8Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchNChar8Insn>() Line | Count | Source | 322 | 513 | InstructionWrapper<Instruction> emit() { | 323 | 513 | size_t startSize = bytes_.size(); | 324 | 513 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 513 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 513 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 513 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 513 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchCharICase8Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchCharICase8Insn>() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchChar8Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchChar8Insn>() Line | Count | Source | 322 | 538 | InstructionWrapper<Instruction> emit() { | 323 | 538 | size_t startSize = bytes_.size(); | 324 | 538 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 538 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 538 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 538 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 538 | } |
Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchCharICase32Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::U16MatchCharICase32Insn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16MatchChar32Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::U16MatchChar32Insn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchCharICase16Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchCharICase16Insn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::MatchChar16Insn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::MatchChar16Insn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::LookaroundInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::LookaroundInsn>() Unexecuted instantiation: hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::U16BracketInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::U16BracketInsn>() hermes::regex::RegexBytecodeStream::InstructionWrapper<hermes::regex::BracketInsn> hermes::regex::RegexBytecodeStream::emit<hermes::regex::BracketInsn>() Line | Count | Source | 322 | 4 | InstructionWrapper<Instruction> emit() { | 323 | 4 | size_t startSize = bytes_.size(); | 324 | 4 | bytes_.resize(startSize + sizeof(Instruction), 0); | 325 | 4 | Insn *insn = reinterpret_cast<Insn *>(&bytes_[startSize]); | 326 | 4 | insn->opcode = OpcodeFor<Instruction>::value; | 327 | 4 | return InstructionWrapper<Instruction>(&bytes_, startSize); | 328 | 4 | } |
|
329 | | |
330 | | /// Emit a BracketRange32 by appending its sizeof(range) raw bytes to the end of the stream. No rangeCount is updated here. |
331 | 2 | void emitBracketRange(BracketRange32 range) { |
332 | 2 | const uint8_t *rangeBytes = reinterpret_cast<const uint8_t *>(&range); |
333 | 2 | bytes_.insert(bytes_.end(), rangeBytes, rangeBytes + sizeof(range)); |
334 | 2 | } |
335 | | |
336 | | /// Emit a Char8 for use inside a MatchNChar8Insn or MatchNCharICase8Insn. Appends exactly one byte; no charCount is updated here. |
337 | 128k | void emitChar8(char c) { |
338 | 128k | bytes_.push_back((uint8_t)c); |
339 | 128k | } |
340 | | |
341 | | /// \return the current offset in the stream, which is where the next |
342 | | /// instruction will be emitted. Note the header is omitted. |
343 | 230 | uint32_t currentOffset() const { |
344 | 230 | return bytes_.size() - sizeof(RegexBytecodeHeader); |
345 | 230 | } |
346 | | |
347 | | /// \return the bytecode, transferring ownership of it to the caller. |
348 | 325 | std::vector<uint8_t> acquireBytecode() { |
349 | 325 | assert(!acquired_ && "Bytecode already acquired"); |
350 | 325 | acquired_ = true; |
351 | 325 | return std::move(bytes_); |
352 | 325 | } |
353 | | |
354 | | /// Construct a RegexBytecodeStream starting with a header. |
355 | 325 | RegexBytecodeStream(const RegexBytecodeHeader &header) { |
356 | 325 | const uint8_t *headerBytes = reinterpret_cast<const uint8_t *>(&header); |
357 | 325 | bytes_.insert(bytes_.end(), headerBytes, headerBytes + sizeof header); |
358 | 325 | } |
359 | | }; |
360 | | |
361 | | } // namespace regex |
362 | | } // namespace hermes |
363 | | |
364 | | namespace llvh { |
365 | | /// LLVM RTTI implementation for regex instructions. Rather than defining |
366 | | /// classof() for each instruction struct, which would require a lot of |
367 | | /// error-prone boilerplate, we take the Casting.h header's suggestion of |
368 | | /// specializing isa_impl for the case where From is just Insn and To is one of |
369 | | /// its subclasses. |
370 | | template <typename To, typename From> |
371 | | struct isa_impl< |
372 | | To, |
373 | | From, |
374 | | typename std::enable_if< |
375 | | std::is_same<hermes::regex::Insn, From>::value && |
376 | | std::is_base_of<hermes::regex::Insn, To>::value>::type> { |
377 | 1.65k | static inline bool doit(const From &val) { |
378 | 1.65k | return val.opcode == hermes::regex::OpcodeFor<To>::value; |
379 | 1.65k | } llvh::isa_impl<hermes::regex::Width1LoopInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Line | Count | Source | 377 | 582 | static inline bool doit(const From &val) { | 378 | 582 | return val.opcode == hermes::regex::OpcodeFor<To>::value; | 379 | 582 | } |
Unexecuted instantiation: llvh::isa_impl<hermes::regex::BeginSimpleLoopInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::EndSimpleLoopInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::BeginLoopInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::EndLoopInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::AlternationInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::Jump32Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::BeginMarkedSubexpressionInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::EndMarkedSubexpressionInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::BackRefInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::WordBoundaryInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::MatchNCharICase8Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) llvh::isa_impl<hermes::regex::MatchNChar8Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Line | Count | Source | 377 | 520 | static inline bool doit(const From &val) { | 378 | 520 | return val.opcode == hermes::regex::OpcodeFor<To>::value; | 379 | 520 | } |
Unexecuted instantiation: llvh::isa_impl<hermes::regex::MatchCharICase8Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) llvh::isa_impl<hermes::regex::MatchChar8Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Line | Count | Source | 377 | 538 | static inline bool doit(const From &val) { | 378 | 538 | return val.opcode == hermes::regex::OpcodeFor<To>::value; | 379 | 538 | } |
Unexecuted instantiation: llvh::isa_impl<hermes::regex::U16MatchCharICase32Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::U16MatchChar32Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::MatchCharICase16Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::MatchChar16Insn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::LookaroundInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::U16BracketInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) llvh::isa_impl<hermes::regex::BracketInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Line | Count | Source | 377 | 18 | static inline bool doit(const From &val) { | 378 | 18 | return val.opcode == hermes::regex::OpcodeFor<To>::value; | 379 | 18 | } |
Unexecuted instantiation: llvh::isa_impl<hermes::regex::GoalInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::LeftAnchorInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::RightAnchorInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::MatchAnyInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::U16MatchAnyInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::MatchAnyButNewlineInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) Unexecuted instantiation: llvh::isa_impl<hermes::regex::U16MatchAnyButNewlineInsn, hermes::regex::Insn, void>::doit(hermes::regex::Insn const&) |
380 | | }; |
381 | | } // namespace llvh |
382 | | #pragma GCC diagnostic pop |
383 | | |
384 | | #endif // HERMES_REGEX_REGEXBYTECODE_H |