Coverage Report

Created: 2025-12-12 07:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/hermes/include/hermes/Regex/Regex.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- C++ -*-
9
//===--------------------------- regex ------------------------------------===//
10
//
11
//                     The LLVM Compiler Infrastructure
12
//
13
// This file is dual licensed under the MIT and the University of Illinois Open
14
// Source Licenses. See LICENSE.TXT for details.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#ifndef HERMES_REGEX_COMPILER_H
19
#define HERMES_REGEX_COMPILER_H
20
21
#include "hermes/Regex/RegexBytecode.h"
22
#include "hermes/Regex/RegexNode.h"
23
#include "hermes/Regex/RegexSupport.h"
24
#include "hermes/Regex/RegexTypes.h"
25
#include "hermes/Support/Algorithms.h"
26
#include "hermes/Support/Compiler.h"
27
28
#include <deque>
29
#include <string>
30
#include <vector>
31
32
namespace hermes {
33
namespace regex {
34
35
template <class Traits>
36
class Regex {
37
  // Enable the Parser to add nodes to us.
38
  template <class A, class B>
39
  friend class Parser;
40
41
  using CharT = typename Traits::CodeUnit;
42
  using CodePoint = typename Traits::CodePoint;
43
  using Node = regex::Node;
44
  using BracketNode = regex::BracketNode<Traits>;
45
46
 private:
47
  Traits traits_;
48
  SyntaxFlags flags_ = {};
49
50
  // Number of capture groups encountered so far.
51
  uint16_t markedCount_ = 0;
52
53
  // Number of loops encountered so far.
54
  uint32_t loopCount_ = 0;
55
56
  // The list of nodes so far.
57
  NodeList nodes_;
58
59
  // List of all unique_ptrs to all nodes created by this Regex, used to defer
60
  // destructors and avoid a stack overflow.
61
  NodeHolder nodeHolder_;
62
63
  // The error, which may be set after parsing.
64
  constants::ErrorType error_ = constants::ErrorType::None;
65
66
  // Constraints on the type of strings that can match this regex.
67
  MatchConstraintSet matchConstraints_ = 0;
68
69
  // This holds the named capture groups in the order they were defined.
70
  std::deque<llvh::SmallVector<char16_t, 5>> orderedGroupNames_{};
71
72
  ParsedGroupNamesMapping nameMapping_{};
73
74
  // We can skip double parsing if there were no named backreferences used
75
  // before the definition of the first named capture group.
76
  bool sawNamedBackrefBeforeGroup_{false};
77
78
  // Named backrefs that might be invalid.
79
  std::vector<std::pair<GroupName, BackRefNode *>> unresolvedNamedBackRefs_;
80
81
  /// Construct and and append a node of type NodeType at the end of the nodes_
82
  /// list. The node should be constructible from \p args.
83
  /// \return an observer pointer to the new node.
84
  template <typename NodeType, typename... Args>
85
1.75M
  NodeType *appendNode(Args &&...args) {
86
1.75M
    std::unique_ptr<NodeType> node =
87
1.75M
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
1.75M
    NodeType *nodePtr = node.get();
89
1.75M
    nodeHolder_.push_back(std::move(node));
90
1.75M
    nodes_.push_back(nodePtr);
91
1.75M
    return nodePtr;
92
1.75M
  }
hermes::regex::Node* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::Node>()
Line
Count
Source
85
1.82k
  NodeType *appendNode(Args &&...args) {
86
1.82k
    std::unique_ptr<NodeType> node =
87
1.82k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
1.82k
    NodeType *nodePtr = node.get();
89
1.82k
    nodeHolder_.push_back(std::move(node));
90
1.82k
    nodes_.push_back(nodePtr);
91
1.82k
    return nodePtr;
92
1.82k
  }
hermes::regex::GoalNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::GoalNode>()
Line
Count
Source
85
1.82k
  NodeType *appendNode(Args &&...args) {
86
1.82k
    std::unique_ptr<NodeType> node =
87
1.82k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
1.82k
    NodeType *nodePtr = node.get();
89
1.82k
    nodeHolder_.push_back(std::move(node));
90
1.82k
    nodes_.push_back(nodePtr);
91
1.82k
    return nodePtr;
92
1.82k
  }
hermes::regex::AlternationNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::AlternationNode, std::__1::vector<std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >, std::__1::allocator<std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> > > > >(std::__1::vector<std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >, std::__1::allocator<std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> > > >&&)
Line
Count
Source
85
789
  NodeType *appendNode(Args &&...args) {
86
789
    std::unique_ptr<NodeType> node =
87
789
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
789
    NodeType *nodePtr = node.get();
89
789
    nodeHolder_.push_back(std::move(node));
90
789
    nodes_.push_back(nodePtr);
91
789
    return nodePtr;
92
789
  }
hermes::regex::MarkedSubexpressionNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::MarkedSubexpressionNode, std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >, unsigned int&>(std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >&&, unsigned int&)
Line
Count
Source
85
60.6k
  NodeType *appendNode(Args &&...args) {
86
60.6k
    std::unique_ptr<NodeType> node =
87
60.6k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
60.6k
    NodeType *nodePtr = node.get();
89
60.6k
    nodeHolder_.push_back(std::move(node));
90
60.6k
    nodes_.push_back(nodePtr);
91
60.6k
    return nodePtr;
92
60.6k
  }
Unexecuted instantiation: hermes::regex::LookaroundNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::LookaroundNode, std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >, unsigned short&, unsigned short&, bool&, bool&>(std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >&&, unsigned short&, unsigned short&, bool&, bool&)
hermes::regex::LoopNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::LoopNode, unsigned int, unsigned int&, unsigned int&, bool&, unsigned int&, unsigned short&, std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> > >(unsigned int&&, unsigned int&, unsigned int&, bool&, unsigned int&, unsigned short&, std::__1::vector<hermes::regex::Node*, std::__1::allocator<hermes::regex::Node*> >&&)
Line
Count
Source
85
79.6k
  NodeType *appendNode(Args &&...args) {
86
79.6k
    std::unique_ptr<NodeType> node =
87
79.6k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
79.6k
    NodeType *nodePtr = node.get();
89
79.6k
    nodeHolder_.push_back(std::move(node));
90
79.6k
    nodes_.push_back(nodePtr);
91
79.6k
    return nodePtr;
92
79.6k
  }
hermes::regex::LeftAnchorNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::LeftAnchorNode, hermes::regex::SyntaxFlags>(hermes::regex::SyntaxFlags&&)
Line
Count
Source
85
10.3k
  NodeType *appendNode(Args &&...args) {
86
10.3k
    std::unique_ptr<NodeType> node =
87
10.3k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
10.3k
    NodeType *nodePtr = node.get();
89
10.3k
    nodeHolder_.push_back(std::move(node));
90
10.3k
    nodes_.push_back(nodePtr);
91
10.3k
    return nodePtr;
92
10.3k
  }
hermes::regex::RightAnchorNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::RightAnchorNode>()
Line
Count
Source
85
1
  NodeType *appendNode(Args &&...args) {
86
1
    std::unique_ptr<NodeType> node =
87
1
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
1
    NodeType *nodePtr = node.get();
89
1
    nodeHolder_.push_back(std::move(node));
90
1
    nodes_.push_back(nodePtr);
91
1
    return nodePtr;
92
1
  }
hermes::regex::WordBoundaryNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::WordBoundaryNode, bool&>(bool&)
Line
Count
Source
85
42.3k
  NodeType *appendNode(Args &&...args) {
86
42.3k
    std::unique_ptr<NodeType> node =
87
42.3k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
42.3k
    NodeType *nodePtr = node.get();
89
42.3k
    nodeHolder_.push_back(std::move(node));
90
42.3k
    nodes_.push_back(nodePtr);
91
42.3k
    return nodePtr;
92
42.3k
  }
hermes::regex::BracketNode<hermes::regex::UTF16RegexTraits>* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::BracketNode<hermes::regex::UTF16RegexTraits>, hermes::regex::UTF16RegexTraits&, bool&, hermes::regex::SyntaxFlags&>(hermes::regex::UTF16RegexTraits&, bool&, hermes::regex::SyntaxFlags&)
Line
Count
Source
85
45.0k
  NodeType *appendNode(Args &&...args) {
86
45.0k
    std::unique_ptr<NodeType> node =
87
45.0k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
45.0k
    NodeType *nodePtr = node.get();
89
45.0k
    nodeHolder_.push_back(std::move(node));
90
45.0k
    nodes_.push_back(nodePtr);
91
45.0k
    return nodePtr;
92
45.0k
  }
hermes::regex::BackRefNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::BackRefNode, unsigned int&>(unsigned int&)
Line
Count
Source
85
110
  NodeType *appendNode(Args &&...args) {
86
110
    std::unique_ptr<NodeType> node =
87
110
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
110
    NodeType *nodePtr = node.get();
89
110
    nodeHolder_.push_back(std::move(node));
90
110
    nodes_.push_back(nodePtr);
91
110
    return nodePtr;
92
110
  }
Unexecuted instantiation: hermes::regex::BackRefNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::BackRefNode, int>(int&&)
Unexecuted instantiation: hermes::regex::BackRefNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::BackRefNode, unsigned int>(unsigned int&&)
hermes::regex::MatchAnyNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::MatchAnyNode, hermes::regex::SyntaxFlags>(hermes::regex::SyntaxFlags&&)
Line
Count
Source
85
86.2k
  NodeType *appendNode(Args &&...args) {
86
86.2k
    std::unique_ptr<NodeType> node =
87
86.2k
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
86.2k
    NodeType *nodePtr = node.get();
89
86.2k
    nodeHolder_.push_back(std::move(node));
90
86.2k
    nodes_.push_back(nodePtr);
91
86.2k
    return nodePtr;
92
86.2k
  }
hermes::regex::MatchCharNode* hermes::regex::Regex<hermes::regex::UTF16RegexTraits>::appendNode<hermes::regex::MatchCharNode, llvh::SmallVector<unsigned int, 5u>, hermes::regex::SyntaxFlags>(llvh::SmallVector<unsigned int, 5u>&&, hermes::regex::SyntaxFlags&&)
Line
Count
Source
85
1.42M
  NodeType *appendNode(Args &&...args) {
86
1.42M
    std::unique_ptr<NodeType> node =
87
1.42M
        std::make_unique<NodeType>(std::forward<Args>(args)...);
88
1.42M
    NodeType *nodePtr = node.get();
89
1.42M
    nodeHolder_.push_back(std::move(node));
90
1.42M
    nodes_.push_back(nodePtr);
91
1.42M
    return nodePtr;
92
1.42M
  }
93
94
  /// \return the "current" node, which is the last (rightmost) node created.
95
1.92M
  Node *currentNode() {
96
1.92M
    return nodes_.back();
97
1.92M
  }
98
99
  /// \return the number of marked subexpressions.
100
1.92M
  uint16_t markedCount() const {
101
1.92M
    return markedCount_;
102
1.92M
  }
103
104
  /// Increment the number of marked subexpressions.
105
  /// \return the previous value of markedCount_.
106
60.6k
  uint16_t incrementMarkedCount() {
107
60.6k
    assert(
108
60.6k
        markedCount_ < std::numeric_limits<uint16_t>::max() &&
109
60.6k
        "markedCount_ will overflow");
110
60.6k
    return markedCount_++;
111
60.6k
  }
112
113
  /// Given that the node \p splicePoint is in our node list, remove all nodes
114
  /// after it. \return a list of the removed nodes.
115
211k
  NodeList spliceOut(Node *splicePoint) {
116
211k
    assert(splicePoint && "null node in spliceOut");
117
    // Find the index of the splice point. We expect it to be towards the end.
118
211k
    size_t spliceIndex = nodes_.size();
119
1.32M
    while (spliceIndex--) {
120
1.32M
      if (nodes_[spliceIndex] == splicePoint)
121
211k
        break;
122
1.32M
    }
123
211k
    assert(spliceIndex < nodes_.size() && "Node not in node list");
124
    // Move all nodes after the splice index into a new vector.
125
    // Note this may be empty.
126
211k
    auto firstToMove = nodes_.begin() + spliceIndex + 1;
127
211k
    NodeList result;
128
211k
    std::move(firstToMove, nodes_.end(), std::back_inserter(result));
129
211k
    nodes_.erase(firstToMove, nodes_.end());
130
211k
    return result;
131
211k
  }
132
133
 public:
134
  /// Compile the regex into bytecode. Return the resulting bytecode.
135
1.82k
  std::vector<uint8_t> compile() const {
136
1.82k
    assert(valid() && "Cannot compile invalid regex.");
137
    // TODO: add validation for the loop and reduce the size of loopCount_ to
138
    // uint16_t.
139
1.82k
    assert(
140
1.82k
        markedCount_ <= constants::kMaxCaptureGroupCount &&
141
1.82k
        "Too many capture groups");
142
1.82k
    assert(loopCount_ <= constants::kMaxLoopCount && "Too many loops");
143
1.82k
    RegexBytecodeHeader header = {
144
1.82k
        markedCount_,
145
1.82k
        static_cast<uint16_t>(loopCount_),
146
1.82k
        flags_.toByte(),
147
1.82k
        matchConstraints_};
148
1.82k
    RegexBytecodeStream bcs(header);
149
1.82k
    Node::compile(nodes_, bcs);
150
1.82k
    return bcs.acquireBytecode();
151
1.82k
  }
152
153
  // Constructors
154
  explicit Regex(const CharT *p, const char16_t *f = u"")
155
1
      : Regex(
156
1
            {p, p + std::char_traits<CharT>::length(p)},
157
1
            {f, f + std::char_traits<CharT>::length(f)}) {}
158
159
  Regex(
160
      const llvh::ArrayRef<CharT> pattern,
161
1.83k
      const llvh::ArrayRef<char16_t> flags = {}) {
162
    // Compute the SyntaxFlags based on the flags string.
163
1.83k
    auto sflags = SyntaxFlags::fromString(flags);
164
1.83k
    if (!sflags) {
165
1
      error_ = constants::ErrorType::InvalidFlags;
166
1
      return;
167
1
    }
168
1.82k
    flags_ = *sflags;
169
1.82k
    error_ = parse(pattern.begin(), pattern.end());
170
1.82k
  }
171
172
  // Disallow copy-assignment and copy-construction.
173
  Regex &operator=(const Regex &) = delete;
174
  Regex(const Regex &) = delete;
175
176
  /// Move-assignment and move-construction.
177
  Regex &operator=(Regex &&) = default;
178
  Regex(Regex &&) = default;
179
180
  // Accessors.
181
  unsigned markCount() const {
182
    return markedCount_;
183
  }
184
2.99M
  SyntaxFlags flags() const {
185
2.99M
    return flags_;
186
2.99M
  }
187
188
0
  std::deque<llvh::SmallVector<char16_t, 5>> &getOrderedNamedGroups() {
189
0
    return orderedGroupNames_;
190
0
  }
191
192
1.82k
  std::deque<llvh::SmallVector<char16_t, 5>> acquireOrderedGroupNames() {
193
1.82k
    return std::move(orderedGroupNames_);
194
1.82k
  }
195
196
0
  ParsedGroupNamesMapping &getGroupNamesMapping() {
197
0
    return nameMapping_;
198
0
  }
199
200
1.82k
  ParsedGroupNamesMapping acquireGroupNamesMapping() {
201
1.82k
    return std::move(nameMapping_);
202
1.82k
  }
203
204
0
  void sawNamedBackrefBeforeGroup() {
205
0
    sawNamedBackrefBeforeGroup_ = true;
206
0
  }
207
208
  /// \return any errors produced during parsing, or ErrorType::None if none.
209
2
  constants::ErrorType getError() const {
210
2
    return error_;
211
2
  }
212
213
  /// \return whether the regex was parsed successfully.
214
3.65k
  bool valid() const {
215
3.65k
    return error_ == constants::ErrorType::None;
216
3.65k
  }
217
218
  /// \return the set of match constraints for the regex.
219
  MatchConstraintSet matchConstraints() const {
220
    return matchConstraints_;
221
  }
222
223
 private:
224
  template <class ForwardIterator>
225
  constants::ErrorType parse(ForwardIterator first, ForwardIterator last);
226
227
  /// Attempt to parse the regex from the range [\p first, \p last), using
228
  /// \p backRefLimit as the maximum decimal escape to interpret as a
229
  /// backreference.  The maximum backreference that was in fact encountered
230
  /// is returned by reference in \p out_max_back_ref, if that is larger than
231
  /// its current value. \return an error code.
232
  template <class ForwardIterator>
233
  constants::ErrorType parseWithBackRefLimit(
234
      ForwardIterator first,
235
      ForwardIterator last,
236
      uint32_t backRefLimit,
237
      bool hasNamedGroups,
238
      uint32_t *outMaxBackRef);
239
240
  // Note- this method does not insert into the AST, it populates a different
241
  // datastructure. Returns false if the given name was already defined.
242
  bool addNamedCaptureGroup(GroupName &&identifier, uint32_t groupNum);
243
244
  bool resolveNamedBackRefs();
245
246
  void pushLeftAnchor();
247
  void pushRightAnchor();
248
  void pushMatchAny();
249
  void pushLoop(
250
      uint32_t min,
251
      uint32_t max,
252
      NodeList loopedList,
253
      uint32_t mexp_begin,
254
      bool greedy);
255
  BracketNode *startBracketList(bool negate);
256
  void pushChar(CodePoint c);
257
  void pushCharClass(CharacterClass c);
258
  void pushBackRef(uint32_t i);
259
  void pushNamedBackRef(GroupName &&identifier);
260
  void pushAlternation(std::vector<NodeList> alternatives);
261
  void pushMarkedSubexpression(NodeList, uint32_t mexp);
262
  void pushWordBoundary(bool);
263
  void pushLookaround(NodeList, uint16_t, uint16_t, bool, bool);
264
};
265
266
template <typename Receiver>
267
constants::ErrorType parseRegex(
268
    const char16_t *start,
269
    const char16_t *end,
270
    Receiver *receiver,
271
    SyntaxFlags flags,
272
    uint32_t backRefLimit,
273
    bool hasNamedGroups,
274
    uint32_t *outMaxBackRef);
275
276
template <class Traits>
277
template <class ForwardIterator>
278
constants::ErrorType Regex<Traits>::parse(
279
    ForwardIterator first,
280
1.82k
    ForwardIterator last) {
281
1.82k
  uint32_t maxBackRef = 0;
282
1.82k
  bool hasNamedGroups = flags_.unicode;
283
1.82k
  auto result = parseWithBackRefLimit(
284
1.82k
      first,
285
1.82k
      last,
286
1.82k
      constants::kMaxCaptureGroupCount,
287
1.82k
      hasNamedGroups,
288
1.82k
      &maxBackRef);
289
290
  // Validate loop and capture group count.
291
1.82k
  if (loopCount_ > constants::kMaxLoopCount) {
292
0
    return constants::ErrorType::PatternExceedsParseLimits;
293
0
  }
294
295
  // See comment --DecimalEscape--
296
  // We parsed without a backreference limit because we had to parse to discover
297
  // the limit. Now we know that we wrongly interpreted a decimal escape as a
298
  // backreference. See ES6 Annex B.1.4 DecimalEscape "but only if the integer
299
  // value DecimalEscape is <= NCapturingParens". Now that we know the true
300
  // capture group count, either produce an error (if Unicode) or re-parse with
301
  // that as the limit so overlarge decimal escapes will be ignored.
302
1.82k
  bool reparseForNumberedBackref =
303
1.82k
      result == constants::ErrorType::None && maxBackRef > markedCount_;
304
  // We must also reparse if there were any named capture groups used and it is
305
  // not unicode mode.
306
1.82k
  bool reparseForNamedBackref = false;
307
1.82k
  if (!flags_.unicode && nameMapping_.size() > 0 &&
308
0
      sawNamedBackrefBeforeGroup_) {
309
0
    reparseForNamedBackref = true;
310
0
    hasNamedGroups = true;
311
0
  }
312
313
1.82k
  if (reparseForNumberedBackref || reparseForNamedBackref) {
314
0
    if (flags_.unicode) {
315
0
      return constants::ErrorType::EscapeInvalid;
316
0
    }
317
318
0
    uint32_t backRefLimit = markedCount_;
319
0
    uint32_t reparsedMaxBackRef = 0;
320
0
    loopCount_ = 0;
321
0
    markedCount_ = 0;
322
0
    matchConstraints_ = 0;
323
0
    nameMapping_.clear();
324
0
    orderedGroupNames_.clear();
325
0
    result = parseWithBackRefLimit(
326
0
        first, last, backRefLimit, hasNamedGroups, &reparsedMaxBackRef);
327
0
    assert(
328
0
        reparsedMaxBackRef <= backRefLimit &&
329
0
        "invalid backreference generated");
330
0
    (void)reparsedMaxBackRef;
331
0
  }
332
1.82k
  return result;
333
1.82k
}
334
335
template <class Traits>
336
template <class ForwardIterator>
337
constants::ErrorType Regex<Traits>::parseWithBackRefLimit(
338
    ForwardIterator first,
339
    ForwardIterator last,
340
    uint32_t backRefLimit,
341
    bool hasNamedGroups,
342
1.82k
    uint32_t *outMaxBackRef) {
343
  // Initialize our node list with a single no-op node (it must never be empty.)
344
1.82k
  nodes_.clear();
345
1.82k
  appendNode<Node>();
346
1.82k
  auto result = parseRegex(
347
1.82k
      first, last, this, flags_, backRefLimit, hasNamedGroups, outMaxBackRef);
348
349
  // If we succeeded, add a goal node as the last node and perform optimizations
350
  // on the list.
351
1.82k
  if (result == constants::ErrorType::None) {
352
1.82k
    appendNode<GoalNode>();
353
1.82k
    Node::optimizeNodeList(nodes_, flags_, nodeHolder_);
354
1.82k
    if (!resolveNamedBackRefs()) {
355
0
      return constants::ErrorType::NonexistentNamedCaptureReference;
356
0
    }
357
1.82k
  }
358
359
  // Compute any match constraints.
360
1.82k
  matchConstraints_ = Node::matchConstraintsForList(nodes_);
361
362
1.82k
  return result;
363
1.82k
}
364
365
template <class Traits>
366
1.82k
bool Regex<Traits>::resolveNamedBackRefs() {
367
1.82k
  for (auto &[name, backRef] : unresolvedNamedBackRefs_) {
368
0
    auto search = nameMapping_.find(name);
369
0
    if (search == nameMapping_.end()) {
370
0
      return false;
371
0
    }
372
0
    auto groupNum = search->second;
373
0
    backRef->setBackRef(groupNum - 1);
374
0
  }
375
1.82k
  return true;
376
1.82k
}
377
378
template <class Traits>
379
void Regex<Traits>::pushLoop(
380
    uint32_t min,
381
    uint32_t max,
382
    NodeList loopedExpr,
383
    uint32_t mexp_begin,
384
79.6k
    bool greedy) {
385
79.6k
  appendNode<LoopNode>(
386
79.6k
      loopCount_++,
387
79.6k
      min,
388
79.6k
      max,
389
79.6k
      greedy,
390
79.6k
      mexp_begin,
391
79.6k
      markedCount_,
392
79.6k
      std::move(loopedExpr));
393
79.6k
}
394
395
template <class Traits>
396
1.42M
void Regex<Traits>::pushChar(CodePoint c) {
397
1.42M
  bool icase = flags().ignoreCase;
398
1.42M
  if (icase)
399
51.4k
    c = traits_.canonicalize(c, flags().unicode);
400
1.42M
  appendNode<MatchCharNode>(Node::CodePointList{c}, flags());
401
1.42M
}
402
403
template <class Traits>
404
45.0k
void Regex<Traits>::pushCharClass(CharacterClass c) {
405
45.0k
  auto bracket = startBracketList(false);
406
45.0k
  bracket->addClass(c);
407
45.0k
}
408
409
template <class Traits>
410
60.6k
void Regex<Traits>::pushMarkedSubexpression(NodeList nodes, uint32_t mexp) {
411
60.6k
  appendNode<MarkedSubexpressionNode>(std::move(nodes), mexp);
412
60.6k
}
413
414
template <class Traits>
415
10.3k
void Regex<Traits>::pushLeftAnchor() {
416
10.3k
  appendNode<LeftAnchorNode>(flags());
417
10.3k
}
418
419
template <class Traits>
420
1
void Regex<Traits>::pushRightAnchor() {
421
1
  appendNode<RightAnchorNode>();
422
1
}
423
424
template <class Traits>
425
86.2k
void Regex<Traits>::pushMatchAny() {
426
86.2k
  appendNode<MatchAnyNode>(flags());
427
86.2k
}
428
429
template <class Traits>
430
42.3k
void Regex<Traits>::pushWordBoundary(bool invert) {
431
42.3k
  appendNode<WordBoundaryNode>(invert);
432
42.3k
}
433
434
template <class Traits>
435
110
void Regex<Traits>::pushBackRef(uint32_t i) {
436
110
  appendNode<BackRefNode>(i);
437
110
}
438
439
template <class Traits>
440
0
void Regex<Traits>::pushNamedBackRef(GroupName &&identifier) {
441
0
  auto search = nameMapping_.find(identifier);
442
0
  if (search == nameMapping_.end()) {
443
    // If this name hasn't been defined yet, we have a case of an ambiguous
444
    // named backref. It could be valid or not, because the group name could be
445
    // defined in the future. We will revist these nodes at the end to see if
446
    // they are valid.
447
0
    BackRefNode *backRef = appendNode<BackRefNode>(0);
448
0
    unresolvedNamedBackRefs_.emplace_back(std::move(identifier), backRef);
449
0
    return;
450
0
  }
451
0
  auto groupNum = search->second;
452
0
  appendNode<BackRefNode>(groupNum - 1);
453
0
}
454
455
template <class Traits>
456
789
void Regex<Traits>::pushAlternation(std::vector<NodeList> alternatives) {
457
789
  appendNode<AlternationNode>(std::move(alternatives));
458
789
}
459
460
template <class Traits>
461
45.0k
BracketNode<Traits> *Regex<Traits>::startBracketList(bool negate) {
462
45.0k
  return appendNode<BracketNode>(traits_, negate, flags_);
463
45.0k
}
464
465
template <class Traits>
466
void Regex<Traits>::pushLookaround(
467
    NodeList exp,
468
    uint16_t mexpBegin,
469
    uint16_t mexpEnd,
470
    bool invert,
471
0
    bool forwards) {
472
0
  if (!forwards) {
473
0
    Node::reverseNodeList(exp);
474
0
  }
475
0
  nodeHolder_.push_back(std::make_unique<GoalNode>());
476
0
  exp.push_back(nodeHolder_.back().get());
477
0
  appendNode<LookaroundNode>(
478
0
      std::move(exp), mexpBegin, mexpEnd, invert, forwards);
479
0
}
480
481
template <class Traits>
482
bool Regex<Traits>::addNamedCaptureGroup(
483
    GroupName &&identifier,
484
0
    uint32_t groupNum) {
485
  // Add one to the given group number because later on this is used to index
486
  // into the whole matches array, which will be prepended with the entire match
487
  // string so all these group numbers will be off by one if we don't offset it
488
  // here.
489
0
  auto &elm = orderedGroupNames_.emplace_back(std::move(identifier));
490
0
  auto res = nameMapping_.try_emplace(elm, groupNum + 1);
491
0
  return res.second;
492
0
}
493
494
} // namespace regex
495
} // namespace hermes
496
497
#endif // HERMES_REGEX_COMPILER_H