Coverage Report

Created: 2026-05-27 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/external/re2~/re2/walker-inl.h
Line
Count
Source
1
// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#ifndef RE2_WALKER_INL_H_
6
#define RE2_WALKER_INL_H_
7
8
// Helper class for traversing Regexps without recursion.
9
// Clients should declare their own subclasses that override
10
// the PreVisit and PostVisit methods, which are called before
11
// and after visiting the subexpressions.
12
13
// Not quite the Visitor pattern, because (among other things)
14
// the Visitor pattern is recursive.
15
16
#include <stack>
17
18
#include "absl/base/macros.h"
19
#include "absl/log/absl_check.h"
20
#include "absl/log/absl_log.h"
21
#include "re2/regexp.h"
22
23
namespace re2 {
24
25
template<typename T> struct WalkState;
26
27
template<typename T> class Regexp::Walker {
28
 public:
29
  Walker();
30
  virtual ~Walker();
31
32
  // Virtual method called before visiting re's children.
33
  // PreVisit passes ownership of its return value to its caller.
34
  // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
35
  // and passed to the child PreVisits and PostVisits as parent_arg.
36
  // At the top-most Regexp, parent_arg is arg passed to walk.
37
  // If PreVisit sets *stop to true, the walk does not recurse
38
  // into the children.  Instead it behaves as though the return
39
  // value from PreVisit is the return value from PostVisit.
40
  // The default PreVisit returns parent_arg.
41
  virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
42
43
  // Virtual method called after visiting re's children.
44
  // The pre_arg is the T that PreVisit returned.
45
  // The child_args is a vector of the T that the child PostVisits returned.
46
  // PostVisit takes ownership of pre_arg.
47
  // PostVisit takes ownership of the Ts
48
  // in *child_args, but not the vector itself.
49
  // PostVisit passes ownership of its return value
50
  // to its caller.
51
  // The default PostVisit simply returns pre_arg.
52
  virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
53
                      T* child_args, int nchild_args);
54
55
  // Virtual method called to copy a T,
56
  // when Walk notices that more than one child is the same re.
57
  virtual T Copy(T arg);
58
59
  // Virtual method called to do a "quick visit" of the re,
60
  // but not its children.  Only called once the visit budget
61
  // has been used up and we're trying to abort the walk
62
  // as quickly as possible.  Should return a value that
63
  // makes sense for the parent PostVisits still to be run.
64
  // This function is (hopefully) only called by
65
  // WalkExponential, but must be implemented by all clients,
66
  // just in case.
67
  virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
68
69
  // Walks over a regular expression.
70
  // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
71
  // Returns the T returned by PostVisit on re.
72
  T Walk(Regexp* re, T top_arg);
73
74
  // Like Walk, but doesn't use Copy.  This can lead to
75
  // exponential runtimes on cross-linked Regexps like the
76
  // ones generated by Simplify.  To help limit this,
77
  // at most max_visits nodes will be visited and then
78
  // the walk will be cut off early.
79
  // If the walk *is* cut off early, ShortVisit(re)
80
  // will be called on regexps that cannot be fully
81
  // visited rather than calling PreVisit/PostVisit.
82
  T WalkExponential(Regexp* re, T top_arg, int max_visits);
83
84
  // Clears the stack.  Should never be necessary, since
85
  // Walk always enters and exits with an empty stack.
86
  // Logs DFATAL if stack is not already clear.
87
  void Reset();
88
89
  // Returns whether walk was cut off.
90
0
  bool stopped_early() { return stopped_early_; }
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::stopped_early()
Unexecuted instantiation: re2::Regexp::Walker<int>::stopped_early()
91
92
 private:
93
  // Walk state for the entire traversal.
94
  std::stack<WalkState<T>> stack_;
95
  bool stopped_early_;
96
  int max_visits_;
97
98
  T WalkInternal(Regexp* re, T top_arg, bool use_copy);
99
100
  Walker(const Walker&) = delete;
101
  Walker& operator=(const Walker&) = delete;
102
};
103
104
template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
105
                                                   T parent_arg,
106
0
                                                   bool* stop) {
107
0
  return parent_arg;
108
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::PreVisit(re2::Regexp*, int, bool*)
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::PreVisit(re2::Regexp*, re2::Frag, bool*)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::PreVisit(re2::Regexp*, re2::Regexp*, bool*)
109
110
template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
111
                                                    T parent_arg,
112
                                                    T pre_arg,
113
                                                    T* child_args,
114
0
                                                    int nchild_args) {
115
0
  return pre_arg;
116
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::PostVisit(re2::Regexp*, int, int, int*, int)
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::PostVisit(re2::Regexp*, re2::Frag, re2::Frag, re2::Frag*, int)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::PostVisit(re2::Regexp*, re2::Regexp*, re2::Regexp*, re2::Regexp**, int)
117
118
0
template<typename T> T Regexp::Walker<T>::Copy(T arg) {
119
0
  return arg;
120
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::Copy(int)
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::Copy(re2::Frag)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::Copy(re2::Regexp*)
121
122
// State about a single level in the traversal.
123
template<typename T> struct WalkState {
124
  WalkState(Regexp* re, T parent)
125
0
    : re(re),
126
0
      n(-1),
127
0
      parent_arg(parent),
128
0
      child_args(NULL) { }
Unexecuted instantiation: re2::WalkState<int>::WalkState(re2::Regexp*, int)
Unexecuted instantiation: re2::WalkState<re2::Frag>::WalkState(re2::Regexp*, re2::Frag)
Unexecuted instantiation: re2::WalkState<re2::Regexp*>::WalkState(re2::Regexp*, re2::Regexp*)
129
130
  Regexp* re;  // The regexp
131
  int n;  // The index of the next child to process; -1 means need to PreVisit
132
  T parent_arg;  // Accumulated arguments.
133
  T pre_arg;
134
  T child_arg;  // One-element buffer for child_args.
135
  T* child_args;
136
};
137
138
0
template<typename T> Regexp::Walker<T>::Walker() {
139
0
  stopped_early_ = false;
140
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::Walker()
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::Walker()
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::Walker()
141
142
0
template<typename T> Regexp::Walker<T>::~Walker() {
143
0
  Reset();
144
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::~Walker()
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::~Walker()
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::~Walker()
145
146
// Clears the stack.  Should never be necessary, since
147
// Walk always enters and exits with an empty stack.
148
// Logs DFATAL if stack is not already clear.
149
0
template<typename T> void Regexp::Walker<T>::Reset() {
150
0
  if (!stack_.empty()) {
151
0
    ABSL_LOG(DFATAL) << "Stack not empty.";
152
0
    while (!stack_.empty()) {
153
0
      if (stack_.top().re->nsub_ > 1)
154
0
        delete[] stack_.top().child_args;
155
0
      stack_.pop();
156
0
    }
157
0
  }
158
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::Reset()
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::Reset()
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::Reset()
159
160
template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
161
0
                                                       bool use_copy) {
162
0
  Reset();
163
164
0
  if (re == NULL) {
165
0
    ABSL_LOG(DFATAL) << "Walk NULL";
166
0
    return top_arg;
167
0
  }
168
169
0
  stack_.push(WalkState<T>(re, top_arg));
170
171
0
  WalkState<T>* s;
172
0
  for (;;) {
173
0
    T t;
174
0
    s = &stack_.top();
175
0
    re = s->re;
176
0
    switch (s->n) {
177
0
      case -1: {
178
0
        if (--max_visits_ < 0) {
179
0
          stopped_early_ = true;
180
0
          t = ShortVisit(re, s->parent_arg);
181
0
          break;
182
0
        }
183
0
        bool stop = false;
184
0
        s->pre_arg = PreVisit(re, s->parent_arg, &stop);
185
0
        if (stop) {
186
0
          t = s->pre_arg;
187
0
          break;
188
0
        }
189
0
        s->n = 0;
190
0
        s->child_args = NULL;
191
0
        if (re->nsub_ == 1)
192
0
          s->child_args = &s->child_arg;
193
0
        else if (re->nsub_ > 1)
194
0
          s->child_args = new T[re->nsub_];
195
0
        [[fallthrough]];
196
0
      }
197
0
      default: {
198
0
        if (re->nsub_ > 0) {
199
0
          Regexp** sub = re->sub();
200
0
          if (s->n < re->nsub_) {
201
0
            if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
202
0
              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
203
0
              s->n++;
204
0
            } else {
205
0
              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
206
0
            }
207
0
            continue;
208
0
          }
209
0
        }
210
211
0
        t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
212
0
        if (re->nsub_ > 1)
213
0
          delete[] s->child_args;
214
0
        break;
215
0
      }
216
0
    }
217
218
    // We've finished stack_.top().
219
    // Update next guy down.
220
0
    stack_.pop();
221
0
    if (stack_.empty())
222
0
      return t;
223
0
    s = &stack_.top();
224
0
    if (s->child_args != NULL)
225
0
      s->child_args[s->n] = t;
226
0
    else
227
0
      s->child_arg = t;
228
0
    s->n++;
229
0
  }
230
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::WalkInternal(re2::Regexp*, int, bool)
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::WalkInternal(re2::Regexp*, re2::Frag, bool)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::WalkInternal(re2::Regexp*, re2::Regexp*, bool)
231
232
0
template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
233
  // Without the exponential walking behavior,
234
  // this budget should be more than enough for any
235
  // regexp, and yet not enough to get us in trouble
236
  // as far as CPU time.
237
0
  max_visits_ = 1000000;
238
0
  return WalkInternal(re, top_arg, true);
239
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::Walk(re2::Regexp*, int)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::Walk(re2::Regexp*, re2::Regexp*)
240
241
template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
242
0
                                                          int max_visits) {
243
0
  max_visits_ = max_visits;
244
0
  return WalkInternal(re, top_arg, false);
245
0
}
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::WalkExponential(re2::Regexp*, re2::Frag, int)
Unexecuted instantiation: re2::Regexp::Walker<int>::WalkExponential(re2::Regexp*, int, int)
246
247
}  // namespace re2
248
249
#endif  // RE2_WALKER_INL_H_