Coverage Report

Created: 2024-05-19 20:04

/src/re2/re2/walker-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#ifndef RE2_WALKER_INL_H_
6
#define RE2_WALKER_INL_H_
7
8
// Helper class for traversing Regexps without recursion.
9
// Clients should declare their own subclasses that override
10
// the PreVisit and PostVisit methods, which are called before
11
// and after visiting the subexpressions.
12
13
// Not quite the Visitor pattern, because (among other things)
14
// the Visitor pattern is recursive.
15
16
#include <stack>
17
18
#include "util/logging.h"
19
#include "re2/regexp.h"
20
21
namespace re2 {
22
23
template<typename T> struct WalkState;
24
25
template<typename T> class Regexp::Walker {
26
 public:
27
  Walker();
28
  virtual ~Walker();
29
30
  // Virtual method called before visiting re's children.
31
  // PreVisit passes ownership of its return value to its caller.
32
  // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
33
  // and passed to the child PreVisits and PostVisits as parent_arg.
34
  // At the top-most Regexp, parent_arg is arg passed to walk.
35
  // If PreVisit sets *stop to true, the walk does not recurse
36
  // into the children.  Instead it behaves as though the return
37
  // value from PreVisit is the return value from PostVisit.
38
  // The default PreVisit returns parent_arg.
39
  virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
40
41
  // Virtual method called after visiting re's children.
42
  // The pre_arg is the T that PreVisit returned.
43
  // The child_args is a vector of the T that the child PostVisits returned.
44
  // PostVisit takes ownership of pre_arg.
45
  // PostVisit takes ownership of the Ts
46
  // in *child_args, but not the vector itself.
47
  // PostVisit passes ownership of its return value
48
  // to its caller.
49
  // The default PostVisit simply returns pre_arg.
50
  virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
51
                      T* child_args, int nchild_args);
52
53
  // Virtual method called to copy a T,
54
  // when Walk notices that more than one child is the same re.
55
  virtual T Copy(T arg);
56
57
  // Virtual method called to do a "quick visit" of the re,
58
  // but not its children.  Only called once the visit budget
59
  // has been used up and we're trying to abort the walk
60
  // as quickly as possible.  Should return a value that
61
  // makes sense for the parent PostVisits still to be run.
62
  // This function is (hopefully) only called by
63
  // WalkExponential, but must be implemented by all clients,
64
  // just in case.
65
  virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
66
67
  // Walks over a regular expression.
68
  // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
69
  // Returns the T returned by PostVisit on re.
70
  T Walk(Regexp* re, T top_arg);
71
72
  // Like Walk, but doesn't use Copy.  This can lead to
73
  // exponential runtimes on cross-linked Regexps like the
74
  // ones generated by Simplify.  To help limit this,
75
  // at most max_visits nodes will be visited and then
76
  // the walk will be cut off early.
77
  // If the walk *is* cut off early, ShortVisit(re)
78
  // will be called on regexps that cannot be fully
79
  // visited rather than calling PreVisit/PostVisit.
80
  T WalkExponential(Regexp* re, T top_arg, int max_visits);
81
82
  // Clears the stack.  Should never be necessary, since
83
  // Walk always enters and exits with an empty stack.
84
  // Logs DFATAL if stack is not already clear.
85
  void Reset();
86
87
  // Returns whether walk was cut off.
88
76.8k
  bool stopped_early() { return stopped_early_; }
re2::Regexp::Walker<re2::Regexp*>::stopped_early()
Line
Count
Source
88
76.8k
  bool stopped_early() { return stopped_early_; }
Unexecuted instantiation: re2::Regexp::Walker<int>::stopped_early()
89
90
 private:
91
  // Walk state for the entire traversal.
92
  std::stack<WalkState<T>> stack_;
93
  bool stopped_early_;
94
  int max_visits_;
95
96
  T WalkInternal(Regexp* re, T top_arg, bool use_copy);
97
98
  Walker(const Walker&) = delete;
99
  Walker& operator=(const Walker&) = delete;
100
};
101
102
template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
103
                                                   T parent_arg,
104
751k
                                                   bool* stop) {
105
751k
  return parent_arg;
106
751k
}
Unexecuted instantiation: re2::Regexp::Walker<int>::PreVisit(re2::Regexp*, int, bool*)
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::PreVisit(re2::Regexp*, re2::Frag, bool*)
re2::Regexp::Walker<re2::Regexp*>::PreVisit(re2::Regexp*, re2::Regexp*, bool*)
Line
Count
Source
104
751k
                                                   bool* stop) {
105
751k
  return parent_arg;
106
751k
}
107
108
template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
109
                                                    T parent_arg,
110
                                                    T pre_arg,
111
                                                    T* child_args,
112
749k
                                                    int nchild_args) {
113
749k
  return pre_arg;
114
749k
}
re2::Regexp::Walker<int>::PostVisit(re2::Regexp*, int, int, int*, int)
Line
Count
Source
112
749k
                                                    int nchild_args) {
113
749k
  return pre_arg;
114
749k
}
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::PostVisit(re2::Regexp*, re2::Frag, re2::Frag, re2::Frag*, int)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::PostVisit(re2::Regexp*, re2::Regexp*, re2::Regexp*, re2::Regexp**, int)
115
116
0
template<typename T> T Regexp::Walker<T>::Copy(T arg) {
117
0
  return arg;
118
0
}
Unexecuted instantiation: re2::Regexp::Walker<int>::Copy(int)
Unexecuted instantiation: re2::Regexp::Walker<re2::Frag>::Copy(re2::Frag)
Unexecuted instantiation: re2::Regexp::Walker<re2::Regexp*>::Copy(re2::Regexp*)
119
120
// State about a single level in the traversal.
121
template<typename T> struct WalkState {
122
  WalkState(Regexp* re, T parent)
123
    : re(re),
124
      n(-1),
125
      parent_arg(parent),
126
34.3M
      child_args(NULL) { }
re2::WalkState<int>::WalkState(re2::Regexp*, int)
Line
Count
Source
126
879k
      child_args(NULL) { }
re2::WalkState<re2::Frag>::WalkState(re2::Regexp*, re2::Frag)
Line
Count
Source
126
32.3M
      child_args(NULL) { }
re2::WalkState<re2::Regexp*>::WalkState(re2::Regexp*, re2::Regexp*)
Line
Count
Source
126
1.18M
      child_args(NULL) { }
127
128
  Regexp* re;  // The regexp
129
  int n;  // The index of the next child to process; -1 means need to PreVisit
130
  T parent_arg;  // Accumulated arguments.
131
  T pre_arg;
132
  T child_arg;  // One-element buffer for child_args.
133
  T* child_args;
134
};
135
136
173k
template<typename T> Regexp::Walker<T>::Walker() {
137
173k
  stopped_early_ = false;
138
173k
}
re2::Regexp::Walker<int>::Walker()
Line
Count
Source
136
58.1k
template<typename T> Regexp::Walker<T>::Walker() {
137
58.1k
  stopped_early_ = false;
138
58.1k
}
re2::Regexp::Walker<re2::Frag>::Walker()
Line
Count
Source
136
38.4k
template<typename T> Regexp::Walker<T>::Walker() {
137
38.4k
  stopped_early_ = false;
138
38.4k
}
re2::Regexp::Walker<re2::Regexp*>::Walker()
Line
Count
Source
136
76.8k
template<typename T> Regexp::Walker<T>::Walker() {
137
76.8k
  stopped_early_ = false;
138
76.8k
}
139
140
173k
template<typename T> Regexp::Walker<T>::~Walker() {
141
173k
  Reset();
142
173k
}
re2::Regexp::Walker<int>::~Walker()
Line
Count
Source
140
58.1k
template<typename T> Regexp::Walker<T>::~Walker() {
141
58.1k
  Reset();
142
58.1k
}
re2::Regexp::Walker<re2::Frag>::~Walker()
Line
Count
Source
140
38.4k
template<typename T> Regexp::Walker<T>::~Walker() {
141
38.4k
  Reset();
142
38.4k
}
re2::Regexp::Walker<re2::Regexp*>::~Walker()
Line
Count
Source
140
76.8k
template<typename T> Regexp::Walker<T>::~Walker() {
141
76.8k
  Reset();
142
76.8k
}
143
144
// Clears the stack.  Should never be necessary, since
145
// Walk always enters and exits with an empty stack.
146
// Logs DFATAL if stack is not already clear.
147
347k
template<typename T> void Regexp::Walker<T>::Reset() {
148
347k
  if (!stack_.empty()) {
149
0
    LOG(DFATAL) << "Stack not empty.";
150
0
    while (!stack_.empty()) {
151
0
      if (stack_.top().re->nsub_ > 1)
152
0
        delete[] stack_.top().child_args;
153
0
      stack_.pop();
154
0
    }
155
0
  }
156
347k
}
re2::Regexp::Walker<int>::Reset()
Line
Count
Source
147
116k
template<typename T> void Regexp::Walker<T>::Reset() {
148
116k
  if (!stack_.empty()) {
149
0
    LOG(DFATAL) << "Stack not empty.";
150
0
    while (!stack_.empty()) {
151
0
      if (stack_.top().re->nsub_ > 1)
152
0
        delete[] stack_.top().child_args;
153
0
      stack_.pop();
154
0
    }
155
0
  }
156
116k
}
re2::Regexp::Walker<re2::Frag>::Reset()
Line
Count
Source
147
76.8k
template<typename T> void Regexp::Walker<T>::Reset() {
148
76.8k
  if (!stack_.empty()) {
149
0
    LOG(DFATAL) << "Stack not empty.";
150
0
    while (!stack_.empty()) {
151
0
      if (stack_.top().re->nsub_ > 1)
152
0
        delete[] stack_.top().child_args;
153
0
      stack_.pop();
154
0
    }
155
0
  }
156
76.8k
}
re2::Regexp::Walker<re2::Regexp*>::Reset()
Line
Count
Source
147
153k
template<typename T> void Regexp::Walker<T>::Reset() {
148
153k
  if (!stack_.empty()) {
149
0
    LOG(DFATAL) << "Stack not empty.";
150
0
    while (!stack_.empty()) {
151
0
      if (stack_.top().re->nsub_ > 1)
152
0
        delete[] stack_.top().child_args;
153
0
      stack_.pop();
154
0
    }
155
0
  }
156
153k
}
157
158
template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
159
173k
                                                       bool use_copy) {
160
173k
  Reset();
161
162
173k
  if (re == NULL) {
163
0
    LOG(DFATAL) << "Walk NULL";
164
0
    return top_arg;
165
0
  }
166
167
173k
  stack_.push(WalkState<T>(re, top_arg));
168
169
173k
  WalkState<T>* s;
170
68.6M
  for (;;) {
171
68.6M
    T t;
172
68.6M
    s = &stack_.top();
173
68.6M
    re = s->re;
174
68.6M
    switch (s->n) {
175
34.3M
      case -1: {
176
34.3M
        if (--max_visits_ < 0) {
177
0
          stopped_early_ = true;
178
0
          t = ShortVisit(re, s->parent_arg);
179
0
          break;
180
0
        }
181
34.3M
        bool stop = false;
182
34.3M
        s->pre_arg = PreVisit(re, s->parent_arg, &stop);
183
34.3M
        if (stop) {
184
297k
          t = s->pre_arg;
185
297k
          break;
186
297k
        }
187
34.0M
        s->n = 0;
188
34.0M
        s->child_args = NULL;
189
34.0M
        if (re->nsub_ == 1)
190
5.66M
          s->child_args = &s->child_arg;
191
28.4M
        else if (re->nsub_ > 1)
192
6.15M
          s->child_args = new T[re->nsub_];
193
34.0M
        FALLTHROUGH_INTENDED;
194
34.0M
      }
195
68.3M
      default: {
196
68.3M
        if (re->nsub_ > 0) {
197
46.0M
          Regexp** sub = re->sub();
198
46.0M
          if (s->n < re->nsub_) {
199
34.2M
            if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
200
0
              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
201
0
              s->n++;
202
34.2M
            } else {
203
34.2M
              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
204
34.2M
            }
205
34.2M
            continue;
206
34.2M
          }
207
46.0M
        }
208
209
34.0M
        t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
210
34.0M
        if (re->nsub_ > 1)
211
6.15M
          delete[] s->child_args;
212
34.0M
        break;
213
68.3M
      }
214
68.6M
    }
215
216
    // We've finished stack_.top().
217
    // Update next guy down.
218
34.3M
    stack_.pop();
219
34.3M
    if (stack_.empty())
220
173k
      return t;
221
34.2M
    s = &stack_.top();
222
34.2M
    if (s->child_args != NULL)
223
34.2M
      s->child_args[s->n] = t;
224
0
    else
225
0
      s->child_arg = t;
226
34.2M
    s->n++;
227
34.2M
  }
228
173k
}
re2::Regexp::Walker<int>::WalkInternal(re2::Regexp*, int, bool)
Line
Count
Source
159
58.1k
                                                       bool use_copy) {
160
58.1k
  Reset();
161
162
58.1k
  if (re == NULL) {
163
0
    LOG(DFATAL) << "Walk NULL";
164
0
    return top_arg;
165
0
  }
166
167
58.1k
  stack_.push(WalkState<T>(re, top_arg));
168
169
58.1k
  WalkState<T>* s;
170
1.70M
  for (;;) {
171
1.70M
    T t;
172
1.70M
    s = &stack_.top();
173
1.70M
    re = s->re;
174
1.70M
    switch (s->n) {
175
879k
      case -1: {
176
879k
        if (--max_visits_ < 0) {
177
0
          stopped_early_ = true;
178
0
          t = ShortVisit(re, s->parent_arg);
179
0
          break;
180
0
        }
181
879k
        bool stop = false;
182
879k
        s->pre_arg = PreVisit(re, s->parent_arg, &stop);
183
879k
        if (stop) {
184
0
          t = s->pre_arg;
185
0
          break;
186
0
        }
187
879k
        s->n = 0;
188
879k
        s->child_args = NULL;
189
879k
        if (re->nsub_ == 1)
190
150k
          s->child_args = &s->child_arg;
191
728k
        else if (re->nsub_ > 1)
192
126k
          s->child_args = new T[re->nsub_];
193
879k
        FALLTHROUGH_INTENDED;
194
879k
      }
195
1.70M
      default: {
196
1.70M
        if (re->nsub_ > 0) {
197
1.09M
          Regexp** sub = re->sub();
198
1.09M
          if (s->n < re->nsub_) {
199
821k
            if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
200
0
              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
201
0
              s->n++;
202
821k
            } else {
203
821k
              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
204
821k
            }
205
821k
            continue;
206
821k
          }
207
1.09M
        }
208
209
879k
        t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
210
879k
        if (re->nsub_ > 1)
211
126k
          delete[] s->child_args;
212
879k
        break;
213
1.70M
      }
214
1.70M
    }
215
216
    // We've finished stack_.top().
217
    // Update next guy down.
218
879k
    stack_.pop();
219
879k
    if (stack_.empty())
220
58.1k
      return t;
221
821k
    s = &stack_.top();
222
821k
    if (s->child_args != NULL)
223
821k
      s->child_args[s->n] = t;
224
0
    else
225
0
      s->child_arg = t;
226
821k
    s->n++;
227
821k
  }
228
58.1k
}
re2::Regexp::Walker<re2::Frag>::WalkInternal(re2::Regexp*, re2::Frag, bool)
Line
Count
Source
159
38.4k
                                                       bool use_copy) {
160
38.4k
  Reset();
161
162
38.4k
  if (re == NULL) {
163
0
    LOG(DFATAL) << "Walk NULL";
164
0
    return top_arg;
165
0
  }
166
167
38.4k
  stack_.push(WalkState<T>(re, top_arg));
168
169
38.4k
  WalkState<T>* s;
170
64.6M
  for (;;) {
171
64.6M
    T t;
172
64.6M
    s = &stack_.top();
173
64.6M
    re = s->re;
174
64.6M
    switch (s->n) {
175
32.3M
      case -1: {
176
32.3M
        if (--max_visits_ < 0) {
177
0
          stopped_early_ = true;
178
0
          t = ShortVisit(re, s->parent_arg);
179
0
          break;
180
0
        }
181
32.3M
        bool stop = false;
182
32.3M
        s->pre_arg = PreVisit(re, s->parent_arg, &stop);
183
32.3M
        if (stop) {
184
14.1k
          t = s->pre_arg;
185
14.1k
          break;
186
14.1k
        }
187
32.3M
        s->n = 0;
188
32.3M
        s->child_args = NULL;
189
32.3M
        if (re->nsub_ == 1)
190
5.36M
          s->child_args = &s->child_arg;
191
26.9M
        else if (re->nsub_ > 1)
192
5.83M
          s->child_args = new T[re->nsub_];
193
32.3M
        FALLTHROUGH_INTENDED;
194
32.3M
      }
195
64.5M
      default: {
196
64.5M
        if (re->nsub_ > 0) {
197
43.4M
          Regexp** sub = re->sub();
198
43.4M
          if (s->n < re->nsub_) {
199
32.2M
            if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
200
0
              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
201
0
              s->n++;
202
32.2M
            } else {
203
32.2M
              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
204
32.2M
            }
205
32.2M
            continue;
206
32.2M
          }
207
43.4M
        }
208
209
32.3M
        t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
210
32.3M
        if (re->nsub_ > 1)
211
5.83M
          delete[] s->child_args;
212
32.3M
        break;
213
64.5M
      }
214
64.6M
    }
215
216
    // We've finished stack_.top().
217
    // Update next guy down.
218
32.3M
    stack_.pop();
219
32.3M
    if (stack_.empty())
220
38.4k
      return t;
221
32.2M
    s = &stack_.top();
222
32.2M
    if (s->child_args != NULL)
223
32.2M
      s->child_args[s->n] = t;
224
0
    else
225
0
      s->child_arg = t;
226
32.2M
    s->n++;
227
32.2M
  }
228
38.4k
}
re2::Regexp::Walker<re2::Regexp*>::WalkInternal(re2::Regexp*, re2::Regexp*, bool)
Line
Count
Source
159
76.8k
                                                       bool use_copy) {
160
76.8k
  Reset();
161
162
76.8k
  if (re == NULL) {
163
0
    LOG(DFATAL) << "Walk NULL";
164
0
    return top_arg;
165
0
  }
166
167
76.8k
  stack_.push(WalkState<T>(re, top_arg));
168
169
76.8k
  WalkState<T>* s;
170
2.30M
  for (;;) {
171
2.30M
    T t;
172
2.30M
    s = &stack_.top();
173
2.30M
    re = s->re;
174
2.30M
    switch (s->n) {
175
1.18M
      case -1: {
176
1.18M
        if (--max_visits_ < 0) {
177
0
          stopped_early_ = true;
178
0
          t = ShortVisit(re, s->parent_arg);
179
0
          break;
180
0
        }
181
1.18M
        bool stop = false;
182
1.18M
        s->pre_arg = PreVisit(re, s->parent_arg, &stop);
183
1.18M
        if (stop) {
184
283k
          t = s->pre_arg;
185
283k
          break;
186
283k
        }
187
906k
        s->n = 0;
188
906k
        s->child_args = NULL;
189
906k
        if (re->nsub_ == 1)
190
156k
          s->child_args = &s->child_arg;
191
749k
        else if (re->nsub_ > 1)
192
193k
          s->child_args = new T[re->nsub_];
193
906k
        FALLTHROUGH_INTENDED;
194
906k
      }
195
2.01M
      default: {
196
2.01M
        if (re->nsub_ > 0) {
197
1.46M
          Regexp** sub = re->sub();
198
1.46M
          if (s->n < re->nsub_) {
199
1.11M
            if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
200
0
              s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
201
0
              s->n++;
202
1.11M
            } else {
203
1.11M
              stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
204
1.11M
            }
205
1.11M
            continue;
206
1.11M
          }
207
1.46M
        }
208
209
906k
        t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
210
906k
        if (re->nsub_ > 1)
211
193k
          delete[] s->child_args;
212
906k
        break;
213
2.01M
      }
214
2.30M
    }
215
216
    // We've finished stack_.top().
217
    // Update next guy down.
218
1.18M
    stack_.pop();
219
1.18M
    if (stack_.empty())
220
76.8k
      return t;
221
1.11M
    s = &stack_.top();
222
1.11M
    if (s->child_args != NULL)
223
1.11M
      s->child_args[s->n] = t;
224
0
    else
225
0
      s->child_arg = t;
226
1.11M
    s->n++;
227
1.11M
  }
228
76.8k
}
229
230
135k
template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
231
  // Without the exponential walking behavior,
232
  // this budget should be more than enough for any
233
  // regexp, and yet not enough to get us in trouble
234
  // as far as CPU time.
235
135k
  max_visits_ = 1000000;
236
135k
  return WalkInternal(re, top_arg, true);
237
135k
}
re2::Regexp::Walker<int>::Walk(re2::Regexp*, int)
Line
Count
Source
230
58.1k
template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
231
  // Without the exponential walking behavior,
232
  // this budget should be more than enough for any
233
  // regexp, and yet not enough to get us in trouble
234
  // as far as CPU time.
235
58.1k
  max_visits_ = 1000000;
236
58.1k
  return WalkInternal(re, top_arg, true);
237
58.1k
}
re2::Regexp::Walker<re2::Regexp*>::Walk(re2::Regexp*, re2::Regexp*)
Line
Count
Source
230
76.8k
template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
231
  // Without the exponential walking behavior,
232
  // this budget should be more than enough for any
233
  // regexp, and yet not enough to get us in trouble
234
  // as far as CPU time.
235
76.8k
  max_visits_ = 1000000;
236
76.8k
  return WalkInternal(re, top_arg, true);
237
76.8k
}
238
239
template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
240
38.4k
                                                          int max_visits) {
241
38.4k
  max_visits_ = max_visits;
242
38.4k
  return WalkInternal(re, top_arg, false);
243
38.4k
}
re2::Regexp::Walker<re2::Frag>::WalkExponential(re2::Regexp*, re2::Frag, int)
Line
Count
Source
240
38.4k
                                                          int max_visits) {
241
38.4k
  max_visits_ = max_visits;
242
38.4k
  return WalkInternal(re, top_arg, false);
243
38.4k
}
Unexecuted instantiation: re2::Regexp::Walker<int>::WalkExponential(re2::Regexp*, int, int)
244
245
}  // namespace re2
246
247
#endif  // RE2_WALKER_INL_H_