Coverage Report

Created: 2026-05-27 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/external/re2~/re2/tostring.cc
Line
Count
Source
1
// Copyright 2006 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Format a regular expression structure as a string.
6
// Tested by parse_test.cc
7
8
#include <string.h>
9
10
#include <string>
11
12
#include "absl/log/absl_log.h"
13
#include "absl/strings/str_format.h"
14
#include "re2/regexp.h"
15
#include "re2/walker-inl.h"
16
#include "util/utf.h"
17
18
namespace re2 {
19
20
enum {
21
  PrecAtom,
22
  PrecUnary,
23
  PrecConcat,
24
  PrecAlternate,
25
  PrecEmpty,
26
  PrecParen,
27
  PrecToplevel,
28
};
29
30
// Helper function.  See description below.
31
static void AppendCCRange(std::string* t, Rune lo, Rune hi);
32
33
// Walker to generate string in s_.
34
// The arg pointers are actually integers giving the
35
// context precedence.
36
// The child_args are always NULL.
37
class ToStringWalker : public Regexp::Walker<int> {
38
 public:
39
0
  explicit ToStringWalker(std::string* t) : t_(t) {}
40
41
  virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
42
  virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
43
                        int* child_args, int nchild_args);
44
0
  virtual int ShortVisit(Regexp* re, int parent_arg) {
45
0
    return 0;
46
0
  }
47
48
 private:
49
  std::string* t_;  // The string the walker appends to.
50
51
  ToStringWalker(const ToStringWalker&) = delete;
52
  ToStringWalker& operator=(const ToStringWalker&) = delete;
53
};
54
55
0
std::string Regexp::ToString() {
56
0
  std::string t;
57
0
  ToStringWalker w(&t);
58
0
  w.WalkExponential(this, PrecToplevel, 100000);
59
0
  if (w.stopped_early())
60
0
    t += " [truncated]";
61
0
  return t;
62
0
}
63
64
#define ToString DontCallToString  // Avoid accidental recursion.
65
66
// Visits re before children are processed.
67
// Appends ( if needed and passes new precedence to children.
68
0
int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
69
0
  int prec = parent_arg;
70
0
  int nprec = PrecAtom;
71
72
0
  switch (re->op()) {
73
0
    case kRegexpNoMatch:
74
0
    case kRegexpEmptyMatch:
75
0
    case kRegexpLiteral:
76
0
    case kRegexpAnyChar:
77
0
    case kRegexpAnyByte:
78
0
    case kRegexpBeginLine:
79
0
    case kRegexpEndLine:
80
0
    case kRegexpBeginText:
81
0
    case kRegexpEndText:
82
0
    case kRegexpWordBoundary:
83
0
    case kRegexpNoWordBoundary:
84
0
    case kRegexpCharClass:
85
0
    case kRegexpHaveMatch:
86
0
      nprec = PrecAtom;
87
0
      break;
88
89
0
    case kRegexpConcat:
90
0
    case kRegexpLiteralString:
91
0
      if (prec < PrecConcat)
92
0
        t_->append("(?:");
93
0
      nprec = PrecConcat;
94
0
      break;
95
96
0
    case kRegexpAlternate:
97
0
      if (prec < PrecAlternate)
98
0
        t_->append("(?:");
99
0
      nprec = PrecAlternate;
100
0
      break;
101
102
0
    case kRegexpCapture:
103
0
      t_->append("(");
104
0
      if (re->cap() == 0)
105
0
        ABSL_LOG(DFATAL) << "kRegexpCapture cap() == 0";
106
0
      if (re->name()) {
107
0
        t_->append("?P<");
108
0
        t_->append(*re->name());
109
0
        t_->append(">");
110
0
      }
111
0
      nprec = PrecParen;
112
0
      break;
113
114
0
    case kRegexpStar:
115
0
    case kRegexpPlus:
116
0
    case kRegexpQuest:
117
0
    case kRegexpRepeat:
118
0
      if (prec < PrecUnary)
119
0
        t_->append("(?:");
120
      // The subprecedence here is PrecAtom instead of PrecUnary
121
      // because PCRE treats two unary ops in a row as a parse error.
122
0
      nprec = PrecAtom;
123
0
      break;
124
0
  }
125
126
0
  return nprec;
127
0
}
128
129
0
static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
130
0
  if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
131
0
    t->append(1, '\\');
132
0
    t->append(1, static_cast<char>(r));
133
0
  } else if (foldcase && 'a' <= r && r <= 'z') {
134
0
    r -= 'a' - 'A';
135
0
    t->append(1, '[');
136
0
    t->append(1, static_cast<char>(r));
137
0
    t->append(1, static_cast<char>(r) + 'a' - 'A');
138
0
    t->append(1, ']');
139
0
  } else {
140
0
    AppendCCRange(t, r, r);
141
0
  }
142
0
}
143
144
// Visits re after children are processed.
145
// For childless regexps, all the work is done here.
146
// For regexps with children, append any unary suffixes or ).
147
int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
148
0
                              int* child_args, int nchild_args) {
149
0
  int prec = parent_arg;
150
0
  switch (re->op()) {
151
0
    case kRegexpNoMatch:
152
      // There's no simple symbol for "no match", but
153
      // [^0-Runemax] excludes everything.
154
0
      t_->append("[^\\x00-\\x{10ffff}]");
155
0
      break;
156
157
0
    case kRegexpEmptyMatch:
158
      // Append (?:) to make empty string visible,
159
      // unless this is already being parenthesized.
160
0
      if (prec < PrecEmpty)
161
0
        t_->append("(?:)");
162
0
      break;
163
164
0
    case kRegexpLiteral:
165
0
      AppendLiteral(t_, re->rune(),
166
0
                    (re->parse_flags() & Regexp::FoldCase) != 0);
167
0
      break;
168
169
0
    case kRegexpLiteralString:
170
0
      for (int i = 0; i < re->nrunes(); i++)
171
0
        AppendLiteral(t_, re->runes()[i],
172
0
                      (re->parse_flags() & Regexp::FoldCase) != 0);
173
0
      if (prec < PrecConcat)
174
0
        t_->append(")");
175
0
      break;
176
177
0
    case kRegexpConcat:
178
0
      if (prec < PrecConcat)
179
0
        t_->append(")");
180
0
      break;
181
182
0
    case kRegexpAlternate:
183
      // Clumsy but workable: the children all appended |
184
      // at the end of their strings, so just remove the last one.
185
0
      if ((*t_)[t_->size()-1] == '|')
186
0
        t_->erase(t_->size()-1);
187
0
      else
188
0
        ABSL_LOG(DFATAL) << "Bad final char: " << t_;
189
0
      if (prec < PrecAlternate)
190
0
        t_->append(")");
191
0
      break;
192
193
0
    case kRegexpStar:
194
0
      t_->append("*");
195
0
      if (re->parse_flags() & Regexp::NonGreedy)
196
0
        t_->append("?");
197
0
      if (prec < PrecUnary)
198
0
        t_->append(")");
199
0
      break;
200
201
0
    case kRegexpPlus:
202
0
      t_->append("+");
203
0
      if (re->parse_flags() & Regexp::NonGreedy)
204
0
        t_->append("?");
205
0
      if (prec < PrecUnary)
206
0
        t_->append(")");
207
0
      break;
208
209
0
    case kRegexpQuest:
210
0
      t_->append("?");
211
0
      if (re->parse_flags() & Regexp::NonGreedy)
212
0
        t_->append("?");
213
0
      if (prec < PrecUnary)
214
0
        t_->append(")");
215
0
      break;
216
217
0
    case kRegexpRepeat:
218
0
      if (re->max() == -1)
219
0
        t_->append(absl::StrFormat("{%d,}", re->min()));
220
0
      else if (re->min() == re->max())
221
0
        t_->append(absl::StrFormat("{%d}", re->min()));
222
0
      else
223
0
        t_->append(absl::StrFormat("{%d,%d}", re->min(), re->max()));
224
0
      if (re->parse_flags() & Regexp::NonGreedy)
225
0
        t_->append("?");
226
0
      if (prec < PrecUnary)
227
0
        t_->append(")");
228
0
      break;
229
230
0
    case kRegexpAnyChar:
231
0
      t_->append(".");
232
0
      break;
233
234
0
    case kRegexpAnyByte:
235
0
      t_->append("\\C");
236
0
      break;
237
238
0
    case kRegexpBeginLine:
239
0
      t_->append("^");
240
0
      break;
241
242
0
    case kRegexpEndLine:
243
0
      t_->append("$");
244
0
      break;
245
246
0
    case kRegexpBeginText:
247
0
      t_->append("(?-m:^)");
248
0
      break;
249
250
0
    case kRegexpEndText:
251
0
      if (re->parse_flags() & Regexp::WasDollar)
252
0
        t_->append("(?-m:$)");
253
0
      else
254
0
        t_->append("\\z");
255
0
      break;
256
257
0
    case kRegexpWordBoundary:
258
0
      t_->append("\\b");
259
0
      break;
260
261
0
    case kRegexpNoWordBoundary:
262
0
      t_->append("\\B");
263
0
      break;
264
265
0
    case kRegexpCharClass: {
266
0
      if (re->cc()->size() == 0) {
267
0
        t_->append("[^\\x00-\\x{10ffff}]");
268
0
        break;
269
0
      }
270
0
      t_->append("[");
271
      // Heuristic: show class as negated if it contains the
272
      // non-character 0xFFFE and yet somehow isn't full.
273
0
      CharClass* cc = re->cc();
274
0
      if (cc->Contains(0xFFFE) && !cc->full()) {
275
0
        cc = cc->Negate();
276
0
        t_->append("^");
277
0
      }
278
0
      for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
279
0
        AppendCCRange(t_, i->lo, i->hi);
280
0
      if (cc != re->cc())
281
0
        cc->Delete();
282
0
      t_->append("]");
283
0
      break;
284
0
    }
285
286
0
    case kRegexpCapture:
287
0
      t_->append(")");
288
0
      break;
289
290
0
    case kRegexpHaveMatch:
291
      // There's no syntax accepted by the parser to generate
292
      // this node (it is generated by RE2::Set) so make something
293
      // up that is readable but won't compile.
294
0
      t_->append(absl::StrFormat("(?HaveMatch:%d)", re->match_id()));
295
0
      break;
296
0
  }
297
298
  // If the parent is an alternation, append the | for it.
299
0
  if (prec == PrecAlternate)
300
0
    t_->append("|");
301
302
0
  return 0;
303
0
}
304
305
// Appends a rune for use in a character class to the string t.
306
0
static void AppendCCChar(std::string* t, Rune r) {
307
0
  if (0x20 <= r && r <= 0x7E) {
308
0
    if (strchr("[]^-\\", r))
309
0
      t->append("\\");
310
0
    t->append(1, static_cast<char>(r));
311
0
    return;
312
0
  }
313
0
  switch (r) {
314
0
    default:
315
0
      break;
316
317
0
    case '\r':
318
0
      t->append("\\r");
319
0
      return;
320
321
0
    case '\t':
322
0
      t->append("\\t");
323
0
      return;
324
325
0
    case '\n':
326
0
      t->append("\\n");
327
0
      return;
328
329
0
    case '\f':
330
0
      t->append("\\f");
331
0
      return;
332
0
  }
333
334
0
  if (r < 0x100) {
335
0
    *t += absl::StrFormat("\\x%02x", static_cast<int>(r));
336
0
    return;
337
0
  }
338
0
  *t += absl::StrFormat("\\x{%x}", static_cast<int>(r));
339
0
}
340
341
0
static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
342
0
  if (lo > hi)
343
0
    return;
344
0
  AppendCCChar(t, lo);
345
0
  if (lo < hi) {
346
0
    t->append("-");
347
0
    AppendCCChar(t, hi);
348
0
  }
349
0
}
350
351
}  // namespace re2