Coverage Report

Created: 2025-11-11 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/re2/re2/re2.cc
Line
Count
Source
1
// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Regular expression interface RE2.
6
//
7
// Originally the PCRE C++ wrapper, but adapted to use
8
// the new automata-based regular expression engines.
9
10
#include "re2/re2.h"
11
12
#include <errno.h>
13
#include <stddef.h>
14
#include <stdint.h>
15
#include <stdlib.h>
16
#include <string.h>
17
18
#include <algorithm>
19
#include <atomic>
20
#include <map>
21
#include <string>
22
#include <utility>
23
#include <vector>
24
25
#include "absl/base/call_once.h"
26
#include "absl/base/macros.h"
27
#include "absl/container/fixed_array.h"
28
#include "absl/log/absl_check.h"
29
#include "absl/log/absl_log.h"
30
#include "absl/strings/ascii.h"
31
#include "absl/strings/str_format.h"
32
#include "absl/strings/string_view.h"
33
#include "re2/prog.h"
34
#include "re2/regexp.h"
35
#include "re2/sparse_array.h"
36
#include "util/strutil.h"
37
#include "util/utf.h"
38
39
#ifdef _MSC_VER
40
#include <intrin.h>
41
#endif
42
43
namespace re2 {
44
45
// Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
46
static int maximum_global_replace_count = -1;
47
48
12.2k
void RE2::FUZZING_ONLY_set_maximum_global_replace_count(int i) {
49
12.2k
  maximum_global_replace_count = i;
50
12.2k
}
51
52
// Maximum number of args we can set
53
static const int kMaxArgs = 16;
54
static const int kVecSize = 1+kMaxArgs;
55
56
const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
57
58
RE2::Options::Options(RE2::CannedOptions opt)
59
0
  : max_mem_(kDefaultMaxMem),
60
0
    encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
61
0
    posix_syntax_(opt == RE2::POSIX),
62
0
    longest_match_(opt == RE2::POSIX),
63
0
    log_errors_(opt != RE2::Quiet),
64
0
    literal_(false),
65
0
    never_nl_(false),
66
0
    dot_nl_(false),
67
0
    never_capture_(false),
68
0
    case_sensitive_(true),
69
0
    perl_classes_(false),
70
0
    word_boundary_(false),
71
0
    one_line_(false) {
72
0
}
73
74
// Empty objects for use as const references.
75
// Statically allocating the storage and then
76
// lazily constructing the objects (in a once
77
// in RE2::Init()) avoids global constructors
78
// and the false positives (thanks, Valgrind)
79
// about memory leaks at program termination.
80
struct EmptyStorage {
81
  std::string empty_string;
82
  std::map<std::string, int> empty_named_groups;
83
  std::map<int, std::string> empty_group_names;
84
};
85
alignas(EmptyStorage) static char empty_storage[sizeof(EmptyStorage)];
86
87
87.8k
static inline std::string* empty_string() {
88
87.8k
  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_string;
89
87.8k
}
90
91
31.3k
static inline std::map<std::string, int>* empty_named_groups() {
92
31.3k
  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_named_groups;
93
31.3k
}
94
95
31.3k
static inline std::map<int, std::string>* empty_group_names() {
96
31.3k
  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_group_names;
97
31.3k
}
98
99
// Converts from Regexp error code to RE2 error code.
100
// Maybe some day they will diverge.  In any event, this
101
// hides the existence of Regexp from RE2 users.
102
1.57k
static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
103
1.57k
  switch (code) {
104
0
    case re2::kRegexpSuccess:
105
0
      return RE2::NoError;
106
0
    case re2::kRegexpInternalError:
107
0
      return RE2::ErrorInternal;
108
229
    case re2::kRegexpBadEscape:
109
229
      return RE2::ErrorBadEscape;
110
0
    case re2::kRegexpBadCharClass:
111
0
      return RE2::ErrorBadCharClass;
112
182
    case re2::kRegexpBadCharRange:
113
182
      return RE2::ErrorBadCharRange;
114
314
    case re2::kRegexpMissingBracket:
115
314
      return RE2::ErrorMissingBracket;
116
275
    case re2::kRegexpMissingParen:
117
275
      return RE2::ErrorMissingParen;
118
53
    case re2::kRegexpUnexpectedParen:
119
53
      return RE2::ErrorUnexpectedParen;
120
46
    case re2::kRegexpTrailingBackslash:
121
46
      return RE2::ErrorTrailingBackslash;
122
26
    case re2::kRegexpRepeatArgument:
123
26
      return RE2::ErrorRepeatArgument;
124
98
    case re2::kRegexpRepeatSize:
125
98
      return RE2::ErrorRepeatSize;
126
8
    case re2::kRegexpRepeatOp:
127
8
      return RE2::ErrorRepeatOp;
128
51
    case re2::kRegexpBadPerlOp:
129
51
      return RE2::ErrorBadPerlOp;
130
211
    case re2::kRegexpBadUTF8:
131
211
      return RE2::ErrorBadUTF8;
132
84
    case re2::kRegexpBadNamedCapture:
133
84
      return RE2::ErrorBadNamedCapture;
134
1.57k
  }
135
0
  return RE2::ErrorInternal;
136
1.57k
}
137
138
0
static std::string trunc(absl::string_view pattern) {
139
0
  if (pattern.size() < 100)
140
0
    return std::string(pattern);
141
0
  return std::string(pattern.substr(0, 100)) + "...";
142
0
}
143
144
145
0
RE2::RE2(const char* pattern) {
146
  // If absl::string_view becomes an alias for std::string_view,
147
  // it will stop allowing NULL to be converted.
148
  // Handle NULL explicitly to keep callers working no matter what.
149
0
  if (pattern == NULL)
150
0
    pattern = "";
151
0
  Init(pattern, DefaultOptions);
152
0
}
153
154
0
RE2::RE2(const std::string& pattern) {
155
0
  Init(pattern, DefaultOptions);
156
0
}
157
158
0
RE2::RE2(absl::string_view pattern) {
159
0
  Init(pattern, DefaultOptions);
160
0
}
161
162
21.9k
RE2::RE2(absl::string_view pattern, const Options& options) {
163
21.9k
  Init(pattern, options);
164
21.9k
}
165
166
41.2k
int RE2::Options::ParseFlags() const {
167
41.2k
  int flags = Regexp::ClassNL;
168
41.2k
  switch (encoding()) {
169
0
    default:
170
0
      if (log_errors())
171
0
        ABSL_LOG(ERROR) << "Unknown encoding " << encoding();
172
0
      break;
173
20.5k
    case RE2::Options::EncodingUTF8:
174
20.5k
      break;
175
20.7k
    case RE2::Options::EncodingLatin1:
176
20.7k
      flags |= Regexp::Latin1;
177
20.7k
      break;
178
41.2k
  }
179
180
41.2k
  if (!posix_syntax())
181
26.9k
    flags |= Regexp::LikePerl;
182
183
41.2k
  if (literal())
184
1.81k
    flags |= Regexp::Literal;
185
186
41.2k
  if (never_nl())
187
15.7k
    flags |= Regexp::NeverNL;
188
189
41.2k
  if (dot_nl())
190
16.2k
    flags |= Regexp::DotNL;
191
192
41.2k
  if (never_capture())
193
27.2k
    flags |= Regexp::NeverCapture;
194
195
41.2k
  if (!case_sensitive())
196
19.0k
    flags |= Regexp::FoldCase;
197
198
41.2k
  if (perl_classes())
199
17.2k
    flags |= Regexp::PerlClasses;
200
201
41.2k
  if (word_boundary())
202
17.0k
    flags |= Regexp::PerlB;
203
204
41.2k
  if (one_line())
205
17.5k
    flags |= Regexp::OneLine;
206
207
41.2k
  return flags;
208
41.2k
}
209
210
21.9k
void RE2::Init(absl::string_view pattern, const Options& options) {
211
21.9k
  static absl::once_flag empty_once;
212
21.9k
  absl::call_once(empty_once, []() {
213
1
    (void) new (empty_storage) EmptyStorage;
214
1
  });
215
216
21.9k
  pattern_ = new std::string(pattern);
217
21.9k
  options_.Copy(options);
218
21.9k
  entire_regexp_ = NULL;
219
21.9k
  suffix_regexp_ = NULL;
220
21.9k
  error_ = empty_string();
221
21.9k
  error_arg_ = empty_string();
222
223
21.9k
  num_captures_ = -1;
224
21.9k
  error_code_ = NoError;
225
21.9k
  longest_match_ = options_.longest_match();
226
21.9k
  is_one_pass_ = false;
227
21.9k
  prefix_foldcase_ = false;
228
21.9k
  prefix_.clear();
229
21.9k
  prog_ = NULL;
230
231
21.9k
  rprog_ = NULL;
232
21.9k
  named_groups_ = NULL;
233
21.9k
  group_names_ = NULL;
234
235
21.9k
  RegexpStatus status;
236
21.9k
  entire_regexp_ = Regexp::Parse(
237
21.9k
    *pattern_,
238
21.9k
    static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
239
21.9k
    &status);
240
21.9k
  if (entire_regexp_ == NULL) {
241
1.57k
    if (options_.log_errors()) {
242
0
      ABSL_LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
243
0
                      << status.Text();
244
0
    }
245
1.57k
    error_ = new std::string(status.Text());
246
1.57k
    error_code_ = RegexpErrorToRE2(status.code());
247
1.57k
    error_arg_ = new std::string(status.error_arg());
248
1.57k
    return;
249
1.57k
  }
250
251
20.3k
  bool foldcase;
252
20.3k
  re2::Regexp* suffix;
253
20.3k
  if (entire_regexp_->RequiredPrefix(&prefix_, &foldcase, &suffix)) {
254
450
    prefix_foldcase_ = foldcase;
255
450
    suffix_regexp_ = suffix;
256
450
  }
257
19.9k
  else {
258
19.9k
    suffix_regexp_ = entire_regexp_->Incref();
259
19.9k
  }
260
261
  // Two thirds of the memory goes to the forward Prog,
262
  // one third to the reverse prog, because the forward
263
  // Prog has two DFAs but the reverse prog has one.
264
20.3k
  prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
265
20.3k
  if (prog_ == NULL) {
266
0
    if (options_.log_errors())
267
0
      ABSL_LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
268
0
    error_ = new std::string("pattern too large - compile failed");
269
0
    error_code_ = RE2::ErrorPatternTooLarge;
270
0
    return;
271
0
  }
272
273
  // We used to compute this lazily, but it's used during the
274
  // typical control flow for a match call, so we now compute
275
  // it eagerly, which avoids the overhead of absl::once_flag.
276
20.3k
  num_captures_ = suffix_regexp_->NumCaptures();
277
278
  // Could delay this until the first match call that
279
  // cares about submatch information, but the one-pass
280
  // machine's memory gets cut from the DFA memory budget,
281
  // and that is harder to do if the DFA has already
282
  // been built.
283
20.3k
  is_one_pass_ = prog_->IsOnePass();
284
20.3k
}
285
286
// Returns rprog_, computing it if needed.
287
32.8k
re2::Prog* RE2::ReverseProg() const {
288
32.8k
  absl::call_once(rprog_once_, [](const RE2* re) {
289
10.3k
    re->rprog_ =
290
10.3k
        re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
291
10.3k
    if (re->rprog_ == NULL) {
292
0
      if (re->options_.log_errors())
293
0
        ABSL_LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
294
0
                        << "'";
295
      // We no longer touch error_ and error_code_ because failing to compile
296
      // the reverse Prog is not a showstopper: falling back to NFA execution
297
      // is fine. More importantly, an RE2 object is supposed to be logically
298
      // immutable: whatever ok() would have returned after Init() completed,
299
      // it should continue to return that no matter what ReverseProg() does.
300
0
    }
301
10.3k
  }, this);
302
32.8k
  return rprog_;
303
32.8k
}
304
305
21.9k
RE2::~RE2() {
306
21.9k
  if (group_names_ != empty_group_names())
307
12.5k
    delete group_names_;
308
21.9k
  if (named_groups_ != empty_named_groups())
309
12.5k
    delete named_groups_;
310
21.9k
  delete rprog_;
311
21.9k
  delete prog_;
312
21.9k
  if (error_arg_ != empty_string())
313
1.57k
    delete error_arg_;
314
21.9k
  if (error_ != empty_string())
315
1.57k
    delete error_;
316
21.9k
  if (suffix_regexp_)
317
20.3k
    suffix_regexp_->Decref();
318
21.9k
  if (entire_regexp_)
319
20.3k
    entire_regexp_->Decref();
320
21.9k
  delete pattern_;
321
21.9k
}
322
323
9.94k
int RE2::ProgramSize() const {
324
9.94k
  if (prog_ == NULL)
325
0
    return -1;
326
9.94k
  return prog_->size();
327
9.94k
}
328
329
9.77k
int RE2::ReverseProgramSize() const {
330
9.77k
  if (prog_ == NULL)
331
0
    return -1;
332
9.77k
  Prog* prog = ReverseProg();
333
9.77k
  if (prog == NULL)
334
0
    return -1;
335
9.77k
  return prog->size();
336
9.77k
}
337
338
// Finds the most significant non-zero bit in n.
339
3.70M
static int FindMSBSet(uint32_t n) {
340
3.70M
  ABSL_DCHECK_NE(n, uint32_t{0});
341
3.70M
#if defined(__GNUC__)
342
3.70M
  return 31 ^ __builtin_clz(n);
343
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
344
  unsigned long c;
345
  _BitScanReverse(&c, n);
346
  return static_cast<int>(c);
347
#else
348
  int c = 0;
349
  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
350
    uint32_t word = n >> shift;
351
    if (word != 0) {
352
      n = word;
353
      c += shift;
354
    }
355
  }
356
  return c;
357
#endif
358
3.70M
}
359
360
19.4k
static int Fanout(Prog* prog, std::vector<int>* histogram) {
361
19.4k
  SparseArray<int> fanout(prog->size());
362
19.4k
  prog->Fanout(&fanout);
363
19.4k
  int data[32] = {};
364
19.4k
  int size = 0;
365
3.73M
  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
366
3.72M
    if (i->value() == 0)
367
17.5k
      continue;
368
3.70M
    uint32_t value = i->value();
369
3.70M
    int bucket = FindMSBSet(value);
370
3.70M
    bucket += value & (value-1) ? 1 : 0;
371
3.70M
    ++data[bucket];
372
3.70M
    size = std::max(size, bucket+1);
373
3.70M
  }
374
19.4k
  if (histogram != NULL)
375
19.4k
    histogram->assign(data, data+size);
376
19.4k
  return size-1;
377
19.4k
}
378
379
9.73k
int RE2::ProgramFanout(std::vector<int>* histogram) const {
380
9.73k
  if (prog_ == NULL)
381
0
    return -1;
382
9.73k
  return Fanout(prog_, histogram);
383
9.73k
}
384
385
9.72k
int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
386
9.72k
  if (prog_ == NULL)
387
0
    return -1;
388
9.72k
  Prog* prog = ReverseProg();
389
9.72k
  if (prog == NULL)
390
0
    return -1;
391
9.72k
  return Fanout(prog, histogram);
392
9.72k
}
393
394
// Returns named_groups_, computing it if needed.
395
9.66k
const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
396
9.66k
  absl::call_once(named_groups_once_, [](const RE2* re) {
397
9.66k
    if (re->suffix_regexp_ != NULL)
398
9.66k
      re->named_groups_ = re->suffix_regexp_->NamedCaptures();
399
9.66k
    if (re->named_groups_ == NULL)
400
9.39k
      re->named_groups_ = empty_named_groups();
401
9.66k
  }, this);
402
9.66k
  return *named_groups_;
403
9.66k
}
404
405
// Returns group_names_, computing it if needed.
406
9.66k
const std::map<int, std::string>& RE2::CapturingGroupNames() const {
407
9.66k
  absl::call_once(group_names_once_, [](const RE2* re) {
408
9.66k
    if (re->suffix_regexp_ != NULL)
409
9.66k
      re->group_names_ = re->suffix_regexp_->CaptureNames();
410
9.66k
    if (re->group_names_ == NULL)
411
9.39k
      re->group_names_ = empty_group_names();
412
9.66k
  }, this);
413
9.66k
  return *group_names_;
414
9.66k
}
415
416
/***** Convenience interfaces *****/
417
418
bool RE2::FullMatchN(absl::string_view text, const RE2& re,
419
9.66k
                     const Arg* const args[], int n) {
420
9.66k
  return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
421
9.66k
}
422
423
bool RE2::PartialMatchN(absl::string_view text, const RE2& re,
424
19.3k
                        const Arg* const args[], int n) {
425
19.3k
  return re.DoMatch(text, UNANCHORED, NULL, args, n);
426
19.3k
}
427
428
bool RE2::ConsumeN(absl::string_view* input, const RE2& re,
429
9.66k
                   const Arg* const args[], int n) {
430
9.66k
  size_t consumed;
431
9.66k
  if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
432
1.95k
    input->remove_prefix(consumed);
433
1.95k
    return true;
434
7.71k
  } else {
435
7.71k
    return false;
436
7.71k
  }
437
9.66k
}
438
439
bool RE2::FindAndConsumeN(absl::string_view* input, const RE2& re,
440
9.66k
                          const Arg* const args[], int n) {
441
9.66k
  size_t consumed;
442
9.66k
  if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
443
2.06k
    input->remove_prefix(consumed);
444
2.06k
    return true;
445
7.59k
  } else {
446
7.59k
    return false;
447
7.59k
  }
448
9.66k
}
449
450
bool RE2::Replace(std::string* str,
451
                  const RE2& re,
452
9.66k
                  absl::string_view rewrite) {
453
9.66k
  absl::string_view vec[kVecSize];
454
9.66k
  int nvec = 1 + MaxSubmatch(rewrite);
455
9.66k
  if (nvec > 1 + re.NumberOfCapturingGroups())
456
0
    return false;
457
9.66k
  if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
458
0
    return false;
459
9.66k
  if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
460
6.08k
    return false;
461
462
3.58k
  std::string s;
463
3.58k
  if (!re.Rewrite(&s, rewrite, vec, nvec))
464
0
    return false;
465
466
3.58k
  ABSL_DCHECK_GE(vec[0].data(), str->data());
467
3.58k
  ABSL_DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size());
468
3.58k
  str->replace(vec[0].data() - str->data(), vec[0].size(), s);
469
3.58k
  return true;
470
3.58k
}
471
472
int RE2::GlobalReplace(std::string* str,
473
                       const RE2& re,
474
9.66k
                       absl::string_view rewrite) {
475
9.66k
  absl::string_view vec[kVecSize];
476
9.66k
  int nvec = 1 + MaxSubmatch(rewrite);
477
9.66k
  if (nvec > 1 + re.NumberOfCapturingGroups())
478
0
    return false;
479
9.66k
  if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
480
0
    return false;
481
482
9.66k
  const char* p = str->data();
483
9.66k
  const char* ep = p + str->size();
484
9.66k
  const char* lastend = NULL;
485
9.66k
  std::string out;
486
9.66k
  int count = 0;
487
13.2k
  while (p <= ep) {
488
13.2k
    if (maximum_global_replace_count != -1 &&
489
13.2k
        count >= maximum_global_replace_count)
490
3.58k
      break;
491
9.66k
    if (!re.Match(*str, static_cast<size_t>(p - str->data()),
492
9.66k
                  str->size(), UNANCHORED, vec, nvec))
493
6.08k
      break;
494
3.58k
    if (p < vec[0].data())
495
179
      out.append(p, vec[0].data() - p);
496
3.58k
    if (vec[0].data() == lastend && vec[0].empty()) {
497
      // Disallow empty match at end of last match: skip ahead.
498
      //
499
      // fullrune() takes int, not ptrdiff_t. However, it just looks
500
      // at the leading byte and treats any length >= 4 the same.
501
0
      if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
502
0
          fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
503
        // re is in UTF-8 mode and there is enough left of str
504
        // to allow us to advance by up to UTFmax bytes.
505
0
        Rune r;
506
0
        int n = chartorune(&r, p);
507
        // Some copies of chartorune have a bug that accepts
508
        // encodings of values in (10FFFF, 1FFFFF] as valid.
509
0
        if (r > Runemax) {
510
0
          n = 1;
511
0
          r = Runeerror;
512
0
        }
513
0
        if (!(n == 1 && r == Runeerror)) {  // no decoding error
514
0
          out.append(p, n);
515
0
          p += n;
516
0
          continue;
517
0
        }
518
0
      }
519
      // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
520
      // we fell through from above and the GIGO principle applies.
521
0
      if (p < ep)
522
0
        out.append(p, 1);
523
0
      p++;
524
0
      continue;
525
0
    }
526
3.58k
    re.Rewrite(&out, rewrite, vec, nvec);
527
3.58k
    p = vec[0].data() + vec[0].size();
528
3.58k
    lastend = p;
529
3.58k
    count++;
530
3.58k
  }
531
532
9.66k
  if (count == 0)
533
6.08k
    return 0;
534
535
3.58k
  if (p < ep)
536
547
    out.append(p, ep - p);
537
3.58k
  using std::swap;
538
3.58k
  swap(out, *str);
539
3.58k
  return count;
540
9.66k
}
541
542
bool RE2::Extract(absl::string_view text,
543
                  const RE2& re,
544
                  absl::string_view rewrite,
545
0
                  std::string* out) {
546
0
  absl::string_view vec[kVecSize];
547
0
  int nvec = 1 + MaxSubmatch(rewrite);
548
0
  if (nvec > 1 + re.NumberOfCapturingGroups())
549
0
    return false;
550
0
  if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
551
0
    return false;
552
0
  if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
553
0
    return false;
554
555
0
  out->clear();
556
0
  return re.Rewrite(out, rewrite, vec, nvec);
557
0
}
558
559
9.66k
std::string RE2::QuoteMeta(absl::string_view unquoted) {
560
9.66k
  std::string result;
561
9.66k
  result.reserve(unquoted.size() << 1);
562
563
  // Escape any ascii character not in [A-Za-z_0-9].
564
  //
565
  // Note that it's legal to escape a character even if it has no
566
  // special meaning in a regular expression -- so this function does
567
  // that.  (This also makes it identical to the perl function of the
568
  // same name except for the null-character special case;
569
  // see `perldoc -f quotemeta`.)
570
444k
  for (size_t ii = 0; ii < unquoted.size(); ++ii) {
571
    // Note that using 'isalnum' here raises the benchmark time from
572
    // 32ns to 58ns:
573
434k
    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
574
420k
        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
575
407k
        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
576
377k
        unquoted[ii] != '_' &&
577
        // If this is the part of a UTF8 or Latin1 character, we need
578
        // to copy this byte without escaping.  Experimentally this is
579
        // what works correctly with the regexp library.
580
376k
        !(unquoted[ii] & 128)) {
581
298k
      if (unquoted[ii] == '\0') {  // Special handling for null chars.
582
        // Note that this special handling is not strictly required for RE2,
583
        // but this quoting is required for other regexp libraries such as
584
        // PCRE.
585
        // Can't use "\\0" since the next character might be a digit.
586
18.7k
        result += "\\x00";
587
18.7k
        continue;
588
18.7k
      }
589
279k
      result += '\\';
590
279k
    }
591
415k
    result += unquoted[ii];
592
415k
  }
593
594
9.66k
  return result;
595
9.66k
}
596
597
bool RE2::PossibleMatchRange(std::string* min, std::string* max,
598
9.66k
                             int maxlen) const {
599
9.66k
  if (prog_ == NULL)
600
0
    return false;
601
602
9.66k
  int n = static_cast<int>(prefix_.size());
603
9.66k
  if (n > maxlen)
604
13
    n = maxlen;
605
606
  // Determine initial min max from prefix_ literal.
607
9.66k
  *min = prefix_.substr(0, n);
608
9.66k
  *max = prefix_.substr(0, n);
609
9.66k
  if (prefix_foldcase_) {
610
    // prefix is ASCII lowercase; change *min to uppercase.
611
394
    for (int i = 0; i < n; i++) {
612
276
      char& c = (*min)[i];
613
276
      if ('a' <= c && c <= 'z')
614
60
        c += 'A' - 'a';
615
276
    }
616
118
  }
617
618
  // Add to prefix min max using PossibleMatchRange on regexp.
619
9.66k
  std::string dmin, dmax;
620
9.66k
  maxlen -= n;
621
9.66k
  if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
622
8.89k
    min->append(dmin);
623
8.89k
    max->append(dmax);
624
8.89k
  } else if (!max->empty()) {
625
    // prog_->PossibleMatchRange has failed us,
626
    // but we still have useful information from prefix_.
627
    // Round up *max to allow any possible suffix.
628
21
    PrefixSuccessor(max);
629
749
  } else {
630
    // Nothing useful.
631
749
    *min = "";
632
749
    *max = "";
633
749
    return false;
634
749
  }
635
636
8.91k
  return true;
637
9.66k
}
638
639
// Avoid possible locale nonsense in standard strcasecmp.
640
// The string a is known to be all lowercase.
641
343
static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
642
343
  const char* ae = a + len;
643
644
896
  for (; a < ae; a++, b++) {
645
707
    uint8_t x = *a;
646
707
    uint8_t y = *b;
647
707
    if ('A' <= y && y <= 'Z')
648
112
      y += 'a' - 'A';
649
707
    if (x != y)
650
154
      return x - y;
651
707
  }
652
189
  return 0;
653
343
}
654
655
656
/***** Actual matching and rewriting code *****/
657
658
bool RE2::Match(absl::string_view text,
659
                size_t startpos,
660
                size_t endpos,
661
                Anchor re_anchor,
662
                absl::string_view* submatch,
663
67.6k
                int nsubmatch) const {
664
67.6k
  if (!ok()) {
665
0
    if (options_.log_errors())
666
0
      ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
667
0
    return false;
668
0
  }
669
670
67.6k
  if (startpos > endpos || endpos > text.size()) {
671
0
    if (options_.log_errors())
672
0
      ABSL_LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
673
0
                      << "startpos: " << startpos << ", "
674
0
                      << "endpos: " << endpos << ", "
675
0
                      << "text size: " << text.size() << "]";
676
0
    return false;
677
0
  }
678
679
67.6k
  absl::string_view subtext = text;
680
67.6k
  subtext.remove_prefix(startpos);
681
67.6k
  subtext.remove_suffix(text.size() - endpos);
682
683
  // Use DFAs to find exact location of match, filter out non-matches.
684
685
  // Don't ask for the location if we won't use it.
686
  // SearchDFA can do extra optimizations in that case.
687
67.6k
  absl::string_view match;
688
67.6k
  absl::string_view* matchp = &match;
689
67.6k
  if (nsubmatch == 0)
690
22.5k
    matchp = NULL;
691
692
67.6k
  int ncap = 1 + NumberOfCapturingGroups();
693
67.6k
  if (ncap > nsubmatch)
694
32.6k
    ncap = nsubmatch;
695
696
  // If the regexp is anchored explicitly, must not be in middle of text.
697
67.6k
  if (prog_->anchor_start() && startpos != 0)
698
0
    return false;
699
67.6k
  if (prog_->anchor_end() && endpos != text.size())
700
0
    return false;
701
702
  // If the regexp is anchored explicitly, update re_anchor
703
  // so that we can potentially fall into a faster case below.
704
67.6k
  if (prog_->anchor_start() && prog_->anchor_end())
705
298
    re_anchor = ANCHOR_BOTH;
706
67.3k
  else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
707
2.29k
    re_anchor = ANCHOR_START;
708
709
  // Check for the required prefix, if any.
710
67.6k
  size_t prefixlen = 0;
711
67.6k
  if (!prefix_.empty()) {
712
1.44k
    if (startpos != 0)
713
0
      return false;
714
1.44k
    prefixlen = prefix_.size();
715
1.44k
    if (prefixlen > subtext.size())
716
826
      return false;
717
620
    if (prefix_foldcase_) {
718
343
      if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
719
154
        return false;
720
343
    } else {
721
277
      if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
722
147
        return false;
723
277
    }
724
319
    subtext.remove_prefix(prefixlen);
725
    // If there is a required prefix, the anchor must be at least ANCHOR_START.
726
319
    if (re_anchor != ANCHOR_BOTH)
727
262
      re_anchor = ANCHOR_START;
728
319
  }
729
730
66.4k
  Prog::Anchor anchor = Prog::kUnanchored;
731
66.4k
  Prog::MatchKind kind =
732
66.4k
      longest_match_ ? Prog::kLongestMatch : Prog::kFirstMatch;
733
734
66.4k
  bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
735
66.4k
  bool can_bit_state = prog_->CanBitState();
736
66.4k
  size_t bit_state_text_max_size = prog_->bit_state_text_max_size();
737
738
66.4k
#ifdef RE2_HAVE_THREAD_LOCAL
739
66.4k
  hooks::context = this;
740
66.4k
#endif
741
66.4k
  bool dfa_failed = false;
742
66.4k
  bool skipped_test = false;
743
66.4k
  switch (re_anchor) {
744
0
    default:
745
0
      ABSL_LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
746
0
      return false;
747
748
45.2k
    case UNANCHORED: {
749
45.2k
      if (prog_->anchor_end()) {
750
        // This is a very special case: we don't need the forward DFA because
751
        // we already know where the match must end! Instead, the reverse DFA
752
        // can say whether there is a match and (optionally) where it starts.
753
2.73k
        Prog* prog = ReverseProg();
754
2.73k
        if (prog == NULL) {
755
          // Fall back to NFA below.
756
0
          skipped_test = true;
757
0
          break;
758
0
        }
759
2.73k
        if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
760
2.73k
                             Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
761
1.75k
          if (dfa_failed) {
762
0
            if (options_.log_errors())
763
0
              ABSL_LOG(ERROR) << "DFA out of memory: "
764
0
                              << "pattern length " << pattern_->size() << ", "
765
0
                              << "program size " << prog->size() << ", "
766
0
                              << "list count " << prog->list_count() << ", "
767
0
                              << "bytemap range " << prog->bytemap_range();
768
            // Fall back to NFA below.
769
0
            skipped_test = true;
770
0
            break;
771
0
          }
772
1.75k
          return false;
773
1.75k
        }
774
985
        if (matchp == NULL)  // Matched.  Don't care where.
775
286
          return true;
776
699
        break;
777
985
      }
778
779
42.4k
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
780
42.4k
                            matchp, &dfa_failed, NULL)) {
781
27.1k
        if (dfa_failed) {
782
0
          if (options_.log_errors())
783
0
            ABSL_LOG(ERROR) << "DFA out of memory: "
784
0
                            << "pattern length " << pattern_->size() << ", "
785
0
                            << "program size " << prog_->size() << ", "
786
0
                            << "list count " << prog_->list_count() << ", "
787
0
                            << "bytemap range " << prog_->bytemap_range();
788
          // Fall back to NFA below.
789
0
          skipped_test = true;
790
0
          break;
791
0
        }
792
27.1k
        return false;
793
27.1k
      }
794
15.3k
      if (matchp == NULL)  // Matched.  Don't care where.
795
4.74k
        return true;
796
      // SearchDFA set match.end() but didn't know where the
797
      // match started.  Run the regexp backward from match.end()
798
      // to find the longest possible match -- that's where it started.
799
10.6k
      Prog* prog = ReverseProg();
800
10.6k
      if (prog == NULL) {
801
        // Fall back to NFA below.
802
0
        skipped_test = true;
803
0
        break;
804
0
      }
805
10.6k
      if (!prog->SearchDFA(match, text, Prog::kAnchored,
806
10.6k
                           Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
807
0
        if (dfa_failed) {
808
0
          if (options_.log_errors())
809
0
            ABSL_LOG(ERROR) << "DFA out of memory: "
810
0
                            << "pattern length " << pattern_->size() << ", "
811
0
                            << "program size " << prog->size() << ", "
812
0
                            << "list count " << prog->list_count() << ", "
813
0
                            << "bytemap range " << prog->bytemap_range();
814
          // Fall back to NFA below.
815
0
          skipped_test = true;
816
0
          break;
817
0
        }
818
0
        if (options_.log_errors())
819
0
          ABSL_LOG(ERROR) << "SearchDFA inconsistency";
820
0
        return false;
821
0
      }
822
10.6k
      break;
823
10.6k
    }
824
825
10.6k
    case ANCHOR_BOTH:
826
21.2k
    case ANCHOR_START:
827
21.2k
      if (re_anchor == ANCHOR_BOTH)
828
9.74k
        kind = Prog::kFullMatch;
829
21.2k
      anchor = Prog::kAnchored;
830
831
      // If only a small amount of text and need submatch
832
      // information anyway and we're going to use OnePass or BitState
833
      // to get it, we might as well not even bother with the DFA:
834
      // OnePass or BitState will be fast enough.
835
      // On tiny texts, OnePass outruns even the DFA, and
836
      // it doesn't have the shared state and occasional mutex that
837
      // the DFA does.
838
21.2k
      if (can_one_pass && text.size() <= 4096 &&
839
14.2k
          (ncap > 1 || text.size() <= 16)) {
840
13.8k
        skipped_test = true;
841
13.8k
        break;
842
13.8k
      }
843
7.39k
      if (can_bit_state && text.size() <= bit_state_text_max_size &&
844
5.45k
          ncap > 1) {
845
2.33k
        skipped_test = true;
846
2.33k
        break;
847
2.33k
      }
848
5.05k
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
849
5.05k
                            &match, &dfa_failed, NULL)) {
850
2.63k
        if (dfa_failed) {
851
0
          if (options_.log_errors())
852
0
            ABSL_LOG(ERROR) << "DFA out of memory: "
853
0
                            << "pattern length " << pattern_->size() << ", "
854
0
                            << "program size " << prog_->size() << ", "
855
0
                            << "list count " << prog_->list_count() << ", "
856
0
                            << "bytemap range " << prog_->bytemap_range();
857
          // Fall back to NFA below.
858
0
          skipped_test = true;
859
0
          break;
860
0
        }
861
2.63k
        return false;
862
2.63k
      }
863
2.41k
      break;
864
66.4k
  }
865
866
29.9k
  if (!skipped_test && ncap <= 1) {
867
    // We know exactly where it matches.  That's enough.
868
9.96k
    if (ncap == 1)
869
9.22k
      submatch[0] = match;
870
19.9k
  } else {
871
19.9k
    absl::string_view subtext1;
872
19.9k
    if (skipped_test) {
873
      // DFA ran out of memory or was skipped:
874
      // need to search in entire original text.
875
16.2k
      subtext1 = subtext;
876
16.2k
    } else {
877
      // DFA found the exact match location:
878
      // let NFA run an anchored, full match search
879
      // to find submatch locations.
880
3.77k
      subtext1 = match;
881
3.77k
      anchor = Prog::kAnchored;
882
3.77k
      kind = Prog::kFullMatch;
883
3.77k
    }
884
885
19.9k
    if (can_one_pass && anchor != Prog::kUnanchored) {
886
15.1k
      if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
887
9.55k
        if (!skipped_test && options_.log_errors())
888
9.55k
          ABSL_LOG(ERROR) << "SearchOnePass inconsistency";
889
9.55k
        return false;
890
9.55k
      }
891
15.1k
    } else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
892
3.41k
      if (!prog_->SearchBitState(subtext1, text, anchor,
893
3.41k
                                 kind, submatch, ncap)) {
894
1.12k
        if (!skipped_test && options_.log_errors())
895
1.12k
          ABSL_LOG(ERROR) << "SearchBitState inconsistency";
896
1.12k
        return false;
897
1.12k
      }
898
3.41k
    } else {
899
1.47k
      if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
900
0
        if (!skipped_test && options_.log_errors())
901
0
          ABSL_LOG(ERROR) << "SearchNFA inconsistency";
902
0
        return false;
903
0
      }
904
1.47k
    }
905
19.9k
  }
906
907
  // Adjust overall match for required prefix that we stripped off.
908
19.2k
  if (prefixlen > 0 && nsubmatch > 0)
909
126
    submatch[0] = absl::string_view(submatch[0].data() - prefixlen,
910
126
                                    submatch[0].size() + prefixlen);
911
912
  // Zero submatches that don't exist in the regexp.
913
19.2k
  for (int i = ncap; i < nsubmatch; i++)
914
0
    submatch[i] = absl::string_view();
915
19.2k
  return true;
916
29.9k
}
917
918
// Internal matcher - like Match() but takes Args not string_views.
919
bool RE2::DoMatch(absl::string_view text,
920
                  Anchor re_anchor,
921
                  size_t* consumed,
922
                  const Arg* const* args,
923
48.2k
                  int n) const {
924
48.2k
  if (!ok()) {
925
0
    if (options_.log_errors())
926
0
      ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
927
0
    return false;
928
0
  }
929
930
48.2k
  if (NumberOfCapturingGroups() < n) {
931
    // RE has fewer capturing groups than number of Arg pointers passed in.
932
0
    return false;
933
0
  }
934
935
  // Count number of capture groups needed.
936
48.2k
  int nvec;
937
48.2k
  if (n == 0 && consumed == NULL)
938
22.5k
    nvec = 0;
939
25.7k
  else
940
25.7k
    nvec = n+1;
941
942
48.2k
  absl::FixedArray<absl::string_view, kVecSize> vec_storage(nvec);
943
48.2k
  absl::string_view* vec = vec_storage.data();
944
945
48.2k
  if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
946
31.1k
    return false;
947
31.1k
  }
948
949
17.1k
  if (consumed != NULL)
950
6.98k
    *consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text));
951
952
17.1k
  if (n == 0 || args == NULL) {
953
    // We are not interested in results
954
10.7k
    return true;
955
10.7k
  }
956
957
  // If we got here, we must have matched the whole pattern.
958
6.88k
  for (int i = 0; i < n; i++) {
959
6.37k
    absl::string_view s = vec[i+1];
960
6.37k
    if (!args[i]->Parse(s.data(), s.size())) {
961
      // TODO: Should we indicate what the error was?
962
5.87k
      return false;
963
5.87k
    }
964
6.37k
  }
965
966
502
  return true;
967
6.37k
}
968
969
// Checks that the rewrite string is well-formed with respect to this
970
// regular expression.
971
bool RE2::CheckRewriteString(absl::string_view rewrite,
972
0
                             std::string* error) const {
973
0
  int max_token = -1;
974
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
975
0
       s < end; s++) {
976
0
    int c = *s;
977
0
    if (c != '\\') {
978
0
      continue;
979
0
    }
980
0
    if (++s == end) {
981
0
      *error = "Rewrite schema error: '\\' not allowed at end.";
982
0
      return false;
983
0
    }
984
0
    c = *s;
985
0
    if (c == '\\') {
986
0
      continue;
987
0
    }
988
0
    if (!absl::ascii_isdigit(c)) {
989
0
      *error = "Rewrite schema error: "
990
0
               "'\\' must be followed by a digit or '\\'.";
991
0
      return false;
992
0
    }
993
0
    int n = (c - '0');
994
0
    if (max_token < n) {
995
0
      max_token = n;
996
0
    }
997
0
  }
998
999
0
  if (max_token > NumberOfCapturingGroups()) {
1000
0
    *error = absl::StrFormat(
1001
0
        "Rewrite schema requests %d matches, but the regexp only has %d "
1002
0
        "parenthesized subexpressions.",
1003
0
        max_token, NumberOfCapturingGroups());
1004
0
    return false;
1005
0
  }
1006
0
  return true;
1007
0
}
1008
1009
// Returns the maximum submatch needed for the rewrite to be done by Replace().
1010
// E.g. if rewrite == "foo \\2,\\1", returns 2.
1011
19.3k
int RE2::MaxSubmatch(absl::string_view rewrite) {
1012
19.3k
  int max = 0;
1013
19.3k
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
1014
19.3k
       s < end; s++) {
1015
0
    if (*s == '\\') {
1016
0
      s++;
1017
0
      int c = (s < end) ? *s : -1;
1018
0
      if (absl::ascii_isdigit(c)) {
1019
0
        int n = (c - '0');
1020
0
        if (n > max)
1021
0
          max = n;
1022
0
      }
1023
0
    }
1024
0
  }
1025
19.3k
  return max;
1026
19.3k
}
1027
1028
// Append the "rewrite" string, with backslash substitutions from "vec",
1029
// to string "out".
1030
bool RE2::Rewrite(std::string* out,
1031
                  absl::string_view rewrite,
1032
                  const absl::string_view* vec,
1033
7.16k
                  int veclen) const {
1034
7.16k
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
1035
7.16k
       s < end; s++) {
1036
0
    if (*s != '\\') {
1037
0
      out->push_back(*s);
1038
0
      continue;
1039
0
    }
1040
0
    s++;
1041
0
    int c = (s < end) ? *s : -1;
1042
0
    if (absl::ascii_isdigit(c)) {
1043
0
      int n = (c - '0');
1044
0
      if (n >= veclen) {
1045
0
        if (options_.log_errors()) {
1046
0
          ABSL_LOG(ERROR) << "invalid substitution \\" << n
1047
0
                          << " from " << veclen << " groups";
1048
0
        }
1049
0
        return false;
1050
0
      }
1051
0
      absl::string_view snip = vec[n];
1052
0
      if (!snip.empty())
1053
0
        out->append(snip.data(), snip.size());
1054
0
    } else if (c == '\\') {
1055
0
      out->push_back('\\');
1056
0
    } else {
1057
0
      if (options_.log_errors())
1058
0
        ABSL_LOG(ERROR) << "invalid rewrite pattern: " << rewrite;
1059
0
      return false;
1060
0
    }
1061
0
  }
1062
7.16k
  return true;
1063
7.16k
}
1064
1065
/***** Parsers for various types *****/
1066
1067
namespace re2_internal {
1068
1069
template <>
1070
0
bool Parse(const char* str, size_t n, void* dest) {
1071
  // We fail if somebody asked us to store into a non-NULL void* pointer
1072
0
  return (dest == NULL);
1073
0
}
1074
1075
template <>
1076
0
bool Parse(const char* str, size_t n, std::string* dest) {
1077
0
  if (dest == NULL) return true;
1078
0
  dest->assign(str, n);
1079
0
  return true;
1080
0
}
1081
1082
template <>
1083
0
bool Parse(const char* str, size_t n, absl::string_view* dest) {
1084
0
  if (dest == NULL) return true;
1085
0
  *dest = absl::string_view(str, n);
1086
0
  return true;
1087
0
}
1088
1089
template <>
1090
0
bool Parse(const char* str, size_t n, char* dest) {
1091
0
  if (n != 1) return false;
1092
0
  if (dest == NULL) return true;
1093
0
  *dest = str[0];
1094
0
  return true;
1095
0
}
1096
1097
template <>
1098
0
bool Parse(const char* str, size_t n, signed char* dest) {
1099
0
  if (n != 1) return false;
1100
0
  if (dest == NULL) return true;
1101
0
  *dest = str[0];
1102
0
  return true;
1103
0
}
1104
1105
template <>
1106
0
bool Parse(const char* str, size_t n, unsigned char* dest) {
1107
0
  if (n != 1) return false;
1108
0
  if (dest == NULL) return true;
1109
0
  *dest = str[0];
1110
0
  return true;
1111
0
}
1112
1113
// Largest number spec that we are willing to parse
1114
static const int kMaxNumberLength = 32;
1115
1116
// REQUIRES "buf" must have length at least nbuf.
1117
// Copies "str" into "buf" and null-terminates.
1118
// Overwrites *np with the new length.
1119
static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
1120
1.61k
                                   size_t* np, bool accept_spaces) {
1121
1.61k
  size_t n = *np;
1122
1.61k
  if (n == 0) return "";
1123
1.61k
  if (n > 0 && absl::ascii_isspace(*str)) {
1124
    // We are less forgiving than the strtoxxx() routines and do not
1125
    // allow leading spaces. We do allow leading spaces for floats.
1126
119
    if (!accept_spaces) {
1127
58
      return "";
1128
58
    }
1129
533
    while (n > 0 && absl::ascii_isspace(*str)) {
1130
472
      n--;
1131
472
      str++;
1132
472
    }
1133
61
  }
1134
1135
  // Although buf has a fixed maximum size, we can still handle
1136
  // arbitrarily large integers correctly by omitting leading zeros.
1137
  // (Numbers that are still too long will be out of range.)
1138
  // Before deciding whether str is too long,
1139
  // remove leading zeros with s/000+/00/.
1140
  // Leaving the leading two zeros in place means that
1141
  // we don't change 0000x123 (invalid) into 0x123 (valid).
1142
  // Skip over leading - before replacing.
1143
1.55k
  bool neg = false;
1144
1.55k
  if (n >= 1 && str[0] == '-') {
1145
369
    neg = true;
1146
369
    n--;
1147
369
    str++;
1148
369
  }
1149
1150
1.55k
  if (n >= 3 && str[0] == '0' && str[1] == '0') {
1151
1.25k
    while (n >= 3 && str[2] == '0') {
1152
1.18k
      n--;
1153
1.18k
      str++;
1154
1.18k
    }
1155
73
  }
1156
1157
1.55k
  if (neg) {  // make room in buf for -
1158
369
    n++;
1159
369
    str--;
1160
369
  }
1161
1162
1.55k
  if (n > nbuf-1) return "";
1163
1164
1.48k
  memmove(buf, str, n);
1165
1.48k
  if (neg) {
1166
348
    buf[0] = '-';
1167
348
  }
1168
1.48k
  buf[n] = '\0';
1169
1.48k
  *np = n;
1170
1.48k
  return buf;
1171
1.55k
}
1172
1173
template <>
1174
1.60k
bool Parse(const char* str, size_t n, float* dest) {
1175
1.60k
  if (n == 0) return false;
1176
384
  static const int kMaxLength = 200;
1177
384
  char buf[kMaxLength+1];
1178
384
  str = TerminateNumber(buf, sizeof buf, str, &n, true);
1179
384
  char* end;
1180
384
  errno = 0;
1181
384
  float r = strtof(str, &end);
1182
384
  if (end != str + n) return false;   // Leftover junk
1183
157
  if (errno) return false;
1184
155
  if (dest == NULL) return true;
1185
155
  *dest = r;
1186
155
  return true;
1187
155
}
1188
1189
template <>
1190
1.67k
bool Parse(const char* str, size_t n, double* dest) {
1191
1.67k
  if (n == 0) return false;
1192
418
  static const int kMaxLength = 200;
1193
418
  char buf[kMaxLength+1];
1194
418
  str = TerminateNumber(buf, sizeof buf, str, &n, true);
1195
418
  char* end;
1196
418
  errno = 0;
1197
418
  double r = strtod(str, &end);
1198
418
  if (end != str + n) return false;   // Leftover junk
1199
159
  if (errno) return false;
1200
158
  if (dest == NULL) return true;
1201
158
  *dest = r;
1202
158
  return true;
1203
158
}
1204
1205
template <>
1206
3.10k
bool Parse(const char* str, size_t n, long* dest, int radix) {
1207
3.10k
  if (n == 0) return false;
1208
812
  char buf[kMaxNumberLength+1];
1209
812
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1210
812
  char* end;
1211
812
  errno = 0;
1212
812
  long r = strtol(str, &end, radix);
1213
812
  if (end != str + n) return false;   // Leftover junk
1214
275
  if (errno) return false;
1215
272
  if (dest == NULL) return true;
1216
272
  *dest = r;
1217
272
  return true;
1218
272
}
1219
1220
template <>
1221
0
bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
1222
0
  if (n == 0) return false;
1223
0
  char buf[kMaxNumberLength+1];
1224
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1225
0
  if (str[0] == '-') {
1226
    // strtoul() will silently accept negative numbers and parse
1227
    // them.  This module is more strict and treats them as errors.
1228
0
    return false;
1229
0
  }
1230
1231
0
  char* end;
1232
0
  errno = 0;
1233
0
  unsigned long r = strtoul(str, &end, radix);
1234
0
  if (end != str + n) return false;   // Leftover junk
1235
0
  if (errno) return false;
1236
0
  if (dest == NULL) return true;
1237
0
  *dest = r;
1238
0
  return true;
1239
0
}
1240
1241
template <>
1242
1.42k
bool Parse(const char* str, size_t n, short* dest, int radix) {
1243
1.42k
  long r;
1244
1.42k
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1245
136
  if ((short)r != r) return false;              // Out of range
1246
53
  if (dest == NULL) return true;
1247
53
  *dest = (short)r;
1248
53
  return true;
1249
53
}
1250
1251
template <>
1252
0
bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
1253
0
  unsigned long r;
1254
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1255
0
  if ((unsigned short)r != r) return false;     // Out of range
1256
0
  if (dest == NULL) return true;
1257
0
  *dest = (unsigned short)r;
1258
0
  return true;
1259
0
}
1260
1261
template <>
1262
0
bool Parse(const char* str, size_t n, int* dest, int radix) {
1263
0
  long r;
1264
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1265
0
  if ((int)r != r) return false;                // Out of range
1266
0
  if (dest == NULL) return true;
1267
0
  *dest = (int)r;
1268
0
  return true;
1269
0
}
1270
1271
template <>
1272
0
bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
1273
0
  unsigned long r;
1274
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1275
0
  if ((unsigned int)r != r) return false;       // Out of range
1276
0
  if (dest == NULL) return true;
1277
0
  *dest = (unsigned int)r;
1278
0
  return true;
1279
0
}
1280
1281
template <>
1282
0
bool Parse(const char* str, size_t n, long long* dest, int radix) {
1283
0
  if (n == 0) return false;
1284
0
  char buf[kMaxNumberLength+1];
1285
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1286
0
  char* end;
1287
0
  errno = 0;
1288
0
  long long r = strtoll(str, &end, radix);
1289
0
  if (end != str + n) return false;   // Leftover junk
1290
0
  if (errno) return false;
1291
0
  if (dest == NULL) return true;
1292
0
  *dest = r;
1293
0
  return true;
1294
0
}
1295
1296
template <>
1297
0
bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
1298
0
  if (n == 0) return false;
1299
0
  char buf[kMaxNumberLength+1];
1300
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1301
0
  if (str[0] == '-') {
1302
    // strtoull() will silently accept negative numbers and parse
1303
    // them.  This module is more strict and treats them as errors.
1304
0
    return false;
1305
0
  }
1306
0
  char* end;
1307
0
  errno = 0;
1308
0
  unsigned long long r = strtoull(str, &end, radix);
1309
0
  if (end != str + n) return false;   // Leftover junk
1310
0
  if (errno) return false;
1311
0
  if (dest == NULL) return true;
1312
0
  *dest = r;
1313
0
  return true;
1314
0
}
1315
1316
}  // namespace re2_internal
1317
1318
namespace hooks {
1319
1320
#ifdef RE2_HAVE_THREAD_LOCAL
1321
thread_local const RE2* context = NULL;
1322
#endif
1323
1324
template <typename T>
1325
union Hook {
1326
0
  void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
Unexecuted instantiation: re2::hooks::Hook<void (re2::hooks::DFAStateCacheReset const&)>::Store(void (*)(re2::hooks::DFAStateCacheReset const&))
Unexecuted instantiation: re2::hooks::Hook<void (re2::hooks::DFASearchFailure const&)>::Store(void (*)(re2::hooks::DFASearchFailure const&))
1327
6
  T* Load() const { return cb_.load(std::memory_order_acquire); }
re2::hooks::Hook<void (re2::hooks::DFAStateCacheReset const&)>::Load() const
Line
Count
Source
1327
6
  T* Load() const { return cb_.load(std::memory_order_acquire); }
Unexecuted instantiation: re2::hooks::Hook<void (re2::hooks::DFASearchFailure const&)>::Load() const
1328
1329
#if !defined(__clang__) && defined(_MSC_VER)
1330
  // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
1331
  // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
1332
  static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
1333
                "std::atomic<T*> must be always lock-free");
1334
  T* cb_for_constinit_;
1335
#endif
1336
1337
  std::atomic<T*> cb_;
1338
};
1339
1340
template <typename T>
1341
6
static void DoNothing(const T&) {}
re2.cc:void re2::hooks::DoNothing<re2::hooks::DFAStateCacheReset>(re2::hooks::DFAStateCacheReset const&)
Line
Count
Source
1341
6
static void DoNothing(const T&) {}
Unexecuted instantiation: re2.cc:void re2::hooks::DoNothing<re2::hooks::DFASearchFailure>(re2::hooks::DFASearchFailure const&)
1342
1343
#define DEFINE_HOOK(type, name)                                       \
1344
  static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
1345
0
  void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
Unexecuted instantiation: re2::hooks::SetDFAStateCacheResetHook(void (*)(re2::hooks::DFAStateCacheReset const&))
Unexecuted instantiation: re2::hooks::SetDFASearchFailureHook(void (*)(re2::hooks::DFASearchFailure const&))
1346
6
  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
re2::hooks::GetDFAStateCacheResetHook()
Line
Count
Source
1346
6
  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
Unexecuted instantiation: re2::hooks::GetDFASearchFailureHook()
1347
1348
DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
1349
DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
1350
1351
#undef DEFINE_HOOK
1352
1353
}  // namespace hooks
1354
1355
}  // namespace re2