Coverage Report

Created: 2024-08-17 06:49

/src/re2/re2/re2.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Regular expression interface RE2.
6
//
7
// Originally the PCRE C++ wrapper, but adapted to use
8
// the new automata-based regular expression engines.
9
10
#include "re2/re2.h"
11
12
#include <assert.h>
13
#include <ctype.h>
14
#include <errno.h>
15
#ifdef _MSC_VER
16
#include <intrin.h>
17
#endif
18
#include <stdint.h>
19
#include <stdlib.h>
20
#include <string.h>
21
#include <algorithm>
22
#include <atomic>
23
#include <iterator>
24
#include <mutex>
25
#include <string>
26
#include <utility>
27
#include <vector>
28
29
#include "util/util.h"
30
#include "util/logging.h"
31
#include "util/strutil.h"
32
#include "util/utf.h"
33
#include "re2/prog.h"
34
#include "re2/regexp.h"
35
#include "re2/sparse_array.h"
36
37
namespace re2 {
38
39
// Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
40
static int maximum_global_replace_count = -1;
41
42
0
void RE2::FUZZING_ONLY_set_maximum_global_replace_count(int i) {
43
0
  maximum_global_replace_count = i;
44
0
}
45
46
// Maximum number of args we can set
47
static const int kMaxArgs = 16;
48
static const int kVecSize = 1+kMaxArgs;
49
50
const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
51
52
RE2::Options::Options(RE2::CannedOptions opt)
53
  : max_mem_(kDefaultMaxMem),
54
    encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
55
    posix_syntax_(opt == RE2::POSIX),
56
    longest_match_(opt == RE2::POSIX),
57
    log_errors_(opt != RE2::Quiet),
58
    literal_(false),
59
    never_nl_(false),
60
    dot_nl_(false),
61
    never_capture_(false),
62
    case_sensitive_(true),
63
    perl_classes_(false),
64
    word_boundary_(false),
65
0
    one_line_(false) {
66
0
}
67
68
// Empty objects for use as const references.
69
// Statically allocating the storage and then
70
// lazily constructing the objects (in a once
71
// in RE2::Init()) avoids global constructors
72
// and the false positives (thanks, Valgrind)
73
// about memory leaks at program termination.
74
struct EmptyStorage {
75
  std::string empty_string;
76
  std::map<std::string, int> empty_named_groups;
77
  std::map<int, std::string> empty_group_names;
78
};
79
alignas(EmptyStorage) static char empty_storage[sizeof(EmptyStorage)];
80
81
20.7k
static inline std::string* empty_string() {
82
20.7k
  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_string;
83
20.7k
}
84
85
5.18k
static inline std::map<std::string, int>* empty_named_groups() {
86
5.18k
  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_named_groups;
87
5.18k
}
88
89
5.18k
static inline std::map<int, std::string>* empty_group_names() {
90
5.18k
  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_group_names;
91
5.18k
}
92
93
// Converts from Regexp error code to RE2 error code.
94
// Maybe some day they will diverge.  In any event, this
95
// hides the existence of Regexp from RE2 users.
96
1.11k
static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
97
1.11k
  switch (code) {
98
0
    case re2::kRegexpSuccess:
99
0
      return RE2::NoError;
100
0
    case re2::kRegexpInternalError:
101
0
      return RE2::ErrorInternal;
102
113
    case re2::kRegexpBadEscape:
103
113
      return RE2::ErrorBadEscape;
104
0
    case re2::kRegexpBadCharClass:
105
0
      return RE2::ErrorBadCharClass;
106
208
    case re2::kRegexpBadCharRange:
107
208
      return RE2::ErrorBadCharRange;
108
243
    case re2::kRegexpMissingBracket:
109
243
      return RE2::ErrorMissingBracket;
110
134
    case re2::kRegexpMissingParen:
111
134
      return RE2::ErrorMissingParen;
112
60
    case re2::kRegexpUnexpectedParen:
113
60
      return RE2::ErrorUnexpectedParen;
114
11
    case re2::kRegexpTrailingBackslash:
115
11
      return RE2::ErrorTrailingBackslash;
116
14
    case re2::kRegexpRepeatArgument:
117
14
      return RE2::ErrorRepeatArgument;
118
12
    case re2::kRegexpRepeatSize:
119
12
      return RE2::ErrorRepeatSize;
120
14
    case re2::kRegexpRepeatOp:
121
14
      return RE2::ErrorRepeatOp;
122
47
    case re2::kRegexpBadPerlOp:
123
47
      return RE2::ErrorBadPerlOp;
124
259
    case re2::kRegexpBadUTF8:
125
259
      return RE2::ErrorBadUTF8;
126
0
    case re2::kRegexpBadNamedCapture:
127
0
      return RE2::ErrorBadNamedCapture;
128
1.11k
  }
129
0
  return RE2::ErrorInternal;
130
1.11k
}
131
132
0
static std::string trunc(const StringPiece& pattern) {
133
0
  if (pattern.size() < 100)
134
0
    return std::string(pattern);
135
0
  return std::string(pattern.substr(0, 100)) + "...";
136
0
}
137
138
139
0
RE2::RE2(const char* pattern) {
140
0
  Init(pattern, DefaultOptions);
141
0
}
142
143
0
RE2::RE2(const std::string& pattern) {
144
0
  Init(pattern, DefaultOptions);
145
0
}
146
147
0
RE2::RE2(const StringPiece& pattern) {
148
0
  Init(pattern, DefaultOptions);
149
0
}
150
151
5.18k
RE2::RE2(const StringPiece& pattern, const Options& options) {
152
5.18k
  Init(pattern, options);
153
5.18k
}
154
155
5.18k
int RE2::Options::ParseFlags() const {
156
5.18k
  int flags = Regexp::ClassNL;
157
5.18k
  switch (encoding()) {
158
0
    default:
159
0
      if (log_errors())
160
0
        LOG(ERROR) << "Unknown encoding " << encoding();
161
0
      break;
162
1.32k
    case RE2::Options::EncodingUTF8:
163
1.32k
      break;
164
3.85k
    case RE2::Options::EncodingLatin1:
165
3.85k
      flags |= Regexp::Latin1;
166
3.85k
      break;
167
5.18k
  }
168
169
5.18k
  if (!posix_syntax())
170
4.15k
    flags |= Regexp::LikePerl;
171
172
5.18k
  if (literal())
173
202
    flags |= Regexp::Literal;
174
175
5.18k
  if (never_nl())
176
1.39k
    flags |= Regexp::NeverNL;
177
178
5.18k
  if (dot_nl())
179
1.73k
    flags |= Regexp::DotNL;
180
181
5.18k
  if (never_capture())
182
1.51k
    flags |= Regexp::NeverCapture;
183
184
5.18k
  if (!case_sensitive())
185
4.58k
    flags |= Regexp::FoldCase;
186
187
5.18k
  if (perl_classes())
188
0
    flags |= Regexp::PerlClasses;
189
190
5.18k
  if (word_boundary())
191
0
    flags |= Regexp::PerlB;
192
193
5.18k
  if (one_line())
194
0
    flags |= Regexp::OneLine;
195
196
5.18k
  return flags;
197
5.18k
}
198
199
5.18k
void RE2::Init(const StringPiece& pattern, const Options& options) {
200
5.18k
  static std::once_flag empty_once;
201
5.18k
  std::call_once(empty_once, []() {
202
4
    (void) new (empty_storage) EmptyStorage;
203
4
  });
204
205
5.18k
  pattern_ = new std::string(pattern);
206
5.18k
  options_.Copy(options);
207
5.18k
  entire_regexp_ = NULL;
208
5.18k
  suffix_regexp_ = NULL;
209
5.18k
  error_ = empty_string();
210
5.18k
  error_arg_ = empty_string();
211
212
5.18k
  num_captures_ = -1;
213
5.18k
  error_code_ = NoError;
214
5.18k
  longest_match_ = options_.longest_match();
215
5.18k
  is_one_pass_ = false;
216
5.18k
  prefix_foldcase_ = false;
217
5.18k
  prefix_.clear();
218
5.18k
  prog_ = NULL;
219
220
5.18k
  rprog_ = NULL;
221
5.18k
  named_groups_ = NULL;
222
5.18k
  group_names_ = NULL;
223
224
5.18k
  RegexpStatus status;
225
5.18k
  entire_regexp_ = Regexp::Parse(
226
5.18k
    *pattern_,
227
5.18k
    static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
228
5.18k
    &status);
229
5.18k
  if (entire_regexp_ == NULL) {
230
1.11k
    if (options_.log_errors()) {
231
0
      LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
232
0
                 << status.Text();
233
0
    }
234
1.11k
    error_ = new std::string(status.Text());
235
1.11k
    error_code_ = RegexpErrorToRE2(status.code());
236
1.11k
    error_arg_ = new std::string(status.error_arg());
237
1.11k
    return;
238
1.11k
  }
239
240
4.07k
  bool foldcase;
241
4.07k
  re2::Regexp* suffix;
242
4.07k
  if (entire_regexp_->RequiredPrefix(&prefix_, &foldcase, &suffix)) {
243
85
    prefix_foldcase_ = foldcase;
244
85
    suffix_regexp_ = suffix;
245
85
  }
246
3.98k
  else {
247
3.98k
    suffix_regexp_ = entire_regexp_->Incref();
248
3.98k
  }
249
250
  // Two thirds of the memory goes to the forward Prog,
251
  // one third to the reverse prog, because the forward
252
  // Prog has two DFAs but the reverse prog has one.
253
4.07k
  prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
254
4.07k
  if (prog_ == NULL) {
255
0
    if (options_.log_errors())
256
0
      LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
257
0
    error_ = new std::string("pattern too large - compile failed");
258
0
    error_code_ = RE2::ErrorPatternTooLarge;
259
0
    return;
260
0
  }
261
262
  // We used to compute this lazily, but it's used during the
263
  // typical control flow for a match call, so we now compute
264
  // it eagerly, which avoids the overhead of std::once_flag.
265
4.07k
  num_captures_ = suffix_regexp_->NumCaptures();
266
267
  // Could delay this until the first match call that
268
  // cares about submatch information, but the one-pass
269
  // machine's memory gets cut from the DFA memory budget,
270
  // and that is harder to do if the DFA has already
271
  // been built.
272
4.07k
  is_one_pass_ = prog_->IsOnePass();
273
4.07k
}
274
275
// Returns rprog_, computing it if needed.
276
0
re2::Prog* RE2::ReverseProg() const {
277
0
  std::call_once(rprog_once_, [](const RE2* re) {
278
0
    re->rprog_ =
279
0
        re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
280
0
    if (re->rprog_ == NULL) {
281
0
      if (re->options_.log_errors())
282
0
        LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
283
0
                   << "'";
284
      // We no longer touch error_ and error_code_ because failing to compile
285
      // the reverse Prog is not a showstopper: falling back to NFA execution
286
      // is fine. More importantly, an RE2 object is supposed to be logically
287
      // immutable: whatever ok() would have returned after Init() completed,
288
      // it should continue to return that no matter what ReverseProg() does.
289
0
    }
290
0
  }, this);
291
0
  return rprog_;
292
0
}
293
294
5.18k
RE2::~RE2() {
295
5.18k
  if (group_names_ != empty_group_names())
296
5.18k
    delete group_names_;
297
5.18k
  if (named_groups_ != empty_named_groups())
298
5.18k
    delete named_groups_;
299
5.18k
  delete rprog_;
300
5.18k
  delete prog_;
301
5.18k
  if (error_arg_ != empty_string())
302
1.11k
    delete error_arg_;
303
5.18k
  if (error_ != empty_string())
304
1.11k
    delete error_;
305
5.18k
  if (suffix_regexp_)
306
4.07k
    suffix_regexp_->Decref();
307
5.18k
  if (entire_regexp_)
308
4.07k
    entire_regexp_->Decref();
309
5.18k
  delete pattern_;
310
5.18k
}
311
312
0
int RE2::ProgramSize() const {
313
0
  if (prog_ == NULL)
314
0
    return -1;
315
0
  return prog_->size();
316
0
}
317
318
0
int RE2::ReverseProgramSize() const {
319
0
  if (prog_ == NULL)
320
0
    return -1;
321
0
  Prog* prog = ReverseProg();
322
0
  if (prog == NULL)
323
0
    return -1;
324
0
  return prog->size();
325
0
}
326
327
// Finds the most significant non-zero bit in n.
328
0
static int FindMSBSet(uint32_t n) {
329
0
  DCHECK_NE(n, 0);
330
0
#if defined(__GNUC__)
331
0
  return 31 ^ __builtin_clz(n);
332
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
333
  unsigned long c;
334
  _BitScanReverse(&c, n);
335
  return static_cast<int>(c);
336
#else
337
  int c = 0;
338
  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
339
    uint32_t word = n >> shift;
340
    if (word != 0) {
341
      n = word;
342
      c += shift;
343
    }
344
  }
345
  return c;
346
#endif
347
0
}
348
349
0
static int Fanout(Prog* prog, std::vector<int>* histogram) {
350
0
  SparseArray<int> fanout(prog->size());
351
0
  prog->Fanout(&fanout);
352
0
  int data[32] = {};
353
0
  int size = 0;
354
0
  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
355
0
    if (i->value() == 0)
356
0
      continue;
357
0
    uint32_t value = i->value();
358
0
    int bucket = FindMSBSet(value);
359
0
    bucket += value & (value-1) ? 1 : 0;
360
0
    ++data[bucket];
361
0
    size = std::max(size, bucket+1);
362
0
  }
363
0
  if (histogram != NULL)
364
0
    histogram->assign(data, data+size);
365
0
  return size-1;
366
0
}
367
368
0
int RE2::ProgramFanout(std::vector<int>* histogram) const {
369
0
  if (prog_ == NULL)
370
0
    return -1;
371
0
  return Fanout(prog_, histogram);
372
0
}
373
374
0
int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
375
0
  if (prog_ == NULL)
376
0
    return -1;
377
0
  Prog* prog = ReverseProg();
378
0
  if (prog == NULL)
379
0
    return -1;
380
0
  return Fanout(prog, histogram);
381
0
}
382
383
// Returns named_groups_, computing it if needed.
384
0
const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
385
0
  std::call_once(named_groups_once_, [](const RE2* re) {
386
0
    if (re->suffix_regexp_ != NULL)
387
0
      re->named_groups_ = re->suffix_regexp_->NamedCaptures();
388
0
    if (re->named_groups_ == NULL)
389
0
      re->named_groups_ = empty_named_groups();
390
0
  }, this);
391
0
  return *named_groups_;
392
0
}
393
394
// Returns group_names_, computing it if needed.
395
0
const std::map<int, std::string>& RE2::CapturingGroupNames() const {
396
0
  std::call_once(group_names_once_, [](const RE2* re) {
397
0
    if (re->suffix_regexp_ != NULL)
398
0
      re->group_names_ = re->suffix_regexp_->CaptureNames();
399
0
    if (re->group_names_ == NULL)
400
0
      re->group_names_ = empty_group_names();
401
0
  }, this);
402
0
  return *group_names_;
403
0
}
404
405
/***** Convenience interfaces *****/
406
407
bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
408
4.07k
                     const Arg* const args[], int n) {
409
4.07k
  return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
410
4.07k
}
411
412
bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
413
0
                        const Arg* const args[], int n) {
414
0
  return re.DoMatch(text, UNANCHORED, NULL, args, n);
415
0
}
416
417
bool RE2::ConsumeN(StringPiece* input, const RE2& re,
418
0
                   const Arg* const args[], int n) {
419
0
  size_t consumed;
420
0
  if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
421
0
    input->remove_prefix(consumed);
422
0
    return true;
423
0
  } else {
424
0
    return false;
425
0
  }
426
0
}
427
428
bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
429
0
                          const Arg* const args[], int n) {
430
0
  size_t consumed;
431
0
  if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
432
0
    input->remove_prefix(consumed);
433
0
    return true;
434
0
  } else {
435
0
    return false;
436
0
  }
437
0
}
438
439
bool RE2::Replace(std::string* str,
440
                  const RE2& re,
441
0
                  const StringPiece& rewrite) {
442
0
  StringPiece vec[kVecSize];
443
0
  int nvec = 1 + MaxSubmatch(rewrite);
444
0
  if (nvec > 1 + re.NumberOfCapturingGroups())
445
0
    return false;
446
0
  if (nvec > static_cast<int>(arraysize(vec)))
447
0
    return false;
448
0
  if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
449
0
    return false;
450
451
0
  std::string s;
452
0
  if (!re.Rewrite(&s, rewrite, vec, nvec))
453
0
    return false;
454
455
0
  assert(vec[0].data() >= str->data());
456
0
  assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
457
0
  str->replace(vec[0].data() - str->data(), vec[0].size(), s);
458
0
  return true;
459
0
}
460
461
int RE2::GlobalReplace(std::string* str,
462
                       const RE2& re,
463
0
                       const StringPiece& rewrite) {
464
0
  StringPiece vec[kVecSize];
465
0
  int nvec = 1 + MaxSubmatch(rewrite);
466
0
  if (nvec > 1 + re.NumberOfCapturingGroups())
467
0
    return false;
468
0
  if (nvec > static_cast<int>(arraysize(vec)))
469
0
    return false;
470
471
0
  const char* p = str->data();
472
0
  const char* ep = p + str->size();
473
0
  const char* lastend = NULL;
474
0
  std::string out;
475
0
  int count = 0;
476
0
  while (p <= ep) {
477
0
    if (maximum_global_replace_count != -1 &&
478
0
        count >= maximum_global_replace_count)
479
0
      break;
480
0
    if (!re.Match(*str, static_cast<size_t>(p - str->data()),
481
0
                  str->size(), UNANCHORED, vec, nvec))
482
0
      break;
483
0
    if (p < vec[0].data())
484
0
      out.append(p, vec[0].data() - p);
485
0
    if (vec[0].data() == lastend && vec[0].empty()) {
486
      // Disallow empty match at end of last match: skip ahead.
487
      //
488
      // fullrune() takes int, not ptrdiff_t. However, it just looks
489
      // at the leading byte and treats any length >= 4 the same.
490
0
      if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
491
0
          fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
492
        // re is in UTF-8 mode and there is enough left of str
493
        // to allow us to advance by up to UTFmax bytes.
494
0
        Rune r;
495
0
        int n = chartorune(&r, p);
496
        // Some copies of chartorune have a bug that accepts
497
        // encodings of values in (10FFFF, 1FFFFF] as valid.
498
0
        if (r > Runemax) {
499
0
          n = 1;
500
0
          r = Runeerror;
501
0
        }
502
0
        if (!(n == 1 && r == Runeerror)) {  // no decoding error
503
0
          out.append(p, n);
504
0
          p += n;
505
0
          continue;
506
0
        }
507
0
      }
508
      // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
509
      // we fell through from above and the GIGO principle applies.
510
0
      if (p < ep)
511
0
        out.append(p, 1);
512
0
      p++;
513
0
      continue;
514
0
    }
515
0
    re.Rewrite(&out, rewrite, vec, nvec);
516
0
    p = vec[0].data() + vec[0].size();
517
0
    lastend = p;
518
0
    count++;
519
0
  }
520
521
0
  if (count == 0)
522
0
    return 0;
523
524
0
  if (p < ep)
525
0
    out.append(p, ep - p);
526
0
  using std::swap;
527
0
  swap(out, *str);
528
0
  return count;
529
0
}
530
531
bool RE2::Extract(const StringPiece& text,
532
                  const RE2& re,
533
                  const StringPiece& rewrite,
534
0
                  std::string* out) {
535
0
  StringPiece vec[kVecSize];
536
0
  int nvec = 1 + MaxSubmatch(rewrite);
537
0
  if (nvec > 1 + re.NumberOfCapturingGroups())
538
0
    return false;
539
0
  if (nvec > static_cast<int>(arraysize(vec)))
540
0
    return false;
541
0
  if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
542
0
    return false;
543
544
0
  out->clear();
545
0
  return re.Rewrite(out, rewrite, vec, nvec);
546
0
}
547
548
0
std::string RE2::QuoteMeta(const StringPiece& unquoted) {
549
0
  std::string result;
550
0
  result.reserve(unquoted.size() << 1);
551
552
  // Escape any ascii character not in [A-Za-z_0-9].
553
  //
554
  // Note that it's legal to escape a character even if it has no
555
  // special meaning in a regular expression -- so this function does
556
  // that.  (This also makes it identical to the perl function of the
557
  // same name except for the null-character special case;
558
  // see `perldoc -f quotemeta`.)
559
0
  for (size_t ii = 0; ii < unquoted.size(); ++ii) {
560
    // Note that using 'isalnum' here raises the benchmark time from
561
    // 32ns to 58ns:
562
0
    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
563
0
        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
564
0
        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
565
0
        unquoted[ii] != '_' &&
566
        // If this is the part of a UTF8 or Latin1 character, we need
567
        // to copy this byte without escaping.  Experimentally this is
568
        // what works correctly with the regexp library.
569
0
        !(unquoted[ii] & 128)) {
570
0
      if (unquoted[ii] == '\0') {  // Special handling for null chars.
571
        // Note that this special handling is not strictly required for RE2,
572
        // but this quoting is required for other regexp libraries such as
573
        // PCRE.
574
        // Can't use "\\0" since the next character might be a digit.
575
0
        result += "\\x00";
576
0
        continue;
577
0
      }
578
0
      result += '\\';
579
0
    }
580
0
    result += unquoted[ii];
581
0
  }
582
583
0
  return result;
584
0
}
585
586
bool RE2::PossibleMatchRange(std::string* min, std::string* max,
587
0
                             int maxlen) const {
588
0
  if (prog_ == NULL)
589
0
    return false;
590
591
0
  int n = static_cast<int>(prefix_.size());
592
0
  if (n > maxlen)
593
0
    n = maxlen;
594
595
  // Determine initial min max from prefix_ literal.
596
0
  *min = prefix_.substr(0, n);
597
0
  *max = prefix_.substr(0, n);
598
0
  if (prefix_foldcase_) {
599
    // prefix is ASCII lowercase; change *min to uppercase.
600
0
    for (int i = 0; i < n; i++) {
601
0
      char& c = (*min)[i];
602
0
      if ('a' <= c && c <= 'z')
603
0
        c += 'A' - 'a';
604
0
    }
605
0
  }
606
607
  // Add to prefix min max using PossibleMatchRange on regexp.
608
0
  std::string dmin, dmax;
609
0
  maxlen -= n;
610
0
  if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
611
0
    min->append(dmin);
612
0
    max->append(dmax);
613
0
  } else if (!max->empty()) {
614
    // prog_->PossibleMatchRange has failed us,
615
    // but we still have useful information from prefix_.
616
    // Round up *max to allow any possible suffix.
617
0
    PrefixSuccessor(max);
618
0
  } else {
619
    // Nothing useful.
620
0
    *min = "";
621
0
    *max = "";
622
0
    return false;
623
0
  }
624
625
0
  return true;
626
0
}
627
628
// Avoid possible locale nonsense in standard strcasecmp.
629
// The string a is known to be all lowercase.
630
42
static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
631
42
  const char* ae = a + len;
632
633
45
  for (; a < ae; a++, b++) {
634
43
    uint8_t x = *a;
635
43
    uint8_t y = *b;
636
43
    if ('A' <= y && y <= 'Z')
637
0
      y += 'a' - 'A';
638
43
    if (x != y)
639
40
      return x - y;
640
43
  }
641
2
  return 0;
642
42
}
643
644
645
/***** Actual matching and rewriting code *****/
646
647
bool RE2::Match(const StringPiece& text,
648
                size_t startpos,
649
                size_t endpos,
650
                Anchor re_anchor,
651
                StringPiece* submatch,
652
4.07k
                int nsubmatch) const {
653
4.07k
  if (!ok()) {
654
0
    if (options_.log_errors())
655
0
      LOG(ERROR) << "Invalid RE2: " << *error_;
656
0
    return false;
657
0
  }
658
659
4.07k
  if (startpos > endpos || endpos > text.size()) {
660
0
    if (options_.log_errors())
661
0
      LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
662
0
                 << "startpos: " << startpos << ", "
663
0
                 << "endpos: " << endpos << ", "
664
0
                 << "text size: " << text.size() << "]";
665
0
    return false;
666
0
  }
667
668
4.07k
  StringPiece subtext = text;
669
4.07k
  subtext.remove_prefix(startpos);
670
4.07k
  subtext.remove_suffix(text.size() - endpos);
671
672
  // Use DFAs to find exact location of match, filter out non-matches.
673
674
  // Don't ask for the location if we won't use it.
675
  // SearchDFA can do extra optimizations in that case.
676
4.07k
  StringPiece match;
677
4.07k
  StringPiece* matchp = &match;
678
4.07k
  if (nsubmatch == 0)
679
4.07k
    matchp = NULL;
680
681
4.07k
  int ncap = 1 + NumberOfCapturingGroups();
682
4.07k
  if (ncap > nsubmatch)
683
4.07k
    ncap = nsubmatch;
684
685
  // If the regexp is anchored explicitly, must not be in middle of text.
686
4.07k
  if (prog_->anchor_start() && startpos != 0)
687
0
    return false;
688
4.07k
  if (prog_->anchor_end() && endpos != text.size())
689
0
    return false;
690
691
  // If the regexp is anchored explicitly, update re_anchor
692
  // so that we can potentially fall into a faster case below.
693
4.07k
  if (prog_->anchor_start() && prog_->anchor_end())
694
0
    re_anchor = ANCHOR_BOTH;
695
4.07k
  else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
696
0
    re_anchor = ANCHOR_START;
697
698
  // Check for the required prefix, if any.
699
4.07k
  size_t prefixlen = 0;
700
4.07k
  if (!prefix_.empty()) {
701
85
    if (startpos != 0)
702
0
      return false;
703
85
    prefixlen = prefix_.size();
704
85
    if (prefixlen > subtext.size())
705
0
      return false;
706
85
    if (prefix_foldcase_) {
707
42
      if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
708
40
        return false;
709
43
    } else {
710
43
      if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
711
42
        return false;
712
43
    }
713
3
    subtext.remove_prefix(prefixlen);
714
    // If there is a required prefix, the anchor must be at least ANCHOR_START.
715
3
    if (re_anchor != ANCHOR_BOTH)
716
0
      re_anchor = ANCHOR_START;
717
3
  }
718
719
3.99k
  Prog::Anchor anchor = Prog::kUnanchored;
720
3.99k
  Prog::MatchKind kind =
721
3.99k
      longest_match_ ? Prog::kLongestMatch : Prog::kFirstMatch;
722
723
3.99k
  bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
724
3.99k
  bool can_bit_state = prog_->CanBitState();
725
3.99k
  size_t bit_state_text_max_size = prog_->bit_state_text_max_size();
726
727
3.99k
#ifdef RE2_HAVE_THREAD_LOCAL
728
3.99k
  hooks::context = this;
729
3.99k
#endif
730
3.99k
  bool dfa_failed = false;
731
3.99k
  bool skipped_test = false;
732
3.99k
  switch (re_anchor) {
733
0
    default:
734
0
      LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
735
0
      return false;
736
737
0
    case UNANCHORED: {
738
0
      if (prog_->anchor_end()) {
739
        // This is a very special case: we don't need the forward DFA because
740
        // we already know where the match must end! Instead, the reverse DFA
741
        // can say whether there is a match and (optionally) where it starts.
742
0
        Prog* prog = ReverseProg();
743
0
        if (prog == NULL) {
744
          // Fall back to NFA below.
745
0
          skipped_test = true;
746
0
          break;
747
0
        }
748
0
        if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
749
0
                             Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
750
0
          if (dfa_failed) {
751
0
            if (options_.log_errors())
752
0
              LOG(ERROR) << "DFA out of memory: "
753
0
                         << "pattern length " << pattern_->size() << ", "
754
0
                         << "program size " << prog->size() << ", "
755
0
                         << "list count " << prog->list_count() << ", "
756
0
                         << "bytemap range " << prog->bytemap_range();
757
            // Fall back to NFA below.
758
0
            skipped_test = true;
759
0
            break;
760
0
          }
761
0
          return false;
762
0
        }
763
0
        if (matchp == NULL)  // Matched.  Don't care where.
764
0
          return true;
765
0
        break;
766
0
      }
767
768
0
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
769
0
                            matchp, &dfa_failed, NULL)) {
770
0
        if (dfa_failed) {
771
0
          if (options_.log_errors())
772
0
            LOG(ERROR) << "DFA out of memory: "
773
0
                       << "pattern length " << pattern_->size() << ", "
774
0
                       << "program size " << prog_->size() << ", "
775
0
                       << "list count " << prog_->list_count() << ", "
776
0
                       << "bytemap range " << prog_->bytemap_range();
777
          // Fall back to NFA below.
778
0
          skipped_test = true;
779
0
          break;
780
0
        }
781
0
        return false;
782
0
      }
783
0
      if (matchp == NULL)  // Matched.  Don't care where.
784
0
        return true;
785
      // SearchDFA set match.end() but didn't know where the
786
      // match started.  Run the regexp backward from match.end()
787
      // to find the longest possible match -- that's where it started.
788
0
      Prog* prog = ReverseProg();
789
0
      if (prog == NULL) {
790
        // Fall back to NFA below.
791
0
        skipped_test = true;
792
0
        break;
793
0
      }
794
0
      if (!prog->SearchDFA(match, text, Prog::kAnchored,
795
0
                           Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
796
0
        if (dfa_failed) {
797
0
          if (options_.log_errors())
798
0
            LOG(ERROR) << "DFA out of memory: "
799
0
                       << "pattern length " << pattern_->size() << ", "
800
0
                       << "program size " << prog->size() << ", "
801
0
                       << "list count " << prog->list_count() << ", "
802
0
                       << "bytemap range " << prog->bytemap_range();
803
          // Fall back to NFA below.
804
0
          skipped_test = true;
805
0
          break;
806
0
        }
807
0
        if (options_.log_errors())
808
0
          LOG(ERROR) << "SearchDFA inconsistency";
809
0
        return false;
810
0
      }
811
0
      break;
812
0
    }
813
814
3.99k
    case ANCHOR_BOTH:
815
3.99k
    case ANCHOR_START:
816
3.99k
      if (re_anchor == ANCHOR_BOTH)
817
3.99k
        kind = Prog::kFullMatch;
818
3.99k
      anchor = Prog::kAnchored;
819
820
      // If only a small amount of text and need submatch
821
      // information anyway and we're going to use OnePass or BitState
822
      // to get it, we might as well not even bother with the DFA:
823
      // OnePass or BitState will be fast enough.
824
      // On tiny texts, OnePass outruns even the DFA, and
825
      // it doesn't have the shared state and occasional mutex that
826
      // the DFA does.
827
3.99k
      if (can_one_pass && text.size() <= 4096 &&
828
3.99k
          (ncap > 1 || text.size() <= 16)) {
829
126
        skipped_test = true;
830
126
        break;
831
126
      }
832
3.86k
      if (can_bit_state && text.size() <= bit_state_text_max_size &&
833
3.86k
          ncap > 1) {
834
0
        skipped_test = true;
835
0
        break;
836
0
      }
837
3.86k
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
838
3.86k
                            &match, &dfa_failed, NULL)) {
839
3.29k
        if (dfa_failed) {
840
181
          if (options_.log_errors())
841
0
            LOG(ERROR) << "DFA out of memory: "
842
0
                       << "pattern length " << pattern_->size() << ", "
843
0
                       << "program size " << prog_->size() << ", "
844
0
                       << "list count " << prog_->list_count() << ", "
845
0
                       << "bytemap range " << prog_->bytemap_range();
846
          // Fall back to NFA below.
847
181
          skipped_test = true;
848
181
          break;
849
181
        }
850
3.11k
        return false;
851
3.29k
      }
852
573
      break;
853
3.99k
  }
854
855
880
  if (!skipped_test && ncap <= 1) {
856
    // We know exactly where it matches.  That's enough.
857
573
    if (ncap == 1)
858
0
      submatch[0] = match;
859
573
  } else {
860
307
    StringPiece subtext1;
861
307
    if (skipped_test) {
862
      // DFA ran out of memory or was skipped:
863
      // need to search in entire original text.
864
307
      subtext1 = subtext;
865
307
    } else {
866
      // DFA found the exact match location:
867
      // let NFA run an anchored, full match search
868
      // to find submatch locations.
869
0
      subtext1 = match;
870
0
      anchor = Prog::kAnchored;
871
0
      kind = Prog::kFullMatch;
872
0
    }
873
874
307
    if (can_one_pass && anchor != Prog::kUnanchored) {
875
154
      if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
876
87
        if (!skipped_test && options_.log_errors())
877
0
          LOG(ERROR) << "SearchOnePass inconsistency";
878
87
        return false;
879
87
      }
880
154
    } else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
881
0
      if (!prog_->SearchBitState(subtext1, text, anchor,
882
0
                                 kind, submatch, ncap)) {
883
0
        if (!skipped_test && options_.log_errors())
884
0
          LOG(ERROR) << "SearchBitState inconsistency";
885
0
        return false;
886
0
      }
887
153
    } else {
888
153
      if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
889
153
        if (!skipped_test && options_.log_errors())
890
0
          LOG(ERROR) << "SearchNFA inconsistency";
891
153
        return false;
892
153
      }
893
153
    }
894
307
  }
895
896
  // Adjust overall match for required prefix that we stripped off.
897
640
  if (prefixlen > 0 && nsubmatch > 0)
898
0
    submatch[0] = StringPiece(submatch[0].data() - prefixlen,
899
0
                              submatch[0].size() + prefixlen);
900
901
  // Zero submatches that don't exist in the regexp.
902
640
  for (int i = ncap; i < nsubmatch; i++)
903
0
    submatch[i] = StringPiece();
904
640
  return true;
905
880
}
906
907
// Internal matcher - like Match() but takes Args not StringPieces.
908
bool RE2::DoMatch(const StringPiece& text,
909
                  Anchor re_anchor,
910
                  size_t* consumed,
911
                  const Arg* const* args,
912
4.07k
                  int n) const {
913
4.07k
  if (!ok()) {
914
0
    if (options_.log_errors())
915
0
      LOG(ERROR) << "Invalid RE2: " << *error_;
916
0
    return false;
917
0
  }
918
919
4.07k
  if (NumberOfCapturingGroups() < n) {
920
    // RE has fewer capturing groups than number of Arg pointers passed in.
921
0
    return false;
922
0
  }
923
924
  // Count number of capture groups needed.
925
4.07k
  int nvec;
926
4.07k
  if (n == 0 && consumed == NULL)
927
4.07k
    nvec = 0;
928
0
  else
929
0
    nvec = n+1;
930
931
4.07k
  StringPiece* vec;
932
4.07k
  StringPiece stkvec[kVecSize];
933
4.07k
  StringPiece* heapvec = NULL;
934
935
4.07k
  if (nvec <= static_cast<int>(arraysize(stkvec))) {
936
4.07k
    vec = stkvec;
937
4.07k
  } else {
938
0
    vec = new StringPiece[nvec];
939
0
    heapvec = vec;
940
0
  }
941
942
4.07k
  if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
943
3.43k
    delete[] heapvec;
944
3.43k
    return false;
945
3.43k
  }
946
947
640
  if (consumed != NULL)
948
0
    *consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text));
949
950
640
  if (n == 0 || args == NULL) {
951
    // We are not interested in results
952
640
    delete[] heapvec;
953
640
    return true;
954
640
  }
955
956
  // If we got here, we must have matched the whole pattern.
957
0
  for (int i = 0; i < n; i++) {
958
0
    const StringPiece& s = vec[i+1];
959
0
    if (!args[i]->Parse(s.data(), s.size())) {
960
      // TODO: Should we indicate what the error was?
961
0
      delete[] heapvec;
962
0
      return false;
963
0
    }
964
0
  }
965
966
0
  delete[] heapvec;
967
0
  return true;
968
0
}
969
970
// Checks that the rewrite string is well-formed with respect to this
971
// regular expression.
972
bool RE2::CheckRewriteString(const StringPiece& rewrite,
973
0
                             std::string* error) const {
974
0
  int max_token = -1;
975
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
976
0
       s < end; s++) {
977
0
    int c = *s;
978
0
    if (c != '\\') {
979
0
      continue;
980
0
    }
981
0
    if (++s == end) {
982
0
      *error = "Rewrite schema error: '\\' not allowed at end.";
983
0
      return false;
984
0
    }
985
0
    c = *s;
986
0
    if (c == '\\') {
987
0
      continue;
988
0
    }
989
0
    if (!isdigit(c)) {
990
0
      *error = "Rewrite schema error: "
991
0
               "'\\' must be followed by a digit or '\\'.";
992
0
      return false;
993
0
    }
994
0
    int n = (c - '0');
995
0
    if (max_token < n) {
996
0
      max_token = n;
997
0
    }
998
0
  }
999
1000
0
  if (max_token > NumberOfCapturingGroups()) {
1001
0
    *error = StringPrintf(
1002
0
        "Rewrite schema requests %d matches, but the regexp only has %d "
1003
0
        "parenthesized subexpressions.",
1004
0
        max_token, NumberOfCapturingGroups());
1005
0
    return false;
1006
0
  }
1007
0
  return true;
1008
0
}
1009
1010
// Returns the maximum submatch needed for the rewrite to be done by Replace().
1011
// E.g. if rewrite == "foo \\2,\\1", returns 2.
1012
0
int RE2::MaxSubmatch(const StringPiece& rewrite) {
1013
0
  int max = 0;
1014
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
1015
0
       s < end; s++) {
1016
0
    if (*s == '\\') {
1017
0
      s++;
1018
0
      int c = (s < end) ? *s : -1;
1019
0
      if (isdigit(c)) {
1020
0
        int n = (c - '0');
1021
0
        if (n > max)
1022
0
          max = n;
1023
0
      }
1024
0
    }
1025
0
  }
1026
0
  return max;
1027
0
}
1028
1029
// Append the "rewrite" string, with backslash subsitutions from "vec",
1030
// to string "out".
1031
bool RE2::Rewrite(std::string* out,
1032
                  const StringPiece& rewrite,
1033
                  const StringPiece* vec,
1034
0
                  int veclen) const {
1035
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
1036
0
       s < end; s++) {
1037
0
    if (*s != '\\') {
1038
0
      out->push_back(*s);
1039
0
      continue;
1040
0
    }
1041
0
    s++;
1042
0
    int c = (s < end) ? *s : -1;
1043
0
    if (isdigit(c)) {
1044
0
      int n = (c - '0');
1045
0
      if (n >= veclen) {
1046
0
        if (options_.log_errors()) {
1047
0
          LOG(ERROR) << "invalid substitution \\" << n
1048
0
                     << " from " << veclen << " groups";
1049
0
        }
1050
0
        return false;
1051
0
      }
1052
0
      StringPiece snip = vec[n];
1053
0
      if (!snip.empty())
1054
0
        out->append(snip.data(), snip.size());
1055
0
    } else if (c == '\\') {
1056
0
      out->push_back('\\');
1057
0
    } else {
1058
0
      if (options_.log_errors())
1059
0
        LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
1060
0
      return false;
1061
0
    }
1062
0
  }
1063
0
  return true;
1064
0
}
1065
1066
/***** Parsers for various types *****/
1067
1068
namespace re2_internal {
1069
1070
template <>
1071
0
bool Parse(const char* str, size_t n, void* dest) {
1072
  // We fail if somebody asked us to store into a non-NULL void* pointer
1073
0
  return (dest == NULL);
1074
0
}
1075
1076
template <>
1077
0
bool Parse(const char* str, size_t n, std::string* dest) {
1078
0
  if (dest == NULL) return true;
1079
0
  dest->assign(str, n);
1080
0
  return true;
1081
0
}
1082
1083
template <>
1084
0
bool Parse(const char* str, size_t n, StringPiece* dest) {
1085
0
  if (dest == NULL) return true;
1086
0
  *dest = StringPiece(str, n);
1087
0
  return true;
1088
0
}
1089
1090
template <>
1091
0
bool Parse(const char* str, size_t n, char* dest) {
1092
0
  if (n != 1) return false;
1093
0
  if (dest == NULL) return true;
1094
0
  *dest = str[0];
1095
0
  return true;
1096
0
}
1097
1098
template <>
1099
0
bool Parse(const char* str, size_t n, signed char* dest) {
1100
0
  if (n != 1) return false;
1101
0
  if (dest == NULL) return true;
1102
0
  *dest = str[0];
1103
0
  return true;
1104
0
}
1105
1106
template <>
1107
0
bool Parse(const char* str, size_t n, unsigned char* dest) {
1108
0
  if (n != 1) return false;
1109
0
  if (dest == NULL) return true;
1110
0
  *dest = str[0];
1111
0
  return true;
1112
0
}
1113
1114
// Largest number spec that we are willing to parse
1115
static const int kMaxNumberLength = 32;
1116
1117
// REQUIRES "buf" must have length at least nbuf.
1118
// Copies "str" into "buf" and null-terminates.
1119
// Overwrites *np with the new length.
1120
static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
1121
0
                                   size_t* np, bool accept_spaces) {
1122
0
  size_t n = *np;
1123
0
  if (n == 0) return "";
1124
0
  if (n > 0 && isspace(*str)) {
1125
    // We are less forgiving than the strtoxxx() routines and do not
1126
    // allow leading spaces. We do allow leading spaces for floats.
1127
0
    if (!accept_spaces) {
1128
0
      return "";
1129
0
    }
1130
0
    while (n > 0 && isspace(*str)) {
1131
0
      n--;
1132
0
      str++;
1133
0
    }
1134
0
  }
1135
1136
  // Although buf has a fixed maximum size, we can still handle
1137
  // arbitrarily large integers correctly by omitting leading zeros.
1138
  // (Numbers that are still too long will be out of range.)
1139
  // Before deciding whether str is too long,
1140
  // remove leading zeros with s/000+/00/.
1141
  // Leaving the leading two zeros in place means that
1142
  // we don't change 0000x123 (invalid) into 0x123 (valid).
1143
  // Skip over leading - before replacing.
1144
0
  bool neg = false;
1145
0
  if (n >= 1 && str[0] == '-') {
1146
0
    neg = true;
1147
0
    n--;
1148
0
    str++;
1149
0
  }
1150
1151
0
  if (n >= 3 && str[0] == '0' && str[1] == '0') {
1152
0
    while (n >= 3 && str[2] == '0') {
1153
0
      n--;
1154
0
      str++;
1155
0
    }
1156
0
  }
1157
1158
0
  if (neg) {  // make room in buf for -
1159
0
    n++;
1160
0
    str--;
1161
0
  }
1162
1163
0
  if (n > nbuf-1) return "";
1164
1165
0
  memmove(buf, str, n);
1166
0
  if (neg) {
1167
0
    buf[0] = '-';
1168
0
  }
1169
0
  buf[n] = '\0';
1170
0
  *np = n;
1171
0
  return buf;
1172
0
}
1173
1174
template <>
1175
0
bool Parse(const char* str, size_t n, float* dest) {
1176
0
  if (n == 0) return false;
1177
0
  static const int kMaxLength = 200;
1178
0
  char buf[kMaxLength+1];
1179
0
  str = TerminateNumber(buf, sizeof buf, str, &n, true);
1180
0
  char* end;
1181
0
  errno = 0;
1182
0
  float r = strtof(str, &end);
1183
0
  if (end != str + n) return false;   // Leftover junk
1184
0
  if (errno) return false;
1185
0
  if (dest == NULL) return true;
1186
0
  *dest = r;
1187
0
  return true;
1188
0
}
1189
1190
template <>
1191
0
bool Parse(const char* str, size_t n, double* dest) {
1192
0
  if (n == 0) return false;
1193
0
  static const int kMaxLength = 200;
1194
0
  char buf[kMaxLength+1];
1195
0
  str = TerminateNumber(buf, sizeof buf, str, &n, true);
1196
0
  char* end;
1197
0
  errno = 0;
1198
0
  double r = strtod(str, &end);
1199
0
  if (end != str + n) return false;   // Leftover junk
1200
0
  if (errno) return false;
1201
0
  if (dest == NULL) return true;
1202
0
  *dest = r;
1203
0
  return true;
1204
0
}
1205
1206
template <>
1207
0
bool Parse(const char* str, size_t n, long* dest, int radix) {
1208
0
  if (n == 0) return false;
1209
0
  char buf[kMaxNumberLength+1];
1210
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1211
0
  char* end;
1212
0
  errno = 0;
1213
0
  long r = strtol(str, &end, radix);
1214
0
  if (end != str + n) return false;   // Leftover junk
1215
0
  if (errno) return false;
1216
0
  if (dest == NULL) return true;
1217
0
  *dest = r;
1218
0
  return true;
1219
0
}
1220
1221
template <>
1222
0
bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
1223
0
  if (n == 0) return false;
1224
0
  char buf[kMaxNumberLength+1];
1225
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1226
0
  if (str[0] == '-') {
1227
    // strtoul() will silently accept negative numbers and parse
1228
    // them.  This module is more strict and treats them as errors.
1229
0
    return false;
1230
0
  }
1231
1232
0
  char* end;
1233
0
  errno = 0;
1234
0
  unsigned long r = strtoul(str, &end, radix);
1235
0
  if (end != str + n) return false;   // Leftover junk
1236
0
  if (errno) return false;
1237
0
  if (dest == NULL) return true;
1238
0
  *dest = r;
1239
0
  return true;
1240
0
}
1241
1242
template <>
1243
0
bool Parse(const char* str, size_t n, short* dest, int radix) {
1244
0
  long r;
1245
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1246
0
  if ((short)r != r) return false;              // Out of range
1247
0
  if (dest == NULL) return true;
1248
0
  *dest = (short)r;
1249
0
  return true;
1250
0
}
1251
1252
template <>
1253
0
bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
1254
0
  unsigned long r;
1255
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1256
0
  if ((unsigned short)r != r) return false;     // Out of range
1257
0
  if (dest == NULL) return true;
1258
0
  *dest = (unsigned short)r;
1259
0
  return true;
1260
0
}
1261
1262
template <>
1263
0
bool Parse(const char* str, size_t n, int* dest, int radix) {
1264
0
  long r;
1265
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1266
0
  if ((int)r != r) return false;                // Out of range
1267
0
  if (dest == NULL) return true;
1268
0
  *dest = (int)r;
1269
0
  return true;
1270
0
}
1271
1272
template <>
1273
0
bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
1274
0
  unsigned long r;
1275
0
  if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1276
0
  if ((unsigned int)r != r) return false;       // Out of range
1277
0
  if (dest == NULL) return true;
1278
0
  *dest = (unsigned int)r;
1279
0
  return true;
1280
0
}
1281
1282
template <>
1283
0
bool Parse(const char* str, size_t n, long long* dest, int radix) {
1284
0
  if (n == 0) return false;
1285
0
  char buf[kMaxNumberLength+1];
1286
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1287
0
  char* end;
1288
0
  errno = 0;
1289
0
  long long r = strtoll(str, &end, radix);
1290
0
  if (end != str + n) return false;   // Leftover junk
1291
0
  if (errno) return false;
1292
0
  if (dest == NULL) return true;
1293
0
  *dest = r;
1294
0
  return true;
1295
0
}
1296
1297
template <>
1298
0
bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
1299
0
  if (n == 0) return false;
1300
0
  char buf[kMaxNumberLength+1];
1301
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1302
0
  if (str[0] == '-') {
1303
    // strtoull() will silently accept negative numbers and parse
1304
    // them.  This module is more strict and treats them as errors.
1305
0
    return false;
1306
0
  }
1307
0
  char* end;
1308
0
  errno = 0;
1309
0
  unsigned long long r = strtoull(str, &end, radix);
1310
0
  if (end != str + n) return false;   // Leftover junk
1311
0
  if (errno) return false;
1312
0
  if (dest == NULL) return true;
1313
0
  *dest = r;
1314
0
  return true;
1315
0
}
1316
1317
}  // namespace re2_internal
1318
1319
namespace hooks {
1320
1321
#ifdef RE2_HAVE_THREAD_LOCAL
1322
thread_local const RE2* context = NULL;
1323
#endif
1324
1325
template <typename T>
1326
union Hook {
1327
0
  void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
Unexecuted instantiation: re2::hooks::Hook<void (re2::hooks::DFAStateCacheReset const&)>::Store(void (*)(re2::hooks::DFAStateCacheReset const&))
Unexecuted instantiation: re2::hooks::Hook<void (re2::hooks::DFASearchFailure const&)>::Store(void (*)(re2::hooks::DFASearchFailure const&))
1328
181
  T* Load() const { return cb_.load(std::memory_order_acquire); }
Unexecuted instantiation: re2::hooks::Hook<void (re2::hooks::DFAStateCacheReset const&)>::Load() const
re2::hooks::Hook<void (re2::hooks::DFASearchFailure const&)>::Load() const
Line
Count
Source
1328
181
  T* Load() const { return cb_.load(std::memory_order_acquire); }
1329
1330
#if !defined(__clang__) && defined(_MSC_VER)
1331
  // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
1332
  // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
1333
  static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
1334
                "std::atomic<T*> must be always lock-free");
1335
  T* cb_for_constinit_;
1336
#endif
1337
1338
  std::atomic<T*> cb_;
1339
};
1340
1341
template <typename T>
1342
181
static void DoNothing(const T&) {}
Unexecuted instantiation: re2.cc:void re2::hooks::DoNothing<re2::hooks::DFAStateCacheReset>(re2::hooks::DFAStateCacheReset const&)
re2.cc:void re2::hooks::DoNothing<re2::hooks::DFASearchFailure>(re2::hooks::DFASearchFailure const&)
Line
Count
Source
1342
181
static void DoNothing(const T&) {}
1343
1344
#define DEFINE_HOOK(type, name)                                       \
1345
  static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
1346
0
  void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
Unexecuted instantiation: re2::hooks::SetDFAStateCacheResetHook(void (*)(re2::hooks::DFAStateCacheReset const&))
Unexecuted instantiation: re2::hooks::SetDFASearchFailureHook(void (*)(re2::hooks::DFASearchFailure const&))
1347
181
  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
Unexecuted instantiation: re2::hooks::GetDFAStateCacheResetHook()
re2::hooks::GetDFASearchFailureHook()
Line
Count
Source
1347
181
  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
1348
1349
DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
1350
DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
1351
1352
#undef DEFINE_HOOK
1353
1354
}  // namespace hooks
1355
1356
}  // namespace re2