Coverage Report

Created: 2022-08-24 06:40

/src/duckdb/third_party/re2/re2/re2.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Regular expression interface RE2.
6
//
7
// Originally the PCRE C++ wrapper, but adapted to use
8
// the new automata-based regular expression engines.
9
10
#include "re2/re2.h"
11
12
#include <assert.h>
13
#include <ctype.h>
14
#include <errno.h>
15
#include <stdint.h>
16
#include <stdlib.h>
17
#include <string.h>
18
#include <algorithm>
19
#include <iterator>
20
#include <mutex>
21
#include <string>
22
#include <utility>
23
#include <vector>
24
25
#include "util/util.h"
26
#include "util/logging.h"
27
#include "util/sparse_array.h"
28
#include "util/strutil.h"
29
#include "util/utf.h"
30
#include "re2/prog.h"
31
#include "re2/regexp.h"
32
33
namespace duckdb_re2 {
34
35
// Number of capture arguments that the fixed-size, on-stack submatch
// vectors in this file are sized for.
36
static const int kMaxArgs = 16;
37
// One extra slot because submatch 0 holds the overall match.
static const int kVecSize = 1+kMaxArgs;
38
39
const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
40
41
// Builds an Options object from one of the canned presets.
// Only the encoding, the POSIX syntax / longest-match pair and the
// error-logging switch depend on the preset; every other setting
// starts at the fixed default listed below.
RE2::Options::Options(RE2::CannedOptions opt)
    : encoding_((opt == RE2::Latin1) ? EncodingLatin1 : EncodingUTF8),
      posix_syntax_(RE2::POSIX == opt),    // POSIX preset: POSIX syntax ...
      longest_match_(RE2::POSIX == opt),   // ... and leftmost-longest matching
      log_errors_(RE2::Quiet != opt),      // Quiet preset: no error logging
      max_mem_(kDefaultMaxMem),
      literal_(false),
      never_nl_(false),
      dot_nl_(false),
      never_capture_(false),
      case_sensitive_(true),
      perl_classes_(false),
      word_boundary_(false),
      one_line_(false) {}
56
57
// Shared empty objects, handed out by const reference when an RE2 has
// no error text, no named groups or no group names.
58
// To avoid global constructors, they are allocated in RE2::Init()
// (once, under std::call_once).  ~RE2 deliberately never deletes them.
59
static const std::string* empty_string;
60
static const std::map<std::string, int>* empty_named_groups;
61
static const std::map<int, std::string>* empty_group_names;
62
63
// Converts from Regexp error code to RE2 error code.
64
// Maybe some day they will diverge.  In any event, this
65
// hides the existence of Regexp from RE2 users.
66
0
static RE2::ErrorCode RegexpErrorToRE2(duckdb_re2::RegexpStatusCode code) {
67
0
  switch (code) {
68
0
    case duckdb_re2::kRegexpSuccess:
69
0
      return RE2::NoError;
70
0
    case duckdb_re2::kRegexpInternalError:
71
0
      return RE2::ErrorInternal;
72
0
    case duckdb_re2::kRegexpBadEscape:
73
0
      return RE2::ErrorBadEscape;
74
0
    case duckdb_re2::kRegexpBadCharClass:
75
0
      return RE2::ErrorBadCharClass;
76
0
    case duckdb_re2::kRegexpBadCharRange:
77
0
      return RE2::ErrorBadCharRange;
78
0
    case duckdb_re2::kRegexpMissingBracket:
79
0
      return RE2::ErrorMissingBracket;
80
0
    case duckdb_re2::kRegexpMissingParen:
81
0
      return RE2::ErrorMissingParen;
82
0
    case duckdb_re2::kRegexpTrailingBackslash:
83
0
      return RE2::ErrorTrailingBackslash;
84
0
    case duckdb_re2::kRegexpRepeatArgument:
85
0
      return RE2::ErrorRepeatArgument;
86
0
    case duckdb_re2::kRegexpRepeatSize:
87
0
      return RE2::ErrorRepeatSize;
88
0
    case duckdb_re2::kRegexpRepeatOp:
89
0
      return RE2::ErrorRepeatOp;
90
0
    case duckdb_re2::kRegexpBadPerlOp:
91
0
      return RE2::ErrorBadPerlOp;
92
0
    case duckdb_re2::kRegexpBadUTF8:
93
0
      return RE2::ErrorBadUTF8;
94
0
    case duckdb_re2::kRegexpBadNamedCapture:
95
0
      return RE2::ErrorBadNamedCapture;
96
0
  }
97
0
  return RE2::ErrorInternal;
98
0
}
99
100
0
static std::string trunc(const StringPiece& pattern) {
101
0
  if (pattern.size() < 100)
102
0
    return std::string(pattern);
103
0
  return std::string(pattern.substr(0, 100)) + "...";
104
0
}
105
106
107
0
// Constructors.  Each one simply forwards to Init(); the first three
// compile the pattern with the DefaultOptions preset.

// From a NUL-terminated C string.
RE2::RE2(const char* pattern) {
  Init(pattern, Options(DefaultOptions));
}

// From a std::string.
RE2::RE2(const std::string& pattern) {
  Init(pattern, Options(DefaultOptions));
}

// From a StringPiece.
RE2::RE2(const StringPiece& pattern) {
  Init(pattern, Options(DefaultOptions));
}

// From a StringPiece with caller-supplied options.
RE2::RE2(const StringPiece& pattern, const Options& options) {
  Init(pattern, options);
}
122
123
0
int RE2::Options::ParseFlags() const {
124
0
  int flags = Regexp::ClassNL;
125
0
  switch (encoding()) {
126
0
    default:
127
0
      if (log_errors())
128
0
        LOG(ERROR) << "Unknown encoding " << encoding();
129
0
      break;
130
0
    case RE2::Options::EncodingUTF8:
131
0
      break;
132
0
    case RE2::Options::EncodingLatin1:
133
0
      flags |= Regexp::Latin1;
134
0
      break;
135
0
  }
136
137
0
  if (!posix_syntax())
138
0
    flags |= Regexp::LikePerl;
139
140
0
  if (literal())
141
0
    flags |= Regexp::Literal;
142
143
0
  if (never_nl())
144
0
    flags |= Regexp::NeverNL;
145
146
0
  if (dot_nl())
147
0
    flags |= Regexp::DotNL;
148
149
0
  if (never_capture())
150
0
    flags |= Regexp::NeverCapture;
151
152
0
  if (!case_sensitive())
153
0
    flags |= Regexp::FoldCase;
154
155
0
  if (perl_classes())
156
0
    flags |= Regexp::PerlClasses;
157
158
0
  if (word_boundary())
159
0
    flags |= Regexp::PerlB;
160
161
0
  if (one_line())
162
0
    flags |= Regexp::OneLine;
163
164
0
  return flags;
165
0
}
166
167
0
void RE2::Init(const StringPiece& pattern, const Options& options) {
168
0
  static std::once_flag empty_once;
169
0
  std::call_once(empty_once, []() {
170
0
    empty_string = new std::string;
171
0
    empty_named_groups = new std::map<std::string, int>;
172
0
    empty_group_names = new std::map<int, std::string>;
173
0
  });
174
175
0
  pattern_ = std::string(pattern);
176
0
  options_.Copy(options);
177
0
  entire_regexp_ = NULL;
178
0
  suffix_regexp_ = NULL;
179
0
  prog_ = NULL;
180
0
  num_captures_ = -1;
181
0
  rprog_ = NULL;
182
0
  error_ = empty_string;
183
0
  error_code_ = NoError;
184
0
  named_groups_ = NULL;
185
0
  group_names_ = NULL;
186
187
0
  RegexpStatus status;
188
0
  entire_regexp_ = Regexp::Parse(
189
0
    pattern_,
190
0
    static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
191
0
    &status);
192
0
  if (entire_regexp_ == NULL) {
193
0
    if (options_.log_errors()) {
194
0
      LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
195
0
                 << status.Text();
196
0
    }
197
0
    error_ = new std::string(status.Text());
198
0
    error_code_ = RegexpErrorToRE2(status.code());
199
0
    error_arg_ = std::string(status.error_arg());
200
0
    return;
201
0
  }
202
203
0
  duckdb_re2::Regexp* suffix;
204
0
  if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
205
0
    suffix_regexp_ = suffix;
206
0
  else
207
0
    suffix_regexp_ = entire_regexp_->Incref();
208
209
  // Two thirds of the memory goes to the forward Prog,
210
  // one third to the reverse prog, because the forward
211
  // Prog has two DFAs but the reverse prog has one.
212
0
  prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
213
0
  if (prog_ == NULL) {
214
0
    if (options_.log_errors())
215
0
      LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
216
0
    error_ = new std::string("pattern too large - compile failed");
217
0
    error_code_ = RE2::ErrorPatternTooLarge;
218
0
    return;
219
0
  }
220
221
  // We used to compute this lazily, but it's used during the
222
  // typical control flow for a match call, so we now compute
223
  // it eagerly, which avoids the overhead of std::once_flag.
224
0
  num_captures_ = suffix_regexp_->NumCaptures();
225
226
  // Could delay this until the first match call that
227
  // cares about submatch information, but the one-pass
228
  // machine's memory gets cut from the DFA memory budget,
229
  // and that is harder to do if the DFA has already
230
  // been built.
231
0
  is_one_pass_ = prog_->IsOnePass();
232
0
}
233
234
// Returns rprog_, computing it if needed.
235
0
duckdb_re2::Prog* RE2::ReverseProg() const {
236
0
  std::call_once(rprog_once_, [](const RE2* re) {
237
0
    re->rprog_ =
238
0
        re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
239
0
    if (re->rprog_ == NULL) {
240
0
      if (re->options_.log_errors())
241
0
        LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
242
0
      re->error_ =
243
0
          new std::string("pattern too large - reverse compile failed");
244
0
      re->error_code_ = RE2::ErrorPatternTooLarge;
245
0
    }
246
0
  }, this);
247
0
  return rprog_;
248
0
}
249
250
0
// Releases everything this RE2 owns: its references on the two
// Regexp trees, both compiled programs, and any error string or
// group maps that are private to this object.  The shared empty_*
// placeholders are never deleted here.
RE2::~RE2() {
  if (suffix_regexp_ != NULL) {
    suffix_regexp_->Decref();
  }
  if (entire_regexp_ != NULL) {
    entire_regexp_->Decref();
  }

  delete prog_;
  delete rprog_;

  const bool owns_error = (error_ != empty_string);
  if (owns_error) {
    delete error_;
  }

  const bool owns_named_groups =
      !(named_groups_ == NULL || named_groups_ == empty_named_groups);
  if (owns_named_groups) {
    delete named_groups_;
  }

  const bool owns_group_names =
      !(group_names_ == NULL || group_names_ == empty_group_names);
  if (owns_group_names) {
    delete group_names_;
  }
}
264
265
0
int RE2::ProgramSize() const {
266
0
  if (prog_ == NULL)
267
0
    return -1;
268
0
  return prog_->size();
269
0
}
270
271
0
int RE2::ReverseProgramSize() const {
272
0
  if (prog_ == NULL)
273
0
    return -1;
274
0
  Prog* prog = ReverseProg();
275
0
  if (prog == NULL)
276
0
    return -1;
277
0
  return prog->size();
278
0
}
279
280
0
// Fills `*histogram` with a power-of-two histogram of the fanout
// values reported by prog->Fanout(): entry [b] counts the values v
// for which b is the smallest bucket with (1 << b) >= v.
// Any previous contents of `*histogram` are discarded.
//
// Returns the largest bucket that was populated, or -1 if no fanout
// entries were reported at all (the histogram is then left empty).
static int Fanout(Prog* prog, std::map<int, int>* histogram) {
  SparseArray<int> fanout(prog->size());
  prog->Fanout(&fanout);
  histogram->clear();
  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
    // TODO(junyer): Optimise this?
    int bucket = 0;
    while (1 << bucket < i->value()) {
      bucket++;
    }
    (*histogram)[bucket]++;
  }
  // rbegin() on an empty map must not be dereferenced.
  if (histogram->empty())
    return -1;
  return histogram->rbegin()->first;
}
294
295
0
int RE2::ProgramFanout(std::map<int, int>* histogram) const {
296
0
  if (prog_ == NULL)
297
0
    return -1;
298
0
  return Fanout(prog_, histogram);
299
0
}
300
301
0
int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
302
0
  if (prog_ == NULL)
303
0
    return -1;
304
0
  Prog* prog = ReverseProg();
305
0
  if (prog == NULL)
306
0
    return -1;
307
0
  return Fanout(prog, histogram);
308
0
}
309
310
// Returns named_groups_, computing it if needed.
311
0
const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
312
0
  std::call_once(named_groups_once_, [](const RE2* re) {
313
0
    if (re->suffix_regexp_ != NULL)
314
0
      re->named_groups_ = re->suffix_regexp_->NamedCaptures();
315
0
    if (re->named_groups_ == NULL)
316
0
      re->named_groups_ = empty_named_groups;
317
0
  }, this);
318
0
  return *named_groups_;
319
0
}
320
321
// Returns group_names_, computing it if needed.
322
0
const std::map<int, std::string>& RE2::CapturingGroupNames() const {
323
0
  std::call_once(group_names_once_, [](const RE2* re) {
324
0
    if (re->suffix_regexp_ != NULL)
325
0
      re->group_names_ = re->suffix_regexp_->CaptureNames();
326
0
    if (re->group_names_ == NULL)
327
0
      re->group_names_ = empty_group_names;
328
0
  }, this);
329
0
  return *group_names_;
330
0
}
331
332
/***** Convenience interfaces *****/
333
334
bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
335
0
                     const Arg* const args[], int n) {
336
0
  return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
337
0
}
338
339
bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
340
0
                        const Arg* const args[], int n) {
341
0
  return re.DoMatch(text, UNANCHORED, NULL, args, n);
342
0
}
343
344
// Matches `re` at the start of `*input`.  On success the consumed
// text is removed from the front of `*input` and true is returned;
// on failure `*input` is left untouched.
bool RE2::ConsumeN(StringPiece* input, const RE2& re,
                   const Arg* const args[], int n) {
  size_t matched_len;
  const bool matched = re.DoMatch(*input, ANCHOR_START, &matched_len, args, n);
  if (matched) {
    input->remove_prefix(matched_len);
  }
  return matched;
}
354
355
// Searches for `re` anywhere in `*input`.  On success everything up
// to and including the match is removed from the front of `*input`
// and true is returned; on failure `*input` is left untouched.
bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
                          const Arg* const args[], int n) {
  size_t matched_len;
  const bool matched = re.DoMatch(*input, UNANCHORED, &matched_len, args, n);
  if (matched) {
    input->remove_prefix(matched_len);
  }
  return matched;
}
365
366
bool RE2::Replace(std::string* str,
367
                  const RE2& re,
368
0
                  const StringPiece& rewrite) {
369
0
  StringPiece vec[kVecSize];
370
0
  int nvec = 1 + MaxSubmatch(rewrite);
371
0
  if (nvec > arraysize(vec))
372
0
    return false;
373
0
  if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
374
0
    return false;
375
376
0
  std::string s;
377
0
  if (!re.Rewrite(&s, rewrite, vec, nvec))
378
0
    return false;
379
380
0
  assert(vec[0].begin() >= str->data());
381
0
  assert(vec[0].end() <= str->data()+str->size());
382
0
  str->replace(vec[0].data() - str->data(), vec[0].size(), s);
383
0
  return true;
384
0
}
385
386
int RE2::GlobalReplace(std::string* str,
387
                       const RE2& re,
388
0
                       const StringPiece& rewrite) {
389
0
  StringPiece vec[kVecSize];
390
0
  int nvec = 1 + MaxSubmatch(rewrite);
391
0
  if (nvec > arraysize(vec))
392
0
    return false;
393
394
0
  const char* p = str->data();
395
0
  const char* ep = p + str->size();
396
0
  const char* lastend = NULL;
397
0
  std::string out;
398
0
  int count = 0;
399
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
400
  // Iterate just once when fuzzing. Otherwise, we easily get bogged down
401
  // and coverage is unlikely to improve despite significant expense.
402
0
  while (p == str->data()) {
403
#else
404
  while (p <= ep) {
405
#endif
406
0
    if (!re.Match(*str, static_cast<size_t>(p - str->data()),
407
0
                  str->size(), UNANCHORED, vec, nvec))
408
0
      break;
409
0
    if (p < vec[0].begin())
410
0
      out.append(p, vec[0].begin() - p);
411
0
    if (vec[0].begin() == lastend && vec[0].size() == 0) {
412
      // Disallow empty match at end of last match: skip ahead.
413
      //
414
      // fullrune() takes int, not ptrdiff_t. However, it just looks
415
      // at the leading byte and treats any length >= 4 the same.
416
0
      if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
417
0
          fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
418
        // re is in UTF-8 mode and there is enough left of str
419
        // to allow us to advance by up to UTFmax bytes.
420
0
        Rune r;
421
0
        int n = chartorune(&r, p);
422
        // Some copies of chartorune have a bug that accepts
423
        // encodings of values in (10FFFF, 1FFFFF] as valid.
424
0
        if (r > Runemax) {
425
0
          n = 1;
426
0
          r = Runeerror;
427
0
        }
428
0
        if (!(n == 1 && r == Runeerror)) {  // no decoding error
429
0
          out.append(p, n);
430
0
          p += n;
431
0
          continue;
432
0
        }
433
0
      }
434
      // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
435
      // we fell through from above and the GIGO principle applies.
436
0
      if (p < ep)
437
0
        out.append(p, 1);
438
0
      p++;
439
0
      continue;
440
0
    }
441
0
    re.Rewrite(&out, rewrite, vec, nvec);
442
0
    p = vec[0].end();
443
0
    lastend = p;
444
0
    count++;
445
0
  }
446
447
0
  if (count == 0)
448
0
    return 0;
449
450
0
  if (p < ep)
451
0
    out.append(p, ep - p);
452
0
  using std::swap;
453
0
  swap(out, *str);
454
0
  return count;
455
0
}
456
457
bool RE2::Extract(const StringPiece& text,
458
                  const RE2& re,
459
                  const StringPiece& rewrite,
460
0
                  std::string* out) {
461
0
  StringPiece vec[kVecSize];
462
0
  int nvec = 1 + MaxSubmatch(rewrite);
463
0
  if (nvec > arraysize(vec))
464
0
    return false;
465
466
0
  if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
467
0
    return false;
468
469
0
  out->clear();
470
0
  return re.Rewrite(out, rewrite, vec, nvec);
471
0
}
472
473
0
std::string RE2::QuoteMeta(const StringPiece& unquoted) {
474
0
  std::string result;
475
0
  result.reserve(unquoted.size() << 1);
476
477
  // Escape any ascii character not in [A-Za-z_0-9].
478
  //
479
  // Note that it's legal to escape a character even if it has no
480
  // special meaning in a regular expression -- so this function does
481
  // that.  (This also makes it identical to the perl function of the
482
  // same name except for the null-character special case;
483
  // see `perldoc -f quotemeta`.)
484
0
  for (size_t ii = 0; ii < unquoted.size(); ++ii) {
485
    // Note that using 'isalnum' here raises the benchmark time from
486
    // 32ns to 58ns:
487
0
    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
488
0
        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
489
0
        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
490
0
        unquoted[ii] != '_' &&
491
        // If this is the part of a UTF8 or Latin1 character, we need
492
        // to copy this byte without escaping.  Experimentally this is
493
        // what works correctly with the regexp library.
494
0
        !(unquoted[ii] & 128)) {
495
0
      if (unquoted[ii] == '\0') {  // Special handling for null chars.
496
        // Note that this special handling is not strictly required for RE2,
497
        // but this quoting is required for other regexp libraries such as
498
        // PCRE.
499
        // Can't use "\\0" since the next character might be a digit.
500
0
        result += "\\x00";
501
0
        continue;
502
0
      }
503
0
      result += '\\';
504
0
    }
505
0
    result += unquoted[ii];
506
0
  }
507
508
0
  return result;
509
0
}
510
511
bool RE2::PossibleMatchRange(std::string* min, std::string* max,
512
0
                             int maxlen) const {
513
0
  if (prog_ == NULL)
514
0
    return false;
515
516
0
  int n = static_cast<int>(prefix_.size());
517
0
  if (n > maxlen)
518
0
    n = maxlen;
519
520
  // Determine initial min max from prefix_ literal.
521
0
  *min = prefix_.substr(0, n);
522
0
  *max = prefix_.substr(0, n);
523
0
  if (prefix_foldcase_) {
524
    // prefix is ASCII lowercase; change *min to uppercase.
525
0
    for (int i = 0; i < n; i++) {
526
0
      char& c = (*min)[i];
527
0
      if ('a' <= c && c <= 'z')
528
0
        c += 'A' - 'a';
529
0
    }
530
0
  }
531
532
  // Add to prefix min max using PossibleMatchRange on regexp.
533
0
  std::string dmin, dmax;
534
0
  maxlen -= n;
535
0
  if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
536
0
    min->append(dmin);
537
0
    max->append(dmax);
538
0
  } else if (!max->empty()) {
539
    // prog_->PossibleMatchRange has failed us,
540
    // but we still have useful information from prefix_.
541
    // Round up *max to allow any possible suffix.
542
0
    PrefixSuccessor(max);
543
0
  } else {
544
    // Nothing useful.
545
0
    *min = "";
546
0
    *max = "";
547
0
    return false;
548
0
  }
549
550
0
  return true;
551
0
}
552
553
// ASCII-only, locale-independent case-insensitive comparison of the
// first `len` bytes of `a` and `b`.
// The string a is known to be all lowercase, so only the bytes of b
// are folded (A-Z -> a-z) before comparing.
// Returns 0 if all `len` bytes are equal after folding, otherwise the
// difference (as unsigned bytes) at the first mismatch.
static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    const uint8_t lhs = a[i];
    uint8_t rhs = b[i];
    if (rhs >= 'A' && rhs <= 'Z') {
      rhs += 'a' - 'A';
    }
    if (lhs != rhs) {
      return lhs - rhs;
    }
  }
  return 0;
}
568
569
570
/***** Actual matching and rewriting code *****/
571
572
bool RE2::Match(const StringPiece& text,
573
                size_t startpos,
574
                size_t endpos,
575
                Anchor re_anchor,
576
                StringPiece* submatch,
577
0
                int nsubmatch) const {
578
0
  if (!ok()) {
579
0
    if (options_.log_errors())
580
0
      LOG(ERROR) << "Invalid RE2: " << *error_;
581
0
    return false;
582
0
  }
583
584
0
  if (startpos > endpos || endpos > text.size()) {
585
0
    if (options_.log_errors())
586
0
      LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
587
0
                 << "startpos: " << startpos << ", "
588
0
                 << "endpos: " << endpos << ", "
589
0
                 << "text size: " << text.size() << "]";
590
0
    return false;
591
0
  }
592
593
0
  StringPiece subtext = text;
594
0
  subtext.remove_prefix(startpos);
595
0
  subtext.remove_suffix(text.size() - endpos);
596
597
  // Use DFAs to find exact location of match, filter out non-matches.
598
599
  // Don't ask for the location if we won't use it.
600
  // SearchDFA can do extra optimizations in that case.
601
0
  StringPiece match;
602
0
  StringPiece* matchp = &match;
603
0
  if (nsubmatch == 0)
604
0
    matchp = NULL;
605
606
0
  int ncap = 1 + NumberOfCapturingGroups();
607
0
  if (ncap > nsubmatch)
608
0
    ncap = nsubmatch;
609
610
  // If the regexp is anchored explicitly, must not be in middle of text.
611
0
  if (prog_->anchor_start() && startpos != 0)
612
0
    return false;
613
614
  // If the regexp is anchored explicitly, update re_anchor
615
  // so that we can potentially fall into a faster case below.
616
0
  if (prog_->anchor_start() && prog_->anchor_end())
617
0
    re_anchor = ANCHOR_BOTH;
618
0
  else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
619
0
    re_anchor = ANCHOR_START;
620
621
  // Check for the required prefix, if any.
622
0
  size_t prefixlen = 0;
623
0
  if (!prefix_.empty()) {
624
0
    if (startpos != 0)
625
0
      return false;
626
0
    prefixlen = prefix_.size();
627
0
    if (prefixlen > subtext.size())
628
0
      return false;
629
0
    if (prefix_foldcase_) {
630
0
      if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
631
0
        return false;
632
0
    } else {
633
0
      if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
634
0
        return false;
635
0
    }
636
0
    subtext.remove_prefix(prefixlen);
637
    // If there is a required prefix, the anchor must be at least ANCHOR_START.
638
0
    if (re_anchor != ANCHOR_BOTH)
639
0
      re_anchor = ANCHOR_START;
640
0
  }
641
642
0
  Prog::Anchor anchor = Prog::kUnanchored;
643
0
  Prog::MatchKind kind = Prog::kFirstMatch;
644
0
  if (options_.longest_match())
645
0
    kind = Prog::kLongestMatch;
646
0
  bool skipped_test = false;
647
648
0
  bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
649
650
  // BitState allocates a bitmap of size prog_->list_count() * text.size().
651
  // It also allocates a stack of 3-word structures which could potentially
652
  // grow as large as prog_->list_count() * text.size(), but in practice is
653
  // much smaller.
654
0
  const int kMaxBitStateBitmapSize = 256*1024;  // bitmap size <= max (bits)
655
0
  bool can_bit_state = prog_->CanBitState();
656
0
  size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
657
658
0
  bool dfa_failed = false;
659
0
  switch (re_anchor) {
660
0
    default:
661
0
    case UNANCHORED: {
662
0
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
663
0
                            matchp, &dfa_failed, NULL)) {
664
0
        if (dfa_failed) {
665
0
          if (options_.log_errors())
666
0
            LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
667
0
                       << "bytemap range " << prog_->bytemap_range() << ", "
668
0
                       << "list count " << prog_->list_count();
669
          // Fall back to NFA below.
670
0
          skipped_test = true;
671
0
          break;
672
0
        }
673
0
        return false;
674
0
      }
675
0
      if (matchp == NULL)  // Matched.  Don't care where
676
0
        return true;
677
      // SearchDFA set match[0].end() but didn't know where the
678
      // match started.  Run the regexp backward from match[0].end()
679
      // to find the longest possible match -- that's where it started.
680
0
      Prog* prog = ReverseProg();
681
0
      if (prog == NULL)
682
0
        return false;
683
0
      if (!prog->SearchDFA(match, text, Prog::kAnchored,
684
0
                           Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
685
0
        if (dfa_failed) {
686
0
          if (options_.log_errors())
687
0
            LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", "
688
0
                       << "bytemap range " << prog->bytemap_range() << ", "
689
0
                       << "list count " << prog->list_count();
690
          // Fall back to NFA below.
691
0
          skipped_test = true;
692
0
          break;
693
0
        }
694
0
        if (options_.log_errors())
695
0
          LOG(ERROR) << "SearchDFA inconsistency";
696
0
        return false;
697
0
      }
698
0
      break;
699
0
    }
700
701
0
    case ANCHOR_BOTH:
702
0
    case ANCHOR_START:
703
0
      if (re_anchor == ANCHOR_BOTH)
704
0
        kind = Prog::kFullMatch;
705
0
      anchor = Prog::kAnchored;
706
707
      // If only a small amount of text and need submatch
708
      // information anyway and we're going to use OnePass or BitState
709
      // to get it, we might as well not even bother with the DFA:
710
      // OnePass or BitState will be fast enough.
711
      // On tiny texts, OnePass outruns even the DFA, and
712
      // it doesn't have the shared state and occasional mutex that
713
      // the DFA does.
714
0
      if (can_one_pass && text.size() <= 4096 &&
715
0
          (ncap > 1 || text.size() <= 8)) {
716
0
        skipped_test = true;
717
0
        break;
718
0
      }
719
0
      if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
720
0
        skipped_test = true;
721
0
        break;
722
0
      }
723
0
      if (!prog_->SearchDFA(subtext, text, anchor, kind,
724
0
                            &match, &dfa_failed, NULL)) {
725
0
        if (dfa_failed) {
726
0
          if (options_.log_errors())
727
0
            LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
728
0
                       << "bytemap range " << prog_->bytemap_range() << ", "
729
0
                       << "list count " << prog_->list_count();
730
          // Fall back to NFA below.
731
0
          skipped_test = true;
732
0
          break;
733
0
        }
734
0
        return false;
735
0
      }
736
0
      break;
737
0
  }
738
739
0
  if (!skipped_test && ncap <= 1) {
740
    // We know exactly where it matches.  That's enough.
741
0
    if (ncap == 1)
742
0
      submatch[0] = match;
743
0
  } else {
744
0
    StringPiece subtext1;
745
0
    if (skipped_test) {
746
      // DFA ran out of memory or was skipped:
747
      // need to search in entire original text.
748
0
      subtext1 = subtext;
749
0
    } else {
750
      // DFA found the exact match location:
751
      // let NFA run an anchored, full match search
752
      // to find submatch locations.
753
0
      subtext1 = match;
754
0
      anchor = Prog::kAnchored;
755
0
      kind = Prog::kFullMatch;
756
0
    }
757
758
0
    if (can_one_pass && anchor != Prog::kUnanchored) {
759
0
      if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
760
0
        if (!skipped_test && options_.log_errors())
761
0
          LOG(ERROR) << "SearchOnePass inconsistency";
762
0
        return false;
763
0
      }
764
0
    } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
765
0
      if (!prog_->SearchBitState(subtext1, text, anchor,
766
0
                                 kind, submatch, ncap)) {
767
0
        if (!skipped_test && options_.log_errors())
768
0
          LOG(ERROR) << "SearchBitState inconsistency";
769
0
        return false;
770
0
      }
771
0
    } else {
772
0
      if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
773
0
        if (!skipped_test && options_.log_errors())
774
0
          LOG(ERROR) << "SearchNFA inconsistency";
775
0
        return false;
776
0
      }
777
0
    }
778
0
  }
779
780
  // Adjust overall match for required prefix that we stripped off.
781
0
  if (prefixlen > 0 && nsubmatch > 0)
782
0
    submatch[0] = StringPiece(submatch[0].data() - prefixlen,
783
0
                              submatch[0].size() + prefixlen);
784
785
  // Zero submatches that don't exist in the regexp.
786
0
  for (int i = ncap; i < nsubmatch; i++)
787
0
    submatch[i] = StringPiece();
788
0
  return true;
789
0
}
790
791
// Internal matcher - like Match() but takes Args not StringPieces.
792
bool RE2::DoMatch(const StringPiece& text,
793
                  Anchor re_anchor,
794
                  size_t* consumed,
795
                  const Arg* const* args,
796
0
                  int n) const {
797
0
  if (!ok()) {
798
0
    if (options_.log_errors())
799
0
      LOG(ERROR) << "Invalid RE2: " << *error_;
800
0
    return false;
801
0
  }
802
803
0
  if (NumberOfCapturingGroups() < n) {
804
    // RE has fewer capturing groups than number of Arg pointers passed in.
805
0
    return false;
806
0
  }
807
808
  // Count number of capture groups needed.
809
0
  int nvec;
810
0
  if (n == 0 && consumed == NULL)
811
0
    nvec = 0;
812
0
  else
813
0
    nvec = n+1;
814
815
0
  StringPiece* vec;
816
0
  StringPiece stkvec[kVecSize];
817
0
  StringPiece* heapvec = NULL;
818
819
0
  if (nvec <= arraysize(stkvec)) {
820
0
    vec = stkvec;
821
0
  } else {
822
0
    vec = new StringPiece[nvec];
823
0
    heapvec = vec;
824
0
  }
825
826
0
  if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
827
0
    delete[] heapvec;
828
0
    return false;
829
0
  }
830
831
0
  if (consumed != NULL)
832
0
    *consumed = static_cast<size_t>(vec[0].end() - text.begin());
833
834
0
  if (n == 0 || args == NULL) {
835
    // We are not interested in results
836
0
    delete[] heapvec;
837
0
    return true;
838
0
  }
839
840
  // If we got here, we must have matched the whole pattern.
841
0
  for (int i = 0; i < n; i++) {
842
0
    const StringPiece& s = vec[i+1];
843
0
    if (!args[i]->Parse(s.data(), s.size())) {
844
      // TODO: Should we indicate what the error was?
845
0
      delete[] heapvec;
846
0
      return false;
847
0
    }
848
0
  }
849
850
0
  delete[] heapvec;
851
0
  return true;
852
0
}
853
854
// Checks that the rewrite string is well-formed with respect to this
855
// regular expression.
856
bool RE2::CheckRewriteString(const StringPiece& rewrite,
857
0
                             std::string* error) const {
858
0
  int max_token = -1;
859
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
860
0
       s < end; s++) {
861
0
    int c = *s;
862
0
    if (c != '\\') {
863
0
      continue;
864
0
    }
865
0
    if (++s == end) {
866
0
      *error = "Rewrite schema error: '\\' not allowed at end.";
867
0
      return false;
868
0
    }
869
0
    c = *s;
870
0
    if (c == '\\') {
871
0
      continue;
872
0
    }
873
0
    if (!isdigit(c)) {
874
0
      *error = "Rewrite schema error: "
875
0
               "'\\' must be followed by a digit or '\\'.";
876
0
      return false;
877
0
    }
878
0
    int n = (c - '0');
879
0
    if (max_token < n) {
880
0
      max_token = n;
881
0
    }
882
0
  }
883
884
0
  if (max_token > NumberOfCapturingGroups()) {
885
0
    SStringPrintf(error, "Rewrite schema requests %d matches, "
886
0
                  "but the regexp only has %d parenthesized subexpressions.",
887
0
                  max_token, NumberOfCapturingGroups());
888
0
    return false;
889
0
  }
890
0
  return true;
891
0
}
892
893
// Returns the maximum submatch needed for the rewrite to be done by Replace().
894
// E.g. if rewrite == "foo \\2,\\1", returns 2.
895
0
int RE2::MaxSubmatch(const StringPiece& rewrite) {
896
0
  int max = 0;
897
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
898
0
       s < end; s++) {
899
0
    if (*s == '\\') {
900
0
      s++;
901
0
      int c = (s < end) ? *s : -1;
902
0
      if (isdigit(c)) {
903
0
        int n = (c - '0');
904
0
        if (n > max)
905
0
          max = n;
906
0
      }
907
0
    }
908
0
  }
909
0
  return max;
910
0
}
911
912
// Append the "rewrite" string, with backslash subsitutions from "vec",
913
// to string "out".
914
bool RE2::Rewrite(std::string* out,
915
                  const StringPiece& rewrite,
916
                  const StringPiece* vec,
917
0
                  int veclen) const {
918
0
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
919
0
       s < end; s++) {
920
0
    if (*s != '\\') {
921
0
      out->push_back(*s);
922
0
      continue;
923
0
    }
924
0
    s++;
925
0
    int c = (s < end) ? *s : -1;
926
0
    if (isdigit(c)) {
927
0
      int n = (c - '0');
928
0
      if (n >= veclen) {
929
0
        if (options_.log_errors()) {
930
0
          LOG(ERROR) << "requested group " << n
931
0
                     << " in regexp " << rewrite.data();
932
0
        }
933
0
        return false;
934
0
      }
935
0
      StringPiece snip = vec[n];
936
0
      if (snip.size() > 0)
937
0
        out->append(snip.data(), snip.size());
938
0
    } else if (c == '\\') {
939
0
      out->push_back('\\');
940
0
    } else {
941
0
      if (options_.log_errors())
942
0
        LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
943
0
      return false;
944
0
    }
945
0
  }
946
0
  return true;
947
0
}
948
949
/***** Parsers for various types *****/
950
951
0
bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
  // This parser stores nothing, so it succeeds only when the caller did
  // not supply a destination; the matched text itself is ignored.
  const bool no_destination = (dest == nullptr);
  return no_destination;
}
955
956
0
bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
  // Copies the n matched bytes into the std::string behind dest, if any.
  // Always succeeds.
  if (dest != nullptr) {
    std::string* target = static_cast<std::string*>(dest);
    target->assign(str, n);
  }
  return true;
}
961
962
0
bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
963
0
  if (dest == NULL) return true;
964
0
  *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
965
0
  return true;
966
0
}
967
968
0
bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
  // Only a match of exactly one byte can be stored as a char.
  if (n != 1) {
    return false;
  }
  if (dest != nullptr) {
    char* target = static_cast<char*>(dest);
    *target = *str;
  }
  return true;
}
974
975
0
bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
  // Only a match of exactly one byte can be stored as a signed char.
  if (n != 1) {
    return false;
  }
  if (dest != nullptr) {
    signed char* target = static_cast<signed char*>(dest);
    *target = *str;
  }
  return true;
}
981
982
0
bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
  // Only a match of exactly one byte can be stored as an unsigned char.
  if (n != 1) {
    return false;
  }
  if (dest != nullptr) {
    unsigned char* target = static_cast<unsigned char*>(dest);
    *target = *str;
  }
  return true;
}
988
989
// Largest number spec that we are willing to parse
990
static const int kMaxNumberLength = 32;
991
992
// REQUIRES "buf" must have length at least nbuf.
// Copies "str" into "buf" and null-terminates.
// Overwrites *np with the new length.
//
// Returns "buf" on success.  Returns "" and leaves *np untouched when the
// input cannot be a number this module accepts: it is empty, it starts with
// whitespace while accept_spaces is false, it consists of whitespace only,
// or it does not fit in "buf" even after redundant leading zeros have been
// dropped.  Callers detect that case because the end pointer reported by
// strtoxxx() then differs from the returned pointer plus *np.
static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
                                   size_t* np, bool accept_spaces) {
  size_t n = *np;
  if (n == 0) return "";
  // The casts to unsigned char matter: plain char may be signed, and
  // passing a negative value other than EOF to isspace() is undefined.
  if (isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces. We do allow leading spaces for floats.
    if (!accept_spaces) {
      return "";
    }
    while (n > 0 && isspace(static_cast<unsigned char>(*str))) {
      n--;
      str++;
    }
    // Nothing but whitespace.  Fail here; otherwise the caller would see an
    // empty buffer with *np == 0 and accept the input as the number zero.
    if (n == 0) {
      return "";
    }
  }

  // Although buf has a fixed maximum size, we can still handle
  // arbitrarily large integers correctly by omitting leading zeros.
  // (Numbers that are still too long will be out of range.)
  // Before deciding whether str is too long,
  // remove leading zeros with s/000+/00/.
  // Leaving the leading two zeros in place means that
  // we don't change 0000x123 (invalid) into 0x123 (valid).
  // Skip over leading - before replacing.
  bool neg = false;
  if (n >= 1 && str[0] == '-') {
    neg = true;
    n--;
    str++;
  }

  if (n >= 3 && str[0] == '0' && str[1] == '0') {
    while (n >= 3 && str[2] == '0') {
      n--;
      str++;
    }
  }

  if (neg) {  // make room in buf for -
    n++;
    str--;
  }

  if (n > nbuf-1) return "";

  memmove(buf, str, n);
  if (neg) {
    // str[0] is either the original '-' or a zero that was skipped above;
    // in both cases the first output byte must be the sign.
    buf[0] = '-';
  }
  buf[n] = '\0';
  *np = n;
  return buf;
}
1048
1049
bool RE2::Arg::parse_long_radix(const char* str,
1050
                                size_t n,
1051
                                void* dest,
1052
0
                                int radix) {
1053
0
  if (n == 0) return false;
1054
0
  char buf[kMaxNumberLength+1];
1055
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1056
0
  char* end;
1057
0
  errno = 0;
1058
0
  long r = strtol(str, &end, radix);
1059
0
  if (end != str + n) return false;   // Leftover junk
1060
0
  if (errno) return false;
1061
0
  if (dest == NULL) return true;
1062
0
  *(reinterpret_cast<long*>(dest)) = r;
1063
0
  return true;
1064
0
}
1065
1066
bool RE2::Arg::parse_ulong_radix(const char* str,
1067
                                 size_t n,
1068
                                 void* dest,
1069
0
                                 int radix) {
1070
0
  if (n == 0) return false;
1071
0
  char buf[kMaxNumberLength+1];
1072
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1073
0
  if (str[0] == '-') {
1074
    // strtoul() will silently accept negative numbers and parse
1075
    // them.  This module is more strict and treats them as errors.
1076
0
    return false;
1077
0
  }
1078
1079
0
  char* end;
1080
0
  errno = 0;
1081
0
  unsigned long r = strtoul(str, &end, radix);
1082
0
  if (end != str + n) return false;   // Leftover junk
1083
0
  if (errno) return false;
1084
0
  if (dest == NULL) return true;
1085
0
  *(reinterpret_cast<unsigned long*>(dest)) = r;
1086
0
  return true;
1087
0
}
1088
1089
bool RE2::Arg::parse_short_radix(const char* str,
1090
                                 size_t n,
1091
                                 void* dest,
1092
0
                                 int radix) {
1093
0
  long r;
1094
0
  if (!parse_long_radix(str, n, &r, radix)) return false;  // Could not parse
1095
0
  if ((short)r != r) return false;                         // Out of range
1096
0
  if (dest == NULL) return true;
1097
0
  *(reinterpret_cast<short*>(dest)) = (short)r;
1098
0
  return true;
1099
0
}
1100
1101
bool RE2::Arg::parse_ushort_radix(const char* str,
1102
                                  size_t n,
1103
                                  void* dest,
1104
0
                                  int radix) {
1105
0
  unsigned long r;
1106
0
  if (!parse_ulong_radix(str, n, &r, radix)) return false;  // Could not parse
1107
0
  if ((unsigned short)r != r) return false;                 // Out of range
1108
0
  if (dest == NULL) return true;
1109
0
  *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
1110
0
  return true;
1111
0
}
1112
1113
bool RE2::Arg::parse_int_radix(const char* str,
1114
                               size_t n,
1115
                               void* dest,
1116
0
                               int radix) {
1117
0
  long r;
1118
0
  if (!parse_long_radix(str, n, &r, radix)) return false;  // Could not parse
1119
0
  if ((int)r != r) return false;                           // Out of range
1120
0
  if (dest == NULL) return true;
1121
0
  *(reinterpret_cast<int*>(dest)) = (int)r;
1122
0
  return true;
1123
0
}
1124
1125
bool RE2::Arg::parse_uint_radix(const char* str,
1126
                                size_t n,
1127
                                void* dest,
1128
0
                                int radix) {
1129
0
  unsigned long r;
1130
0
  if (!parse_ulong_radix(str, n, &r, radix)) return false;  // Could not parse
1131
0
  if ((unsigned int)r != r) return false;                   // Out of range
1132
0
  if (dest == NULL) return true;
1133
0
  *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
1134
0
  return true;
1135
0
}
1136
1137
bool RE2::Arg::parse_longlong_radix(const char* str,
1138
                                    size_t n,
1139
                                    void* dest,
1140
0
                                    int radix) {
1141
0
  if (n == 0) return false;
1142
0
  char buf[kMaxNumberLength+1];
1143
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1144
0
  char* end;
1145
0
  errno = 0;
1146
0
  long long r = strtoll(str, &end, radix);
1147
0
  if (end != str + n) return false;   // Leftover junk
1148
0
  if (errno) return false;
1149
0
  if (dest == NULL) return true;
1150
0
  *(reinterpret_cast<long long*>(dest)) = r;
1151
0
  return true;
1152
0
}
1153
1154
bool RE2::Arg::parse_ulonglong_radix(const char* str,
1155
                                     size_t n,
1156
                                     void* dest,
1157
0
                                     int radix) {
1158
0
  if (n == 0) return false;
1159
0
  char buf[kMaxNumberLength+1];
1160
0
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
1161
0
  if (str[0] == '-') {
1162
    // strtoull() will silently accept negative numbers and parse
1163
    // them.  This module is more strict and treats them as errors.
1164
0
    return false;
1165
0
  }
1166
0
  char* end;
1167
0
  errno = 0;
1168
0
  unsigned long long r = strtoull(str, &end, radix);
1169
0
  if (end != str + n) return false;   // Leftover junk
1170
0
  if (errno) return false;
1171
0
  if (dest == NULL) return true;
1172
0
  *(reinterpret_cast<unsigned long long*>(dest)) = r;
1173
0
  return true;
1174
0
}
1175
1176
static bool parse_double_float(const char* str, size_t n, bool isfloat,
1177
0
                               void* dest) {
1178
0
  if (n == 0) return false;
1179
0
  static const int kMaxLength = 200;
1180
0
  char buf[kMaxLength+1];
1181
0
  str = TerminateNumber(buf, sizeof buf, str, &n, true);
1182
0
  char* end;
1183
0
  errno = 0;
1184
0
  double r;
1185
0
  if (isfloat) {
1186
0
    r = strtof(str, &end);
1187
0
  } else {
1188
0
    r = strtod(str, &end);
1189
0
  }
1190
0
  if (end != str + n) return false;   // Leftover junk
1191
0
  if (errno) return false;
1192
0
  if (dest == NULL) return true;
1193
0
  if (isfloat) {
1194
0
    *(reinterpret_cast<float*>(dest)) = (float)r;
1195
0
  } else {
1196
0
    *(reinterpret_cast<double*>(dest)) = r;
1197
0
  }
1198
0
  return true;
1199
0
}
1200
1201
0
bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
1202
0
  return parse_double_float(str, n, false, dest);
1203
0
}
1204
1205
0
bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
1206
0
  return parse_double_float(str, n, true, dest);
1207
0
}
1208
1209
#define DEFINE_INTEGER_PARSER(name)                                            \
1210
0
  bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) {         \
1211
0
    return parse_##name##_radix(str, n, dest, 10);                             \
1212
0
  }                                                                            \
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_short(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ushort(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_int(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_uint(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_long(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulong(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_longlong(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulonglong(char const*, unsigned long, void*)
1213
0
  bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) {   \
1214
0
    return parse_##name##_radix(str, n, dest, 16);                             \
1215
0
  }                                                                            \
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_short_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ushort_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_int_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_uint_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_long_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulong_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_longlong_hex(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulonglong_hex(char const*, unsigned long, void*)
1216
0
  bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
1217
0
    return parse_##name##_radix(str, n, dest, 8);                              \
1218
0
  }                                                                            \
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_short_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ushort_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_int_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_uint_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_long_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulong_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_longlong_octal(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulonglong_octal(char const*, unsigned long, void*)
1219
  bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n,              \
1220
0
                                       void* dest) {                           \
1221
0
    return parse_##name##_radix(str, n, dest, 0);                              \
1222
0
  }
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_short_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ushort_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_int_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_uint_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_long_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulong_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_longlong_cradix(char const*, unsigned long, void*)
Unexecuted instantiation: duckdb_re2::RE2::Arg::parse_ulonglong_cradix(char const*, unsigned long, void*)
1223
1224
DEFINE_INTEGER_PARSER(short)
1225
DEFINE_INTEGER_PARSER(ushort)
1226
DEFINE_INTEGER_PARSER(int)
1227
DEFINE_INTEGER_PARSER(uint)
1228
DEFINE_INTEGER_PARSER(long)
1229
DEFINE_INTEGER_PARSER(ulong)
1230
DEFINE_INTEGER_PARSER(longlong)
1231
DEFINE_INTEGER_PARSER(ulonglong)
1232
1233
#undef DEFINE_INTEGER_PARSER
1234
1235
}  // namespace duckdb_re2