Coverage Report

Created: 2025-07-09 06:39

/src/re2/re2/set.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#include "re2/set.h"
6
7
#include <stddef.h>
8
9
#include <algorithm>
10
#include <memory>
11
#include <string>
12
#include <utility>
13
#include <vector>
14
15
#include "absl/log/absl_log.h"
16
#include "absl/strings/string_view.h"
17
#include "re2/pod_array.h"
18
#include "re2/prog.h"
19
#include "re2/re2.h"
20
#include "re2/regexp.h"
21
#include "re2/sparse_set.h"
22
23
namespace re2 {
24
25
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
26
6.97k
    : options_(options),
27
6.97k
      anchor_(anchor),
28
6.97k
      compiled_(false),
29
6.97k
      size_(0) {
30
6.97k
  options_.set_never_capture(true);  // might unblock some optimisations
31
6.97k
}
32
33
6.97k
RE2::Set::~Set() {
34
6.97k
  for (size_t i = 0; i < elem_.size(); i++)
35
0
    elem_[i].second->Decref();
36
6.97k
}
37
38
RE2::Set::Set(Set&& other)
39
0
    : options_(other.options_),
40
0
      anchor_(other.anchor_),
41
0
      elem_(std::move(other.elem_)),
42
0
      compiled_(other.compiled_),
43
0
      size_(other.size_),
44
0
      prog_(std::move(other.prog_)) {
45
0
  other.elem_.clear();
46
0
  other.elem_.shrink_to_fit();
47
0
  other.compiled_ = false;
48
0
  other.size_ = 0;
49
0
  other.prog_.reset();
50
0
}
51
52
0
RE2::Set& RE2::Set::operator=(Set&& other) {
53
0
  this->~Set();
54
0
  (void) new (this) Set(std::move(other));
55
0
  return *this;
56
0
}
57
58
0
int RE2::Set::Size() const {
59
0
  if (!compiled_)
60
0
    return static_cast<int>(elem_.size());
61
0
  return size_;
62
0
}
63
64
6.97k
int RE2::Set::Add(absl::string_view pattern, std::string* error) {
65
6.97k
  if (compiled_) {
66
0
    ABSL_LOG(DFATAL) << "RE2::Set::Add() called after compiling";
67
0
    return -1;
68
0
  }
69
70
6.97k
  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
71
6.97k
    options_.ParseFlags());
72
6.97k
  RegexpStatus status;
73
6.97k
  re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
74
6.97k
  if (re == NULL) {
75
0
    if (error != NULL)
76
0
      *error = status.Text();
77
0
    if (options_.log_errors())
78
0
      ABSL_LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
79
0
    return -1;
80
0
  }
81
82
  // Concatenate with match index and push on vector.
83
6.97k
  int n = static_cast<int>(elem_.size());
84
6.97k
  re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
85
6.97k
  if (re->op() == kRegexpConcat) {
86
3.14k
    int nsub = re->nsub();
87
3.14k
    PODArray<re2::Regexp*> sub(nsub + 1);
88
25.2k
    for (int i = 0; i < nsub; i++)
89
22.0k
      sub[i] = re->sub()[i]->Incref();
90
3.14k
    sub[nsub] = m;
91
3.14k
    re->Decref();
92
3.14k
    re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
93
3.83k
  } else {
94
3.83k
    re2::Regexp* sub[2];
95
3.83k
    sub[0] = re;
96
3.83k
    sub[1] = m;
97
3.83k
    re = re2::Regexp::Concat(sub, 2, pf);
98
3.83k
  }
99
6.97k
  elem_.emplace_back(std::string(pattern), re);
100
6.97k
  return n;
101
6.97k
}
102
103
6.97k
bool RE2::Set::Compile() {
104
6.97k
  if (compiled_) {
105
0
    ABSL_LOG(DFATAL) << "RE2::Set::Compile() called more than once";
106
0
    return false;
107
0
  }
108
6.97k
  compiled_ = true;
109
6.97k
  size_ = static_cast<int>(elem_.size());
110
111
  // Sort the elements by their patterns. This is good enough for now
112
  // until we have a Regexp comparison function. (Maybe someday...)
113
6.97k
  std::sort(elem_.begin(), elem_.end(),
114
6.97k
            [](const Elem& a, const Elem& b) -> bool {
115
0
              return a.first < b.first;
116
0
            });
117
118
6.97k
  PODArray<re2::Regexp*> sub(size_);
119
13.9k
  for (int i = 0; i < size_; i++)
120
6.97k
    sub[i] = elem_[i].second;
121
6.97k
  elem_.clear();
122
6.97k
  elem_.shrink_to_fit();
123
124
6.97k
  Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
125
6.97k
    options_.ParseFlags());
126
6.97k
  re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
127
128
6.97k
  prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
129
6.97k
  re->Decref();
130
6.97k
  return prog_ != nullptr;
131
6.97k
}
132
133
6.97k
bool RE2::Set::Match(absl::string_view text, std::vector<int>* v) const {
134
6.97k
  return Match(text, v, NULL);
135
6.97k
}
136
137
bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
138
6.97k
                     ErrorInfo* error_info) const {
139
6.97k
  if (!compiled_) {
140
0
    if (error_info != NULL)
141
0
      error_info->kind = kNotCompiled;
142
0
    ABSL_LOG(DFATAL) << "RE2::Set::Match() called before compiling";
143
0
    return false;
144
0
  }
145
6.97k
#ifdef RE2_HAVE_THREAD_LOCAL
146
6.97k
  hooks::context = NULL;
147
6.97k
#endif
148
6.97k
  bool dfa_failed = false;
149
6.97k
  std::unique_ptr<SparseSet> matches;
150
6.97k
  if (v != NULL) {
151
6.97k
    matches.reset(new SparseSet(size_));
152
6.97k
    v->clear();
153
6.97k
  }
154
6.97k
  bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
155
6.97k
                              NULL, &dfa_failed, matches.get());
156
6.97k
  if (dfa_failed) {
157
0
    if (options_.log_errors())
158
0
      ABSL_LOG(ERROR) << "DFA out of memory: "
159
0
                      << "program size " << prog_->size() << ", "
160
0
                      << "list count " << prog_->list_count() << ", "
161
0
                      << "bytemap range " << prog_->bytemap_range();
162
0
    if (error_info != NULL)
163
0
      error_info->kind = kOutOfMemory;
164
0
    return false;
165
0
  }
166
6.97k
  if (ret == false) {
167
4.57k
    if (error_info != NULL)
168
0
      error_info->kind = kNoError;
169
4.57k
    return false;
170
4.57k
  }
171
2.40k
  if (v != NULL) {
172
2.40k
    if (matches->empty()) {
173
0
      if (error_info != NULL)
174
0
        error_info->kind = kInconsistent;
175
0
      ABSL_LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned";
176
0
      return false;
177
0
    }
178
2.40k
    v->assign(matches->begin(), matches->end());
179
2.40k
  }
180
2.40k
  if (error_info != NULL)
181
0
    error_info->kind = kNoError;
182
2.40k
  return true;
183
2.40k
}
184
185
}  // namespace re2