Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 The RE2 Authors. All Rights Reserved. |
2 | | // Use of this source code is governed by a BSD-style |
3 | | // license that can be found in the LICENSE file. |
4 | | |
5 | | #include "re2/set.h" |
6 | | |
7 | | #include <stddef.h> |
8 | | |
9 | | #include <algorithm> |
10 | | #include <memory> |
11 | | #include <string> |
12 | | #include <utility> |
13 | | #include <vector> |
14 | | |
15 | | #include "absl/log/absl_log.h" |
16 | | #include "absl/strings/string_view.h" |
17 | | #include "re2/pod_array.h" |
18 | | #include "re2/prog.h" |
19 | | #include "re2/re2.h" |
20 | | #include "re2/regexp.h" |
21 | | #include "re2/sparse_set.h" |
22 | | |
23 | | namespace re2 { |
24 | | |
25 | | RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) |
26 | 6.97k | : options_(options), |
27 | 6.97k | anchor_(anchor), |
28 | 6.97k | compiled_(false), |
29 | 6.97k | size_(0) { |
30 | 6.97k | options_.set_never_capture(true); // might unblock some optimisations |
31 | 6.97k | } |
32 | | |
33 | 6.97k | RE2::Set::~Set() { |
34 | 6.97k | for (size_t i = 0; i < elem_.size(); i++) |
35 | 0 | elem_[i].second->Decref(); |
36 | 6.97k | } |
37 | | |
38 | | RE2::Set::Set(Set&& other) |
39 | 0 | : options_(other.options_), |
40 | 0 | anchor_(other.anchor_), |
41 | 0 | elem_(std::move(other.elem_)), |
42 | 0 | compiled_(other.compiled_), |
43 | 0 | size_(other.size_), |
44 | 0 | prog_(std::move(other.prog_)) { |
45 | 0 | other.elem_.clear(); |
46 | 0 | other.elem_.shrink_to_fit(); |
47 | 0 | other.compiled_ = false; |
48 | 0 | other.size_ = 0; |
49 | 0 | other.prog_.reset(); |
50 | 0 | } |
51 | | |
52 | 0 | RE2::Set& RE2::Set::operator=(Set&& other) { |
53 | 0 | this->~Set(); |
54 | 0 | (void) new (this) Set(std::move(other)); |
55 | 0 | return *this; |
56 | 0 | } |
57 | | |
58 | 0 | int RE2::Set::Size() const { |
59 | 0 | if (!compiled_) |
60 | 0 | return static_cast<int>(elem_.size()); |
61 | 0 | return size_; |
62 | 0 | } |
63 | | |
64 | 6.97k | int RE2::Set::Add(absl::string_view pattern, std::string* error) { |
65 | 6.97k | if (compiled_) { |
66 | 0 | ABSL_LOG(DFATAL) << "RE2::Set::Add() called after compiling"; |
67 | 0 | return -1; |
68 | 0 | } |
69 | | |
70 | 6.97k | Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( |
71 | 6.97k | options_.ParseFlags()); |
72 | 6.97k | RegexpStatus status; |
73 | 6.97k | re2::Regexp* re = Regexp::Parse(pattern, pf, &status); |
74 | 6.97k | if (re == NULL) { |
75 | 0 | if (error != NULL) |
76 | 0 | *error = status.Text(); |
77 | 0 | if (options_.log_errors()) |
78 | 0 | ABSL_LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); |
79 | 0 | return -1; |
80 | 0 | } |
81 | | |
82 | | // Concatenate with match index and push on vector. |
83 | 6.97k | int n = static_cast<int>(elem_.size()); |
84 | 6.97k | re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); |
85 | 6.97k | if (re->op() == kRegexpConcat) { |
86 | 3.14k | int nsub = re->nsub(); |
87 | 3.14k | PODArray<re2::Regexp*> sub(nsub + 1); |
88 | 25.2k | for (int i = 0; i < nsub; i++) |
89 | 22.0k | sub[i] = re->sub()[i]->Incref(); |
90 | 3.14k | sub[nsub] = m; |
91 | 3.14k | re->Decref(); |
92 | 3.14k | re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); |
93 | 3.83k | } else { |
94 | 3.83k | re2::Regexp* sub[2]; |
95 | 3.83k | sub[0] = re; |
96 | 3.83k | sub[1] = m; |
97 | 3.83k | re = re2::Regexp::Concat(sub, 2, pf); |
98 | 3.83k | } |
99 | 6.97k | elem_.emplace_back(std::string(pattern), re); |
100 | 6.97k | return n; |
101 | 6.97k | } |
102 | | |
103 | 6.97k | bool RE2::Set::Compile() { |
104 | 6.97k | if (compiled_) { |
105 | 0 | ABSL_LOG(DFATAL) << "RE2::Set::Compile() called more than once"; |
106 | 0 | return false; |
107 | 0 | } |
108 | 6.97k | compiled_ = true; |
109 | 6.97k | size_ = static_cast<int>(elem_.size()); |
110 | | |
111 | | // Sort the elements by their patterns. This is good enough for now |
112 | | // until we have a Regexp comparison function. (Maybe someday...) |
113 | 6.97k | std::sort(elem_.begin(), elem_.end(), |
114 | 6.97k | [](const Elem& a, const Elem& b) -> bool { |
115 | 0 | return a.first < b.first; |
116 | 0 | }); |
117 | | |
118 | 6.97k | PODArray<re2::Regexp*> sub(size_); |
119 | 13.9k | for (int i = 0; i < size_; i++) |
120 | 6.97k | sub[i] = elem_[i].second; |
121 | 6.97k | elem_.clear(); |
122 | 6.97k | elem_.shrink_to_fit(); |
123 | | |
124 | 6.97k | Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( |
125 | 6.97k | options_.ParseFlags()); |
126 | 6.97k | re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); |
127 | | |
128 | 6.97k | prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem())); |
129 | 6.97k | re->Decref(); |
130 | 6.97k | return prog_ != nullptr; |
131 | 6.97k | } |
132 | | |
133 | 6.97k | bool RE2::Set::Match(absl::string_view text, std::vector<int>* v) const { |
134 | 6.97k | return Match(text, v, NULL); |
135 | 6.97k | } |
136 | | |
137 | | bool RE2::Set::Match(absl::string_view text, std::vector<int>* v, |
138 | 6.97k | ErrorInfo* error_info) const { |
139 | 6.97k | if (!compiled_) { |
140 | 0 | if (error_info != NULL) |
141 | 0 | error_info->kind = kNotCompiled; |
142 | 0 | ABSL_LOG(DFATAL) << "RE2::Set::Match() called before compiling"; |
143 | 0 | return false; |
144 | 0 | } |
145 | 6.97k | #ifdef RE2_HAVE_THREAD_LOCAL |
146 | 6.97k | hooks::context = NULL; |
147 | 6.97k | #endif |
148 | 6.97k | bool dfa_failed = false; |
149 | 6.97k | std::unique_ptr<SparseSet> matches; |
150 | 6.97k | if (v != NULL) { |
151 | 6.97k | matches.reset(new SparseSet(size_)); |
152 | 6.97k | v->clear(); |
153 | 6.97k | } |
154 | 6.97k | bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch, |
155 | 6.97k | NULL, &dfa_failed, matches.get()); |
156 | 6.97k | if (dfa_failed) { |
157 | 0 | if (options_.log_errors()) |
158 | 0 | ABSL_LOG(ERROR) << "DFA out of memory: " |
159 | 0 | << "program size " << prog_->size() << ", " |
160 | 0 | << "list count " << prog_->list_count() << ", " |
161 | 0 | << "bytemap range " << prog_->bytemap_range(); |
162 | 0 | if (error_info != NULL) |
163 | 0 | error_info->kind = kOutOfMemory; |
164 | 0 | return false; |
165 | 0 | } |
166 | 6.97k | if (ret == false) { |
167 | 4.57k | if (error_info != NULL) |
168 | 0 | error_info->kind = kNoError; |
169 | 4.57k | return false; |
170 | 4.57k | } |
171 | 2.40k | if (v != NULL) { |
172 | 2.40k | if (matches->empty()) { |
173 | 0 | if (error_info != NULL) |
174 | 0 | error_info->kind = kInconsistent; |
175 | 0 | ABSL_LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned"; |
176 | 0 | return false; |
177 | 0 | } |
178 | 2.40k | v->assign(matches->begin(), matches->end()); |
179 | 2.40k | } |
180 | 2.40k | if (error_info != NULL) |
181 | 0 | error_info->kind = kNoError; |
182 | 2.40k | return true; |
183 | 2.40k | } |
184 | | |
185 | | } // namespace re2 |