/proc/self/cwd/source/extensions/common/matcher/matcher.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/extensions/common/matcher/matcher.h" |
2 | | |
3 | | #include "source/common/common/assert.h" |
4 | | |
5 | | namespace Envoy { |
6 | | namespace Extensions { |
7 | | namespace Common { |
8 | | namespace Matcher { |
9 | | |
10 | | void buildMatcher(const envoy::config::common::matcher::v3::MatchPredicate& match_config, |
11 | 10.4k | std::vector<MatcherPtr>& matchers) { |
12 | | // In order to store indexes and build our matcher tree inline, we must reserve a slot where |
13 | | // the matcher we are about to create will go. This allows us to know its future index and still |
14 | | // construct more of the tree in each called constructor (e.g., multiple OR/AND conditions). |
15 | | // Once fully constructed, we move the matcher into its position below. See the matcher |
16 | | // overview in matcher.h for more information. |
17 | 10.4k | matchers.emplace_back(nullptr); |
18 | | |
19 | 10.4k | MatcherPtr new_matcher; |
20 | 10.4k | switch (match_config.rule_case()) { |
21 | 1.97k | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kOrMatch: |
22 | 1.97k | new_matcher = std::make_unique<SetLogicMatcher>(match_config.or_match(), matchers, |
23 | 1.97k | SetLogicMatcher::Type::Or); |
24 | 1.97k | break; |
25 | 829 | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAndMatch: |
26 | 829 | new_matcher = std::make_unique<SetLogicMatcher>(match_config.and_match(), matchers, |
27 | 829 | SetLogicMatcher::Type::And); |
28 | 829 | break; |
29 | 1.04k | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kNotMatch: |
30 | 1.04k | new_matcher = std::make_unique<NotMatcher>(match_config.not_match(), matchers); |
31 | 1.04k | break; |
32 | 656 | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAnyMatch: |
33 | 656 | new_matcher = std::make_unique<AnyMatcher>(matchers); |
34 | 656 | break; |
35 | 798 | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestHeadersMatch: |
36 | 798 | new_matcher = std::make_unique<HttpRequestHeadersMatcher>( |
37 | 798 | match_config.http_request_headers_match(), matchers); |
38 | 798 | break; |
39 | 746 | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestTrailersMatch: |
40 | 746 | new_matcher = std::make_unique<HttpRequestTrailersMatcher>( |
41 | 746 | match_config.http_request_trailers_match(), matchers); |
42 | 746 | break; |
43 | 1.10k | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseHeadersMatch: |
44 | 1.10k | new_matcher = std::make_unique<HttpResponseHeadersMatcher>( |
45 | 1.10k | match_config.http_response_headers_match(), matchers); |
46 | 1.10k | break; |
47 | 1.61k | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseTrailersMatch: |
48 | 1.61k | new_matcher = std::make_unique<HttpResponseTrailersMatcher>( |
49 | 1.61k | match_config.http_response_trailers_match(), matchers); |
50 | 1.61k | break; |
51 | 1.20k | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestGenericBodyMatch: |
52 | 1.20k | new_matcher = std::make_unique<HttpRequestGenericBodyMatcher>( |
53 | 1.20k | match_config.http_request_generic_body_match(), matchers); |
54 | 1.20k | break; |
55 | 441 | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseGenericBodyMatch: |
56 | 441 | new_matcher = std::make_unique<HttpResponseGenericBodyMatcher>( |
57 | 441 | match_config.http_response_generic_body_match(), matchers); |
58 | 441 | break; |
59 | 0 | case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::RULE_NOT_SET: |
60 | 0 | PANIC_DUE_TO_CORRUPT_ENUM; |
61 | 10.4k | } |
62 | | |
63 | | // Per above, move the matcher into its position. |
64 | 10.2k | matchers[new_matcher->index()] = std::move(new_matcher); |
65 | 10.2k | } |
66 | | |
67 | | SetLogicMatcher::SetLogicMatcher( |
68 | | const envoy::config::common::matcher::v3::MatchPredicate::MatchSet& configs, |
69 | | std::vector<MatcherPtr>& matchers, Type type) |
70 | 2.80k | : LogicMatcherBase(matchers), matchers_(matchers), type_(type) { |
71 | 8.93k | for (const auto& config : configs.rules()) { |
72 | 8.93k | indexes_.push_back(matchers_.size()); |
73 | 8.93k | buildMatcher(config, matchers_); |
74 | 8.93k | } |
75 | 2.80k | } |
76 | | |
77 | | void SetLogicMatcher::updateLocalStatus(MatchStatusVector& statuses, |
78 | 34.6k | const UpdateFunctor& functor) const { |
79 | 34.6k | if (!statuses[my_index_].might_change_status_) { |
80 | 1.90k | return; |
81 | 1.90k | } |
82 | | |
83 | 105k | for (size_t index : indexes_) { |
84 | 105k | functor(*matchers_[index], statuses); |
85 | 105k | } |
86 | | |
87 | 56.2k | auto predicate = [&statuses](size_t index) { return statuses[index].matches_; }; |
88 | 32.7k | if (type_ == Type::And) { |
89 | 10.1k | statuses[my_index_].matches_ = std::all_of(indexes_.begin(), indexes_.end(), predicate); |
90 | 22.6k | } else { |
91 | 22.6k | ASSERT(type_ == Type::Or); |
92 | 22.6k | statuses[my_index_].matches_ = std::any_of(indexes_.begin(), indexes_.end(), predicate); |
93 | 22.6k | } |
94 | | |
95 | | // TODO(mattklein123): We can potentially short circuit this even further if we git a single false |
96 | | // in an AND set or a single true in an OR set. |
97 | 32.7k | statuses[my_index_].might_change_status_ = |
98 | 32.7k | std::any_of(indexes_.begin(), indexes_.end(), |
99 | 48.3k | [&statuses](size_t index) { return statuses[index].might_change_status_; }); |
100 | 32.7k | } |
101 | | |
102 | | NotMatcher::NotMatcher(const envoy::config::common::matcher::v3::MatchPredicate& config, |
103 | | std::vector<MatcherPtr>& matchers) |
104 | 1.04k | : LogicMatcherBase(matchers), matchers_(matchers), not_index_(matchers.size()) { |
105 | 1.04k | buildMatcher(config, matchers); |
106 | 1.04k | } |
107 | | |
108 | | void NotMatcher::updateLocalStatus(MatchStatusVector& statuses, |
109 | 5.36k | const UpdateFunctor& functor) const { |
110 | 5.36k | if (!statuses[my_index_].might_change_status_) { |
111 | 369 | return; |
112 | 369 | } |
113 | | |
114 | 4.99k | functor(*matchers_[not_index_], statuses); |
115 | 4.99k | statuses[my_index_].matches_ = !statuses[not_index_].matches_; |
116 | 4.99k | statuses[my_index_].might_change_status_ = statuses[not_index_].might_change_status_; |
117 | 4.99k | } |
118 | | |
119 | | HttpHeaderMatcherBase::HttpHeaderMatcherBase( |
120 | | const envoy::config::common::matcher::v3::HttpHeadersMatch& config, |
121 | | const std::vector<MatcherPtr>& matchers) |
122 | | : SimpleMatcher(matchers), |
123 | 4.26k | headers_to_match_(Http::HeaderUtility::buildHeaderDataVector(config.headers())) {} |
124 | | |
125 | | void HttpHeaderMatcherBase::matchHeaders(const Http::HeaderMap& headers, |
126 | 3.09k | MatchStatusVector& statuses) const { |
127 | 3.09k | ASSERT(statuses[my_index_].might_change_status_); |
128 | 3.09k | statuses[my_index_].matches_ = Http::HeaderUtility::matchHeaders(headers, headers_to_match_); |
129 | 3.09k | statuses[my_index_].might_change_status_ = false; |
130 | 3.09k | } |
131 | | |
132 | | // HttpGenericBodyMatcher |
133 | | // Scans the HTTP body and looks for patterns. |
134 | | // HTTP body may be passed to the matcher in chunks. The search logic buffers |
135 | | // only as many bytes as is the length of the longest pattern to be found. |
136 | | HttpGenericBodyMatcher::HttpGenericBodyMatcher( |
137 | | const envoy::config::common::matcher::v3::HttpGenericBodyMatch& config, |
138 | | const std::vector<MatcherPtr>& matchers) |
139 | 1.64k | : HttpBodyMatcherBase(matchers) { |
140 | 1.64k | patterns_ = std::make_shared<std::vector<std::string>>(); |
141 | 5.05k | for (const auto& i : config.patterns()) { |
142 | 5.05k | switch (i.rule_case()) { |
143 | | // For binary match 'i' contains sequence of bytes to locate in the body. |
144 | 1.26k | case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kBinaryMatch: { |
145 | 1.26k | patterns_->push_back(i.binary_match()); |
146 | 1.26k | } break; |
147 | | // For string match 'i' contains exact string to locate in the body. |
148 | 3.79k | case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kStringMatch: |
149 | 3.79k | patterns_->push_back(i.string_match()); |
150 | 3.79k | break; |
151 | 0 | case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::RULE_NOT_SET: |
152 | 0 | PANIC_DUE_TO_CORRUPT_ENUM; |
153 | 5.05k | } |
154 | | // overlap_size_ indicates how many bytes from previous data chunk(s) are buffered. |
155 | 5.05k | overlap_size_ = std::max(overlap_size_, patterns_->back().length() - 1); |
156 | 5.05k | } |
157 | 1.64k | limit_ = config.bytes_limit(); |
158 | 1.64k | } |
159 | | |
160 | 38.2k | void HttpGenericBodyMatcher::onBody(const Buffer::Instance& data, MatchStatusVector& statuses) { |
161 | | // Get the context associated with this stream. |
162 | 38.2k | HttpGenericBodyMatcherCtx* ctx = |
163 | 38.2k | static_cast<HttpGenericBodyMatcherCtx*>(statuses[my_index_].ctx_.get()); |
164 | | |
165 | 38.2k | if (statuses[my_index_].might_change_status_ == false) { |
166 | | // End of search limit has been already reached or all patterns have been found. |
167 | | // Status is not going to change. |
168 | 6.31k | ASSERT(((0 != limit_) && (limit_ == ctx->processed_bytes_)) || (ctx->patterns_index_.empty())); |
169 | 6.31k | return; |
170 | 6.31k | } |
171 | | |
172 | | // Iterate through all patterns to be found and check if they are located across body |
173 | | // chunks: part of the pattern was in previous body chunk and remaining of the pattern |
174 | | // is in the current body chunk on in the current body chunk. |
175 | 31.9k | bool resize_required = false; |
176 | 31.9k | auto body_search_limit = limit_ - ctx->processed_bytes_; |
177 | 31.9k | auto it = ctx->patterns_index_.begin(); |
178 | 85.1k | while (it != ctx->patterns_index_.end()) { |
179 | 53.2k | const auto& pattern = patterns_->at(*it); |
180 | 53.2k | if ((!ctx->overlap_.empty() && (locatePatternAcrossChunks(pattern, data, ctx))) || |
181 | 53.2k | (-1 != data.search(static_cast<const void*>(pattern.data()), pattern.length(), 0, |
182 | 53.0k | body_search_limit))) { |
183 | | // Pattern found. Remove it from the list of patterns to be found. |
184 | | // If the longest pattern has been found, resize of overlap buffer may be |
185 | | // required. |
186 | 1.19k | resize_required = resize_required || (ctx->capacity_ == (pattern.length() - 1)); |
187 | 1.19k | it = ctx->patterns_index_.erase(it); |
188 | 52.0k | } else { |
189 | 52.0k | it++; |
190 | 52.0k | } |
191 | 53.2k | } |
192 | | |
193 | 31.9k | if (ctx->patterns_index_.empty()) { |
194 | | // All patterns were found. |
195 | 301 | statuses[my_index_].matches_ = true; |
196 | 301 | statuses[my_index_].might_change_status_ = false; |
197 | 301 | return; |
198 | 301 | } |
199 | | |
200 | | // Check if next body chunks should be searched for patterns. If the search limit |
201 | | // ends on the current body chunk, there is no need to check next chunks. |
202 | 31.6k | if (0 != limit_) { |
203 | 18.4k | ctx->processed_bytes_ = std::min(uint64_t(limit_), ctx->processed_bytes_ + data.length()); |
204 | 18.4k | if (limit_ == ctx->processed_bytes_) { |
205 | | // End of search limit has been reached and not all patterns have been found. |
206 | 14 | statuses[my_index_].matches_ = false; |
207 | 14 | statuses[my_index_].might_change_status_ = false; |
208 | 14 | return; |
209 | 14 | } |
210 | 18.4k | } |
211 | | |
212 | | // If longest pattern has been located, there is possibility that overlap_ |
213 | | // buffer size may be reduced. |
214 | 31.6k | if (resize_required) { |
215 | 499 | resizeOverlapBuffer(ctx); |
216 | 499 | } |
217 | | |
218 | 31.6k | bufferLastBytes(data, ctx); |
219 | 31.6k | } |
220 | | |
221 | | // Here we handle a situation when a pattern is spread across multiple body buffers. |
222 | | // overlap_ stores number of bytes from previous body chunks equal to longest pattern yet to be |
223 | | // found minus one byte (-1). The logic below tries to find the beginning of the pattern in |
224 | | // overlap_ buffer and the pattern should continue at the beginning of the next buffer. |
225 | | bool HttpGenericBodyMatcher::locatePatternAcrossChunks(const std::string& pattern, |
226 | | const Buffer::Instance& data, |
227 | 41.8k | const HttpGenericBodyMatcherCtx* ctx) { |
228 | | // Take the first character from the pattern and locate it in overlap_. |
229 | 41.8k | auto pattern_index = 0; |
230 | | // Start position in overlap_. overlap_ size was calculated based on the longest pattern to be |
231 | | // found, but search for shorter patterns may start from some offset, not the beginning of the |
232 | | // buffer. |
233 | 41.8k | size_t start_index = (ctx->overlap_.size() > (pattern.size() - 1)) |
234 | 41.8k | ? ctx->overlap_.size() - (pattern.size() - 1) |
235 | 41.8k | : 0; |
236 | 41.8k | auto match_iter = std::find(std::begin(ctx->overlap_) + start_index, std::end(ctx->overlap_), |
237 | 41.8k | pattern.at(pattern_index)); |
238 | | |
239 | 41.8k | if (match_iter == std::end(ctx->overlap_)) { |
240 | 32.0k | return false; |
241 | 32.0k | } |
242 | | |
243 | | // Continue checking characters until end of overlap_ buffer. |
244 | 144k | while (match_iter != std::end(ctx->overlap_)) { |
245 | 138k | if (pattern[pattern_index] != *match_iter) { |
246 | 3.09k | return false; |
247 | 3.09k | } |
248 | 135k | pattern_index++; |
249 | 135k | match_iter++; |
250 | 135k | } |
251 | | |
252 | | // Now check if the remaining of the pattern matches the beginning of the body |
253 | | // buffer.i Do it only if there is sufficient number of bytes in the data buffer. |
254 | 6.69k | auto pattern_remainder = pattern.substr(pattern_index); |
255 | 6.69k | if ((0 != limit_) && (pattern_remainder.length() > (limit_ - ctx->processed_bytes_))) { |
256 | | // Even if we got match it would be outside the search limit |
257 | 60 | return false; |
258 | 60 | } |
259 | 6.63k | return ((pattern_remainder.length() <= data.length()) && data.startsWith(pattern_remainder)); |
260 | 6.69k | } |
261 | | |
262 | | // Method buffers last bytes from the currently processed body in overlap_. |
263 | | // This is required to find patterns which spans across multiple body chunks. |
264 | | void HttpGenericBodyMatcher::bufferLastBytes(const Buffer::Instance& data, |
265 | 31.6k | HttpGenericBodyMatcherCtx* ctx) { |
266 | | // The matcher buffers the last seen X bytes where X is equal to the length of the |
267 | | // longest pattern - 1. With the arrival of the new 'data' the following situations |
268 | | // are possible: |
269 | | // 1. The new data's length is larger or equal to X. In this case just copy last X bytes |
270 | | // from the data to overlap_ buffer. |
271 | | // 2. The new data length is smaller than X and there is enough room in overlap buffer to just |
272 | | // copy the bytes from data. |
273 | | // 3. The new data length is smaller than X and there is not enough room in overlap buffer. |
274 | 31.6k | if (data.length() >= ctx->capacity_) { |
275 | | // Case 1: |
276 | | // Just overwrite the entire overlap_ buffer with new data. |
277 | 10.0k | ctx->overlap_.resize(ctx->capacity_); |
278 | 10.0k | data.copyOut(data.length() - ctx->capacity_, ctx->capacity_, ctx->overlap_.data()); |
279 | 21.5k | } else { |
280 | 21.5k | if (data.length() <= (ctx->capacity_ - ctx->overlap_.size())) { |
281 | | // Case 2. Just add the new data on top of already buffered. |
282 | 6.35k | const auto size = ctx->overlap_.size(); |
283 | 6.35k | ctx->overlap_.resize(ctx->overlap_.size() + data.length()); |
284 | 6.35k | data.copyOut(0, data.length(), ctx->overlap_.data() + size); |
285 | 15.1k | } else { |
286 | | // Case 3. First shift data to make room for new data and then copy |
287 | | // entire new buffer. |
288 | 15.1k | const size_t shift = ctx->overlap_.size() - (ctx->capacity_ - data.length()); |
289 | 390k | for (size_t i = 0; i < (ctx->overlap_.size() - shift); i++) { |
290 | 375k | ctx->overlap_[i] = ctx->overlap_[i + shift]; |
291 | 375k | } |
292 | 15.1k | const auto size = ctx->overlap_.size(); |
293 | 15.1k | ctx->overlap_.resize(ctx->capacity_); |
294 | 15.1k | data.copyOut(0, data.length(), ctx->overlap_.data() + (size - shift)); |
295 | 15.1k | } |
296 | 21.5k | } |
297 | 31.6k | } |
298 | | |
299 | | // Method takes list of indexes of patterns not yet located in the http body and returns the |
300 | | // length of the longest pattern. |
301 | | // This is used by matcher to buffer as minimum bytes as possible. |
302 | 499 | size_t HttpGenericBodyMatcher::calcLongestPatternSize(const std::list<uint32_t>& indexes) const { |
303 | 499 | ASSERT(!indexes.empty()); |
304 | 499 | size_t max_len = 0; |
305 | 979 | for (const auto& i : indexes) { |
306 | 979 | max_len = std::max(max_len, patterns_->at(i).length()); |
307 | 979 | } |
308 | 499 | return max_len; |
309 | 499 | } |
310 | | |
311 | | // Method checks if it is possible to reduce the size of overlap_ buffer. |
312 | 499 | void HttpGenericBodyMatcher::resizeOverlapBuffer(HttpGenericBodyMatcherCtx* ctx) { |
313 | | // Check if we need to resize overlap_ buffer. Since it was initialized to size of the longest |
314 | | // pattern, it will be shrunk only and memory allocations do not happen. |
315 | | // Depending on how many bytes were already in the buffer, shift may be required if |
316 | | // the new size is smaller than number of already buffered bytes. |
317 | 499 | const size_t max_len = calcLongestPatternSize(ctx->patterns_index_); |
318 | 499 | if (ctx->capacity_ != (max_len - 1)) { |
319 | 220 | const size_t new_size = max_len - 1; |
320 | 220 | const size_t shift = (ctx->overlap_.size() > new_size) ? (ctx->overlap_.size() - new_size) : 0; |
321 | | // Copy the last new_size bytes to the beginning of the buffer. |
322 | 1.05k | for (size_t i = 0; (i < new_size) && (shift > 0); i++) { |
323 | 837 | ctx->overlap_[i] = ctx->overlap_[i + shift]; |
324 | 837 | } |
325 | 220 | ctx->capacity_ = new_size; |
326 | 220 | if (shift > 0) { |
327 | 88 | ctx->overlap_.resize(new_size); |
328 | 88 | } |
329 | 220 | } |
330 | 499 | } |
331 | | |
332 | | } // namespace Matcher |
333 | | } // namespace Common |
334 | | } // namespace Extensions |
335 | | } // namespace Envoy |