Line data Source code
1 : #include "source/extensions/common/matcher/matcher.h"
2 :
3 : #include "source/common/common/assert.h"
4 :
5 : namespace Envoy {
6 : namespace Extensions {
7 : namespace Common {
8 : namespace Matcher {
9 :
10 : void buildMatcher(const envoy::config::common::matcher::v3::MatchPredicate& match_config,
11 0 : std::vector<MatcherPtr>& matchers) {
12 : // In order to store indexes and build our matcher tree inline, we must reserve a slot where
13 : // the matcher we are about to create will go. This allows us to know its future index and still
14 : // construct more of the tree in each called constructor (e.g., multiple OR/AND conditions).
15 : // Once fully constructed, we move the matcher into its position below. See the matcher
16 : // overview in matcher.h for more information.
17 0 : matchers.emplace_back(nullptr);
18 :
19 0 : MatcherPtr new_matcher;
20 0 : switch (match_config.rule_case()) {
21 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kOrMatch:
22 0 : new_matcher = std::make_unique<SetLogicMatcher>(match_config.or_match(), matchers,
23 0 : SetLogicMatcher::Type::Or);
24 0 : break;
25 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAndMatch:
26 0 : new_matcher = std::make_unique<SetLogicMatcher>(match_config.and_match(), matchers,
27 0 : SetLogicMatcher::Type::And);
28 0 : break;
29 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kNotMatch:
30 0 : new_matcher = std::make_unique<NotMatcher>(match_config.not_match(), matchers);
31 0 : break;
32 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAnyMatch:
33 0 : new_matcher = std::make_unique<AnyMatcher>(matchers);
34 0 : break;
35 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestHeadersMatch:
36 0 : new_matcher = std::make_unique<HttpRequestHeadersMatcher>(
37 0 : match_config.http_request_headers_match(), matchers);
38 0 : break;
39 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestTrailersMatch:
40 0 : new_matcher = std::make_unique<HttpRequestTrailersMatcher>(
41 0 : match_config.http_request_trailers_match(), matchers);
42 0 : break;
43 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseHeadersMatch:
44 0 : new_matcher = std::make_unique<HttpResponseHeadersMatcher>(
45 0 : match_config.http_response_headers_match(), matchers);
46 0 : break;
47 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseTrailersMatch:
48 0 : new_matcher = std::make_unique<HttpResponseTrailersMatcher>(
49 0 : match_config.http_response_trailers_match(), matchers);
50 0 : break;
51 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestGenericBodyMatch:
52 0 : new_matcher = std::make_unique<HttpRequestGenericBodyMatcher>(
53 0 : match_config.http_request_generic_body_match(), matchers);
54 0 : break;
55 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseGenericBodyMatch:
56 0 : new_matcher = std::make_unique<HttpResponseGenericBodyMatcher>(
57 0 : match_config.http_response_generic_body_match(), matchers);
58 0 : break;
59 0 : case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::RULE_NOT_SET:
60 0 : PANIC_DUE_TO_CORRUPT_ENUM;
61 0 : }
62 :
63 : // Per above, move the matcher into its position.
64 0 : matchers[new_matcher->index()] = std::move(new_matcher);
65 0 : }
66 :
67 : SetLogicMatcher::SetLogicMatcher(
68 : const envoy::config::common::matcher::v3::MatchPredicate::MatchSet& configs,
69 : std::vector<MatcherPtr>& matchers, Type type)
70 0 : : LogicMatcherBase(matchers), matchers_(matchers), type_(type) {
71 0 : for (const auto& config : configs.rules()) {
72 0 : indexes_.push_back(matchers_.size());
73 0 : buildMatcher(config, matchers_);
74 0 : }
75 0 : }
76 :
77 : void SetLogicMatcher::updateLocalStatus(MatchStatusVector& statuses,
78 0 : const UpdateFunctor& functor) const {
79 0 : if (!statuses[my_index_].might_change_status_) {
80 0 : return;
81 0 : }
82 :
83 0 : for (size_t index : indexes_) {
84 0 : functor(*matchers_[index], statuses);
85 0 : }
86 :
87 0 : auto predicate = [&statuses](size_t index) { return statuses[index].matches_; };
88 0 : if (type_ == Type::And) {
89 0 : statuses[my_index_].matches_ = std::all_of(indexes_.begin(), indexes_.end(), predicate);
90 0 : } else {
91 0 : ASSERT(type_ == Type::Or);
92 0 : statuses[my_index_].matches_ = std::any_of(indexes_.begin(), indexes_.end(), predicate);
93 0 : }
94 :
95 : // TODO(mattklein123): We can potentially short circuit this even further if we git a single false
96 : // in an AND set or a single true in an OR set.
97 0 : statuses[my_index_].might_change_status_ =
98 0 : std::any_of(indexes_.begin(), indexes_.end(),
99 0 : [&statuses](size_t index) { return statuses[index].might_change_status_; });
100 0 : }
101 :
102 : NotMatcher::NotMatcher(const envoy::config::common::matcher::v3::MatchPredicate& config,
103 : std::vector<MatcherPtr>& matchers)
104 0 : : LogicMatcherBase(matchers), matchers_(matchers), not_index_(matchers.size()) {
105 0 : buildMatcher(config, matchers);
106 0 : }
107 :
108 : void NotMatcher::updateLocalStatus(MatchStatusVector& statuses,
109 0 : const UpdateFunctor& functor) const {
110 0 : if (!statuses[my_index_].might_change_status_) {
111 0 : return;
112 0 : }
113 :
114 0 : functor(*matchers_[not_index_], statuses);
115 0 : statuses[my_index_].matches_ = !statuses[not_index_].matches_;
116 0 : statuses[my_index_].might_change_status_ = statuses[not_index_].might_change_status_;
117 0 : }
118 :
119 : HttpHeaderMatcherBase::HttpHeaderMatcherBase(
120 : const envoy::config::common::matcher::v3::HttpHeadersMatch& config,
121 : const std::vector<MatcherPtr>& matchers)
122 : : SimpleMatcher(matchers),
123 0 : headers_to_match_(Http::HeaderUtility::buildHeaderDataVector(config.headers())) {}
124 :
125 : void HttpHeaderMatcherBase::matchHeaders(const Http::HeaderMap& headers,
126 0 : MatchStatusVector& statuses) const {
127 0 : ASSERT(statuses[my_index_].might_change_status_);
128 0 : statuses[my_index_].matches_ = Http::HeaderUtility::matchHeaders(headers, headers_to_match_);
129 0 : statuses[my_index_].might_change_status_ = false;
130 0 : }
131 :
132 : // HttpGenericBodyMatcher
133 : // Scans the HTTP body and looks for patterns.
134 : // HTTP body may be passed to the matcher in chunks. The search logic buffers
135 : // only as many bytes as is the length of the longest pattern to be found.
136 : HttpGenericBodyMatcher::HttpGenericBodyMatcher(
137 : const envoy::config::common::matcher::v3::HttpGenericBodyMatch& config,
138 : const std::vector<MatcherPtr>& matchers)
139 0 : : HttpBodyMatcherBase(matchers) {
140 0 : patterns_ = std::make_shared<std::vector<std::string>>();
141 0 : for (const auto& i : config.patterns()) {
142 0 : switch (i.rule_case()) {
143 : // For binary match 'i' contains sequence of bytes to locate in the body.
144 0 : case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kBinaryMatch: {
145 0 : patterns_->push_back(i.binary_match());
146 0 : } break;
147 : // For string match 'i' contains exact string to locate in the body.
148 0 : case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kStringMatch:
149 0 : patterns_->push_back(i.string_match());
150 0 : break;
151 0 : case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::RULE_NOT_SET:
152 0 : PANIC_DUE_TO_CORRUPT_ENUM;
153 0 : }
154 : // overlap_size_ indicates how many bytes from previous data chunk(s) are buffered.
155 0 : overlap_size_ = std::max(overlap_size_, patterns_->back().length() - 1);
156 0 : }
157 0 : limit_ = config.bytes_limit();
158 0 : }
159 :
160 0 : void HttpGenericBodyMatcher::onBody(const Buffer::Instance& data, MatchStatusVector& statuses) {
161 : // Get the context associated with this stream.
162 0 : HttpGenericBodyMatcherCtx* ctx =
163 0 : static_cast<HttpGenericBodyMatcherCtx*>(statuses[my_index_].ctx_.get());
164 :
165 0 : if (statuses[my_index_].might_change_status_ == false) {
166 : // End of search limit has been already reached or all patterns have been found.
167 : // Status is not going to change.
168 0 : ASSERT(((0 != limit_) && (limit_ == ctx->processed_bytes_)) || (ctx->patterns_index_.empty()));
169 0 : return;
170 0 : }
171 :
172 : // Iterate through all patterns to be found and check if they are located across body
173 : // chunks: part of the pattern was in previous body chunk and remaining of the pattern
174 : // is in the current body chunk on in the current body chunk.
175 0 : bool resize_required = false;
176 0 : auto body_search_limit = limit_ - ctx->processed_bytes_;
177 0 : auto it = ctx->patterns_index_.begin();
178 0 : while (it != ctx->patterns_index_.end()) {
179 0 : const auto& pattern = patterns_->at(*it);
180 0 : if ((!ctx->overlap_.empty() && (locatePatternAcrossChunks(pattern, data, ctx))) ||
181 0 : (-1 != data.search(static_cast<const void*>(pattern.data()), pattern.length(), 0,
182 0 : body_search_limit))) {
183 : // Pattern found. Remove it from the list of patterns to be found.
184 : // If the longest pattern has been found, resize of overlap buffer may be
185 : // required.
186 0 : resize_required = resize_required || (ctx->capacity_ == (pattern.length() - 1));
187 0 : it = ctx->patterns_index_.erase(it);
188 0 : } else {
189 0 : it++;
190 0 : }
191 0 : }
192 :
193 0 : if (ctx->patterns_index_.empty()) {
194 : // All patterns were found.
195 0 : statuses[my_index_].matches_ = true;
196 0 : statuses[my_index_].might_change_status_ = false;
197 0 : return;
198 0 : }
199 :
200 : // Check if next body chunks should be searched for patterns. If the search limit
201 : // ends on the current body chunk, there is no need to check next chunks.
202 0 : if (0 != limit_) {
203 0 : ctx->processed_bytes_ = std::min(uint64_t(limit_), ctx->processed_bytes_ + data.length());
204 0 : if (limit_ == ctx->processed_bytes_) {
205 : // End of search limit has been reached and not all patterns have been found.
206 0 : statuses[my_index_].matches_ = false;
207 0 : statuses[my_index_].might_change_status_ = false;
208 0 : return;
209 0 : }
210 0 : }
211 :
212 : // If longest pattern has been located, there is possibility that overlap_
213 : // buffer size may be reduced.
214 0 : if (resize_required) {
215 0 : resizeOverlapBuffer(ctx);
216 0 : }
217 :
218 0 : bufferLastBytes(data, ctx);
219 0 : }
220 :
221 : // Here we handle a situation when a pattern is spread across multiple body buffers.
222 : // overlap_ stores number of bytes from previous body chunks equal to longest pattern yet to be
223 : // found minus one byte (-1). The logic below tries to find the beginning of the pattern in
224 : // overlap_ buffer and the pattern should continue at the beginning of the next buffer.
225 : bool HttpGenericBodyMatcher::locatePatternAcrossChunks(const std::string& pattern,
226 : const Buffer::Instance& data,
227 0 : const HttpGenericBodyMatcherCtx* ctx) {
228 : // Take the first character from the pattern and locate it in overlap_.
229 0 : auto pattern_index = 0;
230 : // Start position in overlap_. overlap_ size was calculated based on the longest pattern to be
231 : // found, but search for shorter patterns may start from some offset, not the beginning of the
232 : // buffer.
233 0 : size_t start_index = (ctx->overlap_.size() > (pattern.size() - 1))
234 0 : ? ctx->overlap_.size() - (pattern.size() - 1)
235 0 : : 0;
236 0 : auto match_iter = std::find(std::begin(ctx->overlap_) + start_index, std::end(ctx->overlap_),
237 0 : pattern.at(pattern_index));
238 :
239 0 : if (match_iter == std::end(ctx->overlap_)) {
240 0 : return false;
241 0 : }
242 :
243 : // Continue checking characters until end of overlap_ buffer.
244 0 : while (match_iter != std::end(ctx->overlap_)) {
245 0 : if (pattern[pattern_index] != *match_iter) {
246 0 : return false;
247 0 : }
248 0 : pattern_index++;
249 0 : match_iter++;
250 0 : }
251 :
252 : // Now check if the remaining of the pattern matches the beginning of the body
253 : // buffer.i Do it only if there is sufficient number of bytes in the data buffer.
254 0 : auto pattern_remainder = pattern.substr(pattern_index);
255 0 : if ((0 != limit_) && (pattern_remainder.length() > (limit_ - ctx->processed_bytes_))) {
256 : // Even if we got match it would be outside the search limit
257 0 : return false;
258 0 : }
259 0 : return ((pattern_remainder.length() <= data.length()) && data.startsWith(pattern_remainder));
260 0 : }
261 :
262 : // Method buffers last bytes from the currently processed body in overlap_.
263 : // This is required to find patterns which spans across multiple body chunks.
264 : void HttpGenericBodyMatcher::bufferLastBytes(const Buffer::Instance& data,
265 0 : HttpGenericBodyMatcherCtx* ctx) {
266 : // The matcher buffers the last seen X bytes where X is equal to the length of the
267 : // longest pattern - 1. With the arrival of the new 'data' the following situations
268 : // are possible:
269 : // 1. The new data's length is larger or equal to X. In this case just copy last X bytes
270 : // from the data to overlap_ buffer.
271 : // 2. The new data length is smaller than X and there is enough room in overlap buffer to just
272 : // copy the bytes from data.
273 : // 3. The new data length is smaller than X and there is not enough room in overlap buffer.
274 0 : if (data.length() >= ctx->capacity_) {
275 : // Case 1:
276 : // Just overwrite the entire overlap_ buffer with new data.
277 0 : ctx->overlap_.resize(ctx->capacity_);
278 0 : data.copyOut(data.length() - ctx->capacity_, ctx->capacity_, ctx->overlap_.data());
279 0 : } else {
280 0 : if (data.length() <= (ctx->capacity_ - ctx->overlap_.size())) {
281 : // Case 2. Just add the new data on top of already buffered.
282 0 : const auto size = ctx->overlap_.size();
283 0 : ctx->overlap_.resize(ctx->overlap_.size() + data.length());
284 0 : data.copyOut(0, data.length(), ctx->overlap_.data() + size);
285 0 : } else {
286 : // Case 3. First shift data to make room for new data and then copy
287 : // entire new buffer.
288 0 : const size_t shift = ctx->overlap_.size() - (ctx->capacity_ - data.length());
289 0 : for (size_t i = 0; i < (ctx->overlap_.size() - shift); i++) {
290 0 : ctx->overlap_[i] = ctx->overlap_[i + shift];
291 0 : }
292 0 : const auto size = ctx->overlap_.size();
293 0 : ctx->overlap_.resize(ctx->capacity_);
294 0 : data.copyOut(0, data.length(), ctx->overlap_.data() + (size - shift));
295 0 : }
296 0 : }
297 0 : }
298 :
299 : // Method takes list of indexes of patterns not yet located in the http body and returns the
300 : // length of the longest pattern.
301 : // This is used by matcher to buffer as minimum bytes as possible.
302 0 : size_t HttpGenericBodyMatcher::calcLongestPatternSize(const std::list<uint32_t>& indexes) const {
303 0 : ASSERT(!indexes.empty());
304 0 : size_t max_len = 0;
305 0 : for (const auto& i : indexes) {
306 0 : max_len = std::max(max_len, patterns_->at(i).length());
307 0 : }
308 0 : return max_len;
309 0 : }
310 :
311 : // Method checks if it is possible to reduce the size of overlap_ buffer.
312 0 : void HttpGenericBodyMatcher::resizeOverlapBuffer(HttpGenericBodyMatcherCtx* ctx) {
313 : // Check if we need to resize overlap_ buffer. Since it was initialized to size of the longest
314 : // pattern, it will be shrunk only and memory allocations do not happen.
315 : // Depending on how many bytes were already in the buffer, shift may be required if
316 : // the new size is smaller than number of already buffered bytes.
317 0 : const size_t max_len = calcLongestPatternSize(ctx->patterns_index_);
318 0 : if (ctx->capacity_ != (max_len - 1)) {
319 0 : const size_t new_size = max_len - 1;
320 0 : const size_t shift = (ctx->overlap_.size() > new_size) ? (ctx->overlap_.size() - new_size) : 0;
321 : // Copy the last new_size bytes to the beginning of the buffer.
322 0 : for (size_t i = 0; (i < new_size) && (shift > 0); i++) {
323 0 : ctx->overlap_[i] = ctx->overlap_[i + shift];
324 0 : }
325 0 : ctx->capacity_ = new_size;
326 0 : if (shift > 0) {
327 0 : ctx->overlap_.resize(new_size);
328 0 : }
329 0 : }
330 0 : }
331 :
332 : } // namespace Matcher
333 : } // namespace Common
334 : } // namespace Extensions
335 : } // namespace Envoy
|