Coverage Report

Created: 2024-09-19 09:45

/proc/self/cwd/source/extensions/common/matcher/matcher.cc
Line
Count
Source (jump to first uncovered line)
1
#include "source/extensions/common/matcher/matcher.h"
2
3
#include "source/common/common/assert.h"
4
5
namespace Envoy {
6
namespace Extensions {
7
namespace Common {
8
namespace Matcher {
9
10
void buildMatcher(const envoy::config::common::matcher::v3::MatchPredicate& match_config,
11
                  std::vector<MatcherPtr>& matchers,
12
4.54k
                  Server::Configuration::CommonFactoryContext& context) {
13
  // In order to store indexes and build our matcher tree inline, we must reserve a slot where
14
  // the matcher we are about to create will go. This allows us to know its future index and still
15
  // construct more of the tree in each called constructor (e.g., multiple OR/AND conditions).
16
  // Once fully constructed, we move the matcher into its position below. See the matcher
17
  // overview in matcher.h for more information.
18
4.54k
  matchers.emplace_back(nullptr);
19
20
4.54k
  MatcherPtr new_matcher;
21
4.54k
  switch (match_config.rule_case()) {
22
634
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kOrMatch:
23
634
    new_matcher = std::make_unique<SetLogicMatcher>(match_config.or_match(), matchers,
24
634
                                                    SetLogicMatcher::Type::Or, context);
25
634
    break;
26
522
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAndMatch:
27
522
    new_matcher = std::make_unique<SetLogicMatcher>(match_config.and_match(), matchers,
28
522
                                                    SetLogicMatcher::Type::And, context);
29
522
    break;
30
676
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kNotMatch:
31
676
    new_matcher = std::make_unique<NotMatcher>(match_config.not_match(), matchers, context);
32
676
    break;
33
628
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAnyMatch:
34
628
    new_matcher = std::make_unique<AnyMatcher>(matchers);
35
628
    break;
36
212
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestHeadersMatch:
37
212
    new_matcher = std::make_unique<HttpRequestHeadersMatcher>(
38
212
        match_config.http_request_headers_match(), matchers, context);
39
212
    break;
40
76
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestTrailersMatch:
41
76
    new_matcher = std::make_unique<HttpRequestTrailersMatcher>(
42
76
        match_config.http_request_trailers_match(), matchers, context);
43
76
    break;
44
683
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseHeadersMatch:
45
683
    new_matcher = std::make_unique<HttpResponseHeadersMatcher>(
46
683
        match_config.http_response_headers_match(), matchers, context);
47
683
    break;
48
807
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseTrailersMatch:
49
807
    new_matcher = std::make_unique<HttpResponseTrailersMatcher>(
50
807
        match_config.http_response_trailers_match(), matchers, context);
51
807
    break;
52
240
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestGenericBodyMatch:
53
240
    new_matcher = std::make_unique<HttpRequestGenericBodyMatcher>(
54
240
        match_config.http_request_generic_body_match(), matchers);
55
240
    break;
56
70
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseGenericBodyMatch:
57
70
    new_matcher = std::make_unique<HttpResponseGenericBodyMatcher>(
58
70
        match_config.http_response_generic_body_match(), matchers);
59
70
    break;
60
0
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::RULE_NOT_SET:
61
0
    PANIC_DUE_TO_CORRUPT_ENUM;
62
4.54k
  }
63
64
  // Per above, move the matcher into its position.
65
4.54k
  matchers[new_matcher->index()] = std::move(new_matcher);
66
4.54k
}
67
68
SetLogicMatcher::SetLogicMatcher(
69
    const envoy::config::common::matcher::v3::MatchPredicate::MatchSet& configs,
70
    std::vector<MatcherPtr>& matchers, Type type,
71
    Server::Configuration::CommonFactoryContext& context)
72
1.15k
    : LogicMatcherBase(matchers), matchers_(matchers), type_(type) {
73
3.79k
  for (const auto& config : configs.rules()) {
74
3.79k
    indexes_.push_back(matchers_.size());
75
3.79k
    buildMatcher(config, matchers_, context);
76
3.79k
  }
77
1.15k
}
78
79
void SetLogicMatcher::updateLocalStatus(MatchStatusVector& statuses,
80
7.57k
                                        const UpdateFunctor& functor) const {
81
7.57k
  if (!statuses[my_index_].might_change_status_) {
82
47
    return;
83
47
  }
84
85
25.2k
  for (size_t index : indexes_) {
86
25.2k
    functor(*matchers_[index], statuses);
87
25.2k
  }
88
89
11.9k
  auto predicate = [&statuses](size_t index) { return statuses[index].matches_; };
90
7.52k
  if (type_ == Type::And) {
91
4.37k
    statuses[my_index_].matches_ = std::all_of(indexes_.begin(), indexes_.end(), predicate);
92
4.37k
  } else {
93
3.15k
    ASSERT(type_ == Type::Or);
94
3.15k
    statuses[my_index_].matches_ = std::any_of(indexes_.begin(), indexes_.end(), predicate);
95
3.15k
  }
96
97
  // TODO(mattklein123): We can potentially short circuit this even further if we git a single false
98
  // in an AND set or a single true in an OR set.
99
7.52k
  statuses[my_index_].might_change_status_ =
100
7.52k
      std::any_of(indexes_.begin(), indexes_.end(),
101
9.79k
                  [&statuses](size_t index) { return statuses[index].might_change_status_; });
102
7.52k
}
103
104
NotMatcher::NotMatcher(const envoy::config::common::matcher::v3::MatchPredicate& config,
105
                       std::vector<MatcherPtr>& matchers,
106
                       Server::Configuration::CommonFactoryContext& context)
107
676
    : LogicMatcherBase(matchers), matchers_(matchers), not_index_(matchers.size()) {
108
676
  buildMatcher(config, matchers, context);
109
676
}
110
111
void NotMatcher::updateLocalStatus(MatchStatusVector& statuses,
112
2.95k
                                   const UpdateFunctor& functor) const {
113
2.95k
  if (!statuses[my_index_].might_change_status_) {
114
98
    return;
115
98
  }
116
117
2.85k
  functor(*matchers_[not_index_], statuses);
118
2.85k
  statuses[my_index_].matches_ = !statuses[not_index_].matches_;
119
2.85k
  statuses[my_index_].might_change_status_ = statuses[not_index_].might_change_status_;
120
2.85k
}
121
122
HttpHeaderMatcherBase::HttpHeaderMatcherBase(
123
    const envoy::config::common::matcher::v3::HttpHeadersMatch& config,
124
    const std::vector<MatcherPtr>& matchers, Server::Configuration::CommonFactoryContext& context)
125
    : SimpleMatcher(matchers),
126
1.77k
      headers_to_match_(Http::HeaderUtility::buildHeaderDataVector(config.headers(), context)) {}
127
128
void HttpHeaderMatcherBase::matchHeaders(const Http::HeaderMap& headers,
129
1.39k
                                         MatchStatusVector& statuses) const {
130
1.39k
  ASSERT(statuses[my_index_].might_change_status_);
131
1.39k
  statuses[my_index_].matches_ = Http::HeaderUtility::matchHeaders(headers, headers_to_match_);
132
1.39k
  statuses[my_index_].might_change_status_ = false;
133
1.39k
}
134
135
// HttpGenericBodyMatcher
136
// Scans the HTTP body and looks for patterns.
137
// HTTP body may be passed to the matcher in chunks. The search logic buffers
138
// only as many bytes as is the length of the longest pattern to be found.
139
HttpGenericBodyMatcher::HttpGenericBodyMatcher(
140
    const envoy::config::common::matcher::v3::HttpGenericBodyMatch& config,
141
    const std::vector<MatcherPtr>& matchers)
142
310
    : HttpBodyMatcherBase(matchers) {
143
310
  patterns_ = std::make_shared<std::vector<std::string>>();
144
718
  for (const auto& i : config.patterns()) {
145
718
    switch (i.rule_case()) {
146
    // For binary match 'i' contains sequence of bytes to locate in the body.
147
133
    case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kBinaryMatch: {
148
133
      patterns_->push_back(i.binary_match());
149
133
    } break;
150
    // For string match 'i' contains exact string to locate in the body.
151
585
    case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kStringMatch:
152
585
      patterns_->push_back(i.string_match());
153
585
      break;
154
0
    case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::RULE_NOT_SET:
155
0
      PANIC_DUE_TO_CORRUPT_ENUM;
156
718
    }
157
    // overlap_size_ indicates how many bytes from previous data chunk(s) are buffered.
158
718
    overlap_size_ = std::max(overlap_size_, patterns_->back().length() - 1);
159
718
  }
160
310
  limit_ = config.bytes_limit();
161
310
}
162
163
2.08k
void HttpGenericBodyMatcher::onBody(const Buffer::Instance& data, MatchStatusVector& statuses) {
164
  // Get the context associated with this stream.
165
2.08k
  HttpGenericBodyMatcherCtx* ctx =
166
2.08k
      static_cast<HttpGenericBodyMatcherCtx*>(statuses[my_index_].ctx_.get());
167
168
2.08k
  if (statuses[my_index_].might_change_status_ == false) {
169
    // End of search limit has been already reached or all patterns have been found.
170
    // Status is not going to change.
171
9
    ASSERT(((0 != limit_) && (limit_ == ctx->processed_bytes_)) || (ctx->patterns_index_.empty()));
172
9
    return;
173
9
  }
174
175
  // Iterate through all patterns to be found and check if they are located across body
176
  // chunks: part of the pattern was in previous body chunk and remaining of the pattern
177
  // is in the current body chunk on in the current body chunk.
178
2.07k
  bool resize_required = false;
179
2.07k
  auto body_search_limit = limit_ - ctx->processed_bytes_;
180
2.07k
  auto it = ctx->patterns_index_.begin();
181
14.0k
  while (it != ctx->patterns_index_.end()) {
182
11.9k
    const auto& pattern = patterns_->at(*it);
183
11.9k
    if ((!ctx->overlap_.empty() && (locatePatternAcrossChunks(pattern, data, ctx))) ||
184
11.9k
        (-1 != data.search(static_cast<const void*>(pattern.data()), pattern.length(), 0,
185
11.9k
                           body_search_limit))) {
186
      // Pattern found. Remove it from the list of patterns to be found.
187
      // If the longest pattern has been found, resize of overlap buffer may be
188
      // required.
189
134
      resize_required = resize_required || (ctx->capacity_ == (pattern.length() - 1));
190
134
      it = ctx->patterns_index_.erase(it);
191
11.8k
    } else {
192
11.8k
      it++;
193
11.8k
    }
194
11.9k
  }
195
196
2.07k
  if (ctx->patterns_index_.empty()) {
197
    // All patterns were found.
198
38
    statuses[my_index_].matches_ = true;
199
38
    statuses[my_index_].might_change_status_ = false;
200
38
    return;
201
38
  }
202
203
  // Check if next body chunks should be searched for patterns. If the search limit
204
  // ends on the current body chunk, there is no need to check next chunks.
205
2.03k
  if (0 != limit_) {
206
1.89k
    ctx->processed_bytes_ = std::min(uint64_t(limit_), ctx->processed_bytes_ + data.length());
207
1.89k
    if (limit_ == ctx->processed_bytes_) {
208
      // End of search limit has been reached and not all patterns have been found.
209
1
      statuses[my_index_].matches_ = false;
210
1
      statuses[my_index_].might_change_status_ = false;
211
1
      return;
212
1
    }
213
1.89k
  }
214
215
  // If longest pattern has been located, there is possibility that overlap_
216
  // buffer size may be reduced.
217
2.03k
  if (resize_required) {
218
65
    resizeOverlapBuffer(ctx);
219
65
  }
220
221
2.03k
  bufferLastBytes(data, ctx);
222
2.03k
}
223
224
// Here we handle a situation when a pattern is spread across multiple body buffers.
225
// overlap_ stores number of bytes from previous body chunks equal to longest pattern yet to be
226
// found minus one byte (-1). The logic below tries to find the beginning of the pattern in
227
// overlap_ buffer and the pattern should continue at the beginning of the next buffer.
228
bool HttpGenericBodyMatcher::locatePatternAcrossChunks(const std::string& pattern,
229
                                                       const Buffer::Instance& data,
230
11.5k
                                                       const HttpGenericBodyMatcherCtx* ctx) {
231
  // Take the first character from the pattern and locate it in overlap_.
232
11.5k
  auto pattern_index = 0;
233
  // Start position in overlap_. overlap_ size was calculated based on the longest pattern to be
234
  // found, but search for shorter patterns may start from some offset, not the beginning of the
235
  // buffer.
236
11.5k
  size_t start_index = (ctx->overlap_.size() > (pattern.size() - 1))
237
11.5k
                           ? ctx->overlap_.size() - (pattern.size() - 1)
238
11.5k
                           : 0;
239
11.5k
  auto match_iter = std::find(std::begin(ctx->overlap_) + start_index, std::end(ctx->overlap_),
240
11.5k
                              pattern.at(pattern_index));
241
242
11.5k
  if (match_iter == std::end(ctx->overlap_)) {
243
9.80k
    return false;
244
9.80k
  }
245
246
  // Continue checking characters until end of overlap_ buffer.
247
64.2k
  while (match_iter != std::end(ctx->overlap_)) {
248
63.2k
    if (pattern[pattern_index] != *match_iter) {
249
724
      return false;
250
724
    }
251
62.5k
    pattern_index++;
252
62.5k
    match_iter++;
253
62.5k
  }
254
255
  // Now check if the remaining of the pattern matches the beginning of the body
256
  // buffer.i Do it only if there is sufficient number of bytes in the data buffer.
257
974
  auto pattern_remainder = pattern.substr(pattern_index);
258
974
  if ((0 != limit_) && (pattern_remainder.length() > (limit_ - ctx->processed_bytes_))) {
259
    // Even if we got match it would be outside the search limit
260
2
    return false;
261
2
  }
262
972
  return ((pattern_remainder.length() <= data.length()) && data.startsWith(pattern_remainder));
263
974
}
264
265
// Method buffers last bytes from the currently processed body in overlap_.
266
// This is required to find patterns which spans across multiple body chunks.
267
void HttpGenericBodyMatcher::bufferLastBytes(const Buffer::Instance& data,
268
2.03k
                                             HttpGenericBodyMatcherCtx* ctx) {
269
  // The matcher buffers the last seen X bytes where X is equal to the length of the
270
  // longest pattern - 1. With the arrival of the new 'data' the following situations
271
  // are possible:
272
  // 1. The new data's length is larger or equal to X. In this case just copy last X bytes
273
  // from the data to overlap_ buffer.
274
  // 2. The new data length is smaller than X and there is enough room in overlap buffer to just
275
  // copy the bytes from data.
276
  // 3. The new data length is smaller than X and there is not enough room in overlap buffer.
277
2.03k
  if (data.length() >= ctx->capacity_) {
278
    // Case 1:
279
    // Just overwrite the entire overlap_ buffer with new data.
280
147
    ctx->overlap_.resize(ctx->capacity_);
281
147
    data.copyOut(data.length() - ctx->capacity_, ctx->capacity_, ctx->overlap_.data());
282
1.88k
  } else {
283
1.88k
    if (data.length() <= (ctx->capacity_ - ctx->overlap_.size())) {
284
      // Case 2. Just add the new data on top of already buffered.
285
817
      const auto size = ctx->overlap_.size();
286
817
      ctx->overlap_.resize(ctx->overlap_.size() + data.length());
287
817
      data.copyOut(0, data.length(), ctx->overlap_.data() + size);
288
1.06k
    } else {
289
      // Case 3. First shift data to make room for new data and then copy
290
      // entire new buffer.
291
1.06k
      const size_t shift = ctx->overlap_.size() - (ctx->capacity_ - data.length());
292
113k
      for (size_t i = 0; i < (ctx->overlap_.size() - shift); i++) {
293
112k
        ctx->overlap_[i] = ctx->overlap_[i + shift];
294
112k
      }
295
1.06k
      const auto size = ctx->overlap_.size();
296
1.06k
      ctx->overlap_.resize(ctx->capacity_);
297
1.06k
      data.copyOut(0, data.length(), ctx->overlap_.data() + (size - shift));
298
1.06k
    }
299
1.88k
  }
300
2.03k
}
301
302
// Method takes list of indexes of patterns not yet located in the http body and returns the
303
// length of the longest pattern.
304
// This is used by matcher to buffer as minimum bytes as possible.
305
65
size_t HttpGenericBodyMatcher::calcLongestPatternSize(const std::list<uint32_t>& indexes) const {
306
65
  ASSERT(!indexes.empty());
307
65
  size_t max_len = 0;
308
97
  for (const auto& i : indexes) {
309
97
    max_len = std::max(max_len, patterns_->at(i).length());
310
97
  }
311
65
  return max_len;
312
65
}
313
314
// Method checks if it is possible to reduce the size of overlap_ buffer.
315
65
void HttpGenericBodyMatcher::resizeOverlapBuffer(HttpGenericBodyMatcherCtx* ctx) {
316
  // Check if we need to resize overlap_ buffer. Since it was initialized to size of the longest
317
  // pattern, it will be shrunk only and memory allocations do not happen.
318
  // Depending on how many bytes were already in the buffer, shift may be required if
319
  // the new size is smaller than number of already buffered bytes.
320
65
  const size_t max_len = calcLongestPatternSize(ctx->patterns_index_);
321
65
  if (ctx->capacity_ != (max_len - 1)) {
322
51
    const size_t new_size = max_len - 1;
323
51
    const size_t shift = (ctx->overlap_.size() > new_size) ? (ctx->overlap_.size() - new_size) : 0;
324
    // Copy the last new_size bytes to the beginning of the buffer.
325
418
    for (size_t i = 0; (i < new_size) && (shift > 0); i++) {
326
367
      ctx->overlap_[i] = ctx->overlap_[i + shift];
327
367
    }
328
51
    ctx->capacity_ = new_size;
329
51
    if (shift > 0) {
330
7
      ctx->overlap_.resize(new_size);
331
7
    }
332
51
  }
333
65
}
334
335
} // namespace Matcher
336
} // namespace Common
337
} // namespace Extensions
338
} // namespace Envoy