1
#include "source/extensions/common/matcher/matcher.h"
2

            
3
#include "source/common/common/assert.h"
4

            
5
namespace Envoy {
6
namespace Extensions {
7
namespace Common {
8
namespace Matcher {
9

            
10
void buildMatcher(const envoy::config::common::matcher::v3::MatchPredicate& match_config,
11
                  std::vector<MatcherPtr>& matchers,
12
255
                  Server::Configuration::CommonFactoryContext& context) {
13
  // In order to store indexes and build our matcher tree inline, we must reserve a slot where
14
  // the matcher we are about to create will go. This allows us to know its future index and still
15
  // construct more of the tree in each called constructor (e.g., multiple OR/AND conditions).
16
  // Once fully constructed, we move the matcher into its position below. See the matcher
17
  // overview in matcher.h for more information.
18
255
  matchers.emplace_back(nullptr);
19

            
20
255
  MatcherPtr new_matcher;
21
255
  switch (match_config.rule_case()) {
22
8
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kOrMatch:
23
8
    new_matcher = std::make_unique<SetLogicMatcher>(match_config.or_match(), matchers,
24
8
                                                    SetLogicMatcher::Type::Or, context);
25
8
    break;
26
7
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAndMatch:
27
7
    new_matcher = std::make_unique<SetLogicMatcher>(match_config.and_match(), matchers,
28
7
                                                    SetLogicMatcher::Type::And, context);
29
7
    break;
30
1
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kNotMatch:
31
1
    new_matcher = std::make_unique<NotMatcher>(match_config.not_match(), matchers, context);
32
1
    break;
33
26
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kAnyMatch:
34
26
    new_matcher = std::make_unique<AnyMatcher>(matchers);
35
26
    break;
36
7
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestHeadersMatch:
37
7
    new_matcher = std::make_unique<HttpRequestHeadersMatcher>(
38
7
        match_config.http_request_headers_match(), matchers, context);
39
7
    break;
40
4
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestTrailersMatch:
41
4
    new_matcher = std::make_unique<HttpRequestTrailersMatcher>(
42
4
        match_config.http_request_trailers_match(), matchers, context);
43
4
    break;
44
23
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseHeadersMatch:
45
23
    new_matcher = std::make_unique<HttpResponseHeadersMatcher>(
46
23
        match_config.http_response_headers_match(), matchers, context);
47
23
    break;
48
4
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseTrailersMatch:
49
4
    new_matcher = std::make_unique<HttpResponseTrailersMatcher>(
50
4
        match_config.http_response_trailers_match(), matchers, context);
51
4
    break;
52
138
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpRequestGenericBodyMatch:
53
138
    new_matcher = std::make_unique<HttpRequestGenericBodyMatcher>(
54
138
        match_config.http_request_generic_body_match(), matchers);
55
138
    break;
56
37
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::kHttpResponseGenericBodyMatch:
57
37
    new_matcher = std::make_unique<HttpResponseGenericBodyMatcher>(
58
37
        match_config.http_response_generic_body_match(), matchers);
59
37
    break;
60
  case envoy::config::common::matcher::v3::MatchPredicate::RuleCase::RULE_NOT_SET:
61
    PANIC_DUE_TO_CORRUPT_ENUM;
62
255
  }
63

            
64
  // Per above, move the matcher into its position.
65
255
  matchers[new_matcher->index()] = std::move(new_matcher);
66
255
}
67

            
68
SetLogicMatcher::SetLogicMatcher(
69
    const envoy::config::common::matcher::v3::MatchPredicate::MatchSet& configs,
70
    std::vector<MatcherPtr>& matchers, Type type,
71
    Server::Configuration::CommonFactoryContext& context)
72
15
    : LogicMatcherBase(matchers), matchers_(matchers), type_(type) {
73
29
  for (const auto& config : configs.rules()) {
74
29
    indexes_.push_back(matchers_.size());
75
29
    buildMatcher(config, matchers_, context);
76
29
  }
77
15
}
78

            
79
void SetLogicMatcher::updateLocalStatus(MatchStatusVector& statuses,
80
78
                                        const UpdateFunctor& functor) const {
81
78
  if (!statuses[my_index_].might_change_status_) {
82
1
    return;
83
1
  }
84

            
85
150
  for (size_t index : indexes_) {
86
150
    functor(*matchers_[index], statuses);
87
150
  }
88

            
89
122
  auto predicate = [&statuses](size_t index) { return statuses[index].matches_; };
90
77
  if (type_ == Type::And) {
91
44
    statuses[my_index_].matches_ = std::all_of(indexes_.begin(), indexes_.end(), predicate);
92
68
  } else {
93
33
    ASSERT(type_ == Type::Or);
94
33
    statuses[my_index_].matches_ = std::any_of(indexes_.begin(), indexes_.end(), predicate);
95
33
  }
96

            
97
  // TODO(mattklein123): We can potentially short circuit this even further if we git a single false
98
  // in an AND set or a single true in an OR set.
99
77
  statuses[my_index_].might_change_status_ =
100
77
      std::any_of(indexes_.begin(), indexes_.end(),
101
117
                  [&statuses](size_t index) { return statuses[index].might_change_status_; });
102
77
}
103

            
104
NotMatcher::NotMatcher(const envoy::config::common::matcher::v3::MatchPredicate& config,
105
                       std::vector<MatcherPtr>& matchers,
106
                       Server::Configuration::CommonFactoryContext& context)
107
1
    : LogicMatcherBase(matchers), matchers_(matchers), not_index_(matchers.size()) {
108
1
  buildMatcher(config, matchers, context);
109
1
}
110

            
111
void NotMatcher::updateLocalStatus(MatchStatusVector& statuses,
112
5
                                   const UpdateFunctor& functor) const {
113
5
  if (!statuses[my_index_].might_change_status_) {
114
4
    return;
115
4
  }
116

            
117
1
  functor(*matchers_[not_index_], statuses);
118
1
  statuses[my_index_].matches_ = !statuses[not_index_].matches_;
119
1
  statuses[my_index_].might_change_status_ = statuses[not_index_].might_change_status_;
120
1
}
121

            
122
HttpHeaderMatcherBase::HttpHeaderMatcherBase(
123
    const envoy::config::common::matcher::v3::HttpHeadersMatch& config,
124
    const std::vector<MatcherPtr>& matchers, Server::Configuration::CommonFactoryContext& context)
125
38
    : SimpleMatcher(matchers),
126
38
      headers_to_match_(Http::HeaderUtility::buildHeaderDataVector(config.headers(), context)) {}
127

            
128
void HttpHeaderMatcherBase::matchHeaders(const Http::HeaderMap& headers,
129
60
                                         MatchStatusVector& statuses) const {
130
60
  ASSERT(statuses[my_index_].might_change_status_);
131
60
  statuses[my_index_].matches_ = Http::HeaderUtility::matchHeaders(headers, headers_to_match_);
132
60
  statuses[my_index_].might_change_status_ = false;
133
60
}
134

            
135
// HttpGenericBodyMatcher
136
// Scans the HTTP body and looks for patterns.
137
// HTTP body may be passed to the matcher in chunks. The search logic buffers
138
// only as many bytes as is the length of the longest pattern to be found.
139
HttpGenericBodyMatcher::HttpGenericBodyMatcher(
140
    const envoy::config::common::matcher::v3::HttpGenericBodyMatch& config,
141
    const std::vector<MatcherPtr>& matchers)
142
175
    : HttpBodyMatcherBase(matchers) {
143
175
  patterns_ = std::make_shared<std::vector<std::string>>();
144
610
  for (const auto& i : config.patterns()) {
145
610
    switch (i.rule_case()) {
146
    // For binary match 'i' contains sequence of bytes to locate in the body.
147
22
    case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kBinaryMatch: {
148
22
      patterns_->push_back(i.binary_match());
149
22
    } break;
150
    // For string match 'i' contains exact string to locate in the body.
151
588
    case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::kStringMatch:
152
588
      patterns_->push_back(i.string_match());
153
588
      break;
154
    case envoy::config::common::matcher::v3::HttpGenericBodyMatch::GenericTextMatch::RULE_NOT_SET:
155
      PANIC_DUE_TO_CORRUPT_ENUM;
156
610
    }
157
    // overlap_size_ indicates how many bytes from previous data chunk(s) are buffered.
158
610
    overlap_size_ = std::max(overlap_size_, patterns_->back().length() - 1);
159
610
  }
160
175
  limit_ = config.bytes_limit();
161
175
}
162

            
163
399
void HttpGenericBodyMatcher::onBody(const Buffer::Instance& data, MatchStatusVector& statuses) {
164
  // Get the context associated with this stream.
165
399
  HttpGenericBodyMatcherCtx* ctx =
166
399
      static_cast<HttpGenericBodyMatcherCtx*>(statuses[my_index_].ctx_.get());
167

            
168
399
  if (statuses[my_index_].might_change_status_ == false) {
169
    // End of search limit has been already reached or all patterns have been found.
170
    // Status is not going to change.
171
33
    ASSERT(((0 != limit_) && (limit_ == ctx->processed_bytes_)) || (ctx->patterns_index_.empty()));
172
33
    return;
173
33
  }
174

            
175
  // Iterate through all patterns to be found and check if they are located across body
176
  // chunks: part of the pattern was in previous body chunk and remaining of the pattern
177
  // is in the current body chunk on in the current body chunk.
178
366
  bool resize_required = false;
179
366
  auto body_search_limit = limit_ - ctx->processed_bytes_;
180
366
  auto it = ctx->patterns_index_.begin();
181
1438
  while (it != ctx->patterns_index_.end()) {
182
1072
    const auto& pattern = patterns_->at(*it);
183
1072
    if ((!ctx->overlap_.empty() && (locatePatternAcrossChunks(pattern, data, ctx))) ||
184
1072
        (-1 != data.search(static_cast<const void*>(pattern.data()), pattern.length(), 0,
185
839
                           body_search_limit))) {
186
      // Pattern found. Remove it from the list of patterns to be found.
187
      // If the longest pattern has been found, resize of overlap buffer may be
188
      // required.
189
572
      resize_required = resize_required || (ctx->capacity_ == (pattern.length() - 1));
190
572
      it = ctx->patterns_index_.erase(it);
191
573
    } else {
192
500
      it++;
193
500
    }
194
1072
  }
195

            
196
366
  if (ctx->patterns_index_.empty()) {
197
    // All patterns were found.
198
147
    statuses[my_index_].matches_ = true;
199
147
    statuses[my_index_].might_change_status_ = false;
200
147
    return;
201
147
  }
202

            
203
  // Check if next body chunks should be searched for patterns. If the search limit
204
  // ends on the current body chunk, there is no need to check next chunks.
205
219
  if (0 != limit_) {
206
46
    ctx->processed_bytes_ = std::min(uint64_t(limit_), ctx->processed_bytes_ + data.length());
207
46
    if (limit_ == ctx->processed_bytes_) {
208
      // End of search limit has been reached and not all patterns have been found.
209
20
      statuses[my_index_].matches_ = false;
210
20
      statuses[my_index_].might_change_status_ = false;
211
20
      return;
212
20
    }
213
46
  }
214

            
215
  // If longest pattern has been located, there is possibility that overlap_
216
  // buffer size may be reduced.
217
199
  if (resize_required) {
218
20
    resizeOverlapBuffer(ctx);
219
20
  }
220

            
221
199
  bufferLastBytes(data, ctx);
222
199
}
223

            
224
// Here we handle a situation when a pattern is spread across multiple body buffers.
225
// overlap_ stores number of bytes from previous body chunks equal to longest pattern yet to be
226
// found minus one byte (-1). The logic below tries to find the beginning of the pattern in
227
// overlap_ buffer and the pattern should continue at the beginning of the next buffer.
228
bool HttpGenericBodyMatcher::locatePatternAcrossChunks(const std::string& pattern,
229
                                                       const Buffer::Instance& data,
230
456
                                                       const HttpGenericBodyMatcherCtx* ctx) {
231
  // Take the first character from the pattern and locate it in overlap_.
232
456
  auto pattern_index = 0;
233
  // Start position in overlap_. overlap_ size was calculated based on the longest pattern to be
234
  // found, but search for shorter patterns may start from some offset, not the beginning of the
235
  // buffer.
236
456
  size_t start_index = (ctx->overlap_.size() > (pattern.size() - 1))
237
456
                           ? ctx->overlap_.size() - (pattern.size() - 1)
238
456
                           : 0;
239
456
  auto match_iter = std::find(std::begin(ctx->overlap_) + start_index, std::end(ctx->overlap_),
240
456
                              pattern.at(pattern_index));
241

            
242
456
  if (match_iter == std::end(ctx->overlap_)) {
243
209
    return false;
244
209
  }
245

            
246
  // Continue checking characters until end of overlap_ buffer.
247
1090
  while (match_iter != std::end(ctx->overlap_)) {
248
845
    if (pattern[pattern_index] != *match_iter) {
249
2
      return false;
250
2
    }
251
843
    pattern_index++;
252
843
    match_iter++;
253
843
  }
254

            
255
  // Now check if the remaining of the pattern matches the beginning of the body
256
  // buffer.i Do it only if there is sufficient number of bytes in the data buffer.
257
245
  auto pattern_remainder = pattern.substr(pattern_index);
258
245
  if ((0 != limit_) && (pattern_remainder.length() > (limit_ - ctx->processed_bytes_))) {
259
    // Even if we got match it would be outside the search limit
260
4
    return false;
261
4
  }
262
241
  return ((pattern_remainder.length() <= data.length()) && data.startsWith(pattern_remainder));
263
245
}
264

            
265
// Method buffers last bytes from the currently processed body in overlap_.
266
// This is required to find patterns which spans across multiple body chunks.
267
void HttpGenericBodyMatcher::bufferLastBytes(const Buffer::Instance& data,
268
199
                                             HttpGenericBodyMatcherCtx* ctx) {
269
  // The matcher buffers the last seen X bytes where X is equal to the length of the
270
  // longest pattern - 1. With the arrival of the new 'data' the following situations
271
  // are possible:
272
  // 1. The new data's length is larger or equal to X. In this case just copy last X bytes
273
  // from the data to overlap_ buffer.
274
  // 2. The new data length is smaller than X and there is enough room in overlap buffer to just
275
  // copy the bytes from data.
276
  // 3. The new data length is smaller than X and there is not enough room in overlap buffer.
277
199
  if (data.length() >= ctx->capacity_) {
278
    // Case 1:
279
    // Just overwrite the entire overlap_ buffer with new data.
280
155
    ctx->overlap_.resize(ctx->capacity_);
281
155
    data.copyOut(data.length() - ctx->capacity_, ctx->capacity_, ctx->overlap_.data());
282
155
  } else {
283
44
    if (data.length() <= (ctx->capacity_ - ctx->overlap_.size())) {
284
      // Case 2. Just add the new data on top of already buffered.
285
28
      const auto size = ctx->overlap_.size();
286
28
      ctx->overlap_.resize(ctx->overlap_.size() + data.length());
287
28
      data.copyOut(0, data.length(), ctx->overlap_.data() + size);
288
28
    } else {
289
      // Case 3. First shift data to make room for new data and then copy
290
      // entire new buffer.
291
16
      const size_t shift = ctx->overlap_.size() - (ctx->capacity_ - data.length());
292
70
      for (size_t i = 0; i < (ctx->overlap_.size() - shift); i++) {
293
54
        ctx->overlap_[i] = ctx->overlap_[i + shift];
294
54
      }
295
16
      const auto size = ctx->overlap_.size();
296
16
      ctx->overlap_.resize(ctx->capacity_);
297
16
      data.copyOut(0, data.length(), ctx->overlap_.data() + (size - shift));
298
16
    }
299
44
  }
300
199
}
301

            
302
// Method takes list of indexes of patterns not yet located in the http body and returns the
303
// length of the longest pattern.
304
// This is used by matcher to buffer as minimum bytes as possible.
305
20
size_t HttpGenericBodyMatcher::calcLongestPatternSize(const std::list<uint32_t>& indexes) const {
306
20
  ASSERT(!indexes.empty());
307
20
  size_t max_len = 0;
308
32
  for (const auto& i : indexes) {
309
32
    max_len = std::max(max_len, patterns_->at(i).length());
310
32
  }
311
20
  return max_len;
312
20
}
313

            
314
// Method checks if it is possible to reduce the size of overlap_ buffer.
315
20
void HttpGenericBodyMatcher::resizeOverlapBuffer(HttpGenericBodyMatcherCtx* ctx) {
316
  // Check if we need to resize overlap_ buffer. Since it was initialized to size of the longest
317
  // pattern, it will be shrunk only and memory allocations do not happen.
318
  // Depending on how many bytes were already in the buffer, shift may be required if
319
  // the new size is smaller than number of already buffered bytes.
320
20
  const size_t max_len = calcLongestPatternSize(ctx->patterns_index_);
321
20
  if (ctx->capacity_ != (max_len - 1)) {
322
15
    const size_t new_size = max_len - 1;
323
15
    const size_t shift = (ctx->overlap_.size() > new_size) ? (ctx->overlap_.size() - new_size) : 0;
324
    // Copy the last new_size bytes to the beginning of the buffer.
325
17
    for (size_t i = 0; (i < new_size) && (shift > 0); i++) {
326
2
      ctx->overlap_[i] = ctx->overlap_[i + shift];
327
2
    }
328
15
    ctx->capacity_ = new_size;
329
15
    if (shift > 0) {
330
1
      ctx->overlap_.resize(new_size);
331
1
    }
332
15
  }
333
20
}
334

            
335
} // namespace Matcher
336
} // namespace Common
337
} // namespace Extensions
338
} // namespace Envoy