1
#include "source/extensions/http/header_validators/envoy_default/header_validator.h"
2

            
3
#include <charconv>
4

            
5
#include "envoy/http/header_validator_errors.h"
6

            
7
#include "source/common/http/path_utility.h"
8
#include "source/common/runtime/runtime_features.h"
9
#include "source/extensions/http/header_validators/envoy_default/character_tables.h"
10

            
11
#include "absl/container/node_hash_set.h"
12
#include "absl/strings/match.h"
13

            
14
namespace Envoy {
15
namespace Extensions {
16
namespace Http {
17
namespace HeaderValidators {
18
namespace EnvoyDefault {
19

            
20
namespace {
21

            
22
template <typename IntType>
23
316
std::from_chars_result fromChars(const absl::string_view string_value, IntType& value) {
24
316
  return std::from_chars(string_value.data(), string_value.data() + string_value.size(), value);
25
316
}
26
} // namespace
27

            
28
using ::envoy::extensions::http::header_validators::envoy_default::v3::HeaderValidatorConfig;
29
using ::Envoy::Http::HeaderString;
30
using ::Envoy::Http::PathUtil;
31
using ::Envoy::Http::Protocol;
32
using ::Envoy::Http::testCharInTable;
33
using ::Envoy::Http::UhvResponseCodeDetail;
34

            
35
// Constructs the validator. The configuration, protocol, stats sink and overrides are stored in
// members; the path normalizer is built from the same configuration and overrides.
HeaderValidator::HeaderValidator(const HeaderValidatorConfig& config, Protocol protocol,
                                 ::Envoy::Http::HeaderValidatorStats& stats,
                                 const ConfigOverrides& config_overrides)
    : config_(config),
      protocol_(protocol),
      config_overrides_(config_overrides),
      header_values_(::Envoy::Http::Headers::get()),
      stats_(stats),
      path_normalizer_(config, config_overrides) {}
41

            
42
// Validates the value of the :method header.
//
// When `restrict_http_methods` is set in the configuration, the method must be one of the entries
// in the registry below. Otherwise any non-empty value made only of characters present in
// kMethodHeaderCharTable is accepted.
//
// Returns success, or a Reject result carrying the InvalidMethod detail.
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validateMethodHeader(const HeaderString& value) {
  // HTTP Method Registry, from iana.org:
  // source: https://www.iana.org/assignments/http-methods/http-methods.xhtml
  //
  // From the RFC:
  //
  // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "."
  //       /  "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  // token = 1*tchar
  // method = token
  //
  // The registry is only ever read, so it is declared const to prevent accidental mutation of
  // state shared between all validator instances.
  static const absl::node_hash_set<absl::string_view> kHttpMethodRegistry = {
      "ACL",
      "BASELINE-CONTROL",
      "BIND",
      "CHECKIN",
      "CHECKOUT",
      "CONNECT",
      "COPY",
      "DELETE",
      "GET",
      "HEAD",
      "LABEL",
      "LINK",
      "LOCK",
      "MERGE",
      "MKACTIVITY",
      "MKCALENDAR",
      "MKCOL",
      "MKREDIRECTREF",
      "MKWORKSPACE",
      "MOVE",
      "OPTIONS",
      "ORDERPATCH",
      "PATCH",
      "POST",
      "PRI",
      "PROPFIND",
      "PROPPATCH",
      "PUT",
      "REBIND",
      "REPORT",
      "SEARCH",
      "TRACE",
      "UNBIND",
      "UNCHECKOUT",
      "UNLINK",
      "UNLOCK",
      "UPDATE",
      "UPDATEREDIRECTREF",
      "VERSION-CONTROL",
      "*",
  };

  const absl::string_view method = value.getStringView();
  bool is_valid = false;

  if (config_.restrict_http_methods()) {
    // Strict mode: exact (case-sensitive) match against the registry.
    is_valid = kHttpMethodRegistry.contains(method);
  } else {
    // Permissive mode: a non-empty token; stop at the first disallowed character.
    is_valid = !method.empty();
    for (const char c : method) {
      if (!testCharInTable(kMethodHeaderCharTable, c)) {
        is_valid = false;
        break;
      }
    }
  }

  if (!is_valid) {
    return {HeaderValueValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().InvalidMethod};
  }

  return HeaderValueValidationResult::success();
}
115

            
116
// Validates the value of the :scheme header. Only "http" and "https" are accepted, compared
// without regard to letter case; anything else is rejected with the InvalidScheme detail.
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validateSchemeHeader(const HeaderString& value) {
  // From RFC 3986, https://datatracker.ietf.org/doc/html/rfc3986#section-3.1:
  //
  // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  //
  // Although schemes are case-insensitive, the canonical form is lowercase and documents that
  // specify schemes must do so with lowercase letters. An implementation should accept uppercase
  // letters as equivalent to lowercase in scheme names (e.g., allow "HTTP" as well as "http") for
  // the sake of robustness but should only produce lowercase scheme names for consistency.
  const absl::string_view scheme = value.getStringView();

  const bool is_supported_scheme =
      absl::EqualsIgnoreCase(scheme, "http") || absl::EqualsIgnoreCase(scheme, "https");
  if (is_supported_scheme) {
    return HeaderValueValidationResult::success();
  }

  // TODO(#23313) - Honor config setting for mixed case.
  return {HeaderValueValidationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidScheme};
}
138

            
139
// Validates the value of the response :status header.
//
// The value must parse completely as an unsigned integer and fall in the range 100 to 999
// (inclusive). This is based on RFC 9110, although the Envoy implementation is more permissive
// and allows status codes larger than 599,
// https://www.rfc-editor.org/rfc/rfc9110.html#section-15:
//
// The status code of a response is a three-digit integer code that describes the result of the
// request and the semantics of the response, including whether the request was successful and
// what content is enclosed (if any). All valid status codes are within the range of 100 to 599,
// inclusive.
//
// Returns success, or a Reject result carrying the InvalidStatus detail.
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validateStatusHeader(const HeaderString& value) {
  // Compile-time bounds; these are never modified so they must not be mutable statics.
  static constexpr uint32_t kMinimumResponseStatusCode = 100;
  static constexpr uint32_t kMaximumResponseStatusCode = 999;
  const absl::string_view status = value.getStringView();

  // Convert the status to an integer. The parse must succeed and must consume the whole value.
  // Note that std::from_chars accepts leading zeros, so a value such as "0200" parses as 200.
  std::uint32_t status_value{};
  const auto parse_result = fromChars(status, status_value);
  const bool fully_parsed =
      parse_result.ec == std::errc() && parse_result.ptr == (status.data() + status.size());
  if (!fully_parsed) {
    return {HeaderValueValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().InvalidStatus};
  }

  if (status_value < kMinimumResponseStatusCode || status_value > kMaximumResponseStatusCode) {
    return {HeaderValueValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().InvalidStatus};
  }

  return HeaderValueValidationResult::success();
}
171

            
172
// Validates a (non-pseudo) header name and applies the configured underscore policy.
//
// From RFC 9110, https://www.rfc-editor.org/rfc/rfc9110.html#section-5.1:
//
// header-field   = field-name ":" OWS field-value OWS
// field-name     = token
// token          = 1*tchar
//
// tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
//                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
//                / DIGIT / ALPHA
//                ; any VCHAR, except delimiters
//
// The name is scanned left to right and the first offending character decides the outcome:
//  - an empty name is rejected with EmptyHeaderName;
//  - a character other than '_' that is not in kGenericHeaderNameCharTable is rejected with
//    InvalidNameCharacters;
//  - a '_' is rejected with InvalidUnderscore (and counted in stats) only when the configured
//    underscore action is REJECT_REQUEST, otherwise it is accepted.
HeaderValidator::HeaderEntryValidationResult
HeaderValidator::validateGenericHeaderName(const HeaderString& name) {
  const absl::string_view header_name = name.getStringView();
  if (header_name.empty()) {
    return {HeaderEntryValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().EmptyHeaderName};
  }

  const bool underscores_are_rejected =
      config_.headers_with_underscores_action() == HeaderValidatorConfig::REJECT_REQUEST;

  for (const char c : header_name) {
    if (c == '_') {
      if (underscores_are_rejected) {
        stats_.incRequestsRejectedWithUnderscoresInHeaders();
        return {HeaderEntryValidationResult::Action::Reject,
                UhvResponseCodeDetail::get().InvalidUnderscore};
      }
      // Underscore is permitted by the configuration; keep scanning.
      continue;
    }

    if (!testCharInTable(::Envoy::Http::kGenericHeaderNameCharTable, c)) {
      return {HeaderEntryValidationResult::Action::Reject,
              UhvResponseCodeDetail::get().InvalidNameCharacters};
    }
  }

  return HeaderEntryValidationResult::success();
}
223

            
224
// Validates a header value against the generic value character set. Every character must be
// present in kGenericHeaderValueCharTable; an empty value is accepted. A value containing any
// other character is rejected with the InvalidValueCharacters detail.
//
// From RFC 9110, https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5:
//
// header-field   = field-name ":" OWS field-value OWS
// field-value    = *field-content
// field-content  = field-vchar
//                  [ 1*( SP / HTAB / field-vchar ) field-vchar ]
// field-vchar    = VCHAR / obs-text
// obs-text       = %x80-FF
//
// VCHAR          =  %x21-7E
//                   ; visible (printing) characters
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validateGenericHeaderValue(const HeaderString& value) {
  const absl::string_view header_value = value.getStringView();

  for (const char c : header_value) {
    if (!testCharInTable(kGenericHeaderValueCharTable, c)) {
      return {HeaderValueValidationResult::Action::Reject,
              UhvResponseCodeDetail::get().InvalidValueCharacters};
    }
  }

  return HeaderValueValidationResult::success();
}
253

            
254
// Validates the Content-Length header value. The value must be non-empty and must parse, in its
// entirety, as an unsigned 64-bit integer; otherwise it is rejected with the InvalidContentLength
// detail.
//
// From RFC 9110, https://www.rfc-editor.org/rfc/rfc9110.html#section-8.6:
//
// Content-Length = 1*DIGIT
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validateContentLengthHeader(const HeaderString& value) {
  // TODO(#23315) - Validate multiple Content-Length values
  const absl::string_view content_length = value.getStringView();

  if (content_length.empty()) {
    return {HeaderValueValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().InvalidContentLength};
  }

  std::uint64_t parsed_length{};
  const auto parse_result = fromChars(content_length, parsed_length);
  const bool consumed_whole_value =
      parse_result.ptr == (content_length.data() + content_length.size());
  if (parse_result.ec == std::errc() && consumed_whole_value) {
    return HeaderValueValidationResult::success();
  }

  return {HeaderValueValidationResult::Action::Reject,
          UhvResponseCodeDetail::get().InvalidContentLength};
}
277

            
278
// Validates the Host / :authority header value.
//
// From RFC 9110, https://www.rfc-editor.org/rfc/rfc9110.html#section-7.2,
// and RFC 3986, https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2:
//
// Host       = uri-host [ ":" port ]
// uri-host   = IP-literal / IPv4address / reg-name
//
// The checks, in order: the value must be non-empty, must not contain '@' (userinfo), the host
// part must pass the IPv6 or reg-name validator, and an optional port must be ":" followed by a
// non-zero 16-bit unsigned integer.
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validateHostHeader(const HeaderString& value) {
  const absl::string_view host = value.getStringView();
  if (host.empty()) {
    return {HeaderValueValidationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidHost};
  }

  // Check if the host/:authority contains the deprecated userinfo component. This is based on RFC
  // 9110, https://www.rfc-editor.org/rfc/rfc9110.html#section-4.2.4:
  //
  // Before making use of an "http" or "https" URI reference received from an untrusted source, a
  // recipient SHOULD parse for userinfo and treat its presence as an error; it is likely being
  // used to obscure the authority for the sake of phishing attacks.
  if (host.find('@') != absl::string_view::npos) {
    return {HeaderValueValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().InvalidHostDeprecatedUserInfo};
  }

  // A leading '[' selects the IPv6 validator; everything else (IPv4 or reg-name) goes through the
  // reg-name validator.
  auto host_result =
      host.front() == '[' ? validateHostHeaderIPv6(host) : validateHostHeaderRegName(host);
  if (!host_result.ok()) {
    return {HeaderValueValidationResult::Action::Reject, host_result.details()};
  }

  const auto port_string = host_result.portAndDelimiter();
  if (port_string.empty()) {
    // No port trailer, nothing more to check.
    return HeaderValueValidationResult::success();
  }

  // Validate the port, which will be in the form of ":<uint16_t>".
  if (port_string.front() != ':') {
    return {HeaderValueValidationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidHost};
  }

  std::uint16_t port_number{};
  const auto parse_result = fromChars(port_string.substr(1), port_number);
  const bool fully_parsed = parse_result.ec == std::errc() &&
                            parse_result.ptr == (port_string.data() + port_string.size());
  if (!fully_parsed || port_number == 0) {
    return {HeaderValueValidationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidHost};
  }

  return HeaderValueValidationResult::success();
}
334

            
335
// Validates a bracketed IPv6 host header value, e.g. "[::1]:8080". This is a simplified check
// based on RFC 3986, https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2.
//
// On success the result carries the address (without brackets) and whatever follows the last ']'
// as the port trailer; the trailer itself is not validated here. Every failure is reported with
// the InvalidHost detail.
//
// Note: an address with exactly one empty component, such as ":1" or "1:", passes these checks.
HeaderValidator::HostHeaderValidationResult
HeaderValidator::validateHostHeaderIPv6(absl::string_view host) {
  const auto invalid_host = []() {
    return HostHeaderValidationResult::reject(UhvResponseCodeDetail::get().InvalidHost);
  };

  // The address must be enclosed between "[" and "]".
  const std::size_t closing_bracket = host.rfind(']');
  if (host.empty() || host.front() != '[') {
    return invalid_host();
  }
  if (closing_bracket == absl::string_view::npos) {
    return invalid_host();
  }

  // Text between the brackets, and the trailing port text after the closing bracket.
  const absl::string_view address = host.substr(1, closing_bracket - 1);
  const absl::string_view port_string = host.substr(closing_bracket + 1);

  if (address.empty()) {
    return invalid_host();
  }
  if (address == "::") {
    return HostHeaderValidationResult::success(address, port_string);
  }

  // Split address by (:) and validate:
  // 1. there are no more than 8 parts
  // 2. each part has only hex digit and is 16-bit
  // 3. only one double colon is allowed
  absl::InlinedVector<absl::string_view, 8> components = absl::StrSplit(address, ':');
  if (components.size() > 8) {
    return invalid_host();
  }

  uint32_t empty_components = 0;
  for (const absl::string_view component : components) {
    // Each part is at most 16 bits, i.e. four hex digits.
    if (component.size() > 4) {
      return invalid_host();
    }
    if (component.empty()) {
      ++empty_components;
      continue;
    }
    for (const char c : component) {
      if (!testCharInTable(kHostIPv6AddressCharTable, c)) {
        return invalid_host();
      }
    }
  }

  // The address should never have more than 2 empty parts, except "::" (handled above).
  if (empty_components >= 3) {
    return invalid_host();
  }

  // Two empty parts are only acceptable when they come from a double colon at the very beginning
  // or the very end of the address.
  if (empty_components == 2 && !absl::StartsWith(address, "::") &&
      !absl::EndsWith(address, "::")) {
    return invalid_host();
  }

  return HostHeaderValidationResult::success(address, port_string);
}
397

            
398
// Validates a non-bracketed host header value. This is a simplified check based on RFC 3986,
// https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2, that only validates the characters,
// not the syntax of the address.
//
// The host is split at the first ':'; the part before it must be non-empty and consist only of
// characters in kHostRegNameCharTable. On success the result carries that part as the address and
// the remainder, starting at the ':', as the port trailer (empty when there is no ':').
HeaderValidator::HostHeaderValidationResult
HeaderValidator::validateHostHeaderRegName(absl::string_view host) {
  // Identify the port trailer.
  const auto port_delimiter = host.find(':');
  const absl::string_view address = host.substr(0, port_delimiter);

  if (address.empty()) {
    return HostHeaderValidationResult::reject(UhvResponseCodeDetail::get().InvalidHost);
  }

  // Validate the reg-name characters.
  for (const char c : address) {
    if (!testCharInTable(kHostRegNameCharTable, c)) {
      return HostHeaderValidationResult::reject(UhvResponseCodeDetail::get().InvalidHost);
    }
  }

  absl::string_view port_string;
  if (port_delimiter != absl::string_view::npos) {
    port_string = host.substr(port_delimiter);
  }
  return HostHeaderValidationResult::success(address, port_string);
}
422

            
423
// Validates the :path header using the default character tables: kPathHeaderCharTable for the
// path component and kUriQueryAndFragmentCharTable for the query and fragment components.
HeaderValidator::HeaderValueValidationResult
HeaderValidator::validatePathHeaderCharacters(const HeaderString& value) {
  const auto& path_characters = kPathHeaderCharTable;
  const auto& query_and_fragment_characters = ::Envoy::Http::kUriQueryAndFragmentCharTable;
  return validatePathHeaderCharacterSet(value, path_characters, query_and_fragment_characters);
}
428

            
429
// Validates the characters of a :path value in three consecutive sections: the path (up to the
// first '?' or '#'), the query (after '?', up to '#') and the fragment (after '#').
//
// `allowed_path_characters` is the table applied to the path section and
// `allowed_query_fragment_characters` the table applied to the query and fragment sections.
//
// Results:
//  - empty value, or any character outside the applicable table: Reject with InvalidUrl;
//  - a fragment is present but `strip_fragment_from_path` is not set: Reject with
//    FragmentInUrlPath;
//  - otherwise success.
HeaderValidator::HeaderValueValidationResult HeaderValidator::validatePathHeaderCharacterSet(
    const HeaderString& value, const std::array<uint32_t, 8>& allowed_path_characters,
    const std::array<uint32_t, 8>& allowed_query_fragment_characters) {
  static const HeaderValueValidationResult bad_path_result{
      HeaderValueValidationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidUrl};

  const absl::string_view path = value.getStringView();
  if (path.empty()) {
    return bad_path_result;
  }

  auto cursor = path.begin();
  const auto path_end = path.end();

  // Path component: runs until the start of the query or fragment, which use a different
  // character table.
  while (cursor != path_end && *cursor != '?' && *cursor != '#') {
    if (!testCharInTable(allowed_path_characters, *cursor)) {
      return bad_path_result;
    }
    ++cursor;
  }

  // Query component: runs from just after '?' until the start of the fragment.
  if (cursor != path_end && *cursor == '?') {
    ++cursor;
    while (cursor != path_end && *cursor != '#') {
      if (!testCharInTable(allowed_query_fragment_characters, *cursor)) {
        return bad_path_result;
      }
      ++cursor;
    }
  }

  if (cursor == path_end) {
    // No fragment.
    return HeaderValueValidationResult::success();
  }

  // The only way to get here with input remaining is to have stopped on the fragment delimiter.
  ASSERT(*cursor == '#');
  if (!config_.strip_fragment_from_path()) {
    return {HeaderValueValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().FragmentInUrlPath};
  }

  // Fragment component: everything after '#'.
  for (++cursor; cursor != path_end; ++cursor) {
    if (!testCharInTable(allowed_query_fragment_characters, *cursor)) {
      return bad_path_result;
    }
  }

  return HeaderValueValidationResult::success();
}
486

            
487
void HeaderValidator::encodeAdditionalCharactersInPath(
488
    // TODO(#28780): reuse Utility::PercentEncoding class for this code.
489

            
490
37
    ::Envoy::Http::RequestHeaderMap& header_map) {
491
  // " < > ^ ` { } | TAB space extended-ASCII
492
37
  static constexpr std::array<uint32_t, 8> kCharactersToEncode = {
493
      // control characters
494
37
      0b00000000010000000000000000000000,
495
      // !"#$%&'()*+,-./0123456789:;<=>?
496
37
      0b10100000000000000000000000001010,
497
      //@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
498
37
      0b00000000000000000000000000000010,
499
      //`abcdefghijklmnopqrstuvwxyz{|}~
500
37
      0b10000000000000000000000000011100,
501
      // extended ascii
502
37
      0b11111111111111111111111111111111,
503
37
      0b11111111111111111111111111111111,
504
37
      0b11111111111111111111111111111111,
505
37
      0b11111111111111111111111111111111,
506
37
  };
507

            
508
37
  absl::string_view path = header_map.getPathValue();
509
  // Check if URL path contains any characters in the kCharactersToEncode set
510
37
  auto char_to_encode = path.begin();
511
388
  for (; char_to_encode != path.end() && !testCharInTable(kCharactersToEncode, *char_to_encode);
512
360
       ++char_to_encode) {
513
    // Return early if we got to query or fragment without finding any characters that has to be
514
    // encoded.
515
360
    if (*char_to_encode == '?' || *char_to_encode == '#') {
516
9
      return;
517
9
    }
518
360
  }
519
28
  if (char_to_encode == path.end()) {
520
25
    return;
521
25
  }
522
3
  std::string encoded_path(path.begin(), char_to_encode);
523
3
  encoded_path.reserve(path.size());
524

            
525
164
  for (; char_to_encode != path.end(); ++char_to_encode) {
526
161
    if (*char_to_encode == '?' || *char_to_encode == '#') {
527
      break;
528
    }
529
161
    if (testCharInTable(kCharactersToEncode, *char_to_encode)) {
530
152
      absl::StrAppend(&encoded_path,
531
152
                      fmt::format("%{:02X}", static_cast<const unsigned char&>(*char_to_encode)));
532
152
    } else {
533
9
      encoded_path.push_back(*char_to_encode);
534
9
    }
535
161
  }
536
  // Append query and fragment if present
537
3
  encoded_path.append(char_to_encode, path.end());
538
  // Encoding changes the length of the path
539
3
  if (encoded_path.size() > path.size()) {
540
3
    header_map.setPath(encoded_path);
541
3
  }
542
3
}
543

            
544
6
bool HeaderValidator::hasChunkedTransferEncoding(const HeaderString& value) {
545
6
  const auto encoding = value.getStringView();
546
6
  for (const auto token : StringUtil::splitToken(encoding, ",", true, true)) {
547
6
    if (absl::EqualsIgnoreCase(token, header_values_.TransferEncodingValues.Chunked)) {
548
6
      return true;
549
6
    }
550
6
  }
551

            
552
  return false;
553
6
}
554

            
555
// Validates a single request header entry.
//
// `protocol_specific_header_validators` maps header names to dedicated value validators. Protocol
// specific header validators use this map to check protocol specific headers. For example the
// transfer-encoding header checks are different for H/1 and H/2 or H/3. This map also contains
// validation methods for headers that have additional restrictions other than the generic
// character set (such as :method). The headers that are not part of this map just need the
// character set validation.
//
// Order of checks: an empty name is rejected; a name found in the map is delegated to its
// validator; any other name starting with ':' is rejected as an unknown pseudo header; all
// remaining headers get the generic name check followed by the generic value check.
HeaderValidator::HeaderEntryValidationResult HeaderValidator::validateGenericRequestHeaderEntry(
    const ::Envoy::Http::HeaderString& key, const ::Envoy::Http::HeaderString& value,
    const HeaderValidatorMap& protocol_specific_header_validators) {
  const absl::string_view header_name = key.getStringView();
  if (header_name.empty()) {
    // reject empty header names
    return {HeaderEntryValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().EmptyHeaderName};
  }

  const auto dedicated_validator = protocol_specific_header_validators.find(header_name);
  if (dedicated_validator != protocol_specific_header_validators.end()) {
    return dedicated_validator->second(value);
  }

  if (header_name.front() == ':') {
    // The validator map contains every known pseudo header. If the header name starts with ":"
    // and we don't have a validator registered in the map, then the header name is an unknown
    // pseudo header.
    return {HeaderEntryValidationResult::Action::Reject,
            UhvResponseCodeDetail::get().InvalidPseudoHeader};
  }

  // Validate the (non-pseudo) header name, then its value.
  auto name_result = validateGenericHeaderName(key);
  if (!name_result) {
    return name_result;
  }

  return validateGenericHeaderValue(value);
}
592

            
593
// For all (H/1, H/2 and H/3) protocols, trailers should only contain generic headers. As such a
594
// common validation method can be used.
595
// More in depth explanation for using common function:
596
// For H/2 (and so H/3), per
597
// https://www.rfc-editor.org/rfc/rfc9113#section-8.1 trailers MUST NOT contain pseudo header
598
// fields.
599
// For H/1 the codec will never produce H/2 pseudo headers and per
600
// https://www.rfc-editor.org/rfc/rfc9110#section-6.5 there are no other prohibitions.
601
// As a result this common function can cover trailer validation for all protocols.
602
::Envoy::Http::HeaderValidator::ValidationResult
603
24
HeaderValidator::validateTrailers(const ::Envoy::Http::HeaderMap& trailers) {
604
24
  std::string reject_details;
605
24
  trailers.iterate([this, &reject_details](const ::Envoy::Http::HeaderEntry& header_entry)
606
38
                       -> ::Envoy::Http::HeaderMap::Iterate {
607
38
    const auto& header_name = header_entry.key();
608
38
    const auto& header_value = header_entry.value();
609

            
610
38
    auto entry_name_result = validateGenericHeaderName(header_name);
611
38
    if (!entry_name_result.ok()) {
612
9
      reject_details = static_cast<std::string>(entry_name_result.details());
613
29
    } else {
614
29
      auto entry_value_result = validateGenericHeaderValue(header_value);
615
29
      if (!entry_value_result) {
616
5
        reject_details = static_cast<std::string>(entry_value_result.details());
617
5
      }
618
29
    }
619

            
620
38
    return reject_details.empty() ? ::Envoy::Http::HeaderMap::Iterate::Continue
621
38
                                  : ::Envoy::Http::HeaderMap::Iterate::Break;
622
38
  });
623

            
624
24
  if (!reject_details.empty()) {
625
14
    return {::Envoy::Http::HeaderValidator::ValidationResult::Action::Reject, reject_details};
626
14
  }
627

            
628
10
  return ::Envoy::Http::HeaderValidator::ValidationResult::success();
629
24
}
630

            
631
66
// Removes every header whose name contains an underscore, unless the configured underscore action
// is ALLOW (in which case the map is left untouched). The dropped-headers stat is incremented
// once per header entry found with an underscore in its name.
//
// If any such header is found, the configured action is expected to be DROP_HEADER (enforced by
// the ASSERT below).
void HeaderValidator::sanitizeHeadersWithUnderscores(::Envoy::Http::HeaderMap& header_map) {
  const auto underscore_action = config_.headers_with_underscores_action();
  if (underscore_action == HeaderValidatorConfig::ALLOW) {
    return;
  }

  // Collect owning copies of the names. The string_views handed out during iteration point into
  // storage owned by the header entries, which is released when the entries are removed below.
  // NOTE(review): HeaderMap::remove() is understood to erase every entry with the given name, so
  // when the same underscore name appears more than once a stored view of the second entry would
  // dangle after the first removal - confirm against the HeaderMap API.
  std::vector<std::string> drop_headers;
  header_map.iterate([&drop_headers](const ::Envoy::Http::HeaderEntry& header_entry)
                         -> ::Envoy::Http::HeaderMap::Iterate {
    const absl::string_view header_name = header_entry.key().getStringView();
    if (absl::StrContains(header_name, '_')) {
      drop_headers.push_back(std::string(header_name));
    }

    return ::Envoy::Http::HeaderMap::Iterate::Continue;
  });

  ASSERT(drop_headers.empty() || underscore_action == HeaderValidatorConfig::DROP_HEADER);
  for (const auto& name : drop_headers) {
    stats_.incDroppedHeadersWithUnderscores();
    header_map.remove(::Envoy::Http::LowerCaseString(absl::string_view(name)));
  }
}
654

            
655
63
// Truncates the :path header at the first '#', discarding the fragment. Paths without a '#' are
// left unchanged. Finding a fragment here is only expected when `strip_fragment_from_path` is
// set in the configuration (enforced by the ASSERT below).
void HeaderValidator::sanitizePathWithFragment(::Envoy::Http::RequestHeaderMap& header_map) {
  const absl::string_view path = header_map.getPathValue();
  const auto fragment_pos = path.find('#');
  if (fragment_pos == absl::string_view::npos) {
    return;
  }

  ASSERT(config_.strip_fragment_from_path());
  // Keep everything before the fragment delimiter.
  header_map.setPath(path.substr(0, fragment_pos));
}
663

            
664
// Applies the configured `path_with_escaped_slashes_action` to the :path header.
//
// Nothing is done (success) when there is no path or the action is KEEP_UNCHANGED. Otherwise
// PathUtil::unescapeSlashes is invoked on the header map and, only if it reports
// FoundAndUnescaped, the action decides the outcome:
//  - REJECT_REQUEST:        Reject with EscapedSlashesInPath
//  - UNESCAPE_AND_REDIRECT: Redirect with RedirectNormalized
//  - UNESCAPE_AND_FORWARD:  success
//
// When path normalization is enabled decoding of slashes is done as part of the normalization
// function for performance.
PathNormalizer::PathNormalizationResult
HeaderValidator::sanitizeEncodedSlashes(::Envoy::Http::RequestHeaderMap& header_map) {
  using NormalizationOptions = HeaderValidatorConfig::UriPathNormalizationOptions;

  if (!header_map.Path()) {
    return PathNormalizer::PathNormalizationResult::success();
  }

  const auto action = config_.uri_path_normalization_options().path_with_escaped_slashes_action();
  if (action == NormalizationOptions::KEEP_UNCHANGED) {
    return PathNormalizer::PathNormalizationResult::success();
  }

  const auto unescape_result = PathUtil::unescapeSlashes(header_map);
  if (unescape_result != PathUtil::UnescapeSlashesResult::FoundAndUnescaped) {
    return PathNormalizer::PathNormalizationResult::success();
  }

  if (action == NormalizationOptions::REJECT_REQUEST) {
    return {PathNormalizer::PathNormalizationResult::Action::Reject,
            UhvResponseCodeDetail::get().EscapedSlashesInPath};
  }

  if (action == NormalizationOptions::UNESCAPE_AND_REDIRECT) {
    return {PathNormalizer::PathNormalizationResult::Action::Redirect,
            ::Envoy::Http::PathNormalizerResponseCodeDetail::get().RedirectNormalized};
  }

  ASSERT(action == NormalizationOptions::UNESCAPE_AND_FORWARD);
  return PathNormalizer::PathNormalizationResult::success();
}
696

            
697
// Applies the configured path transformations to the :path header.
//
// With normalization disabled (`skip_path_normalization`), only encoded slashes are sanitized.
// Otherwise the path is normalized, then checked for "%00", and finally - when the
// `allow_non_compliant_characters_in_path_` override is set - additional characters are
// percent-encoded. The first failing step's result is returned; if none fails, success.
PathNormalizer::PathNormalizationResult
HeaderValidator::transformUrlPath(::Envoy::Http::RequestHeaderMap& header_map) {
  if (config_.uri_path_normalization_options().skip_path_normalization()) {
    // Path normalization includes sanitization of encoded slashes for performance reasons.
    // If normalization is disabled, sanitize encoded slashes here.
    auto slashes_result = sanitizeEncodedSlashes(header_map);
    if (!slashes_result.ok()) {
      return slashes_result;
    }
    return PathNormalizer::PathNormalizationResult::success();
  }

  auto normalization_result = path_normalizer_.normalizePathUri(header_map);
  if (!normalization_result.ok()) {
    return normalization_result;
  }

  auto percent_00_result = checkForPercent00InUrlPath(header_map);
  if (!percent_00_result.ok()) {
    return {PathNormalizer::PathNormalizationResult::Action::Reject, percent_00_result.details()};
  }

  if (config_overrides_.allow_non_compliant_characters_in_path_) {
    encodeAdditionalCharactersInPath(header_map);
  }

  return PathNormalizer::PathNormalizationResult::success();
}
721

            
722
// Rejects, with the Percent00InPath detail, a :path header that contains the sequence "%00"
// anywhere in its value. The check is skipped (success) when there is no path or when the
// `reject_percent_00_` override is not set.
HeaderValidator::HeaderValueValidationResult
HeaderValidator::checkForPercent00InUrlPath(const ::Envoy::Http::RequestHeaderMap& header_map) {
  if (!header_map.Path()) {
    return HeaderValueValidationResult::success();
  }
  if (!config_overrides_.reject_percent_00_) {
    return HeaderValueValidationResult::success();
  }

  const bool has_percent_00 = absl::StrContains(header_map.getPathValue(), "%00");
  if (!has_percent_00) {
    return HeaderValueValidationResult::success();
  }

  return {HeaderValueValidationResult::Action::Reject,
          UhvResponseCodeDetail::get().Percent00InPath};
}
734

            
735
} // namespace EnvoyDefault
736
} // namespace HeaderValidators
737
} // namespace Http
738
} // namespace Extensions
739
} // namespace Envoy