1
#include "source/common/http/http1/balsa_parser.h"
2

            
3
#include <algorithm>
4
#include <cctype>
5
#include <cstdint>
6

            
7
#include "source/common/common/assert.h"
8
#include "source/common/http/headers.h"
9
#include "source/common/runtime/runtime_features.h"
10

            
11
#include "absl/strings/ascii.h"
12
#include "absl/strings/match.h"
13

            
14
namespace Envoy {
15
namespace Http {
16
namespace Http1 {
17

            
18
namespace {
19

            
20
using ::quiche::BalsaFrameEnums;
21
using ::quiche::BalsaHeaders;
22

            
23
// Separator expected right after the scheme of a request URL.
constexpr absl::string_view kColonSlashSlash = "://";
// Response must start with "HTTP".
constexpr char kResponseFirstByte = 'H';
// Prefix that every non-empty HTTP version string must carry.
constexpr absl::string_view kHttpVersionPrefix = "HTTP/";

// Allowed characters for field names according to Section 5.1
// and for methods according to Section 9.1 of RFC 9110:
// https://www.rfc-editor.org/rfc/rfc9110.html
// The characters are listed in ascending order because this table is searched
// with std::binary_search.
constexpr absl::string_view kValidCharacters =
    "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~";
constexpr absl::string_view::iterator kValidCharactersBegin = kValidCharacters.begin();
constexpr absl::string_view::iterator kValidCharactersEnd = kValidCharacters.end();
35

            
36
// TODO(#21245): Skip method validation altogether when UHV method validation is
37
// enabled.
38
43284
bool isMethodValid(absl::string_view method, bool allow_custom_methods) {
39
43284
  if (allow_custom_methods) {
40
15
    return !method.empty() &&
41
59
           std::all_of(method.begin(), method.end(), [](absl::string_view::value_type c) {
42
59
             return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
43
59
           });
44
15
  }
45

            
46
43269
  static constexpr absl::string_view kValidMethods[] = {
47
43269
      "ACL",       "BIND",    "CHECKOUT", "CONNECT", "COPY",       "DELETE",     "GET",
48
43269
      "HEAD",      "LINK",    "LOCK",     "MERGE",   "MKACTIVITY", "MKCALENDAR", "MKCOL",
49
43269
      "MOVE",      "MSEARCH", "NOTIFY",   "OPTIONS", "PATCH",      "POST",       "PROPFIND",
50
43269
      "PROPPATCH", "PURGE",   "PUT",      "REBIND",  "REPORT",     "SEARCH",     "SOURCE",
51
43269
      "SUBSCRIBE", "TRACE",   "UNBIND",   "UNLINK",  "UNLOCK",     "UNSUBSCRIBE"};
52

            
53
43269
  const auto* begin = &kValidMethods[0];
54
43269
  const auto* end = &kValidMethods[ABSL_ARRAYSIZE(kValidMethods) - 1] + 1;
55
43269
  return std::binary_search(begin, end, method);
56
43284
}
57

            
58
// This function is crafted to match the URL validation behavior of the http-parser library.
59
43261
bool isUrlValid(absl::string_view url, bool is_connect) {
60
43261
  if (url.empty()) {
61
3
    return false;
62
3
  }
63

            
64
  // Same set of characters are allowed for path and query.
65
968233
  const auto is_valid_path_query_char = [](char c) {
66
965567
    return c == 9 || c == 12 || ('!' <= c && c <= 126);
67
965567
  };
68

            
69
  // The URL may start with a path.
70
43258
  if (auto it = url.begin(); *it == '/' || *it == '*') {
71
42878
    ++it;
72
42878
    return std::all_of(it, url.end(), is_valid_path_query_char);
73
42878
  }
74

            
75
  // If method is not CONNECT, parse scheme.
76
380
  if (!is_connect) {
77
    // Scheme must start with alpha and be non-empty.
78
49
    auto it = url.begin();
79
49
    if (!std::isalpha(*it)) {
80
6
      return false;
81
6
    }
82
43
    ++it;
83
    // Scheme started with an alpha character and the rest of it is alpha, digit, '+', '-' or '.'.
84
199
    const auto is_scheme_suffix = [](char c) {
85
199
      return std::isalpha(c) || std::isdigit(c) || c == '+' || c == '-' || c == '.';
86
199
    };
87
43
    it = std::find_if_not(it, url.end(), is_scheme_suffix);
88
43
    url.remove_prefix(it - url.begin());
89
43
    if (!absl::StartsWith(url, kColonSlashSlash)) {
90
6
      return false;
91
6
    }
92
37
    url.remove_prefix(kColonSlashSlash.length());
93
37
  }
94

            
95
  // Path and query start with the first '/' or '?' character.
96
5218
  const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; };
97

            
98
  // Divide the rest of the URL into two sections: host, and path/query/fragments.
99
368
  auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start);
100
368
  const absl::string_view host = url.substr(0, path_query_begin - url.begin());
101
368
  const absl::string_view path_query = url.substr(path_query_begin - url.begin());
102

            
103
5189
  const auto valid_host_char = [](char c) {
104
5189
    return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' ||
105
5189
           c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' ||
106
5189
           c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' ||
107
5189
           c == '~';
108
5189
  };
109

            
110
  // Match http-parser's quirk of allowing any number of '@' characters in host
111
  // as long as they are not consecutive.
112
368
  return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") &&
113
368
         std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char);
114
380
}
115

            
116
// Returns true if `version_input` is a valid HTTP version string as defined at
117
// https://www.rfc-editor.org/rfc/rfc9112.html#section-2.3, or empty (for HTTP/0.9).
118
83697
bool isVersionValid(absl::string_view version_input) {
119
83697
  if (version_input.empty()) {
120
6
    return true;
121
6
  }
122

            
123
83691
  if (!absl::StartsWith(version_input, kHttpVersionPrefix)) {
124
268
    return false;
125
268
  }
126
83423
  version_input.remove_prefix(kHttpVersionPrefix.size());
127

            
128
  // Version number is in the form of "[0-9].[0-9]".
129
83423
  return version_input.size() == 3 && absl::ascii_isdigit(version_input[0]) &&
130
83423
         version_input[1] == '.' && absl::ascii_isdigit(version_input[2]);
131
83691
}
132

            
133
377726
// Returns true if every character of `name` is listed in `kValidCharacters`.
// An empty `name` is reported as valid.
bool isHeaderNameValid(absl::string_view name) {
  for (const absl::string_view::value_type c : name) {
    if (!std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c)) {
      return false;
    }
  }
  return true;
}
138

            
139
} // anonymous namespace
140

            
141
// Builds a parser of the given message `type` that reports parse events to `connection`
// (must not be null). `max_header_length` is forwarded to the framer. `enable_trailers` and
// `allow_custom_methods` are stored for later use by the callbacks.
BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length,
                         bool enable_trailers, bool allow_custom_methods)
    : message_type_(type), connection_(connection), enable_trailers_(enable_trailers),
      allow_custom_methods_(allow_custom_methods) {
  ASSERT(connection_ != nullptr);

  // Validation policy handed to the framer.
  quiche::HttpValidationPolicy policy;
  policy.disallow_header_continuation_lines = false;
  policy.require_header_colon = true;
  policy.disallow_multiple_content_length = true;
  policy.disallow_transfer_encoding_with_content_length = false;
  policy.validate_transfer_encoding = false;
  policy.require_content_length_if_body_required = false;
  policy.disallow_invalid_header_characters_in_response = true;
  policy.disallow_lone_cr_in_chunk_extension = true;
  framer_.set_http_validation_policy(policy);

  // Wire the framer to this object's header storage and to this object as visitor.
  framer_.set_balsa_headers(&headers_);
  framer_.set_balsa_visitor(this);
  framer_.set_max_header_length(max_header_length);
  framer_.set_invalid_chars_level(quiche::BalsaFrame::InvalidCharsLevel::kError);
  // Trailers are enabled on the framer regardless of `enable_trailers_`; that flag is consulted
  // in validateAndProcessHeadersOrTrailersImpl().
  framer_.EnableTrailers();

  // Tell the framer which kind of first line to expect.
  switch (message_type_) {
  case MessageType::Request:
    framer_.set_is_request(true);
    break;
  case MessageType::Response:
    framer_.set_is_request(false);
    break;
  }
}
173

            
174
241937
size_t BalsaParser::execute(const char* slice, int len) {
175
241937
  ASSERT(status_ != ParserStatus::Error);
176

            
177
241937
  if (len > 0 && !first_byte_processed_) {
178
84820
    if (first_message_) {
179
68148
      first_message_ = false;
180
80958
    } else {
181
16672
      framer_.Reset();
182
16672
    }
183

            
184
84820
    if (message_type_ == MessageType::Response && *slice != kResponseFirstByte) {
185
14
      status_ = ParserStatus::Error;
186
14
      error_message_ = "HPE_INVALID_CONSTANT";
187
14
      return 0;
188
14
    }
189

            
190
84806
    status_ = convertResult(connection_->onMessageBegin());
191
84806
    if (status_ == ParserStatus::Error) {
192
6
      return 0;
193
6
    }
194

            
195
84800
    first_byte_processed_ = true;
196
84800
  }
197

            
198
241917
  if (len == 0 && headers_done_ && !isChunked() &&
199
241917
      ((message_type_ == MessageType::Response && hasTransferEncoding()) ||
200
5
       !headers_.content_length_valid())) {
201
3
    MessageDone();
202
3
    return 0;
203
3
  }
204

            
205
241914
  if (first_byte_processed_ && len == 0) {
206
57
    status_ = ParserStatus::Error;
207
57
    error_message_ = "HPE_INVALID_EOF_STATE";
208
57
    return 0;
209
57
  }
210

            
211
241857
  return framer_.ProcessInput(slice, len);
212
241914
}
213

            
214
143921
void BalsaParser::resume() {
215
143921
  ASSERT(status_ != ParserStatus::Error);
216
143921
  status_ = ParserStatus::Ok;
217
143921
}
218

            
219
82378
// Marks the parser as paused and always reports success. Must not be called in the error state.
CallbackResult BalsaParser::pause() {
  ASSERT(status_ != ParserStatus::Error);

  status_ = ParserStatus::Paused;

  return CallbackResult::Success;
}
224

            
225
473259
// Returns the parser's current status.
ParserStatus BalsaParser::getStatus() const { return status_; }
226

            
227
494359
// Returns the response code recorded in the parsed headers, converted to Http::Code.
Http::Code BalsaParser::statusCode() const {
  const auto raw_code = headers_.parsed_response_code();
  return static_cast<Http::Code>(raw_code);
}
230

            
231
83202
bool BalsaParser::isHttp11() const {
232
83202
  if (message_type_ == MessageType::Request) {
233
41772
    return absl::EndsWith(headers_.first_line(), Http::Headers::get().ProtocolStrings.Http11String);
234
43139
  } else {
235
41430
    return absl::StartsWith(headers_.first_line(),
236
41430
                            Http::Headers::get().ProtocolStrings.Http11String);
237
41430
  }
238
83202
}
239

            
240
173592
// Returns the content length from the parsed headers, or absl::nullopt when the headers do not
// report a valid content length.
absl::optional<uint64_t> BalsaParser::contentLength() const {
  if (headers_.content_length_valid()) {
    return headers_.content_length();
  }
  return absl::nullopt;
}
246

            
247
91369
// Returns whether the parsed headers report a chunked transfer encoding.
bool BalsaParser::isChunked() const { return headers_.transfer_encoding_is_chunked(); }
248

            
249
268870
// Returns the request method recorded in the parsed headers.
absl::string_view BalsaParser::methodName() const { return headers_.request_method(); }
250

            
251
6308
// Returns the message recorded for the most recent parse error.
absl::string_view BalsaParser::errorMessage() const { return error_message_; }
252

            
253
82951
int BalsaParser::hasTransferEncoding() const {
254
82951
  return headers_.HasHeader(Http::Headers::get().TransferEncoding);
255
82951
}
256

            
257
258867
// Intentionally a no-op: the raw body input is ignored.
void BalsaParser::OnRawBodyInput(absl::string_view /*input*/) {}
258

            
259
165883
// Forwards a piece of body data to the connection, unless the parser is in the error state.
void BalsaParser::OnBodyChunkInput(absl::string_view input) {
  if (status_ != ParserStatus::Error) {
    connection_->bufferBody(input.data(), input.size());
  }
}
266

            
267
84634
// Intentionally a no-op: the header input is ignored here.
void BalsaParser::OnHeaderInput(absl::string_view /*input*/) {}
268
402
// Intentionally a no-op: the trailer input is ignored here.
void BalsaParser::OnTrailerInput(absl::string_view /*input*/) {}
269

            
270
84301
void BalsaParser::ProcessHeaders(const BalsaHeaders& headers) {
271
84301
  validateAndProcessHeadersOrTrailersImpl(headers, /* trailers = */ false);
272
84301
}
273
370
// Validates the received trailers and, when trailers are enabled, reports each trailer line to
// the connection. `trailers` is dereferenced unconditionally.
void BalsaParser::OnTrailers(std::unique_ptr<quiche::BalsaHeaders> trailers) {
  constexpr bool kAreTrailers = true;
  validateAndProcessHeadersOrTrailersImpl(*trailers, kAreTrailers);
}
276

            
277
// Validates the pieces of a request line in the order method, URL, version. The first failing
// piece puts the parser into the error state with the matching "HPE_INVALID_*" message. If all
// pieces are valid, the URL is reported to the connection. Does nothing if the parser is already
// in the error state.
void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
                                          absl::string_view method_input,
                                          absl::string_view request_uri,
                                          absl::string_view version_input) {
  if (status_ == ParserStatus::Error) {
    return;
  }

  // Name of the first failed check, or nullptr if every check passed.
  const char* failure = nullptr;
  if (!isMethodValid(method_input, allow_custom_methods_)) {
    failure = "HPE_INVALID_METHOD";
  } else if (!isUrlValid(request_uri, method_input == Headers::get().MethodValues.Connect)) {
    failure = "HPE_INVALID_URL";
  } else if (!isVersionValid(version_input)) {
    failure = "HPE_INVALID_VERSION";
  }

  if (failure != nullptr) {
    status_ = ParserStatus::Error;
    error_message_ = failure;
    return;
  }

  status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size()));
}
302

            
303
// Validates the version of a status line. A valid version leads to the reason phrase being
// reported to the connection; an invalid one puts the parser into the error state with
// "HPE_INVALID_VERSION". Does nothing if the parser is already in the error state.
void BalsaParser::OnResponseFirstLineInput(absl::string_view /*line_input*/,
                                           absl::string_view version_input,
                                           absl::string_view /*status_input*/,
                                           absl::string_view reason_input) {
  if (status_ == ParserStatus::Error) {
    return;
  }

  if (isVersionValid(version_input)) {
    status_ = convertResult(connection_->onStatus(reason_input.data(), reason_input.size()));
    return;
  }

  status_ = ParserStatus::Error;
  error_message_ = "HPE_INVALID_VERSION";
}
317

            
318
91743
void BalsaParser::OnChunkLength(size_t chunk_length) {
319
91743
  if (status_ == ParserStatus::Error) {
320
    return;
321
  }
322
91743
  const bool is_final_chunk = chunk_length == 0;
323
91743
  connection_->onChunkHeader(is_final_chunk);
324
91743
}
325

            
326
91746
// Intentionally a no-op: chunk extension input is ignored.
void BalsaParser::OnChunkExtensionInput(absl::string_view /*input*/) {}
327

            
328
// Intentionally a no-op: the interim headers passed in are not used.
void BalsaParser::OnInterimHeaders(std::unique_ptr<BalsaHeaders> /*headers*/) {}
329

            
330
84301
void BalsaParser::HeaderDone() {
331
84301
  if (status_ == ParserStatus::Error) {
332
1082
    return;
333
1082
  }
334
83219
  headers_done_ = true;
335
83219
  CallbackResult result = connection_->onHeadersComplete();
336
83219
  status_ = convertResult(result);
337
83219
  if (result == CallbackResult::NoBody || result == CallbackResult::NoBodyData) {
338
25713
    MessageDone();
339
25713
  }
340
83219
}
341

            
342
// Intentionally a no-op.
void BalsaParser::ContinueHeaderDone() {}
343

            
344
108711
void BalsaParser::MessageDone() {
345
108711
  if (status_ == ParserStatus::Error ||
346
      // In the case of early 1xx, MessageDone() can be called twice in a row.
347
      // The !first_byte_processed_ check is to make this function idempotent.
348
108711
      !first_byte_processed_) {
349
26455
    return;
350
26455
  }
351
82256
  status_ = convertResult(connection_->onMessageComplete());
352
82256
  first_byte_processed_ = false;
353
82256
  headers_done_ = false;
354
82256
}
355

            
356
403
// Puts the parser into the error state and records a message for `error_code`. A handful of
// codes map to fixed strings; every other code uses BalsaFrameEnums::ErrorCodeToString().
void BalsaParser::HandleError(BalsaFrameEnums::ErrorCode error_code) {
  status_ = ParserStatus::Error;

  switch (error_code) {
  // Codes reported with an "HPE_*" string.
  case BalsaFrameEnums::INVALID_CHUNK_LENGTH:
    error_message_ = "HPE_INVALID_CHUNK_SIZE";
    break;
  case BalsaFrameEnums::TRAILER_MISSING_COLON:
    error_message_ = "HPE_INVALID_HEADER_TOKEN";
    break;
  case BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS:
    error_message_ = "HPE_UNEXPECTED_CONTENT_LENGTH";
    break;

  // Codes reported with a descriptive sentence.
  case BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING:
    error_message_ = "unsupported transfer encoding";
    break;
  case BalsaFrameEnums::HEADERS_TOO_LONG:
    error_message_ = "headers size exceeds limit";
    break;
  case BalsaFrameEnums::TRAILER_TOO_LONG:
    error_message_ = "trailers size exceeds limit";
    break;
  case BalsaFrameEnums::INVALID_HEADER_CHARACTER:
    error_message_ = "header value contains invalid chars";
    break;

  // Everything else falls back to the framer's own name for the code.
  default:
    error_message_ = BalsaFrameEnums::ErrorCodeToString(error_code);
    break;
  }
}
384

            
385
294
// Ignores every warning except TRAILER_MISSING_COLON, which is escalated to an error.
void BalsaParser::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
  if (error_code != BalsaFrameEnums::TRAILER_MISSING_COLON) {
    return;
  }
  HandleError(error_code);
}
390

            
391
// Walks the lines of `headers`, validating each name and reporting name and value to the
// connection. Processing stops as soon as the parser is in the error state. An invalid name sets
// the error "HPE_INVALID_HEADER_TOKEN". When `trailers` is true and trailers are not enabled,
// names are still validated but nothing is reported to the connection.
void BalsaParser::validateAndProcessHeadersOrTrailersImpl(const quiche::BalsaHeaders& headers,
                                                          bool trailers) {
  const bool skip_callbacks = trailers && !enable_trailers_;

  for (const auto& [key, value] : headers.lines()) {
    // An error raised by a previous line (or before this call) ends the walk.
    if (status_ == ParserStatus::Error) {
      return;
    }

    if (!isHeaderNameValid(key)) {
      status_ = ParserStatus::Error;
      error_message_ = "HPE_INVALID_HEADER_TOKEN";
      return;
    }

    if (skip_callbacks) {
      continue;
    }

    status_ = convertResult(connection_->onHeaderField(key.data(), key.length()));
    if (status_ == ParserStatus::Error) {
      return;
    }

    const auto is_cr_or_lf = [](char c) { return c == '\r' || c == '\n'; };

    // No need to copy if header value does not contain CR or LF.
    if (std::none_of(value.begin(), value.end(), is_cr_or_lf)) {
      status_ = convertResult(connection_->onHeaderValue(value.data(), value.length()));
      continue;
    }

    // Remove CR and LF characters to match http-parser behavior.
    std::string value_without_cr_or_lf(value.data(), value.size());
    value_without_cr_or_lf.erase(
        std::remove_if(value_without_cr_or_lf.begin(), value_without_cr_or_lf.end(), is_cr_or_lf),
        value_without_cr_or_lf.end());
    status_ = convertResult(connection_->onHeaderValue(value_without_cr_or_lf.data(),
                                                       value_without_cr_or_lf.length()));
  }
}
431

            
432
1088083
// Maps a callback result to the next parser status: an Error result yields the error status,
// any other result leaves the current status unchanged.
ParserStatus BalsaParser::convertResult(CallbackResult result) const {
  if (result == CallbackResult::Error) {
    return ParserStatus::Error;
  }
  return status_;
}
435

            
436
} // namespace Http1
437
} // namespace Http
438
} // namespace Envoy