1
#include "source/common/http/http1/balsa_parser.h"
2

            
3
#include <algorithm>
4
#include <cctype>
5
#include <cstdint>
6

            
7
#include "source/common/common/assert.h"
8
#include "source/common/http/headers.h"
9
#include "source/common/runtime/runtime_features.h"
10

            
11
#include "absl/strings/ascii.h"
12
#include "absl/strings/match.h"
13

            
14
namespace Envoy {
15
namespace Http {
16
namespace Http1 {
17

            
18
namespace {
19

            
20
using ::quiche::BalsaFrameEnums;
21
using ::quiche::BalsaHeaders;
22

            
23
// Separator expected between the scheme and the rest of a request target that
// carries a scheme.
constexpr absl::string_view kColonSlashSlash = "://";
// Response must start with "HTTP", so its first byte is always 'H'.
constexpr char kResponseFirstByte = 'H';
// Prefix of every non-empty HTTP version string.
constexpr absl::string_view kHttpVersionPrefix = "HTTP/";
27

            
28
// Allowed characters for field names according to Section 5.1
29
// and for methods according to Section 9.1 of RFC 9110:
30
// https://www.rfc-editor.org/rfc/rfc9110.html
31
constexpr absl::string_view kValidCharacters =
32
    "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~";
33
constexpr absl::string_view::iterator kValidCharactersBegin = kValidCharacters.begin();
34
constexpr absl::string_view::iterator kValidCharactersEnd = kValidCharacters.end();
35

            
36
// TODO(#21245): Skip method validation altogether when UHV method validation is
37
// enabled.
38
43163
bool isMethodValid(absl::string_view method, bool allow_custom_methods) {
39
43163
  if (allow_custom_methods) {
40
15
    return !method.empty() &&
41
59
           std::all_of(method.begin(), method.end(), [](absl::string_view::value_type c) {
42
59
             return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
43
59
           });
44
15
  }
45

            
46
43148
  static constexpr absl::string_view kValidMethods[] = {
47
43148
      "ACL",       "BIND",    "CHECKOUT", "CONNECT", "COPY",       "DELETE",     "GET",
48
43148
      "HEAD",      "LINK",    "LOCK",     "MERGE",   "MKACTIVITY", "MKCALENDAR", "MKCOL",
49
43148
      "MOVE",      "MSEARCH", "NOTIFY",   "OPTIONS", "PATCH",      "POST",       "PROPFIND",
50
43148
      "PROPPATCH", "PURGE",   "PUT",      "REBIND",  "REPORT",     "SEARCH",     "SOURCE",
51
43148
      "SUBSCRIBE", "TRACE",   "UNBIND",   "UNLINK",  "UNLOCK",     "UNSUBSCRIBE"};
52

            
53
43148
  const auto* begin = &kValidMethods[0];
54
43148
  const auto* end = &kValidMethods[ABSL_ARRAYSIZE(kValidMethods) - 1] + 1;
55
43148
  return std::binary_search(begin, end, method);
56
43163
}
57

            
58
// This function is crafted to match the URL validation behavior of the http-parser library.
59
43140
bool isUrlValid(absl::string_view url, bool is_connect) {
60
43140
  if (url.empty()) {
61
3
    return false;
62
3
  }
63

            
64
  // Same set of characters are allowed for path and query.
65
966515
  const auto is_valid_path_query_char = [](char c) {
66
963850
    return c == 9 || c == 12 || ('!' <= c && c <= 126);
67
963850
  };
68

            
69
  // The URL may start with a path.
70
43137
  if (auto it = url.begin(); *it == '/' || *it == '*') {
71
42757
    ++it;
72
42757
    return std::all_of(it, url.end(), is_valid_path_query_char);
73
42757
  }
74

            
75
  // If method is not CONNECT, parse scheme.
76
380
  if (!is_connect) {
77
    // Scheme must start with alpha and be non-empty.
78
49
    auto it = url.begin();
79
49
    if (!std::isalpha(*it)) {
80
6
      return false;
81
6
    }
82
43
    ++it;
83
    // Scheme started with an alpha character and the rest of it is alpha, digit, '+', '-' or '.'.
84
199
    const auto is_scheme_suffix = [](char c) {
85
199
      return std::isalpha(c) || std::isdigit(c) || c == '+' || c == '-' || c == '.';
86
199
    };
87
43
    it = std::find_if_not(it, url.end(), is_scheme_suffix);
88
43
    url.remove_prefix(it - url.begin());
89
43
    if (!absl::StartsWith(url, kColonSlashSlash)) {
90
6
      return false;
91
6
    }
92
37
    url.remove_prefix(kColonSlashSlash.length());
93
37
  }
94

            
95
  // Path and query start with the first '/' or '?' character.
96
5218
  const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; };
97

            
98
  // Divide the rest of the URL into two sections: host, and path/query/fragments.
99
368
  auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start);
100
368
  const absl::string_view host = url.substr(0, path_query_begin - url.begin());
101
368
  const absl::string_view path_query = url.substr(path_query_begin - url.begin());
102

            
103
5189
  const auto valid_host_char = [](char c) {
104
5189
    return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' ||
105
5189
           c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' ||
106
5189
           c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' ||
107
5189
           c == '~';
108
5189
  };
109

            
110
  // Match http-parser's quirk of allowing any number of '@' characters in host
111
  // as long as they are not consecutive.
112
368
  return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") &&
113
368
         std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char);
114
380
}
115

            
116
// Returns true if `version_input` is a valid HTTP version string as defined at
117
// https://www.rfc-editor.org/rfc/rfc9112.html#section-2.3, or empty (for HTTP/0.9).
118
83454
bool isVersionValid(absl::string_view version_input) {
119
83454
  if (version_input.empty()) {
120
6
    return true;
121
6
  }
122

            
123
83448
  if (!absl::StartsWith(version_input, kHttpVersionPrefix)) {
124
268
    return false;
125
268
  }
126
83180
  version_input.remove_prefix(kHttpVersionPrefix.size());
127

            
128
  // Version number is in the form of "[0-9].[0-9]".
129
83180
  return version_input.size() == 3 && absl::ascii_isdigit(version_input[0]) &&
130
83180
         version_input[1] == '.' && absl::ascii_isdigit(version_input[2]);
131
83448
}
132

            
133
377216
// Returns true if every character of `name` is one of kValidCharacters.
// An empty `name` therefore yields true.
bool isHeaderNameValid(absl::string_view name) {
  const auto is_allowed = [](absl::string_view::value_type c) {
    return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
  };
  // Valid exactly when no character outside the allowed set can be found.
  return std::find_if_not(name.begin(), name.end(), is_allowed) == name.end();
}
138

            
139
} // anonymous namespace
140

            
141
// Configures the Balsa framer for parsing messages of the given `type` and
// registers this object as the framer's visitor.
//
// `connection` receives the parser callbacks and must not be null.
// `max_header_length` is handed to the framer as its header size limit.
// `enable_trailers` and `allow_custom_methods` are stored for use while parsing.
BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length,
                         bool enable_trailers, bool allow_custom_methods)
    : message_type_(type), connection_(connection), enable_trailers_(enable_trailers),
      allow_custom_methods_(allow_custom_methods) {
  ASSERT(connection_ != nullptr);

  quiche::HttpValidationPolicy http_validation_policy;
  // Checks that are switched on.
  http_validation_policy.require_header_colon = true;
  http_validation_policy.disallow_multiple_content_length = true;
  http_validation_policy.disallow_invalid_header_characters_in_response = true;
  http_validation_policy.disallow_lone_cr_in_chunk_extension = true;
  // Checks that are switched off.
  http_validation_policy.disallow_header_continuation_lines = false;
  http_validation_policy.disallow_transfer_encoding_with_content_length = false;
  http_validation_policy.validate_transfer_encoding = false;
  http_validation_policy.require_content_length_if_body_required = false;
  framer_.set_http_validation_policy(http_validation_policy);

  framer_.set_balsa_headers(&headers_);
  framer_.set_balsa_visitor(this);
  framer_.set_max_header_length(max_header_length);
  framer_.set_invalid_chars_level(quiche::BalsaFrame::InvalidCharsLevel::kError);
  // Trailers are enabled on the framer regardless of `enable_trailers`.
  framer_.EnableTrailers();

  switch (message_type_) {
  case MessageType::Request:
    framer_.set_is_request(true);
    break;
  case MessageType::Response:
    framer_.set_is_request(false);
    break;
  }
}
173

            
174
241903
size_t BalsaParser::execute(const char* slice, int len) {
175
241903
  ASSERT(status_ != ParserStatus::Error);
176

            
177
241903
  if (len > 0 && !first_byte_processed_) {
178
84577
    if (first_message_) {
179
68204
      first_message_ = false;
180
80716
    } else {
181
16373
      framer_.Reset();
182
16373
    }
183

            
184
84577
    if (message_type_ == MessageType::Response && *slice != kResponseFirstByte) {
185
14
      status_ = ParserStatus::Error;
186
14
      error_message_ = "HPE_INVALID_CONSTANT";
187
14
      return 0;
188
14
    }
189

            
190
84563
    status_ = convertResult(connection_->onMessageBegin());
191
84563
    if (status_ == ParserStatus::Error) {
192
6
      return 0;
193
6
    }
194

            
195
84557
    first_byte_processed_ = true;
196
84557
  }
197

            
198
241883
  if (len == 0 && headers_done_ && !isChunked() &&
199
241883
      ((message_type_ == MessageType::Response && hasTransferEncoding()) ||
200
5
       !headers_.content_length_valid())) {
201
3
    MessageDone();
202
3
    return 0;
203
3
  }
204

            
205
241880
  if (first_byte_processed_ && len == 0) {
206
57
    status_ = ParserStatus::Error;
207
57
    error_message_ = "HPE_INVALID_EOF_STATE";
208
57
    return 0;
209
57
  }
210

            
211
241823
  return framer_.ProcessInput(slice, len);
212
241880
}
213

            
214
143989
void BalsaParser::resume() {
215
143989
  ASSERT(status_ != ParserStatus::Error);
216
143989
  status_ = ParserStatus::Ok;
217
143989
}
218

            
219
82134
// Marks the parser as Paused and reports success to the caller.
// Must not be called in the Error state.
CallbackResult BalsaParser::pause() {
  ASSERT(status_ != ParserStatus::Error);

  status_ = ParserStatus::Paused;
  return CallbackResult::Success;
}
224

            
225
473190
// Returns the current parser status.
ParserStatus BalsaParser::getStatus() const {
  return status_;
}
226

            
227
492883
// Returns the response code parsed into the headers, converted to Http::Code.
Http::Code BalsaParser::statusCode() const {
  const auto parsed_code = headers_.parsed_response_code();
  return static_cast<Http::Code>(parsed_code);
}
230

            
231
82958
bool BalsaParser::isHttp11() const {
232
82958
  if (message_type_ == MessageType::Request) {
233
41651
    return absl::EndsWith(headers_.first_line(), Http::Headers::get().ProtocolStrings.Http11String);
234
43018
  } else {
235
41307
    return absl::StartsWith(headers_.first_line(),
236
41307
                            Http::Headers::get().ProtocolStrings.Http11String);
237
41307
  }
238
82958
}
239

            
240
173419
// Returns the parsed content length, or absl::nullopt when the headers do not
// hold a valid one.
absl::optional<uint64_t> BalsaParser::contentLength() const {
  if (headers_.content_length_valid()) {
    return headers_.content_length();
  }
  return absl::nullopt;
}
246

            
247
91498
// Returns whether the headers report a chunked transfer encoding.
bool BalsaParser::isChunked() const {
  return headers_.transfer_encoding_is_chunked();
}
248

            
249
267949
// Returns the request method as stored in the headers.
absl::string_view BalsaParser::methodName() const {
  return headers_.request_method();
}
250

            
251
6312
// Returns the message recorded for the most recent error.
absl::string_view BalsaParser::errorMessage() const {
  return error_message_;
}
252

            
253
82707
int BalsaParser::hasTransferEncoding() const {
254
82707
  return headers_.HasHeader(Http::Headers::get().TransferEncoding);
255
82707
}
256

            
257
259447
// Intentionally a no-op: raw body input is ignored.
void BalsaParser::OnRawBodyInput(absl::string_view /*input*/) {
}
258

            
259
166711
// Forwards a piece of body data to the connection, unless the parser is
// already in the Error state.
void BalsaParser::OnBodyChunkInput(absl::string_view input) {
  if (status_ != ParserStatus::Error) {
    connection_->bufferBody(input.data(), input.size());
  }
}
266

            
267
84391
// Intentionally a no-op: raw header input is ignored.
void BalsaParser::OnHeaderInput(absl::string_view /*input*/) {
}
268
402
// Intentionally a no-op: raw trailer input is ignored.
void BalsaParser::OnTrailerInput(absl::string_view /*input*/) {
}
269

            
270
84057
void BalsaParser::ProcessHeaders(const BalsaHeaders& headers) {
271
84057
  validateAndProcessHeadersOrTrailersImpl(headers, /* trailers = */ false);
272
84057
}
273
370
// Validates `trailers` and reports them to the connection as trailers.
void BalsaParser::OnTrailers(std::unique_ptr<quiche::BalsaHeaders> trailers) {
  constexpr bool kAreTrailers = true;
  validateAndProcessHeadersOrTrailersImpl(*trailers, kAreTrailers);
}
276

            
277
// Validates the method, request target and version of a request line, in that
// order. The first failing check puts the parser into the Error state with the
// corresponding message; if all checks pass, the request target is reported to
// the connection through onUrl(). Does nothing if the parser is already in the
// Error state.
void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
                                          absl::string_view method_input,
                                          absl::string_view request_uri,
                                          absl::string_view version_input) {
  if (status_ == ParserStatus::Error) {
    return;
  }

  if (!isMethodValid(method_input, allow_custom_methods_)) {
    status_ = ParserStatus::Error;
    error_message_ = "HPE_INVALID_METHOD";
  } else if (!isUrlValid(request_uri, method_input == Headers::get().MethodValues.Connect)) {
    status_ = ParserStatus::Error;
    error_message_ = "HPE_INVALID_URL";
  } else if (!isVersionValid(version_input)) {
    status_ = ParserStatus::Error;
    error_message_ = "HPE_INVALID_VERSION";
  } else {
    status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size()));
  }
}
302

            
303
// Validates the version of a status line. An invalid version puts the parser
// into the Error state; otherwise the reason phrase is reported to the
// connection through onStatus(). Does nothing if the parser is already in the
// Error state.
void BalsaParser::OnResponseFirstLineInput(absl::string_view /*line_input*/,
                                           absl::string_view version_input,
                                           absl::string_view /*status_input*/,
                                           absl::string_view reason_input) {
  if (status_ == ParserStatus::Error) {
    return;
  }

  if (isVersionValid(version_input)) {
    status_ = convertResult(connection_->onStatus(reason_input.data(), reason_input.size()));
  } else {
    status_ = ParserStatus::Error;
    error_message_ = "HPE_INVALID_VERSION";
  }
}
317

            
318
91591
void BalsaParser::OnChunkLength(size_t chunk_length) {
319
91591
  if (status_ == ParserStatus::Error) {
320
    return;
321
  }
322
91591
  const bool is_final_chunk = chunk_length == 0;
323
91591
  connection_->onChunkHeader(is_final_chunk);
324
91591
}
325

            
326
91594
// Intentionally a no-op: chunk extensions are ignored.
void BalsaParser::OnChunkExtensionInput(absl::string_view /*input*/) {
}
327

            
328
// Intentionally a no-op: interim headers handed over by the framer are dropped.
void BalsaParser::OnInterimHeaders(std::unique_ptr<BalsaHeaders> /*headers*/) {
}
329

            
330
84057
void BalsaParser::HeaderDone() {
331
84057
  if (status_ == ParserStatus::Error) {
332
1082
    return;
333
1082
  }
334
82975
  headers_done_ = true;
335
82975
  CallbackResult result = connection_->onHeadersComplete();
336
82975
  status_ = convertResult(result);
337
82975
  if (result == CallbackResult::NoBody || result == CallbackResult::NoBodyData) {
338
25838
    MessageDone();
339
25838
  }
340
82975
}
341

            
342
// Intentionally a no-op.
void BalsaParser::ContinueHeaderDone() {
}
343

            
344
108569
void BalsaParser::MessageDone() {
345
108569
  if (status_ == ParserStatus::Error ||
346
      // In the case of early 1xx, MessageDone() can be called twice in a row.
347
      // The !first_byte_processed_ check is to make this function idempotent.
348
108569
      !first_byte_processed_) {
349
26580
    return;
350
26580
  }
351
81989
  status_ = convertResult(connection_->onMessageComplete());
352
81989
  first_byte_processed_ = false;
353
81989
  headers_done_ = false;
354
81989
}
355

            
356
404
// Puts the parser into the Error state and records an error message for
// `error_code`. Codes without a dedicated message below fall back to the name
// produced by BalsaFrameEnums::ErrorCodeToString().
void BalsaParser::HandleError(BalsaFrameEnums::ErrorCode error_code) {
  status_ = ParserStatus::Error;

  switch (error_code) {
  // Size limits.
  case BalsaFrameEnums::HEADERS_TOO_LONG:
    error_message_ = "headers size exceeds limit";
    break;
  case BalsaFrameEnums::TRAILER_TOO_LONG:
    error_message_ = "trailers size exceeds limit";
    break;
  // Malformed header or trailer lines.
  case BalsaFrameEnums::INVALID_HEADER_CHARACTER:
    error_message_ = "header value contains invalid chars";
    break;
  case BalsaFrameEnums::TRAILER_MISSING_COLON:
    error_message_ = "HPE_INVALID_HEADER_TOKEN";
    break;
  case BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS:
    error_message_ = "HPE_UNEXPECTED_CONTENT_LENGTH";
    break;
  // Body framing.
  case BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING:
    error_message_ = "unsupported transfer encoding";
    break;
  case BalsaFrameEnums::INVALID_CHUNK_LENGTH:
    error_message_ = "HPE_INVALID_CHUNK_SIZE";
    break;
  default:
    error_message_ = BalsaFrameEnums::ErrorCodeToString(error_code);
    break;
  }
}
384

            
385
294
// Escalates a TRAILER_MISSING_COLON warning to an error; every other warning
// is ignored.
void BalsaParser::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
  if (error_code != BalsaFrameEnums::TRAILER_MISSING_COLON) {
    return;
  }
  HandleError(error_code);
}
390

            
391
// Walks the lines of `headers`, validating each name and reporting name/value
// pairs to the connection through onHeaderField() / onHeaderValue().
//
// Processing stops as soon as the parser is in the Error state. An invalid
// name sets the Error state with "HPE_INVALID_HEADER_TOKEN". When `trailers`
// is true and trailers are not enabled, names are still validated but nothing
// is reported to the connection.
void BalsaParser::validateAndProcessHeadersOrTrailersImpl(const quiche::BalsaHeaders& headers,
                                                          bool trailers) {
  const auto is_cr_or_lf = [](char c) { return c == '\r' || c == '\n'; };

  for (const auto& [key, value] : headers.lines()) {
    if (status_ == ParserStatus::Error) {
      return;
    }

    if (!isHeaderNameValid(key)) {
      status_ = ParserStatus::Error;
      error_message_ = "HPE_INVALID_HEADER_TOKEN";
      return;
    }

    if (trailers && !enable_trailers_) {
      continue;
    }

    status_ = convertResult(connection_->onHeaderField(key.data(), key.length()));
    if (status_ == ParserStatus::Error) {
      return;
    }

    // Common case: no CR or LF in the value, so it can be passed on without a copy.
    if (std::none_of(value.begin(), value.end(), is_cr_or_lf)) {
      status_ = convertResult(connection_->onHeaderValue(value.data(), value.length()));
      continue;
    }

    // Remove CR and LF characters to match http-parser behavior.
    std::string sanitized(value.data(), value.size());
    sanitized.erase(std::remove_if(sanitized.begin(), sanitized.end(), is_cr_or_lf),
                    sanitized.end());
    status_ = convertResult(connection_->onHeaderValue(sanitized.data(), sanitized.length()));
  }
}
431

            
432
1086066
// Maps a callback result onto a parser status: an Error result yields
// ParserStatus::Error, anything else leaves the current status unchanged.
ParserStatus BalsaParser::convertResult(CallbackResult result) const {
  if (result == CallbackResult::Error) {
    return ParserStatus::Error;
  }
  return status_;
}
435

            
436
} // namespace Http1
437
} // namespace Http
438
} // namespace Envoy