Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/source/common/http/http1/balsa_parser.cc
Line
Count
Source (jump to first uncovered line)
1
#include "source/common/http/http1/balsa_parser.h"
2
3
#include <algorithm>
4
#include <cctype>
5
#include <cstdint>
6
7
#include "source/common/common/assert.h"
8
#include "source/common/http/headers.h"
9
10
#include "absl/strings/ascii.h"
11
#include "absl/strings/match.h"
12
13
namespace Envoy {
14
namespace Http {
15
namespace Http1 {
16
17
namespace {
18
19
using ::quiche::BalsaFrameEnums;
20
using ::quiche::BalsaHeaders;
21
22
constexpr absl::string_view kColonSlashSlash = "://";
23
// Response must start with "HTTP".
24
constexpr char kResponseFirstByte = 'H';
25
constexpr absl::string_view kHttpVersionPrefix = "HTTP/";
26
27
// Allowed characters for field names according to Section 5.1
28
// and for methods according to Section 9.1 of RFC 9110:
29
// https://www.rfc-editor.org/rfc/rfc9110.html
30
constexpr absl::string_view kValidCharacters =
31
    "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~";
32
constexpr absl::string_view::iterator kValidCharactersBegin = kValidCharacters.begin();
33
constexpr absl::string_view::iterator kValidCharactersEnd = kValidCharacters.end();
34
35
7.08k
bool isFirstCharacterOfValidMethod(char c) {
36
7.08k
  static constexpr char kValidFirstCharacters[] = {'A', 'B', 'C', 'D', 'G', 'H', 'L', 'M',
37
7.08k
                                                   'N', 'O', 'P', 'R', 'S', 'T', 'U'};
38
39
7.08k
  const auto* begin = &kValidFirstCharacters[0];
40
7.08k
  const auto* end = &kValidFirstCharacters[ABSL_ARRAYSIZE(kValidFirstCharacters) - 1] + 1;
41
7.08k
  return std::binary_search(begin, end, c);
42
7.08k
}
43
44
// TODO(#21245): Skip method validation altogether when UHV method validation is
45
// enabled.
46
5.94k
bool isMethodValid(absl::string_view method, bool allow_custom_methods) {
47
5.94k
  if (allow_custom_methods) {
48
32
    return !method.empty() &&
49
147
           std::all_of(method.begin(), method.end(), [](absl::string_view::value_type c) {
50
147
             return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
51
147
           });
52
32
  }
53
54
5.91k
  static constexpr absl::string_view kValidMethods[] = {
55
5.91k
      "ACL",       "BIND",    "CHECKOUT", "CONNECT", "COPY",       "DELETE",     "GET",
56
5.91k
      "HEAD",      "LINK",    "LOCK",     "MERGE",   "MKACTIVITY", "MKCALENDAR", "MKCOL",
57
5.91k
      "MOVE",      "MSEARCH", "NOTIFY",   "OPTIONS", "PATCH",      "POST",       "PROPFIND",
58
5.91k
      "PROPPATCH", "PURGE",   "PUT",      "REBIND",  "REPORT",     "SEARCH",     "SOURCE",
59
5.91k
      "SUBSCRIBE", "TRACE",   "UNBIND",   "UNLINK",  "UNLOCK",     "UNSUBSCRIBE"};
60
61
5.91k
  const auto* begin = &kValidMethods[0];
62
5.91k
  const auto* end = &kValidMethods[ABSL_ARRAYSIZE(kValidMethods) - 1] + 1;
63
5.91k
  return std::binary_search(begin, end, method);
64
5.94k
}
65
66
// This function is crafted to match the URL validation behavior of the http-parser library.
67
4.50k
bool isUrlValid(absl::string_view url, bool is_connect) {
68
4.50k
  if (url.empty()) {
69
7
    return false;
70
7
  }
71
72
  // Same set of characters are allowed for path and query.
73
2.68M
  const auto is_valid_path_query_char = [](char c) {
74
2.68M
    return c == 9 || c == 12 || ('!' <= c && c <= 126);
75
2.68M
  };
76
77
  // The URL may start with a path.
78
4.50k
  if (auto it = url.begin(); *it == '/' || *it == '*') {
79
2.98k
    ++it;
80
2.98k
    return std::all_of(it, url.end(), is_valid_path_query_char);
81
2.98k
  }
82
83
  // If method is not CONNECT, parse scheme.
84
1.52k
  if (!is_connect) {
85
    // Scheme must start with alpha and be non-empty.
86
1.38k
    auto it = url.begin();
87
1.38k
    if (!std::isalpha(*it)) {
88
12
      return false;
89
12
    }
90
1.37k
    ++it;
91
    // Scheme started with an alpha character and the rest of it is alpha, digit, '+', '-' or '.'.
92
405k
    const auto is_scheme_suffix = [](char c) {
93
405k
      return std::isalpha(c) || std::isdigit(c) || c == '+' || c == '-' || c == '.';
94
405k
    };
95
1.37k
    it = std::find_if_not(it, url.end(), is_scheme_suffix);
96
1.37k
    url.remove_prefix(it - url.begin());
97
1.37k
    if (!absl::StartsWith(url, kColonSlashSlash)) {
98
115
      return false;
99
115
    }
100
1.25k
    url.remove_prefix(kColonSlashSlash.length());
101
1.25k
  }
102
103
  // Path and query start with the first '/' or '?' character.
104
1.48M
  const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; };
105
106
  // Divide the rest of the URL into two sections: host, and path/query/fragments.
107
1.39k
  auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start);
108
1.39k
  const absl::string_view host = url.substr(0, path_query_begin - url.begin());
109
1.39k
  const absl::string_view path_query = url.substr(path_query_begin - url.begin());
110
111
1.42M
  const auto valid_host_char = [](char c) {
112
1.42M
    return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' ||
113
1.42M
           c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' ||
114
1.42M
           c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' ||
115
1.42M
           c == '~';
116
1.42M
  };
117
118
  // Match http-parser's quirk of allowing any number of '@' characters in host
119
  // as long as they are not consecutive.
120
1.39k
  return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") &&
121
1.39k
         std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char);
122
1.52k
}
123
124
// Returns true if `version_input` is a valid HTTP version string as defined at
125
// https://www.rfc-editor.org/rfc/rfc9112.html#section-2.3, or empty (for HTTP/0.9).
126
5.33k
bool isVersionValid(absl::string_view version_input) {
127
5.33k
  if (version_input.empty()) {
128
2.13k
    return true;
129
2.13k
  }
130
131
3.19k
  if (!absl::StartsWith(version_input, kHttpVersionPrefix)) {
132
765
    return false;
133
765
  }
134
2.42k
  version_input.remove_prefix(kHttpVersionPrefix.size());
135
136
  // Version number is in the form of "[0-9].[0-9]".
137
2.42k
  return version_input.size() == 3 && absl::ascii_isdigit(version_input[0]) &&
138
2.42k
         version_input[1] == '.' && absl::ascii_isdigit(version_input[2]);
139
3.19k
}
140
141
61.4k
bool isHeaderNameValid(absl::string_view name) {
142
717k
  return std::all_of(name.begin(), name.end(), [](absl::string_view::value_type c) {
143
717k
    return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
144
717k
  });
145
61.4k
}
146
147
} // anonymous namespace
148
149
BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length,
150
                         bool enable_trailers, bool allow_custom_methods)
151
    : message_type_(type), connection_(connection), enable_trailers_(enable_trailers),
152
19.3k
      allow_custom_methods_(allow_custom_methods) {
153
19.3k
  ASSERT(connection_ != nullptr);
154
155
19.3k
  quiche::HttpValidationPolicy http_validation_policy;
156
19.3k
  http_validation_policy.disallow_header_continuation_lines = true;
157
19.3k
  http_validation_policy.require_header_colon = true;
158
19.3k
  http_validation_policy.disallow_multiple_content_length = true;
159
19.3k
  http_validation_policy.disallow_transfer_encoding_with_content_length = false;
160
19.3k
  http_validation_policy.validate_transfer_encoding = false;
161
19.3k
  http_validation_policy.require_content_length_if_body_required = false;
162
19.3k
  http_validation_policy.disallow_invalid_header_characters_in_response = true;
163
19.3k
  framer_.set_http_validation_policy(http_validation_policy);
164
165
19.3k
  framer_.set_balsa_headers(&headers_);
166
19.3k
  framer_.set_balsa_visitor(this);
167
19.3k
  framer_.set_max_header_length(max_header_length);
168
19.3k
  framer_.set_invalid_chars_level(quiche::BalsaFrame::InvalidCharsLevel::kError);
169
19.3k
  framer_.EnableTrailers();
170
171
19.3k
  switch (message_type_) {
172
10.0k
  case MessageType::Request:
173
10.0k
    framer_.set_is_request(true);
174
10.0k
    break;
175
9.29k
  case MessageType::Response:
176
9.29k
    framer_.set_is_request(false);
177
9.29k
    break;
178
19.3k
  }
179
19.3k
}
180
181
23.1k
size_t BalsaParser::execute(const char* slice, int len) {
182
23.1k
  ASSERT(status_ != ParserStatus::Error);
183
184
23.1k
  if (len > 0 && !first_byte_processed_) {
185
11.8k
    if (message_type_ == MessageType::Request && !allow_custom_methods_ &&
186
11.8k
        !isFirstCharacterOfValidMethod(*slice)) {
187
450
      status_ = ParserStatus::Error;
188
450
      error_message_ = "HPE_INVALID_METHOD";
189
450
      return 0;
190
450
    }
191
11.4k
    if (message_type_ == MessageType::Response && *slice != kResponseFirstByte) {
192
3.49k
      status_ = ParserStatus::Error;
193
3.49k
      error_message_ = "HPE_INVALID_CONSTANT";
194
3.49k
      return 0;
195
3.49k
    }
196
197
7.90k
    status_ = convertResult(connection_->onMessageBegin());
198
7.90k
    if (status_ == ParserStatus::Error) {
199
14
      return 0;
200
14
    }
201
202
7.89k
    first_byte_processed_ = true;
203
7.89k
  }
204
205
19.2k
  if (len == 0 && headers_done_ && !isChunked() &&
206
19.2k
      ((message_type_ == MessageType::Response && hasTransferEncoding()) ||
207
2
       !headers_.content_length_valid())) {
208
2
    MessageDone();
209
2
    return 0;
210
2
  }
211
212
19.2k
  if (first_byte_processed_ && len == 0) {
213
6
    status_ = ParserStatus::Error;
214
6
    error_message_ = "HPE_INVALID_EOF_STATE";
215
6
    return 0;
216
6
  }
217
218
19.1k
  return framer_.ProcessInput(slice, len);
219
19.2k
}
220
221
21.8k
void BalsaParser::resume() {
222
21.8k
  ASSERT(status_ != ParserStatus::Error);
223
21.8k
  status_ = ParserStatus::Ok;
224
21.8k
}
225
226
786
CallbackResult BalsaParser::pause() {
227
786
  ASSERT(status_ != ParserStatus::Error);
228
786
  status_ = ParserStatus::Paused;
229
786
  return CallbackResult::Success;
230
786
}
231
232
34.0k
ParserStatus BalsaParser::getStatus() const { return status_; }
233
234
339
Http::Code BalsaParser::statusCode() const {
235
339
  return static_cast<Http::Code>(headers_.parsed_response_code());
236
339
}
237
238
4.36k
bool BalsaParser::isHttp11() const {
239
4.36k
  if (message_type_ == MessageType::Request) {
240
4.08k
    return absl::EndsWith(headers_.first_line(), Http::Headers::get().ProtocolStrings.Http11String);
241
4.08k
  } else {
242
283
    return absl::StartsWith(headers_.first_line(),
243
283
                            Http::Headers::get().ProtocolStrings.Http11String);
244
283
  }
245
4.36k
}
246
247
1.14k
absl::optional<uint64_t> BalsaParser::contentLength() const {
248
1.14k
  if (!headers_.content_length_valid()) {
249
687
    return absl::nullopt;
250
687
  }
251
456
  return headers_.content_length();
252
1.14k
}
253
254
1.85k
bool BalsaParser::isChunked() const { return headers_.transfer_encoding_is_chunked(); }
255
256
19.0k
absl::string_view BalsaParser::methodName() const { return headers_.request_method(); }
257
258
27.4k
absl::string_view BalsaParser::errorMessage() const { return error_message_; }
259
260
4.35k
int BalsaParser::hasTransferEncoding() const {
261
4.35k
  return headers_.HasHeader(Http::Headers::get().TransferEncoding);
262
4.35k
}
263
264
17.3k
void BalsaParser::OnRawBodyInput(absl::string_view /*input*/) {}
265
266
16.3k
void BalsaParser::OnBodyChunkInput(absl::string_view input) {
267
16.3k
  if (status_ == ParserStatus::Error) {
268
0
    return;
269
0
  }
270
271
16.3k
  connection_->bufferBody(input.data(), input.size());
272
16.3k
}
273
274
6.75k
void BalsaParser::OnHeaderInput(absl::string_view /*input*/) {}
275
121
void BalsaParser::OnTrailerInput(absl::string_view /*input*/) {}
276
204k
void BalsaParser::OnHeader(absl::string_view /*key*/, absl::string_view /*value*/) {}
277
278
6.05k
void BalsaParser::ProcessHeaders(const BalsaHeaders& headers) {
279
6.05k
  validateAndProcessHeadersOrTrailersImpl(headers, /* trailers = */ false);
280
6.05k
}
281
49
void BalsaParser::OnTrailers(std::unique_ptr<quiche::BalsaHeaders> trailers) {
282
49
  validateAndProcessHeadersOrTrailersImpl(*trailers, /* trailers = */ true);
283
49
}
284
285
void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
286
                                          absl::string_view method_input,
287
                                          absl::string_view request_uri,
288
5.94k
                                          absl::string_view version_input) {
289
5.94k
  if (status_ == ParserStatus::Error) {
290
0
    return;
291
0
  }
292
5.94k
  if (!isMethodValid(method_input, allow_custom_methods_)) {
293
1.43k
    status_ = ParserStatus::Error;
294
1.43k
    error_message_ = "HPE_INVALID_METHOD";
295
1.43k
    return;
296
1.43k
  }
297
4.50k
  const bool is_connect = method_input == Headers::get().MethodValues.Connect;
298
4.50k
  if (!isUrlValid(request_uri, is_connect)) {
299
262
    status_ = ParserStatus::Error;
300
262
    error_message_ = "HPE_INVALID_URL";
301
262
    return;
302
262
  }
303
4.24k
  if (!isVersionValid(version_input)) {
304
98
    status_ = ParserStatus::Error;
305
98
    error_message_ = "HPE_INVALID_VERSION";
306
98
    return;
307
98
  }
308
4.14k
  status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size()));
309
4.14k
}
310
311
void BalsaParser::OnResponseFirstLineInput(absl::string_view /*line_input*/,
312
                                           absl::string_view version_input,
313
                                           absl::string_view /*status_input*/,
314
1.08k
                                           absl::string_view reason_input) {
315
1.08k
  if (status_ == ParserStatus::Error) {
316
0
    return;
317
0
  }
318
1.08k
  if (!isVersionValid(version_input)) {
319
760
    status_ = ParserStatus::Error;
320
760
    error_message_ = "HPE_INVALID_VERSION";
321
760
    return;
322
760
  }
323
324
  status_ = convertResult(connection_->onStatus(reason_input.data(), reason_input.size()));
324
324
}
325
326
16.4k
void BalsaParser::OnChunkLength(size_t chunk_length) {
327
16.4k
  if (status_ == ParserStatus::Error) {
328
0
    return;
329
0
  }
330
16.4k
  const bool is_final_chunk = chunk_length == 0;
331
16.4k
  connection_->onChunkHeader(is_final_chunk);
332
16.4k
}
333
334
16.4k
void BalsaParser::OnChunkExtensionInput(absl::string_view /*input*/) {}
335
336
0
void BalsaParser::OnInterimHeaders(std::unique_ptr<BalsaHeaders> /*headers*/) {}
337
338
6.05k
void BalsaParser::HeaderDone() {
339
6.05k
  if (status_ == ParserStatus::Error) {
340
1.67k
    return;
341
1.67k
  }
342
4.37k
  headers_done_ = true;
343
4.37k
  CallbackResult result = connection_->onHeadersComplete();
344
4.37k
  status_ = convertResult(result);
345
4.37k
  if (result == CallbackResult::NoBody || result == CallbackResult::NoBodyData) {
346
7
    MessageDone();
347
7
  }
348
4.37k
}
349
350
0
void BalsaParser::ContinueHeaderDone() {}
351
352
4.30k
void BalsaParser::MessageDone() {
353
4.30k
  if (status_ == ParserStatus::Error) {
354
3.51k
    return;
355
3.51k
  }
356
789
  status_ = convertResult(connection_->onMessageComplete());
357
789
  framer_.Reset();
358
789
  first_byte_processed_ = false;
359
789
  headers_done_ = false;
360
789
}
361
362
1.06k
void BalsaParser::HandleError(BalsaFrameEnums::ErrorCode error_code) {
363
1.06k
  status_ = ParserStatus::Error;
364
1.06k
  switch (error_code) {
365
0
  case BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING:
366
0
    error_message_ = "unsupported transfer encoding";
367
0
    break;
368
78
  case BalsaFrameEnums::INVALID_CHUNK_LENGTH:
369
78
    error_message_ = "HPE_INVALID_CHUNK_SIZE";
370
78
    break;
371
96
  case BalsaFrameEnums::HEADERS_TOO_LONG:
372
96
    error_message_ = "headers size exceeds limit";
373
96
    break;
374
3
  case BalsaFrameEnums::TRAILER_TOO_LONG:
375
3
    error_message_ = "trailers size exceeds limit";
376
3
    break;
377
13
  case BalsaFrameEnums::TRAILER_MISSING_COLON:
378
13
    error_message_ = "HPE_INVALID_HEADER_TOKEN";
379
13
    break;
380
349
  case BalsaFrameEnums::INVALID_HEADER_CHARACTER:
381
349
    error_message_ = "header value contains invalid chars";
382
349
    break;
383
12
  case BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS:
384
12
    error_message_ = "HPE_UNEXPECTED_CONTENT_LENGTH";
385
12
    break;
386
518
  default:
387
518
    error_message_ = BalsaFrameEnums::ErrorCodeToString(error_code);
388
1.06k
  }
389
1.06k
}
390
391
4.63k
void BalsaParser::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
392
4.63k
  if (error_code == BalsaFrameEnums::TRAILER_MISSING_COLON) {
393
0
    HandleError(error_code);
394
0
  }
395
4.63k
}
396
397
void BalsaParser::validateAndProcessHeadersOrTrailersImpl(const quiche::BalsaHeaders& headers,
398
6.10k
                                                          bool trailers) {
399
61.7k
  for (const std::pair<absl::string_view, absl::string_view>& key_value : headers.lines()) {
400
61.7k
    if (status_ == ParserStatus::Error) {
401
334
      return;
402
334
    }
403
404
61.4k
    absl::string_view key = key_value.first;
405
61.4k
    if (!isHeaderNameValid(key)) {
406
14
      status_ = ParserStatus::Error;
407
14
      error_message_ = "HPE_INVALID_HEADER_TOKEN";
408
14
      return;
409
14
    }
410
411
61.3k
    if (trailers && !enable_trailers_) {
412
3
      continue;
413
3
    }
414
415
61.3k
    status_ = convertResult(connection_->onHeaderField(key.data(), key.length()));
416
61.3k
    if (status_ == ParserStatus::Error) {
417
8
      return;
418
8
    }
419
420
61.3k
    absl::string_view value = key_value.second;
421
61.3k
    status_ = convertResult(connection_->onHeaderValue(value.data(), value.length()));
422
61.3k
  }
423
6.10k
}
424
425
140k
ParserStatus BalsaParser::convertResult(CallbackResult result) const {
426
140k
  return result == CallbackResult::Error ? ParserStatus::Error : status_;
427
140k
}
428
429
} // namespace Http1
430
} // namespace Http
431
} // namespace Envoy