LCOV - code coverage report
Current view: top level - source/common/http/http1 - balsa_parser.cc (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 264 318 83.0 %
Date: 2024-01-05 06:35:25 Functions: 37 42 88.1 %

          Line data    Source code
       1             : #include "source/common/http/http1/balsa_parser.h"
       2             : 
       3             : #include <algorithm>
       4             : #include <cctype>
       5             : #include <cstdint>
       6             : 
       7             : #include "source/common/common/assert.h"
       8             : #include "source/common/http/headers.h"
       9             : 
      10             : #include "absl/strings/ascii.h"
      11             : #include "absl/strings/match.h"
      12             : 
      13             : namespace Envoy {
      14             : namespace Http {
      15             : namespace Http1 {
      16             : 
      17             : namespace {
      18             : 
      19             : using ::quiche::BalsaFrameEnums;
      20             : using ::quiche::BalsaHeaders;
      21             : 
// Separator expected right after the scheme of an absolute-form request URL.
constexpr absl::string_view kColonSlashSlash = "://";
// Response must start with "HTTP".
constexpr char kResponseFirstByte = 'H';
// Prefix that every non-empty HTTP version string has to start with.
constexpr absl::string_view kHttpVersionPrefix = "HTTP/";

// Allowed characters for field names according to Section 5.1
// and for methods according to Section 9.1 of RFC 9110:
// https://www.rfc-editor.org/rfc/rfc9110.html
// The characters are listed in ascending order; the validators in this file
// look them up with std::binary_search, which requires a sorted range.
constexpr absl::string_view kValidCharacters =
    "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~";
// Iterator pair over kValidCharacters, used as the binary search range.
constexpr absl::string_view::iterator kValidCharactersBegin = kValidCharacters.begin();
constexpr absl::string_view::iterator kValidCharactersEnd = kValidCharacters.end();
      34             : 
      35         882 : bool isFirstCharacterOfValidMethod(char c) {
      36         882 :   static constexpr char kValidFirstCharacters[] = {'A', 'B', 'C', 'D', 'G', 'H', 'L', 'M',
      37         882 :                                                    'N', 'O', 'P', 'R', 'S', 'T', 'U'};
      38             : 
      39         882 :   const auto* begin = &kValidFirstCharacters[0];
      40         882 :   const auto* end = &kValidFirstCharacters[ABSL_ARRAYSIZE(kValidFirstCharacters) - 1] + 1;
      41         882 :   return std::binary_search(begin, end, c);
      42         882 : }
      43             : 
      44             : // TODO(#21245): Skip method validation altogether when UHV method validation is
      45             : // enabled.
      46         737 : bool isMethodValid(absl::string_view method, bool allow_custom_methods) {
      47         737 :   if (allow_custom_methods) {
      48           0 :     return !method.empty() &&
      49           0 :            std::all_of(method.begin(), method.end(), [](absl::string_view::value_type c) {
      50           0 :              return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
      51           0 :            });
      52           0 :   }
      53             : 
      54         737 :   static constexpr absl::string_view kValidMethods[] = {
      55         737 :       "ACL",       "BIND",    "CHECKOUT", "CONNECT", "COPY",       "DELETE",     "GET",
      56         737 :       "HEAD",      "LINK",    "LOCK",     "MERGE",   "MKACTIVITY", "MKCALENDAR", "MKCOL",
      57         737 :       "MOVE",      "MSEARCH", "NOTIFY",   "OPTIONS", "PATCH",      "POST",       "PROPFIND",
      58         737 :       "PROPPATCH", "PURGE",   "PUT",      "REBIND",  "REPORT",     "SEARCH",     "SOURCE",
      59         737 :       "SUBSCRIBE", "TRACE",   "UNBIND",   "UNLINK",  "UNLOCK",     "UNSUBSCRIBE"};
      60             : 
      61         737 :   const auto* begin = &kValidMethods[0];
      62         737 :   const auto* end = &kValidMethods[ABSL_ARRAYSIZE(kValidMethods) - 1] + 1;
      63         737 :   return std::binary_search(begin, end, method);
      64         737 : }
      65             : 
      66             : // This function is crafted to match the URL validation behavior of the http-parser library.
      67         667 : bool isUrlValid(absl::string_view url, bool is_connect) {
      68         667 :   if (url.empty()) {
      69           1 :     return false;
      70           1 :   }
      71             : 
      72             :   // Same set of characters are allowed for path and query.
      73        6932 :   const auto is_valid_path_query_char = [](char c) {
      74        6932 :     return c == 9 || c == 12 || ('!' <= c && c <= 126);
      75        6932 :   };
      76             : 
      77             :   // The URL may start with a path.
      78         666 :   if (auto it = url.begin(); *it == '/' || *it == '*') {
      79         615 :     ++it;
      80         615 :     return std::all_of(it, url.end(), is_valid_path_query_char);
      81         615 :   }
      82             : 
      83             :   // If method is not CONNECT, parse scheme.
      84          51 :   if (!is_connect) {
      85             :     // Scheme must start with alpha and be non-empty.
      86          25 :     auto it = url.begin();
      87          25 :     if (!std::isalpha(*it)) {
      88           8 :       return false;
      89           8 :     }
      90          17 :     ++it;
      91             :     // Scheme started with an alpha character and the rest of it is alpha, digit, '+', '-' or '.'.
      92          88 :     const auto is_scheme_suffix = [](char c) {
      93          88 :       return std::isalpha(c) || std::isdigit(c) || c == '+' || c == '-' || c == '.';
      94          88 :     };
      95          17 :     it = std::find_if_not(it, url.end(), is_scheme_suffix);
      96          17 :     url.remove_prefix(it - url.begin());
      97          17 :     if (!absl::StartsWith(url, kColonSlashSlash)) {
      98          17 :       return false;
      99          17 :     }
     100           0 :     url.remove_prefix(kColonSlashSlash.length());
     101           0 :   }
     102             : 
     103             :   // Path and query start with the first '/' or '?' character.
     104         430 :   const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; };
     105             : 
     106             :   // Divide the rest of the URL into two sections: host, and path/query/fragments.
     107          26 :   auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start);
     108          26 :   const absl::string_view host = url.substr(0, path_query_begin - url.begin());
     109          26 :   const absl::string_view path_query = url.substr(path_query_begin - url.begin());
     110             : 
     111         185 :   const auto valid_host_char = [](char c) {
     112         185 :     return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' ||
     113         185 :            c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' ||
     114         185 :            c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' ||
     115         185 :            c == '~';
     116         185 :   };
     117             : 
     118             :   // Match http-parser's quirk of allowing any number of '@' characters in host
     119             :   // as long as they are not consecutive.
     120          26 :   return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") &&
     121          26 :          std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char);
     122          51 : }
     123             : 
     124             : // Returns true if `version_input` is a valid HTTP version string as defined at
     125             : // https://www.rfc-editor.org/rfc/rfc9112.html#section-2.3, or empty (for HTTP/0.9).
     126         668 : bool isVersionValid(absl::string_view version_input) {
     127         668 :   if (version_input.empty()) {
     128          67 :     return true;
     129          67 :   }
     130             : 
     131         601 :   if (!absl::StartsWith(version_input, kHttpVersionPrefix)) {
     132          47 :     return false;
     133          47 :   }
     134         554 :   version_input.remove_prefix(kHttpVersionPrefix.size());
     135             : 
     136             :   // Version number is in the form of "[0-9].[0-9]".
     137         554 :   return version_input.size() == 3 && absl::ascii_isdigit(version_input[0]) &&
     138         554 :          version_input[1] == '.' && absl::ascii_isdigit(version_input[2]);
     139         601 : }
     140             : 
     141        1115 : bool isHeaderNameValid(absl::string_view name) {
     142       16473 :   return std::all_of(name.begin(), name.end(), [](absl::string_view::value_type c) {
     143       16473 :     return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
     144       16473 :   });
     145        1115 : }
     146             : 
     147             : } // anonymous namespace
     148             : 
// Builds a parser for either requests or responses and wires it to the framer.
//
// @param type whether requests or responses are parsed.
// @param connection receiver of the parser callbacks; must not be null.
// @param max_header_length limit handed to the framer via set_max_header_length().
// @param enable_trailers whether parsed trailers are forwarded to `connection`.
// @param allow_custom_methods whether methods outside the fixed list are accepted.
BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length,
                         bool enable_trailers, bool allow_custom_methods)
    : message_type_(type), connection_(connection), enable_trailers_(enable_trailers),
      allow_custom_methods_(allow_custom_methods) {
  ASSERT(connection_ != nullptr);

  // Validation policy applied by the framer itself; each field name states the
  // check it toggles.
  quiche::HttpValidationPolicy http_validation_policy;
  http_validation_policy.disallow_header_continuation_lines = false;
  http_validation_policy.require_header_colon = true;
  http_validation_policy.disallow_multiple_content_length = true;
  http_validation_policy.disallow_transfer_encoding_with_content_length = false;
  http_validation_policy.validate_transfer_encoding = false;
  http_validation_policy.require_content_length_if_body_required = false;
  http_validation_policy.disallow_invalid_header_characters_in_response = true;
  framer_.set_http_validation_policy(http_validation_policy);

  // The framer stores parsed headers in headers_ and reports events to this object.
  framer_.set_balsa_headers(&headers_);
  framer_.set_balsa_visitor(this);
  framer_.set_max_header_length(max_header_length);
  framer_.set_invalid_chars_level(quiche::BalsaFrame::InvalidCharsLevel::kError);
  // Trailers are enabled in the framer regardless of enable_trailers_; that flag
  // is consulted later, when header/trailer lines are forwarded to connection_.
  framer_.EnableTrailers();

  switch (message_type_) {
  case MessageType::Request:
    framer_.set_is_request(true);
    break;
  case MessageType::Response:
    framer_.set_is_request(false);
    break;
  }
}
     180             : 
// Feeds `len` bytes starting at `slice` to the framer.
//
// Must not be called once the parser is in the Error state.
//
// @param slice pointer to the input bytes; only dereferenced when `len` > 0.
// @param len number of bytes available at `slice`.
// @return the value returned by framer_.ProcessInput(), or 0 when the input is
//         rejected up front or the zero-length case is handled without the framer.
size_t BalsaParser::execute(const char* slice, int len) {
  ASSERT(status_ != ParserStatus::Error);

  // First byte of a new message: do a cheap sanity check before involving the framer.
  if (len > 0 && !first_byte_processed_) {
    // A request must start with a character that can begin one of the known
    // methods, unless custom methods are allowed.
    if (message_type_ == MessageType::Request && !allow_custom_methods_ &&
        !isFirstCharacterOfValidMethod(*slice)) {
      status_ = ParserStatus::Error;
      error_message_ = "HPE_INVALID_METHOD";
      return 0;
    }
    // A response must start with 'H' (see kResponseFirstByte).
    if (message_type_ == MessageType::Response && *slice != kResponseFirstByte) {
      status_ = ParserStatus::Error;
      error_message_ = "HPE_INVALID_CONSTANT";
      return 0;
    }

    status_ = convertResult(connection_->onMessageBegin());
    if (status_ == ParserStatus::Error) {
      return 0;
    }

    first_byte_processed_ = true;
  }

  // Zero-length input after the headers of a non-chunked message: complete the
  // message if it is a response carrying a Transfer-Encoding header, or if no
  // valid Content-Length was parsed.
  // NOTE(review): presumably a zero-length call signals end of stream; confirm with callers.
  if (len == 0 && headers_done_ && !isChunked() &&
      ((message_type_ == MessageType::Response && hasTransferEncoding()) ||
       !headers_.content_length_valid())) {
    MessageDone();
    return 0;
  }

  // Any other zero-length input in the middle of a message is an error.
  if (first_byte_processed_ && len == 0) {
    status_ = ParserStatus::Error;
    error_message_ = "HPE_INVALID_EOF_STATE";
    return 0;
  }

  return framer_.ProcessInput(slice, len);
}
     220             : 
// Sets the parser status back to Ok. Must not be called in the Error state.
void BalsaParser::resume() {
  ASSERT(status_ != ParserStatus::Error);
  status_ = ParserStatus::Ok;
}
     225             : 
// Marks the parser as Paused and returns CallbackResult::Success.
// Must not be called in the Error state.
CallbackResult BalsaParser::pause() {
  ASSERT(status_ != ParserStatus::Error);
  status_ = ParserStatus::Paused;
  return CallbackResult::Success;
}
     231             : 
// Returns the current parser status.
ParserStatus BalsaParser::getStatus() const { return status_; }
     233             : 
// Returns the response code parsed into headers_, cast to Http::Code.
Http::Code BalsaParser::statusCode() const {
  return static_cast<Http::Code>(headers_.parsed_response_code());
}
     237             : 
     238         354 : bool BalsaParser::isHttp11() const {
     239         354 :   if (message_type_ == MessageType::Request) {
     240         313 :     return absl::EndsWith(headers_.first_line(), Http::Headers::get().ProtocolStrings.Http11String);
     241         313 :   } else {
     242          41 :     return absl::StartsWith(headers_.first_line(),
     243          41 :                             Http::Headers::get().ProtocolStrings.Http11String);
     244          41 :   }
     245         354 : }
     246             : 
     247         112 : absl::optional<uint64_t> BalsaParser::contentLength() const {
     248         112 :   if (!headers_.content_length_valid()) {
     249         110 :     return absl::nullopt;
     250         110 :   }
     251           2 :   return headers_.content_length();
     252         112 : }
     253             : 
// Returns whether headers_ reports the transfer encoding as chunked.
bool BalsaParser::isChunked() const { return headers_.transfer_encoding_is_chunked(); }
     255             : 
// Returns the request method as stored in headers_.
absl::string_view BalsaParser::methodName() const { return headers_.request_method(); }
     257             : 
// Returns the message recorded by the most recent error.
absl::string_view BalsaParser::errorMessage() const { return error_message_; }
     259             : 
// Returns the result of looking up the Transfer-Encoding header name in
// headers_, converted to int.
int BalsaParser::hasTransferEncoding() const {
  return headers_.HasHeader(Http::Headers::get().TransferEncoding);
}
     263             : 
// Framer callback; the raw body input is not used by this parser.
void BalsaParser::OnRawBodyInput(absl::string_view /*input*/) {}
     265             : 
     266         110 : void BalsaParser::OnBodyChunkInput(absl::string_view input) {
     267         110 :   if (status_ == ParserStatus::Error) {
     268           0 :     return;
     269           0 :   }
     270             : 
     271         110 :   connection_->bufferBody(input.data(), input.size());
     272         110 : }
     273             : 
// Framer callback; the raw header input is not used by this parser.
void BalsaParser::OnHeaderInput(absl::string_view /*input*/) {}
// Framer callback; the raw trailer input is not used by this parser.
void BalsaParser::OnTrailerInput(absl::string_view /*input*/) {}
     276             : 
// Framer callback: validates the parsed headers and forwards them to the
// connection through the shared header/trailer routine.
void BalsaParser::ProcessHeaders(const BalsaHeaders& headers) {
  validateAndProcessHeadersOrTrailersImpl(headers, /* trailers = */ false);
}
// Framer callback: validates the parsed trailers and forwards them through the
// shared header/trailer routine. `trailers` is dereferenced unconditionally.
void BalsaParser::OnTrailers(std::unique_ptr<quiche::BalsaHeaders> trailers) {
  validateAndProcessHeadersOrTrailersImpl(*trailers, /* trailers = */ true);
}
     283             : 
     284             : void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
     285             :                                           absl::string_view method_input,
     286             :                                           absl::string_view request_uri,
     287         737 :                                           absl::string_view version_input) {
     288         737 :   if (status_ == ParserStatus::Error) {
     289           0 :     return;
     290           0 :   }
     291         737 :   if (!isMethodValid(method_input, allow_custom_methods_)) {
     292          70 :     status_ = ParserStatus::Error;
     293          70 :     error_message_ = "HPE_INVALID_METHOD";
     294          70 :     return;
     295          70 :   }
     296         667 :   const bool is_connect = method_input == Headers::get().MethodValues.Connect;
     297         667 :   if (!isUrlValid(request_uri, is_connect)) {
     298          42 :     status_ = ParserStatus::Error;
     299          42 :     error_message_ = "HPE_INVALID_URL";
     300          42 :     return;
     301          42 :   }
     302         625 :   if (!isVersionValid(version_input)) {
     303         152 :     status_ = ParserStatus::Error;
     304         152 :     error_message_ = "HPE_INVALID_VERSION";
     305         152 :     return;
     306         152 :   }
     307         473 :   status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size()));
     308         473 : }
     309             : 
     310             : void BalsaParser::OnResponseFirstLineInput(absl::string_view /*line_input*/,
     311             :                                            absl::string_view version_input,
     312             :                                            absl::string_view /*status_input*/,
     313          43 :                                            absl::string_view reason_input) {
     314          43 :   if (status_ == ParserStatus::Error) {
     315           0 :     return;
     316           0 :   }
     317          43 :   if (!isVersionValid(version_input)) {
     318           2 :     status_ = ParserStatus::Error;
     319           2 :     error_message_ = "HPE_INVALID_VERSION";
     320           2 :     return;
     321           2 :   }
     322          41 :   status_ = convertResult(connection_->onStatus(reason_input.data(), reason_input.size()));
     323          41 : }
     324             : 
     325         212 : void BalsaParser::OnChunkLength(size_t chunk_length) {
     326         212 :   if (status_ == ParserStatus::Error) {
     327           0 :     return;
     328           0 :   }
     329         212 :   const bool is_final_chunk = chunk_length == 0;
     330         212 :   connection_->onChunkHeader(is_final_chunk);
     331         212 : }
     332             : 
// Framer callback; chunk extensions are not used by this parser.
void BalsaParser::OnChunkExtensionInput(absl::string_view /*input*/) {}
     334             : 
// Framer callback; interim headers are dropped without further processing.
void BalsaParser::OnInterimHeaders(std::unique_ptr<BalsaHeaders> /*headers*/) {}
     336             : 
// Framer callback invoked when the header section is finished. Does nothing in
// the Error state; otherwise notifies the connection and, if the connection
// reports that there is no body, completes the message right away.
void BalsaParser::HeaderDone() {
  if (status_ == ParserStatus::Error) {
    return;
  }
  headers_done_ = true;
  CallbackResult result = connection_->onHeadersComplete();
  status_ = convertResult(result);
  // The original callback result (not the converted status) decides whether the
  // message ends here.
  if (result == CallbackResult::NoBody || result == CallbackResult::NoBodyData) {
    MessageDone();
  }
}
     348             : 
// Framer callback; no action is taken.
void BalsaParser::ContinueHeaderDone() {}
     350             : 
// Called when a message is complete. Does nothing in the Error state; otherwise
// notifies the connection, resets the framer and clears the per-message flags
// so that the next input is treated as the start of a new message.
void BalsaParser::MessageDone() {
  if (status_ == ParserStatus::Error) {
    return;
  }
  status_ = convertResult(connection_->onMessageComplete());
  framer_.Reset();
  first_byte_processed_ = false;
  headers_done_ = false;
}
     360             : 
// Framer callback for errors. Puts the parser into the Error state and records
// a message for `error_code`: a fixed string for the codes listed below, and
// the framer's own name for the code in every other case.
void BalsaParser::HandleError(BalsaFrameEnums::ErrorCode error_code) {
  status_ = ParserStatus::Error;
  switch (error_code) {
  case BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING:
    error_message_ = "unsupported transfer encoding";
    break;
  case BalsaFrameEnums::INVALID_CHUNK_LENGTH:
    error_message_ = "HPE_INVALID_CHUNK_SIZE";
    break;
  case BalsaFrameEnums::HEADERS_TOO_LONG:
    error_message_ = "headers size exceeds limit";
    break;
  case BalsaFrameEnums::TRAILER_TOO_LONG:
    error_message_ = "trailers size exceeds limit";
    break;
  case BalsaFrameEnums::TRAILER_MISSING_COLON:
    error_message_ = "HPE_INVALID_HEADER_TOKEN";
    break;
  case BalsaFrameEnums::INVALID_HEADER_CHARACTER:
    error_message_ = "header value contains invalid chars";
    break;
  case BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS:
    error_message_ = "HPE_UNEXPECTED_CONTENT_LENGTH";
    break;
  default:
    // No dedicated message: fall back to the framer's string for the code.
    error_message_ = BalsaFrameEnums::ErrorCodeToString(error_code);
  }
}
     389             : 
// Framer callback for warnings. Only TRAILER_MISSING_COLON is escalated to an
// error; every other warning is ignored.
void BalsaParser::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
  if (error_code == BalsaFrameEnums::TRAILER_MISSING_COLON) {
    HandleError(error_code);
  }
}
     395             : 
     396             : void BalsaParser::validateAndProcessHeadersOrTrailersImpl(const quiche::BalsaHeaders& headers,
     397         452 :                                                           bool trailers) {
     398        1153 :   for (const auto& [key, value] : headers.lines()) {
     399        1153 :     if (status_ == ParserStatus::Error) {
     400          38 :       return;
     401          38 :     }
     402             : 
     403        1115 :     if (!isHeaderNameValid(key)) {
     404          24 :       status_ = ParserStatus::Error;
     405          24 :       error_message_ = "HPE_INVALID_HEADER_TOKEN";
     406          24 :       return;
     407          24 :     }
     408             : 
     409        1091 :     if (trailers && !enable_trailers_) {
     410           0 :       continue;
     411           0 :     }
     412             : 
     413        1091 :     status_ = convertResult(connection_->onHeaderField(key.data(), key.length()));
     414        1091 :     if (status_ == ParserStatus::Error) {
     415           0 :       return;
     416           0 :     }
     417             : 
     418             :     // Remove CR and LF characters to match http-parser behavior.
     419        7909 :     auto is_cr_or_lf = [](char c) { return c == '\r' || c == '\n'; };
     420        1091 :     if (std::any_of(value.begin(), value.end(), is_cr_or_lf)) {
     421          33 :       std::string value_without_cr_or_lf;
     422          33 :       value_without_cr_or_lf.reserve(value.size());
     423        1038 :       for (char c : value) {
     424        1038 :         if (!is_cr_or_lf(c)) {
     425         875 :           value_without_cr_or_lf.push_back(c);
     426         875 :         }
     427        1038 :       }
     428          33 :       status_ = convertResult(connection_->onHeaderValue(value_without_cr_or_lf.data(),
     429          33 :                                                          value_without_cr_or_lf.length()));
     430        1058 :     } else {
     431             :       // No need to copy if header value does not contain CR or LF.
     432        1058 :       status_ = convertResult(connection_->onHeaderValue(value.data(), value.length()));
     433        1058 :     }
     434        1091 :   }
     435         452 : }
     436             : 
     437        4081 : ParserStatus BalsaParser::convertResult(CallbackResult result) const {
     438        4081 :   return result == CallbackResult::Error ? ParserStatus::Error : status_;
     439        4081 : }
     440             : 
     441             : } // namespace Http1
     442             : } // namespace Http
     443             : } // namespace Envoy

Generated by: LCOV version 1.15