Line data Source code
1 : #include "source/common/http/http1/balsa_parser.h"
2 :
3 : #include <algorithm>
4 : #include <cctype>
5 : #include <cstdint>
6 :
7 : #include "source/common/common/assert.h"
8 : #include "source/common/http/headers.h"
9 :
10 : #include "absl/strings/ascii.h"
11 : #include "absl/strings/match.h"
12 :
13 : namespace Envoy {
14 : namespace Http {
15 : namespace Http1 {
16 :
17 : namespace {
18 :
19 : using ::quiche::BalsaFrameEnums;
20 : using ::quiche::BalsaHeaders;
21 :
22 : constexpr absl::string_view kColonSlashSlash = "://";
23 : // Response must start with "HTTP".
24 : constexpr char kResponseFirstByte = 'H';
25 : constexpr absl::string_view kHttpVersionPrefix = "HTTP/";
26 :
27 : // Allowed characters for field names according to Section 5.1
28 : // and for methods according to Section 9.1 of RFC 9110:
29 : // https://www.rfc-editor.org/rfc/rfc9110.html
30 : constexpr absl::string_view kValidCharacters =
31 : "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~";
32 : constexpr absl::string_view::iterator kValidCharactersBegin = kValidCharacters.begin();
33 : constexpr absl::string_view::iterator kValidCharactersEnd = kValidCharacters.end();
34 :
// Returns true if `c` is one of the upper-case letters that a known request method can start
// with. Used as a cheap check on the very first byte of a request.
bool isFirstCharacterOfValidMethod(char c) {
  switch (c) {
  case 'A':
  case 'B':
  case 'C':
  case 'D':
  case 'G':
  case 'H':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P':
  case 'R':
  case 'S':
  case 'T':
  case 'U':
    return true;
  default:
    return false;
  }
}
43 :
44 : // TODO(#21245): Skip method validation altogether when UHV method validation is
45 : // enabled.
46 737 : bool isMethodValid(absl::string_view method, bool allow_custom_methods) {
47 737 : if (allow_custom_methods) {
48 0 : return !method.empty() &&
49 0 : std::all_of(method.begin(), method.end(), [](absl::string_view::value_type c) {
50 0 : return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
51 0 : });
52 0 : }
53 :
54 737 : static constexpr absl::string_view kValidMethods[] = {
55 737 : "ACL", "BIND", "CHECKOUT", "CONNECT", "COPY", "DELETE", "GET",
56 737 : "HEAD", "LINK", "LOCK", "MERGE", "MKACTIVITY", "MKCALENDAR", "MKCOL",
57 737 : "MOVE", "MSEARCH", "NOTIFY", "OPTIONS", "PATCH", "POST", "PROPFIND",
58 737 : "PROPPATCH", "PURGE", "PUT", "REBIND", "REPORT", "SEARCH", "SOURCE",
59 737 : "SUBSCRIBE", "TRACE", "UNBIND", "UNLINK", "UNLOCK", "UNSUBSCRIBE"};
60 :
61 737 : const auto* begin = &kValidMethods[0];
62 737 : const auto* end = &kValidMethods[ABSL_ARRAYSIZE(kValidMethods) - 1] + 1;
63 737 : return std::binary_search(begin, end, method);
64 737 : }
65 :
66 : // This function is crafted to match the URL validation behavior of the http-parser library.
67 667 : bool isUrlValid(absl::string_view url, bool is_connect) {
68 667 : if (url.empty()) {
69 1 : return false;
70 1 : }
71 :
72 : // Same set of characters are allowed for path and query.
73 6932 : const auto is_valid_path_query_char = [](char c) {
74 6932 : return c == 9 || c == 12 || ('!' <= c && c <= 126);
75 6932 : };
76 :
77 : // The URL may start with a path.
78 666 : if (auto it = url.begin(); *it == '/' || *it == '*') {
79 615 : ++it;
80 615 : return std::all_of(it, url.end(), is_valid_path_query_char);
81 615 : }
82 :
83 : // If method is not CONNECT, parse scheme.
84 51 : if (!is_connect) {
85 : // Scheme must start with alpha and be non-empty.
86 25 : auto it = url.begin();
87 25 : if (!std::isalpha(*it)) {
88 8 : return false;
89 8 : }
90 17 : ++it;
91 : // Scheme started with an alpha character and the rest of it is alpha, digit, '+', '-' or '.'.
92 88 : const auto is_scheme_suffix = [](char c) {
93 88 : return std::isalpha(c) || std::isdigit(c) || c == '+' || c == '-' || c == '.';
94 88 : };
95 17 : it = std::find_if_not(it, url.end(), is_scheme_suffix);
96 17 : url.remove_prefix(it - url.begin());
97 17 : if (!absl::StartsWith(url, kColonSlashSlash)) {
98 17 : return false;
99 17 : }
100 0 : url.remove_prefix(kColonSlashSlash.length());
101 0 : }
102 :
103 : // Path and query start with the first '/' or '?' character.
104 430 : const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; };
105 :
106 : // Divide the rest of the URL into two sections: host, and path/query/fragments.
107 26 : auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start);
108 26 : const absl::string_view host = url.substr(0, path_query_begin - url.begin());
109 26 : const absl::string_view path_query = url.substr(path_query_begin - url.begin());
110 :
111 185 : const auto valid_host_char = [](char c) {
112 185 : return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' ||
113 185 : c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' ||
114 185 : c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' ||
115 185 : c == '~';
116 185 : };
117 :
118 : // Match http-parser's quirk of allowing any number of '@' characters in host
119 : // as long as they are not consecutive.
120 26 : return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") &&
121 26 : std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char);
122 51 : }
123 :
124 : // Returns true if `version_input` is a valid HTTP version string as defined at
125 : // https://www.rfc-editor.org/rfc/rfc9112.html#section-2.3, or empty (for HTTP/0.9).
126 668 : bool isVersionValid(absl::string_view version_input) {
127 668 : if (version_input.empty()) {
128 67 : return true;
129 67 : }
130 :
131 601 : if (!absl::StartsWith(version_input, kHttpVersionPrefix)) {
132 47 : return false;
133 47 : }
134 554 : version_input.remove_prefix(kHttpVersionPrefix.size());
135 :
136 : // Version number is in the form of "[0-9].[0-9]".
137 554 : return version_input.size() == 3 && absl::ascii_isdigit(version_input[0]) &&
138 554 : version_input[1] == '.' && absl::ascii_isdigit(version_input[2]);
139 601 : }
140 :
141 1115 : bool isHeaderNameValid(absl::string_view name) {
142 16473 : return std::all_of(name.begin(), name.end(), [](absl::string_view::value_type c) {
143 16473 : return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c);
144 16473 : });
145 1115 : }
146 :
147 : } // anonymous namespace
148 :
149 : BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length,
150 : bool enable_trailers, bool allow_custom_methods)
151 : : message_type_(type), connection_(connection), enable_trailers_(enable_trailers),
152 1571 : allow_custom_methods_(allow_custom_methods) {
153 1571 : ASSERT(connection_ != nullptr);
154 :
155 1571 : quiche::HttpValidationPolicy http_validation_policy;
156 1571 : http_validation_policy.disallow_header_continuation_lines = false;
157 1571 : http_validation_policy.require_header_colon = true;
158 1571 : http_validation_policy.disallow_multiple_content_length = true;
159 1571 : http_validation_policy.disallow_transfer_encoding_with_content_length = false;
160 1571 : http_validation_policy.validate_transfer_encoding = false;
161 1571 : http_validation_policy.require_content_length_if_body_required = false;
162 1571 : http_validation_policy.disallow_invalid_header_characters_in_response = true;
163 1571 : framer_.set_http_validation_policy(http_validation_policy);
164 :
165 1571 : framer_.set_balsa_headers(&headers_);
166 1571 : framer_.set_balsa_visitor(this);
167 1571 : framer_.set_max_header_length(max_header_length);
168 1571 : framer_.set_invalid_chars_level(quiche::BalsaFrame::InvalidCharsLevel::kError);
169 1571 : framer_.EnableTrailers();
170 :
171 1571 : switch (message_type_) {
172 900 : case MessageType::Request:
173 900 : framer_.set_is_request(true);
174 900 : break;
175 671 : case MessageType::Response:
176 671 : framer_.set_is_request(false);
177 671 : break;
178 1571 : }
179 1571 : }
180 :
181 1687 : size_t BalsaParser::execute(const char* slice, int len) {
182 1687 : ASSERT(status_ != ParserStatus::Error);
183 :
184 1687 : if (len > 0 && !first_byte_processed_) {
185 1432 : if (message_type_ == MessageType::Request && !allow_custom_methods_ &&
186 1432 : !isFirstCharacterOfValidMethod(*slice)) {
187 106 : status_ = ParserStatus::Error;
188 106 : error_message_ = "HPE_INVALID_METHOD";
189 106 : return 0;
190 106 : }
191 1326 : if (message_type_ == MessageType::Response && *slice != kResponseFirstByte) {
192 445 : status_ = ParserStatus::Error;
193 445 : error_message_ = "HPE_INVALID_CONSTANT";
194 445 : return 0;
195 445 : }
196 :
197 881 : status_ = convertResult(connection_->onMessageBegin());
198 881 : if (status_ == ParserStatus::Error) {
199 0 : return 0;
200 0 : }
201 :
202 881 : first_byte_processed_ = true;
203 881 : }
204 :
205 1136 : if (len == 0 && headers_done_ && !isChunked() &&
206 1136 : ((message_type_ == MessageType::Response && hasTransferEncoding()) ||
207 0 : !headers_.content_length_valid())) {
208 0 : MessageDone();
209 0 : return 0;
210 0 : }
211 :
212 1136 : if (first_byte_processed_ && len == 0) {
213 0 : status_ = ParserStatus::Error;
214 0 : error_message_ = "HPE_INVALID_EOF_STATE";
215 0 : return 0;
216 0 : }
217 :
218 1136 : return framer_.ProcessInput(slice, len);
219 1136 : }
220 :
221 1579 : void BalsaParser::resume() {
222 1579 : ASSERT(status_ != ParserStatus::Error);
223 1579 : status_ = ParserStatus::Ok;
224 1579 : }
225 :
226 267 : CallbackResult BalsaParser::pause() {
227 267 : ASSERT(status_ != ParserStatus::Error);
228 267 : status_ = ParserStatus::Paused;
229 267 : return CallbackResult::Success;
230 267 : }
231 :
232 2098 : ParserStatus BalsaParser::getStatus() const { return status_; }
233 :
234 404 : Http::Code BalsaParser::statusCode() const {
235 404 : return static_cast<Http::Code>(headers_.parsed_response_code());
236 404 : }
237 :
238 354 : bool BalsaParser::isHttp11() const {
239 354 : if (message_type_ == MessageType::Request) {
240 313 : return absl::EndsWith(headers_.first_line(), Http::Headers::get().ProtocolStrings.Http11String);
241 313 : } else {
242 41 : return absl::StartsWith(headers_.first_line(),
243 41 : Http::Headers::get().ProtocolStrings.Http11String);
244 41 : }
245 354 : }
246 :
247 112 : absl::optional<uint64_t> BalsaParser::contentLength() const {
248 112 : if (!headers_.content_length_valid()) {
249 110 : return absl::nullopt;
250 110 : }
251 2 : return headers_.content_length();
252 112 : }
253 :
254 244 : bool BalsaParser::isChunked() const { return headers_.transfer_encoding_is_chunked(); }
255 :
256 1673 : absl::string_view BalsaParser::methodName() const { return headers_.request_method(); }
257 :
258 3844 : absl::string_view BalsaParser::errorMessage() const { return error_message_; }
259 :
260 353 : int BalsaParser::hasTransferEncoding() const {
261 353 : return headers_.HasHeader(Http::Headers::get().TransferEncoding);
262 353 : }
263 :
264 219 : void BalsaParser::OnRawBodyInput(absl::string_view /*input*/) {}
265 :
266 110 : void BalsaParser::OnBodyChunkInput(absl::string_view input) {
267 110 : if (status_ == ParserStatus::Error) {
268 0 : return;
269 0 : }
270 :
271 110 : connection_->bufferBody(input.data(), input.size());
272 110 : }
273 :
274 622 : void BalsaParser::OnHeaderInput(absl::string_view /*input*/) {}
275 0 : void BalsaParser::OnTrailerInput(absl::string_view /*input*/) {}
276 :
277 452 : void BalsaParser::ProcessHeaders(const BalsaHeaders& headers) {
278 452 : validateAndProcessHeadersOrTrailersImpl(headers, /* trailers = */ false);
279 452 : }
280 0 : void BalsaParser::OnTrailers(std::unique_ptr<quiche::BalsaHeaders> trailers) {
281 0 : validateAndProcessHeadersOrTrailersImpl(*trailers, /* trailers = */ true);
282 0 : }
283 :
284 : void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
285 : absl::string_view method_input,
286 : absl::string_view request_uri,
287 737 : absl::string_view version_input) {
288 737 : if (status_ == ParserStatus::Error) {
289 0 : return;
290 0 : }
291 737 : if (!isMethodValid(method_input, allow_custom_methods_)) {
292 70 : status_ = ParserStatus::Error;
293 70 : error_message_ = "HPE_INVALID_METHOD";
294 70 : return;
295 70 : }
296 667 : const bool is_connect = method_input == Headers::get().MethodValues.Connect;
297 667 : if (!isUrlValid(request_uri, is_connect)) {
298 42 : status_ = ParserStatus::Error;
299 42 : error_message_ = "HPE_INVALID_URL";
300 42 : return;
301 42 : }
302 625 : if (!isVersionValid(version_input)) {
303 152 : status_ = ParserStatus::Error;
304 152 : error_message_ = "HPE_INVALID_VERSION";
305 152 : return;
306 152 : }
307 473 : status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size()));
308 473 : }
309 :
310 : void BalsaParser::OnResponseFirstLineInput(absl::string_view /*line_input*/,
311 : absl::string_view version_input,
312 : absl::string_view /*status_input*/,
313 43 : absl::string_view reason_input) {
314 43 : if (status_ == ParserStatus::Error) {
315 0 : return;
316 0 : }
317 43 : if (!isVersionValid(version_input)) {
318 2 : status_ = ParserStatus::Error;
319 2 : error_message_ = "HPE_INVALID_VERSION";
320 2 : return;
321 2 : }
322 41 : status_ = convertResult(connection_->onStatus(reason_input.data(), reason_input.size()));
323 41 : }
324 :
325 212 : void BalsaParser::OnChunkLength(size_t chunk_length) {
326 212 : if (status_ == ParserStatus::Error) {
327 0 : return;
328 0 : }
329 212 : const bool is_final_chunk = chunk_length == 0;
330 212 : connection_->onChunkHeader(is_final_chunk);
331 212 : }
332 :
333 212 : void BalsaParser::OnChunkExtensionInput(absl::string_view /*input*/) {}
334 :
335 0 : void BalsaParser::OnInterimHeaders(std::unique_ptr<BalsaHeaders> /*headers*/) {}
336 :
337 452 : void BalsaParser::HeaderDone() {
338 452 : if (status_ == ParserStatus::Error) {
339 98 : return;
340 98 : }
341 354 : headers_done_ = true;
342 354 : CallbackResult result = connection_->onHeadersComplete();
343 354 : status_ = convertResult(result);
344 354 : if (result == CallbackResult::NoBody || result == CallbackResult::NoBodyData) {
345 0 : MessageDone();
346 0 : }
347 354 : }
348 :
349 0 : void BalsaParser::ContinueHeaderDone() {}
350 :
351 287 : void BalsaParser::MessageDone() {
352 287 : if (status_ == ParserStatus::Error) {
353 137 : return;
354 137 : }
355 150 : status_ = convertResult(connection_->onMessageComplete());
356 150 : framer_.Reset();
357 150 : first_byte_processed_ = false;
358 150 : headers_done_ = false;
359 150 : }
360 :
361 232 : void BalsaParser::HandleError(BalsaFrameEnums::ErrorCode error_code) {
362 232 : status_ = ParserStatus::Error;
363 232 : switch (error_code) {
364 0 : case BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING:
365 0 : error_message_ = "unsupported transfer encoding";
366 0 : break;
367 0 : case BalsaFrameEnums::INVALID_CHUNK_LENGTH:
368 0 : error_message_ = "HPE_INVALID_CHUNK_SIZE";
369 0 : break;
370 0 : case BalsaFrameEnums::HEADERS_TOO_LONG:
371 0 : error_message_ = "headers size exceeds limit";
372 0 : break;
373 0 : case BalsaFrameEnums::TRAILER_TOO_LONG:
374 0 : error_message_ = "trailers size exceeds limit";
375 0 : break;
376 0 : case BalsaFrameEnums::TRAILER_MISSING_COLON:
377 0 : error_message_ = "HPE_INVALID_HEADER_TOKEN";
378 0 : break;
379 70 : case BalsaFrameEnums::INVALID_HEADER_CHARACTER:
380 70 : error_message_ = "header value contains invalid chars";
381 70 : break;
382 2 : case BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS:
383 2 : error_message_ = "HPE_UNEXPECTED_CONTENT_LENGTH";
384 2 : break;
385 160 : default:
386 160 : error_message_ = BalsaFrameEnums::ErrorCodeToString(error_code);
387 232 : }
388 232 : }
389 :
390 109 : void BalsaParser::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
391 109 : if (error_code == BalsaFrameEnums::TRAILER_MISSING_COLON) {
392 0 : HandleError(error_code);
393 0 : }
394 109 : }
395 :
396 : void BalsaParser::validateAndProcessHeadersOrTrailersImpl(const quiche::BalsaHeaders& headers,
397 452 : bool trailers) {
398 1153 : for (const auto& [key, value] : headers.lines()) {
399 1153 : if (status_ == ParserStatus::Error) {
400 38 : return;
401 38 : }
402 :
403 1115 : if (!isHeaderNameValid(key)) {
404 24 : status_ = ParserStatus::Error;
405 24 : error_message_ = "HPE_INVALID_HEADER_TOKEN";
406 24 : return;
407 24 : }
408 :
409 1091 : if (trailers && !enable_trailers_) {
410 0 : continue;
411 0 : }
412 :
413 1091 : status_ = convertResult(connection_->onHeaderField(key.data(), key.length()));
414 1091 : if (status_ == ParserStatus::Error) {
415 0 : return;
416 0 : }
417 :
418 : // Remove CR and LF characters to match http-parser behavior.
419 7909 : auto is_cr_or_lf = [](char c) { return c == '\r' || c == '\n'; };
420 1091 : if (std::any_of(value.begin(), value.end(), is_cr_or_lf)) {
421 33 : std::string value_without_cr_or_lf;
422 33 : value_without_cr_or_lf.reserve(value.size());
423 1038 : for (char c : value) {
424 1038 : if (!is_cr_or_lf(c)) {
425 875 : value_without_cr_or_lf.push_back(c);
426 875 : }
427 1038 : }
428 33 : status_ = convertResult(connection_->onHeaderValue(value_without_cr_or_lf.data(),
429 33 : value_without_cr_or_lf.length()));
430 1058 : } else {
431 : // No need to copy if header value does not contain CR or LF.
432 1058 : status_ = convertResult(connection_->onHeaderValue(value.data(), value.length()));
433 1058 : }
434 1091 : }
435 452 : }
436 :
437 4081 : ParserStatus BalsaParser::convertResult(CallbackResult result) const {
438 4081 : return result == CallbackResult::Error ? ParserStatus::Error : status_;
439 4081 : }
440 :
441 : } // namespace Http1
442 : } // namespace Http
443 : } // namespace Envoy
|