/proc/self/cwd/source/common/http/http1/balsa_parser.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/common/http/http1/balsa_parser.h" |
2 | | |
3 | | #include <algorithm> |
4 | | #include <cctype> |
5 | | #include <cstdint> |
6 | | |
7 | | #include "source/common/common/assert.h" |
8 | | #include "source/common/http/headers.h" |
9 | | |
10 | | #include "absl/strings/ascii.h" |
11 | | #include "absl/strings/match.h" |
12 | | |
13 | | namespace Envoy { |
14 | | namespace Http { |
15 | | namespace Http1 { |
16 | | |
17 | | namespace { |
18 | | |
19 | | using ::quiche::BalsaFrameEnums; |
20 | | using ::quiche::BalsaHeaders; |
21 | | |
22 | | constexpr absl::string_view kColonSlashSlash = "://"; |
23 | | // Response must start with "HTTP". |
24 | | constexpr char kResponseFirstByte = 'H'; |
25 | | constexpr absl::string_view kHttpVersionPrefix = "HTTP/"; |
26 | | |
27 | | // Allowed characters for field names according to Section 5.1 |
28 | | // and for methods according to Section 9.1 of RFC 9110: |
29 | | // https://www.rfc-editor.org/rfc/rfc9110.html |
30 | | constexpr absl::string_view kValidCharacters = |
31 | | "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~"; |
32 | | constexpr absl::string_view::iterator kValidCharactersBegin = kValidCharacters.begin(); |
33 | | constexpr absl::string_view::iterator kValidCharactersEnd = kValidCharacters.end(); |
34 | | |
// Returns true if `c` is one of the upper-case letters that a method on the
// fixed allow-list can start with.
bool isFirstCharacterOfValidMethod(char c) {
  // Keep this table sorted: the lookup below is a binary search.
  static constexpr char kMethodInitials[] = {'A', 'B', 'C', 'D', 'G', 'H', 'L', 'M',
                                             'N', 'O', 'P', 'R', 'S', 'T', 'U'};
  return std::binary_search(std::begin(kMethodInitials), std::end(kMethodInitials), c);
}
43 | | |
44 | | // TODO(#21245): Skip method validation altogether when UHV method validation is |
45 | | // enabled. |
46 | 5.94k | bool isMethodValid(absl::string_view method, bool allow_custom_methods) { |
47 | 5.94k | if (allow_custom_methods) { |
48 | 32 | return !method.empty() && |
49 | 147 | std::all_of(method.begin(), method.end(), [](absl::string_view::value_type c) { |
50 | 147 | return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c); |
51 | 147 | }); |
52 | 32 | } |
53 | | |
54 | 5.91k | static constexpr absl::string_view kValidMethods[] = { |
55 | 5.91k | "ACL", "BIND", "CHECKOUT", "CONNECT", "COPY", "DELETE", "GET", |
56 | 5.91k | "HEAD", "LINK", "LOCK", "MERGE", "MKACTIVITY", "MKCALENDAR", "MKCOL", |
57 | 5.91k | "MOVE", "MSEARCH", "NOTIFY", "OPTIONS", "PATCH", "POST", "PROPFIND", |
58 | 5.91k | "PROPPATCH", "PURGE", "PUT", "REBIND", "REPORT", "SEARCH", "SOURCE", |
59 | 5.91k | "SUBSCRIBE", "TRACE", "UNBIND", "UNLINK", "UNLOCK", "UNSUBSCRIBE"}; |
60 | | |
61 | 5.91k | const auto* begin = &kValidMethods[0]; |
62 | 5.91k | const auto* end = &kValidMethods[ABSL_ARRAYSIZE(kValidMethods) - 1] + 1; |
63 | 5.91k | return std::binary_search(begin, end, method); |
64 | 5.94k | } |
65 | | |
66 | | // This function is crafted to match the URL validation behavior of the http-parser library. |
67 | 4.50k | bool isUrlValid(absl::string_view url, bool is_connect) { |
68 | 4.50k | if (url.empty()) { |
69 | 7 | return false; |
70 | 7 | } |
71 | | |
72 | | // Same set of characters are allowed for path and query. |
73 | 2.68M | const auto is_valid_path_query_char = [](char c) { |
74 | 2.68M | return c == 9 || c == 12 || ('!' <= c && c <= 126); |
75 | 2.68M | }; |
76 | | |
77 | | // The URL may start with a path. |
78 | 4.50k | if (auto it = url.begin(); *it == '/' || *it == '*') { |
79 | 2.98k | ++it; |
80 | 2.98k | return std::all_of(it, url.end(), is_valid_path_query_char); |
81 | 2.98k | } |
82 | | |
83 | | // If method is not CONNECT, parse scheme. |
84 | 1.52k | if (!is_connect) { |
85 | | // Scheme must start with alpha and be non-empty. |
86 | 1.38k | auto it = url.begin(); |
87 | 1.38k | if (!std::isalpha(*it)) { |
88 | 12 | return false; |
89 | 12 | } |
90 | 1.37k | ++it; |
91 | | // Scheme started with an alpha character and the rest of it is alpha, digit, '+', '-' or '.'. |
92 | 405k | const auto is_scheme_suffix = [](char c) { |
93 | 405k | return std::isalpha(c) || std::isdigit(c) || c == '+' || c == '-' || c == '.'; |
94 | 405k | }; |
95 | 1.37k | it = std::find_if_not(it, url.end(), is_scheme_suffix); |
96 | 1.37k | url.remove_prefix(it - url.begin()); |
97 | 1.37k | if (!absl::StartsWith(url, kColonSlashSlash)) { |
98 | 115 | return false; |
99 | 115 | } |
100 | 1.25k | url.remove_prefix(kColonSlashSlash.length()); |
101 | 1.25k | } |
102 | | |
103 | | // Path and query start with the first '/' or '?' character. |
104 | 1.48M | const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; }; |
105 | | |
106 | | // Divide the rest of the URL into two sections: host, and path/query/fragments. |
107 | 1.39k | auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start); |
108 | 1.39k | const absl::string_view host = url.substr(0, path_query_begin - url.begin()); |
109 | 1.39k | const absl::string_view path_query = url.substr(path_query_begin - url.begin()); |
110 | | |
111 | 1.42M | const auto valid_host_char = [](char c) { |
112 | 1.42M | return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' || |
113 | 1.42M | c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' || |
114 | 1.42M | c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' || |
115 | 1.42M | c == '~'; |
116 | 1.42M | }; |
117 | | |
118 | | // Match http-parser's quirk of allowing any number of '@' characters in host |
119 | | // as long as they are not consecutive. |
120 | 1.39k | return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") && |
121 | 1.39k | std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char); |
122 | 1.52k | } |
123 | | |
124 | | // Returns true if `version_input` is a valid HTTP version string as defined at |
125 | | // https://www.rfc-editor.org/rfc/rfc9112.html#section-2.3, or empty (for HTTP/0.9). |
126 | 5.33k | bool isVersionValid(absl::string_view version_input) { |
127 | 5.33k | if (version_input.empty()) { |
128 | 2.13k | return true; |
129 | 2.13k | } |
130 | | |
131 | 3.19k | if (!absl::StartsWith(version_input, kHttpVersionPrefix)) { |
132 | 765 | return false; |
133 | 765 | } |
134 | 2.42k | version_input.remove_prefix(kHttpVersionPrefix.size()); |
135 | | |
136 | | // Version number is in the form of "[0-9].[0-9]". |
137 | 2.42k | return version_input.size() == 3 && absl::ascii_isdigit(version_input[0]) && |
138 | 2.42k | version_input[1] == '.' && absl::ascii_isdigit(version_input[2]); |
139 | 3.19k | } |
140 | | |
141 | 61.4k | bool isHeaderNameValid(absl::string_view name) { |
142 | 717k | return std::all_of(name.begin(), name.end(), [](absl::string_view::value_type c) { |
143 | 717k | return std::binary_search(kValidCharactersBegin, kValidCharactersEnd, c); |
144 | 717k | }); |
145 | 61.4k | } |
146 | | |
147 | | } // anonymous namespace |
148 | | |
149 | | BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length, |
150 | | bool enable_trailers, bool allow_custom_methods) |
151 | | : message_type_(type), connection_(connection), enable_trailers_(enable_trailers), |
152 | 19.3k | allow_custom_methods_(allow_custom_methods) { |
153 | 19.3k | ASSERT(connection_ != nullptr); |
154 | | |
155 | 19.3k | quiche::HttpValidationPolicy http_validation_policy; |
156 | 19.3k | http_validation_policy.disallow_header_continuation_lines = true; |
157 | 19.3k | http_validation_policy.require_header_colon = true; |
158 | 19.3k | http_validation_policy.disallow_multiple_content_length = true; |
159 | 19.3k | http_validation_policy.disallow_transfer_encoding_with_content_length = false; |
160 | 19.3k | http_validation_policy.validate_transfer_encoding = false; |
161 | 19.3k | http_validation_policy.require_content_length_if_body_required = false; |
162 | 19.3k | http_validation_policy.disallow_invalid_header_characters_in_response = true; |
163 | 19.3k | framer_.set_http_validation_policy(http_validation_policy); |
164 | | |
165 | 19.3k | framer_.set_balsa_headers(&headers_); |
166 | 19.3k | framer_.set_balsa_visitor(this); |
167 | 19.3k | framer_.set_max_header_length(max_header_length); |
168 | 19.3k | framer_.set_invalid_chars_level(quiche::BalsaFrame::InvalidCharsLevel::kError); |
169 | 19.3k | framer_.EnableTrailers(); |
170 | | |
171 | 19.3k | switch (message_type_) { |
172 | 10.0k | case MessageType::Request: |
173 | 10.0k | framer_.set_is_request(true); |
174 | 10.0k | break; |
175 | 9.29k | case MessageType::Response: |
176 | 9.29k | framer_.set_is_request(false); |
177 | 9.29k | break; |
178 | 19.3k | } |
179 | 19.3k | } |
180 | | |
181 | 23.1k | size_t BalsaParser::execute(const char* slice, int len) { |
182 | 23.1k | ASSERT(status_ != ParserStatus::Error); |
183 | | |
184 | 23.1k | if (len > 0 && !first_byte_processed_) { |
185 | 11.8k | if (message_type_ == MessageType::Request && !allow_custom_methods_ && |
186 | 11.8k | !isFirstCharacterOfValidMethod(*slice)) { |
187 | 450 | status_ = ParserStatus::Error; |
188 | 450 | error_message_ = "HPE_INVALID_METHOD"; |
189 | 450 | return 0; |
190 | 450 | } |
191 | 11.4k | if (message_type_ == MessageType::Response && *slice != kResponseFirstByte) { |
192 | 3.49k | status_ = ParserStatus::Error; |
193 | 3.49k | error_message_ = "HPE_INVALID_CONSTANT"; |
194 | 3.49k | return 0; |
195 | 3.49k | } |
196 | | |
197 | 7.90k | status_ = convertResult(connection_->onMessageBegin()); |
198 | 7.90k | if (status_ == ParserStatus::Error) { |
199 | 14 | return 0; |
200 | 14 | } |
201 | | |
202 | 7.89k | first_byte_processed_ = true; |
203 | 7.89k | } |
204 | | |
205 | 19.2k | if (len == 0 && headers_done_ && !isChunked() && |
206 | 19.2k | ((message_type_ == MessageType::Response && hasTransferEncoding()) || |
207 | 2 | !headers_.content_length_valid())) { |
208 | 2 | MessageDone(); |
209 | 2 | return 0; |
210 | 2 | } |
211 | | |
212 | 19.2k | if (first_byte_processed_ && len == 0) { |
213 | 6 | status_ = ParserStatus::Error; |
214 | 6 | error_message_ = "HPE_INVALID_EOF_STATE"; |
215 | 6 | return 0; |
216 | 6 | } |
217 | | |
218 | 19.1k | return framer_.ProcessInput(slice, len); |
219 | 19.2k | } |
220 | | |
221 | 21.8k | void BalsaParser::resume() { |
222 | 21.8k | ASSERT(status_ != ParserStatus::Error); |
223 | 21.8k | status_ = ParserStatus::Ok; |
224 | 21.8k | } |
225 | | |
226 | 786 | CallbackResult BalsaParser::pause() { |
227 | 786 | ASSERT(status_ != ParserStatus::Error); |
228 | 786 | status_ = ParserStatus::Paused; |
229 | 786 | return CallbackResult::Success; |
230 | 786 | } |
231 | | |
232 | 34.0k | ParserStatus BalsaParser::getStatus() const { return status_; } |
233 | | |
234 | 339 | Http::Code BalsaParser::statusCode() const { |
235 | 339 | return static_cast<Http::Code>(headers_.parsed_response_code()); |
236 | 339 | } |
237 | | |
238 | 4.36k | bool BalsaParser::isHttp11() const { |
239 | 4.36k | if (message_type_ == MessageType::Request) { |
240 | 4.08k | return absl::EndsWith(headers_.first_line(), Http::Headers::get().ProtocolStrings.Http11String); |
241 | 4.08k | } else { |
242 | 283 | return absl::StartsWith(headers_.first_line(), |
243 | 283 | Http::Headers::get().ProtocolStrings.Http11String); |
244 | 283 | } |
245 | 4.36k | } |
246 | | |
247 | 1.14k | absl::optional<uint64_t> BalsaParser::contentLength() const { |
248 | 1.14k | if (!headers_.content_length_valid()) { |
249 | 687 | return absl::nullopt; |
250 | 687 | } |
251 | 456 | return headers_.content_length(); |
252 | 1.14k | } |
253 | | |
254 | 1.85k | bool BalsaParser::isChunked() const { return headers_.transfer_encoding_is_chunked(); } |
255 | | |
256 | 19.0k | absl::string_view BalsaParser::methodName() const { return headers_.request_method(); } |
257 | | |
258 | 27.4k | absl::string_view BalsaParser::errorMessage() const { return error_message_; } |
259 | | |
260 | 4.35k | int BalsaParser::hasTransferEncoding() const { |
261 | 4.35k | return headers_.HasHeader(Http::Headers::get().TransferEncoding); |
262 | 4.35k | } |
263 | | |
264 | 17.3k | void BalsaParser::OnRawBodyInput(absl::string_view /*input*/) {} |
265 | | |
266 | 16.3k | void BalsaParser::OnBodyChunkInput(absl::string_view input) { |
267 | 16.3k | if (status_ == ParserStatus::Error) { |
268 | 0 | return; |
269 | 0 | } |
270 | | |
271 | 16.3k | connection_->bufferBody(input.data(), input.size()); |
272 | 16.3k | } |
273 | | |
274 | 6.75k | void BalsaParser::OnHeaderInput(absl::string_view /*input*/) {} |
275 | 121 | void BalsaParser::OnTrailerInput(absl::string_view /*input*/) {} |
276 | 204k | void BalsaParser::OnHeader(absl::string_view /*key*/, absl::string_view /*value*/) {} |
277 | | |
278 | 6.05k | void BalsaParser::ProcessHeaders(const BalsaHeaders& headers) { |
279 | 6.05k | validateAndProcessHeadersOrTrailersImpl(headers, /* trailers = */ false); |
280 | 6.05k | } |
281 | 49 | void BalsaParser::OnTrailers(std::unique_ptr<quiche::BalsaHeaders> trailers) { |
282 | 49 | validateAndProcessHeadersOrTrailersImpl(*trailers, /* trailers = */ true); |
283 | 49 | } |
284 | | |
285 | | void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/, |
286 | | absl::string_view method_input, |
287 | | absl::string_view request_uri, |
288 | 5.94k | absl::string_view version_input) { |
289 | 5.94k | if (status_ == ParserStatus::Error) { |
290 | 0 | return; |
291 | 0 | } |
292 | 5.94k | if (!isMethodValid(method_input, allow_custom_methods_)) { |
293 | 1.43k | status_ = ParserStatus::Error; |
294 | 1.43k | error_message_ = "HPE_INVALID_METHOD"; |
295 | 1.43k | return; |
296 | 1.43k | } |
297 | 4.50k | const bool is_connect = method_input == Headers::get().MethodValues.Connect; |
298 | 4.50k | if (!isUrlValid(request_uri, is_connect)) { |
299 | 262 | status_ = ParserStatus::Error; |
300 | 262 | error_message_ = "HPE_INVALID_URL"; |
301 | 262 | return; |
302 | 262 | } |
303 | 4.24k | if (!isVersionValid(version_input)) { |
304 | 98 | status_ = ParserStatus::Error; |
305 | 98 | error_message_ = "HPE_INVALID_VERSION"; |
306 | 98 | return; |
307 | 98 | } |
308 | 4.14k | status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size())); |
309 | 4.14k | } |
310 | | |
311 | | void BalsaParser::OnResponseFirstLineInput(absl::string_view /*line_input*/, |
312 | | absl::string_view version_input, |
313 | | absl::string_view /*status_input*/, |
314 | 1.08k | absl::string_view reason_input) { |
315 | 1.08k | if (status_ == ParserStatus::Error) { |
316 | 0 | return; |
317 | 0 | } |
318 | 1.08k | if (!isVersionValid(version_input)) { |
319 | 760 | status_ = ParserStatus::Error; |
320 | 760 | error_message_ = "HPE_INVALID_VERSION"; |
321 | 760 | return; |
322 | 760 | } |
323 | 324 | status_ = convertResult(connection_->onStatus(reason_input.data(), reason_input.size())); |
324 | 324 | } |
325 | | |
326 | 16.4k | void BalsaParser::OnChunkLength(size_t chunk_length) { |
327 | 16.4k | if (status_ == ParserStatus::Error) { |
328 | 0 | return; |
329 | 0 | } |
330 | 16.4k | const bool is_final_chunk = chunk_length == 0; |
331 | 16.4k | connection_->onChunkHeader(is_final_chunk); |
332 | 16.4k | } |
333 | | |
334 | 16.4k | void BalsaParser::OnChunkExtensionInput(absl::string_view /*input*/) {} |
335 | | |
336 | 0 | void BalsaParser::OnInterimHeaders(std::unique_ptr<BalsaHeaders> /*headers*/) {} |
337 | | |
338 | 6.05k | void BalsaParser::HeaderDone() { |
339 | 6.05k | if (status_ == ParserStatus::Error) { |
340 | 1.67k | return; |
341 | 1.67k | } |
342 | 4.37k | headers_done_ = true; |
343 | 4.37k | CallbackResult result = connection_->onHeadersComplete(); |
344 | 4.37k | status_ = convertResult(result); |
345 | 4.37k | if (result == CallbackResult::NoBody || result == CallbackResult::NoBodyData) { |
346 | 7 | MessageDone(); |
347 | 7 | } |
348 | 4.37k | } |
349 | | |
350 | 0 | void BalsaParser::ContinueHeaderDone() {} |
351 | | |
352 | 4.30k | void BalsaParser::MessageDone() { |
353 | 4.30k | if (status_ == ParserStatus::Error) { |
354 | 3.51k | return; |
355 | 3.51k | } |
356 | 789 | status_ = convertResult(connection_->onMessageComplete()); |
357 | 789 | framer_.Reset(); |
358 | 789 | first_byte_processed_ = false; |
359 | 789 | headers_done_ = false; |
360 | 789 | } |
361 | | |
362 | 1.06k | void BalsaParser::HandleError(BalsaFrameEnums::ErrorCode error_code) { |
363 | 1.06k | status_ = ParserStatus::Error; |
364 | 1.06k | switch (error_code) { |
365 | 0 | case BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING: |
366 | 0 | error_message_ = "unsupported transfer encoding"; |
367 | 0 | break; |
368 | 78 | case BalsaFrameEnums::INVALID_CHUNK_LENGTH: |
369 | 78 | error_message_ = "HPE_INVALID_CHUNK_SIZE"; |
370 | 78 | break; |
371 | 96 | case BalsaFrameEnums::HEADERS_TOO_LONG: |
372 | 96 | error_message_ = "headers size exceeds limit"; |
373 | 96 | break; |
374 | 3 | case BalsaFrameEnums::TRAILER_TOO_LONG: |
375 | 3 | error_message_ = "trailers size exceeds limit"; |
376 | 3 | break; |
377 | 13 | case BalsaFrameEnums::TRAILER_MISSING_COLON: |
378 | 13 | error_message_ = "HPE_INVALID_HEADER_TOKEN"; |
379 | 13 | break; |
380 | 349 | case BalsaFrameEnums::INVALID_HEADER_CHARACTER: |
381 | 349 | error_message_ = "header value contains invalid chars"; |
382 | 349 | break; |
383 | 12 | case BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS: |
384 | 12 | error_message_ = "HPE_UNEXPECTED_CONTENT_LENGTH"; |
385 | 12 | break; |
386 | 518 | default: |
387 | 518 | error_message_ = BalsaFrameEnums::ErrorCodeToString(error_code); |
388 | 1.06k | } |
389 | 1.06k | } |
390 | | |
391 | 4.63k | void BalsaParser::HandleWarning(BalsaFrameEnums::ErrorCode error_code) { |
392 | 4.63k | if (error_code == BalsaFrameEnums::TRAILER_MISSING_COLON) { |
393 | 0 | HandleError(error_code); |
394 | 0 | } |
395 | 4.63k | } |
396 | | |
397 | | void BalsaParser::validateAndProcessHeadersOrTrailersImpl(const quiche::BalsaHeaders& headers, |
398 | 6.10k | bool trailers) { |
399 | 61.7k | for (const std::pair<absl::string_view, absl::string_view>& key_value : headers.lines()) { |
400 | 61.7k | if (status_ == ParserStatus::Error) { |
401 | 334 | return; |
402 | 334 | } |
403 | | |
404 | 61.4k | absl::string_view key = key_value.first; |
405 | 61.4k | if (!isHeaderNameValid(key)) { |
406 | 14 | status_ = ParserStatus::Error; |
407 | 14 | error_message_ = "HPE_INVALID_HEADER_TOKEN"; |
408 | 14 | return; |
409 | 14 | } |
410 | | |
411 | 61.3k | if (trailers && !enable_trailers_) { |
412 | 3 | continue; |
413 | 3 | } |
414 | | |
415 | 61.3k | status_ = convertResult(connection_->onHeaderField(key.data(), key.length())); |
416 | 61.3k | if (status_ == ParserStatus::Error) { |
417 | 8 | return; |
418 | 8 | } |
419 | | |
420 | 61.3k | absl::string_view value = key_value.second; |
421 | 61.3k | status_ = convertResult(connection_->onHeaderValue(value.data(), value.length())); |
422 | 61.3k | } |
423 | 6.10k | } |
424 | | |
425 | 140k | ParserStatus BalsaParser::convertResult(CallbackResult result) const { |
426 | 140k | return result == CallbackResult::Error ? ParserStatus::Error : status_; |
427 | 140k | } |
428 | | |
429 | | } // namespace Http1 |
430 | | } // namespace Http |
431 | | } // namespace Envoy |