/src/uWebSockets/src/HttpParser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Authored by Alex Hultman, 2018-2024. |
3 | | * Intellectual property of third-party. |
4 | | |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at |
8 | | |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | #ifndef UWS_HTTPPARSER_H |
19 | | #define UWS_HTTPPARSER_H |
20 | | |
21 | | // todo: HttpParser is in need of a few clean-ups and refactorings |
22 | | |
23 | | /* The HTTP parser is an independent module subject to unit testing / fuzz testing */ |
24 | | |
25 | | #include <string> |
26 | | #include <cstring> |
27 | | #include <algorithm> |
28 | | #include <climits> |
29 | | #include <string_view> |
30 | | #include <map> |
31 | | #include "MoveOnlyFunction.h" |
32 | | #include "ChunkedEncoding.h" |
33 | | |
34 | | #include "BloomFilter.h" |
35 | | #include "ProxyParser.h" |
36 | | #include "QueryParser.h" |
37 | | #include "HttpErrors.h" |
38 | | |
39 | | namespace uWS { |
40 | | |
41 | | /* We require at least this much post padding */ |
42 | | static const unsigned int MINIMUM_HTTP_POST_PADDING = 32; |
43 | | static void *FULLPTR = (void *)~(uintptr_t)0; |
44 | | |
45 | | /* STL needs one of these */ |
46 | | template <typename T> |
47 | 2 | std::optional<T *> optional_ptr(T *ptr) { |
48 | 2 | return ptr ? std::optional<T *>(ptr) : std::nullopt; |
49 | 2 | } |
50 | | |
51 | | static const size_t MAX_FALLBACK_SIZE = (size_t) atoi(optional_ptr(getenv("UWS_HTTP_MAX_HEADERS_SIZE")).value_or((char *) "4096")); |
52 | | #ifndef UWS_HTTP_MAX_HEADERS_COUNT |
53 | 62.1k | #define UWS_HTTP_MAX_HEADERS_COUNT 100 |
54 | | #endif |
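| | /* Illustrative note (not in the original source): the two limits above are
| |  * tuned through different mechanisms - one at runtime, one at compile time:
| |  *
| |  *   UWS_HTTP_MAX_HEADERS_SIZE=8192 ./server     (env var, read once at static init into MAX_FALLBACK_SIZE)
| |  *   c++ -DUWS_HTTP_MAX_HEADERS_COUNT=200 ...    (preprocessor define, sizes the per-request Header array)
| |  *
| |  * The values shown are hypothetical examples. */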
55 | | |
56 | | struct HttpRequest { |
57 | | |
58 | | friend struct HttpParser; |
59 | | |
60 | | private: |
61 | | struct Header { |
62 | | std::string_view key, value; |
63 | | } headers[UWS_HTTP_MAX_HEADERS_COUNT]; |
64 | | bool ancientHttp; |
65 | | unsigned int querySeparator; |
66 | | bool didYield; |
67 | | BloomFilter bf; |
68 | | std::pair<int, std::string_view *> currentParameters; |
69 | | std::map<std::string, unsigned short, std::less<>> *currentParameterOffsets = nullptr; |
70 | | |
71 | | public: |
72 | 0 | bool isAncient() { |
73 | 0 | return ancientHttp; |
74 | 0 | } |
75 | | |
76 | 0 | bool getYield() { |
77 | 0 | return didYield; |
78 | 0 | } |
79 | | |
80 | | /* Iteration over headers (key, value) */ |
81 | | struct HeaderIterator { |
82 | | Header *ptr; |
83 | | |
84 | 34.6k | bool operator!=(const HeaderIterator &other) const { |
85 | | /* Comparison with end is a special case */ |
86 | 34.6k | if (ptr != other.ptr) { |
87 | 34.6k | return other.ptr || ptr->key.length(); |
88 | 34.6k | } |
89 | 0 | return false; |
90 | 34.6k | } |
91 | | |
92 | 23.6k | HeaderIterator &operator++() { |
93 | 23.6k | ptr++; |
94 | 23.6k | return *this; |
95 | 23.6k | } |
96 | | |
97 | 23.6k | std::pair<std::string_view, std::string_view> operator*() const { |
98 | 23.6k | return {ptr->key, ptr->value}; |
99 | 23.6k | } |
100 | | }; |
101 | | |
102 | 11.0k | HeaderIterator begin() { |
103 | 11.0k | return {headers + 1}; |
104 | 11.0k | } |
105 | | |
106 | 11.0k | HeaderIterator end() { |
107 | 11.0k | return {nullptr}; |
108 | 11.0k | } |
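| | /* Iteration sketch (hedged example, not in the original): begin() starts at
| |  * headers + 1 because headers[0] holds the request line (method as key,
| |  * target as value), so a range-for yields only the actual header fields:
| |  *
| |  *   for (auto [key, value] : *req) { } // keys arrive lower-cased
| |  */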
109 | | |
110 | | /* If you do not want to handle this route */ |
111 | 0 | void setYield(bool yield) { |
112 | 0 | didYield = yield; |
113 | 0 | } |
114 | | |
115 | 51.0k | std::string_view getHeader(std::string_view lowerCasedHeader) { |
116 | 51.0k | if (bf.mightHave(lowerCasedHeader)) { |
117 | 46.3k | for (Header *h = headers; (++h)->key.length(); ) { |
118 | 43.8k | if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) { |
119 | 17.7k | return h->value; |
120 | 17.7k | } |
121 | 43.8k | } |
122 | 20.3k | } |
123 | 33.2k | return std::string_view(nullptr, 0); |
124 | 51.0k | } |
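| | /* Usage sketch (illustrative): keys are stored lower-cased, so lookups must
| |  * pass lower-cased names; a miss returns a view with nullptr data, which is
| |  * distinct from a present-but-empty value:
| |  *
| |  *   std::string_view ua = req->getHeader("user-agent");
| |  *   if (ua.data()) { handleUserAgent(ua); } // handleUserAgent is hypothetical
| |  */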
125 | | |
126 | 23.6k | std::string_view getUrl() { |
127 | 23.6k | return std::string_view(headers->value.data(), querySeparator); |
128 | 23.6k | } |
129 | | |
130 | 0 | std::string_view getFullUrl() { |
131 | 0 | return std::string_view(headers->value.data(), headers->value.length()); |
132 | 0 | } |
133 | | |
134 | | /* Hack: this should be getMethod */ |
135 | 0 | std::string_view getCaseSensitiveMethod() { |
136 | 0 | return std::string_view(headers->key.data(), headers->key.length()); |
137 | 0 | } |
138 | | |
139 | 23.6k | std::string_view getMethod() { |
140 | | /* Compatibility hack: lower case method (todo: remove when major version bumps) */ |
141 | 111k | for (unsigned int i = 0; i < headers->key.length(); i++) { |
142 | 88.2k | ((char *) headers->key.data())[i] |= 32; |
143 | 88.2k | } |
144 | | |
145 | 23.6k | return std::string_view(headers->key.data(), headers->key.length()); |
146 | 23.6k | } |
147 | | |
148 | | /* Returns the raw querystring as a whole, still encoded */ |
149 | 11.8k | std::string_view getQuery() { |
150 | 11.8k | if (querySeparator < headers->value.length()) { |
151 | | /* Strip the initial ? */ |
152 | 4.01k | return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1); |
153 | 7.83k | } else { |
154 | 7.83k | return std::string_view(nullptr, 0); |
155 | 7.83k | } |
156 | 11.8k | } |
157 | | |
158 | | /* Finds and decodes the URI component. */ |
159 | 23.6k | std::string_view getQuery(std::string_view key) { |
160 | | /* Raw querystring including initial '?' sign */ |
161 | 23.6k | std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator); |
162 | | |
163 | 23.6k | return getDecodedQueryValue(key, queryString); |
164 | 23.6k | } |
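| | /* Hedged example for a hypothetical target "/search?q=a%20b&page=2":
| |  *
| |  *   req->getQuery();       // "q=a%20b&page=2" (raw, still percent-encoded)
| |  *   req->getQuery("q");    // decoded value, "a b" (via getDecodedQueryValue)
| |  *   req->getQuery("none"); // missing key, expected to yield an empty view
| |  */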
165 | | |
166 | 0 | void setParameters(std::pair<int, std::string_view *> parameters) { |
167 | 0 | currentParameters = parameters; |
168 | 0 | } |
169 | | |
170 | 0 | void setParameterOffsets(std::map<std::string, unsigned short, std::less<>> *offsets) { |
171 | 0 | currentParameterOffsets = offsets; |
172 | 0 | } |
173 | | |
174 | 0 | std::string_view getParameter(std::string_view name) { |
175 | 0 | if (!currentParameterOffsets) { |
176 | 0 | return {nullptr, 0}; |
177 | 0 | } |
178 | 0 | auto it = currentParameterOffsets->find(name); |
179 | 0 | if (it == currentParameterOffsets->end()) { |
180 | 0 | return {nullptr, 0}; |
181 | 0 | } |
182 | 0 | return getParameter(it->second); |
183 | 0 | } |
184 | | |
185 | 0 | std::string_view getParameter(unsigned short index) { |
186 | 0 | if (currentParameters.first < (int) index) { |
187 | 0 | return {}; |
188 | 0 | } else { |
189 | 0 | return currentParameters.second[index]; |
190 | 0 | } |
191 | 0 | } |
192 | | |
193 | | }; |
194 | | |
195 | | struct HttpParser { |
196 | | |
197 | | private: |
198 | | std::string fallback; |
199 | | /* This guy really has only 62 bits since we reserve the two highest bits for chunked encoding parsing state */
200 | | uint64_t remainingStreamingBytes = 0; |
201 | | |
202 | | /* Returns UINT64_MAX on error. Maximum 18 digits (999999999999999999) are allowed. */
203 | 1.33k | static uint64_t toUnsignedInteger(std::string_view str) { |
204 | | /* We assume at least a 64-bit integer, giving us safely 999999999999999999 (18 nines) */
205 | 1.33k | if (str.length() > 18) { |
206 | 44 | return UINT64_MAX; |
207 | 44 | } |
208 | | |
209 | 1.29k | uint64_t unsignedIntegerValue = 0; |
210 | 5.10k | for (char c : str) { |
211 | | /* As long as the letter is 0-9 we cannot overflow. */ |
212 | 5.10k | if (c < '0' || c > '9') { |
213 | 69 | return UINT64_MAX; |
214 | 69 | } |
215 | 5.03k | unsignedIntegerValue = unsignedIntegerValue * 10ull + ((unsigned int) c - (unsigned int) '0'); |
216 | 5.03k | } |
217 | 1.22k | return unsignedIntegerValue; |
218 | 1.29k | } |
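| | /* Behavior sketch (illustrative, derived from the code above):
| |  *
| |  *   toUnsignedInteger("0")                   == 0
| |  *   toUnsignedInteger("999999999999999999")  == 999999999999999999ull (18 digits, max)
| |  *   toUnsignedInteger("9999999999999999999") == UINT64_MAX (19 digits, rejected)
| |  *   toUnsignedInteger("12a")                 == UINT64_MAX (non-digit, rejected)
| |  */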
219 | | |
220 | 1.47M | static inline uint64_t hasLess(uint64_t x, uint64_t n) { |
221 | 1.47M | return (((x)-~0ULL/255*(n))&~(x)&~0ULL/255*128); |
222 | 1.47M | } |
223 | | |
224 | 0 | static inline uint64_t hasMore(uint64_t x, uint64_t n) { |
225 | 0 | return (( ((x)+~0ULL/255*(127-(n))) |(x))&~0ULL/255*128); |
226 | 0 | } |
227 | | |
228 | 0 | static inline uint64_t hasBetween(uint64_t x, uint64_t m, uint64_t n) { |
229 | 0 | return (( (~0ULL/255*(127+(n))-((x)&~0ULL/255*127)) &~(x)& (((x)&~0ULL/255*127)+~0ULL/255*(127-(m))) )&~0ULL/255*128); |
230 | 0 | } |
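| | /* These are the classic SWAR (SIMD-within-a-register) byte probes from the
| |  * "Bit Twiddling Hacks" collection: for 0 <= n <= 128, hasLess(x, n) is
| |  * non-zero iff some byte lane of the 64-bit word x is below n. Borrow
| |  * propagation may flag extra lanes, so callers re-scan byte-wise to find
| |  * the exact position (as consumeRequestLine and tryConsumeFieldValue do).
| |  * Illustrative check, not part of the original source:
| |  *
| |  *   uint64_t w;
| |  *   memcpy(&w, "abc\rdefg", 8);
| |  *   assert(hasLess(w, 32)); // the CR (0x0D) lane trips the probe
| |  */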
231 | | |
232 | 0 | static inline bool notFieldNameWord(uint64_t x) { |
233 | 0 | return hasLess(x, '-') | |
234 | 0 | hasBetween(x, '-', '0') | |
235 | 0 | hasBetween(x, '9', 'A') | |
236 | 0 | hasBetween(x, 'Z', 'a') | |
237 | 0 | hasMore(x, 'z'); |
238 | 0 | } |
239 | | |
240 | | /* RFC 9110 5.6.2. Tokens */ |
241 | | /* Hyphen is not checked here as it is very common */ |
242 | | static inline bool isUnlikelyFieldNameByte(unsigned char c) |
243 | 19.7k | { |
244 | | /* Digits and 14 of the 15 non-alphanum characters (lacking hyphen) */ |
245 | 19.7k | return ((c == '~') | (c == '|') | (c == '`') | (c == '_') | (c == '^') | (c == '.') | (c == '+') |
246 | 19.7k | | (c == '*') | (c == '!')) || ((c >= 48) & (c <= 57)) || ((c <= 39) & (c >= 35)); |
247 | 19.7k | } |
248 | | |
249 | 81.8k | static inline bool isFieldNameByteFastLowercased(unsigned char &in) { |
250 | | /* Most common is lowercase alpha and hyphen */ |
251 | 81.8k | if (((in >= 97) & (in <= 122)) | (in == '-')) [[likely]] { |
252 | 57.7k | return true; |
253 | | /* Second is upper case alpha */ |
254 | 57.7k | } else if ((in >= 65) & (in <= 90)) [[unlikely]] { |
255 | 4.35k | in |= 32; |
256 | 4.35k | return true; |
257 | | /* These are rarely used but still valid */ |
258 | 19.7k | } else if (isUnlikelyFieldNameByte(in)) [[unlikely]] { |
259 | 14.6k | return true; |
260 | 14.6k | } |
261 | 5.09k | return false; |
262 | 81.8k | } |
263 | | |
264 | 61.9k | static inline void *consumeFieldName(char *p) { |
265 | | /* Best case fast path (particularly useful with clang) */ |
266 | 87.6k | while (true) { |
267 | 105k | while ((*p >= 65) & (*p <= 90)) [[likely]] { |
268 | 17.7k | *p |= 32; |
269 | 17.7k | p++; |
270 | 17.7k | } |
271 | 341k | while (((*p >= 97) & (*p <= 122))) [[likely]] { |
272 | 253k | p++; |
273 | 253k | } |
274 | 87.6k | if (*p == ':') { |
275 | 56.8k | return (void *)p; |
276 | 56.8k | } |
277 | 30.7k | if (*p == '-') { |
278 | 13.4k | p++; |
279 | 17.3k | } else if (!((*p >= 65) & (*p <= 90))) { |
280 | | /* Exit fast path parsing */ |
281 | 5.09k | break; |
282 | 5.09k | } |
283 | 30.7k | } |
284 | | |
285 | | /* Generic */ |
286 | 81.8k | while (isFieldNameByteFastLowercased(*(unsigned char *)p)) { |
287 | 76.7k | p++; |
288 | 76.7k | } |
289 | 5.09k | return (void *)p; |
290 | 61.9k | } |
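| | /* Contract sketch (illustrative): consumeFieldName lower-cases the field name
| |  * in place and stops at the first byte that is not a token character. For a
| |  * buffer holding "Content-Length: 42" it rewrites the key bytes to
| |  * "content-length" and returns a pointer to the ':'. */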
291 | | |
292 | | /* Puts method as key, target as value and returns non-null (or nullptr on error). */ |
293 | 73.4k | static inline char *consumeRequestLine(char *data, char *end, HttpRequest::Header &header) { |
294 | | /* Scan until single SP, assume next is / (origin request) */ |
295 | 73.4k | char *start = data; |
296 | | /* This catches the post padded CR and fails */ |
297 | 87.9M | while (data[0] > 32) data++; |
298 | 73.4k | if (&data[1] == end) [[unlikely]] { |
299 | 267 | return nullptr; |
300 | 267 | } |
301 | 73.1k | if (data[0] == 32 && data[1] == '/') [[likely]] { |
302 | 20.8k | header.key = {start, (size_t) (data - start)}; |
303 | 20.8k | data++; |
304 | | /* Scan for less than 33 (catches post padded CR and fails) */ |
305 | 20.8k | start = data; |
306 | 235k | for (; true; data += 8) { |
307 | 235k | uint64_t word; |
308 | 235k | memcpy(&word, data, sizeof(uint64_t)); |
309 | 235k | if (hasLess(word, 33)) { |
310 | 92.4k | while (*(unsigned char *)data > 32) data++; |
311 | | /* Now we stand on space */ |
312 | 20.8k | header.value = {start, (size_t) (data - start)}; |
313 | | /* Check that the following is http 1.1 */ |
314 | 20.8k | if (data + 11 >= end) { |
315 | | /* Whatever we have must be part of the version string */ |
316 | 1.31k | if (memcmp(" HTTP/1.1\r\n", data, std::min<unsigned int>(11, (unsigned int) (end - data))) == 0) { |
317 | 939 | return nullptr; |
318 | 939 | } |
319 | 373 | return (char *) 0x1; |
320 | 1.31k | } |
321 | 19.5k | if (memcmp(" HTTP/1.1\r\n", data, 11) == 0) { |
322 | 18.3k | return data + 11; |
323 | 18.3k | } |
324 | | /* If we stand at the post padded CR, we have fragmented input so try again later */ |
325 | 1.21k | if (data[0] == '\r') { |
326 | 460 | return nullptr; |
327 | 460 | } |
328 | | /* This is an error */ |
329 | 750 | return (char *) 0x1; |
330 | 1.21k | } |
331 | 235k | } |
332 | 20.8k | } |
333 | | /* If we stand at the post padded CR, we have fragmented input so try again later */ |
334 | 52.3k | if (data[0] == '\r') { |
335 | 29.2k | return nullptr; |
336 | 29.2k | } |
337 | 23.0k | return (char *) 0x1; |
338 | 52.3k | } |
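| | /* Return-value sketch (hedged, derived from the code above): for a buffer
| |  * "GET /index.html HTTP/1.1\r\n..." this sets header.key = "GET",
| |  * header.value = "/index.html" and returns a pointer just past the "\r\n".
| |  * nullptr means fragmented input (retry with more data); the sentinel
| |  * (char *) 0x1 means a malformed request line. */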
339 | | |
340 | | /* RFC 9110: 5.5 Field Values (TLDR; anything above 31 is allowed; htab (9) is also allowed) |
341 | | * Field values are usually constrained to the range of US-ASCII characters [...] |
342 | | * Field values containing CR, LF, or NUL characters are invalid and dangerous [...] |
343 | | * Field values containing other CTL characters are also invalid. */ |
344 | 61.6k | static inline void *tryConsumeFieldValue(char *p) { |
345 | 1.24M | for (; true; p += 8) { |
346 | 1.24M | uint64_t word; |
347 | 1.24M | memcpy(&word, p, sizeof(uint64_t)); |
348 | 1.24M | if (hasLess(word, 32)) { |
349 | 115k | while (*(unsigned char *)p > 31) p++; |
350 | 61.6k | return (void *)p; |
351 | 61.6k | } |
352 | 1.24M | } |
353 | 61.6k | } |
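| | /* Illustrative note: this stops at the first byte <= 31; the caller then
| |  * distinguishes '\r' (end of the value), '\t' (legal HTAB, keep scanning)
| |  * and anything else (invalid control byte, 400 Bad Request). */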
354 | | |
355 | | /* End is only used for the proxy parser. The HTTP parser treats "\r" followed by anything but "\n" (the "\ra" fence) as an invalid scan and breaks. */
356 | 73.4k | static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved, unsigned int &err) { |
357 | 73.4k | char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer; |
358 | | |
359 | | #ifdef UWS_WITH_PROXY |
360 | | /* ProxyParser is passed as reserved parameter */ |
361 | | ProxyParser *pp = (ProxyParser *) reserved; |
362 | | |
363 | | /* Parse PROXY protocol */ |
364 | | auto [done, offset] = pp->parse({postPaddedBuffer, (size_t) (end - postPaddedBuffer)}); |
365 | | if (!done) { |
366 | | /* We do not reset the ProxyParser (on failure) since it is tied to this
367 | | * connection, which is really only supposed to ever get one PROXY frame
368 | | * anyway. We do however allow multiple PROXY frames to be sent (each overwrites the former). */
369 | | return 0; |
370 | | } else { |
371 | | /* We have consumed this data so skip it */ |
372 | | postPaddedBuffer += offset; |
373 | | } |
374 | | #else |
375 | | /* This one is unused */ |
376 | 73.4k | (void) reserved; |
377 | 73.4k | (void) end; |
378 | 73.4k | #endif |
379 | | |
380 | | /* It is critical for fallback buffering logic that we only return with success |
381 | | * if we managed to parse a complete HTTP request (minus data). Returning success |
382 | | * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer;
383 | | * that buffer is then cleared, our counters overflow, and we end up with a crash */
384 | | |
385 | | /* The request line is different from the field names / field values */ |
386 | 73.4k | if ((char *) 2 > (postPaddedBuffer = consumeRequestLine(postPaddedBuffer, end, headers[0]))) { |
387 | | /* Error - invalid request line */ |
388 | | /* Assuming it is 505 HTTP Version Not Supported */ |
389 | 55.0k | err = postPaddedBuffer ? HTTP_ERROR_505_HTTP_VERSION_NOT_SUPPORTED : 0; |
390 | 55.0k | return 0; |
391 | 55.0k | } |
392 | 18.3k | headers++; |
393 | | |
394 | 62.1k | for (unsigned int i = 1; i < UWS_HTTP_MAX_HEADERS_COUNT - 1; i++) { |
395 | | /* Lower case and consume the field name */ |
396 | 61.9k | preliminaryKey = postPaddedBuffer; |
397 | 61.9k | postPaddedBuffer = (char *) consumeFieldName(postPaddedBuffer); |
398 | 61.9k | headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey)); |
399 | | |
400 | | /* We should not accept whitespace between key and colon, so the colon must follow immediately */
401 | 61.9k | if (postPaddedBuffer[0] != ':') { |
402 | | /* If we stand at the end, we are fragmented */ |
403 | 794 | if (postPaddedBuffer == end) { |
404 | 526 | return 0; |
405 | 526 | } |
406 | | /* Error: invalid chars in field name */ |
407 | 268 | err = HTTP_ERROR_400_BAD_REQUEST; |
408 | 268 | return 0; |
409 | 794 | } |
410 | 61.1k | postPaddedBuffer++; |
411 | | |
412 | 61.1k | preliminaryValue = postPaddedBuffer; |
413 | | /* The goal of this call is to find next "\r\n", or any invalid field value chars, fast */ |
414 | 61.6k | while (true) { |
415 | 61.6k | postPaddedBuffer = (char *) tryConsumeFieldValue(postPaddedBuffer); |
416 | | /* If this is not CR then we caught some stinky invalid char on the way */ |
417 | 61.6k | if (postPaddedBuffer[0] != '\r') { |
418 | | /* If TAB then keep searching */ |
419 | 698 | if (postPaddedBuffer[0] == '\t') { |
420 | 487 | postPaddedBuffer++; |
421 | 487 | continue; |
422 | 487 | } |
423 | | /* Error - invalid chars in field value */ |
424 | 211 | err = HTTP_ERROR_400_BAD_REQUEST; |
425 | 211 | return 0; |
426 | 698 | } |
427 | 60.9k | break; |
428 | 61.6k | } |
429 | | /* We fence end[0] with \r, followed by end[1] being something that is "not \n", to signify "not found". |
430 | | * This way we can have this one single check to see if we found \r\n WITHIN our allowed search space. */ |
431 | 60.9k | if (postPaddedBuffer[1] == '\n') { |
432 | | /* Store this header, it is valid */ |
433 | 59.1k | headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue)); |
434 | 59.1k | postPaddedBuffer += 2; |
435 | | |
436 | | /* Trim trailing whitespace (SP, HTAB) */ |
437 | 63.4k | while (headers->value.length() && headers->value.back() < 33) { |
438 | 4.32k | headers->value.remove_suffix(1); |
439 | 4.32k | } |
440 | | |
441 | | /* Trim initial whitespace (SP, HTAB) */ |
442 | 82.6k | while (headers->value.length() && headers->value.front() < 33) { |
443 | 23.5k | headers->value.remove_prefix(1); |
444 | 23.5k | } |
445 | | |
446 | 59.1k | headers++; |
447 | | |
448 | | /* We definitely have at least one header (or request line), so check if we are done */ |
449 | 59.1k | if (*postPaddedBuffer == '\r') { |
450 | 15.3k | if (postPaddedBuffer[1] == '\n') { |
451 | | /* This can take the very last header space */
452 | 14.6k | headers->key = std::string_view(nullptr, 0); |
453 | 14.6k | return (unsigned int) ((postPaddedBuffer + 2) - start); |
454 | 14.6k | } else { |
455 | | /* \r\n\r plus non-\n letter is malformed request, or simply out of search space */ |
456 | 674 | if (postPaddedBuffer + 1 < end) { |
457 | 201 | err = HTTP_ERROR_400_BAD_REQUEST; |
458 | 201 | } |
459 | 674 | return 0; |
460 | 674 | } |
461 | 15.3k | } |
462 | 59.1k | } else { |
463 | | /* We are either out of search space or this is a malformed request */ |
464 | 1.81k | return 0; |
465 | 1.81k | } |
466 | 60.9k | } |
467 | | /* We ran out of header space, too large request */ |
468 | 194 | err = HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE; |
469 | 194 | return 0; |
470 | 18.3k | } |
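| | /* Contract sketch (hedged, not in the original source): getHeaders returns
| |  * the number of bytes consumed when a complete "\r\n\r\n"-terminated header
| |  * section was parsed, and 0 otherwise; on 0, err separates the two cases:
| |  *
| |  *   unsigned int err = 0;
| |  *   unsigned int consumed = getHeaders(buf, buf + len, headers, nullptr, err);
| |  *   if (!consumed && err)  { respondWith(err); }  // malformed or too large
| |  *   if (!consumed && !err) { bufferAndWait(); }   // fragmented input (helpers hypothetical)
| |  */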
471 | | |
472 | | /* This is the only caller of getHeaders and is thus the deepest part of the parser. |
473 | | * From here we return either [consumed, user] for "keep going", |
474 | | * or [consumed, nullptr] for "break; I am closed or upgraded to websocket" |
475 | | * or [whatever, fullptr] for "break and close me, I am a parser error!" */ |
476 | | template <int CONSUME_MINIMALLY> |
477 | 90.4k | std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) { |
478 | | |
479 | | /* How much data we CONSUMED (to throw away) */ |
480 | 90.4k | unsigned int consumedTotal = 0; |
481 | 90.4k | unsigned int err = 0; |
482 | | |
483 | | /* Fence two bytes past end of our buffer (buffer has post padded margins). |
484 | | * This is to always catch scan for \r but not for \r\n. */ |
485 | 90.4k | data[length] = '\r'; |
486 | 90.4k | data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */ |
487 | | |
488 | 99.6k | for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) { |
489 | 14.6k | data += consumed; |
490 | 14.6k | length -= consumed; |
491 | 14.6k | consumedTotal += consumed; |
492 | | |
493 | | /* Even if we could parse it, check for length here as well */ |
494 | 14.6k | if (consumed > MAX_FALLBACK_SIZE) { |
495 | 20 | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; |
496 | 20 | } |
497 | | |
498 | | /* Store HTTP version (ancient 1.0 or 1.1) */ |
499 | 14.6k | req->ancientHttp = false; |
500 | | |
501 | | /* Add all headers to bloom filter */ |
502 | 14.6k | req->bf.reset(); |
503 | 48.8k | for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) { |
504 | 34.7k | if (req->bf.mightHave(h->key)) [[unlikely]] { |
505 | | /* Host header is not allowed twice */ |
506 | 4.87k | if (h->key == "host" && req->getHeader("host").data()) { |
507 | 518 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
508 | 518 | } |
509 | 4.87k | } |
510 | 34.2k | req->bf.add(h->key); |
511 | 34.2k | } |
512 | | |
513 | | /* Break if no host header (but we can have empty string which is different from nullptr) */ |
514 | 14.1k | if (!req->getHeader("host").data()) { |
515 | 1.80k | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
516 | 1.80k | } |
517 | | |
518 | | /* RFC 9112 6.3 |
519 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, |
520 | | * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt |
521 | | * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and |
522 | | * ought to be handled as an error. */ |
523 | 12.3k | std::string_view transferEncodingString = req->getHeader("transfer-encoding"); |
524 | 12.3k | std::string_view contentLengthString = req->getHeader("content-length"); |
525 | 12.3k | if (transferEncodingString.length() && contentLengthString.length()) { |
526 | | /* Returning fullptr is the same as calling the errorHandler */ |
527 | | /* We could be smart and set an error in the context along with this, to indicate what |
528 | | * http error response we might want to return */ |
529 | 461 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
530 | 461 | } |
531 | | |
532 | | /* Parse query */ |
533 | 11.8k | const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length()); |
534 | 11.8k | req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data()); |
535 | | |
536 | | /* If returned socket is not what we put in we need |
537 | | * to break here as we either have upgraded to |
538 | | * WebSockets or otherwise closed the socket. */ |
539 | 11.8k | void *returnedUser = requestHandler(user, req); |
540 | 11.8k | if (returnedUser != user) { |
541 | | /* We are upgraded to WebSocket or otherwise broken */ |
542 | 764 | return {consumedTotal, returnedUser}; |
543 | 764 | } |
544 | | |
545 | | /* The rules at play here according to RFC 9112 for requests are essentially: |
546 | | * If both content-length and transfer-encoding then invalid message; must break. |
547 | | * If has transfer-encoding then must be chunked regardless of value. |
548 | | * If content-length then fixed length even if 0. |
549 | | * If none of the above then fixed length is 0. */ |
550 | | |
551 | | /* RFC 9112 6.3 |
552 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, |
553 | | * the Transfer-Encoding overrides the Content-Length. */ |
554 | 11.0k | if (transferEncodingString.length()) { |
555 | | |
556 | | /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is |
557 | | * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates |
558 | | * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */ |
559 | | |
560 | | /* RFC 9112 6.3 |
561 | | * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the |
562 | | * final encoding, the message body length cannot be determined reliably; the server MUST respond with the |
563 | | * 400 (Bad Request) status code and then close the connection. */ |
564 | | |
565 | | /* In this case we fail later by having the wrong interpretation (assuming chunked). |
566 | | * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */ |
567 | | |
568 | 2.68k | remainingStreamingBytes = STATE_IS_CHUNKED; |
569 | | /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */ |
570 | 2.68k | if (!CONSUME_MINIMALLY) { |
571 | | /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */ |
572 | 1.98k | std::string_view dataToConsume(data, length); |
573 | 2.38k | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { |
574 | 2.38k | dataHandler(user, chunk, chunk.length() == 0); |
575 | 2.38k | } |
576 | 1.98k | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { |
577 | 28 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
578 | 28 | } |
579 | 1.95k | unsigned int consumed = (length - (unsigned int) dataToConsume.length()); |
580 | 1.95k | data = (char *) dataToConsume.data(); |
581 | 1.95k | length = (unsigned int) dataToConsume.length(); |
582 | 1.95k | consumedTotal += consumed; |
583 | 1.95k | } |
584 | 8.39k | } else if (contentLengthString.length()) { |
585 | 1.33k | remainingStreamingBytes = toUnsignedInteger(contentLengthString); |
586 | 1.33k | if (remainingStreamingBytes == UINT64_MAX) { |
587 | | /* Parser error */ |
588 | 113 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
589 | 113 | } |
590 | | |
591 | 1.22k | if (!CONSUME_MINIMALLY) { |
592 | 785 | unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length); |
593 | 785 | dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes); |
594 | 785 | remainingStreamingBytes -= emittable; |
595 | | |
596 | 785 | data += emittable; |
597 | 785 | length -= emittable; |
598 | 785 | consumedTotal += emittable; |
599 | 785 | } |
600 | 7.06k | } else { |
601 | | /* If we came here without a body; emit an empty data chunk to signal no data */ |
602 | 7.06k | dataHandler(user, {}, true); |
603 | 7.06k | } |
604 | | |
605 | | /* Consume minimally should break as early as possible */
606 | 10.9k | if (CONSUME_MINIMALLY) { |
607 | 1.79k | break; |
608 | 1.79k | } |
609 | 10.9k | } |
610 | | /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */ |
611 | 86.7k | if (err) { |
612 | 25.0k | return {err, FULLPTR}; |
613 | 25.0k | } |
614 | 61.7k | return {consumedTotal, user}; |
615 | 86.7k | }
616 | | |
617 | | public: |
618 | 89.7k | std::pair<unsigned int, void *> consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler) { |
619 | | |
620 | | /* This resets BloomFilter by construction, but later we also reset it again. |
621 | | * Optimize this to skip resetting twice (req could be made global) */ |
622 | 89.7k | HttpRequest req; |
623 | | |
624 | 89.7k | if (remainingStreamingBytes) { |
625 | | |
626 | | /* It's either chunked or with a content-length */ |
627 | 26.0k | if (isParsingChunkedEncoding(remainingStreamingBytes)) { |
628 | 25.1k | std::string_view dataToConsume(data, length); |
629 | 25.1k | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { |
630 | 24.7k | dataHandler(user, chunk, chunk.length() == 0); |
631 | 24.7k | } |
632 | 25.1k | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { |
633 | 343 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
634 | 343 | } |
635 | 24.8k | data = (char *) dataToConsume.data(); |
636 | 24.8k | length = (unsigned int) dataToConsume.length(); |
637 | 24.8k | } else { |
638 | | // this is exactly the same as below! |
639 | | // todo: refactor this |
640 | 891 | if (remainingStreamingBytes >= length) { |
641 | 504 | void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length); |
642 | 504 | remainingStreamingBytes -= length; |
643 | 504 | return {0, returnedUser}; |
644 | 504 | } else { |
645 | 387 | void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true); |
646 | | |
647 | 387 | data += (unsigned int) remainingStreamingBytes; |
648 | 387 | length -= (unsigned int) remainingStreamingBytes; |
649 | | |
650 | 387 | remainingStreamingBytes = 0; |
651 | | |
652 | 387 | if (returnedUser != user) { |
653 | 0 | return {0, returnedUser}; |
654 | 0 | } |
655 | 387 | } |
656 | 891 | } |
657 | | |
658 | 63.6k | } else if (fallback.length()) { |
659 | 37.5k | unsigned int had = (unsigned int) fallback.length(); |
660 | | |
661 | 37.5k | size_t maxCopyDistance = std::min<size_t>(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length); |
662 | | |
663 | | /* We don't want fallback to be short string optimized, since we want to move it */ |
664 | 37.5k | fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string))); |
665 | 37.5k | fallback.append(data, maxCopyDistance); |
666 | | |
667 | | // break here on break |
668 | 37.5k | std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler); |
669 | 37.5k | if (consumed.second != user) { |
670 | 4.52k | return consumed; |
671 | 4.52k | } |
672 | | |
673 | 32.9k | if (consumed.first) { |
674 | | |
675 | | /* This logic assumes that we consumed everything in fallback buffer. |
676 | | * This is critically important, as we will get an integer overflow in case |
677 | | * of "had" being larger than what we consumed, and that we would drop data */ |
678 | 1.79k | fallback.clear(); |
679 | 1.79k | data += consumed.first - had; |
680 | 1.79k | length -= consumed.first - had; |
681 | | |
682 | 1.79k | if (remainingStreamingBytes) { |
683 | | /* It's either chunked or with a content-length */ |
684 | 1.09k | if (isParsingChunkedEncoding(remainingStreamingBytes)) { |
685 | 703 | std::string_view dataToConsume(data, length); |
686 | 703 | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { |
687 | 693 | dataHandler(user, chunk, chunk.length() == 0); |
688 | 693 | } |
689 | 703 | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { |
690 | 28 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
691 | 28 | } |
692 | 675 | data = (char *) dataToConsume.data(); |
693 | 675 | length = (unsigned int) dataToConsume.length(); |
694 | 675 | } else { |
695 | | // this is exactly the same as above! |
696 | 388 | if (remainingStreamingBytes >= (unsigned int) length) { |
697 | 177 | void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length); |
698 | 177 | remainingStreamingBytes -= length; |
699 | 177 | return {0, returnedUser}; |
700 | 211 | } else { |
701 | 211 | void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true); |
702 | | |
703 | 211 | data += (unsigned int) remainingStreamingBytes; |
704 | 211 | length -= (unsigned int) remainingStreamingBytes; |
705 | | |
706 | 211 | remainingStreamingBytes = 0; |
707 | | |
708 | 211 | if (returnedUser != user) { |
709 | 0 | return {0, returnedUser}; |
710 | 0 | } |
711 | 211 | } |
712 | 388 | } |
713 | 1.09k | } |
714 | | |
715 | 31.2k | } else { |
716 | 31.2k | if (fallback.length() == MAX_FALLBACK_SIZE) { |
717 | 29.0k | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; |
718 | 29.0k | } |
719 | 2.13k | return {0, user}; |
720 | 31.2k | } |
721 | 32.9k | } |
722 | | |
723 | 52.9k | std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler); |
724 | 52.9k | if (consumed.second != user) { |
725 | 24.1k | return consumed; |
726 | 24.1k | } |
727 | | |
728 | 28.7k | data += consumed.first; |
729 | 28.7k | length -= consumed.first; |
730 | | |
731 | 28.7k | if (length) { |
732 | 2.57k | if (length < MAX_FALLBACK_SIZE) { |
733 | 2.54k | fallback.append(data, length); |
734 | 2.54k | } else { |
735 | 37 | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; |
736 | 37 | } |
737 | 2.57k | } |
738 | | |
739 | | // added for now |
740 | 28.7k | return {0, user}; |
741 | 28.7k | } |
742 | | }; |
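| | /* Caller-side sketch of the consumePostPadded() protocol (hedged example with
| |  * hypothetical handlers, not part of uWebSockets). Note that data must carry
| |  * post padding (at least MINIMUM_HTTP_POST_PADDING bytes past length):
| |  *
| |  *   auto [consumed, ret] = parser.consumePostPadded(data, length, user, nullptr,
| |  *       [](void *u, HttpRequest *req) { return u; },                    // per request
| |  *       [](void *u, std::string_view chunk, bool fin) { return u; });  // per body chunk
| |  *   if (ret == FULLPTR) { }   // parser error: "consumed" holds the HttpError code
| |  *   else if (ret != user) { } // socket upgraded or closed by the handler
| |  */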
743 | | |
744 | | } |
745 | | |
746 | | #endif // UWS_HTTPPARSER_H |