/src/uWebSockets/src/HttpParser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Authored by Alex Hultman, 2018-2024. |
3 | | * Intellectual property of third-party. |
4 | | |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at |
8 | | |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | #ifndef UWS_HTTPPARSER_H |
19 | | #define UWS_HTTPPARSER_H |
20 | | |
21 | | // todo: HttpParser is in need of a few clean-ups and refactorings |
22 | | |
23 | | /* The HTTP parser is an independent module subject to unit testing / fuzz testing */ |
24 | | |
#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <map>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include "MoveOnlyFunction.h"
#include "ChunkedEncoding.h"

#include "BloomFilter.h"
#include "ProxyParser.h"
#include "QueryParser.h"
#include "HttpErrors.h"
38 | | |
39 | | namespace uWS { |
40 | | |
/* We require at least this much post padding */
static const unsigned int MINIMUM_HTTP_POST_PADDING = 32;
/* Sentinel "parser error" pointer: all bits set, never a valid address */
static void *FULLPTR = (void *)~(uintptr_t)0;

/* STL needs one of these: wraps a possibly-null pointer in std::optional */
template <typename T>
std::optional<T *> optional_ptr(T *ptr) {
    return ptr ? std::optional<T *>(ptr) : std::nullopt;
}

/* Upper bound for buffered (incomplete) request headers, configurable via the
 * UWS_HTTP_MAX_HEADERS_SIZE environment variable (default 4096 bytes).
 * strtoul instead of atoi: atoi has undefined behavior on out-of-range input. */
static const size_t MAX_FALLBACK_SIZE = (size_t) strtoul(optional_ptr(getenv("UWS_HTTP_MAX_HEADERS_SIZE")).value_or((char *) "4096"), nullptr, 10);
#ifndef UWS_HTTP_MAX_HEADERS_COUNT
#define UWS_HTTP_MAX_HEADERS_COUNT 100
#endif
55 | | |
56 | | struct HttpRequest { |
57 | | |
58 | | friend struct HttpParser; |
59 | | |
60 | | private: |
61 | | struct Header { |
62 | | std::string_view key, value; |
63 | | } headers[UWS_HTTP_MAX_HEADERS_COUNT]; |
64 | | bool ancientHttp; |
65 | | unsigned int querySeparator; |
66 | | bool didYield; |
67 | | BloomFilter bf; |
68 | | std::pair<int, std::string_view *> currentParameters; |
69 | | std::map<std::string, unsigned short, std::less<>> *currentParameterOffsets = nullptr; |
70 | | |
71 | | public: |
72 | 0 | bool isAncient() { |
73 | 0 | return ancientHttp; |
74 | 0 | } |
75 | | |
76 | 0 | bool getYield() { |
77 | 0 | return didYield; |
78 | 0 | } |
79 | | |
80 | | /* Iteration over headers (key, value) */ |
81 | | struct HeaderIterator { |
82 | | Header *ptr; |
83 | | |
84 | 27.2k | bool operator!=(const HeaderIterator &other) const { |
85 | | /* Comparison with end is a special case */ |
86 | 27.2k | if (ptr != other.ptr) { |
87 | 27.2k | return other.ptr || ptr->key.length(); |
88 | 27.2k | } |
89 | 0 | return false; |
90 | 27.2k | } |
91 | | |
92 | 17.1k | HeaderIterator &operator++() { |
93 | 17.1k | ptr++; |
94 | 17.1k | return *this; |
95 | 17.1k | } |
96 | | |
97 | 17.1k | std::pair<std::string_view, std::string_view> operator*() const { |
98 | 17.1k | return {ptr->key, ptr->value}; |
99 | 17.1k | } |
100 | | }; |
101 | | |
102 | 10.0k | HeaderIterator begin() { |
103 | 10.0k | return {headers + 1}; |
104 | 10.0k | } |
105 | | |
106 | 10.0k | HeaderIterator end() { |
107 | 10.0k | return {nullptr}; |
108 | 10.0k | } |
109 | | |
110 | | /* If you do not want to handle this route */ |
111 | 0 | void setYield(bool yield) { |
112 | 0 | didYield = yield; |
113 | 0 | } |
114 | | |
115 | 45.9k | std::string_view getHeader(std::string_view lowerCasedHeader) { |
116 | 45.9k | if (bf.mightHave(lowerCasedHeader)) { |
117 | 27.2k | for (Header *h = headers; (++h)->key.length(); ) { |
118 | 26.2k | if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) { |
119 | 15.9k | return h->value; |
120 | 15.9k | } |
121 | 26.2k | } |
122 | 16.9k | } |
123 | 30.0k | return std::string_view(nullptr, 0); |
124 | 45.9k | } |
125 | | |
126 | 21.6k | std::string_view getUrl() { |
127 | 21.6k | return std::string_view(headers->value.data(), querySeparator); |
128 | 21.6k | } |
129 | | |
130 | 0 | std::string_view getFullUrl() { |
131 | 0 | return std::string_view(headers->value.data(), headers->value.length()); |
132 | 0 | } |
133 | | |
134 | | /* Hack: this should be getMethod */ |
135 | 0 | std::string_view getCaseSensitiveMethod() { |
136 | 0 | return std::string_view(headers->key.data(), headers->key.length()); |
137 | 0 | } |
138 | | |
139 | 21.6k | std::string_view getMethod() { |
140 | | /* Compatibility hack: lower case method (todo: remove when major version bumps) */ |
141 | 104k | for (unsigned int i = 0; i < headers->key.length(); i++) { |
142 | 82.4k | ((char *) headers->key.data())[i] |= 32; |
143 | 82.4k | } |
144 | | |
145 | 21.6k | return std::string_view(headers->key.data(), headers->key.length()); |
146 | 21.6k | } |
147 | | |
148 | | /* Returns the raw querystring as a whole, still encoded */ |
149 | 10.8k | std::string_view getQuery() { |
150 | 10.8k | if (querySeparator < headers->value.length()) { |
151 | | /* Strip the initial ? */ |
152 | 2.75k | return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1); |
153 | 8.06k | } else { |
154 | 8.06k | return std::string_view(nullptr, 0); |
155 | 8.06k | } |
156 | 10.8k | } |
157 | | |
158 | | /* Finds and decodes the URI component. */ |
159 | 21.6k | std::string_view getQuery(std::string_view key) { |
160 | | /* Raw querystring including initial '?' sign */ |
161 | 21.6k | std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator); |
162 | | |
163 | 21.6k | return getDecodedQueryValue(key, queryString); |
164 | 21.6k | } |
165 | | |
166 | 0 | void setParameters(std::pair<int, std::string_view *> parameters) { |
167 | 0 | currentParameters = parameters; |
168 | 0 | } |
169 | | |
170 | 0 | void setParameterOffsets(std::map<std::string, unsigned short, std::less<>> *offsets) { |
171 | 0 | currentParameterOffsets = offsets; |
172 | 0 | } |
173 | | |
174 | 0 | std::string_view getParameter(std::string_view name) { |
175 | 0 | if (!currentParameterOffsets) { |
176 | 0 | return {nullptr, 0}; |
177 | 0 | } |
178 | 0 | auto it = currentParameterOffsets->find(name); |
179 | 0 | if (it == currentParameterOffsets->end()) { |
180 | 0 | return {nullptr, 0}; |
181 | 0 | } |
182 | 0 | return getParameter(it->second); |
183 | 0 | } |
184 | | |
185 | 0 | std::string_view getParameter(unsigned short index) { |
186 | 0 | if (currentParameters.first < (int) index) { |
187 | 0 | return {}; |
188 | 0 | } else { |
189 | 0 | return currentParameters.second[index]; |
190 | 0 | } |
191 | 0 | } |
192 | | |
193 | | }; |
194 | | |
195 | | struct HttpParser { |
196 | | |
197 | | private: |
198 | | std::string fallback; |
199 | | /* This guy really has only 30 bits since we reserve two highest bits to chunked encoding parsing state */ |
200 | | uint64_t remainingStreamingBytes = 0; |
201 | | |
/* Parses a decimal unsigned integer; returns UINT64_MAX on any invalid input.
 * At most 18 digits are accepted, so no overflow is possible. */
static uint64_t toUnsignedInteger(std::string_view str) {
    /* A 64-bit integer safely holds 999999999999999999 (eighteen 9s) */
    if (str.length() > 18) {
        return UINT64_MAX;
    }

    uint64_t value = 0;
    for (size_t i = 0; i < str.length(); i++) {
        unsigned char digit = (unsigned char) str[i];
        /* Only 0-9 are valid; with <= 18 digits we cannot overflow */
        if (digit < '0' || digit > '9') {
            return UINT64_MAX;
        }
        value = value * 10ull + (digit - '0');
    }
    return value;
}
219 | | |
/* SWAR helpers: test all 8 bytes of a 64-bit word at once. */

/* Nonzero if any byte of x is strictly below n (n must be <= 128) */
static inline uint64_t hasLess(uint64_t x, uint64_t n) {
    return (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128);
}

/* Nonzero if any byte of x is strictly above n (n must be <= 127) */
static inline uint64_t hasMore(uint64_t x, uint64_t n) {
    return ((((x) + ~0ULL / 255 * (127 - (n))) | (x)) & ~0ULL / 255 * 128);
}

/* Nonzero if any byte of x lies strictly between m and n */
static inline uint64_t hasBetween(uint64_t x, uint64_t m, uint64_t n) {
    return (((~0ULL / 255 * (127 + (n)) - ((x) & ~0ULL / 255 * 127)) & ~(x) & (((x) & ~0ULL / 255 * 127) + ~0ULL / 255 * (127 - (m)))) & ~0ULL / 255 * 128);
}

/* True if any byte of x falls outside the common field-name alphabet
 * ('-', '0'-'9', 'A'-'Z', 'a'-'z'); note '.' and '/' also trip this check */
static inline bool notFieldNameWord(uint64_t x) {
    return hasLess(x, '-') |
        hasBetween(x, '-', '0') |
        hasBetween(x, '9', 'A') |
        hasBetween(x, 'Z', 'a') |
        hasMore(x, 'z');
}
239 | | |
/* RFC 9110 5.6.2. Tokens */
/* Hyphen is not checked here as it is very common */
static inline bool isUnlikelyFieldNameByte(unsigned char c)
{
    /* Digits, and 14 of the 15 non-alphanumeric token characters (hyphen excluded):
     * ~ | ` _ ^ . + * ! plus # $ % & ' (byte range 35..39) */
    return ((c == '~') | (c == '|') | (c == '`') | (c == '_') | (c == '^') | (c == '.') | (c == '+')
        | (c == '*') | (c == '!')) || ((c >= '0') & (c <= '9')) || ((c <= 39) & (c >= 35));
}

/* Accepts one field-name byte, folding uppercase to lowercase in place */
static inline bool isFieldNameByteFastLowercased(unsigned char &in) {
    /* Most common: lowercase alpha and hyphen */
    if (((in >= 'a') & (in <= 'z')) | (in == '-')) [[likely]] {
        return true;
    }
    /* Second most common: uppercase alpha, lowercased in place */
    if ((in >= 'A') & (in <= 'Z')) [[unlikely]] {
        in |= 32;
        return true;
    }
    /* Rarely used but still valid token bytes */
    if (isUnlikelyFieldNameByte(in)) [[unlikely]] {
        return true;
    }
    return false;
}

/* Advances over a field name, lowercasing it in place; returns the first non-token byte */
static inline void *consumeFieldName(char *p) {
    /* Best case fast path (particularly useful with clang): alpha runs and hyphens */
    while (true) {
        while ((*p >= 'A') & (*p <= 'Z')) [[likely]] {
            *p |= 32;
            p++;
        }
        while (((*p >= 'a') & (*p <= 'z'))) [[likely]] {
            p++;
        }
        if (*p == ':') {
            return (void *) p;
        }
        if (*p == '-') {
            p++;
        } else if (!((*p >= 'A') & (*p <= 'Z'))) {
            /* Leave the fast path */
            break;
        }
    }

    /* Generic path: remaining valid token bytes, lowercased where applicable */
    while (isFieldNameByteFastLowercased(*(unsigned char *) p)) {
        p++;
    }
    return (void *) p;
}
291 | | |
292 | | /* Puts method as key, target as value and returns non-null (or nullptr on error). */ |
293 | 88.6k | static inline char *consumeRequestLine(char *data, HttpRequest::Header &header) { |
294 | | /* Scan until single SP, assume next is / (origin request) */ |
295 | 88.6k | char *start = data; |
296 | | /* This catches the post padded CR and fails */ |
297 | 294k | while (data[0] > 32) data++; |
298 | 88.6k | if (data[0] == 32 && data[1] == '/') { |
299 | 21.2k | header.key = {start, (size_t) (data - start)}; |
300 | 21.2k | data++; |
301 | | /* Scan for less than 33 (catches post padded CR and fails) */ |
302 | 21.2k | start = data; |
303 | 633k | for (; true; data += 8) { |
304 | 633k | uint64_t word; |
305 | 633k | memcpy(&word, data, sizeof(uint64_t)); |
306 | 633k | if (hasLess(word, 33)) { |
307 | 88.3k | while (*(unsigned char *)data > 32) data++; |
308 | | /* Now we stand on space */ |
309 | 21.2k | header.value = {start, (size_t) (data - start)}; |
310 | | /* Check that the following is http 1.1 */ |
311 | 21.2k | if (memcmp(" HTTP/1.1\r\n", data, 11) == 0) { |
312 | 20.3k | return data + 11; |
313 | 20.3k | } |
314 | 902 | return nullptr; |
315 | 21.2k | } |
316 | 633k | } |
317 | 21.2k | } |
318 | 67.3k | return nullptr; |
319 | 88.6k | } |
320 | | |
321 | | /* RFC 9110: 5.5 Field Values (TLDR; anything above 31 is allowed; htab (9) is also allowed) |
322 | | * Field values are usually constrained to the range of US-ASCII characters [...] |
323 | | * Field values containing CR, LF, or NUL characters are invalid and dangerous [...] |
324 | | * Field values containing other CTL characters are also invalid. */ |
325 | 61.4k | static inline void *tryConsumeFieldValue(char *p) { |
326 | 500k | for (; true; p += 8) { |
327 | 500k | uint64_t word; |
328 | 500k | memcpy(&word, p, sizeof(uint64_t)); |
329 | 500k | if (hasLess(word, 32)) { |
330 | 115k | while (*(unsigned char *)p > 31) p++; |
331 | 61.4k | return (void *)p; |
332 | 61.4k | } |
333 | 500k | } |
334 | 61.4k | } |
335 | | |
336 | | /* End is only used for the proxy parser. The HTTP parser recognizes "\ra" as invalid "\r\n" scan and breaks. */ |
337 | 90.9k | static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved, unsigned int &err) { |
338 | 90.9k | char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer; |
339 | | |
340 | 90.9k | #ifdef UWS_WITH_PROXY |
341 | | /* ProxyParser is passed as reserved parameter */ |
342 | 90.9k | ProxyParser *pp = (ProxyParser *) reserved; |
343 | | |
344 | | /* Parse PROXY protocol */ |
345 | 90.9k | auto [done, offset] = pp->parse({postPaddedBuffer, (size_t) (end - postPaddedBuffer)}); |
346 | 90.9k | if (!done) { |
347 | | /* We do not reset the ProxyParser (on filure) since it is tied to this |
348 | | * connection, which is really only supposed to ever get one PROXY frame |
349 | | * anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */ |
350 | 2.28k | return 0; |
351 | 88.6k | } else { |
352 | | /* We have consumed this data so skip it */ |
353 | 88.6k | postPaddedBuffer += offset; |
354 | 88.6k | } |
355 | | #else |
356 | | /* This one is unused */ |
357 | | (void) reserved; |
358 | | (void) end; |
359 | | #endif |
360 | | |
361 | | /* It is critical for fallback buffering logic that we only return with success |
362 | | * if we managed to parse a complete HTTP request (minus data). Returning success |
363 | | * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer |
364 | | * which is then removed, and our counters to flip due to overflow and we end up with a crash */ |
365 | | |
366 | | /* The request line is different from the field names / field values */ |
367 | 88.6k | if (!(postPaddedBuffer = consumeRequestLine(postPaddedBuffer, headers[0]))) { |
368 | | /* Error - invalid request line */ |
369 | | /* Assuming it is 505 HTTP Version Not Supported */ |
370 | 68.2k | err = HTTP_ERROR_505_HTTP_VERSION_NOT_SUPPORTED; |
371 | 68.2k | return 0; |
372 | 68.2k | } |
373 | 20.3k | headers++; |
374 | | |
375 | 65.4k | for (unsigned int i = 1; i < UWS_HTTP_MAX_HEADERS_COUNT - 1; i++) { |
376 | | /* Lower case and consume the field name */ |
377 | 65.2k | preliminaryKey = postPaddedBuffer; |
378 | 65.2k | postPaddedBuffer = (char *) consumeFieldName(postPaddedBuffer); |
379 | 65.2k | headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey)); |
380 | | |
381 | | /* We should not accept whitespace between key and colon, so colon must foloow immediately */ |
382 | 65.2k | if (postPaddedBuffer[0] != ':') { |
383 | | /* Error: invalid chars in field name */ |
384 | 4.45k | return 0; |
385 | 4.45k | } |
386 | 60.7k | postPaddedBuffer++; |
387 | | |
388 | 60.7k | preliminaryValue = postPaddedBuffer; |
389 | | /* The goal of this call is to find next "\r\n", or any invalid field value chars, fast */ |
390 | 61.4k | while (true) { |
391 | 61.4k | postPaddedBuffer = (char *) tryConsumeFieldValue(postPaddedBuffer); |
392 | | /* If this is not CR then we caught some stinky invalid char on the way */ |
393 | 61.4k | if (postPaddedBuffer[0] != '\r') { |
394 | | /* If TAB then keep searching */ |
395 | 1.86k | if (postPaddedBuffer[0] == '\t') { |
396 | 673 | postPaddedBuffer++; |
397 | 673 | continue; |
398 | 673 | } |
399 | | /* Error - invalid chars in field value */ |
400 | 1.19k | return 0; |
401 | 1.86k | } |
402 | 59.6k | break; |
403 | 61.4k | } |
404 | | /* We fence end[0] with \r, followed by end[1] being something that is "not \n", to signify "not found". |
405 | | * This way we can have this one single check to see if we found \r\n WITHIN our allowed search space. */ |
406 | 59.6k | if (postPaddedBuffer[1] == '\n') { |
407 | | /* Store this header, it is valid */ |
408 | 58.2k | headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue)); |
409 | 58.2k | postPaddedBuffer += 2; |
410 | | |
411 | | /* Trim trailing whitespace (SP, HTAB) */ |
412 | 67.3k | while (headers->value.length() && headers->value.back() < 33) { |
413 | 9.16k | headers->value.remove_suffix(1); |
414 | 9.16k | } |
415 | | |
416 | | /* Trim initial whitespace (SP, HTAB) */ |
417 | 73.6k | while (headers->value.length() && headers->value.front() < 33) { |
418 | 15.4k | headers->value.remove_prefix(1); |
419 | 15.4k | } |
420 | | |
421 | 58.2k | headers++; |
422 | | |
423 | | /* We definitely have at least one header (or request line), so check if we are done */ |
424 | 58.2k | if (*postPaddedBuffer == '\r') { |
425 | 13.1k | if (postPaddedBuffer[1] == '\n') { |
426 | | /* This cann take the very last header space */ |
427 | 12.6k | headers->key = std::string_view(nullptr, 0); |
428 | 12.6k | return (unsigned int) ((postPaddedBuffer + 2) - start); |
429 | 12.6k | } else { |
430 | | /* \r\n\r plus non-\n letter is malformed request, or simply out of search space */ |
431 | 494 | return 0; |
432 | 494 | } |
433 | 13.1k | } |
434 | 58.2k | } else { |
435 | | /* We are either out of search space or this is a malformed request */ |
436 | 1.38k | return 0; |
437 | 1.38k | } |
438 | 59.6k | } |
439 | | /* We ran out of header space, too large request */ |
440 | 194 | return 0; |
441 | 20.3k | } |
442 | | |
443 | | /* This is the only caller of getHeaders and is thus the deepest part of the parser. |
444 | | * From here we return either [consumed, user] for "keep going", |
445 | | * or [consumed, nullptr] for "break; I am closed or upgraded to websocket" |
446 | | * or [whatever, fullptr] for "break and close me, I am a parser error!" */ |
447 | | template <int CONSUME_MINIMALLY> |
448 | 105k | std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) { |
449 | | |
450 | | /* How much data we CONSUMED (to throw away) */ |
451 | 105k | unsigned int consumedTotal = 0; |
452 | 105k | unsigned int err = 0; |
453 | | |
454 | | /* Fence two bytes past end of our buffer (buffer has post padded margins). |
455 | | * This is to always catch scan for \r but not for \r\n. */ |
456 | 105k | data[length] = '\r'; |
457 | 105k | data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */ |
458 | | |
459 | 112k | for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) { |
460 | 12.6k | data += consumed; |
461 | 12.6k | length -= consumed; |
462 | 12.6k | consumedTotal += consumed; |
463 | | |
464 | | /* Even if we could parse it, check for length here as well */ |
465 | 12.6k | if (consumed > MAX_FALLBACK_SIZE) { |
466 | 21 | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; |
467 | 21 | } |
468 | | |
469 | | /* Store HTTP version (ancient 1.0 or 1.1) */ |
470 | 12.6k | req->ancientHttp = false; |
471 | | |
472 | | /* Add all headers to bloom filter */ |
473 | 12.6k | req->bf.reset(); |
474 | 34.9k | for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) { |
475 | 22.3k | req->bf.add(h->key); |
476 | 22.3k | } |
477 | | |
478 | | /* Break if no host header (but we can have empty string which is different from nullptr) */ |
479 | 12.6k | if (!req->getHeader("host").data()) { |
480 | 1.32k | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
481 | 1.32k | } |
482 | | |
483 | | /* RFC 9112 6.3 |
484 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, |
485 | | * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt |
486 | | * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and |
487 | | * ought to be handled as an error. */ |
488 | 11.2k | std::string_view transferEncodingString = req->getHeader("transfer-encoding"); |
489 | 11.2k | std::string_view contentLengthString = req->getHeader("content-length"); |
490 | 11.2k | if (transferEncodingString.length() && contentLengthString.length()) { |
491 | | /* Returning fullptr is the same as calling the errorHandler */ |
492 | | /* We could be smart and set an error in the context along with this, to indicate what |
493 | | * http error response we might want to return */ |
494 | 465 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
495 | 465 | } |
496 | | |
497 | | /* Parse query */ |
498 | 10.8k | const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length()); |
499 | 10.8k | req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data()); |
500 | | |
501 | | /* If returned socket is not what we put in we need |
502 | | * to break here as we either have upgraded to |
503 | | * WebSockets or otherwise closed the socket. */ |
504 | 10.8k | void *returnedUser = requestHandler(user, req); |
505 | 10.8k | if (returnedUser != user) { |
506 | | /* We are upgraded to WebSocket or otherwise broken */ |
507 | 743 | return {consumedTotal, returnedUser}; |
508 | 743 | } |
509 | | |
510 | | /* The rules at play here according to RFC 9112 for requests are essentially: |
511 | | * If both content-length and transfer-encoding then invalid message; must break. |
512 | | * If has transfer-encoding then must be chunked regardless of value. |
513 | | * If content-length then fixed length even if 0. |
514 | | * If none of the above then fixed length is 0. */ |
515 | | |
516 | | /* RFC 9112 6.3 |
517 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, |
518 | | * the Transfer-Encoding overrides the Content-Length. */ |
519 | 10.0k | if (transferEncodingString.length()) { |
520 | | |
521 | | /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is |
522 | | * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates |
523 | | * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */ |
524 | | |
525 | | /* RFC 9112 6.3 |
526 | | * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the |
527 | | * final encoding, the message body length cannot be determined reliably; the server MUST respond with the |
528 | | * 400 (Bad Request) status code and then close the connection. */ |
529 | | |
530 | | /* In this case we fail later by having the wrong interpretation (assuming chunked). |
531 | | * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */ |
532 | | |
533 | 1.98k | remainingStreamingBytes = STATE_IS_CHUNKED; |
534 | | /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */ |
535 | 1.98k | if (!CONSUME_MINIMALLY) { |
536 | | /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */ |
537 | 1.45k | std::string_view dataToConsume(data, length); |
538 | 1.93k | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { |
539 | 1.93k | dataHandler(user, chunk, chunk.length() == 0); |
540 | 1.93k | } |
541 | 1.45k | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { |
542 | 27 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
543 | 27 | } |
544 | 1.43k | unsigned int consumed = (length - (unsigned int) dataToConsume.length()); |
545 | 1.43k | data = (char *) dataToConsume.data(); |
546 | 1.43k | length = (unsigned int) dataToConsume.length(); |
547 | 1.43k | consumedTotal += consumed; |
548 | 1.43k | } |
549 | 8.08k | } else if (contentLengthString.length()) { |
550 | 1.72k | remainingStreamingBytes = toUnsignedInteger(contentLengthString); |
551 | 1.72k | if (remainingStreamingBytes == UINT64_MAX) { |
552 | | /* Parser error */ |
553 | 111 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
554 | 111 | } |
555 | | |
556 | 1.60k | if (!CONSUME_MINIMALLY) { |
557 | 910 | unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length); |
558 | 910 | dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes); |
559 | 910 | remainingStreamingBytes -= emittable; |
560 | | |
561 | 910 | data += emittable; |
562 | 910 | length -= emittable; |
563 | 910 | consumedTotal += emittable; |
564 | 910 | } |
565 | 6.36k | } else { |
566 | | /* If we came here without a body; emit an empty data chunk to signal no data */ |
567 | 6.36k | dataHandler(user, {}, true); |
568 | 6.36k | } |
569 | | |
570 | | /* Consume minimally should break as easrly as possible */ |
571 | 9.93k | if (CONSUME_MINIMALLY) { |
572 | 2.13k | break; |
573 | 2.13k | } |
574 | 9.93k | } |
575 | | /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */ |
576 | 102k | if (err) { |
577 | 68.2k | return {err, FULLPTR}; |
578 | 68.2k | } |
579 | 34.1k | return {consumedTotal, user}; |
580 | 102k | } std::__1::pair<unsigned int, void*> uWS::HttpParser::fenceAndConsumePostPadded<1>(char*, unsigned int, void*, void*, uWS::HttpRequest*, ofats::any_invocable<void* (void*, uWS::HttpRequest*)>&, ofats::any_invocable<void* (void*, std::__1::basic_string_view<char, std::__1::char_traits<char> >, bool)>&) Line | Count | Source | 448 | 23.6k | std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) { | 449 | | | 450 | | /* How much data we CONSUMED (to throw away) */ | 451 | 23.6k | unsigned int consumedTotal = 0; | 452 | 23.6k | unsigned int err = 0; | 453 | | | 454 | | /* Fence two bytes past end of our buffer (buffer has post padded margins). | 455 | | * This is to always catch scan for \r but not for \r\n. */ | 456 | 23.6k | data[length] = '\r'; | 457 | 23.6k | data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */ | 458 | | | 459 | 23.6k | for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) { | 460 | 3.50k | data += consumed; | 461 | 3.50k | length -= consumed; | 462 | 3.50k | consumedTotal += consumed; | 463 | | | 464 | | /* Even if we could parse it, check for length here as well */ | 465 | 3.50k | if (consumed > MAX_FALLBACK_SIZE) { | 466 | 0 | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; | 467 | 0 | } | 468 | | | 469 | | /* Store HTTP version (ancient 1.0 or 1.1) */ | 470 | 3.50k | req->ancientHttp = false; | 471 | | | 472 | | /* Add all headers to bloom filter */ | 473 | 3.50k | req->bf.reset(); | 474 | 12.1k | for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) { | 475 | 8.65k | req->bf.add(h->key); | 476 | 8.65k | } | 477 | | | 478 | | /* Break if no host header (but we can have empty string which is 
different from nullptr) */ | 479 | 3.50k | if (!req->getHeader("host").data()) { | 480 | 1.02k | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 481 | 1.02k | } | 482 | | | 483 | | /* RFC 9112 6.3 | 484 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, | 485 | | * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt | 486 | | * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and | 487 | | * ought to be handled as an error. */ | 488 | 2.47k | std::string_view transferEncodingString = req->getHeader("transfer-encoding"); | 489 | 2.47k | std::string_view contentLengthString = req->getHeader("content-length"); | 490 | 2.47k | if (transferEncodingString.length() && contentLengthString.length()) { | 491 | | /* Returning fullptr is the same as calling the errorHandler */ | 492 | | /* We could be smart and set an error in the context along with this, to indicate what | 493 | | * http error response we might want to return */ | 494 | 265 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 495 | 265 | } | 496 | | | 497 | | /* Parse query */ | 498 | 2.21k | const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length()); | 499 | 2.21k | req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data()); | 500 | | | 501 | | /* If returned socket is not what we put in we need | 502 | | * to break here as we either have upgraded to | 503 | | * WebSockets or otherwise closed the socket. 
*/ | 504 | 2.21k | void *returnedUser = requestHandler(user, req); | 505 | 2.21k | if (returnedUser != user) { | 506 | | /* We are upgraded to WebSocket or otherwise broken */ | 507 | 21 | return {consumedTotal, returnedUser}; | 508 | 21 | } | 509 | | | 510 | | /* The rules at play here according to RFC 9112 for requests are essentially: | 511 | | * If both content-length and transfer-encoding then invalid message; must break. | 512 | | * If has transfer-encoding then must be chunked regardless of value. | 513 | | * If content-length then fixed length even if 0. | 514 | | * If none of the above then fixed length is 0. */ | 515 | | | 516 | | /* RFC 9112 6.3 | 517 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, | 518 | | * the Transfer-Encoding overrides the Content-Length. */ | 519 | 2.19k | if (transferEncodingString.length()) { | 520 | | | 521 | | /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is | 522 | | * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates | 523 | | * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */ | 524 | | | 525 | | /* RFC 9112 6.3 | 526 | | * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the | 527 | | * final encoding, the message body length cannot be determined reliably; the server MUST respond with the | 528 | | * 400 (Bad Request) status code and then close the connection. */ | 529 | | | 530 | | /* In this case we fail later by having the wrong interpretation (assuming chunked). | 531 | | * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. 
*/ | 532 | | | 533 | 531 | remainingStreamingBytes = STATE_IS_CHUNKED; | 534 | | /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */ | 535 | 531 | if (!CONSUME_MINIMALLY) { | 536 | | /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */ | 537 | 0 | std::string_view dataToConsume(data, length); | 538 | 0 | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { | 539 | 0 | dataHandler(user, chunk, chunk.length() == 0); | 540 | 0 | } | 541 | 0 | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { | 542 | 0 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 543 | 0 | } | 544 | 0 | unsigned int consumed = (length - (unsigned int) dataToConsume.length()); | 545 | 0 | data = (char *) dataToConsume.data(); | 546 | 0 | length = (unsigned int) dataToConsume.length(); | 547 | 0 | consumedTotal += consumed; | 548 | 0 | } | 549 | 1.66k | } else if (contentLengthString.length()) { | 550 | 753 | remainingStreamingBytes = toUnsignedInteger(contentLengthString); | 551 | 753 | if (remainingStreamingBytes == UINT64_MAX) { | 552 | | /* Parser error */ | 553 | 54 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 554 | 54 | } | 555 | | | 556 | 699 | if (!CONSUME_MINIMALLY) { | 557 | 0 | unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length); | 558 | 0 | dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes); | 559 | 0 | remainingStreamingBytes -= emittable; | 560 | |
| 561 | 0 | data += emittable; | 562 | 0 | length -= emittable; | 563 | 0 | consumedTotal += emittable; | 564 | 0 | } | 565 | 909 | } else { | 566 | | /* If we came here without a body; emit an empty data chunk to signal no data */ | 567 | 909 | dataHandler(user, {}, true); | 568 | 909 | } | 569 | | | 570 | | /* Consume minimally should break as easrly as possible */ | 571 | 2.13k | if (CONSUME_MINIMALLY) { | 572 | 2.13k | break; | 573 | 2.13k | } | 574 | 2.13k | } | 575 | | /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */ | 576 | 22.3k | if (err) { | 577 | 13.1k | return {err, FULLPTR}; | 578 | 13.1k | } | 579 | 9.19k | return {consumedTotal, user}; | 580 | 22.3k | } |
std::__1::pair<unsigned int, void*> uWS::HttpParser::fenceAndConsumePostPadded<0>(char*, unsigned int, void*, void*, uWS::HttpRequest*, ofats::any_invocable<void* (void*, uWS::HttpRequest*)>&, ofats::any_invocable<void* (void*, std::__1::basic_string_view<char, std::__1::char_traits<char> >, bool)>&) Line | Count | Source | 448 | 81.4k | std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) { | 449 | | | 450 | | /* How much data we CONSUMED (to throw away) */ | 451 | 81.4k | unsigned int consumedTotal = 0; | 452 | 81.4k | unsigned int err = 0; | 453 | | | 454 | | /* Fence two bytes past end of our buffer (buffer has post padded margins). | 455 | | * This is to always catch scan for \r but not for \r\n. */ | 456 | 81.4k | data[length] = '\r'; | 457 | 81.4k | data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */ | 458 | | | 459 | 89.2k | for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) { | 460 | 9.11k | data += consumed; | 461 | 9.11k | length -= consumed; | 462 | 9.11k | consumedTotal += consumed; | 463 | | | 464 | | /* Even if we could parse it, check for length here as well */ | 465 | 9.11k | if (consumed > MAX_FALLBACK_SIZE) { | 466 | 21 | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; | 467 | 21 | } | 468 | | | 469 | | /* Store HTTP version (ancient 1.0 or 1.1) */ | 470 | 9.09k | req->ancientHttp = false; | 471 | | | 472 | | /* Add all headers to bloom filter */ | 473 | 9.09k | req->bf.reset(); | 474 | 22.7k | for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) { | 475 | 13.6k | req->bf.add(h->key); | 476 | 13.6k | } | 477 | | | 478 | | /* Break if no host header (but we can have empty string which is different from 
nullptr) */ | 479 | 9.09k | if (!req->getHeader("host").data()) { | 480 | 295 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 481 | 295 | } | 482 | | | 483 | | /* RFC 9112 6.3 | 484 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, | 485 | | * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt | 486 | | * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and | 487 | | * ought to be handled as an error. */ | 488 | 8.80k | std::string_view transferEncodingString = req->getHeader("transfer-encoding"); | 489 | 8.80k | std::string_view contentLengthString = req->getHeader("content-length"); | 490 | 8.80k | if (transferEncodingString.length() && contentLengthString.length()) { | 491 | | /* Returning fullptr is the same as calling the errorHandler */ | 492 | | /* We could be smart and set an error in the context along with this, to indicate what | 493 | | * http error response we might want to return */ | 494 | 200 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 495 | 200 | } | 496 | | | 497 | | /* Parse query */ | 498 | 8.60k | const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length()); | 499 | 8.60k | req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data()); | 500 | | | 501 | | /* If returned socket is not what we put in we need | 502 | | * to break here as we either have upgraded to | 503 | | * WebSockets or otherwise closed the socket. 
*/ | 504 | 8.60k | void *returnedUser = requestHandler(user, req); | 505 | 8.60k | if (returnedUser != user) { | 506 | | /* We are upgraded to WebSocket or otherwise broken */ | 507 | 722 | return {consumedTotal, returnedUser}; | 508 | 722 | } | 509 | | | 510 | | /* The rules at play here according to RFC 9112 for requests are essentially: | 511 | | * If both content-length and transfer-encoding then invalid message; must break. | 512 | | * If has transfer-encoding then must be chunked regardless of value. | 513 | | * If content-length then fixed length even if 0. | 514 | | * If none of the above then fixed length is 0. */ | 515 | | | 516 | | /* RFC 9112 6.3 | 517 | | * If a message is received with both a Transfer-Encoding and a Content-Length header field, | 518 | | * the Transfer-Encoding overrides the Content-Length. */ | 519 | 7.88k | if (transferEncodingString.length()) { | 520 | | | 521 | | /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is | 522 | | * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates | 523 | | * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */ | 524 | | | 525 | | /* RFC 9112 6.3 | 526 | | * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the | 527 | | * final encoding, the message body length cannot be determined reliably; the server MUST respond with the | 528 | | * 400 (Bad Request) status code and then close the connection. */ | 529 | | | 530 | | /* In this case we fail later by having the wrong interpretation (assuming chunked). | 531 | | * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. 
*/ | 532 | | | 533 | 1.45k | remainingStreamingBytes = STATE_IS_CHUNKED; | 534 | | /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */ | 535 | 1.45k | if (!CONSUME_MINIMALLY) { | 536 | | /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */ | 537 | 1.45k | std::string_view dataToConsume(data, length); | 538 | 1.93k | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { | 539 | 1.93k | dataHandler(user, chunk, chunk.length() == 0); | 540 | 1.93k | } | 541 | 1.45k | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { | 542 | 27 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 543 | 27 | } | 544 | 1.43k | unsigned int consumed = (length - (unsigned int) dataToConsume.length()); | 545 | 1.43k | data = (char *) dataToConsume.data(); | 546 | 1.43k | length = (unsigned int) dataToConsume.length(); | 547 | 1.43k | consumedTotal += consumed; | 548 | 1.43k | } | 549 | 6.42k | } else if (contentLengthString.length()) { | 550 | 967 | remainingStreamingBytes = toUnsignedInteger(contentLengthString); | 551 | 967 | if (remainingStreamingBytes == UINT64_MAX) { | 552 | | /* Parser error */ | 553 | 57 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; | 554 | 57 | } | 555 | | | 556 | 910 | if (!CONSUME_MINIMALLY) { | 557 | 910 | unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length); | 558 | 910 | dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes); | 559 | 910 | remainingStreamingBytes -= emittable; | 560 | | | 561 | 910 | data += emittable; | 562 | 910 | length -= emittable; | 563 | 910 | consumedTotal += emittable; | 564 | 910 | } | 565 | 5.45k | } else { | 566 | | /* If we came here without a body; emit an empty data chunk to signal no data */ | 567 | 5.45k | dataHandler(user, {}, true); | 568 | 5.45k | } | 569 | | | 570 | | /* Consume minimally should break as easrly as possible 
*/ | 571 | 7.79k | if (CONSUME_MINIMALLY) { | 572 | 0 | break; | 573 | 0 | } | 574 | 7.79k | } | 575 | | /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */ | 576 | 80.0k | if (err) { | 577 | 55.1k | return {err, FULLPTR}; | 578 | 55.1k | } | 579 | 24.9k | return {consumedTotal, user}; | 580 | 80.0k | } |
|
581 | | |
582 | | public: |
583 | 104k | std::pair<unsigned int, void *> consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler) { |
584 | | |
585 | | /* This resets BloomFilter by construction, but later we also reset it again. |
586 | | * Optimize this to skip resetting twice (req could be made global) */ |
587 | 104k | HttpRequest req; |
588 | | |
589 | 104k | if (remainingStreamingBytes) { |
590 | | |
591 | | /* It's either chunked or with a content-length */ |
592 | 21.8k | if (isParsingChunkedEncoding(remainingStreamingBytes)) { |
593 | 20.9k | std::string_view dataToConsume(data, length); |
594 | 21.0k | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { |
595 | 21.0k | dataHandler(user, chunk, chunk.length() == 0); |
596 | 21.0k | } |
597 | 20.9k | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { |
598 | 341 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
599 | 341 | } |
600 | 20.6k | data = (char *) dataToConsume.data(); |
601 | 20.6k | length = (unsigned int) dataToConsume.length(); |
602 | 20.6k | } else { |
603 | | // this is exactly the same as below! |
604 | | // todo: refactor this |
605 | 863 | if (remainingStreamingBytes >= length) { |
606 | 438 | void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length); |
607 | 438 | remainingStreamingBytes -= length; |
608 | 438 | return {0, returnedUser}; |
609 | 438 | } else { |
610 | 425 | void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true); |
611 | | |
612 | 425 | data += (unsigned int) remainingStreamingBytes; |
613 | 425 | length -= (unsigned int) remainingStreamingBytes; |
614 | | |
615 | 425 | remainingStreamingBytes = 0; |
616 | | |
617 | 425 | if (returnedUser != user) { |
618 | 0 | return {0, returnedUser}; |
619 | 0 | } |
620 | 425 | } |
621 | 863 | } |
622 | | |
623 | 82.2k | } else if (fallback.length()) { |
624 | 23.6k | unsigned int had = (unsigned int) fallback.length(); |
625 | | |
626 | 23.6k | size_t maxCopyDistance = std::min<size_t>(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length); |
627 | | |
628 | | /* We don't want fallback to be short string optimized, since we want to move it */ |
629 | 23.6k | fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string))); |
630 | 23.6k | fallback.append(data, maxCopyDistance); |
631 | | |
632 | | // break here on break |
633 | 23.6k | std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler); |
634 | 23.6k | if (consumed.second != user) { |
635 | 14.4k | return consumed; |
636 | 14.4k | } |
637 | | |
638 | 9.19k | if (consumed.first) { |
639 | | |
640 | | /* This logic assumes that we consumed everything in fallback buffer. |
641 | | * This is critically important, as we will get an integer overflow in case |
642 | | * of "had" being larger than what we consumed, and that we would drop data */ |
643 | 2.13k | fallback.clear(); |
644 | 2.13k | data += consumed.first - had; |
645 | 2.13k | length -= consumed.first - had; |
646 | | |
647 | 2.13k | if (remainingStreamingBytes) { |
648 | | /* It's either chunked or with a content-length */ |
649 | 1.20k | if (isParsingChunkedEncoding(remainingStreamingBytes)) { |
650 | 531 | std::string_view dataToConsume(data, length); |
651 | 619 | for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) { |
652 | 619 | dataHandler(user, chunk, chunk.length() == 0); |
653 | 619 | } |
654 | 531 | if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) { |
655 | 35 | return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR}; |
656 | 35 | } |
657 | 496 | data = (char *) dataToConsume.data(); |
658 | 496 | length = (unsigned int) dataToConsume.length(); |
659 | 669 | } else { |
660 | | // this is exactly the same as above! |
661 | 669 | if (remainingStreamingBytes >= (unsigned int) length) { |
662 | 299 | void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length); |
663 | 299 | remainingStreamingBytes -= length; |
664 | 299 | return {0, returnedUser}; |
665 | 370 | } else { |
666 | 370 | void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true); |
667 | | |
668 | 370 | data += (unsigned int) remainingStreamingBytes; |
669 | 370 | length -= (unsigned int) remainingStreamingBytes; |
670 | | |
671 | 370 | remainingStreamingBytes = 0; |
672 | | |
673 | 370 | if (returnedUser != user) { |
674 | 0 | return {0, returnedUser}; |
675 | 0 | } |
676 | 370 | } |
677 | 669 | } |
678 | 1.20k | } |
679 | | |
680 | 7.06k | } else { |
681 | 7.06k | if (fallback.length() == MAX_FALLBACK_SIZE) { |
682 | 4.44k | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; |
683 | 4.44k | } |
684 | 2.61k | return {0, user}; |
685 | 7.06k | } |
686 | 9.19k | } |
687 | | |
688 | 81.4k | std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler); |
689 | 81.4k | if (consumed.second != user) { |
690 | 56.4k | return consumed; |
691 | 56.4k | } |
692 | | |
693 | 24.9k | data += consumed.first; |
694 | 24.9k | length -= consumed.first; |
695 | | |
696 | 24.9k | if (length) { |
697 | 2.94k | if (length < MAX_FALLBACK_SIZE) { |
698 | 2.87k | fallback.append(data, length); |
699 | 2.87k | } else { |
700 | 69 | return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR}; |
701 | 69 | } |
702 | 2.94k | } |
703 | | |
704 | | // added for now |
705 | 24.8k | return {0, user}; |
706 | 24.9k | } |
707 | | }; |
708 | | |
709 | | } |
710 | | |
711 | | #endif // UWS_HTTPPARSER_H |