Coverage Report

Created: 2024-04-25 06:10

/src/uWebSockets/src/HttpParser.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Authored by Alex Hultman, 2018-2024.
3
 * Intellectual property of third-party.
4
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
9
 *     http://www.apache.org/licenses/LICENSE-2.0
10
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 */
17
18
#ifndef UWS_HTTPPARSER_H
19
#define UWS_HTTPPARSER_H
20
21
// todo: HttpParser is in need of a few clean-ups and refactorings
22
23
/* The HTTP parser is an independent module subject to unit testing / fuzz testing */
24
25
#include <string>
26
#include <cstring>
27
#include <algorithm>
28
#include <climits>
29
#include <string_view>
30
#include <map>
31
#include "MoveOnlyFunction.h"
32
#include "ChunkedEncoding.h"
33
34
#include "BloomFilter.h"
35
#include "ProxyParser.h"
36
#include "QueryParser.h"
37
#include "HttpErrors.h"
38
39
namespace uWS {
40
41
/* We require at least this much post padding */
42
static const unsigned int MINIMUM_HTTP_POST_PADDING = 32;
43
static void *FULLPTR = (void *)~(uintptr_t)0;
44
45
/* STL needs one of these */
46
/* Turns a nullable pointer into an optional: empty for nullptr, otherwise holding the pointer */
template <typename T>
std::optional<T *> optional_ptr(T *ptr) {
    if (ptr == nullptr) {
        return std::nullopt;
    }
    return std::optional<T *>(ptr);
}
50
51
/* Limit taken from the UWS_HTTP_MAX_HEADERS_SIZE environment variable (parsed with atoi),
 * or 4096 when the variable is not set */
static const size_t MAX_FALLBACK_SIZE = [] {
    const char *configured = getenv("UWS_HTTP_MAX_HEADERS_SIZE");
    return (size_t) atoi(configured ? configured : "4096");
}();
52
#ifndef UWS_HTTP_MAX_HEADERS_COUNT
53
65.4k
#define UWS_HTTP_MAX_HEADERS_COUNT 100
54
#endif
55
56
struct HttpRequest {
57
58
    friend struct HttpParser;
59
60
private:
61
    struct Header {
62
        std::string_view key, value;
63
    } headers[UWS_HTTP_MAX_HEADERS_COUNT];
64
    bool ancientHttp;
65
    unsigned int querySeparator;
66
    bool didYield;
67
    BloomFilter bf;
68
    std::pair<int, std::string_view *> currentParameters;
69
    std::map<std::string, unsigned short, std::less<>> *currentParameterOffsets = nullptr;
70
71
public:
72
0
    bool isAncient() {
73
0
        return ancientHttp;
74
0
    }
75
76
0
    bool getYield() {
77
0
        return didYield;
78
0
    }
79
80
    /* Iteration over headers (key, value) */
81
    struct HeaderIterator {
82
        Header *ptr;
83
84
27.2k
        bool operator!=(const HeaderIterator &other) const {
85
            /* Comparison with end is a special case */
86
27.2k
            if (ptr != other.ptr) {
87
27.2k
                return other.ptr || ptr->key.length();
88
27.2k
            }
89
0
            return false;
90
27.2k
        }
91
92
17.1k
        HeaderIterator &operator++() {
93
17.1k
            ptr++;
94
17.1k
            return *this;
95
17.1k
        }
96
97
17.1k
        std::pair<std::string_view, std::string_view> operator*() const {
98
17.1k
            return {ptr->key, ptr->value};
99
17.1k
        }
100
    };
101
102
10.0k
    HeaderIterator begin() {
103
10.0k
        return {headers + 1};
104
10.0k
    }
105
106
10.0k
    HeaderIterator end() {
107
10.0k
        return {nullptr};
108
10.0k
    }
109
110
    /* If you do not want to handle this route */
111
0
    void setYield(bool yield) {
112
0
        didYield = yield;
113
0
    }
114
115
45.9k
    std::string_view getHeader(std::string_view lowerCasedHeader) {
116
45.9k
        if (bf.mightHave(lowerCasedHeader)) {
117
27.2k
            for (Header *h = headers; (++h)->key.length(); ) {
118
26.2k
                if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) {
119
15.9k
                    return h->value;
120
15.9k
                }
121
26.2k
            }
122
16.9k
        }
123
30.0k
        return std::string_view(nullptr, 0);
124
45.9k
    }
125
126
21.6k
    std::string_view getUrl() {
127
21.6k
        return std::string_view(headers->value.data(), querySeparator);
128
21.6k
    }
129
130
0
    std::string_view getFullUrl() {
131
0
        return std::string_view(headers->value.data(), headers->value.length());
132
0
    }
133
134
    /* Hack: this should be getMethod */
135
0
    std::string_view getCaseSensitiveMethod() {
136
0
        return std::string_view(headers->key.data(), headers->key.length());
137
0
    }
138
139
21.6k
    std::string_view getMethod() {
140
        /* Compatibility hack: lower case method (todo: remove when major version bumps) */
141
104k
        for (unsigned int i = 0; i < headers->key.length(); i++) {
142
82.4k
            ((char *) headers->key.data())[i] |= 32;
143
82.4k
        }
144
145
21.6k
        return std::string_view(headers->key.data(), headers->key.length());
146
21.6k
    }
147
148
    /* Returns the raw querystring as a whole, still encoded */
149
10.8k
    std::string_view getQuery() {
150
10.8k
        if (querySeparator < headers->value.length()) {
151
            /* Strip the initial ? */
152
2.75k
            return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1);
153
8.06k
        } else {
154
8.06k
            return std::string_view(nullptr, 0);
155
8.06k
        }
156
10.8k
    }
157
158
    /* Finds and decodes the URI component. */
159
21.6k
    std::string_view getQuery(std::string_view key) {
160
        /* Raw querystring including initial '?' sign */
161
21.6k
        std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator);
162
163
21.6k
        return getDecodedQueryValue(key, queryString);
164
21.6k
    }
165
166
0
    void setParameters(std::pair<int, std::string_view *> parameters) {
167
0
        currentParameters = parameters;
168
0
    }
169
170
0
    void setParameterOffsets(std::map<std::string, unsigned short, std::less<>> *offsets) {
171
0
        currentParameterOffsets = offsets;
172
0
    }
173
174
0
    std::string_view getParameter(std::string_view name) {
175
0
        if (!currentParameterOffsets) {
176
0
            return {nullptr, 0};
177
0
        }
178
0
        auto it = currentParameterOffsets->find(name);
179
0
        if (it == currentParameterOffsets->end()) {
180
0
            return {nullptr, 0};
181
0
        }
182
0
        return getParameter(it->second);
183
0
    }
184
185
0
    std::string_view getParameter(unsigned short index) {
186
0
        if (currentParameters.first < (int) index) {
187
0
            return {};
188
0
        } else {
189
0
            return currentParameters.second[index];
190
0
        }
191
0
    }
192
193
};
194
195
struct HttpParser {
196
197
private:
198
    std::string fallback;
199
    /* This guy really has only 62 bits since we reserve two highest bits to chunked encoding parsing state */
200
    uint64_t remainingStreamingBytes = 0;
201
202
    /* Parses a non-empty string of at most 18 ASCII digits as a decimal number.
     * Returns UINT64_MAX on error: empty input, more than 18 characters, or any character
     * that is not 0-9 (so signs and whitespace are rejected as well). */
    static uint64_t toUnsignedInteger(std::string_view str) {
        /* 18 digits give at most 999999999999999999, which always fits in 64 bits.
         * An empty string is not a number either. */
        if (str.empty() || str.length() > 18) {
            return UINT64_MAX;
        }

        uint64_t unsignedIntegerValue = 0;
        for (char c : str) {
            /* As long as the letter is 0-9 we cannot overflow. */
            if (c < '0' || c > '9') {
                return UINT64_MAX;
            }
            unsignedIntegerValue = unsignedIntegerValue * 10ull + (uint64_t) (c - '0');
        }
        return unsignedIntegerValue;
    }
219
    
220
1.13M
    /* Word-at-a-time test: non-zero if any of the eight bytes of x is less than n (n up to 128).
     * Only high bits of bytes are ever set in the result, and bytes of x that have their own
     * high bit set are never reported. */
    static inline uint64_t hasLess(uint64_t x, uint64_t n) {
        const uint64_t everyByteOne = ~0ULL / 255;          /* 0x01 repeated */
        const uint64_t everyByteHigh = everyByteOne * 128;  /* 0x80 repeated */
        const uint64_t borrowed = x - everyByteOne * n;
        return borrowed & ~x & everyByteHigh;
    }
223
224
0
    /* Word-at-a-time test: non-zero if any of the eight bytes of x is greater than n (n up to 127).
     * Bytes with their high bit set always count as greater. */
    static inline uint64_t hasMore(uint64_t x, uint64_t n) {
        const uint64_t everyByteOne = ~0ULL / 255;          /* 0x01 repeated */
        const uint64_t everyByteHigh = everyByteOne * 128;  /* 0x80 repeated */
        const uint64_t lifted = x + everyByteOne * (127 - n);
        return (lifted | x) & everyByteHigh;
    }
227
228
0
    /* Word-at-a-time test: non-zero if any of the eight bytes of x lies strictly between
     * m and n (both exclusive). Bytes with their high bit set are never reported. */
    static inline uint64_t hasBetween(uint64_t x, uint64_t m, uint64_t n) {
        const uint64_t everyByteOne = ~0ULL / 255;          /* 0x01 repeated */
        const uint64_t everyByteLow7 = everyByteOne * 127;  /* 0x7f repeated */
        const uint64_t everyByteHigh = everyByteOne * 128;  /* 0x80 repeated */
        const uint64_t low7 = x & everyByteLow7;
        const uint64_t belowUpper = everyByteOne * (127 + n) - low7;
        const uint64_t aboveLower = low7 + everyByteOne * (127 - m);
        return (belowUpper & ~x & aboveLower) & everyByteHigh;
    }
231
232
0
    static inline bool notFieldNameWord(uint64_t x) {
233
0
        return hasLess(x, '-') |
234
0
        hasBetween(x, '-', '0') |
235
0
        hasBetween(x, '9', 'A') |
236
0
        hasBetween(x, 'Z', 'a') |
237
0
        hasMore(x, 'z');
238
0
    }
239
240
    /* RFC 9110 5.6.2. Tokens */
241
    /* Hyphen is not checked here as it is very common */
242
    /* True for the token bytes that are neither letters nor the hyphen: the digits plus
     * ! # $ % & ' * + . ^ _ ` | ~ */
    static inline bool isUnlikelyFieldNameByte(unsigned char c)
    {
        switch (c) {
            case '~': case '|': case '`': case '_': case '^':
            case '.': case '+': case '*': case '!':
                return true;
            default:
                break;
        }
        const bool isDigit = (c >= '0') && (c <= '9');
        /* The contiguous run # $ % & ' */
        const bool isHashToApostrophe = (c >= '#') && (c <= '\'');
        return isDigit || isHashToApostrophe;
    }
248
249
70.5k
    /* Returns whether in is a valid field name byte. An upper case letter is additionally
     * rewritten to lower case through the reference. */
    static inline bool isFieldNameByteFastLowercased(unsigned char &in) {
        /* Most common is lowercase alpha and hyphen */
        const bool lowerAlphaOrHyphen = ((in >= 'a') & (in <= 'z')) | (in == '-');
        if (lowerAlphaOrHyphen) [[likely]] {
            return true;
        }

        /* Second is upper case alpha */
        const bool upperAlpha = (in >= 'A') & (in <= 'Z');
        if (upperAlpha) [[unlikely]] {
            in |= 32;
            return true;
        }

        /* What remains valid is rarely used: digits and the non-alphanumeric token bytes */
        return isUnlikelyFieldNameByte(in);
    }
263
    
264
65.2k
    static inline void *consumeFieldName(char *p) {
265
        /* Best case fast path (particularly useful with clang) */
266
82.3k
        while (true) {
267
93.8k
            while ((*p >= 65) & (*p <= 90)) [[likely]] {
268
11.4k
                *p |= 32;
269
11.4k
                p++;
270
11.4k
            }
271
315k
            while (((*p >= 97) & (*p <= 122))) [[likely]] {
272
232k
                p++;
273
232k
            }
274
82.3k
            if (*p == ':') {
275
59.2k
                return (void *)p;
276
59.2k
            }
277
23.0k
            if (*p == '-') {
278
9.71k
                p++;
279
13.3k
            } else if (!((*p >= 65) & (*p <= 90))) {
280
                /* Exit fast path parsing */
281
5.97k
                break;
282
5.97k
            }
283
23.0k
        }
284
285
        /* Generic */
286
70.5k
        while (isFieldNameByteFastLowercased(*(unsigned char *)p)) {
287
64.5k
            p++;
288
64.5k
        }
289
5.97k
        return (void *)p;
290
65.2k
    }
291
292
    /* Puts method as key, target as value and returns non-null (or nullptr on error). */
293
88.6k
    static inline char *consumeRequestLine(char *data, HttpRequest::Header &header) {
294
        /* Scan until single SP, assume next is / (origin request) */
295
88.6k
        char *start = data;
296
        /* This catches the post padded CR and fails */
297
294k
        while (data[0] > 32) data++;
298
88.6k
        if (data[0] == 32 && data[1] == '/') {
299
21.2k
            header.key = {start, (size_t) (data - start)};
300
21.2k
            data++;
301
            /* Scan for less than 33 (catches post padded CR and fails) */
302
21.2k
            start = data;
303
633k
            for (; true; data += 8) {
304
633k
                uint64_t word;
305
633k
                memcpy(&word, data, sizeof(uint64_t));
306
633k
                if (hasLess(word, 33)) {
307
88.3k
                    while (*(unsigned char *)data > 32) data++;
308
                    /* Now we stand on space */
309
21.2k
                    header.value = {start, (size_t) (data - start)};
310
                    /* Check that the following is http 1.1 */
311
21.2k
                    if (memcmp(" HTTP/1.1\r\n", data, 11) == 0) {
312
20.3k
                        return data + 11;
313
20.3k
                    }
314
902
                    return nullptr;
315
21.2k
                }
316
633k
            }
317
21.2k
        }
318
67.3k
        return nullptr;
319
88.6k
    }
320
321
    /* RFC 9110: 5.5 Field Values (TLDR; anything above 31 is allowed; htab (9) is also allowed)
322
     * Field values are usually constrained to the range of US-ASCII characters [...]
323
     * Field values containing CR, LF, or NUL characters are invalid and dangerous [...]
324
     * Field values containing other CTL characters are also invalid. */
325
61.4k
    static inline void *tryConsumeFieldValue(char *p) {
326
500k
        for (; true; p += 8) {
327
500k
            uint64_t word;
328
500k
            memcpy(&word, p, sizeof(uint64_t));
329
500k
            if (hasLess(word, 32)) {
330
115k
                while (*(unsigned char *)p > 31) p++;
331
61.4k
                return (void *)p;
332
61.4k
            }
333
500k
        }
334
61.4k
    }
335
336
    /* End is only used for the proxy parser. The HTTP parser recognizes "\ra" as invalid "\r\n" scan and breaks. */
337
90.9k
    static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved, unsigned int &err) {
338
90.9k
        char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer;
339
340
90.9k
        #ifdef UWS_WITH_PROXY
341
            /* ProxyParser is passed as reserved parameter */
342
90.9k
            ProxyParser *pp = (ProxyParser *) reserved;
343
344
            /* Parse PROXY protocol */
345
90.9k
            auto [done, offset] = pp->parse({postPaddedBuffer, (size_t) (end - postPaddedBuffer)});
346
90.9k
            if (!done) {
347
                /* We do not reset the ProxyParser (on filure) since it is tied to this
348
                * connection, which is really only supposed to ever get one PROXY frame
349
                * anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */
350
2.28k
                return 0;
351
88.6k
            } else {
352
                /* We have consumed this data so skip it */
353
88.6k
                postPaddedBuffer += offset;
354
88.6k
            }
355
        #else
356
            /* This one is unused */
357
            (void) reserved;
358
            (void) end;
359
        #endif
360
361
        /* It is critical for fallback buffering logic that we only return with success
362
         * if we managed to parse a complete HTTP request (minus data). Returning success
363
         * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer
364
         * which is then removed, and our counters to flip due to overflow and we end up with a crash */
365
366
        /* The request line is different from the field names / field values */
367
88.6k
        if (!(postPaddedBuffer = consumeRequestLine(postPaddedBuffer, headers[0]))) {
368
            /* Error - invalid request line */
369
            /* Assuming it is 505 HTTP Version Not Supported */
370
68.2k
            err = HTTP_ERROR_505_HTTP_VERSION_NOT_SUPPORTED;
371
68.2k
            return 0;
372
68.2k
        }
373
20.3k
        headers++;
374
375
65.4k
        for (unsigned int i = 1; i < UWS_HTTP_MAX_HEADERS_COUNT - 1; i++) {
376
            /* Lower case and consume the field name */
377
65.2k
            preliminaryKey = postPaddedBuffer;
378
65.2k
            postPaddedBuffer = (char *) consumeFieldName(postPaddedBuffer);
379
65.2k
            headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey));
380
381
            /* We should not accept whitespace between key and colon, so colon must foloow immediately */
382
65.2k
            if (postPaddedBuffer[0] != ':') {
383
                /* Error: invalid chars in field name */
384
4.45k
                return 0;
385
4.45k
            }
386
60.7k
            postPaddedBuffer++;
387
388
60.7k
            preliminaryValue = postPaddedBuffer;
389
            /* The goal of this call is to find next "\r\n", or any invalid field value chars, fast */
390
61.4k
            while (true) {
391
61.4k
                postPaddedBuffer = (char *) tryConsumeFieldValue(postPaddedBuffer);
392
                /* If this is not CR then we caught some stinky invalid char on the way */
393
61.4k
                if (postPaddedBuffer[0] != '\r') {
394
                    /* If TAB then keep searching */
395
1.86k
                    if (postPaddedBuffer[0] == '\t') {
396
673
                        postPaddedBuffer++;
397
673
                        continue;
398
673
                    }
399
                    /* Error - invalid chars in field value */
400
1.19k
                    return 0;
401
1.86k
                }
402
59.6k
                break;
403
61.4k
            }
404
            /* We fence end[0] with \r, followed by end[1] being something that is "not \n", to signify "not found".
405
                * This way we can have this one single check to see if we found \r\n WITHIN our allowed search space. */
406
59.6k
            if (postPaddedBuffer[1] == '\n') {
407
                /* Store this header, it is valid */
408
58.2k
                headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue));
409
58.2k
                postPaddedBuffer += 2;
410
411
                /* Trim trailing whitespace (SP, HTAB) */
412
67.3k
                while (headers->value.length() && headers->value.back() < 33) {
413
9.16k
                    headers->value.remove_suffix(1);
414
9.16k
                }
415
416
                /* Trim initial whitespace (SP, HTAB) */
417
73.6k
                while (headers->value.length() && headers->value.front() < 33) {
418
15.4k
                    headers->value.remove_prefix(1);
419
15.4k
                }
420
                
421
58.2k
                headers++;
422
423
                /* We definitely have at least one header (or request line), so check if we are done */
424
58.2k
                if (*postPaddedBuffer == '\r') {
425
13.1k
                    if (postPaddedBuffer[1] == '\n') {
426
                        /* This cann take the very last header space */
427
12.6k
                        headers->key = std::string_view(nullptr, 0);
428
12.6k
                        return (unsigned int) ((postPaddedBuffer + 2) - start);
429
12.6k
                    } else {
430
                        /* \r\n\r plus non-\n letter is malformed request, or simply out of search space */
431
494
                        return 0;
432
494
                    }
433
13.1k
                }
434
58.2k
            } else {
435
                /* We are either out of search space or this is a malformed request */
436
1.38k
                return 0;
437
1.38k
            }
438
59.6k
        }
439
        /* We ran out of header space, too large request */
440
194
        return 0;
441
20.3k
    }
442
443
    /* This is the only caller of getHeaders and is thus the deepest part of the parser.
444
     * From here we return either [consumed, user] for "keep going",
445
      * or [consumed, nullptr] for "break; I am closed or upgraded to websocket"
446
      * or [whatever, fullptr] for "break and close me, I am a parser error!" */
447
    template <int CONSUME_MINIMALLY>
448
105k
    std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {
449
450
        /* How much data we CONSUMED (to throw away) */
451
105k
        unsigned int consumedTotal = 0;
452
105k
        unsigned int err = 0;
453
454
        /* Fence two bytes past end of our buffer (buffer has post padded margins).
455
         * This is to always catch scan for \r but not for \r\n. */
456
105k
        data[length] = '\r';
457
105k
        data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */
458
459
112k
        for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) {
460
12.6k
            data += consumed;
461
12.6k
            length -= consumed;
462
12.6k
            consumedTotal += consumed;
463
464
            /* Even if we could parse it, check for length here as well */
465
12.6k
            if (consumed > MAX_FALLBACK_SIZE) {
466
21
                return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
467
21
            }
468
469
            /* Store HTTP version (ancient 1.0 or 1.1) */
470
12.6k
            req->ancientHttp = false;
471
472
            /* Add all headers to bloom filter */
473
12.6k
            req->bf.reset();
474
34.9k
            for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
475
22.3k
                req->bf.add(h->key);
476
22.3k
            }
477
            
478
            /* Break if no host header (but we can have empty string which is different from nullptr) */
479
12.6k
            if (!req->getHeader("host").data()) {
480
1.32k
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
481
1.32k
            }
482
483
            /* RFC 9112 6.3
484
            * If a message is received with both a Transfer-Encoding and a Content-Length header field,
485
            * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
486
            * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
487
            * ought to be handled as an error. */
488
11.2k
            std::string_view transferEncodingString = req->getHeader("transfer-encoding");
489
11.2k
            std::string_view contentLengthString = req->getHeader("content-length");
490
11.2k
            if (transferEncodingString.length() && contentLengthString.length()) {
491
                /* Returning fullptr is the same as calling the errorHandler */
492
                /* We could be smart and set an error in the context along with this, to indicate what 
493
                 * http error response we might want to return */
494
465
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
495
465
            }
496
497
            /* Parse query */
498
10.8k
            const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
499
10.8k
            req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());
500
501
            /* If returned socket is not what we put in we need
502
             * to break here as we either have upgraded to
503
             * WebSockets or otherwise closed the socket. */
504
10.8k
            void *returnedUser = requestHandler(user, req);
505
10.8k
            if (returnedUser != user) {
506
                /* We are upgraded to WebSocket or otherwise broken */
507
743
                return {consumedTotal, returnedUser};
508
743
            }
509
510
            /* The rules at play here according to RFC 9112 for requests are essentially:
511
             * If both content-length and transfer-encoding then invalid message; must break.
512
             * If has transfer-encoding then must be chunked regardless of value.
513
             * If content-length then fixed length even if 0.
514
             * If none of the above then fixed length is 0. */
515
516
            /* RFC 9112 6.3
517
             * If a message is received with both a Transfer-Encoding and a Content-Length header field,
518
             * the Transfer-Encoding overrides the Content-Length. */
519
10.0k
            if (transferEncodingString.length()) {
520
521
                /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
522
                 * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates 
523
                 * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */
524
525
                /* RFC 9112 6.3
526
                 * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
527
                 * final encoding, the message body length cannot be determined reliably; the server MUST respond with the
528
                 * 400 (Bad Request) status code and then close the connection. */
529
530
                /* In this case we fail later by having the wrong interpretation (assuming chunked).
531
                 * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */
532
533
1.98k
                remainingStreamingBytes = STATE_IS_CHUNKED;
534
                /* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */
535
1.98k
                if (!CONSUME_MINIMALLY) {
536
                    /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
537
1.45k
                    std::string_view dataToConsume(data, length);
538
1.93k
                    for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
539
1.93k
                        dataHandler(user, chunk, chunk.length() == 0);
540
1.93k
                    }
541
1.45k
                    if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
542
27
                        return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
543
27
                    }
544
1.43k
                    unsigned int consumed = (length - (unsigned int) dataToConsume.length());
545
1.43k
                    data = (char *) dataToConsume.data();
546
1.43k
                    length = (unsigned int) dataToConsume.length();
547
1.43k
                    consumedTotal += consumed;
548
1.43k
                }
549
8.08k
            } else if (contentLengthString.length()) {
550
1.72k
                remainingStreamingBytes = toUnsignedInteger(contentLengthString);
551
1.72k
                if (remainingStreamingBytes == UINT64_MAX) {
552
                    /* Parser error */
553
111
                    return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
554
111
                }
555
556
1.60k
                if (!CONSUME_MINIMALLY) {
557
910
                    unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length);
558
910
                    dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
559
910
                    remainingStreamingBytes -= emittable;
560
561
910
                    data += emittable;
562
910
                    length -= emittable;
563
910
                    consumedTotal += emittable;
564
910
                }
565
6.36k
            } else {
566
                /* If we came here without a body; emit an empty data chunk to signal no data */
567
6.36k
                dataHandler(user, {}, true);
568
6.36k
            }
569
570
            /* Consume minimally should break as easrly as possible */
571
9.93k
            if (CONSUME_MINIMALLY) {
572
2.13k
                break;
573
2.13k
            }
574
9.93k
        }
575
        /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */
576
102k
        if (err) {
577
68.2k
            return {err, FULLPTR};
578
68.2k
        }
579
34.1k
        return {consumedTotal, user};
580
102k
    }
std::__1::pair<unsigned int, void*> uWS::HttpParser::fenceAndConsumePostPadded<1>(char*, unsigned int, void*, void*, uWS::HttpRequest*, ofats::any_invocable<void* (void*, uWS::HttpRequest*)>&, ofats::any_invocable<void* (void*, std::__1::basic_string_view<char, std::__1::char_traits<char> >, bool)>&)
Line
Count
Source
448
23.6k
    std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {
449
450
        /* How much data we CONSUMED (to throw away) */
451
23.6k
        unsigned int consumedTotal = 0;
452
23.6k
        unsigned int err = 0;
453
454
        /* Fence two bytes past end of our buffer (buffer has post padded margins).
455
         * This is to always catch scan for \r but not for \r\n. */
456
23.6k
        data[length] = '\r';
457
23.6k
        data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */
458
459
23.6k
        for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) {
460
3.50k
            data += consumed;
461
3.50k
            length -= consumed;
462
3.50k
            consumedTotal += consumed;
463
464
            /* Even if we could parse it, check for length here as well */
465
3.50k
            if (consumed > MAX_FALLBACK_SIZE) {
466
0
                return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
467
0
            }
468
469
            /* Store HTTP version (ancient 1.0 or 1.1) */
470
3.50k
            req->ancientHttp = false;
471
472
            /* Add all headers to bloom filter */
473
3.50k
            req->bf.reset();
474
12.1k
            for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
475
8.65k
                req->bf.add(h->key);
476
8.65k
            }
477
            
478
            /* Break if no host header (but we can have empty string which is different from nullptr) */
479
3.50k
            if (!req->getHeader("host").data()) {
480
1.02k
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
481
1.02k
            }
482
483
            /* RFC 9112 6.3
484
            * If a message is received with both a Transfer-Encoding and a Content-Length header field,
485
            * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
486
            * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
487
            * ought to be handled as an error. */
488
2.47k
            std::string_view transferEncodingString = req->getHeader("transfer-encoding");
489
2.47k
            std::string_view contentLengthString = req->getHeader("content-length");
490
2.47k
            if (transferEncodingString.length() && contentLengthString.length()) {
491
                /* Returning fullptr is the same as calling the errorHandler */
492
                /* We could be smart and set an error in the context along with this, to indicate what 
493
                 * http error response we might want to return */
494
265
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
495
265
            }
496
497
            /* Parse query */
498
2.21k
            const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
499
2.21k
            req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());
500
501
            /* If returned socket is not what we put in we need
502
             * to break here as we either have upgraded to
503
             * WebSockets or otherwise closed the socket. */
504
2.21k
            void *returnedUser = requestHandler(user, req);
505
2.21k
            if (returnedUser != user) {
506
                /* We are upgraded to WebSocket or otherwise broken */
507
21
                return {consumedTotal, returnedUser};
508
21
            }
509
510
            /* The rules at play here according to RFC 9112 for requests are essentially:
511
             * If both content-length and transfer-encoding then invalid message; must break.
512
             * If has transfer-encoding then must be chunked regardless of value.
513
             * If content-length then fixed length even if 0.
514
             * If none of the above then fixed length is 0. */
515
516
            /* RFC 9112 6.3
517
             * If a message is received with both a Transfer-Encoding and a Content-Length header field,
518
             * the Transfer-Encoding overrides the Content-Length. */
519
2.19k
            if (transferEncodingString.length()) {
520
521
                /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
522
                 * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates 
523
                 * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */
524
525
                /* RFC 9112 6.3
526
                 * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
527
                 * final encoding, the message body length cannot be determined reliably; the server MUST respond with the
528
                 * 400 (Bad Request) status code and then close the connection. */
529
530
                /* In this case we fail later by having the wrong interpretation (assuming chunked).
531
                 * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */
532
533
531
                remainingStreamingBytes = STATE_IS_CHUNKED;
534
                /* If consuming minimally, we do not want to consume anything, but we still want to mark this as being chunked */
535
531
                if (!CONSUME_MINIMALLY) {
536
                    /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
537
0
                    std::string_view dataToConsume(data, length);
538
0
                    for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
539
0
                        dataHandler(user, chunk, chunk.length() == 0);
540
0
                    }
541
0
                    if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
542
0
                        return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
543
0
                    }
544
0
                    unsigned int consumed = (length - (unsigned int) dataToConsume.length());
545
0
                    data = (char *) dataToConsume.data();
546
0
                    length = (unsigned int) dataToConsume.length();
547
0
                    consumedTotal += consumed;
548
0
                }
549
1.66k
            } else if (contentLengthString.length()) {
550
753
                remainingStreamingBytes = toUnsignedInteger(contentLengthString);
551
753
                if (remainingStreamingBytes == UINT64_MAX) {
552
                    /* Parser error */
553
54
                    return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
554
54
                }
555
556
699
                if (!CONSUME_MINIMALLY) {
557
0
                    unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length);
558
0
                    dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
559
0
                    remainingStreamingBytes -= emittable;
560
561
0
                    data += emittable;
562
0
                    length -= emittable;
563
0
                    consumedTotal += emittable;
564
0
                }
565
909
            } else {
566
                /* If we came here without a body; emit an empty data chunk to signal no data */
567
909
                dataHandler(user, {}, true);
568
909
            }
569
570
            /* Consume minimally should break as early as possible */
571
2.13k
            if (CONSUME_MINIMALLY) {
572
2.13k
                break;
573
2.13k
            }
574
2.13k
        }
575
        /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */
576
22.3k
        if (err) {
577
13.1k
            return {err, FULLPTR};
578
13.1k
        }
579
9.19k
        return {consumedTotal, user};
580
22.3k
    }
std::__1::pair<unsigned int, void*> uWS::HttpParser::fenceAndConsumePostPadded<0>(char*, unsigned int, void*, void*, uWS::HttpRequest*, ofats::any_invocable<void* (void*, uWS::HttpRequest*)>&, ofats::any_invocable<void* (void*, std::__1::basic_string_view<char, std::__1::char_traits<char> >, bool)>&)
Line
Count
Source
448
81.4k
    std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {
449
450
        /* How much data we CONSUMED (to throw away) */
451
81.4k
        unsigned int consumedTotal = 0;
452
81.4k
        unsigned int err = 0;
453
454
        /* Fence two bytes past end of our buffer (buffer has post padded margins).
455
         * This ensures that a scan for \r always stops at the fence, while the fence itself never matches \r\n. */
456
81.4k
        data[length] = '\r';
457
81.4k
        data[length + 1] = 'a'; /* Anything that is not \n, to trigger "invalid request" */
458
459
89.2k
        for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved, err)); ) {
460
9.11k
            data += consumed;
461
9.11k
            length -= consumed;
462
9.11k
            consumedTotal += consumed;
463
464
            /* Even if we could parse it, check for length here as well */
465
9.11k
            if (consumed > MAX_FALLBACK_SIZE) {
466
21
                return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
467
21
            }
468
469
            /* Store HTTP version (ancient 1.0 or 1.1) */
470
9.09k
            req->ancientHttp = false;
471
472
            /* Add all headers to bloom filter */
473
9.09k
            req->bf.reset();
474
22.7k
            for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
475
13.6k
                req->bf.add(h->key);
476
13.6k
            }
477
            
478
            /* Break if no host header (but we can have empty string which is different from nullptr) */
479
9.09k
            if (!req->getHeader("host").data()) {
480
295
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
481
295
            }
482
483
            /* RFC 9112 6.3
484
            * If a message is received with both a Transfer-Encoding and a Content-Length header field,
485
            * the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
486
            * to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
487
            * ought to be handled as an error. */
488
8.80k
            std::string_view transferEncodingString = req->getHeader("transfer-encoding");
489
8.80k
            std::string_view contentLengthString = req->getHeader("content-length");
490
8.80k
            if (transferEncodingString.length() && contentLengthString.length()) {
491
                /* Returning fullptr is the same as calling the errorHandler */
492
                /* We could be smart and set an error in the context along with this, to indicate what 
493
                 * http error response we might want to return */
494
200
                return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
495
200
            }
496
497
            /* Parse query */
498
8.60k
            const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
499
8.60k
            req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());
500
501
            /* If returned socket is not what we put in we need
502
             * to break here as we either have upgraded to
503
             * WebSockets or otherwise closed the socket. */
504
8.60k
            void *returnedUser = requestHandler(user, req);
505
8.60k
            if (returnedUser != user) {
506
                /* We are upgraded to WebSocket or otherwise broken */
507
722
                return {consumedTotal, returnedUser};
508
722
            }
509
510
            /* The rules at play here according to RFC 9112 for requests are essentially:
511
             * If both content-length and transfer-encoding then invalid message; must break.
512
             * If has transfer-encoding then must be chunked regardless of value.
513
             * If content-length then fixed length even if 0.
514
             * If none of the above then fixed length is 0. */
515
516
            /* RFC 9112 6.3
517
             * If a message is received with both a Transfer-Encoding and a Content-Length header field,
518
             * the Transfer-Encoding overrides the Content-Length. */
519
7.88k
            if (transferEncodingString.length()) {
520
521
                /* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
522
                 * not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates 
523
                 * all forms of transfer-encoding obfuscation tricks. We just rely on the header. */
524
525
                /* RFC 9112 6.3
526
                 * If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
527
                 * final encoding, the message body length cannot be determined reliably; the server MUST respond with the
528
                 * 400 (Bad Request) status code and then close the connection. */
529
530
                /* In this case we fail later by having the wrong interpretation (assuming chunked).
531
                 * This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */
532
533
1.45k
                remainingStreamingBytes = STATE_IS_CHUNKED;
534
                /* If consuming minimally, we do not want to consume anything, but we still want to mark this as being chunked */
535
1.45k
                if (!CONSUME_MINIMALLY) {
536
                    /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
537
1.45k
                    std::string_view dataToConsume(data, length);
538
1.93k
                    for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
539
1.93k
                        dataHandler(user, chunk, chunk.length() == 0);
540
1.93k
                    }
541
1.45k
                    if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
542
27
                        return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
543
27
                    }
544
1.43k
                    unsigned int consumed = (length - (unsigned int) dataToConsume.length());
545
1.43k
                    data = (char *) dataToConsume.data();
546
1.43k
                    length = (unsigned int) dataToConsume.length();
547
1.43k
                    consumedTotal += consumed;
548
1.43k
                }
549
6.42k
            } else if (contentLengthString.length()) {
550
967
                remainingStreamingBytes = toUnsignedInteger(contentLengthString);
551
967
                if (remainingStreamingBytes == UINT64_MAX) {
552
                    /* Parser error */
553
57
                    return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
554
57
                }
555
556
910
                if (!CONSUME_MINIMALLY) {
557
910
                    unsigned int emittable = (unsigned int) std::min<uint64_t>(remainingStreamingBytes, length);
558
910
                    dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
559
910
                    remainingStreamingBytes -= emittable;
560
561
910
                    data += emittable;
562
910
                    length -= emittable;
563
910
                    consumedTotal += emittable;
564
910
                }
565
5.45k
            } else {
566
                /* If we came here without a body; emit an empty data chunk to signal no data */
567
5.45k
                dataHandler(user, {}, true);
568
5.45k
            }
569
570
            /* Consume minimally should break as early as possible */
571
7.79k
            if (CONSUME_MINIMALLY) {
572
0
                break;
573
0
            }
574
7.79k
        }
575
        /* Whenever we return FULLPTR, the interpretation of "consumed" should be the HttpError enum. */
576
80.0k
        if (err) {
577
55.1k
            return {err, FULLPTR};
578
55.1k
        }
579
24.9k
        return {consumedTotal, user};
580
80.0k
    }
581
582
public:
583
104k
    std::pair<unsigned int, void *> consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler) {
584
585
        /* This resets BloomFilter by construction, but later we also reset it again.
586
         * Optimize this to skip resetting twice (req could be made global) */
587
104k
        HttpRequest req;
588
589
104k
        if (remainingStreamingBytes) {
590
591
            /* It's either chunked or with a content-length */
592
21.8k
            if (isParsingChunkedEncoding(remainingStreamingBytes)) {
593
20.9k
                std::string_view dataToConsume(data, length);
594
21.0k
                for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
595
21.0k
                    dataHandler(user, chunk, chunk.length() == 0);
596
21.0k
                }
597
20.9k
                if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
598
341
                    return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
599
341
                }
600
20.6k
                data = (char *) dataToConsume.data();
601
20.6k
                length = (unsigned int) dataToConsume.length();
602
20.6k
            } else {
603
                // this is exactly the same as below!
604
                // todo: refactor this
605
863
                if (remainingStreamingBytes >= length) {
606
438
                    void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length);
607
438
                    remainingStreamingBytes -= length;
608
438
                    return {0, returnedUser};
609
438
                } else {
610
425
                    void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
611
612
425
                    data += (unsigned int) remainingStreamingBytes;
613
425
                    length -= (unsigned int) remainingStreamingBytes;
614
615
425
                    remainingStreamingBytes = 0;
616
617
425
                    if (returnedUser != user) {
618
0
                        return {0, returnedUser};
619
0
                    }
620
425
                }
621
863
            }
622
623
82.2k
        } else if (fallback.length()) {
624
23.6k
            unsigned int had = (unsigned int) fallback.length();
625
626
23.6k
            size_t maxCopyDistance = std::min<size_t>(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length);
627
628
            /* We don't want fallback to be short string optimized, since we want to move it */
629
23.6k
            fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string)));
630
23.6k
            fallback.append(data, maxCopyDistance);
631
632
            // return early below if parsing handed back a different user (error, upgrade or closed socket)
633
23.6k
            std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler);
634
23.6k
            if (consumed.second != user) {
635
14.4k
                return consumed;
636
14.4k
            }
637
638
9.19k
            if (consumed.first) {
639
640
                /* This logic assumes that we consumed everything in fallback buffer.
641
                 * This is critically important, as we will get an integer overflow in case
642
                 * of "had" being larger than what we consumed, and we would then drop data */
643
2.13k
                fallback.clear();
644
2.13k
                data += consumed.first - had;
645
2.13k
                length -= consumed.first - had;
646
647
2.13k
                if (remainingStreamingBytes) {
648
                    /* It's either chunked or with a content-length */
649
1.20k
                    if (isParsingChunkedEncoding(remainingStreamingBytes)) {
650
531
                        std::string_view dataToConsume(data, length);
651
619
                        for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
652
619
                            dataHandler(user, chunk, chunk.length() == 0);
653
619
                        }
654
531
                        if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
655
35
                            return {HTTP_ERROR_400_BAD_REQUEST, FULLPTR};
656
35
                        }
657
496
                        data = (char *) dataToConsume.data();
658
496
                        length = (unsigned int) dataToConsume.length();
659
669
                    } else {
660
                        // this is exactly the same as above!
661
669
                        if (remainingStreamingBytes >= (unsigned int) length) {
662
299
                            void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length);
663
299
                            remainingStreamingBytes -= length;
664
299
                            return {0, returnedUser};
665
370
                        } else {
666
370
                            void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
667
668
370
                            data += (unsigned int) remainingStreamingBytes;
669
370
                            length -= (unsigned int) remainingStreamingBytes;
670
671
370
                            remainingStreamingBytes = 0;
672
673
370
                            if (returnedUser != user) {
674
0
                                return {0, returnedUser};
675
0
                            }
676
370
                        }
677
669
                    }
678
1.20k
                }
679
680
7.06k
            } else {
681
7.06k
                if (fallback.length() == MAX_FALLBACK_SIZE) {
682
4.44k
                    return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
683
4.44k
                }
684
2.61k
                return {0, user};
685
7.06k
            }
686
9.19k
        }
687
688
81.4k
        std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler);
689
81.4k
        if (consumed.second != user) {
690
56.4k
            return consumed;
691
56.4k
        }
692
693
24.9k
        data += consumed.first;
694
24.9k
        length -= consumed.first;
695
696
24.9k
        if (length) {
697
2.94k
            if (length < MAX_FALLBACK_SIZE) {
698
2.87k
                fallback.append(data, length);
699
2.87k
            } else {
700
69
                return {HTTP_ERROR_431_REQUEST_HEADER_FIELDS_TOO_LARGE, FULLPTR};
701
69
            }
702
2.94k
        }
703
704
        // added for now
705
24.8k
        return {0, user};
706
24.9k
    }
707
};
708
709
}
710
711
#endif // UWS_HTTPPARSER_H