/src/uWebSockets/src/ChunkedEncoding.h

Source (jump to first uncovered line)
/*
 * Authored by Alex Hultman, 2018-2022.
 * Intellectual property of third-party.

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 *     http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef UWS_CHUNKEDENCODING_H
#define UWS_CHUNKEDENCODING_H

/* Independent chunked encoding parser, used by HttpParser. */

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <optional>
#include <string>
#include <string_view>

#include "MoveOnlyFunction.h"

namespace uWS {

    /*
     * The parser keeps everything it knows in one uint64_t "state":
     *
     *   bit 63      STATE_HAS_SIZE    the chunk-size line has been read completely
     *   bit 62      STATE_IS_CHUNKED  we are inside the chunked body (cleared while dropping
     *                                 the bytes that follow the terminating zero-size chunk)
     *   bits 0..61  number of bytes left in the current chunk, INCLUDING its trailing CRLF
     *
     * STATE_IS_ERROR (all bits set) marks a malformed stream. STATE_SIZE_OVERFLOW covers the
     * bits that must still be clear before the size may be multiplied by 16 once more.
     */
    inline constexpr uint64_t STATE_HAS_SIZE = 1ull << (sizeof(uint64_t) * 8 - 1);          // 0x8000000000000000
    inline constexpr uint64_t STATE_IS_CHUNKED = 1ull << (sizeof(uint64_t) * 8 - 2);        // 0x4000000000000000
    inline constexpr uint64_t STATE_SIZE_MASK = ~(3ull << (sizeof(uint64_t) * 8 - 2));      // 0x3FFFFFFFFFFFFFFF
    inline constexpr uint64_t STATE_IS_ERROR = ~0ull;                                       // 0xFFFFFFFFFFFFFFFF
    inline constexpr uint64_t STATE_SIZE_OVERFLOW = 0x0Full << (sizeof(uint64_t) * 8 - 8);  // 0x0F00000000000000

    /* Returns the size part of state: bytes left in the current chunk (CRLF included). */
    inline uint64_t chunkSize(uint64_t state) {
        return state & STATE_SIZE_MASK;
    }

    /*
     * Reads the hexadecimal chunk-size line from the front of data and folds it into state.
     *
     * Digits are accumulated into the size bits, so a size line that is split across two
     * buffers simply continues on the next call. Once the terminating LF has been seen, the
     * size is increased by 2 (the CRLF that follows the chunk payload) and STATE_HAS_SIZE is
     * set. Consumed bytes are removed from data.
     *
     * On any byte above 32 that is not [0-9a-fA-F], or when the size would overflow, state is
     * set to STATE_IS_ERROR and parsing stops. Chunk extensions (";name=value") are therefore
     * rejected instead of being misread as digits.
     */
    inline void consumeHexNumber(std::string_view &data, uint64_t &state) {
        /* Compare as unsigned char so bytes >= 0x80 behave the same on every platform */
        while (data.length() && static_cast<unsigned char>(data[0]) > 32) {

            const unsigned char c = static_cast<unsigned char>(data[0]);

            unsigned int number = 0;
            if (c >= '0' && c <= '9') {
                number = static_cast<unsigned int>(c - '0');
            } else if (c >= 'a' && c <= 'f') {
                number = static_cast<unsigned int>(c - 'a') + 10u;
            } else if (c >= 'A' && c <= 'F') {
                number = static_cast<unsigned int>(c - 'A') + 10u;
            } else {
                /* Not a hex digit */
                state = STATE_IS_ERROR;
                return;
            }

            /* Another multiplication by 16 would spill into the flag bits */
            if (chunkSize(state) & STATE_SIZE_OVERFLOW) {
                state = STATE_IS_ERROR;
                return;
            }

            state = (chunkSize(state) * 16ull + number) | STATE_IS_CHUNKED;
            data.remove_prefix(1);
        }

        /* Skip the rest of the size line (normally just the CR) up to and including the LF */
        const size_t lineFeed = data.find('\n');
        if (lineFeed == std::string_view::npos) {
            /* Line is not complete yet; everything we have belongs to it */
            data.remove_prefix(data.length());
            return;
        }
        data.remove_prefix(lineFeed + 1);

        state += 2; // include the CRLF that terminates the chunk payload
        state |= STATE_HAS_SIZE | STATE_IS_CHUNKED;
    }

    /*
     * Decreases the size part of state by `by`, leaving the flag bits untouched.
     * Callers must ensure by <= chunkSize(state).
     */
    inline void decChunkSize(uint64_t &state, uint64_t by) {
        state = (state & ~STATE_SIZE_MASK) | (chunkSize(state) - by);
    }

    /* Has the size line of the current chunk been read completely? */
    inline bool hasChunkSize(uint64_t state) {
        return (state & STATE_HAS_SIZE) != 0;
    }

    /* Are we in the middle of parsing chunked encoding? (any flag bit set) */
    inline bool isParsingChunkedEncoding(uint64_t state) {
        return (state & ~STATE_SIZE_MASK) != 0;
    }

    /* Did the parser give up on a malformed stream? */
    inline bool isParsingInvalidChunkedEncoding(uint64_t state) {
        return state == STATE_IS_ERROR;
    }

    /*
     * Returns the next piece of chunk payload found in data, removing what was consumed.
     *
     *   - a non-empty view:  payload bytes (possibly only part of a chunk)
     *   - an empty view:     the terminating zero-size chunk was read
     *   - std::nullopt:      input is exhausted, the body is finished (state == 0), or the
     *                        stream is malformed (isParsingInvalidChunkedEncoding(state))
     *
     * After the terminating chunk, 2 further bytes are dropped (4 when trailer is true)
     * before state is reset to 0; those bytes are not inspected.
     */
    inline std::optional<std::string_view> getNextChunk(std::string_view &data, uint64_t &state, bool trailer = false) {

        /* An error is sticky: never interpret the all-ones marker as flags + size */
        if (isParsingInvalidChunkedEncoding(state)) {
            return std::nullopt;
        }

        while (data.length()) {

            /* "Drop trailer mode": just discard as many bytes as the size says */
            if (((state & STATE_IS_CHUNKED) == 0) && hasChunkSize(state) && chunkSize(state)) {

                const uint64_t left = chunkSize(state);
                const size_t drop = data.length() < left ? data.length() : static_cast<size_t>(left);
                data.remove_prefix(drop);
                decChunkSize(state, drop);

                if (chunkSize(state) == 0) {
                    /* This is an actual place where we need 0 as state */
                    state = 0;

                    /* The parser MUST stop consuming here */
                    return std::nullopt;
                }
                continue;
            }

            /* No size yet: read (more of) the size line */
            if (!hasChunkSize(state)) {
                consumeHexNumber(data, state);
                if (isParsingInvalidChunkedEncoding(state)) {
                    return std::nullopt;
                }
                if (hasChunkSize(state) && chunkSize(state) == 2) {
                    /* Zero-size chunk: switch to drop mode (STATE_IS_CHUNKED cleared) and report it */
                    state = (trailer ? 4ull : 2ull) | STATE_HAS_SIZE;
                    return std::string_view(nullptr, 0);
                }
                continue;
            }

            const uint64_t remaining = chunkSize(state);

            if (data.length() >= remaining) {
                /* The rest of the chunk is here: emit all but the CRLF and start over */
                std::string_view emitSoon;
                const bool shouldEmit = remaining > 2;
                if (shouldEmit) {
                    emitSoon = data.substr(0, static_cast<size_t>(remaining - 2));
                }
                data.remove_prefix(static_cast<size_t>(remaining));
                state = STATE_IS_CHUNKED;
                if (shouldEmit) {
                    return emitSoon;
                }
                continue;
            }

            /* Only part of the chunk is here: all input gets consumed */
            std::string_view emitSoon;
            if (remaining > 2) {
                /* Never hand the trailing CRLF to the application */
                const uint64_t maximalAppEmit = remaining - 2;
                emitSoon = data.length() > maximalAppEmit
                    ? data.substr(0, static_cast<size_t>(maximalAppEmit))
                    : data;
            }
            decChunkSize(state, data.length());
            state |= STATE_IS_CHUNKED;
            data.remove_prefix(data.length());
            if (emitSoon.length()) {
                return emitSoon;
            }
            return std::nullopt;
        }

        return std::nullopt;
    }

    /*
     * Convenience wrapper that lets a range-based for loop walk the chunks:
     *
     *     for (auto chunk : ChunkIterator(&data, &state)) { ... }
     *
     * data and state are borrowed; they are advanced as the loop proceeds. A
     * default-constructed iterator holds no chunk and acts as the end marker.
     */
    struct ChunkIterator {

        std::string_view *data = nullptr;
        std::optional<std::string_view> chunk;
        uint64_t *state = nullptr;
        bool trailer = false;

        /* Fetches the first chunk right away */
        ChunkIterator(std::string_view *data, uint64_t *state, bool trailer = false) : data(data), state(state), trailer(trailer) {
            chunk = uWS::getNextChunk(*data, *state, trailer);
        }

        /* End marker: no chunk, no borrowed pointers */
        ChunkIterator() = default;

        ChunkIterator begin() const {
            return *this;
        }

        ChunkIterator end() const {
            return ChunkIterator();
        }

        /* Dereferencing an iterator without a chunk is a programming error */
        std::string_view operator*() const {
            if (!chunk.has_value()) {
                std::abort();
            }
            return chunk.value();
        }

        /* Two iterators differ when exactly one of them holds a chunk */
        bool operator!=(const ChunkIterator &other) const {
            return other.chunk.has_value() != chunk.has_value();
        }

        ChunkIterator &operator++() {
            chunk = uWS::getNextChunk(*data, *state, trailer);
            return *this;
        }

    };
}

#endif // UWS_CHUNKEDENCODING_H

Coverage Report

Created: 2025-06-13 06:09

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Authored by Alex Hultman, 2018-2022.
3		* Intellectual property of third-party.
4
5		* Licensed under the Apache License, Version 2.0 (the "License");
6		* you may not use this file except in compliance with the License.
7		* You may obtain a copy of the License at
8
9		* http://www.apache.org/licenses/LICENSE-2.0
10
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		#ifndef UWS_CHUNKEDENCODING_H
19		#define UWS_CHUNKEDENCODING_H
20
21		/* Independent chunked encoding parser, used by HttpParser. */
22
23		#include <string>
24		#include <cstring>
25		#include <algorithm>
26		#include <string_view>
27		#include "MoveOnlyFunction.h"
28		#include <optional>
29
30		namespace uWS {
31
32		constexpr uint64_t STATE_HAS_SIZE = 1ull << (sizeof(uint64_t) * 8 - 1);//0x80000000;
33		constexpr uint64_t STATE_IS_CHUNKED = 1ull << (sizeof(uint64_t) * 8 - 2);//0x40000000;
34		constexpr uint64_t STATE_SIZE_MASK = ~(3ull << (sizeof(uint64_t) * 8 - 2));//0x3FFFFFFF;
35		constexpr uint64_t STATE_IS_ERROR = ~0ull;//0xFFFFFFFF;
36		constexpr uint64_t STATE_SIZE_OVERFLOW = 0x0Full << (sizeof(uint64_t) * 8 - 8);//0x0F000000;
37
38	274k	inline uint64_t chunkSize(uint64_t state) {
39	274k	return state & STATE_SIZE_MASK;
40	274k	}
41
42		/* Reads hex number until CR or out of data to consume. Updates state. Returns bytes consumed. */
43	43.2k	inline void consumeHexNumber(std::string_view &data, uint64_t &state) {
44		/* Consume everything higher than 32 */
45	58.1k	while (data.length() && data.data()[0] > 32) {
46
47	16.3k	unsigned char digit = (unsigned char)data.data()[0];
48	16.3k	if (digit >= 'a') {
49	3.87k	digit = (unsigned char) (digit - ('a' - ':'));
50	12.4k	} else if (digit >= 'A') {
51	4.14k	digit = (unsigned char) (digit - ('A' - ':'));
52	4.14k	}
53
54	16.3k	unsigned int number = ((unsigned int) digit - (unsigned int) '0');
55
56	16.3k	if (number > 16 \|\| (chunkSize(state) & STATE_SIZE_OVERFLOW)) {
57	1.44k	state = STATE_IS_ERROR;
58	1.44k	return;
59	1.44k	}
60
61		// extract state bits
62	14.9k	uint64_t bits = /state &/ STATE_IS_CHUNKED;
63
64	14.9k	state = (state & STATE_SIZE_MASK) * 16ull + number;
65
66	14.9k	state \|= bits;
67	14.9k	data.remove_prefix(1);
68	14.9k	}
69		/* Consume everything not /n */
70	218k	while (data.length() && data.data()[0] != '\n') {
71	177k	data.remove_prefix(1);
72	177k	}
73		/* Now we stand on \n so consume it and enable size */
74	41.7k	if (data.length()) {
75	34.1k	state += 2; // include the two last /r/n
76	34.1k	state \|= STATE_HAS_SIZE \| STATE_IS_CHUNKED;
77	34.1k	data.remove_prefix(1);
78	34.1k	}
79	41.7k	}
80
81	61.1k	inline void decChunkSize(uint64_t &state, unsigned int by) {
82
83		//unsigned int bits = state & STATE_IS_CHUNKED;
84
85	61.1k	state = (state & ~STATE_SIZE_MASK) \| (chunkSize(state) - by);
86
87		//state \|= bits;
88	61.1k	}
89
90	120k	inline bool hasChunkSize(uint64_t state) {
91	120k	return state & STATE_HAS_SIZE;
92	120k	}
93
94		/* Are we in the middle of parsing chunked encoding? */
95	13.0k	inline bool isParsingChunkedEncoding(uint64_t state) {
96	13.0k	return state & ~STATE_SIZE_MASK;
97	13.0k	}
98
99	85.8k	inline bool isParsingInvalidChunkedEncoding(uint64_t state) {
100	85.8k	return state == STATE_IS_ERROR;
101	85.8k	}
102
103		/* Returns next chunk (empty or not), or if all data was consumed, nullopt is returned. */
104	77.2k	static std::optional<std::string_view> getNextChunk(std::string_view &data, uint64_t &state, bool trailer = false) {
105
106	91.9k	while (data.length()) {
107
108		// if in "drop trailer mode", just drop up to what we have as size
109	78.8k	if (((state & STATE_IS_CHUNKED) == 0) && hasChunkSize(state) && chunkSize(state)) {
110
111		//printf("Parsing trailer now\n");
112
113	60.0k	while(data.length() && chunkSize(state)) {
114	57.8k	data.remove_prefix(1);
115	57.8k	decChunkSize(state, 1);
116
117	57.8k	if (chunkSize(state) == 0) {
118
119		/* This is an actual place where we need 0 as state */
120	27.8k	state = 0;
121
122		/* The parser MUST stop consuming here */
123	27.8k	return std::nullopt;
124	27.8k	}
125	57.8k	}
126	2.20k	continue;
127	30.0k	}
128
129	48.7k	if (!hasChunkSize(state)) {
130	43.2k	consumeHexNumber(data, state);
131	43.2k	if (isParsingInvalidChunkedEncoding(state)) {
132	1.44k	return std::nullopt;
133	1.44k	}
134	41.7k	if (hasChunkSize(state) && chunkSize(state) == 2) {
135
136		//printf("Setting state to trailer-parsing and emitting empty chunk\n");
137
138		// set trailer state and increase size to 4
139	30.0k	if (trailer) {
140	0	state = 4 /\| STATE_IS_CHUNKED/ \| STATE_HAS_SIZE;
141	30.0k	} else {
142	30.0k	state = 2 /\| STATE_IS_CHUNKED/ \| STATE_HAS_SIZE;
143	30.0k	}
144
145	30.0k	return std::string_view(nullptr, 0);
146	30.0k	}
147	11.7k	continue;
148	41.7k	}
149
150		// do we have data to emit all?
151	5.57k	if (data.length() >= chunkSize(state)) {
152		// emit all but 2 bytes then reset state to 0 and goto beginning
153		// not fin
154	2.29k	std::string_view emitSoon;
155	2.29k	bool shouldEmit = false;
156	2.29k	if (chunkSize(state) > 2) {
157	1.50k	emitSoon = std::string_view(data.data(), chunkSize(state) - 2);
158	1.50k	shouldEmit = true;
159	1.50k	}
160	2.29k	data.remove_prefix(chunkSize(state));
161	2.29k	state = STATE_IS_CHUNKED;
162	2.29k	if (shouldEmit) {
163	1.50k	return emitSoon;
164	1.50k	}
165	788	continue;
166	3.28k	} else {
167		/* We will consume all our input data */
168	3.28k	std::string_view emitSoon;
169	3.28k	if (chunkSize(state) > 2) {
170	3.08k	uint64_t maximalAppEmit = chunkSize(state) - 2;
171	3.08k	if (data.length() > maximalAppEmit) {
172	897	emitSoon = data.substr(0, maximalAppEmit);
173	2.18k	} else {
174		//cb(data);
175	2.18k	emitSoon = data;
176	2.18k	}
177	3.08k	}
178	3.28k	decChunkSize(state, (unsigned int) data.length());
179	3.28k	state \|= STATE_IS_CHUNKED;
180		// new: decrease data by its size (bug)
181	3.28k	data.remove_prefix(data.length()); // ny bug fix för getNextChunk
182	3.28k	if (emitSoon.length()) {
183	3.08k	return emitSoon;
184	3.08k	} else {
185	194	return std::nullopt;
186	194	}
187	3.28k	}
188	5.57k	}
189
190	13.1k	return std::nullopt;
191	77.2k	}
192
193		/* This is really just a wrapper for convenience */
194		struct ChunkIterator {
195
196		std::string_view *data;
197		std::optional<std::string_view> chunk;
198		uint64_t *state;
199		bool trailer;
200
201	42.6k	ChunkIterator(std::string_view data, uint64_t state, bool trailer = false) : data(data), state(state), trailer(trailer) {
202	42.6k	chunk = uWS::getNextChunk(data, state, trailer);
203	42.6k	}
204
205	42.6k	ChunkIterator() {
206
207	42.6k	}
208
209	42.6k	ChunkIterator begin() {
210	42.6k	return *this;
211	42.6k	}
212
213	42.6k	ChunkIterator end() {
214	42.6k	return ChunkIterator();
215	42.6k	}
216
217	34.6k	std::string_view operator*() {
218	34.6k	if (!chunk.has_value()) {
219	0	std::abort();
220	0	}
221	34.6k	return chunk.value();
222	34.6k	}
223
224	77.2k	bool operator!=(const ChunkIterator &other) const {
225	77.2k	return other.chunk.has_value() != chunk.has_value();
226	77.2k	}
227
228	34.6k	ChunkIterator &operator++() {
229	34.6k	chunk = uWS::getNextChunk(data, state, trailer);
230	34.6k	return *this;
231	34.6k	}
232
233		};
234		}
235
236		#endif // UWS_CHUNKEDENCODING_H