/src/uWebSockets/src/ChunkedEncoding.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Authored by Alex Hultman, 2018-2022. |
3 | | * Intellectual property of third-party. |
4 | | |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at |
8 | | |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | #ifndef UWS_CHUNKEDENCODING_H |
19 | | #define UWS_CHUNKEDENCODING_H |
20 | | |
21 | | /* Independent chunked encoding parser, used by HttpParser. */ |
22 | | |
23 | | #include <string> |
24 | | #include <cstring> |
25 | | #include <algorithm> |
26 | | #include <string_view> |
27 | | #include "MoveOnlyFunction.h" |
28 | | #include <optional> |
29 | | |
namespace uWS {

    /* Layout of the 64-bit parser state:
     *   bit 63      STATE_HAS_SIZE   - a complete chunk-size line (up to and including '\n') was parsed
     *   bit 62      STATE_IS_CHUNKED - we are in the middle of a chunked body
     *   bits 0..61  remaining byte count; for a chunk this includes its trailing "\r\n"
     * A state of exactly 0 means "not parsing chunked encoding". */
    constexpr uint64_t STATE_HAS_SIZE = 1ull << (sizeof(uint64_t) * 8 - 1);          // 0x8000000000000000
    constexpr uint64_t STATE_IS_CHUNKED = 1ull << (sizeof(uint64_t) * 8 - 2);        // 0x4000000000000000
    constexpr uint64_t STATE_SIZE_MASK = ~(3ull << (sizeof(uint64_t) * 8 - 2));      // 0x3FFFFFFFFFFFFFFF
    constexpr uint64_t STATE_IS_ERROR = ~0ull;                                       // 0xFFFFFFFFFFFFFFFF
    /* If any of these bits is set in the size, shifting in another hex digit would spill into the flag bits */
    constexpr uint64_t STATE_SIZE_OVERFLOW = 0x0Full << (sizeof(uint64_t) * 8 - 8);  // 0x0F00000000000000

    /* Returns the size part of the state (flag bits stripped). */
    inline uint64_t chunkSize(uint64_t state) {
        return state & STATE_SIZE_MASK;
    }

    /* Reads the hexadecimal chunk-size line from data and accumulates it into state.
     * Consumed bytes are removed from the front of data; nothing is returned.
     *
     * - Only [0-9a-fA-F] are accepted as digits. Any other byte greater than 32 (including
     *   bytes >= 0x80, regardless of whether plain char is signed) sets state to STATE_IS_ERROR
     *   and leaves the offending byte unconsumed.
     * - A size too large to shift in another digit also sets STATE_IS_ERROR.
     * - Once '\n' is reached, 2 is added to the size (for the chunk's trailing "\r\n") and
     *   STATE_HAS_SIZE | STATE_IS_CHUNKED are set.
     * - If data runs out first, the partial number is kept in state (with STATE_IS_CHUNKED)
     *   so that a later call can continue.
     *
     * NOTE(review): a byte <= 32 ends the number and everything up to '\n' is skipped; the state
     * has no flag for "inside the skipped region", so if input ends there the next call resumes
     * in digit-parsing mode. */
    inline void consumeHexNumber(std::string_view &data, uint64_t &state) {
        /* Parse digits for as long as we see bytes above 32 (space) */
        while (data.length() && (unsigned char) data[0] > 32) {

            const unsigned char c = (unsigned char) data[0];

            unsigned int number;
            if (c >= '0' && c <= '9') {
                number = (unsigned int) (c - '0');
            } else if (c >= 'a' && c <= 'f') {
                number = (unsigned int) (c - 'a') + 10u;
            } else if (c >= 'A' && c <= 'F') {
                number = (unsigned int) (c - 'A') + 10u;
            } else {
                /* Not a hex digit: never guess a size from garbage */
                state = STATE_IS_ERROR;
                return;
            }

            /* Refuse to shift a digit into the flag bits */
            if (chunkSize(state) & STATE_SIZE_OVERFLOW) {
                state = STATE_IS_ERROR;
                return;
            }

            /* Only STATE_IS_CHUNKED is carried while the number is being built */
            state = (chunkSize(state) * 16ull + number) | STATE_IS_CHUNKED;
            data.remove_prefix(1);
        }

        /* Skip the rest of the line, stopping on '\n' */
        while (data.length() && data[0] != '\n') {
            data.remove_prefix(1);
        }

        /* If we stand on '\n', consume it and mark the size as complete */
        if (data.length()) {
            state += 2; // account for the "\r\n" that follows the chunk payload
            state |= STATE_HAS_SIZE | STATE_IS_CHUNKED;
            data.remove_prefix(1);
        }
    }

    /* Decreases the size part of state by `by`, keeping the flag bits.
     * The caller must ensure by <= chunkSize(state). */
    inline void decChunkSize(uint64_t &state, uint64_t by) {
        state = (state & ~STATE_SIZE_MASK) | (chunkSize(state) - by);
    }

    /* True once a complete chunk-size line has been parsed. */
    inline bool hasChunkSize(uint64_t state) {
        return (state & STATE_HAS_SIZE) != 0;
    }

    /* Are we in the middle of parsing chunked encoding? (any flag bit set) */
    inline bool isParsingChunkedEncoding(uint64_t state) {
        return (state & ~STATE_SIZE_MASK) != 0;
    }

    /* True if the parser hit malformed input and state was set to STATE_IS_ERROR. */
    inline bool isParsingInvalidChunkedEncoding(uint64_t state) {
        return state == STATE_IS_ERROR;
    }

    /* Returns next chunk (empty or not), or if all data was consumed, nullopt is returned.
     *
     * data    - unparsed input; consumed bytes are removed from its front.
     * state   - parser state carried between calls; start with 0.
     * trailer - when true, 4 bytes instead of 2 are dropped after the zero-length chunk.
     *
     * An empty (but present) chunk is returned when the zero-length chunk line is parsed.
     * nullopt is also returned on malformed input (check isParsingInvalidChunkedEncoding)
     * and when the final bytes after the zero-length chunk have been dropped, in which
     * case state is reset to 0 and any remaining data is left untouched. */
    inline std::optional<std::string_view> getNextChunk(std::string_view &data, uint64_t &state, bool trailer = false) {

        while (data.length()) {

            /* "Drop trailer mode": size is known but STATE_IS_CHUNKED is cleared, so just
             * discard up to what we have as size */
            if (((state & STATE_IS_CHUNKED) == 0) && hasChunkSize(state) && chunkSize(state)) {

                while (data.length() && chunkSize(state)) {
                    data.remove_prefix(1);
                    decChunkSize(state, 1);

                    if (chunkSize(state) == 0) {

                        /* This is an actual place where we need 0 as state */
                        state = 0;

                        /* The parser MUST stop consuming here */
                        return std::nullopt;
                    }
                }
                continue;
            }

            /* No complete size yet: keep parsing the chunk-size line */
            if (!hasChunkSize(state)) {
                consumeHexNumber(data, state);
                if (isParsingInvalidChunkedEncoding(state)) {
                    return std::nullopt;
                }
                /* Size 2 right after parsing means a zero-length chunk (only its "\r\n" remains) */
                if (hasChunkSize(state) && chunkSize(state) == 2) {

                    /* Enter drop mode (STATE_IS_CHUNKED cleared) and emit an empty chunk */
                    if (trailer) {
                        state = 4 | STATE_HAS_SIZE;
                    } else {
                        state = 2 | STATE_HAS_SIZE;
                    }

                    return std::string_view(nullptr, 0);
                }
                continue;
            }

            /* Do we have the whole remainder of this chunk? */
            if (data.length() >= chunkSize(state)) {
                /* Emit all but the last 2 bytes, then go back to expecting a chunk-size line */
                std::string_view emitSoon;
                bool shouldEmit = false;
                if (chunkSize(state) > 2) {
                    emitSoon = std::string_view(data.data(), chunkSize(state) - 2);
                    shouldEmit = true;
                }
                data.remove_prefix(chunkSize(state));
                state = STATE_IS_CHUNKED;
                if (shouldEmit) {
                    return emitSoon;
                }
                continue;
            } else {
                /* We will consume all our input data */
                std::string_view emitSoon;
                if (chunkSize(state) > 2) {
                    /* Never hand the trailing 2 bytes to the application */
                    uint64_t maximalAppEmit = chunkSize(state) - 2;
                    if (data.length() > maximalAppEmit) {
                        emitSoon = data.substr(0, maximalAppEmit);
                    } else {
                        emitSoon = data;
                    }
                }
                decChunkSize(state, data.length());
                state |= STATE_IS_CHUNKED;
                /* Bug fix for getNextChunk: the input must also be shrunk by what we consumed */
                data.remove_prefix(data.length());
                if (emitSoon.length()) {
                    return emitSoon;
                } else {
                    return std::nullopt;
                }
            }
        }

        return std::nullopt;
    }

    /* This is really just a wrapper for convenience; it lets callers write
     *   for (auto chunk : ChunkIterator(&data, &state)) { ... }
     * Iteration ends when getNextChunk returns nullopt. */
    struct ChunkIterator {

        /* Defaults keep the end() sentinel fully initialized so it can be copied safely */
        std::string_view *data = nullptr;
        std::optional<std::string_view> chunk;
        uint64_t *state = nullptr;
        bool trailer = false;

        /* Fetches the first chunk immediately; data and state must outlive the iteration */
        ChunkIterator(std::string_view *data, uint64_t *state, bool trailer = false) : data(data), state(state), trailer(trailer) {
            chunk = uWS::getNextChunk(*data, *state, trailer);
        }

        /* Sentinel: holds no chunk */
        ChunkIterator() = default;

        ChunkIterator begin() {
            return *this;
        }

        ChunkIterator end() {
            return ChunkIterator();
        }

        /* Dereferencing an iterator without a chunk is a programming error and aborts */
        std::string_view operator*() {
            if (!chunk.has_value()) {
                std::abort();
            }
            return chunk.value();
        }

        /* Two iterators differ when exactly one of them holds a chunk */
        bool operator!=(const ChunkIterator &other) const {
            return other.chunk.has_value() != chunk.has_value();
        }

        ChunkIterator &operator++() {
            chunk = uWS::getNextChunk(*data, *state, trailer);
            return *this;
        }

    };
}
235 | | |
236 | | #endif // UWS_CHUNKEDENCODING_H |