/src/uWebSockets/src/ChunkedEncoding.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Authored by Alex Hultman, 2018-2022. |
3 | | * Intellectual property of third-party. |
4 | | |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at |
8 | | |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | #ifndef UWS_CHUNKEDENCODING_H |
19 | | #define UWS_CHUNKEDENCODING_H |
20 | | |
21 | | /* Independent chunked encoding parser, used by HttpParser. */ |
22 | | |
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <optional>
#include <string>
#include <string_view>

#include "MoveOnlyFunction.h"
29 | | |
namespace uWS {

    /* The parser state is a single 32-bit word shared by every function below:
     *   STATE_HAS_SIZE      - a complete chunk-size line has been read; the low bits are a byte countdown
     *   STATE_IS_CHUNKED    - set while a size line or a chunk body is being processed; clear while
     *                         the bytes following the terminating zero-size chunk are being dropped
     *   STATE_SIZE_MASK     - low 30 bits: bytes still to consume (payload plus its trailing CRLF)
     *   STATE_IS_ERROR      - all bits set: the input was rejected
     *   STATE_SIZE_OVERFLOW - if any of these size bits is set, appending another hex digit is refused */
    inline constexpr std::uint32_t STATE_HAS_SIZE = 0x80000000;
    inline constexpr std::uint32_t STATE_IS_CHUNKED = 0x40000000;
    inline constexpr std::uint32_t STATE_SIZE_MASK = 0x3FFFFFFF;
    inline constexpr std::uint32_t STATE_IS_ERROR = 0xFFFFFFFF;
    inline constexpr std::uint32_t STATE_SIZE_OVERFLOW = 0x0F000000;

    /* Returns the byte countdown stored in the low 30 bits of state. */
    inline unsigned int chunkSize(unsigned int state) {
        return state & STATE_SIZE_MASK;
    }

    /* Reads a hexadecimal chunk-size line from the front of data, consuming what it reads.
     *
     * Digits are accumulated into the size bits of state, so a size line that is split across
     * several buffers can be continued by calling again with the next buffer. Once the
     * terminating LF has been consumed, 2 is added to the size (to account for the CRLF that
     * follows the chunk payload) and STATE_HAS_SIZE | STATE_IS_CHUNKED are set.
     *
     * On malformed input state is set to STATE_IS_ERROR and the function returns without
     * consuming the offending byte. Malformed means: any byte above 32 that is not one of
     * 0-9, a-f, A-F (this includes ';', so chunk extensions are rejected rather than being
     * folded into the size), or a size that would overflow.
     *
     * NOTE(review): bytes between the digits and the LF are skipped without inspection, and no
     * state records that the digits are finished. If a buffer ends inside that skipped region,
     * the next call will treat its leading bytes as digits again. */
    inline void consumeHexNumber(std::string_view &data, unsigned int &state) {
        /* Every byte above 32 (space) is expected to be a hex digit. Compare as unsigned so
         * that bytes >= 0x80 are handled identically regardless of the signedness of char. */
        while (!data.empty() && static_cast<unsigned char>(data.front()) > 32) {

            const unsigned char c = static_cast<unsigned char>(data.front());

            unsigned int number;
            if (c >= '0' && c <= '9') {
                number = static_cast<unsigned int>(c - '0');
            } else if (c >= 'a' && c <= 'f') {
                number = static_cast<unsigned int>(c - 'a') + 10u;
            } else if (c >= 'A' && c <= 'F') {
                number = static_cast<unsigned int>(c - 'A') + 10u;
            } else {
                state = STATE_IS_ERROR;
                return;
            }

            /* Refuse to shift in another digit once the size is already too large */
            if (chunkSize(state) & STATE_SIZE_OVERFLOW) {
                state = STATE_IS_ERROR;
                return;
            }

            /* The flag bits are dropped by the multiplication, so re-apply the chunked flag */
            state = chunkSize(state) * 16u + number;
            state |= STATE_IS_CHUNKED;

            data.remove_prefix(1);
        }

        /* Skip whatever remains of the line; if there is no LF yet we need more data */
        const std::size_t lineFeed = data.find('\n');
        if (lineFeed == std::string_view::npos) {
            data.remove_prefix(data.length());
            return;
        }

        /* Consume through the LF and arm the countdown */
        data.remove_prefix(lineFeed + 1);
        state += 2; // include the CRLF that trails the chunk payload
        state |= STATE_HAS_SIZE | STATE_IS_CHUNKED;
    }

    /* Subtracts by from the byte countdown while keeping the flag bits.
     * The caller must ensure by <= chunkSize(state); a larger value wraps into the flag bits. */
    inline void decChunkSize(unsigned int &state, unsigned int by) {
        state = (state & ~STATE_SIZE_MASK) | (chunkSize(state) - by);
    }

    /* True once a complete chunk-size line has been read (STATE_HAS_SIZE is set). */
    inline bool hasChunkSize(unsigned int state) {
        return (state & STATE_HAS_SIZE) != 0;
    }

    /* Are we in the middle of parsing chunked encoding? True if any flag bit is set. */
    inline bool isParsingChunkedEncoding(unsigned int state) {
        return (state & ~STATE_SIZE_MASK) != 0;
    }

    /* True if the parser has rejected the input. */
    inline bool isParsingInvalidChunkedEncoding(unsigned int state) {
        return state == STATE_IS_ERROR;
    }

    /* Returns the next piece of chunk payload found in data, consuming what was parsed.
     *
     *   - A non-empty view is payload; a chunk split across buffers is returned in pieces.
     *   - An empty view (has_value() is true) reports the terminating zero-size chunk.
     *   - nullopt means there is nothing to emit: data was exhausted, the bytes following the
     *     zero-size chunk were fully dropped (state becomes 0), or the input was rejected
     *     (check isParsingInvalidChunkedEncoding(state)).
     *
     * trailer selects how many bytes are dropped after the zero-size chunk: 4 when true,
     * 2 when false. The dropped bytes are not inspected. */
    inline std::optional<std::string_view> getNextChunk(std::string_view &data, unsigned int &state, bool trailer = false) {

        /* A rejected stream stays rejected. Without this guard the all-ones error marker would
         * be read as "chunk of 0x3FFFFFFF bytes pending" and the input emitted as payload. */
        if (isParsingInvalidChunkedEncoding(state)) {
            return std::nullopt;
        }

        while (!data.empty()) {

            /* Dropping mode: size known, chunked flag clear, bytes left to drop */
            if ((state & STATE_IS_CHUNKED) == 0 && hasChunkSize(state) && chunkSize(state)) {
                const std::size_t drop = std::min<std::size_t>(chunkSize(state), data.length());
                data.remove_prefix(drop);
                decChunkSize(state, static_cast<unsigned int>(drop));

                if (chunkSize(state) == 0) {
                    /* This is an actual place where we need 0 as state */
                    state = 0;
                }

                /* Either everything was dropped (the parser MUST stop consuming here),
                 * or data ran out; in both cases there is nothing to emit. */
                return std::nullopt;
            }

            /* No size yet: read (or continue reading) the chunk-size line */
            if (!hasChunkSize(state)) {
                consumeHexNumber(data, state);
                if (isParsingInvalidChunkedEncoding(state)) {
                    return std::nullopt;
                }

                /* A countdown of exactly 2 means the declared size was 0: switch to dropping
                 * mode and report the end of the body with an empty chunk. */
                if (hasChunkSize(state) && chunkSize(state) == 2) {
                    state = (trailer ? 4u : 2u) | STATE_HAS_SIZE;
                    return std::string_view{};
                }
                continue;
            }

            const unsigned int remaining = chunkSize(state);

            if (data.length() >= remaining) {
                /* The rest of this chunk is available: emit all but the trailing 2 bytes,
                 * then go back to expecting a size line. */
                const bool hasPayload = remaining > 2;
                const std::string_view payload = hasPayload ? data.substr(0, remaining - 2) : std::string_view{};

                data.remove_prefix(remaining);
                state = STATE_IS_CHUNKED;

                if (hasPayload) {
                    return payload;
                }
                continue;
            }

            /* The buffer ends inside the chunk: everything is consumed, but never emit the
             * trailing 2 bytes as payload (substr clamps to the available length). */
            std::string_view payload;
            if (remaining > 2) {
                payload = data.substr(0, remaining - 2);
            }

            decChunkSize(state, static_cast<unsigned int>(data.length()));
            state |= STATE_IS_CHUNKED;
            data.remove_prefix(data.length());

            if (payload.empty()) {
                return std::nullopt;
            }
            return payload;
        }

        return std::nullopt;
    }

    /* Convenience wrapper that lets a range-based for loop iterate over the chunks produced
     * by getNextChunk. The object acts as both the range and its iterator; a default-constructed
     * instance (no chunk) is the end sentinel. Iteration stops when getNextChunk returns nullopt. */
    struct ChunkIterator {

        /* Input buffer and parser state; both are advanced in place by getNextChunk */
        std::string_view *data = nullptr;
        /* The chunk currently pointed at, or nullopt when iteration is over */
        std::optional<std::string_view> chunk;
        unsigned int *state = nullptr;
        /* Forwarded unchanged to getNextChunk */
        bool trailer = false;

        /* Fetches the first chunk immediately */
        ChunkIterator(std::string_view *data, unsigned int *state, bool trailer = false) : data(data), state(state), trailer(trailer) {
            chunk = uWS::getNextChunk(*data, *state, trailer);
        }

        /* End sentinel: holds no chunk, and all members are initialized so it can be copied safely */
        ChunkIterator() = default;

        ChunkIterator begin() const {
            return *this;
        }

        ChunkIterator end() const {
            return ChunkIterator();
        }

        /* Dereferencing past the end is a programming error and aborts */
        std::string_view operator*() const {
            if (!chunk.has_value()) {
                std::abort();
            }
            return *chunk;
        }

        /* Two iterators differ only in whether they currently hold a chunk */
        bool operator!=(const ChunkIterator &other) const {
            return other.chunk.has_value() != chunk.has_value();
        }

        ChunkIterator &operator++() {
            chunk = uWS::getNextChunk(*data, *state, trailer);
            return *this;
        }

    };
} // namespace uWS
235 | | |
236 | | #endif // UWS_CHUNKEDENCODING_H |