/src/uWebSockets/src/Multipart.h
Line | Count | Source |
1 | | /* |
2 | | * Authored by Alex Hultman, 2018-2020. |
3 | | * Intellectual property of third-party. |
4 | | |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at |
8 | | |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | /* Implements the multipart protocol. Builds atop parts of our common http parser (not yet refactored that way). */ |
19 | | /* https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html */ |
20 | | |
21 | | #ifndef UWS_MULTIPART_H |
22 | | #define UWS_MULTIPART_H |
23 | | |
24 | | #include "MessageParser.h" |
25 | | |
26 | | #include <string_view> |
27 | | #include <optional> |
28 | | #include <cstring> |
29 | | #include <utility> |
30 | | #include <cctype> |
31 | | |
32 | | namespace uWS { |
33 | | |
/* Tokenizer for parameter lines of the form `token; key=value; key="quoted value"`,
 * such as the content-disposition header of a multipart part.
 * This one could possibly be shared with ExtensionsParser to some degree */
struct ParameterParser {

    /* Takes the line, commonly given as content-disposition header in the multipart.
     * Only a view is stored; the characters it refers to must outlive the parser. */
    ParameterParser(std::string_view line) : remainingLine(line) {}

    /* Returns next key/value where value can simply be empty.
     * If key (first) is empty then we are at the end.
     * The token following the key is taken as the operator: when it is missing or is ';'
     * the key has no value, otherwise (normally '=') the token after it is the value. */
    std::pair<std::string_view, std::string_view> getKeyValue() {
        std::string_view key = getToken();
        std::string_view op = getToken();

        if (op.empty() || op[0] == ';') {
            return {key, ""};
        }

        std::string_view value = getToken();
        /* Strip ; or if at end, nothing */
        getToken();
        return {key, value};
    }

private:
    /* The part of the line that has not been tokenized yet */
    std::string_view remainingLine;

    /* isspace has undefined behavior for negative arguments, which is what a plain char
     * holding a byte >= 0x80 becomes where char is signed; go through unsigned char */
    static bool isSpace(char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    }

    /* The two single-character operators of the grammar */
    static bool isOperator(char c) {
        return c == ';' || c == '=';
    }

    /* Consumes a token from the line. Will "unquote" strings.
     * Returns an empty view at end of line and for a quoted string that is never closed. */
    std::string_view getToken() {
        /* Strip whitespace */
        while (!remainingLine.empty() && isSpace(remainingLine.front())) {
            remainingLine.remove_prefix(1);
        }

        if (remainingLine.empty()) {
            /* All we had was space */
            return {};
        }

        /* Are we at an operator? */
        if (isOperator(remainingLine.front())) {
            std::string_view op = remainingLine.substr(0, 1);
            remainingLine.remove_prefix(1);
            return op;
        }

        /* Are we at a quoted string? */
        if (remainingLine.front() == '\"') {
            /* Remove first quote and look for the closing one */
            remainingLine.remove_prefix(1);
            const std::string_view::size_type closingQuote = remainingLine.find('\"');

            if (closingQuote == std::string_view::npos) {
                /* Unterminated quote: the rest of the line is consumed and nothing is returned */
                remainingLine.remove_prefix(remainingLine.length());
                return {};
            }

            std::string_view quoted = remainingLine.substr(0, closingQuote);
            remainingLine.remove_prefix(closingQuote + 1);
            return quoted;
        }

        /* Read anything until ; = space or end */
        std::string_view::size_type tokenLength = 0;
        while (tokenLength < remainingLine.length()
                && !isOperator(remainingLine[tokenLength])
                && !isSpace(remainingLine[tokenLength])) {
            ++tokenLength;
        }

        std::string_view token = remainingLine.substr(0, tokenLength);
        remainingLine.remove_prefix(tokenLength);
        return token;
    }
};
121 | | |
122 | | struct MultipartParser { |
123 | | |
124 | | /* 2 chars of hyphen + 1 - 70 chars of boundary */ |
125 | | char prependedBoundaryBuffer[72]; |
126 | | std::string_view prependedBoundary; |
127 | | std::string_view remainingBody; |
128 | | bool first = true; |
129 | | |
130 | | /* I think it is more than sane to limit this to 10 per part */ |
131 | | //static const int MAX_HEADERS = 10; |
132 | | |
133 | | /* Construct the parser based on contentType (reads boundary) */ |
134 | 1.49k | MultipartParser(std::string_view contentType) { |
135 | | |
136 | | /* We expect the form "multipart/something;somethingboundary=something" */ |
137 | 1.49k | if (contentType.length() < 10 || contentType.substr(0, 10) != "multipart/") { |
138 | 135 | return; |
139 | 135 | } |
140 | | |
141 | | /* For now we simply guess boundary will lie between = and end. This is not entirely |
142 | | * standards compliant as boundary may be expressed with or without " and spaces */ |
143 | 1.35k | auto equalToken = contentType.find('=', 10); |
144 | 1.35k | if (equalToken != std::string_view::npos) { |
145 | | |
146 | | /* Boundary must be less than or equal to 70 chars yet 1 char or longer */ |
147 | 1.35k | std::string_view boundary = contentType.substr(equalToken + 1); |
148 | 1.35k | if (!boundary.length() || boundary.length() > 70) { |
149 | | /* Invalid size */ |
150 | 14 | return; |
151 | 14 | } |
152 | | |
153 | | /* Prepend it with two hyphens */ |
154 | 1.33k | prependedBoundaryBuffer[0] = prependedBoundaryBuffer[1] = '-'; |
155 | 1.33k | memcpy(&prependedBoundaryBuffer[2], boundary.data(), boundary.length()); |
156 | | |
157 | 1.33k | prependedBoundary = {prependedBoundaryBuffer, boundary.length() + 2}; |
158 | 1.33k | } |
159 | 1.35k | } |
160 | | |
161 | | /* Is this even a valid multipart request? */ |
162 | 1.49k | bool isValid() { |
163 | 1.49k | return prependedBoundary.length() != 0; |
164 | 1.49k | } |
165 | | |
166 | | /* Set the body once, before getting any parts */ |
167 | 1.33k | void setBody(std::string_view body) { |
168 | 1.33k | remainingBody = body; |
169 | 1.33k | } |
170 | | |
171 | | /* Parse out the next part's data, filling the headers. Returns nullopt on end or error. */ |
172 | 4.05k | std::optional<std::string_view> getNextPart(std::pair<std::string_view, std::string_view> *headers) { |
173 | | |
174 | | /* The remaining two hyphens should be shorter than the boundary */ |
175 | 4.05k | if (remainingBody.length() < prependedBoundary.length()) { |
176 | | /* We are done now */ |
177 | 970 | return std::nullopt; |
178 | 970 | } |
179 | | |
180 | 3.08k | if (first) { |
181 | 1.30k | auto nextBoundary = remainingBody.find(prependedBoundary); |
182 | 1.30k | if (nextBoundary == std::string_view::npos) { |
183 | | /* Cannot parse */ |
184 | 101 | return std::nullopt; |
185 | 101 | } |
186 | | |
187 | | /* Toss away boundary and anything before it */ |
188 | 1.20k | remainingBody.remove_prefix(nextBoundary + prependedBoundary.length()); |
189 | 1.20k | first = false; |
190 | 1.20k | } |
191 | | |
192 | 2.98k | auto nextEndBoundary = remainingBody.find(prependedBoundary); |
193 | 2.98k | if (nextEndBoundary == std::string_view::npos) { |
194 | | /* Cannot parse (or simply done) */ |
195 | 93 | return std::nullopt; |
196 | 93 | } |
197 | | |
198 | 2.89k | std::string_view part = remainingBody.substr(0, nextEndBoundary); |
199 | 2.89k | remainingBody.remove_prefix(nextEndBoundary + prependedBoundary.length()); |
200 | | |
201 | | /* Also strip rn before and rn after the part */ |
202 | 2.89k | if (part.length() < 4) { |
203 | | /* Cannot strip */ |
204 | 33 | return std::nullopt; |
205 | 33 | } |
206 | 2.86k | part.remove_prefix(2); |
207 | 2.86k | part.remove_suffix(2); |
208 | | |
209 | | /* We are allowed to post pad like this because we know the boundary is at least 2 bytes */ |
210 | | /* This makes parsing a second pass invalid, so you can only iterate over parts once */ |
211 | 2.86k | memset((char *) part.data() + part.length(), '\r', 1); |
212 | | |
213 | | /* For this to be a valid part, we need to consume at least 4 bytes (\r\n\r\n) */ |
214 | 2.86k | int consumed = getHeaders((char *) part.data(), (char *) part.data() + part.length(), headers); |
215 | | |
216 | 2.86k | if (!consumed) { |
217 | | /* This is an invalid part */ |
218 | 139 | return std::nullopt; |
219 | 139 | } |
220 | | |
221 | | /* Strip away the headers from the part body data */ |
222 | 2.72k | part.remove_prefix(consumed); |
223 | | |
224 | | /* Now pass whatever is remaining of the part */ |
225 | 2.72k | return part; |
226 | 2.86k | } |
227 | | }; |
228 | | |
229 | | } |
230 | | |
231 | | #endif |