/src/uWebSockets/src/Multipart.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Authored by Alex Hultman, 2018-2020. |
3 | | * Intellectual property of third-party. |
4 | | |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at |
8 | | |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | */ |
17 | | |
18 | | /* Implements the multipart protocol. Builds atop parts of our common http parser (not yet refactored that way). */ |
19 | | /* https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html */ |
20 | | |
21 | | #ifndef UWS_MULTIPART_H |
22 | | #define UWS_MULTIPART_H |
23 | | |
24 | | #include "MessageParser.h" |
25 | | |
26 | | #include <string_view> |
27 | | #include <optional> |
28 | | #include <cstring> |
29 | | #include <utility> |
30 | | #include <cctype> |
31 | | |
32 | | namespace uWS { |
33 | | |
34 | | /* This one could possibly be shared with ExtensionsParser to some degree */ |
/* Splits a parameter line such as `form-data; name="field"; filename=a.txt`
 * (commonly the value of a content-disposition header in a multipart part)
 * into successive key/value pairs.
 *
 * The parser only keeps a view of the line; every view it hands out points
 * into the caller's buffer, so that buffer must outlive both the parser and
 * the returned views. */
struct ParameterParser {

    /* Takes the line, commonly given as content-disposition header in the multipart */
    ParameterParser(std::string_view line) : remainingLine(line) {}

    /* Returns the next key/value pair, where the value can simply be empty
     * (`key;`, a trailing `key`, or an unterminated quoted value).
     * If the key (first) is empty then we are at the end of the line. */
    std::pair<std::string_view, std::string_view> getKeyValue() {
        const std::string_view key = getToken();
        const std::string_view op = getToken();

        /* Either nothing follows the key, or the key is directly terminated by ';' */
        if (op.empty() || op.front() == ';') {
            return {key, ""};
        }

        /* Anything else is treated as the assignment operator; the value follows */
        const std::string_view value = getToken();

        /* Strip the terminating ';' or, if at the end, nothing */
        getToken();
        return {key, value};
    }

private:
    /* The part of the line that has not been consumed yet */
    std::string_view remainingLine;

    /* isspace is only defined for values representable as unsigned char (or EOF),
     * so a plain (possibly negative) char must be converted before the call */
    static bool isSpace(char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    }

    /* Consumes one token from the line and returns it. A token is either a single
     * operator character (';' or '='), the contents of a double quoted string
     * (returned without the quotes) or a run of characters up to the next
     * operator, whitespace or end of line. Returns an empty view at end of line. */
    std::string_view getToken() {
        /* Strip leading whitespace */
        while (!remainingLine.empty() && isSpace(remainingLine.front())) {
            remainingLine.remove_prefix(1);
        }

        if (remainingLine.empty()) {
            /* All we had was space */
            return {};
        }

        const char leading = remainingLine.front();

        /* Are we at an operator? */
        if (leading == ';' || leading == '=') {
            const std::string_view op = remainingLine.substr(0, 1);
            remainingLine.remove_prefix(1);
            return op;
        }

        /* Are we at a quoted string? */
        if (leading == '\"') {
            /* Drop the opening quote and look for the closing one */
            remainingLine.remove_prefix(1);
            const std::size_t closingQuote = remainingLine.find('\"');

            if (closingQuote == std::string_view::npos) {
                /* Unterminated quote: the rest of the line is consumed and yields nothing */
                remainingLine.remove_prefix(remainingLine.length());
                return {};
            }

            const std::string_view quoted = remainingLine.substr(0, closingQuote);
            /* Skip the quoted text together with its closing quote */
            remainingLine.remove_prefix(closingQuote + 1);
            return quoted;
        }

        /* Plain token: read anything until ; = space or end */
        std::size_t tokenLength = 0;
        while (tokenLength < remainingLine.length()) {
            const char c = remainingLine[tokenLength];
            if (c == ';' || c == '=' || isSpace(c)) {
                break;
            }
            ++tokenLength;
        }

        const std::string_view token = remainingLine.substr(0, tokenLength);
        remainingLine.remove_prefix(tokenLength);
        return token;
    }
};
121 | | |
122 | | struct MultipartParser { |
123 | | |
124 | | /* 2 chars of hyphen + 1 - 70 chars of boundary */ |
125 | | char prependedBoundaryBuffer[72]; |
126 | | std::string_view prependedBoundary; |
127 | | std::string_view remainingBody; |
128 | | bool first = true; |
129 | | |
130 | | /* I think it is more than sane to limit this to 10 per part */ |
131 | | //static const int MAX_HEADERS = 10; |
132 | | |
133 | | /* Construct the parser based on contentType (reads boundary) */ |
134 | 1.42k | MultipartParser(std::string_view contentType) { |
135 | | |
136 | | /* We expect the form "multipart/something;somethingboundary=something" */ |
137 | 1.42k | if (contentType.length() < 10 || contentType.substr(0, 10) != "multipart/") { |
138 | 134 | return; |
139 | 134 | } |
140 | | |
141 | | /* For now we simply guess boundary will lie between = and end. This is not entirely |
142 | | * standards compliant as boundary may be expressed with or without " and spaces */ |
143 | 1.29k | auto equalToken = contentType.find('=', 10); |
144 | 1.29k | if (equalToken != std::string_view::npos) { |
145 | | |
146 | | /* Boundary must be less than or equal to 70 chars yet 1 char or longer */ |
147 | 1.28k | std::string_view boundary = contentType.substr(equalToken + 1); |
148 | 1.28k | if (!boundary.length() || boundary.length() > 70) { |
149 | | /* Invalid size */ |
150 | 18 | return; |
151 | 18 | } |
152 | | |
153 | | /* Prepend it with two hyphens */ |
154 | 1.26k | prependedBoundaryBuffer[0] = prependedBoundaryBuffer[1] = '-'; |
155 | 1.26k | memcpy(&prependedBoundaryBuffer[2], boundary.data(), boundary.length()); |
156 | | |
157 | 1.26k | prependedBoundary = {prependedBoundaryBuffer, boundary.length() + 2}; |
158 | 1.26k | } |
159 | 1.29k | } |
160 | | |
161 | | /* Is this even a valid multipart request? */ |
162 | 1.42k | bool isValid() { |
163 | 1.42k | return prependedBoundary.length() != 0; |
164 | 1.42k | } |
165 | | |
166 | | /* Set the body once, before getting any parts */ |
167 | 1.26k | void setBody(std::string_view body) { |
168 | 1.26k | remainingBody = body; |
169 | 1.26k | } |
170 | | |
171 | | /* Parse out the next part's data, filling the headers. Returns nullopt on end or error. */ |
172 | 3.92k | std::optional<std::string_view> getNextPart(std::pair<std::string_view, std::string_view> *headers) { |
173 | | |
174 | | /* The remaining two hyphens should be shorter than the boundary */ |
175 | 3.92k | if (remainingBody.length() < prependedBoundary.length()) { |
176 | | /* We are done now */ |
177 | 909 | return std::nullopt; |
178 | 909 | } |
179 | | |
180 | 3.01k | if (first) { |
181 | 1.24k | auto nextBoundary = remainingBody.find(prependedBoundary); |
182 | 1.24k | if (nextBoundary == std::string_view::npos) { |
183 | | /* Cannot parse */ |
184 | 105 | return std::nullopt; |
185 | 105 | } |
186 | | |
187 | | /* Toss away boundary and anything before it */ |
188 | 1.14k | remainingBody.remove_prefix(nextBoundary + prependedBoundary.length()); |
189 | 1.14k | first = false; |
190 | 1.14k | } |
191 | | |
192 | 2.90k | auto nextEndBoundary = remainingBody.find(prependedBoundary); |
193 | 2.90k | if (nextEndBoundary == std::string_view::npos) { |
194 | | /* Cannot parse (or simply done) */ |
195 | 80 | return std::nullopt; |
196 | 80 | } |
197 | | |
198 | 2.82k | std::string_view part = remainingBody.substr(0, nextEndBoundary); |
199 | 2.82k | remainingBody.remove_prefix(nextEndBoundary + prependedBoundary.length()); |
200 | | |
201 | | /* Also strip rn before and rn after the part */ |
202 | 2.82k | if (part.length() < 4) { |
203 | | /* Cannot strip */ |
204 | 34 | return std::nullopt; |
205 | 34 | } |
206 | 2.79k | part.remove_prefix(2); |
207 | 2.79k | part.remove_suffix(2); |
208 | | |
209 | | /* We are allowed to post pad like this because we know the boundary is at least 2 bytes */ |
210 | | /* This makes parsing a second pass invalid, so you can only iterate over parts once */ |
211 | 2.79k | memset((char *) part.data() + part.length(), '\r', 1); |
212 | | |
213 | | /* For this to be a valid part, we need to consume at least 4 bytes (\r\n\r\n) */ |
214 | 2.79k | int consumed = getHeaders((char *) part.data(), (char *) part.data() + part.length(), headers); |
215 | | |
216 | 2.79k | if (!consumed) { |
217 | | /* This is an invalid part */ |
218 | 141 | return std::nullopt; |
219 | 141 | } |
220 | | |
221 | | /* Strip away the headers from the part body data */ |
222 | 2.65k | part.remove_prefix(consumed); |
223 | | |
224 | | /* Now pass whatever is remaining of the part */ |
225 | 2.65k | return part; |
226 | 2.79k | } |
227 | | }; |
228 | | |
229 | | } |
230 | | |
231 | | #endif |