/src/uWebSockets/src/Multipart.h

Source (jump to first uncovered line)
/*
 * Authored by Alex Hultman, 2018-2020.
 * Intellectual property of third-party.

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 *     http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Implements the multipart protocol. Builds atop parts of our common http parser (not yet refactored that way). */
/* https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html */

#ifndef UWS_MULTIPART_H
#define UWS_MULTIPART_H

#include "MessageParser.h"

#include <string_view>
#include <optional>
#include <cstring>
#include <utility>
#include <cctype>

namespace uWS {

    /* Splits a parameter line, such as the value of a content-disposition header
     * (form-data; name="field"; filename="a.txt"), into key/value pairs.
     * Every view handed out points into the line given to the constructor, so that
     * buffer has to outlive the views. The line itself is never modified.
     * This one could possibly be shared with ExtensionsParser to some degree */
    struct ParameterParser {

        /* Takes the line, commonly given as content-disposition header in the multipart.
         * Left implicit so that existing callers passing a string_view keep compiling. */
        ParameterParser(std::string_view line) : remainingLine(line) {}

        /* Returns next key/value where value can simply be empty.
         * If key (first) is empty then we are at the end.
         * A key followed by ';' or by the end of the line yields an empty value. Any other
         * token following the key (normally '=') is treated as the assignment operator:
         * the token after it becomes the value and one more token (the ';' separator,
         * or nothing at the end of the line) is consumed. */
        std::pair<std::string_view, std::string_view> getKeyValue() {
            const std::string_view key = getToken();
            const std::string_view op = getToken();

            /* Either the line ended, or this key has no value */
            if (op.empty() || op[0] == ';') {
                return {key, ""};
            }

            const std::string_view value = getToken();
            /* Strip ; or if at end, nothing */
            getToken();
            return {key, value};
        }

    private:
        /* What is left of the line; shrinks from the front as tokens are consumed */
        std::string_view remainingLine;

        /* <cctype> functions have undefined behavior for negative arguments other than EOF,
         * and plain char is negative for bytes >= 0x80 where char is signed. Such bytes are
         * ordinary in filenames, so always go through unsigned char. */
        static bool isSpace(char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        }

        /* Consumes a token from the line. Will "unquote" strings.
         * A token is one of: a single ';' or '=', a double quoted string (returned without
         * its quotes), or a run of characters up to the next ';', '=', whitespace or end.
         * Returns an empty view when nothing but whitespace is left, and also when a quoted
         * string is never closed (the rest of the line is swallowed in that case). */
        std::string_view getToken() {
            /* Strip whitespace */
            while (!remainingLine.empty() && isSpace(remainingLine.front())) {
                remainingLine.remove_prefix(1);
            }

            if (remainingLine.empty()) {
                /* All we had was space */
                return {};
            }

            const char head = remainingLine.front();

            /* Are we at an operator? */
            if (head == ';' || head == '=') {
                const std::string_view op = remainingLine.substr(0, 1);
                remainingLine.remove_prefix(1);
                return op;
            }

            /* Are we at a quoted string? */
            if (head == '\"') {
                /* Remove first quote and look for the closing one */
                remainingLine.remove_prefix(1);
                const std::string_view::size_type closing = remainingLine.find('\"');

                if (closing == std::string_view::npos) {
                    /* Unterminated quote, nothing sensible can follow it */
                    remainingLine.remove_prefix(remainingLine.length());
                    return {};
                }

                const std::string_view quoted = remainingLine.substr(0, closing);
                remainingLine.remove_prefix(closing + 1);
                return quoted;
            }

            /* Read anything until ; = space or end */
            std::string_view::size_type tokenLength = 0;
            while (tokenLength < remainingLine.length()) {
                const char c = remainingLine[tokenLength];
                if (c == ';' || c == '=' || isSpace(c)) {
                    break;
                }
                ++tokenLength;
            }

            const std::string_view token = remainingLine.substr(0, tokenLength);
            remainingLine.remove_prefix(tokenLength);
            return token;
        }
    };

    /* Walks over the parts of a multipart body, one part per getNextPart call.
     * Usage: construct from the content-type header value, check isValid(), hand over the
     * body with setBody() and then call getNextPart() until it returns nullopt.
     * The parser does not copy the body: returned parts view the caller's buffer, and
     * getNextPart writes one byte into that buffer per part, so the buffer must be
     * writable and stay alive while parts are in use. */
    struct MultipartParser {

        /* 2 chars of hyphen + 1 - 70 chars of boundary */
        char prependedBoundaryBuffer[72];
        /* "--" followed by the boundary. Views prependedBoundaryBuffer of this very object,
         * and stays empty when the content type was rejected. */
        std::string_view prependedBoundary;
        /* The part of the body that getNextPart has not consumed yet */
        std::string_view remainingBody;
        /* True until the opening boundary has been skipped */
        bool first = true;

        /* I think it is more than sane to limit this to 10 per part */
        //static const int MAX_HEADERS = 10;

        /* Construct the parser based on contentType (reads boundary).
         * On any failure prependedBoundary is left empty, which is what isValid() reports. */
        MultipartParser(std::string_view contentType) {

            /* We expect the form "multipart/something;somethingboundary=something" */
            if (contentType.length() < 10 || contentType.substr(0, 10) != "multipart/") {
                return;
            }

            /* For now we simply guess boundary will lie between = and end. This is not entirely
            * standards compliant as boundary may be expressed with or without " and spaces */
            const auto equalToken = contentType.find('=', 10);
            if (equalToken == std::string_view::npos) {
                return;
            }

            /* Boundary must be less than or equal to 70 chars yet 1 char or longer */
            const std::string_view boundary = contentType.substr(equalToken + 1);
            if (boundary.empty() || boundary.length() > 70) {
                /* Invalid size */
                return;
            }

            /* Prepend it with two hyphens */
            prependedBoundaryBuffer[0] = prependedBoundaryBuffer[1] = '-';
            std::memcpy(&prependedBoundaryBuffer[2], boundary.data(), boundary.length());

            prependedBoundary = {prependedBoundaryBuffer, boundary.length() + 2};
        }

        /* prependedBoundary views our own buffer, so a member-wise copy would leave the new
         * object looking into the buffer of the object it was copied from (dangling as soon
         * as that one goes away). Copying therefore has to re-point the view. These also
         * serve moves, since no move operations are declared. */
        MultipartParser(const MultipartParser &other)
            : remainingBody(other.remainingBody), first(other.first) {
            copyBoundaryFrom(other);
        }

        MultipartParser &operator=(const MultipartParser &other) {
            if (this != &other) {
                remainingBody = other.remainingBody;
                first = other.first;
                copyBoundaryFrom(other);
            }
            return *this;
        }

        /* Is this even a valid multipart request? */
        bool isValid() const {
            return prependedBoundary.length() != 0;
        }

        /* Set the body once, before getting any parts */
        void setBody(std::string_view body) {
            remainingBody = body;
        }

        /* Parse out the next part's data, filling the headers. Returns nullopt on end or error.
         * headers is passed straight to getHeaders (MessageParser.h); how many entries it
         * must hold is defined there - NOTE(review): confirm against MessageParser.h.
         * The returned view is the part's data with its header block removed. */
        std::optional<std::string_view> getNextPart(std::pair<std::string_view, std::string_view> *headers) {

            /* Without a boundary there is nothing to split on */
            if (!isValid()) {
                return std::nullopt;
            }

            /* Whatever trails the closing boundary is shorter than a boundary */
            if (remainingBody.length() < prependedBoundary.length()) {
                /* We are done now */
                return std::nullopt;
            }

            if (first) {
                const auto nextBoundary = remainingBody.find(prependedBoundary);
                if (nextBoundary == std::string_view::npos) {
                    /* Cannot parse */
                    return std::nullopt;
                }

                /* Toss away boundary and anything before it */
                remainingBody.remove_prefix(nextBoundary + prependedBoundary.length());
                first = false;
            }

            const auto nextEndBoundary = remainingBody.find(prependedBoundary);
            if (nextEndBoundary == std::string_view::npos) {
                /* Cannot parse (or simply done) */
                return std::nullopt;
            }

            std::string_view part = remainingBody.substr(0, nextEndBoundary);
            remainingBody.remove_prefix(nextEndBoundary + prependedBoundary.length());

            /* Also strip rn before and rn after the part */
            if (part.length() < 4) {
                /* Cannot strip */
                return std::nullopt;
            }
            part.remove_prefix(2);
            part.remove_suffix(2);

            /* The header parser wants a post padded buffer. The byte right after the part is
             * the first of the two trailing bytes just stripped, so it lies inside the body
             * and can be overwritten - provided the caller's buffer is writable. */
            /* This makes parsing a second pass invalid, so you can only iterate over parts once */
            char *partBegin = const_cast<char *>(part.data());
            char *partEnd = partBegin + part.length();
            *partEnd = '\r';

            /* For this to be a valid part, we need to consume at least 4 bytes (\r\n\r\n) */
            int consumed = getHeaders(partBegin, partEnd, headers);

            if (!consumed) {
                /* This is an invalid part */
                return std::nullopt;
            }

            /* Strip away the headers from the part body data */
            part.remove_prefix(consumed);

            /* Now pass whatever is remaining of the part */
            return part;
        }

    private:
        /* Copies the boundary bytes of other into our own buffer and points our view at them */
        void copyBoundaryFrom(const MultipartParser &other) {
            std::string_view::size_type length = other.prependedBoundary.length();

            /* prependedBoundary is a public member; never trust it beyond our buffer */
            if (length > sizeof(prependedBoundaryBuffer)) {
                length = sizeof(prependedBoundaryBuffer);
            }

            if (length) {
                std::memcpy(prependedBoundaryBuffer, other.prependedBoundary.data(), length);
            }

            prependedBoundary = {prependedBoundaryBuffer, length};
        }
    };

}

#endif

Coverage Report

Created: 2023-09-25 07:17

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Authored by Alex Hultman, 2018-2020.
3		* Intellectual property of third-party.
4
5		* Licensed under the Apache License, Version 2.0 (the "License");
6		* you may not use this file except in compliance with the License.
7		* You may obtain a copy of the License at
8
9		* http://www.apache.org/licenses/LICENSE-2.0
10
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		/* Implements the multipart protocol. Builds atop parts of our common http parser (not yet refactored that way). */
19		/* https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html */
20
21		#ifndef UWS_MULTIPART_H
22		#define UWS_MULTIPART_H
23
24		#include "MessageParser.h"
25
26		#include <string_view>
27		#include <optional>
28		#include <cstring>
29		#include <utility>
30		#include <cctype>
31
32		namespace uWS {
33
34		/* This one could possibly be shared with ExtensionsParser to some degree */
35		struct ParameterParser {
36
37		/* Takes the line, commonly given as content-disposition header in the multipart */
38	4.82k	ParameterParser(std::string_view line) {
39	4.82k	remainingLine = line;
40	4.82k	}
41
42		/* Returns next key/value where value can simply be empty.
43		* If key (first) is empty then we are at the end */
44	54.6k	std::pair<std::string_view, std::string_view> getKeyValue() {
45	54.6k	auto key = getToken();
46	54.6k	auto op = getToken();
47
48	54.6k	if (!op.length()) {
49	6.22k	return {key, ""};
50	6.22k	}
51
52	48.3k	if (op[0] != ';') {
53	40.7k	auto value = getToken();
54		/* Strip ; or if at end, nothing */
55	40.7k	getToken();
56	40.7k	return {key, value};
57	40.7k	}
58
59	7.61k	return {key, ""};
60	48.3k	}
61
62		private:
63		std::string_view remainingLine;
64
65		/* Consumes a token from the line. Will "unquote" strings */
66	190k	std::string_view getToken() {
67		/* Strip whitespace */
68	235k	while (remainingLine.length() && isspace(remainingLine[0])) {
69	44.8k	remainingLine.remove_prefix(1);
70	44.8k	}
71
72	190k	if (!remainingLine.length()) {
73		/* All we had was space */
74	11.4k	return {};
75	179k	} else {
76		/* Are we at an operator? */
77	179k	if (remainingLine[0] == ';' || remainingLine[0] == '=') {
78	80.5k	auto op = remainingLine.substr(0, 1);
79	80.5k	remainingLine.remove_prefix(1);
80	80.5k	return op;
81	98.7k	} else {
82		/* Are we at a quoted string? */
83	98.7k	if (remainingLine[0] == '\"') {
84		/* Remove first quote and start counting */
85	13.2k	remainingLine.remove_prefix(1);
86	13.2k	auto quote = remainingLine;
87	13.2k	int quoteLength = 0;
88
89		/* Read anything until other double quote appears */
90	4.88M	while (remainingLine.length() && remainingLine[0] != '\"') {
91	4.87M	remainingLine.remove_prefix(1);
92	4.87M	quoteLength++;
93	4.87M	}
94
95		/* We can't remove_prefix if we have nothing to remove */
96	13.2k	if (!remainingLine.length()) {
97	2.09k	return {};
98	2.09k	}
99
100	11.1k	remainingLine.remove_prefix(1);
101	11.1k	return quote.substr(0, quoteLength);
102	85.5k	} else {
103		/* Read anything until ; = space or end */
104	85.5k	std::string_view token = remainingLine;
105
106	85.5k	int tokenLength = 0;
107	7.64M	while (remainingLine.length() && remainingLine[0] != ';' && remainingLine[0] != '=' && !isspace(remainingLine[0])) {
108	7.55M	remainingLine.remove_prefix(1);
109	7.55M	tokenLength++;
110	7.55M	}
111
112	85.5k	return token.substr(0, tokenLength);
113	85.5k	}
114	98.7k	}
115	179k	}
116
117		/* Nothing */
118	0	return "";
119	190k	}
120		};
121
122		struct MultipartParser {
123
124		/* 2 chars of hyphen + 1 - 70 chars of boundary */
125		char prependedBoundaryBuffer[72];
126		std::string_view prependedBoundary;
127		std::string_view remainingBody;
128		bool first = true;
129
130		/* I think it is more than sane to limit this to 10 per part */
131		//static const int MAX_HEADERS = 10;
132
133		/* Construct the parser based on contentType (reads boundary) */
134	1.42k	MultipartParser(std::string_view contentType) {
135
136		/* We expect the form "multipart/something;somethingboundary=something" */
137	1.42k	if (contentType.length() < 10 || contentType.substr(0, 10) != "multipart/") {
138	134	return;
139	134	}
140
141		/* For now we simply guess boundary will lie between = and end. This is not entirely
142		* standards compliant as boundary may be expressed with or without " and spaces */
143	1.29k	auto equalToken = contentType.find('=', 10);
144	1.29k	if (equalToken != std::string_view::npos) {
145
146		/* Boundary must be less than or equal to 70 chars yet 1 char or longer */
147	1.28k	std::string_view boundary = contentType.substr(equalToken + 1);
148	1.28k	if (!boundary.length() || boundary.length() > 70) {
149		/* Invalid size */
150	18	return;
151	18	}
152
153		/* Prepend it with two hyphens */
154	1.26k	prependedBoundaryBuffer[0] = prependedBoundaryBuffer[1] = '-';
155	1.26k	memcpy(&prependedBoundaryBuffer[2], boundary.data(), boundary.length());
156
157	1.26k	prependedBoundary = {prependedBoundaryBuffer, boundary.length() + 2};
158	1.26k	}
159	1.29k	}
160
161		/* Is this even a valid multipart request? */
162	1.42k	bool isValid() {
163	1.42k	return prependedBoundary.length() != 0;
164	1.42k	}
165
166		/* Set the body once, before getting any parts */
167	1.26k	void setBody(std::string_view body) {
168	1.26k	remainingBody = body;
169	1.26k	}
170
171		/* Parse out the next part's data, filling the headers. Returns nullopt on end or error. */
172	3.92k	std::optional<std::string_view> getNextPart(std::pair<std::string_view, std::string_view> *headers) {
173
174		/* The remaining two hyphens should be shorter than the boundary */
175	3.92k	if (remainingBody.length() < prependedBoundary.length()) {
176		/* We are done now */
177	909	return std::nullopt;
178	909	}
179
180	3.01k	if (first) {
181	1.24k	auto nextBoundary = remainingBody.find(prependedBoundary);
182	1.24k	if (nextBoundary == std::string_view::npos) {
183		/* Cannot parse */
184	105	return std::nullopt;
185	105	}
186
187		/* Toss away boundary and anything before it */
188	1.14k	remainingBody.remove_prefix(nextBoundary + prependedBoundary.length());
189	1.14k	first = false;
190	1.14k	}
191
192	2.90k	auto nextEndBoundary = remainingBody.find(prependedBoundary);
193	2.90k	if (nextEndBoundary == std::string_view::npos) {
194		/* Cannot parse (or simply done) */
195	80	return std::nullopt;
196	80	}
197
198	2.82k	std::string_view part = remainingBody.substr(0, nextEndBoundary);
199	2.82k	remainingBody.remove_prefix(nextEndBoundary + prependedBoundary.length());
200
201		/* Also strip rn before and rn after the part */
202	2.82k	if (part.length() < 4) {
203		/* Cannot strip */
204	34	return std::nullopt;
205	34	}
206	2.79k	part.remove_prefix(2);
207	2.79k	part.remove_suffix(2);
208
209		/* We are allowed to post pad like this because we know the boundary is at least 2 bytes */
210		/* This makes parsing a second pass invalid, so you can only iterate over parts once */
211	2.79k	memset((char *) part.data() + part.length(), '\r', 1);
212
213		/* For this to be a valid part, we need to consume at least 4 bytes (\r\n\r\n) */
214	2.79k	int consumed = getHeaders((char *) part.data(), (char *) part.data() + part.length(), headers);
215
216	2.79k	if (!consumed) {
217		/* This is an invalid part */
218	141	return std::nullopt;
219	141	}
220
221		/* Strip away the headers from the part body data */
222	2.65k	part.remove_prefix(consumed);
223
224		/* Now pass whatever is remaining of the part */
225	2.65k	return part;
226	2.79k	}
227		};
228
229		}
230
231		#endif