/src/libtorrent/src/http_parser.cpp
Line | Count | Source |
1 | | /* |
2 | | |
3 | | Copyright (c) 2008-2019, 2021, Arvid Norberg |
4 | | Copyright (c) 2016-2018, Alden Torres |
5 | | Copyright (c) 2017, Pavel Pimenov |
6 | | All rights reserved. |
7 | | |
8 | | Redistribution and use in source and binary forms, with or without |
9 | | modification, are permitted provided that the following conditions |
10 | | are met: |
11 | | |
12 | | * Redistributions of source code must retain the above copyright |
13 | | notice, this list of conditions and the following disclaimer. |
14 | | * Redistributions in binary form must reproduce the above copyright |
15 | | notice, this list of conditions and the following disclaimer in |
16 | | the documentation and/or other materials provided with the distribution. |
17 | | * Neither the name of the author nor the names of its |
18 | | contributors may be used to endorse or promote products derived |
19 | | from this software without specific prior written permission. |
20 | | |
21 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
22 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
23 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
24 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
25 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
26 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
27 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
28 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
29 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
30 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
31 | | POSSIBILITY OF SUCH DAMAGE. |
32 | | |
33 | | */ |
34 | | |
35 | | #include <cctype> |
36 | | #include <cstring> |
37 | | #include <algorithm> |
38 | | #include <cstdlib> |
39 | | #include <cinttypes> |
40 | | |
41 | | #include "libtorrent/config.hpp" |
42 | | #include "libtorrent/http_parser.hpp" |
43 | | #include "libtorrent/hex.hpp" // for hex_to_int |
44 | | #include "libtorrent/assert.hpp" |
45 | | #include "libtorrent/parse_url.hpp" // for parse_url_components |
46 | | #include "libtorrent/string_util.hpp" // for ensure_trailing_slash, to_lower |
47 | | #include "libtorrent/aux_/escape_string.hpp" // for read_until |
48 | | #include "libtorrent/time.hpp" // for seconds32 |
49 | | #include "libtorrent/aux_/numeric_cast.hpp" |
50 | | |
51 | | namespace libtorrent { |
52 | | |
// Returns true for the HTTP status codes that are treated as a successful
// response: 200 (OK), 206 (partial content) and the whole 3xx redirect
// range. Every other status is reported as not OK.
bool is_ok_status(int http_status)
{
	// OK
	if (http_status == 200) return true;
	// partial content
	if (http_status == 206) return true;
	// redirect
	return http_status >= 300 && http_status < 400;
}
60 | | |
// Returns true if the HTTP status code lies in the 3xx (redirect) range,
// i.e. 300 <= http_status < 400.
bool is_redirect(int http_status)
{
	bool const outside_3xx = http_status < 300 || http_status >= 400;
	return !outside_3xx;
}
66 | | |
67 | | std::string resolve_redirect_location(std::string referrer |
68 | | , std::string location) |
69 | 0 | { |
70 | 0 | if (location.empty()) return referrer; |
71 | | |
72 | 0 | error_code ec; |
73 | 0 | using std::ignore; |
74 | 0 | std::tie(ignore, ignore, ignore, ignore, ignore) |
75 | 0 | = parse_url_components(location, ec); |
76 | | |
77 | | // if location is a full URL, just return it |
78 | 0 | if (!ec) return location; |
79 | | |
80 | | // otherwise it's likely to be just the path, or a relative path |
81 | 0 | std::string url = referrer; |
82 | |
|
83 | 0 | if (location[0] == '/') |
84 | 0 | { |
85 | | // it's an absolute path. replace the path component of |
86 | | // referrer with location. |
87 | | |
88 | | // first skip the url scheme of the referer |
89 | 0 | std::size_t i = url.find("://"); |
90 | | |
91 | | // if the referrer doesn't appear to have a proper URL scheme |
92 | | // just return the location verbatim (and probably fail) |
93 | 0 | if (i == std::string::npos) |
94 | 0 | return location; |
95 | | |
96 | | // then skip the hostname and port, it's fine for this to fail, in |
97 | | // case the referrer doesn't have a path component, it's just the |
98 | | // url-scheme and hostname, in which case we just append the location |
99 | 0 | i = url.find_first_of('/', i + 3); |
100 | 0 | if (i != std::string::npos) |
101 | 0 | url.resize(i); |
102 | |
|
103 | 0 | url += location; |
104 | 0 | } |
105 | 0 | else |
106 | 0 | { |
107 | | // some web servers send out relative paths |
108 | | // in the location header. |
109 | | |
110 | | // remove the leaf filename |
111 | | // first skip the url scheme of the referer |
112 | 0 | std::size_t start = url.find("://"); |
113 | | |
114 | | // the referrer is not a valid full URL |
115 | 0 | if (start == std::string::npos) |
116 | 0 | return location; |
117 | | |
118 | 0 | std::size_t end = url.find_last_of('/'); |
119 | | // if the / we find is part of the scheme, there is no / in the path |
120 | | // component or hostname. |
121 | 0 | if (end <= start + 2) end = std::string::npos; |
122 | | |
123 | | // if this fails, the referrer is just url-scheme and hostname. We can |
124 | | // just append the location to it. |
125 | 0 | if (end != std::string::npos) |
126 | 0 | url.resize(end); |
127 | | |
128 | | // however, we may still need to insert a '/' in case neither side |
129 | | // has one. We know the location doesn't start with a / already. |
130 | | // so, if the referrer doesn't end with one, add it. |
131 | 0 | ensure_trailing_slash(url); |
132 | 0 | url += location; |
133 | 0 | } |
134 | 0 | return url; |
135 | 0 | } |
136 | | |
137 | | std::string const& http_parser::header(string_view const key) const |
138 | 0 | { |
139 | 0 | static std::string const empty; |
140 | | // at least GCC-5.4 for ARM (on travis) has a libstdc++ whose debug map$ |
141 | | // doesn't seem to support transparent comparators$ |
142 | | #if ! defined _GLIBCXX_DEBUG |
143 | | auto const i = m_header.find(key); |
144 | | #else |
145 | 0 | auto const i = m_header.find(std::string(key)); |
146 | 0 | #endif |
147 | 0 | if (i == m_header.end()) return empty; |
148 | 0 | return i->second; |
149 | 0 | } |
150 | | |
151 | | boost::optional<seconds32> http_parser::header_duration(string_view const key) const |
152 | 0 | { |
153 | | // at least GCC-5.4 for ARM (on travis) has a libstdc++ whose debug map$ |
154 | | // doesn't seem to support transparent comparators$ |
155 | | #if ! defined _GLIBCXX_DEBUG |
156 | | auto const i = m_header.find(key); |
157 | | #else |
158 | 0 | auto const i = m_header.find(std::string(key)); |
159 | 0 | #endif |
160 | 0 | if (i == m_header.end()) return boost::none; |
161 | 0 | auto const val = std::atol(i->second.c_str()); |
162 | 0 | if (val <= 0) return boost::none; |
163 | 0 | return seconds32(val); |
164 | 0 | } |
165 | | |
166 | 3.38k | http_parser::~http_parser() = default; |
167 | | |
168 | 3.38k | http_parser::http_parser(int const flags) : m_flags(flags) {} |
169 | | |
170 | | std::tuple<int, int> http_parser::incoming( |
171 | | span<char const> recv_buffer, bool& error) |
172 | 184M | { |
173 | 184M | TORRENT_ASSERT(recv_buffer.size() >= m_recv_buffer.size()); |
174 | 184M | std::tuple<int, int> ret(0, 0); |
175 | 184M | std::ptrdiff_t start_pos = m_recv_buffer.size(); |
176 | | |
177 | | // early exit if there's nothing new in the receive buffer |
178 | 184M | if (start_pos == recv_buffer.size()) return ret; |
179 | 184M | m_recv_buffer = recv_buffer; |
180 | | |
181 | 184M | if (m_state == error_state) |
182 | 0 | { |
183 | 0 | error = true; |
184 | 0 | return ret; |
185 | 0 | } |
186 | | |
187 | 184M | char const* pos = recv_buffer.data() + m_recv_pos; |
188 | | |
189 | 184M | restart_response: |
190 | | |
191 | 184M | if (m_state == read_status) |
192 | 5.86M | { |
193 | 5.86M | TORRENT_ASSERT(!m_finished); |
194 | 5.86M | TORRENT_ASSERT(pos <= recv_buffer.end()); |
195 | 5.86M | char const* newline = std::find(pos, recv_buffer.end(), '\n'); |
196 | | // if we don't have a full line yet, wait. |
197 | 5.86M | if (newline == recv_buffer.end()) |
198 | 5.31M | { |
199 | 5.31M | std::get<1>(ret) += int(m_recv_buffer.size() - start_pos); |
200 | 5.31M | return ret; |
201 | 5.31M | } |
202 | | |
203 | 551k | if (newline == pos) |
204 | 1.10k | { |
205 | 1.10k | m_state = error_state; |
206 | 1.10k | error = true; |
207 | 1.10k | return ret; |
208 | 1.10k | } |
209 | | |
210 | 550k | char const* line_end = newline; |
211 | 550k | if (pos != line_end && *(line_end - 1) == '\r') --line_end; |
212 | | |
213 | 550k | char const* line = pos; |
214 | 550k | ++newline; |
215 | 550k | TORRENT_ASSERT(newline >= pos); |
216 | 550k | int incoming = int(newline - pos); |
217 | 550k | m_recv_pos += incoming; |
218 | 550k | std::get<1>(ret) += int(newline - (m_recv_buffer.data() + start_pos)); |
219 | 550k | pos = newline; |
220 | | |
221 | 550k | m_protocol = read_until(line, ' ', line_end); |
222 | 550k | if (m_protocol.substr(0, 5) == "HTTP/") |
223 | 331k | { |
224 | 331k | m_status_code = atoi(read_until(line, ' ', line_end).c_str()); |
225 | 331k | m_server_message = read_until(line, '\r', line_end); |
226 | | |
227 | | // HTTP 1.0 always closes the connection after |
228 | | // each request |
229 | 331k | if (m_protocol == "HTTP/1.0") m_connection_close = true; |
230 | 331k | } |
231 | 219k | else |
232 | 219k | { |
233 | 219k | m_method = m_protocol; |
234 | 219k | std::transform(m_method.begin(), m_method.end(), m_method.begin(), &to_lower); |
235 | | // the content length is assumed to be 0 for requests |
236 | 219k | m_content_length = 0; |
237 | 219k | m_protocol.clear(); |
238 | 219k | m_path = read_until(line, ' ', line_end); |
239 | 219k | m_protocol = read_until(line, ' ', line_end); |
240 | 219k | m_status_code = 0; |
241 | 219k | } |
242 | 550k | m_state = read_header; |
243 | 550k | start_pos = pos - recv_buffer.data(); |
244 | 550k | } |
245 | | |
246 | 179M | if (m_state == read_header) |
247 | 87.8M | { |
248 | 87.8M | TORRENT_ASSERT(!m_finished); |
249 | 87.8M | TORRENT_ASSERT(pos <= recv_buffer.end()); |
250 | 87.8M | char const* newline = std::find(pos, recv_buffer.end(), '\n'); |
251 | 87.8M | std::string line; |
252 | | |
253 | 89.9M | while (newline != recv_buffer.end() && m_state == read_header) |
254 | 2.59M | { |
255 | | // if the LF character is preceded by a CR |
256 | | // character, don't copy it into the line string. |
257 | 2.59M | char const* line_end = newline; |
258 | 2.59M | if (pos != line_end && *(line_end - 1) == '\r') --line_end; |
259 | 2.59M | line.assign(pos, line_end); |
260 | 2.59M | ++newline; |
261 | 2.59M | m_recv_pos += newline - pos; |
262 | 2.59M | pos = newline; |
263 | | |
264 | 2.59M | std::string::size_type separator = line.find(':'); |
265 | 2.59M | if (separator == std::string::npos) |
266 | 435k | { |
267 | 435k | if (m_status_code == 100) |
268 | 319k | { |
269 | | // for 100 Continue, we need to read another response header |
270 | | // before reading the body |
271 | 319k | m_state = read_status; |
272 | 319k | goto restart_response; |
273 | 319k | } |
274 | | // this means we got a blank line, |
275 | | // the header is finished and the body |
276 | | // starts. |
277 | 116k | m_state = read_body; |
278 | | // if this is a request (not a response) |
279 | | // we're done once we reach the end of the headers |
280 | | // if (!m_method.empty()) m_finished = true; |
281 | | // the HTTP header should always be < 2 GB |
282 | 116k | TORRENT_ASSERT(m_recv_pos < std::numeric_limits<int>::max()); |
283 | 116k | m_body_start_pos = int(m_recv_pos); |
284 | 116k | break; |
285 | 435k | } |
286 | | |
287 | 2.16M | std::string name = line.substr(0, separator); |
288 | 2.16M | std::transform(name.begin(), name.end(), name.begin(), &to_lower); |
289 | 2.16M | ++separator; |
290 | | // skip whitespace |
291 | 2.22M | while (separator < line.size() |
292 | 1.14M | && (line[separator] == ' ' || line[separator] == '\t')) |
293 | 60.0k | ++separator; |
294 | 2.16M | std::string value = line.substr(separator, std::string::npos); |
295 | 2.16M | m_header.insert(std::make_pair(name, value)); |
296 | | |
297 | 2.16M | if (name == "content-length") |
298 | 75.6k | { |
299 | 75.6k | m_content_length = std::strtoll(value.c_str(), nullptr, 10); |
300 | 75.6k | if (m_content_length < 0 |
301 | 70.8k | || m_content_length == std::numeric_limits<std::int64_t>::max()) |
302 | 4.83k | { |
303 | 4.83k | m_state = error_state; |
304 | 4.83k | error = true; |
305 | 4.83k | return ret; |
306 | 4.83k | } |
307 | 75.6k | } |
308 | 2.08M | else if (name == "connection") |
309 | 105k | { |
310 | 105k | m_connection_close = string_begins_no_case("close", value.c_str()); |
311 | 105k | } |
312 | 1.98M | else if (name == "content-range") |
313 | 149k | { |
314 | 149k | bool success = true; |
315 | 149k | char const* ptr = value.c_str(); |
316 | | |
317 | | // apparently some web servers do not send the "bytes" |
318 | | // in their content-range. Don't treat it as an error |
319 | | // if we can't find it, just assume the byte counters |
320 | | // start immediately |
321 | 149k | if (string_begins_no_case("bytes ", ptr)) ptr += 6; |
322 | 149k | char* end; |
323 | 149k | m_range_start = std::strtoll(ptr, &end, 10); |
324 | 149k | if (m_range_start < 0 |
325 | 144k | || m_range_start == std::numeric_limits<std::int64_t>::max()) |
326 | 4.89k | { |
327 | 4.89k | m_state = error_state; |
328 | 4.89k | error = true; |
329 | 4.89k | return ret; |
330 | 4.89k | } |
331 | 144k | if (end == ptr) success = false; |
332 | 143k | else if (*end != '-') success = false; |
333 | 134k | else |
334 | 134k | { |
335 | 134k | ptr = end + 1; |
336 | 134k | m_range_end = std::strtoll(ptr, &end, 10); |
337 | 134k | if (m_range_end < 0 |
338 | 129k | || m_range_end == std::numeric_limits<std::int64_t>::max()) |
339 | 5.17k | { |
340 | 5.17k | m_state = error_state; |
341 | 5.17k | error = true; |
342 | 5.17k | return ret; |
343 | 5.17k | } |
344 | 129k | if (end == ptr) success = false; |
345 | 129k | } |
346 | | |
347 | 139k | if (!success || m_range_end < m_range_start) |
348 | 10.2k | { |
349 | 10.2k | m_state = error_state; |
350 | 10.2k | error = true; |
351 | 10.2k | return ret; |
352 | 10.2k | } |
353 | | // the http range is inclusive |
354 | 129k | m_content_length = m_range_end - m_range_start + 1; |
355 | 129k | } |
356 | 1.83M | else if (name == "transfer-encoding") |
357 | 679k | { |
358 | 679k | m_chunked_encoding = string_begins_no_case("chunked", value.c_str()); |
359 | 679k | } |
360 | | |
361 | 2.13M | TORRENT_ASSERT(m_recv_pos <= int(recv_buffer.size())); |
362 | 2.13M | TORRENT_ASSERT(pos <= recv_buffer.end()); |
363 | 2.13M | newline = std::find(pos, recv_buffer.end(), '\n'); |
364 | 2.13M | } |
365 | 87.4M | std::get<1>(ret) += int(newline - (m_recv_buffer.data() + start_pos)); |
366 | 87.4M | } |
367 | | |
368 | 179M | if (m_state == read_body) |
369 | 91.7M | { |
370 | 91.7M | int incoming = int(recv_buffer.end() - pos); |
371 | | |
372 | 91.7M | if (m_chunked_encoding && (m_flags & dont_parse_chunks) == 0) |
373 | 50.3M | { |
374 | 50.3M | if (m_cur_chunk_end == -1) |
375 | 93.4k | m_cur_chunk_end = m_body_start_pos; |
376 | | |
377 | 103M | while (m_cur_chunk_end <= m_recv_pos + incoming && !m_finished && incoming > 0) |
378 | 53.4M | { |
379 | 53.4M | std::int64_t payload = m_cur_chunk_end - m_recv_pos; |
380 | 53.4M | if (payload > 0) |
381 | 6.29M | { |
382 | 6.29M | TORRENT_ASSERT(payload < std::numeric_limits<int>::max()); |
383 | 6.29M | m_recv_pos += payload; |
384 | 6.29M | std::get<0>(ret) += int(payload); |
385 | 6.29M | incoming -= int(payload); |
386 | 6.29M | } |
387 | 53.4M | auto const buf = span<char const>(recv_buffer) |
388 | 53.4M | .subspan(aux::numeric_cast<std::ptrdiff_t>(m_cur_chunk_end)); |
389 | 53.4M | std::int64_t chunk_size; |
390 | 53.4M | int header_size; |
391 | 53.4M | if (parse_chunk_header(buf, &chunk_size, &header_size)) |
392 | 6.34M | { |
393 | 6.34M | if (chunk_size < 0 |
394 | 6.34M | || chunk_size > std::numeric_limits<std::int64_t>::max() - m_cur_chunk_end - header_size) |
395 | 5.45k | { |
396 | 5.45k | m_state = error_state; |
397 | 5.45k | error = true; |
398 | 5.45k | return ret; |
399 | 5.45k | } |
400 | 6.34M | if (chunk_size > 0) |
401 | 6.31M | { |
402 | 6.31M | std::pair<std::int64_t, std::int64_t> chunk_range(m_cur_chunk_end + header_size |
403 | 6.31M | , m_cur_chunk_end + header_size + chunk_size); |
404 | 6.31M | m_chunked_ranges.push_back(chunk_range); |
405 | 6.31M | } |
406 | 6.34M | m_cur_chunk_end += header_size + chunk_size; |
407 | 6.34M | if (chunk_size == 0) |
408 | 29.9k | { |
409 | 29.9k | m_finished = true; |
410 | 29.9k | } |
411 | 6.34M | header_size -= m_partial_chunk_header; |
412 | 6.34M | m_partial_chunk_header = 0; |
413 | | // std::fprintf(stderr, "parse_chunk_header(%d, -> %" PRId64 ", -> %d) -> %d\n" |
414 | | // " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %" PRId64 "\n" |
415 | | // " content-length = %d\n" |
416 | | // , int(buf.size()), chunk_size, header_size, 1, incoming, int(m_recv_pos) |
417 | | // , m_cur_chunk_end, int(m_content_length)); |
418 | 6.34M | } |
419 | 47.1M | else |
420 | 47.1M | { |
421 | 47.1M | m_partial_chunk_header += incoming; |
422 | 47.1M | header_size = incoming; |
423 | | |
424 | | // std::fprintf(stderr, "parse_chunk_header(%d, -> %" PRId64 ", -> %d) -> %d\n" |
425 | | // " incoming = %d\n m_recv_pos = %d\n m_cur_chunk_end = %" PRId64 "\n" |
426 | | // " content-length = %d\n" |
427 | | // , int(buf.size()), chunk_size, header_size, 0, incoming, int(m_recv_pos) |
428 | | // , m_cur_chunk_end, int(m_content_length)); |
429 | 47.1M | } |
430 | 53.4M | m_chunk_header_size += header_size; |
431 | 53.4M | m_recv_pos += header_size; |
432 | 53.4M | std::get<1>(ret) += header_size; |
433 | 53.4M | incoming -= header_size; |
434 | 53.4M | } |
435 | 50.3M | if (incoming > 0) |
436 | 2.75M | { |
437 | 2.75M | m_recv_pos += incoming; |
438 | 2.75M | std::get<0>(ret) += incoming; |
439 | | // incoming = 0; |
440 | 2.75M | } |
441 | 50.3M | } |
442 | 41.3M | else |
443 | 41.3M | { |
444 | 41.3M | std::int64_t payload_received = m_recv_pos - m_body_start_pos + incoming; |
445 | 41.3M | if (payload_received > m_content_length |
446 | 35.6M | && m_content_length >= 0) |
447 | 35.6M | { |
448 | 35.6M | TORRENT_ASSERT(m_content_length - m_recv_pos + m_body_start_pos |
449 | 35.6M | < std::numeric_limits<int>::max()); |
450 | 35.6M | incoming = int(m_content_length - m_recv_pos + m_body_start_pos); |
451 | 35.6M | } |
452 | | |
453 | 41.3M | TORRENT_ASSERT(incoming >= 0); |
454 | 41.3M | m_recv_pos += incoming; |
455 | 41.3M | std::get<0>(ret) += incoming; |
456 | 41.3M | } |
457 | | |
458 | 91.7M | if (m_content_length >= 0 |
459 | 91.7M | && !m_chunked_encoding |
460 | 41.3M | && m_recv_pos - m_body_start_pos >= m_content_length) |
461 | 35.6M | { |
462 | 35.6M | m_finished = true; |
463 | 35.6M | } |
464 | 91.7M | } |
465 | 179M | return ret; |
466 | 179M | } |
467 | | |
468 | | // this function signals error by assigning a negative value to "chunk_size" |
469 | | // the return value indicates whether enough data is available in "buf" to |
470 | | // completely parse the chunk header. Returning false means we need more data |
471 | | bool http_parser::parse_chunk_header(span<char const> buf |
472 | | , std::int64_t* chunk_size, int* header_size) |
473 | 53.4M | { |
474 | 53.4M | char const* pos = buf.data(); |
475 | | |
476 | | // ignore one optional new-line. This is since each chunk |
477 | | // is terminated by a newline. we're likely to see one |
478 | | // before the actual header. |
479 | | |
480 | 53.4M | if (pos < buf.end() && pos[0] == '\r') ++pos; |
481 | 53.4M | if (pos < buf.end() && pos[0] == '\n') ++pos; |
482 | 53.4M | if (pos == buf.end()) return false; |
483 | | |
484 | 53.0M | TORRENT_ASSERT(pos <= buf.end()); |
485 | 53.0M | char const* newline = std::find(pos, buf.end(), '\n'); |
486 | 53.0M | if (newline == buf.end()) return false; |
487 | 51.8M | ++newline; |
488 | | |
489 | | // the chunk header is a single line, a hex length of the |
490 | | // chunk followed by an optional semi-colon with a comment |
491 | | // in case the length is 0, the stream is terminated and |
492 | | // there are extra tail headers, which is terminated by an |
493 | | // empty line |
494 | | |
495 | 51.8M | *header_size = int(newline - buf.data()); |
496 | | |
497 | | // first, read the chunk length |
498 | 51.8M | std::int64_t size = 0; |
499 | 87.4M | for (char const* i = pos; i != newline; ++i) |
500 | 68.7M | { |
501 | 68.7M | if (*i == '\r') continue; |
502 | 68.7M | if (*i == '\n') continue; |
503 | 50.0M | if (*i == ';') break; |
504 | 16.8M | int const digit = aux::hex_to_int(*i); |
505 | 16.8M | if (digit < 0) |
506 | 3.31k | { |
507 | 3.31k | *chunk_size = -1; |
508 | 3.31k | return true; |
509 | 3.31k | } |
510 | 16.8M | if (size >= std::numeric_limits<std::int64_t>::max() / 16) |
511 | 759 | { |
512 | 759 | *chunk_size = -1; |
513 | 759 | return true; |
514 | 759 | } |
515 | 16.8M | size *= 16; |
516 | 16.8M | size += digit; |
517 | 16.8M | } |
518 | 51.8M | *chunk_size = size; |
519 | | |
520 | 51.8M | if (*chunk_size != 0) |
521 | 6.31M | { |
522 | | // the newline is at least 1 byte, and the length-prefix is at least 1 |
523 | | // byte |
524 | 6.31M | TORRENT_ASSERT(newline - buf.data() >= 2); |
525 | 6.31M | return true; |
526 | 6.31M | } |
527 | | |
528 | | // this is the terminator of the stream. Also read headers |
529 | 45.5M | std::map<std::string, std::string> tail_headers; |
530 | 45.5M | pos = newline; |
531 | 45.5M | newline = std::find(pos, buf.end(), '\n'); |
532 | | |
533 | 45.5M | std::string line; |
534 | 65.1M | while (newline != buf.end()) |
535 | 19.6M | { |
536 | | // if the LF character is preceded by a CR |
537 | | // character, don't copy it into the line string. |
538 | 19.6M | char const* line_end = newline; |
539 | 19.6M | if (pos != line_end && *(line_end - 1) == '\r') --line_end; |
540 | 19.6M | line.assign(pos, line_end); |
541 | 19.6M | ++newline; |
542 | 19.6M | pos = newline; |
543 | | |
544 | 19.6M | std::string::size_type separator = line.find(':'); |
545 | 19.6M | if (separator == std::string::npos) |
546 | 29.9k | { |
547 | | // this means we got a blank line, |
548 | | // the header is finished and the body |
549 | | // starts. |
550 | 29.9k | *header_size = int(newline - buf.data()); |
551 | | |
552 | | // the newline alone is two bytes |
553 | 29.9k | TORRENT_ASSERT(newline - buf.data() > 2); |
554 | | |
555 | | // we were successful in parsing the headers. |
556 | | // add them to the headers in the parser |
557 | 29.9k | for (auto const& p : tail_headers) |
558 | 420k | m_header.insert(p); |
559 | | |
560 | 29.9k | return true; |
561 | 29.9k | } |
562 | | |
563 | 19.6M | std::string name = line.substr(0, separator); |
564 | 19.6M | std::transform(name.begin(), name.end(), name.begin(), &to_lower); |
565 | 19.6M | ++separator; |
566 | | // skip whitespace |
567 | 22.7M | while (separator < line.size() |
568 | 6.67M | && (line[separator] == ' ' || line[separator] == '\t')) |
569 | 3.14M | ++separator; |
570 | 19.6M | std::string value = line.substr(separator, std::string::npos); |
571 | 19.6M | tail_headers.insert(std::make_pair(name, value)); |
572 | | // std::fprintf(stderr, "tail_header: %s: %s\n", name.c_str(), value.c_str()); |
573 | | |
574 | 19.6M | newline = std::find(pos, buf.end(), '\n'); |
575 | 19.6M | } |
576 | 45.5M | return false; |
577 | 45.5M | } |
578 | | |
579 | | span<char const> http_parser::get_body() const |
580 | 0 | { |
581 | 0 | if (m_state != read_body) return {}; |
582 | 0 | std::int64_t const received = m_recv_pos - m_body_start_pos; |
583 | |
|
584 | 0 | std::int64_t const body_length = m_chunked_encoding && !m_chunked_ranges.empty() |
585 | 0 | ? std::min(m_chunked_ranges.back().second - m_body_start_pos, received) |
586 | 0 | : m_content_length < 0 ? received : std::min(m_content_length, received); |
587 | |
|
588 | 0 | return m_recv_buffer.subspan(m_body_start_pos, aux::numeric_cast<std::ptrdiff_t>(body_length)); |
589 | 0 | } |
590 | | |
591 | | void http_parser::reset() |
592 | 233k | { |
593 | 233k | m_method.clear(); |
594 | 233k | m_recv_pos = 0; |
595 | 233k | m_body_start_pos = 0; |
596 | 233k | m_status_code = -1; |
597 | 233k | m_content_length = -1; |
598 | 233k | m_range_start = -1; |
599 | 233k | m_range_end = -1; |
600 | 233k | m_finished = false; |
601 | 233k | m_state = read_status; |
602 | 233k | m_recv_buffer = span<char const>(); |
603 | 233k | m_header.clear(); |
604 | 233k | m_chunked_encoding = false; |
605 | 233k | m_chunked_ranges.clear(); |
606 | 233k | m_cur_chunk_end = -1; |
607 | 233k | m_chunk_header_size = 0; |
608 | 233k | m_partial_chunk_header = 0; |
609 | 233k | } |
610 | | |
611 | | span<char> http_parser::collapse_chunk_headers(span<char> buffer) const |
612 | 0 | { |
613 | 0 | if (!chunked_encoding()) return buffer; |
614 | | |
615 | | // go through all chunks and compact them |
616 | | // since we're bottled, and the buffer is our after all |
617 | | // it's OK to mutate it |
618 | 0 | char* write_ptr = buffer.data(); |
619 | | // the offsets in the array are from the start of the |
620 | | // buffer, not start of the body, so subtract the size |
621 | | // of the HTTP header from them |
622 | 0 | int const offset = body_start(); |
623 | 0 | for (auto const& i : chunks()) |
624 | 0 | { |
625 | 0 | auto const chunk_start = i.first; |
626 | 0 | auto const chunk_end = i.second; |
627 | 0 | if (chunk_end - offset > buffer.size() |
628 | 0 | || (i.second - i.first) >= std::numeric_limits<int>::max()) |
629 | 0 | { |
630 | | // invalid chunk header. Return the body we've parsed out so far |
631 | 0 | return buffer.first(write_ptr - buffer.data()); |
632 | 0 | } |
633 | 0 | span<char> chunk = buffer.subspan( |
634 | 0 | aux::numeric_cast<std::ptrdiff_t>(chunk_start - offset) |
635 | 0 | , aux::numeric_cast<std::ptrdiff_t>(chunk_end - chunk_start)); |
636 | | #if defined __GNUC__ && __GNUC__ >= 7 |
637 | | #pragma GCC diagnostic push |
638 | | #pragma GCC diagnostic ignored "-Wstringop-overflow" |
639 | | #endif |
640 | 0 | std::memmove(write_ptr, chunk.data(), std::size_t(chunk.size())); |
641 | | #if defined __GNUC__ && __GNUC__ >= 7 |
642 | | #pragma GCC diagnostic pop |
643 | | #endif |
644 | 0 | write_ptr += chunk.size(); |
645 | 0 | } |
646 | 0 | return buffer.first(write_ptr - buffer.data()); |
647 | 0 | } |
648 | | } |