/src/libtorrent/src/parse_url.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | |
3 | | Copyright (c) 2008-2009, 2013-2017, 2019-2020, Arvid Norberg |
4 | | Copyright (c) 2016, 2018, Alden Torres |
5 | | All rights reserved. |
6 | | |
7 | | Redistribution and use in source and binary forms, with or without |
8 | | modification, are permitted provided that the following conditions |
9 | | are met: |
10 | | |
11 | | * Redistributions of source code must retain the above copyright |
12 | | notice, this list of conditions and the following disclaimer. |
13 | | * Redistributions in binary form must reproduce the above copyright |
14 | | notice, this list of conditions and the following disclaimer in |
15 | | the documentation and/or other materials provided with the distribution. |
16 | | * Neither the name of the author nor the names of its |
17 | | contributors may be used to endorse or promote products derived |
18 | | from this software without specific prior written permission. |
19 | | |
20 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
21 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
22 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
23 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
24 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | | POSSIBILITY OF SUCH DAMAGE. |
31 | | |
32 | | */ |
33 | | |
34 | | #include <algorithm> |
35 | | |
36 | | #include "libtorrent/parse_url.hpp" |
37 | | #include "libtorrent/string_util.hpp" |
38 | | #include "libtorrent/string_view.hpp" |
39 | | |
40 | | namespace libtorrent { |
41 | | |
42 | | // returns protocol, auth, hostname, port, path |
43 | | std::tuple<std::string, std::string, std::string, int, std::string> |
44 | | parse_url_components(std::string url, error_code& ec) |
45 | 0 | { |
46 | 0 | std::string hostname; // hostname only |
47 | 0 | std::string auth; // user:pass |
48 | 0 | std::string protocol; // http or https for instance |
49 | 0 | int port = -1; |
50 | |
|
51 | 0 | std::string::iterator at; |
52 | 0 | std::string::iterator colon; |
53 | 0 | std::string::iterator port_pos; |
54 | | |
55 | | // PARSE URL |
56 | 0 | auto start = url.begin(); |
57 | | // remove white spaces in front of the url |
58 | 0 | while (start != url.end() && is_space(*start)) |
59 | 0 | ++start; |
60 | 0 | auto end = std::find(url.begin(), url.end(), ':'); |
61 | 0 | protocol.assign(start, end); |
62 | |
|
63 | 0 | if (end == url.end()) |
64 | 0 | { |
65 | 0 | ec = errors::unsupported_url_protocol; |
66 | 0 | goto exit; |
67 | 0 | } |
68 | 0 | ++end; |
69 | 0 | if (end == url.end() || *end != '/') |
70 | 0 | { |
71 | 0 | ec = errors::unsupported_url_protocol; |
72 | 0 | goto exit; |
73 | 0 | } |
74 | 0 | ++end; |
75 | 0 | if (end == url.end() || *end != '/') |
76 | 0 | { |
77 | 0 | ec = errors::unsupported_url_protocol; |
78 | 0 | goto exit; |
79 | 0 | } |
80 | 0 | ++end; |
81 | 0 | start = end; |
82 | |
|
83 | 0 | at = std::find(start, url.end(), '@'); |
84 | 0 | colon = std::find(start, url.end(), ':'); |
85 | 0 | end = std::min({ |
86 | 0 | std::find(start, url.end(), '/') |
87 | 0 | , std::find(start, url.end(), '?') |
88 | 0 | , std::find(start, url.end(), '#') |
89 | 0 | }); |
90 | |
|
91 | 0 | if (at != url.end() |
92 | 0 | && colon != url.end() |
93 | 0 | && colon < at |
94 | 0 | && at < end) |
95 | 0 | { |
96 | 0 | auth.assign(start, at); |
97 | 0 | start = at; |
98 | 0 | ++start; |
99 | 0 | } |
100 | | |
101 | | // this is for IPv6 addresses |
102 | 0 | if (start != url.end() && *start == '[') |
103 | 0 | { |
104 | 0 | port_pos = std::find(start, url.end(), ']'); |
105 | 0 | if (port_pos == url.end()) |
106 | 0 | { |
107 | 0 | ec = errors::expected_close_bracket_in_address; |
108 | 0 | goto exit; |
109 | 0 | } |
110 | | // strip the brackets |
111 | 0 | hostname.assign(start + 1, port_pos); |
112 | 0 | port_pos = std::find(port_pos, url.end(), ':'); |
113 | 0 | } |
114 | 0 | else |
115 | 0 | { |
116 | 0 | port_pos = std::find(start, url.end(), ':'); |
117 | 0 | if (port_pos < end) hostname.assign(start, port_pos); |
118 | 0 | else hostname.assign(start, end); |
119 | 0 | } |
120 | | |
121 | 0 | if (port_pos < end) |
122 | 0 | { |
123 | 0 | ++port_pos; |
124 | 0 | for (auto i = port_pos; i < end; ++i) |
125 | 0 | { |
126 | 0 | if (is_digit(*i)) continue; |
127 | 0 | ec = errors::invalid_port; |
128 | 0 | goto exit; |
129 | 0 | } |
130 | 0 | port = std::atoi(std::string(port_pos, end).c_str()); |
131 | 0 | } |
132 | | |
133 | 0 | start = end; |
134 | 0 | exit: |
135 | 0 | std::string path_component(start, url.end()); |
136 | 0 | if (path_component.empty() |
137 | 0 | || path_component.front() == '?' |
138 | 0 | || path_component.front() == '#') |
139 | 0 | { |
140 | 0 | path_component.insert(path_component.begin(), '/'); |
141 | 0 | } |
142 | |
|
143 | 0 | return std::make_tuple(std::move(protocol) |
144 | 0 | , std::move(auth) |
145 | 0 | , std::move(hostname) |
146 | 0 | , port |
147 | 0 | , path_component); |
148 | 0 | } |
149 | | |
150 | | // splits a url into the base url and the path |
151 | | std::tuple<std::string, std::string> |
152 | | split_url(std::string url, error_code& ec) |
153 | 0 | { |
154 | 0 | std::string base; |
155 | 0 | std::string path; |
156 | | |
157 | | // PARSE URL |
158 | 0 | auto pos = std::find(url.begin(), url.end(), ':'); |
159 | |
|
160 | 0 | if (pos == url.end() || url.end() - pos < 3 |
161 | 0 | || *(pos + 1) != '/' || *(pos + 2) != '/') |
162 | 0 | { |
163 | 0 | ec = errors::unsupported_url_protocol; |
164 | 0 | return std::make_tuple(std::move(url), std::move(path)); |
165 | 0 | } |
166 | 0 | pos += 3; // skip "://" |
167 | |
|
168 | 0 | pos = std::find(pos, url.end(), '/'); |
169 | 0 | if (pos == url.end()) |
170 | 0 | { |
171 | 0 | return std::make_tuple(std::move(url), std::move(path)); |
172 | 0 | } |
173 | | |
174 | 0 | base.assign(url.begin(), pos); |
175 | 0 | path.assign(pos, url.end()); |
176 | 0 | return std::make_tuple(std::move(base), std::move(path)); |
177 | 0 | } |
178 | | |
179 | | TORRENT_EXTRA_EXPORT bool is_idna(string_view hostname) |
180 | 0 | { |
181 | 0 | for (;;) |
182 | 0 | { |
183 | 0 | auto dot = hostname.find('.'); |
184 | 0 | string_view const label = (dot == string_view::npos) ? hostname : hostname.substr(0, dot); |
185 | 0 | if (label.size() >= 4 |
186 | 0 | && (label[0] == 'x' || label[0] == 'X') |
187 | 0 | && (label[1] == 'n' || label[1] == 'N') |
188 | 0 | && label.substr(2, 2) == "--"_sv) |
189 | 0 | return true; |
190 | 0 | if (dot == string_view::npos) return false; |
191 | 0 | hostname = hostname.substr(dot + 1); |
192 | 0 | } |
193 | 0 | } |
194 | | |
195 | | bool has_tracker_query_string(string_view query_string) |
196 | 0 | { |
197 | 0 | static string_view const tracker_args[] = { |
198 | 0 | "info_hash"_sv, "event"_sv, "port"_sv, "left"_sv, "key"_sv, |
199 | 0 | "uploaded"_sv, "downloaded"_sv, "corrupt"_sv, "peer_id"_sv |
200 | 0 | }; |
201 | 0 | while (!query_string.empty()) |
202 | 0 | { |
203 | 0 | string_view arg; |
204 | 0 | std::tie(arg, query_string) = split_string(query_string, '&'); |
205 | |
|
206 | 0 | auto const name = split_string(arg, '=').first; |
207 | 0 | for (auto const& tracker_arg : tracker_args) |
208 | 0 | { |
209 | 0 | if (string_equal_no_case(name, tracker_arg)) |
210 | 0 | return true; |
211 | 0 | } |
212 | 0 | } |
213 | 0 | return false; |
214 | 0 | } |
215 | | |
216 | | } |