/src/libtorrent/src/parse_url.cpp

Source (jump to first uncovered line)
/*

Copyright (c) 2008-2009, 2013-2017, 2019-2020, Arvid Norberg
Copyright (c) 2016, 2018, Alden Torres
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution.
    * Neither the name of the author nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/

#include <algorithm>

#include "libtorrent/parse_url.hpp"
#include "libtorrent/string_util.hpp"
#include "libtorrent/string_view.hpp"

namespace libtorrent {

  // Splits ``url`` into its components and returns them as
  // (protocol, auth, hostname, port, path).
  //
  // * protocol: everything in front of the first ':', with leading white
  //   space skipped ("http" or "https" for instance)
  // * auth: the "user:pass" part in front of an '@' in the authority. It
  //   is only recognized when it contains a ':'
  // * hostname: the host name or address. The brackets around an IPv6
  //   address are stripped
  // * port: the port number, or -1 if the url doesn't specify one
  // * path: everything following the authority, i.e. the path, query string
  //   and fragment. It always starts with a '/', one is inserted if the
  //   remainder is empty or starts with '?' or '#'
  //
  // On failure ``ec`` is set to errors::unsupported_url_protocol (the
  // protocol is not followed by "://"),
  // errors::expected_close_bracket_in_address (an IPv6 address is opened
  // with '[' but not closed within the authority) or errors::invalid_port
  // (the port contains non-digits or is greater than 65535). The components
  // parsed up to that point are still returned, and the path holds the part
  // of the url that was being parsed when the error was detected. ``ec`` is
  // left untouched on success.
  std::tuple<std::string, std::string, std::string, int, std::string>
    parse_url_components(std::string url, error_code& ec)
  {
    std::string hostname; // hostname only
    std::string auth; // user:pass
    std::string protocol; // http or https for instance
    int port = -1;

    // remove white spaces in front of the url
    auto start = url.begin();
    while (start != url.end() && is_space(*start))
      ++start;

    // builds the return value from what has been parsed so far. Whatever
    // follows ``start`` at the time of the call is reported as the path.
    // This is shared by the successful return and all error returns
    auto const finish = [&]
    {
      std::string path_component(start, url.end());
      if (path_component.empty()
        || path_component.front() == '?'
        || path_component.front() == '#')
      {
        path_component.insert(path_component.begin(), '/');
      }

      return std::make_tuple(std::move(protocol)
        , std::move(auth)
        , std::move(hostname)
        , port
        , std::move(path_component));
    };

    // PARSE URL
    auto const protocol_end = std::find(start, url.end(), ':');
    protocol.assign(start, protocol_end);

    // the protocol must be followed by "://"
    if (url.end() - protocol_end < 3
      || *(protocol_end + 1) != '/'
      || *(protocol_end + 2) != '/')
    {
      ec = errors::unsupported_url_protocol;
      return finish();
    }
    start = protocol_end + 3;

    // the authority (auth, hostname and port) ends where the path, the
    // query string or the fragment begins, whichever comes first
    auto const authority_end = std::min({
      std::find(start, url.end(), '/')
      , std::find(start, url.end(), '?')
      , std::find(start, url.end(), '#')
      });

    // only treat the part in front of '@' as credentials if it has a ':'
    // in it
    auto const at = std::find(start, authority_end, '@');
    if (at != authority_end && std::find(start, at, ':') != at)
    {
      auth.assign(start, at);
      start = at + 1;
    }

    // points at the ':' introducing the port, or is equal to authority_end
    // if there is no port
    std::string::iterator port_pos;

    // this is for IPv6 addresses
    if (start != url.end() && *start == '[')
    {
      // the closing bracket has to be part of the authority. A ']' further
      // into the url belongs to the path, query string or fragment and must
      // not be made part of the hostname
      auto const close_bracket = std::find(start, authority_end, ']');
      if (close_bracket == authority_end)
      {
        ec = errors::expected_close_bracket_in_address;
        return finish();
      }
      // strip the brackets
      hostname.assign(start + 1, close_bracket);
      port_pos = std::find(close_bracket, authority_end, ':');
    }
    else
    {
      port_pos = std::find(start, authority_end, ':');
      hostname.assign(start, port_pos);
    }

    if (port_pos != authority_end)
    {
      ++port_pos;
      // accumulate the digits by hand rather than calling atoi(), whose
      // behavior is undefined when the number doesn't fit in an int. An
      // empty port ("host:/") yields 0
      int value = 0;
      for (auto i = port_pos; i != authority_end; ++i)
      {
        if (is_digit(*i))
        {
          value = value * 10 + (*i - '0');
          // a port number is 16 bits. Bailing out here also keeps
          // ``value`` from overflowing
          if (value <= 0xffff) continue;
        }
        ec = errors::invalid_port;
        return finish();
      }
      port = value;
    }

    start = authority_end;
    return finish();
  }

  // splits a url into the base url (protocol and authority) and the path,
  // returned in that order. The path starts at the first '/' following
  // "://" and runs to the end of the url. If there is no such '/', the
  // whole url is returned as the base and the path is empty.
  // If the first ':' of the url is not followed by "//", ``ec`` is set to
  // errors::unsupported_url_protocol and the url is returned unsplit, with
  // an empty path.
  std::tuple<std::string, std::string>
    split_url(std::string url, error_code& ec)
  {
    // PARSE URL
    auto const protocol_end = url.find(':');
    bool const has_separator = protocol_end != std::string::npos
      && url.compare(protocol_end, 3, "://") == 0;

    if (!has_separator)
    {
      ec = errors::unsupported_url_protocol;
      return std::make_tuple(std::move(url), std::string());
    }

    // look for the path past the "://"
    auto const path_start = url.find('/', protocol_end + 3);
    if (path_start == std::string::npos)
      return std::make_tuple(std::move(url), std::string());

    std::string path = url.substr(path_start);
    // what's left in front of the path is the base url
    url.resize(path_start);
    return std::make_tuple(std::move(url), std::move(path));
  }

  // returns true if any of the '.'-separated labels of ``hostname`` starts
  // with "xn--". The 'x' and the 'n' are matched regardless of case.
  TORRENT_EXTRA_EXPORT bool is_idna(string_view hostname)
  {
    auto const has_xn_prefix = [](string_view const l)
    {
      if (l.size() < 4) return false;
      bool const x = l[0] == 'x' || l[0] == 'X';
      bool const n = l[1] == 'n' || l[1] == 'N';
      return x && n && l.substr(2, 2) == "--"_sv;
    };

    auto sep = hostname.find('.');
    while (sep != string_view::npos)
    {
      if (has_xn_prefix(hostname.substr(0, sep))) return true;
      // move on to the next label
      hostname = hostname.substr(sep + 1);
      sep = hostname.find('.');
    }

    // no more dots, what remains is the last label
    return has_xn_prefix(hostname);
  }

  // returns true if ``query_string`` contains at least one of the arguments
  // listed in tracker_args below. The query string is broken up with
  // split_string() at each '&', and the name of every argument is the part
  // split off in front of '='. Names are compared with
  // string_equal_no_case().
  bool has_tracker_query_string(string_view query_string)
  {
    static string_view const tracker_args[] = {
      "info_hash"_sv, "event"_sv, "port"_sv, "left"_sv, "key"_sv,
      "uploaded"_sv, "downloaded"_sv, "corrupt"_sv, "peer_id"_sv
    };

    auto const is_tracker_arg = [](string_view const key)
    {
      for (string_view const& candidate : tracker_args)
      {
        if (string_equal_no_case(key, candidate)) return true;
      }
      return false;
    };

    for (string_view remaining = query_string; !remaining.empty();)
    {
      // peel off the next argument, keep the rest for the next iteration
      auto const parts = split_string(remaining, '&');
      remaining = parts.second;

      if (is_tracker_arg(split_string(parts.first, '=').first))
        return true;
    }
    return false;
  }

}

Coverage Report

Created: 2025-06-12 06:24

Line	Count	Source (jump to first uncovered line)
1		/*
2
3		Copyright (c) 2008-2009, 2013-2017, 2019-2020, Arvid Norberg
4		Copyright (c) 2016, 2018, Alden Torres
5		All rights reserved.
6
7		Redistribution and use in source and binary forms, with or without
8		modification, are permitted provided that the following conditions
9		are met:
10
11		* Redistributions of source code must retain the above copyright
12		notice, this list of conditions and the following disclaimer.
13		* Redistributions in binary form must reproduce the above copyright
14		notice, this list of conditions and the following disclaimer in
15		the documentation and/or other materials provided with the distribution.
16		* Neither the name of the author nor the names of its
17		contributors may be used to endorse or promote products derived
18		from this software without specific prior written permission.
19
20		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21		AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22		IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23		ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24		LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25		CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26		SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27		INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28		CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29		ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30		POSSIBILITY OF SUCH DAMAGE.
31
32		*/
33
34		#include <algorithm>
35
36		#include "libtorrent/parse_url.hpp"
37		#include "libtorrent/string_util.hpp"
38		#include "libtorrent/string_view.hpp"
39
40		namespace libtorrent {
41
42		// returns protocol, auth, hostname, port, path
43		std::tuple<std::string, std::string, std::string, int, std::string>
44		parse_url_components(std::string url, error_code& ec)
45	0	{
46	0	std::string hostname; // hostname only
47	0	std::string auth; // user:pass
48	0	std::string protocol; // http or https for instance
49	0	int port = -1;
50
51	0	std::string::iterator at;
52	0	std::string::iterator colon;
53	0	std::string::iterator port_pos;
54
55		// PARSE URL
56	0	auto start = url.begin();
57		// remove white spaces in front of the url
58	0	while (start != url.end() && is_space(*start))
59	0	++start;
60	0	auto end = std::find(url.begin(), url.end(), ':');
61	0	protocol.assign(start, end);
62
63	0	if (end == url.end())
64	0	{
65	0	ec = errors::unsupported_url_protocol;
66	0	goto exit;
67	0	}
68	0	++end;
69	0	if (end == url.end() \|\| *end != '/')
70	0	{
71	0	ec = errors::unsupported_url_protocol;
72	0	goto exit;
73	0	}
74	0	++end;
75	0	if (end == url.end() \|\| *end != '/')
76	0	{
77	0	ec = errors::unsupported_url_protocol;
78	0	goto exit;
79	0	}
80	0	++end;
81	0	start = end;
82
83	0	at = std::find(start, url.end(), '@');
84	0	colon = std::find(start, url.end(), ':');
85	0	end = std::min({
86	0	std::find(start, url.end(), '/')
87	0	, std::find(start, url.end(), '?')
88	0	, std::find(start, url.end(), '#')
89	0	});
90
91	0	if (at != url.end()
92	0	&& colon != url.end()
93	0	&& colon < at
94	0	&& at < end)
95	0	{
96	0	auth.assign(start, at);
97	0	start = at;
98	0	++start;
99	0	}
100
101		// this is for IPv6 addresses
102	0	if (start != url.end() && *start == '[')
103	0	{
104	0	port_pos = std::find(start, url.end(), ']');
105	0	if (port_pos == url.end())
106	0	{
107	0	ec = errors::expected_close_bracket_in_address;
108	0	goto exit;
109	0	}
110		// strip the brackets
111	0	hostname.assign(start + 1, port_pos);
112	0	port_pos = std::find(port_pos, url.end(), ':');
113	0	}
114	0	else
115	0	{
116	0	port_pos = std::find(start, url.end(), ':');
117	0	if (port_pos < end) hostname.assign(start, port_pos);
118	0	else hostname.assign(start, end);
119	0	}
120
121	0	if (port_pos < end)
122	0	{
123	0	++port_pos;
124	0	for (auto i = port_pos; i < end; ++i)
125	0	{
126	0	if (is_digit(*i)) continue;
127	0	ec = errors::invalid_port;
128	0	goto exit;
129	0	}
130	0	port = std::atoi(std::string(port_pos, end).c_str());
131	0	}
132
133	0	start = end;
134	0	exit:
135	0	std::string path_component(start, url.end());
136	0	if (path_component.empty()
137	0	\|\| path_component.front() == '?'
138	0	\|\| path_component.front() == '#')
139	0	{
140	0	path_component.insert(path_component.begin(), '/');
141	0	}
142
143	0	return std::make_tuple(std::move(protocol)
144	0	, std::move(auth)
145	0	, std::move(hostname)
146	0	, port
147	0	, path_component);
148	0	}
149
150		// splits a url into the base url and the path
151		std::tuple<std::string, std::string>
152		split_url(std::string url, error_code& ec)
153	0	{
154	0	std::string base;
155	0	std::string path;
156
157		// PARSE URL
158	0	auto pos = std::find(url.begin(), url.end(), ':');
159
160	0	if (pos == url.end() \|\| url.end() - pos < 3
161	0	\|\| *(pos + 1) != '/' \|\| *(pos + 2) != '/')
162	0	{
163	0	ec = errors::unsupported_url_protocol;
164	0	return std::make_tuple(std::move(url), std::move(path));
165	0	}
166	0	pos += 3; // skip "://"
167
168	0	pos = std::find(pos, url.end(), '/');
169	0	if (pos == url.end())
170	0	{
171	0	return std::make_tuple(std::move(url), std::move(path));
172	0	}
173
174	0	base.assign(url.begin(), pos);
175	0	path.assign(pos, url.end());
176	0	return std::make_tuple(std::move(base), std::move(path));
177	0	}
178
179		TORRENT_EXTRA_EXPORT bool is_idna(string_view hostname)
180	0	{
181	0	for (;;)
182	0	{
183	0	auto dot = hostname.find('.');
184	0	string_view const label = (dot == string_view::npos) ? hostname : hostname.substr(0, dot);
185	0	if (label.size() >= 4
186	0	&& (label[0] == 'x' \|\| label[0] == 'X')
187	0	&& (label[1] == 'n' \|\| label[1] == 'N')
188	0	&& label.substr(2, 2) == "--"_sv)
189	0	return true;
190	0	if (dot == string_view::npos) return false;
191	0	hostname = hostname.substr(dot + 1);
192	0	}
193	0	}
194
195		bool has_tracker_query_string(string_view query_string)
196	0	{
197	0	static string_view const tracker_args[] = {
198	0	"info_hash"_sv, "event"_sv, "port"_sv, "left"_sv, "key"_sv,
199	0	"uploaded"_sv, "downloaded"_sv, "corrupt"_sv, "peer_id"_sv
200	0	};
201	0	while (!query_string.empty())
202	0	{
203	0	string_view arg;
204	0	std::tie(arg, query_string) = split_string(query_string, '&');
205
206	0	auto const name = split_string(arg, '=').first;
207	0	for (auto const& tracker_arg : tracker_args)
208	0	{
209	0	if (string_equal_no_case(name, tracker_arg))
210	0	return true;
211	0	}
212	0	}
213	0	return false;
214	0	}
215
216		}