Coverage Report

Created: 2025-06-12 06:24

/src/libtorrent/src/parse_url.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
3
Copyright (c) 2008-2009, 2013-2017, 2019-2020, Arvid Norberg
4
Copyright (c) 2016, 2018, Alden Torres
5
All rights reserved.
6
7
Redistribution and use in source and binary forms, with or without
8
modification, are permitted provided that the following conditions
9
are met:
10
11
    * Redistributions of source code must retain the above copyright
12
      notice, this list of conditions and the following disclaimer.
13
    * Redistributions in binary form must reproduce the above copyright
14
      notice, this list of conditions and the following disclaimer in
15
      the documentation and/or other materials provided with the distribution.
16
    * Neither the name of the author nor the names of its
17
      contributors may be used to endorse or promote products derived
18
      from this software without specific prior written permission.
19
20
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
POSSIBILITY OF SUCH DAMAGE.
31
32
*/
33
34
#include <algorithm>
35
36
#include "libtorrent/parse_url.hpp"
37
#include "libtorrent/string_util.hpp"
38
#include "libtorrent/string_view.hpp"
39
40
namespace libtorrent {
41
42
  // returns protocol, auth, hostname, port, path
43
  std::tuple<std::string, std::string, std::string, int, std::string>
44
    parse_url_components(std::string url, error_code& ec)
45
0
  {
46
0
    std::string hostname; // hostname only
47
0
    std::string auth; // user:pass
48
0
    std::string protocol; // http or https for instance
49
0
    int port = -1;
50
51
0
    std::string::iterator at;
52
0
    std::string::iterator colon;
53
0
    std::string::iterator port_pos;
54
55
    // PARSE URL
56
0
    auto start = url.begin();
57
    // remove white spaces in front of the url
58
0
    while (start != url.end() && is_space(*start))
59
0
      ++start;
60
0
    auto end = std::find(url.begin(), url.end(), ':');
61
0
    protocol.assign(start, end);
62
63
0
    if (end == url.end())
64
0
    {
65
0
      ec = errors::unsupported_url_protocol;
66
0
      goto exit;
67
0
    }
68
0
    ++end;
69
0
    if (end == url.end() || *end != '/')
70
0
    {
71
0
      ec = errors::unsupported_url_protocol;
72
0
      goto exit;
73
0
    }
74
0
    ++end;
75
0
    if (end == url.end() || *end != '/')
76
0
    {
77
0
      ec = errors::unsupported_url_protocol;
78
0
      goto exit;
79
0
    }
80
0
    ++end;
81
0
    start = end;
82
83
0
    at = std::find(start, url.end(), '@');
84
0
    colon = std::find(start, url.end(), ':');
85
0
    end = std::min({
86
0
      std::find(start, url.end(), '/')
87
0
      , std::find(start, url.end(), '?')
88
0
      , std::find(start, url.end(), '#')
89
0
      });
90
91
0
    if (at != url.end()
92
0
      && colon != url.end()
93
0
      && colon < at
94
0
      && at < end)
95
0
    {
96
0
      auth.assign(start, at);
97
0
      start = at;
98
0
      ++start;
99
0
    }
100
101
    // this is for IPv6 addresses
102
0
    if (start != url.end() && *start == '[')
103
0
    {
104
0
      port_pos = std::find(start, url.end(), ']');
105
0
      if (port_pos == url.end())
106
0
      {
107
0
        ec = errors::expected_close_bracket_in_address;
108
0
        goto exit;
109
0
      }
110
      // strip the brackets
111
0
      hostname.assign(start + 1, port_pos);
112
0
      port_pos = std::find(port_pos, url.end(), ':');
113
0
    }
114
0
    else
115
0
    {
116
0
      port_pos = std::find(start, url.end(), ':');
117
0
      if (port_pos < end) hostname.assign(start, port_pos);
118
0
      else hostname.assign(start, end);
119
0
    }
120
121
0
    if (port_pos < end)
122
0
    {
123
0
      ++port_pos;
124
0
      for (auto i = port_pos; i < end; ++i)
125
0
      {
126
0
        if (is_digit(*i)) continue;
127
0
        ec = errors::invalid_port;
128
0
        goto exit;
129
0
      }
130
0
      port = std::atoi(std::string(port_pos, end).c_str());
131
0
    }
132
133
0
    start = end;
134
0
exit:
135
0
    std::string path_component(start, url.end());
136
0
    if (path_component.empty()
137
0
      || path_component.front() == '?'
138
0
      || path_component.front() == '#')
139
0
    {
140
0
      path_component.insert(path_component.begin(), '/');
141
0
    }
142
143
0
    return std::make_tuple(std::move(protocol)
144
0
      , std::move(auth)
145
0
      , std::move(hostname)
146
0
      , port
147
0
      , path_component);
148
0
  }
149
150
  // splits a url into the base url and the path
151
  std::tuple<std::string, std::string>
152
    split_url(std::string url, error_code& ec)
153
0
  {
154
0
    std::string base;
155
0
    std::string path;
156
157
    // PARSE URL
158
0
    auto pos = std::find(url.begin(), url.end(), ':');
159
160
0
    if (pos == url.end() || url.end() - pos < 3
161
0
      || *(pos + 1) != '/' || *(pos + 2) != '/')
162
0
    {
163
0
      ec = errors::unsupported_url_protocol;
164
0
      return std::make_tuple(std::move(url), std::move(path));
165
0
    }
166
0
    pos += 3; // skip "://"
167
168
0
    pos = std::find(pos, url.end(), '/');
169
0
    if (pos == url.end())
170
0
    {
171
0
      return std::make_tuple(std::move(url), std::move(path));
172
0
    }
173
174
0
    base.assign(url.begin(), pos);
175
0
    path.assign(pos, url.end());
176
0
    return std::make_tuple(std::move(base), std::move(path));
177
0
  }
178
179
  // returns true if any of the dot-separated labels of ``hostname`` starts
  // with "xn--" (where the 'x' and 'n' are matched case-insensitively)
  TORRENT_EXTRA_EXPORT bool is_idna(string_view hostname)
  {
    // tests a single label for the "xn--" prefix
    auto const has_xn_prefix = [](string_view const label)
    {
      if (label.size() < 4) return false;
      bool const first = label[0] == 'x' || label[0] == 'X';
      bool const second = label[1] == 'n' || label[1] == 'N';
      return first && second && label.substr(2, 2) == "--"_sv;
    };

    // peel off one label at a time, front to back
    auto dot = hostname.find('.');
    while (dot != string_view::npos)
    {
      if (has_xn_prefix(hostname.substr(0, dot))) return true;
      hostname = hostname.substr(dot + 1);
      dot = hostname.find('.');
    }

    // what's left is the last label
    return has_xn_prefix(hostname);
  }
194
195
  // returns true if any argument name in ``query_string`` (a sequence of
  // "name=value" pairs separated by '&') matches, case-insensitively, one
  // of the names listed in tracker_args below
  bool has_tracker_query_string(string_view query_string)
  {
    static string_view const tracker_args[] = {
      "info_hash"_sv, "event"_sv, "port"_sv, "left"_sv, "key"_sv,
      "uploaded"_sv, "downloaded"_sv, "corrupt"_sv, "peer_id"_sv
    };

    for (string_view remaining = query_string; !remaining.empty();)
    {
      // split off the next "name=value" argument
      auto const parts = split_string(remaining, '&');
      remaining = parts.second;

      // only the name, in front of the '=', is of interest
      string_view const key = split_string(parts.first, '=').first;
      bool const reserved = std::any_of(std::begin(tracker_args)
        , std::end(tracker_args)
        , [key](string_view const candidate)
        { return string_equal_no_case(key, candidate); });
      if (reserved) return true;
    }
    return false;
  }
215
216
}