/src/libtorrent/src/bdecode.cpp
Line | Count | Source |
1 | | /* |
2 | | |
3 | | Copyright (c) 2015-2020, 2022, Arvid Norberg |
4 | | Copyright (c) 2016-2017, 2019, Alden Torres |
5 | | Copyright (c) 2016-2017, Andrei Kurushin |
6 | | Copyright (c) 2016-2017, Steven Siloti |
7 | | Copyright (c) 2017, Pavel Pimenov |
8 | | All rights reserved. |
9 | | |
10 | | Redistribution and use in source and binary forms, with or without |
11 | | modification, are permitted provided that the following conditions |
12 | | are met: |
13 | | |
14 | | * Redistributions of source code must retain the above copyright |
15 | | notice, this list of conditions and the following disclaimer. |
16 | | * Redistributions in binary form must reproduce the above copyright |
17 | | notice, this list of conditions and the following disclaimer in |
18 | | the documentation and/or other materials provided with the distribution. |
19 | | * Neither the name of the author nor the names of its |
20 | | contributors may be used to endorse or promote products derived |
21 | | from this software without specific prior written permission. |
22 | | |
23 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
24 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
27 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
28 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
29 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
30 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
31 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
32 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | | POSSIBILITY OF SUCH DAMAGE. |
34 | | |
35 | | */ |
36 | | |
37 | | #include "libtorrent/bdecode.hpp" |
38 | | #include "libtorrent/aux_/alloca.hpp" |
39 | | #include "libtorrent/aux_/numeric_cast.hpp" |
40 | | #include "libtorrent/error_code.hpp" |
41 | | #include <limits> |
42 | | #include <cstring> // for memset |
43 | | #include <cstdio> // for snprintf |
44 | | #include <cinttypes> // for PRId64 et al. |
45 | | #include <algorithm> // for any_of |
46 | | |
47 | | #ifndef BOOST_SYSTEM_NOEXCEPT |
48 | | #define BOOST_SYSTEM_NOEXCEPT throw() |
49 | | #endif |
50 | | |
51 | | namespace libtorrent { |
52 | | |
53 | | using aux::bdecode_token; |
54 | | |
55 | | namespace { |
56 | | |
57 | | static_assert(int('0') == 48, "this code relies on ASCII compatible source encoding"); |
58 | | static_assert(int('9') == 57, "this code relies on ASCII compatible source encoding"); |
59 | | |
60 | 0 | bool numeric(char c) { return c >= '0' && c <= '9'; } |
61 | | |
62 | | // finds the end of an integer and verifies that it looks valid. This does |
63 | | // not detect all overflows, just the ones that are an order of magnitude |
64 | | // beyond. Exact overflow checking is done when the integer value is queried |
65 | | // from a bdecode_node. |
66 | | char const* check_integer(char const* start, char const* end |
67 | | , bdecode_errors::error_code_enum& e) |
68 | 0 | { |
69 | 0 | if (start == end) |
70 | 0 | { |
71 | 0 | e = bdecode_errors::unexpected_eof; |
72 | 0 | return start; |
73 | 0 | } |
74 | | |
75 | 0 | if (*start == '-') |
76 | 0 | { |
77 | 0 | ++start; |
78 | 0 | if (start == end) |
79 | 0 | { |
80 | 0 | e = bdecode_errors::unexpected_eof; |
81 | 0 | return start; |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | 0 | int digits = 0; |
86 | 0 | do |
87 | 0 | { |
88 | 0 | if (!numeric(*start)) |
89 | 0 | { |
90 | 0 | e = bdecode_errors::expected_digit; |
91 | 0 | break; |
92 | 0 | } |
93 | 0 | ++start; |
94 | 0 | ++digits; |
95 | |
96 | 0 | if (start == end) |
97 | 0 | { |
98 | 0 | e = bdecode_errors::unexpected_eof; |
99 | 0 | break; |
100 | 0 | } |
101 | 0 | } |
102 | 0 | while (*start != 'e'); |
103 | |
104 | 0 | if (digits > 20) |
105 | 0 | { |
106 | 0 | e = bdecode_errors::overflow; |
107 | 0 | } |
108 | |
109 | 0 | return start; |
110 | 0 | } |
111 | | |
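For reference, a bencoded integer is its decimal ASCII representation wrapped in 'i'...'e', for example "i42e" or "i-7e". Below is a standalone sketch of the same kind of pre-check, outside the library's error-code plumbing; the helper name is made up for illustration and mirrors the order-of-magnitude digit limit rather than exact overflow detection.

#include <cassert>

// sketch: true if [p, p+len) begins with a plausible bencoded integer
// "i<digits>e" with an optional '-', capped at 20 digits like check_integer
bool looks_like_bencoded_int(char const* p, int const len)
{
	if (len < 3 || p[0] != 'i') return false;
	int i = 1;
	if (p[i] == '-') ++i;
	int digits = 0;
	while (i < len && p[i] >= '0' && p[i] <= '9') { ++i; ++digits; }
	return digits > 0 && digits <= 20 && i < len && p[i] == 'e';
}

int main()
{
	assert(looks_like_bencoded_int("i42e", 4));
	assert(looks_like_bencoded_int("i-7e", 4));
	assert(!looks_like_bencoded_int("iabce", 5));
}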
112 | | struct stack_frame |
113 | | { |
114 | 0 | stack_frame() : token(0), state(0) {} |
115 | 0 | explicit stack_frame(int const t): token(std::uint32_t(t)), state(0) {} |
116 | | // this is an index into m_tokens |
117 | | std::uint32_t token:31; |
118 | | // this is used for dictionaries to indicate whether we're |
119 | | // reading a key or a value. 0 means key, 1 means value |
120 | | std::uint32_t state:1; |
121 | | }; |
122 | | |
123 | | // diff between the current and the next item's offset; |
124 | | // should only be called for a non-last item in an array |
125 | | int token_source_span(bdecode_token const& t) |
126 | 0 | { |
127 | 0 | return (&t)[1].offset - t.offset; |
128 | 0 | } |
129 | | |
130 | | } // anonymous namespace |
131 | | |
132 | | namespace aux { |
133 | | void escape_string(std::string& ret, char const* str, int len) |
134 | 0 | { |
135 | 0 | if (std::any_of(str, str + len, [](char const c) { return c < 32 || c >= 127; } )) |
136 | 0 | { |
137 | 0 | for (int i = 0; i < len; ++i) |
138 | 0 | { |
139 | 0 | char tmp[3]; |
140 | 0 | std::snprintf(tmp, sizeof(tmp), "%02x", std::uint8_t(str[i])); |
141 | 0 | ret += tmp; |
142 | 0 | } |
143 | 0 | } |
144 | 0 | else |
145 | 0 | { |
146 | 0 | ret.assign(str, std::size_t(len)); |
147 | 0 | } |
148 | 0 | } |
149 | | } |
150 | | |
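escape_string() copies the input verbatim when every byte is printable ASCII, and otherwise hex-encodes every byte as two lowercase hex digits. A minimal standalone sketch of the hex branch (hypothetical helper, not the library function):

#include <cstdio>
#include <string>

// every byte becomes two lowercase hex digits, e.g. "\x01\x02" -> "0102"
std::string hex_escape(std::string const& in)
{
	std::string out;
	for (unsigned char const c : in)
	{
		char tmp[3];
		std::snprintf(tmp, sizeof(tmp), "%02x", c);
		out += tmp;
	}
	return out;
}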
151 | | |
152 | | |
153 | | // reads the string between start and end, or up to the first occurrence of |
154 | | // 'delimiter', whichever comes first. This string is interpreted as an |
155 | | // integer which is assigned to 'val'. If there's a non-delimiter and |
156 | | // non-digit in the range, a parse error is reported in 'ec'. If the value |
157 | | // cannot be represented by the variable 'val', an overflow error is reported |
158 | | // by 'ec'. |
159 | | // WARNING: the val is an out-parameter that is expected to be initialized |
160 | | // to 0 or a *positive* number. |
161 | | char const* parse_int(char const* start, char const* end, char delimiter |
162 | | , std::int64_t& val, bdecode_errors::error_code_enum& ec) |
163 | 0 | { |
164 | 0 | TORRENT_ASSERT(val >= 0); |
165 | 0 | while (start < end && *start != delimiter) |
166 | 0 | { |
167 | 0 | char const c = *start; |
168 | 0 | if (!numeric(c)) |
169 | 0 | { |
170 | 0 | ec = bdecode_errors::expected_digit; |
171 | 0 | return start; |
172 | 0 | } |
173 | 0 | if (val > std::numeric_limits<std::int64_t>::max() / 10) |
174 | 0 | { |
175 | 0 | ec = bdecode_errors::overflow; |
176 | 0 | return start; |
177 | 0 | } |
178 | 0 | val *= 10; |
179 | 0 | int const digit = c - '0'; |
180 | 0 | if (val > std::numeric_limits<std::int64_t>::max() - digit) |
181 | 0 | { |
182 | 0 | ec = bdecode_errors::overflow; |
183 | 0 | return start; |
184 | 0 | } |
185 | 0 | val += digit; |
186 | 0 | ++start; |
187 | 0 | } |
188 | 0 | return start; |
189 | 0 | } |
190 | | |
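parse_int() guards every step of the accumulation: once before the multiply by 10 and once before adding the digit, so the running value can never pass the int64 maximum. A self-contained sketch of the same pattern; the name parse_decimal is made up:

#include <cstdint>
#include <limits>
#include <optional>
#include <string_view>

// overflow-safe decimal accumulation, mirroring parse_int's two checks
std::optional<std::int64_t> parse_decimal(std::string_view s)
{
	std::int64_t val = 0;
	for (char const c : s)
	{
		if (c < '0' || c > '9') return std::nullopt;
		int const digit = c - '0';
		if (val > std::numeric_limits<std::int64_t>::max() / 10) return std::nullopt;
		val *= 10;
		if (val > std::numeric_limits<std::int64_t>::max() - digit) return std::nullopt;
		val += digit;
	}
	return val;
}

// parse_decimal("9223372036854775807") yields INT64_MAX
// parse_decimal("9223372036854775808") yields nullopt (overflow)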
191 | | |
192 | | struct bdecode_error_category final : boost::system::error_category |
193 | | { |
194 | | const char* name() const BOOST_SYSTEM_NOEXCEPT override; |
195 | | std::string message(int ev) const override; |
196 | | boost::system::error_condition default_error_condition( |
197 | | int ev) const BOOST_SYSTEM_NOEXCEPT override |
198 | 0 | { return {ev, *this}; } |
199 | | }; |
200 | | |
201 | | const char* bdecode_error_category::name() const BOOST_SYSTEM_NOEXCEPT |
202 | 0 | { |
203 | 0 | return "bdecode"; |
204 | 0 | } |
205 | | |
206 | | std::string bdecode_error_category::message(int ev) const |
207 | 0 | { |
208 | 0 | static char const* msgs[] = |
209 | 0 | { |
210 | 0 | "no error", |
211 | 0 | "expected digit in bencoded string", |
212 | 0 | "expected colon in bencoded string", |
213 | 0 | "unexpected end of file in bencoded string", |
214 | 0 | "expected value (list, dict, int or string) in bencoded string", |
215 | 0 | "bencoded nesting depth exceeded", |
216 | 0 | "bencoded item count limit exceeded", |
217 | 0 | "integer overflow", |
218 | 0 | }; |
219 | 0 | if (ev < 0 || ev >= int(sizeof(msgs)/sizeof(msgs[0]))) |
220 | 0 | return "Unknown error"; |
221 | 0 | return msgs[ev]; |
222 | 0 | } |
223 | | |
224 | | boost::system::error_category& bdecode_category() |
225 | 0 | { |
226 | 0 | static bdecode_error_category bdecode_category; |
227 | 0 | return bdecode_category; |
228 | 0 | } |
229 | | |
230 | | namespace bdecode_errors |
231 | | { |
232 | | boost::system::error_code make_error_code(error_code_enum e) |
233 | 0 | { |
234 | 0 | return {e, bdecode_category()}; |
235 | 0 | } |
236 | | } |
237 | | |
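The category above is what turns bdecode_errors values into readable messages. A small usage sketch, assuming the public "libtorrent/bdecode.hpp" header provides these declarations:

#include "libtorrent/bdecode.hpp"
#include <iostream>

namespace lt = libtorrent;

int main()
{
	lt::error_code const ec = lt::bdecode_errors::make_error_code(
		lt::bdecode_errors::overflow);
	std::cout << ec.message() << '\n';         // "integer overflow"
	std::cout << ec.category().name() << '\n'; // "bdecode"
}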
238 | | bdecode_node::bdecode_node(bdecode_node const& n) |
239 | 0 | : m_tokens(n.m_tokens) |
240 | 0 | , m_root_tokens(n.m_root_tokens) |
241 | 0 | , m_buffer(n.m_buffer) |
242 | 0 | , m_buffer_size(n.m_buffer_size) |
243 | 0 | , m_token_idx(n.m_token_idx) |
244 | 0 | , m_last_index(n.m_last_index) |
245 | 0 | , m_last_token(n.m_last_token) |
246 | 0 | , m_size(n.m_size) |
247 | 0 | { |
248 | 0 | (*this) = n; |
249 | 0 | } |
250 | | |
251 | | bdecode_node& bdecode_node::operator=(bdecode_node const& n) & |
252 | 0 | { |
253 | 0 | if (&n == this) return *this; |
254 | 0 | m_tokens = n.m_tokens; |
255 | 0 | m_root_tokens = n.m_root_tokens; |
256 | 0 | m_buffer = n.m_buffer; |
257 | 0 | m_buffer_size = n.m_buffer_size; |
258 | 0 | m_token_idx = n.m_token_idx; |
259 | 0 | m_last_index = n.m_last_index; |
260 | 0 | m_last_token = n.m_last_token; |
261 | 0 | m_size = n.m_size; |
262 | 0 | if (!m_tokens.empty()) |
263 | 0 | { |
264 | | // if this is a root, make the token pointer |
265 | | // point to our storage |
266 | 0 | m_root_tokens = &m_tokens[0]; |
267 | 0 | } |
268 | 0 | return *this; |
269 | 0 | } |
270 | | |
271 | 0 | bdecode_node::bdecode_node(bdecode_node&&) noexcept = default; |
272 | | |
273 | | bdecode_node::bdecode_node(bdecode_token const* tokens, char const* buf |
274 | | , int len, int idx) |
275 | 0 | : m_root_tokens(tokens) |
276 | 0 | , m_buffer(buf) |
277 | 0 | , m_buffer_size(len) |
278 | 0 | , m_token_idx(idx) |
279 | 0 | { |
280 | 0 | TORRENT_ASSERT(tokens != nullptr); |
281 | 0 | TORRENT_ASSERT(idx >= 0); |
282 | 0 | } |
283 | | |
284 | | bdecode_node bdecode_node::non_owning() const |
285 | 0 | { |
286 | | // if we're not a root, just return a copy of ourself |
287 | 0 | if (m_tokens.empty()) return *this; |
288 | | |
289 | | // otherwise, return a reference to this node, but without |
290 | | // being an owning root node |
291 | 0 | return bdecode_node(&m_tokens[0], m_buffer, m_buffer_size, m_token_idx); |
292 | 0 | } |
293 | | |
294 | | void bdecode_node::clear() |
295 | 0 | { |
296 | 0 | m_tokens.clear(); |
297 | 0 | m_root_tokens = nullptr; |
298 | 0 | m_token_idx = -1; |
299 | 0 | m_size = -1; |
300 | 0 | m_last_index = -1; |
301 | 0 | m_last_token = -1; |
302 | 0 | } |
303 | | |
304 | | void bdecode_node::switch_underlying_buffer(char const* buf) noexcept |
305 | 0 | { |
306 | 0 | TORRENT_ASSERT(!m_tokens.empty()); |
307 | 0 | if (m_tokens.empty()) return; |
308 | | |
309 | 0 | m_buffer = buf; |
310 | 0 | } |
311 | | |
312 | | bool bdecode_node::has_soft_error(span<char> error) const |
313 | 0 | { |
314 | 0 | if (type() == none_t) return false; |
315 | | |
316 | 0 | bdecode_token const* tokens = m_root_tokens; |
317 | 0 | int token = m_token_idx; |
318 | | |
319 | | // we don't know what the original depth_limit was |
320 | | // so this has to go on the heap |
321 | 0 | std::vector<int> stack; |
322 | | // make the initial allocation the default depth_limit |
323 | 0 | stack.reserve(100); |
324 | |
325 | 0 | do |
326 | 0 | { |
327 | 0 | switch (tokens[token].type) |
328 | 0 | { |
329 | 0 | case bdecode_token::integer: |
330 | 0 | if (m_buffer[tokens[token].offset + 1] == '0' |
331 | 0 | && m_buffer[tokens[token].offset + 2] != 'e') |
332 | 0 | { |
333 | 0 | std::snprintf(error.data(), std::size_t(error.size()), "leading zero in integer"); |
334 | 0 | return true; |
335 | 0 | } |
336 | 0 | break; |
337 | 0 | case bdecode_token::string: |
338 | 0 | case bdecode_token::long_string: |
339 | 0 | if (m_buffer[tokens[token].offset] == '0' |
340 | 0 | && m_buffer[tokens[token].offset + 1] != ':') |
341 | 0 | { |
342 | 0 | std::snprintf(error.data(), std::size_t(error.size()), "leading zero in string length"); |
343 | 0 | return true; |
344 | 0 | } |
345 | 0 | break; |
346 | 0 | case bdecode_token::dict: |
347 | 0 | case bdecode_token::list: |
348 | 0 | stack.push_back(token); |
349 | 0 | break; |
350 | 0 | case bdecode_token::end: |
351 | 0 | auto const parent = stack.back(); |
352 | 0 | stack.pop_back(); |
353 | 0 | if (tokens[parent].type == bdecode_token::dict |
354 | 0 | && token != parent + 1) |
355 | 0 | { |
356 | | // this is the end of a non-empty dict |
357 | | // check the sort order of the keys |
358 | 0 | int k1 = parent + 1; |
359 | 0 | for (;;) |
360 | 0 | { |
361 | | // skip to the first key's value |
362 | 0 | int const v1 = k1 + tokens[k1].next_item; |
363 | | // then to the next key |
364 | 0 | int const k2 = v1 + tokens[v1].next_item; |
365 | | |
366 | | // check if k1 was the last key in the dict |
367 | 0 | if (k2 == token) |
368 | 0 | break; |
369 | | |
370 | 0 | int const v2 = k2 + tokens[k2].next_item; |
371 | |
372 | 0 | int const k1_start = tokens[k1].offset + tokens[k1].start_offset(); |
373 | 0 | int const k1_len = tokens[v1].offset - k1_start; |
374 | 0 | int const k2_start = tokens[k2].offset + tokens[k2].start_offset(); |
375 | 0 | int const k2_len = tokens[v2].offset - k2_start; |
376 | |
377 | 0 | int const min_len = std::min(k1_len, k2_len); |
378 | |
379 | 0 | int cmp = std::memcmp(m_buffer + k1_start, m_buffer + k2_start, std::size_t(min_len)); |
380 | 0 | if (cmp > 0 || (cmp == 0 && k1_len > k2_len)) |
381 | 0 | { |
382 | 0 | std::snprintf(error.data(), std::size_t(error.size()), "unsorted dictionary key"); |
383 | 0 | return true; |
384 | 0 | } |
385 | 0 | else if (cmp == 0 && k1_len == k2_len) |
386 | 0 | { |
387 | 0 | std::snprintf(error.data(), std::size_t(error.size()), "duplicate dictionary key"); |
388 | 0 | return true; |
389 | 0 | } |
390 | | |
391 | 0 | k1 = k2; |
392 | 0 | } |
393 | 0 | } |
394 | 0 | break; |
395 | 0 | } |
396 | | |
397 | 0 | ++token; |
398 | 0 | } while (!stack.empty()); |
399 | 0 | return false; |
400 | 0 | } |
401 | | |
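Soft errors are bencoding convention violations (leading zeroes, unsorted or duplicate dictionary keys) that the parser itself accepts. A usage sketch, assuming the public header and that std::string and a char array convert to libtorrent's span types as they do for other containers:

#include "libtorrent/bdecode.hpp"
#include <cstdio>
#include <string>

namespace lt = libtorrent;

int main()
{
	// keys "b" and "a" are out of order: this parses without a hard error,
	// but has_soft_error() flags the convention violation
	std::string const buf = "d1:bi1e1:ai2ee";
	lt::error_code ec;
	lt::bdecode_node const e = lt::bdecode(buf, ec);
	if (ec) return 1;

	char msg[128];
	if (e.has_soft_error(msg))
		std::printf("soft error: %s\n", msg); // "unsorted dictionary key"
}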
402 | | bdecode_node::type_t bdecode_node::type() const noexcept |
403 | 0 | { |
404 | 0 | if (m_token_idx == -1) return none_t; |
405 | 0 | if (m_root_tokens[m_token_idx].type == bdecode_token::long_string) |
406 | 0 | return bdecode_node::string_t; |
407 | 0 | return static_cast<bdecode_node::type_t>(m_root_tokens[m_token_idx].type); |
408 | 0 | } |
409 | | |
410 | | bdecode_node::operator bool() const noexcept |
411 | 0 | { return m_token_idx != -1; } |
412 | | |
413 | | span<char const> bdecode_node::data_section() const noexcept |
414 | 0 | { |
415 | 0 | if (m_token_idx == -1) return {}; |
416 | | |
417 | 0 | TORRENT_ASSERT(m_token_idx != -1); |
418 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
419 | 0 | bdecode_token const& next = m_root_tokens[m_token_idx + t.next_item]; |
420 | 0 | return {m_buffer + t.offset, static_cast<std::ptrdiff_t>(next.offset - t.offset)}; |
421 | 0 | } |
422 | | |
423 | | std::ptrdiff_t bdecode_node::data_offset() const noexcept |
424 | 0 | { |
425 | 0 | TORRENT_ASSERT_PRECOND(m_token_idx != -1); |
426 | 0 | if (m_token_idx == -1) return -1; |
427 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
428 | 0 | return t.offset; |
429 | 0 | } |
430 | | |
431 | | bdecode_node bdecode_node::list_at(int i) const |
432 | 0 | { |
433 | 0 | TORRENT_ASSERT(type() == list_t); |
434 | 0 | TORRENT_ASSERT(i >= 0); |
435 | | |
436 | | // make sure this is a list. |
437 | 0 | bdecode_token const* tokens = m_root_tokens; |
438 | | |
439 | | // this is the first item |
440 | 0 | int token = m_token_idx + 1; |
441 | 0 | int item = 0; |
442 | | |
443 | | // do we have a lookup cached? |
444 | 0 | if (m_last_index <= i && m_last_index != -1) |
445 | 0 | { |
446 | 0 | token = m_last_token; |
447 | 0 | item = m_last_index; |
448 | 0 | } |
449 | |
450 | 0 | while (item < i) |
451 | 0 | { |
452 | 0 | token += tokens[token].next_item; |
453 | 0 | ++item; |
454 | | |
455 | | // index 'i' out of range |
456 | 0 | TORRENT_ASSERT(tokens[token].type != bdecode_token::end); |
457 | 0 | } |
458 | |
459 | 0 | m_last_token = token; |
460 | 0 | m_last_index = i; |
461 | |
462 | 0 | return bdecode_node(tokens, m_buffer, m_buffer_size, token); |
463 | 0 | } |
464 | | |
465 | | string_view bdecode_node::list_string_value_at(int i |
466 | | , string_view default_val) const |
467 | 0 | { |
468 | 0 | bdecode_node const n = list_at(i); |
469 | 0 | if (n.type() != bdecode_node::string_t) return default_val; |
470 | 0 | return n.string_value(); |
471 | 0 | } |
472 | | |
473 | | std::int64_t bdecode_node::list_int_value_at(int i |
474 | | , std::int64_t default_val) const |
475 | 0 | { |
476 | 0 | bdecode_node const n = list_at(i); |
477 | 0 | if (n.type() != bdecode_node::int_t) return default_val; |
478 | 0 | return n.int_value(); |
479 | 0 | } |
480 | | |
481 | | int bdecode_node::list_size() const |
482 | 0 | { |
483 | 0 | TORRENT_ASSERT(type() == list_t); |
484 | |
485 | 0 | if (m_size != -1) return m_size; |
486 | | |
487 | | // make sure this is a list. |
488 | 0 | bdecode_token const* tokens = m_root_tokens; |
489 | 0 | TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::list); |
490 | | |
491 | | // this is the first item |
492 | 0 | int token = m_token_idx + 1; |
493 | 0 | int ret = 0; |
494 | | |
495 | | // do we have a lookup cached? |
496 | 0 | if (m_last_index != -1) |
497 | 0 | { |
498 | 0 | token = m_last_token; |
499 | 0 | ret = m_last_index; |
500 | 0 | } |
501 | 0 | while (tokens[token].type != bdecode_token::end) |
502 | 0 | { |
503 | 0 | token += tokens[token].next_item; |
504 | 0 | ++ret; |
505 | 0 | } |
506 | |
507 | 0 | m_size = ret; |
508 | |
509 | 0 | return ret; |
510 | 0 | } |
511 | | |
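Both list_size() and list_at() walk the token stream linearly and cache the last visited position in m_last_token/m_last_index, so iterating forward over a list stays cheap. A usage sketch under the same public-API assumptions as above:

#include "libtorrent/bdecode.hpp"
#include <cinttypes>
#include <cstdio>
#include <string>

namespace lt = libtorrent;

int main()
{
	// a two-element list: the string "spam" and the integer 42
	std::string const buf = "l4:spami42ee";
	lt::error_code ec;
	lt::bdecode_node const e = lt::bdecode(buf, ec);
	if (ec || e.type() != lt::bdecode_node::list_t) return 1;

	for (int i = 0; i < e.list_size(); ++i)
	{
		lt::bdecode_node const item = e.list_at(i);
		if (item.type() == lt::bdecode_node::string_t)
		{
			auto const s = item.string_value();
			std::printf("string: %.*s\n", int(s.size()), s.data());
		}
		else if (item.type() == lt::bdecode_node::int_t)
		{
			std::printf("int: %" PRId64 "\n", item.int_value());
		}
	}
}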
512 | | std::pair<bdecode_node, bdecode_node> bdecode_node::dict_at_node(int i) const |
513 | 0 | { |
514 | 0 | TORRENT_ASSERT(type() == dict_t); |
515 | 0 | TORRENT_ASSERT(m_token_idx != -1); |
516 | |
517 | 0 | bdecode_token const* tokens = m_root_tokens; |
518 | 0 | TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::dict); |
519 | |
520 | 0 | int token = m_token_idx + 1; |
521 | 0 | int item = 0; |
522 | | |
523 | | // do we have a lookup cached? |
524 | 0 | if (m_last_index <= i && m_last_index != -1) |
525 | 0 | { |
526 | 0 | token = m_last_token; |
527 | 0 | item = m_last_index; |
528 | 0 | } |
529 | |
530 | 0 | while (item < i) |
531 | 0 | { |
532 | 0 | TORRENT_ASSERT(tokens[token].type == bdecode_token::string |
533 | 0 | || tokens[token].type == bdecode_token::long_string); |
534 | | |
535 | | // skip the key |
536 | 0 | token += tokens[token].next_item; |
537 | 0 | TORRENT_ASSERT(tokens[token].type != bdecode_token::end); |
538 | | |
539 | | // skip the value |
540 | 0 | token += tokens[token].next_item; |
541 | |
|
542 | 0 | ++item; |
543 | | |
544 | | // index 'i' out of range |
545 | 0 | TORRENT_ASSERT(tokens[token].type != bdecode_token::end); |
546 | 0 | } |
547 | | |
548 | | // there's no point in caching the first item |
549 | 0 | if (i > 0) |
550 | 0 | { |
551 | 0 | m_last_token = token; |
552 | 0 | m_last_index = i; |
553 | 0 | } |
554 | |
555 | 0 | int value_token = token + tokens[token].next_item; |
556 | 0 | TORRENT_ASSERT(tokens[token].type != bdecode_token::end); |
557 | |
558 | 0 | return std::make_pair( |
559 | 0 | bdecode_node(tokens, m_buffer, m_buffer_size, token) |
560 | 0 | , bdecode_node(tokens, m_buffer, m_buffer_size, value_token)); |
561 | 0 | } |
562 | | |
563 | | std::pair<string_view, bdecode_node> bdecode_node::dict_at(int const i) const |
564 | 0 | { |
565 | 0 | bdecode_node key; |
566 | 0 | bdecode_node value; |
567 | 0 | std::tie(key, value) = dict_at_node(i); |
568 | 0 | return {key.string_value(), value}; |
569 | 0 | } |
570 | | |
571 | | int bdecode_node::dict_size() const |
572 | 0 | { |
573 | 0 | TORRENT_ASSERT(type() == dict_t); |
574 | 0 | TORRENT_ASSERT(m_token_idx != -1); |
575 | |
576 | 0 | if (m_size != -1) return m_size; |
577 | | |
578 | 0 | bdecode_token const* tokens = m_root_tokens; |
579 | 0 | TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::dict); |
580 | | |
581 | | // this is the first item |
582 | 0 | int token = m_token_idx + 1; |
583 | 0 | int ret = 0; |
584 | |
585 | 0 | if (m_last_index != -1) |
586 | 0 | { |
587 | 0 | ret = m_last_index * 2; |
588 | 0 | token = m_last_token; |
589 | 0 | } |
590 | |
591 | 0 | while (tokens[token].type != bdecode_token::end) |
592 | 0 | { |
593 | 0 | token += tokens[token].next_item; |
594 | 0 | ++ret; |
595 | 0 | } |
596 | | |
597 | | // a dictionary must contain full key-value pairs, which means |
598 | | // the number of entries is divisible by 2 |
599 | 0 | TORRENT_ASSERT((ret % 2) == 0); |
600 | | |
601 | | // each item is one key and one value, so divide by 2 |
602 | 0 | ret /= 2; |
603 | |
604 | 0 | m_size = ret; |
605 | |
606 | 0 | return ret; |
607 | 0 | } |
608 | | |
609 | | bdecode_node bdecode_node::dict_find(string_view key) const |
610 | 0 | { |
611 | 0 | TORRENT_ASSERT(type() == dict_t); |
612 | |
613 | 0 | bdecode_token const* const tokens = m_root_tokens; |
614 | | |
615 | | // this is the first item |
616 | 0 | int token = m_token_idx + 1; |
617 | |
618 | 0 | while (tokens[token].type != bdecode_token::end) |
619 | 0 | { |
620 | 0 | bdecode_token const& t = tokens[token]; |
621 | 0 | TORRENT_ASSERT(t.type == bdecode_token::string |
622 | 0 | || t.type == bdecode_token::long_string); |
623 | 0 | int const size = token_source_span(t) - t.start_offset(); |
624 | 0 | if (int(key.size()) == size |
625 | 0 | && std::equal(key.data(), key.data() + size, m_buffer |
626 | 0 | + t.offset + t.start_offset())) |
627 | 0 | { |
628 | | // skip key |
629 | 0 | token += t.next_item; |
630 | 0 | TORRENT_ASSERT(tokens[token].type != bdecode_token::end); |
631 | |
632 | 0 | return bdecode_node(tokens, m_buffer, m_buffer_size, token); |
633 | 0 | } |
634 | | |
635 | | // skip key |
636 | 0 | token += t.next_item; |
637 | 0 | TORRENT_ASSERT(tokens[token].type != bdecode_token::end); |
638 | |
640 | 0 | token += tokens[token].next_item; |
641 | 0 | } |
642 | | |
643 | 0 | return bdecode_node(); |
644 | 0 | } |
645 | | |
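dict_find() scans the keys linearly, comparing each against the requested key, and the dict_find_*_value() wrappers add a type check plus a default. A usage sketch, with the same assumptions about the public API:

#include "libtorrent/bdecode.hpp"
#include <cstdint>
#include <cstdio>
#include <string>

namespace lt = libtorrent;

int main()
{
	// { "name": "test", "size": 1234 }
	std::string const buf = "d4:name4:test4:sizei1234ee";
	lt::error_code ec;
	lt::bdecode_node const e = lt::bdecode(buf, ec);
	if (ec || e.type() != lt::bdecode_node::dict_t) return 1;

	auto const name = e.dict_find_string_value("name");          // "test"
	std::int64_t const size = e.dict_find_int_value("size", -1); // 1234
	std::printf("name: %.*s size: %lld\n"
		, int(name.size()), name.data(), static_cast<long long>(size));
}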
646 | | bdecode_node bdecode_node::dict_find_list(string_view key) const |
647 | 0 | { |
648 | 0 | bdecode_node ret = dict_find(key); |
649 | 0 | if (ret.type() == bdecode_node::list_t) |
650 | 0 | return ret; |
651 | 0 | return bdecode_node(); |
652 | 0 | } |
653 | | |
654 | | bdecode_node bdecode_node::dict_find_dict(string_view key) const |
655 | 0 | { |
656 | 0 | bdecode_node ret = dict_find(key); |
657 | 0 | if (ret.type() == bdecode_node::dict_t) |
658 | 0 | return ret; |
659 | 0 | return bdecode_node(); |
660 | 0 | } |
661 | | |
662 | | bdecode_node bdecode_node::dict_find_string(string_view key) const |
663 | 0 | { |
664 | 0 | bdecode_node ret = dict_find(key); |
665 | 0 | if (ret.type() == bdecode_node::string_t) |
666 | 0 | return ret; |
667 | 0 | return bdecode_node(); |
668 | 0 | } |
669 | | |
670 | | bdecode_node bdecode_node::dict_find_int(string_view key) const |
671 | 0 | { |
672 | 0 | bdecode_node ret = dict_find(key); |
673 | 0 | if (ret.type() == bdecode_node::int_t) |
674 | 0 | return ret; |
675 | 0 | return bdecode_node(); |
676 | 0 | } |
677 | | |
678 | | string_view bdecode_node::dict_find_string_value(string_view key |
679 | | , string_view default_value) const |
680 | 0 | { |
681 | 0 | bdecode_node n = dict_find(key); |
682 | 0 | if (n.type() != bdecode_node::string_t) return default_value; |
683 | 0 | return n.string_value(); |
684 | 0 | } |
685 | | |
686 | | std::int64_t bdecode_node::dict_find_int_value(string_view key |
687 | | , std::int64_t default_val) const |
688 | 0 | { |
689 | 0 | bdecode_node n = dict_find(key); |
690 | 0 | if (n.type() != bdecode_node::int_t) return default_val; |
691 | 0 | return n.int_value(); |
692 | 0 | } |
693 | | |
694 | | std::int64_t bdecode_node::int_value() const |
695 | 0 | { |
696 | 0 | TORRENT_ASSERT(type() == int_t); |
697 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
698 | 0 | int const size = token_source_span(t); |
699 | 0 | TORRENT_ASSERT(t.type == bdecode_token::integer); |
700 | | |
701 | | // +1 is to skip the 'i' |
702 | 0 | char const* ptr = m_buffer + t.offset + 1; |
703 | 0 | std::int64_t val = 0; |
704 | 0 | bool const negative = (*ptr == '-'); |
705 | 0 | bdecode_errors::error_code_enum ec = bdecode_errors::no_error; |
706 | 0 | char const* end = parse_int(ptr + int(negative) |
707 | 0 | , ptr + size, 'e', val, ec); |
708 | 0 | if (ec) return 0; |
709 | 0 | TORRENT_UNUSED(end); |
710 | 0 | TORRENT_ASSERT(end < ptr + size); |
711 | 0 | if (negative) val = -val; |
712 | 0 | return val; |
713 | 0 | } |
714 | | |
715 | | string_view bdecode_node::string_value() const |
716 | 0 | { |
717 | 0 | TORRENT_ASSERT(type() == string_t); |
718 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
719 | 0 | auto const size = aux::numeric_cast<std::size_t>(token_source_span(t) - t.start_offset()); |
720 | 0 | TORRENT_ASSERT(t.type == bdecode_token::string |
721 | 0 | || t.type == bdecode_token::long_string); |
722 | |
723 | 0 | return string_view(m_buffer + t.offset + t.start_offset(), size); |
724 | 0 | } |
725 | | |
726 | | std::ptrdiff_t bdecode_node::string_offset() const |
727 | 0 | { |
728 | 0 | TORRENT_ASSERT(type() == string_t); |
729 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
730 | 0 | TORRENT_ASSERT(t.type == bdecode_token::string |
731 | 0 | || t.type == bdecode_token::long_string); |
732 | 0 | return t.offset + t.start_offset(); |
733 | 0 | } |
734 | | |
735 | | char const* bdecode_node::string_ptr() const |
736 | 0 | { |
737 | 0 | TORRENT_ASSERT(type() == string_t); |
738 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
739 | 0 | TORRENT_ASSERT(t.type == bdecode_token::string |
740 | 0 | || t.type == bdecode_token::long_string); |
741 | 0 | return m_buffer + t.offset + t.start_offset(); |
742 | 0 | } |
743 | | |
744 | | int bdecode_node::string_length() const |
745 | 0 | { |
746 | 0 | TORRENT_ASSERT(type() == string_t); |
747 | 0 | bdecode_token const& t = m_root_tokens[m_token_idx]; |
748 | 0 | TORRENT_ASSERT(t.type == bdecode_token::string |
749 | 0 | || t.type == bdecode_token::long_string); |
750 | 0 | return token_source_span(t) - t.start_offset(); |
751 | 0 | } |
752 | | |
753 | | void bdecode_node::reserve(int tokens) |
754 | 0 | { m_tokens.reserve(aux::numeric_cast<std::size_t>(tokens)); } |
755 | | |
756 | | void bdecode_node::swap(bdecode_node& n) |
757 | 0 | { |
758 | | /* |
759 | | bool lhs_is_root = (m_root_tokens == &m_tokens); |
760 | | bool rhs_is_root = (n.m_root_tokens == &n.m_tokens); |
761 | | |
762 | | // swap is only defined between non-root nodes |
763 | | // and between root-nodes. They may not be mixed! |
764 | | // note that when swapping root nodes, all bdecode_node |
765 | | // entries that exist in those subtrees are invalidated! |
766 | | TORRENT_ASSERT(lhs_is_root == rhs_is_root); |
767 | | |
768 | | // if both are roots, m_root_tokens always point to |
769 | | // its own vector, and should not get swapped (the |
770 | | // underlying vectors are swapped already) |
771 | | if (!lhs_is_root && !rhs_is_root) |
772 | | { |
773 | | // if neither is a root, we just swap the pointers |
774 | | // to the token vectors, switching their roots |
775 | | std::swap(m_root_tokens, n.m_root_tokens); |
776 | | } |
777 | | */ |
778 | 0 | m_tokens.swap(n.m_tokens); |
779 | 0 | std::swap(m_root_tokens, n.m_root_tokens); |
780 | 0 | std::swap(m_buffer, n.m_buffer); |
781 | 0 | std::swap(m_buffer_size, n.m_buffer_size); |
782 | 0 | std::swap(m_token_idx, n.m_token_idx); |
783 | 0 | std::swap(m_last_index, n.m_last_index); |
784 | 0 | std::swap(m_last_token, n.m_last_token); |
785 | 0 | std::swap(m_size, n.m_size); |
786 | 0 | } |
787 | | |
788 | 0 | #define TORRENT_FAIL_BDECODE(code) do { \ |
789 | 0 | ec = code; \ |
790 | 0 | if (error_pos) *error_pos = int(start - orig_start); \ |
791 | 0 | goto done; \ |
792 | 0 | } TORRENT_WHILE_0 |
793 | | |
794 | | int bdecode(char const* start, char const* end, bdecode_node& ret |
795 | | , error_code& ec, int* error_pos, int const depth_limit, int token_limit) |
796 | 0 | { |
797 | 0 | ret = bdecode({start, end - start}, ec, error_pos, depth_limit, token_limit); |
798 | 0 | return ec ? -1 : 0; |
799 | 0 | } |
800 | | |
801 | | bdecode_node bdecode(span<char const> buffer, int depth_limit, int token_limit) |
802 | 0 | { |
803 | 0 | error_code ec; |
804 | 0 | bdecode_node ret = bdecode(buffer, ec, nullptr, depth_limit, token_limit); |
805 | 0 | if (ec) throw system_error(ec); |
806 | 0 | return ret; |
807 | 0 | } |
808 | | |
809 | | bdecode_node bdecode(span<char const> buffer |
810 | | , error_code& ec, int* error_pos, int depth_limit, int token_limit) |
811 | 0 | { |
812 | 0 | bdecode_node ret; |
813 | 0 | ec.clear(); |
814 | |
815 | 0 | if (buffer.size() > bdecode_token::max_offset) |
816 | 0 | { |
817 | 0 | if (error_pos) *error_pos = 0; |
818 | 0 | ec = bdecode_errors::limit_exceeded; |
819 | 0 | return ret; |
820 | 0 | } |
821 | | |
822 | | // this is the stack of bdecode_token indices, into m_tokens. |
823 | | // sp is the stack pointer, as index into the array, stack |
824 | 0 | int sp = 0; |
825 | 0 | TORRENT_ALLOCA(stack, stack_frame, depth_limit); |
826 | | |
827 | | // TODO: 2 attempt to simplify this implementation by embracing the span |
828 | 0 | char const* start = buffer.data(); |
829 | 0 | char const* end = start + buffer.size(); |
830 | 0 | char const* const orig_start = start; |
831 | |
832 | 0 | if (start == end) |
833 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof); |
834 | | |
835 | 0 | while (start <= end) |
836 | 0 | { |
837 | 0 | if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof); |
838 | | |
839 | 0 | if (sp >= depth_limit) |
840 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::depth_exceeded); |
841 | | |
842 | 0 | --token_limit; |
843 | 0 | if (token_limit < 0) |
844 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded); |
845 | | |
846 | | // look for a new token |
847 | 0 | char const t = *start; |
848 | |
849 | 0 | int const current_frame = sp; |
850 | | |
851 | | // if we're currently parsing a dictionary, assert that |
852 | | // every other node is a string. |
853 | 0 | if (current_frame > 0 |
854 | 0 | && ret.m_tokens[stack[current_frame - 1].token].type == bdecode_token::dict) |
855 | 0 | { |
856 | 0 | if (stack[current_frame - 1].state == 0) |
857 | 0 | { |
858 | | // the current parent is a dict and we are parsing a key. |
859 | | // only allow a digit (for a string) or 'e' to terminate |
860 | 0 | if (!numeric(t) && t != 'e') |
861 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::expected_digit); |
862 | 0 | } |
863 | 0 | } |
864 | | |
865 | 0 | switch (t) |
866 | 0 | { |
867 | 0 | case 'd': |
868 | 0 | stack[sp++] = stack_frame(int(ret.m_tokens.size())); |
869 | | // we push it into the stack so that we know where to fill |
870 | | // in the next_item field once we pop this node off the stack. |
871 | | // i.e. get to the node following the dictionary in the buffer |
872 | 0 | ret.m_tokens.push_back({start - orig_start, bdecode_token::dict}); |
873 | 0 | ++start; |
874 | 0 | break; |
875 | 0 | case 'l': |
876 | 0 | stack[sp++] = stack_frame(int(ret.m_tokens.size())); |
877 | | // we push it into the stack so that we know where to fill |
878 | | // in the next_item field once we pop this node off the stack. |
879 | | // i.e. get to the node following the list in the buffer |
880 | 0 | ret.m_tokens.push_back({start - orig_start, bdecode_token::list}); |
881 | 0 | ++start; |
882 | 0 | break; |
883 | 0 | case 'i': |
884 | 0 | { |
885 | 0 | char const* const int_start = start; |
886 | 0 | bdecode_errors::error_code_enum e = bdecode_errors::no_error; |
887 | | // +1 here to point to the first digit, rather than 'i' |
888 | 0 | start = check_integer(start + 1, end, e); |
889 | 0 | if (e) |
890 | 0 | { |
891 | | // in order to gracefully terminate the tree, |
892 | | // make sure the end of the previous token is set correctly |
893 | 0 | if (error_pos) *error_pos = int(start - orig_start); |
894 | 0 | error_pos = nullptr; |
895 | 0 | start = int_start; |
896 | 0 | TORRENT_FAIL_BDECODE(e); |
897 | 0 | } |
898 | 0 | ret.m_tokens.push_back({int_start - orig_start |
899 | 0 | , 1, bdecode_token::integer, 1}); |
900 | 0 | TORRENT_ASSERT(*start == 'e'); |
901 | | |
902 | | // skip 'e' |
903 | 0 | ++start; |
904 | 0 | break; |
905 | 0 | } |
906 | 0 | case 'e': |
907 | 0 | { |
908 | | // this is the end of a list or dict |
909 | 0 | if (sp == 0) |
910 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof); |
911 | | |
912 | 0 | if (sp > 0 |
913 | 0 | && ret.m_tokens[stack[sp - 1].token].type == bdecode_token::dict |
914 | 0 | && stack[sp - 1].state == 1) |
915 | 0 | { |
916 | | // this means we're parsing a dictionary and about to parse a |
917 | | // value associated with a key. Instead, we got a termination |
918 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::expected_value); |
919 | 0 | } |
920 | | |
921 | | // insert the end-of-sequence token |
922 | 0 | ret.m_tokens.push_back({start - orig_start, 1, bdecode_token::end}); |
923 | | |
924 | | // and back-patch the start of this sequence with the offset |
925 | | // to the next token we'll insert |
926 | 0 | int const top = stack[sp - 1].token; |
927 | | // subtract the token's own index, since this is a relative |
928 | | // offset |
929 | 0 | if (int(ret.m_tokens.size()) - top > bdecode_token::max_next_item) |
930 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded); |
931 | | |
932 | 0 | ret.m_tokens[std::size_t(top)].next_item = std::uint32_t(int(ret.m_tokens.size()) - top); |
933 | | |
934 | | // and pop it from the stack. |
935 | 0 | TORRENT_ASSERT(sp > 0); |
936 | 0 | --sp; |
937 | 0 | ++start; |
938 | 0 | break; |
939 | 0 | } |
940 | 0 | default: |
941 | 0 | { |
942 | | // this is the case for strings. The start character is any |
943 | | // numeric digit |
944 | 0 | if (!numeric(t)) |
945 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::expected_value); |
946 | | |
947 | 0 | std::int64_t len = t - '0'; |
948 | 0 | char const* const str_start = start; |
949 | 0 | ++start; |
950 | 0 | if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof); |
951 | 0 | bdecode_errors::error_code_enum e = bdecode_errors::no_error; |
952 | 0 | start = parse_int(start, end, ':', len, e); |
953 | 0 | if (e) |
954 | 0 | TORRENT_FAIL_BDECODE(e); |
955 | 0 | if (start == end) |
956 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::expected_colon); |
957 | | |
958 | | // remaining buffer size excluding ':' |
959 | 0 | ptrdiff_t const buff_size = end - start - 1; |
960 | 0 | if (len > buff_size) |
961 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof); |
962 | 0 | if (len < 0) |
963 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::overflow); |
964 | | |
965 | | // skip ':' |
966 | 0 | ++start; |
967 | | // no need to range check start here |
968 | | // the check above ensures that the buffer is long enough to hold |
969 | | // the string, which guarantees that start <= end |
970 | |
971 | 0 | int const header = int(start - str_start); |
972 | 0 | TORRENT_ASSERT(header >= 0); |
973 | | |
974 | | // the bdecode_token only has 8 bits to keep the header size |
975 | | // in. If it overflows, fail! |
976 | 0 | if (header > aux::bdecode_token::long_string_max_header) |
977 | 0 | TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded); |
978 | | |
979 | 0 | ret.m_tokens.push_back({str_start - orig_start |
980 | 0 | , 1, bdecode_token::string, std::uint32_t(header)}); |
981 | 0 | start += len; |
982 | 0 | break; |
983 | 0 | } |
984 | 0 | } |
985 | | |
986 | 0 | if (current_frame > 0 |
987 | 0 | && ret.m_tokens[stack[current_frame - 1].token].type == bdecode_token::dict) |
988 | 0 | { |
989 | | // the next item we parse is the opposite |
990 | | // state is an unsigned 1-bit member. adding 1 will flip the bit |
991 | 0 | stack[current_frame - 1].state = (stack[current_frame - 1].state + 1) & 1; |
992 | 0 | } |
993 | | |
994 | | // this terminates the top level node, we're done! |
995 | 0 | if (sp == 0) break; |
996 | 0 | } |
997 | | |
998 | 0 | done: |
999 | | |
1000 | | // if the parse failed, sp will be greater than 0 |
1001 | | // unwind the stack by inserting terminators to make whatever we have |
1002 | | // so far valid |
1003 | 0 | while (sp > 0) { |
1004 | 0 | TORRENT_ASSERT(ec); |
1005 | 0 | --sp; |
1006 | | |
1007 | | // we may need to insert a dummy token to properly terminate the tree, |
1008 | | // in case we just parsed a key to a dict and failed in the value |
1009 | 0 | if (ret.m_tokens[stack[sp].token].type == bdecode_token::dict |
1010 | 0 | && stack[sp].state == 1) |
1011 | 0 | { |
1012 | | // insert an empty dictionary as the value |
1013 | 0 | ret.m_tokens.push_back({start - orig_start, 2, bdecode_token::dict}); |
1014 | 0 | ret.m_tokens.push_back({start - orig_start, bdecode_token::end}); |
1015 | 0 | } |
1016 | |
1017 | 0 | int const top = stack[sp].token; |
1018 | 0 | TORRENT_ASSERT(int(ret.m_tokens.size()) - top <= bdecode_token::max_next_item); |
1019 | 0 | ret.m_tokens[std::size_t(top)].next_item = std::uint32_t(int(ret.m_tokens.size()) - top); |
1020 | 0 | ret.m_tokens.push_back({start - orig_start, 1, bdecode_token::end}); |
1021 | 0 | } |
1022 | |
1023 | 0 | ret.m_tokens.push_back({start - orig_start, 0, bdecode_token::end}); |
1024 | |
1025 | 0 | ret.m_token_idx = 0; |
1026 | 0 | ret.m_buffer = orig_start; |
1027 | 0 | ret.m_buffer_size = int(start - orig_start); |
1028 | 0 | ret.m_root_tokens = ret.m_tokens.data(); |
1029 | |
1030 | 0 | return ret; |
1031 | 0 | } |
1032 | | |
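This is the core parser: a single left-to-right pass that appends one bdecode_token per item, with the stack only tracking the currently open lists and dictionaries so their next_item fields can be back-patched at the matching 'e'. A usage sketch of the non-throwing entry point together with print_entry(), under the same public-API assumptions as the earlier sketches:

#include "libtorrent/bdecode.hpp"
#include <cstdio>
#include <string>

namespace lt = libtorrent;

int main()
{
	std::string const buf = "d4:infod6:lengthi425e4:name8:test.txtee";

	// non-throwing overload: on failure ec is set and pos points at the
	// offending byte; a throwing overload without ec also exists above
	lt::error_code ec;
	int pos = 0;
	lt::bdecode_node const e = lt::bdecode(buf, ec, &pos);
	if (ec)
	{
		std::printf("parse error at byte %d: %s\n", pos, ec.message().c_str());
		return 1;
	}
	std::printf("%s\n", lt::print_entry(e).c_str());
}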
1033 | | namespace { |
1034 | | |
1035 | | int line_longer_than(bdecode_node const& e, int limit) |
1036 | 0 | { |
1037 | 0 | int line_len = 0; |
1038 | 0 | switch (e.type()) |
1039 | 0 | { |
1040 | 0 | case bdecode_node::list_t: |
1041 | 0 | line_len += 4; |
1042 | 0 | if (line_len > limit) return -1; |
1043 | 0 | for (int i = 0; i < e.list_size(); ++i) |
1044 | 0 | { |
1045 | 0 | int const ret = line_longer_than(e.list_at(i), limit - line_len); |
1046 | 0 | if (ret == -1) return -1; |
1047 | 0 | line_len += ret + 2; |
1048 | 0 | } |
1049 | 0 | break; |
1050 | 0 | case bdecode_node::dict_t: |
1051 | 0 | line_len += 4; |
1052 | 0 | if (line_len > limit) return -1; |
1053 | 0 | for (int i = 0; i < e.dict_size(); ++i) |
1054 | 0 | { |
1055 | 0 | line_len += 4 + int(e.dict_at(i).first.size()); |
1056 | 0 | if (line_len > limit) return -1; |
1057 | 0 | int const ret = line_longer_than(e.dict_at(i).second, limit - line_len); |
1058 | 0 | if (ret == -1) return -1; |
1059 | 0 | line_len += ret + 1; |
1060 | 0 | } |
1061 | 0 | break; |
1062 | 0 | case bdecode_node::string_t: |
1063 | 0 | line_len += 3 + e.string_length(); |
1064 | 0 | break; |
1065 | 0 | case bdecode_node::int_t: |
1066 | 0 | { |
1067 | 0 | std::int64_t val = e.int_value(); |
1068 | 0 | while (val > 0) |
1069 | 0 | { |
1070 | 0 | ++line_len; |
1071 | 0 | val /= 10; |
1072 | 0 | } |
1073 | 0 | line_len += 2; |
1074 | 0 | } |
1075 | 0 | break; |
1076 | 0 | case bdecode_node::none_t: |
1077 | 0 | line_len += 4; |
1078 | 0 | break; |
1079 | 0 | } |
1080 | | |
1081 | 0 | if (line_len > limit) return -1; |
1082 | 0 | return line_len; |
1083 | 0 | } |
1084 | | |
1085 | | void print_string(std::string& ret, string_view str, bool single_line) |
1086 | 0 | { |
1087 | 0 | int const len = int(str.size()); |
1088 | 0 | bool printable = true; |
1089 | 0 | for (int i = 0; i < len; ++i) |
1090 | 0 | { |
1091 | 0 | char const c = str[std::size_t(i)]; |
1092 | 0 | if (c >= 32 && c < 127) continue; |
1093 | 0 | printable = false; |
1094 | 0 | break; |
1095 | 0 | } |
1096 | 0 | ret += "'"; |
1097 | 0 | if (printable) |
1098 | 0 | { |
1099 | 0 | if (single_line && len > 30) |
1100 | 0 | { |
1101 | 0 | ret.append(str.data(), 14); |
1102 | 0 | ret += "..."; |
1103 | 0 | ret.append(str.data() + len - 14, 14); |
1104 | 0 | } |
1105 | 0 | else |
1106 | 0 | ret.append(str.data(), std::size_t(len)); |
1107 | 0 | ret += "'"; |
1108 | 0 | return; |
1109 | 0 | } |
1110 | 0 | if (single_line && len > 32) |
1111 | 0 | { |
1112 | 0 | aux::escape_string(ret, str.data(), 25); |
1113 | 0 | ret += "..."; |
1114 | 0 | aux::escape_string(ret, str.data() + len - 4, 4); |
1115 | 0 | } |
1116 | 0 | else |
1117 | 0 | { |
1118 | 0 | aux::escape_string(ret, str.data(), len); |
1119 | 0 | } |
1120 | 0 | ret += "'"; |
1121 | 0 | } |
1122 | | |
1123 | | } |
1124 | | |
1125 | | std::string print_entry(bdecode_node const& e |
1126 | | , bool single_line, int indent) |
1127 | 0 | { |
1128 | 0 | char indent_str[200]; |
1129 | 0 | using std::memset; |
1130 | 0 | memset(indent_str, ' ', 200); |
1131 | 0 | indent_str[0] = ','; |
1132 | 0 | indent_str[1] = '\n'; |
1133 | 0 | indent_str[199] = 0; |
1134 | 0 | if (indent < 197 && indent >= 0) indent_str[indent + 2] = 0; |
1135 | 0 | std::string ret; |
1136 | 0 | switch (e.type()) |
1137 | 0 | { |
1138 | 0 | case bdecode_node::none_t: return "none"; |
1139 | 0 | case bdecode_node::int_t: |
1140 | 0 | { |
1141 | 0 | char str[100]; |
1142 | 0 | std::snprintf(str, sizeof(str), "%" PRId64, e.int_value()); |
1143 | 0 | return str; |
1144 | 0 | } |
1145 | 0 | case bdecode_node::string_t: |
1146 | 0 | { |
1147 | 0 | print_string(ret, e.string_value(), single_line); |
1148 | 0 | return ret; |
1149 | 0 | } |
1150 | 0 | case bdecode_node::list_t: |
1151 | 0 | { |
1152 | 0 | ret += '['; |
1153 | 0 | bool one_liner = line_longer_than(e, 200) != -1 || single_line; |
1154 | |
1155 | 0 | if (!one_liner) ret += indent_str + 1; |
1156 | 0 | for (int i = 0; i < e.list_size(); ++i) |
1157 | 0 | { |
1158 | 0 | if (i == 0 && one_liner) ret += ' '; |
1159 | 0 | ret += print_entry(e.list_at(i), single_line, indent + 2); |
1160 | 0 | if (i < e.list_size() - 1) ret += (one_liner ? ", " : indent_str); |
1161 | 0 | else ret += (one_liner ? " " : indent_str + 1); |
1162 | 0 | } |
1163 | 0 | ret += ']'; |
1164 | 0 | return ret; |
1165 | 0 | } |
1166 | 0 | case bdecode_node::dict_t: |
1167 | 0 | { |
1168 | 0 | ret += '{'; |
1169 | 0 | bool one_liner = line_longer_than(e, 200) != -1 || single_line; |
1170 | |
1171 | 0 | if (!one_liner) ret += indent_str + 1; |
1172 | 0 | for (int i = 0; i < e.dict_size(); ++i) |
1173 | 0 | { |
1174 | 0 | if (i == 0 && one_liner) ret += ' '; |
1175 | 0 | std::pair<string_view, bdecode_node> ent = e.dict_at(i); |
1176 | 0 | print_string(ret, ent.first, true); |
1177 | 0 | ret += ": "; |
1178 | 0 | ret += print_entry(ent.second, single_line, indent + 2); |
1179 | 0 | if (i < e.dict_size() - 1) ret += (one_liner ? ", " : indent_str); |
1180 | 0 | else ret += (one_liner ? " " : indent_str + 1); |
1181 | 0 | } |
1182 | 0 | ret += '}'; |
1183 | 0 | return ret; |
1184 | 0 | } |
1185 | 0 | } |
1186 | 0 | return ret; |
1187 | 0 | } |
1188 | | } |