Coverage Report

Created: 2025-12-04 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libtorrent/src/bdecode.cpp
Line
Count
Source
1
/*
2
3
Copyright (c) 2015-2020, 2022, Arvid Norberg
4
Copyright (c) 2016-2017, 2019, Alden Torres
5
Copyright (c) 2016-2017, Andrei Kurushin
6
Copyright (c) 2016-2017, Steven Siloti
7
Copyright (c) 2017, Pavel Pimenov
8
All rights reserved.
9
10
Redistribution and use in source and binary forms, with or without
11
modification, are permitted provided that the following conditions
12
are met:
13
14
    * Redistributions of source code must retain the above copyright
15
      notice, this list of conditions and the following disclaimer.
16
    * Redistributions in binary form must reproduce the above copyright
17
      notice, this list of conditions and the following disclaimer in
18
      the documentation and/or other materials provided with the distribution.
19
    * Neither the name of the author nor the names of its
20
      contributors may be used to endorse or promote products derived
21
      from this software without specific prior written permission.
22
23
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33
POSSIBILITY OF SUCH DAMAGE.
34
35
*/
36
37
#include "libtorrent/bdecode.hpp"
38
#include "libtorrent/aux_/alloca.hpp"
39
#include "libtorrent/aux_/numeric_cast.hpp"
40
#include "libtorrent/error_code.hpp"
41
#include <limits>
42
#include <cstring> // for memset
43
#include <cstdio> // for snprintf
44
#include <cinttypes> // for PRId64 et.al.
45
#include <algorithm> // for any_of
46
47
#ifndef BOOST_SYSTEM_NOEXCEPT
48
#define BOOST_SYSTEM_NOEXCEPT throw()
49
#endif
50
51
namespace libtorrent {
52
53
  using aux::bdecode_token;
54
55
namespace {
56
57
  static_assert(int('0') == 48, "this code relies on ASCII compatible source encoding");
58
  static_assert(int('9') == 57, "this code relies on ASCII compatible source encoding");
59
60
0
  bool numeric(char c) { return c >= '0' && c <= '9'; }
61
62
  // finds the end of an integer and verifies that it looks valid this does
63
  // not detect all overflows, just the ones that are an order of magnitude
64
  // beyond. Exact overflow checking is done when the integer value is queried
65
  // from a bdecode_node.
66
  char const* check_integer(char const* start, char const* end
67
    , bdecode_errors::error_code_enum& e)
68
0
  {
69
0
    if (start == end)
70
0
    {
71
0
      e = bdecode_errors::unexpected_eof;
72
0
      return start;
73
0
    }
74
75
0
    if (*start == '-')
76
0
    {
77
0
      ++start;
78
0
      if (start == end)
79
0
      {
80
0
        e = bdecode_errors::unexpected_eof;
81
0
        return start;
82
0
      }
83
0
    }
84
85
0
    int digits = 0;
86
0
    do
87
0
    {
88
0
      if (!numeric(*start))
89
0
      {
90
0
        e = bdecode_errors::expected_digit;
91
0
        break;
92
0
      }
93
0
      ++start;
94
0
      ++digits;
95
96
0
      if (start == end)
97
0
      {
98
0
        e = bdecode_errors::unexpected_eof;
99
0
        break;
100
0
      }
101
0
    }
102
0
    while (*start != 'e');
103
104
0
    if (digits > 20)
105
0
    {
106
0
      e = bdecode_errors::overflow;
107
0
    }
108
109
0
    return start;
110
0
  }
111
112
  struct stack_frame
113
  {
114
0
    stack_frame() : token(0), state(0) {}
115
0
    explicit stack_frame(int const t): token(std::uint32_t(t)), state(0) {}
116
    // this is an index into m_tokens
117
    std::uint32_t token:31;
118
    // this is used for dictionaries to indicate whether we're
119
    // reading a key or a vale. 0 means key 1 is value
120
    std::uint32_t state:1;
121
  };
122
123
  // diff between current and next item offset
124
  // should only be called for non last item in array
125
  int token_source_span(bdecode_token const& t)
126
0
  {
127
0
    return (&t)[1].offset - t.offset;
128
0
  }
129
130
} // anonymous namespace
131
132
namespace aux {
133
  void escape_string(std::string& ret, char const* str, int len)
134
0
  {
135
0
    if (std::any_of(str, str + len, [](char const c) { return c < 32 || c >= 127; } ))
136
0
    {
137
0
      for (int i = 0; i < len; ++i)
138
0
      {
139
0
        char tmp[3];
140
0
        std::snprintf(tmp, sizeof(tmp), "%02x", std::uint8_t(str[i]));
141
0
        ret += tmp;
142
0
      }
143
0
    }
144
0
    else
145
0
    {
146
0
      ret.assign(str, std::size_t(len));
147
0
    }
148
0
  }
149
}
150
151
152
153
  // reads the string between start and end, or up to the first occurrence of
154
  // 'delimiter', whichever comes first. This string is interpreted as an
155
  // integer which is assigned to 'val'. If there's a non-delimiter and
156
  // non-digit in the range, a parse error is reported in 'ec'. If the value
157
  // cannot be represented by the variable 'val' and overflow error is reported
158
  // by 'ec'.
159
  // WARNING: the val is an out-parameter that is expected to be initialized
160
  // to 0 or a *positive* number.
161
  char const* parse_int(char const* start, char const* end, char delimiter
162
    , std::int64_t& val, bdecode_errors::error_code_enum& ec)
163
0
  {
164
0
    TORRENT_ASSERT(val >= 0);
165
0
    while (start < end && *start != delimiter)
166
0
    {
167
0
      char const c = *start;
168
0
      if (!numeric(c))
169
0
      {
170
0
        ec = bdecode_errors::expected_digit;
171
0
        return start;
172
0
      }
173
0
      if (val > std::numeric_limits<std::int64_t>::max() / 10)
174
0
      {
175
0
        ec = bdecode_errors::overflow;
176
0
        return start;
177
0
      }
178
0
      val *= 10;
179
0
      int const digit = c - '0';
180
0
      if (val > std::numeric_limits<std::int64_t>::max() - digit)
181
0
      {
182
0
        ec = bdecode_errors::overflow;
183
0
        return start;
184
0
      }
185
0
      val += digit;
186
0
      ++start;
187
0
    }
188
0
    return start;
189
0
  }
190
191
192
  struct bdecode_error_category final : boost::system::error_category
193
  {
194
    const char* name() const BOOST_SYSTEM_NOEXCEPT override;
195
    std::string message(int ev) const override;
196
    boost::system::error_condition default_error_condition(
197
      int ev) const BOOST_SYSTEM_NOEXCEPT override
198
0
    { return {ev, *this}; }
199
  };
200
201
  const char* bdecode_error_category::name() const BOOST_SYSTEM_NOEXCEPT
202
0
  {
203
0
    return "bdecode";
204
0
  }
205
206
  std::string bdecode_error_category::message(int ev) const
207
0
  {
208
0
    static char const* msgs[] =
209
0
    {
210
0
      "no error",
211
0
      "expected digit in bencoded string",
212
0
      "expected colon in bencoded string",
213
0
      "unexpected end of file in bencoded string",
214
0
      "expected value (list, dict, int or string) in bencoded string",
215
0
      "bencoded nesting depth exceeded",
216
0
      "bencoded item count limit exceeded",
217
0
      "integer overflow",
218
0
    };
219
0
    if (ev < 0 || ev >= int(sizeof(msgs)/sizeof(msgs[0])))
220
0
      return "Unknown error";
221
0
    return msgs[ev];
222
0
  }
223
224
  boost::system::error_category& bdecode_category()
225
0
  {
226
0
    static bdecode_error_category bdecode_category;
227
0
    return bdecode_category;
228
0
  }
229
230
  namespace bdecode_errors
231
  {
232
    boost::system::error_code make_error_code(error_code_enum e)
233
0
    {
234
0
      return {e, bdecode_category()};
235
0
    }
236
  }
237
238
  bdecode_node::bdecode_node(bdecode_node const& n)
239
0
    : m_tokens(n.m_tokens)
240
0
    , m_root_tokens(n.m_root_tokens)
241
0
    , m_buffer(n.m_buffer)
242
0
    , m_buffer_size(n.m_buffer_size)
243
0
    , m_token_idx(n.m_token_idx)
244
0
    , m_last_index(n.m_last_index)
245
0
    , m_last_token(n.m_last_token)
246
0
    , m_size(n.m_size)
247
0
  {
248
0
    (*this) = n;
249
0
  }
250
251
  bdecode_node& bdecode_node::operator=(bdecode_node const& n) &
252
0
  {
253
0
    if (&n == this) return *this;
254
0
    m_tokens = n.m_tokens;
255
0
    m_root_tokens = n.m_root_tokens;
256
0
    m_buffer = n.m_buffer;
257
0
    m_buffer_size = n.m_buffer_size;
258
0
    m_token_idx = n.m_token_idx;
259
0
    m_last_index = n.m_last_index;
260
0
    m_last_token = n.m_last_token;
261
0
    m_size = n.m_size;
262
0
    if (!m_tokens.empty())
263
0
    {
264
      // if this is a root, make the token pointer
265
      // point to our storage
266
0
      m_root_tokens = &m_tokens[0];
267
0
    }
268
0
    return *this;
269
0
  }
270
271
0
  bdecode_node::bdecode_node(bdecode_node&&) noexcept = default;
272
273
  bdecode_node::bdecode_node(bdecode_token const* tokens, char const* buf
274
    , int len, int idx)
275
0
    : m_root_tokens(tokens)
276
0
    , m_buffer(buf)
277
0
    , m_buffer_size(len)
278
0
    , m_token_idx(idx)
279
0
  {
280
0
    TORRENT_ASSERT(tokens != nullptr);
281
0
    TORRENT_ASSERT(idx >= 0);
282
0
  }
283
284
  bdecode_node bdecode_node::non_owning() const
285
0
  {
286
    // if we're not a root, just return a copy of ourself
287
0
    if (m_tokens.empty()) return *this;
288
289
    // otherwise, return a reference to this node, but without
290
    // being an owning root node
291
0
    return bdecode_node(&m_tokens[0], m_buffer, m_buffer_size, m_token_idx);
292
0
  }
293
294
  void bdecode_node::clear()
295
0
  {
296
0
    m_tokens.clear();
297
0
    m_root_tokens = nullptr;
298
0
    m_token_idx = -1;
299
0
    m_size = -1;
300
0
    m_last_index = -1;
301
0
    m_last_token = -1;
302
0
  }
303
304
  void bdecode_node::switch_underlying_buffer(char const* buf) noexcept
305
0
  {
306
0
    TORRENT_ASSERT(!m_tokens.empty());
307
0
    if (m_tokens.empty()) return;
308
309
0
    m_buffer = buf;
310
0
  }
311
312
  bool bdecode_node::has_soft_error(span<char> error) const
313
0
  {
314
0
    if (type() == none_t) return false;
315
316
0
    bdecode_token const* tokens = m_root_tokens;
317
0
    int token = m_token_idx;
318
319
    // we don't know what the original depth_limit was
320
    // so this has to go on the heap
321
0
    std::vector<int> stack;
322
    // make the initial allocation the default depth_limit
323
0
    stack.reserve(100);
324
325
0
    do
326
0
    {
327
0
      switch (tokens[token].type)
328
0
      {
329
0
      case bdecode_token::integer:
330
0
        if (m_buffer[tokens[token].offset + 1] == '0'
331
0
          && m_buffer[tokens[token].offset + 2] != 'e')
332
0
        {
333
0
          std::snprintf(error.data(), std::size_t(error.size()), "leading zero in integer");
334
0
          return true;
335
0
        }
336
0
        break;
337
0
      case bdecode_token::string:
338
0
      case bdecode_token::long_string:
339
0
        if (m_buffer[tokens[token].offset] == '0'
340
0
          && m_buffer[tokens[token].offset + 1] != ':')
341
0
        {
342
0
          std::snprintf(error.data(), std::size_t(error.size()), "leading zero in string length");
343
0
          return true;
344
0
        }
345
0
        break;
346
0
      case bdecode_token::dict:
347
0
      case bdecode_token::list:
348
0
        stack.push_back(token);
349
0
        break;
350
0
      case bdecode_token::end:
351
0
        auto const parent = stack.back();
352
0
        stack.pop_back();
353
0
        if (tokens[parent].type == bdecode_token::dict
354
0
          && token != parent + 1)
355
0
        {
356
          // this is the end of a non-empty dict
357
          // check the sort order of the keys
358
0
          int k1 = parent + 1;
359
0
          for (;;)
360
0
          {
361
            // skip to the first key's value
362
0
            int const v1 = k1 + tokens[k1].next_item;
363
            // then to the next key
364
0
            int const k2 = v1 + tokens[v1].next_item;
365
366
            // check if k1 was the last key in the dict
367
0
            if (k2 == token)
368
0
              break;
369
370
0
            int const v2 = k2 + tokens[k2].next_item;
371
372
0
            int const k1_start = tokens[k1].offset + tokens[k1].start_offset();
373
0
            int const k1_len = tokens[v1].offset - k1_start;
374
0
            int const k2_start = tokens[k2].offset + tokens[k2].start_offset();
375
0
            int const k2_len = tokens[v2].offset - k2_start;
376
377
0
            int const min_len = std::min(k1_len, k2_len);
378
379
0
            int cmp = std::memcmp(m_buffer + k1_start, m_buffer + k2_start, std::size_t(min_len));
380
0
            if (cmp > 0 || (cmp == 0 && k1_len > k2_len))
381
0
            {
382
0
              std::snprintf(error.data(), std::size_t(error.size()), "unsorted dictionary key");
383
0
              return true;
384
0
            }
385
0
            else if (cmp == 0 && k1_len == k2_len)
386
0
            {
387
0
              std::snprintf(error.data(), std::size_t(error.size()), "duplicate dictionary key");
388
0
              return true;
389
0
            }
390
391
0
            k1 = k2;
392
0
          }
393
0
        }
394
0
        break;
395
0
      }
396
397
0
      ++token;
398
0
    } while (!stack.empty());
399
0
    return false;
400
0
  }
401
402
  bdecode_node::type_t bdecode_node::type() const noexcept
403
0
  {
404
0
    if (m_token_idx == -1) return none_t;
405
0
    if (m_root_tokens[m_token_idx].type == bdecode_token::long_string)
406
0
      return bdecode_node::string_t;
407
0
    return static_cast<bdecode_node::type_t>(m_root_tokens[m_token_idx].type);
408
0
  }
409
410
  bdecode_node::operator bool() const noexcept
411
0
  { return m_token_idx != -1; }
412
413
  span<char const> bdecode_node::data_section() const noexcept
414
0
  {
415
0
    if (m_token_idx == -1) return {};
416
417
0
    TORRENT_ASSERT(m_token_idx != -1);
418
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
419
0
    bdecode_token const& next = m_root_tokens[m_token_idx + t.next_item];
420
0
    return {m_buffer + t.offset, static_cast<std::ptrdiff_t>(next.offset - t.offset)};
421
0
  }
422
423
  std::ptrdiff_t bdecode_node::data_offset() const noexcept
424
0
  {
425
0
    TORRENT_ASSERT_PRECOND(m_token_idx != -1);
426
0
    if (m_token_idx == -1) return -1;
427
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
428
0
    return t.offset;
429
0
  }
430
431
  bdecode_node bdecode_node::list_at(int i) const
432
0
  {
433
0
    TORRENT_ASSERT(type() == list_t);
434
0
    TORRENT_ASSERT(i >= 0);
435
436
    // make sure this is a list.
437
0
    bdecode_token const* tokens = m_root_tokens;
438
439
    // this is the first item
440
0
    int token = m_token_idx + 1;
441
0
    int item = 0;
442
443
    // do we have a lookup cached?
444
0
    if (m_last_index <= i && m_last_index != -1)
445
0
    {
446
0
      token = m_last_token;
447
0
      item = m_last_index;
448
0
    }
449
450
0
    while (item < i)
451
0
    {
452
0
      token += tokens[token].next_item;
453
0
      ++item;
454
455
      // index 'i' out of range
456
0
      TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
457
0
    }
458
459
0
    m_last_token = token;
460
0
    m_last_index = i;
461
462
0
    return bdecode_node(tokens, m_buffer, m_buffer_size, token);
463
0
  }
464
465
  string_view bdecode_node::list_string_value_at(int i
466
    , string_view default_val) const
467
0
  {
468
0
    bdecode_node const n = list_at(i);
469
0
    if (n.type() != bdecode_node::string_t) return default_val;
470
0
    return n.string_value();
471
0
  }
472
473
  std::int64_t bdecode_node::list_int_value_at(int i
474
    , std::int64_t default_val) const
475
0
  {
476
0
    bdecode_node const n = list_at(i);
477
0
    if (n.type() != bdecode_node::int_t) return default_val;
478
0
    return n.int_value();
479
0
  }
480
481
  int bdecode_node::list_size() const
482
0
  {
483
0
    TORRENT_ASSERT(type() == list_t);
484
485
0
    if (m_size != -1) return m_size;
486
487
    // make sure this is a list.
488
0
    bdecode_token const* tokens = m_root_tokens;
489
0
    TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::list);
490
491
    // this is the first item
492
0
    int token = m_token_idx + 1;
493
0
    int ret = 0;
494
495
    // do we have a lookup cached?
496
0
    if (m_last_index != -1)
497
0
    {
498
0
      token = m_last_token;
499
0
      ret = m_last_index;
500
0
    }
501
0
    while (tokens[token].type != bdecode_token::end)
502
0
    {
503
0
      token += tokens[token].next_item;
504
0
      ++ret;
505
0
    }
506
507
0
    m_size = ret;
508
509
0
    return ret;
510
0
  }
511
512
  std::pair<bdecode_node, bdecode_node> bdecode_node::dict_at_node(int i) const
513
0
  {
514
0
    TORRENT_ASSERT(type() == dict_t);
515
0
    TORRENT_ASSERT(m_token_idx != -1);
516
517
0
    bdecode_token const* tokens = m_root_tokens;
518
0
    TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::dict);
519
520
0
    int token = m_token_idx + 1;
521
0
    int item = 0;
522
523
    // do we have a lookup cached?
524
0
    if (m_last_index <= i && m_last_index != -1)
525
0
    {
526
0
      token = m_last_token;
527
0
      item = m_last_index;
528
0
    }
529
530
0
    while (item < i)
531
0
    {
532
0
      TORRENT_ASSERT(tokens[token].type == bdecode_token::string
533
0
        || tokens[token].type == bdecode_token::long_string);
534
535
      // skip the key
536
0
      token += tokens[token].next_item;
537
0
      TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
538
539
      // skip the value
540
0
      token += tokens[token].next_item;
541
542
0
      ++item;
543
544
      // index 'i' out of range
545
0
      TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
546
0
    }
547
548
    // there's no point in caching the first item
549
0
    if (i > 0)
550
0
    {
551
0
      m_last_token = token;
552
0
      m_last_index = i;
553
0
    }
554
555
0
    int value_token = token + tokens[token].next_item;
556
0
    TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
557
558
0
    return std::make_pair(
559
0
      bdecode_node(tokens, m_buffer, m_buffer_size, token)
560
0
      , bdecode_node(tokens, m_buffer, m_buffer_size, value_token));
561
0
  }
562
563
  std::pair<string_view, bdecode_node> bdecode_node::dict_at(int const i) const
564
0
  {
565
0
    bdecode_node key;
566
0
    bdecode_node value;
567
0
    std::tie(key, value) = dict_at_node(i);
568
0
    return {key.string_value(), value};
569
0
  }
570
571
  int bdecode_node::dict_size() const
572
0
  {
573
0
    TORRENT_ASSERT(type() == dict_t);
574
0
    TORRENT_ASSERT(m_token_idx != -1);
575
576
0
    if (m_size != -1) return m_size;
577
578
0
    bdecode_token const* tokens = m_root_tokens;
579
0
    TORRENT_ASSERT(tokens[m_token_idx].type == bdecode_token::dict);
580
581
    // this is the first item
582
0
    int token = m_token_idx + 1;
583
0
    int ret = 0;
584
585
0
    if (m_last_index != -1)
586
0
    {
587
0
      ret = m_last_index * 2;
588
0
      token = m_last_token;
589
0
    }
590
591
0
    while (tokens[token].type != bdecode_token::end)
592
0
    {
593
0
      token += tokens[token].next_item;
594
0
      ++ret;
595
0
    }
596
597
    // a dictionary must contain full key-value pairs. which means
598
    // the number of entries is divisible by 2
599
0
    TORRENT_ASSERT((ret % 2) == 0);
600
601
    // each item is one key and one value, so divide by 2
602
0
    ret /= 2;
603
604
0
    m_size = ret;
605
606
0
    return ret;
607
0
  }
608
609
  bdecode_node bdecode_node::dict_find(string_view key) const
610
0
  {
611
0
    TORRENT_ASSERT(type() == dict_t);
612
613
0
    bdecode_token const* const tokens = m_root_tokens;
614
615
    // this is the first item
616
0
    int token = m_token_idx + 1;
617
618
0
    while (tokens[token].type != bdecode_token::end)
619
0
    {
620
0
      bdecode_token const& t = tokens[token];
621
0
      TORRENT_ASSERT(t.type == bdecode_token::string
622
0
        || t.type == bdecode_token::long_string);
623
0
      int const size = token_source_span(t) - t.start_offset();
624
0
      if (int(key.size()) == size
625
0
        && std::equal(key.data(), key.data() + size, m_buffer
626
0
          + t.offset + t.start_offset()))
627
0
      {
628
        // skip key
629
0
        token += t.next_item;
630
0
        TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
631
632
0
        return bdecode_node(tokens, m_buffer, m_buffer_size, token);
633
0
      }
634
635
      // skip key
636
0
      token += t.next_item;
637
0
      TORRENT_ASSERT(tokens[token].type != bdecode_token::end);
638
639
      // skip value
640
0
      token += tokens[token].next_item;
641
0
    }
642
643
0
    return bdecode_node();
644
0
  }
645
646
  bdecode_node bdecode_node::dict_find_list(string_view key) const
647
0
  {
648
0
    bdecode_node ret = dict_find(key);
649
0
    if (ret.type() == bdecode_node::list_t)
650
0
      return ret;
651
0
    return bdecode_node();
652
0
  }
653
654
  bdecode_node bdecode_node::dict_find_dict(string_view key) const
655
0
  {
656
0
    bdecode_node ret = dict_find(key);
657
0
    if (ret.type() == bdecode_node::dict_t)
658
0
      return ret;
659
0
    return bdecode_node();
660
0
  }
661
662
  bdecode_node bdecode_node::dict_find_string(string_view key) const
663
0
  {
664
0
    bdecode_node ret = dict_find(key);
665
0
    if (ret.type() == bdecode_node::string_t)
666
0
      return ret;
667
0
    return bdecode_node();
668
0
  }
669
670
  bdecode_node bdecode_node::dict_find_int(string_view key) const
671
0
  {
672
0
    bdecode_node ret = dict_find(key);
673
0
    if (ret.type() == bdecode_node::int_t)
674
0
      return ret;
675
0
    return bdecode_node();
676
0
  }
677
678
  string_view bdecode_node::dict_find_string_value(string_view key
679
    , string_view default_value) const
680
0
  {
681
0
    bdecode_node n = dict_find(key);
682
0
    if (n.type() != bdecode_node::string_t) return default_value;
683
0
    return n.string_value();
684
0
  }
685
686
  std::int64_t bdecode_node::dict_find_int_value(string_view key
687
    , std::int64_t default_val) const
688
0
  {
689
0
    bdecode_node n = dict_find(key);
690
0
    if (n.type() != bdecode_node::int_t) return default_val;
691
0
    return n.int_value();
692
0
  }
693
694
  std::int64_t bdecode_node::int_value() const
695
0
  {
696
0
    TORRENT_ASSERT(type() == int_t);
697
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
698
0
    int const size = token_source_span(t);
699
0
    TORRENT_ASSERT(t.type == bdecode_token::integer);
700
701
    // +1 is to skip the 'i'
702
0
    char const* ptr = m_buffer + t.offset + 1;
703
0
    std::int64_t val = 0;
704
0
    bool const negative = (*ptr == '-');
705
0
    bdecode_errors::error_code_enum ec = bdecode_errors::no_error;
706
0
    char const* end = parse_int(ptr + int(negative)
707
0
      , ptr + size, 'e', val, ec);
708
0
    if (ec) return 0;
709
0
    TORRENT_UNUSED(end);
710
0
    TORRENT_ASSERT(end < ptr + size);
711
0
    if (negative) val = -val;
712
0
    return val;
713
0
  }
714
715
  string_view bdecode_node::string_value() const
716
0
  {
717
0
    TORRENT_ASSERT(type() == string_t);
718
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
719
0
    auto const size = aux::numeric_cast<std::size_t>(token_source_span(t) - t.start_offset());
720
0
    TORRENT_ASSERT(t.type == bdecode_token::string
721
0
      || t.type == bdecode_token::long_string);
722
723
0
    return string_view(m_buffer + t.offset + t.start_offset(), size);
724
0
  }
725
726
  std::ptrdiff_t bdecode_node::string_offset() const
727
0
  {
728
0
    TORRENT_ASSERT(type() == string_t);
729
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
730
0
    TORRENT_ASSERT(t.type == bdecode_token::string
731
0
      || t.type == bdecode_token::long_string);
732
0
    return t.offset + t.start_offset();
733
0
  }
734
735
  char const* bdecode_node::string_ptr() const
736
0
  {
737
0
    TORRENT_ASSERT(type() == string_t);
738
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
739
0
    TORRENT_ASSERT(t.type == bdecode_token::string
740
0
      || t.type == bdecode_token::long_string);
741
0
    return m_buffer + t.offset + t.start_offset();
742
0
  }
743
744
  int bdecode_node::string_length() const
745
0
  {
746
0
    TORRENT_ASSERT(type() == string_t);
747
0
    bdecode_token const& t = m_root_tokens[m_token_idx];
748
0
    TORRENT_ASSERT(t.type == bdecode_token::string
749
0
      || t.type == bdecode_token::long_string);
750
0
    return token_source_span(t) - t.start_offset();
751
0
  }
752
753
  void bdecode_node::reserve(int tokens)
754
0
  { m_tokens.reserve(aux::numeric_cast<std::size_t>(tokens)); }
755
756
  void bdecode_node::swap(bdecode_node& n)
757
0
  {
758
/*
759
    bool lhs_is_root = (m_root_tokens == &m_tokens);
760
    bool rhs_is_root = (n.m_root_tokens == &n.m_tokens);
761
762
    // swap is only defined between non-root nodes
763
    // and between root-nodes. They may not be mixed!
764
    // note that when swapping root nodes, all bdecode_node
765
    // entries that exist in those subtrees are invalidated!
766
    TORRENT_ASSERT(lhs_is_root == rhs_is_root);
767
768
    // if both are roots, m_root_tokens always point to
769
    // its own vector, and should not get swapped (the
770
    // underlying vectors are swapped already)
771
    if (!lhs_is_root && !rhs_is_root)
772
    {
773
      // if neither is a root, we just swap the pointers
774
      // to the token vectors, switching their roots
775
      std::swap(m_root_tokens, n.m_root_tokens);
776
    }
777
*/
778
0
    m_tokens.swap(n.m_tokens);
779
0
    std::swap(m_root_tokens, n.m_root_tokens);
780
0
    std::swap(m_buffer, n.m_buffer);
781
0
    std::swap(m_buffer_size, n.m_buffer_size);
782
0
    std::swap(m_token_idx, n.m_token_idx);
783
0
    std::swap(m_last_index, n.m_last_index);
784
0
    std::swap(m_last_token, n.m_last_token);
785
0
    std::swap(m_size, n.m_size);
786
0
  }
787
788
0
#define TORRENT_FAIL_BDECODE(code) do { \
789
0
  ec = code; \
790
0
  if (error_pos) *error_pos = int(start - orig_start); \
791
0
  goto done; \
792
0
  } TORRENT_WHILE_0
793
794
  int bdecode(char const* start, char const* end, bdecode_node& ret
795
    , error_code& ec, int* error_pos, int const depth_limit, int token_limit)
796
0
  {
797
0
    ret = bdecode({start, end - start}, ec, error_pos, depth_limit, token_limit);
798
0
    return ec ? -1 : 0;
799
0
  }
800
801
  bdecode_node bdecode(span<char const> buffer, int depth_limit, int token_limit)
802
0
  {
803
0
    error_code ec;
804
0
    bdecode_node ret = bdecode(buffer, ec, nullptr, depth_limit, token_limit);
805
0
    if (ec) throw system_error(ec);
806
0
    return ret;
807
0
  }
808
809
  bdecode_node bdecode(span<char const> buffer
810
    , error_code& ec, int* error_pos, int depth_limit, int token_limit)
811
0
  {
812
0
    bdecode_node ret;
813
0
    ec.clear();
814
815
0
    if (buffer.size() > bdecode_token::max_offset)
816
0
    {
817
0
      if (error_pos) *error_pos = 0;
818
0
      ec = bdecode_errors::limit_exceeded;
819
0
      return ret;
820
0
    }
821
822
    // this is the stack of bdecode_token indices, into m_tokens.
823
    // sp is the stack pointer, as index into the array, stack
824
0
    int sp = 0;
825
0
    TORRENT_ALLOCA(stack, stack_frame, depth_limit);
826
827
    // TODO: 2 attempt to simplify this implementation by embracing the span
828
0
    char const* start = buffer.data();
829
0
    char const* end = start + buffer.size();
830
0
    char const* const orig_start = start;
831
832
0
    if (start == end)
833
0
      TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
834
835
0
    while (start <= end)
836
0
    {
837
0
      if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
838
839
0
      if (sp >= depth_limit)
840
0
        TORRENT_FAIL_BDECODE(bdecode_errors::depth_exceeded);
841
842
0
      --token_limit;
843
0
      if (token_limit < 0)
844
0
        TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded);
845
846
      // look for a new token
847
0
      char const t = *start;
848
849
0
      int const current_frame = sp;
850
851
      // if we're currently parsing a dictionary, assert that
852
      // every other node is a string.
853
0
      if (current_frame > 0
854
0
        && ret.m_tokens[stack[current_frame - 1].token].type == bdecode_token::dict)
855
0
      {
856
0
        if (stack[current_frame - 1].state == 0)
857
0
        {
858
          // the current parent is a dict and we are parsing a key.
859
          // only allow a digit (for a string) or 'e' to terminate
860
0
          if (!numeric(t) && t != 'e')
861
0
            TORRENT_FAIL_BDECODE(bdecode_errors::expected_digit);
862
0
        }
863
0
      }
864
865
0
      switch (t)
866
0
      {
867
0
        case 'd':
868
0
          stack[sp++] = stack_frame(int(ret.m_tokens.size()));
869
          // we push it into the stack so that we know where to fill
870
          // in the next_node field once we pop this node off the stack.
871
          // i.e. get to the node following the dictionary in the buffer
872
0
          ret.m_tokens.push_back({start - orig_start, bdecode_token::dict});
873
0
          ++start;
874
0
          break;
875
0
        case 'l':
876
0
          stack[sp++] = stack_frame(int(ret.m_tokens.size()));
877
          // we push it into the stack so that we know where to fill
878
          // in the next_node field once we pop this node off the stack.
879
          // i.e. get to the node following the list in the buffer
880
0
          ret.m_tokens.push_back({start - orig_start, bdecode_token::list});
881
0
          ++start;
882
0
          break;
883
0
        case 'i':
884
0
        {
885
0
          char const* const int_start = start;
886
0
          bdecode_errors::error_code_enum e = bdecode_errors::no_error;
887
          // +1 here to point to the first digit, rather than 'i'
888
0
          start = check_integer(start + 1, end, e);
889
0
          if (e)
890
0
          {
891
            // in order to gracefully terminate the tree,
892
            // make sure the end of the previous token is set correctly
893
0
            if (error_pos) *error_pos = int(start - orig_start);
894
0
            error_pos = nullptr;
895
0
            start = int_start;
896
0
            TORRENT_FAIL_BDECODE(e);
897
0
          }
898
0
          ret.m_tokens.push_back({int_start - orig_start
899
0
            , 1, bdecode_token::integer, 1});
900
0
          TORRENT_ASSERT(*start == 'e');
901
902
          // skip 'e'
903
0
          ++start;
904
0
          break;
905
0
        }
906
0
        case 'e':
907
0
        {
908
          // this is the end of a list or dict
909
0
          if (sp == 0)
910
0
            TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
911
912
0
          if (sp > 0
913
0
            && ret.m_tokens[stack[sp - 1].token].type == bdecode_token::dict
914
0
            && stack[sp - 1].state == 1)
915
0
          {
916
            // this means we're parsing a dictionary and about to parse a
917
            // value associated with a key. Instead, we got a termination
918
0
            TORRENT_FAIL_BDECODE(bdecode_errors::expected_value);
919
0
          }
920
921
          // insert the end-of-sequence token
922
0
          ret.m_tokens.push_back({start - orig_start, 1, bdecode_token::end});
923
924
          // and back-patch the start of this sequence with the offset
925
          // to the next token we'll insert
926
0
          int const top = stack[sp - 1].token;
927
          // subtract the token's own index, since this is a relative
928
          // offset
929
0
          if (int(ret.m_tokens.size()) - top > bdecode_token::max_next_item)
930
0
            TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded);
931
932
0
          ret.m_tokens[std::size_t(top)].next_item = std::uint32_t(int(ret.m_tokens.size()) - top);
933
934
          // and pop it from the stack.
935
0
          TORRENT_ASSERT(sp > 0);
936
0
          --sp;
937
0
          ++start;
938
0
          break;
939
0
        }
940
0
        default:
941
0
        {
942
          // this is the case for strings. The start character is any
943
          // numeric digit
944
0
          if (!numeric(t))
945
0
            TORRENT_FAIL_BDECODE(bdecode_errors::expected_value);
946
947
0
          std::int64_t len = t - '0';
948
0
          char const* const str_start = start;
949
0
          ++start;
950
0
          if (start >= end) TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
951
0
          bdecode_errors::error_code_enum e = bdecode_errors::no_error;
952
0
          start = parse_int(start, end, ':', len, e);
953
0
          if (e)
954
0
            TORRENT_FAIL_BDECODE(e);
955
0
          if (start == end)
956
0
            TORRENT_FAIL_BDECODE(bdecode_errors::expected_colon);
957
958
          // remaining buffer size excluding ':'
959
0
          ptrdiff_t const buff_size = end - start - 1;
960
0
          if (len > buff_size)
961
0
            TORRENT_FAIL_BDECODE(bdecode_errors::unexpected_eof);
962
0
          if (len < 0)
963
0
            TORRENT_FAIL_BDECODE(bdecode_errors::overflow);
964
965
          // skip ':'
966
0
          ++start;
967
          // no need to range check start here
968
          // the check above ensures that the buffer is long enough to hold
969
          // the string's length which guarantees that start <= end
970
971
0
          int const header = int(start - str_start);
972
0
          TORRENT_ASSERT(header >= 0);
973
974
          // the bdecode_token only has 8 bits to keep the header size
975
          // in. If it overflows, fail!
976
0
          if (header > aux::bdecode_token::long_string_max_header)
977
0
            TORRENT_FAIL_BDECODE(bdecode_errors::limit_exceeded);
978
979
0
          ret.m_tokens.push_back({str_start - orig_start
980
0
            , 1, bdecode_token::string, std::uint32_t(header)});
981
0
          start += len;
982
0
          break;
983
0
        }
984
0
      }
985
986
0
      if (current_frame > 0
987
0
        && ret.m_tokens[stack[current_frame - 1].token].type == bdecode_token::dict)
988
0
      {
989
        // the next item we parse is the opposite
990
        // state is an unsigned 1-bit member. adding 1 will flip the bit
991
0
        stack[current_frame - 1].state = (stack[current_frame - 1].state + 1) & 1;
992
0
      }
993
994
      // this terminates the top level node, we're done!
995
0
      if (sp == 0) break;
996
0
    }
997
998
0
done:
999
1000
    // if parse failed, sp will be greater than 1
1001
    // unwind the stack by inserting terminator to make whatever we have
1002
    // so far valid
1003
0
    while (sp > 0) {
1004
0
      TORRENT_ASSERT(ec);
1005
0
      --sp;
1006
1007
      // we may need to insert a dummy token to properly terminate the tree,
1008
      // in case we just parsed a key to a dict and failed in the value
1009
0
      if (ret.m_tokens[stack[sp].token].type == bdecode_token::dict
1010
0
        && stack[sp].state == 1)
1011
0
      {
1012
        // insert an empty dictionary as the value
1013
0
        ret.m_tokens.push_back({start - orig_start, 2, bdecode_token::dict});
1014
0
        ret.m_tokens.push_back({start - orig_start, bdecode_token::end});
1015
0
      }
1016
1017
0
      int const top = stack[sp].token;
1018
0
      TORRENT_ASSERT(int(ret.m_tokens.size()) - top <= bdecode_token::max_next_item);
1019
0
      ret.m_tokens[std::size_t(top)].next_item = std::uint32_t(int(ret.m_tokens.size()) - top);
1020
0
      ret.m_tokens.push_back({start - orig_start, 1, bdecode_token::end});
1021
0
    }
1022
1023
0
    ret.m_tokens.push_back({start - orig_start, 0, bdecode_token::end});
1024
1025
0
    ret.m_token_idx = 0;
1026
0
    ret.m_buffer = orig_start;
1027
0
    ret.m_buffer_size = int(start - orig_start);
1028
0
    ret.m_root_tokens = ret.m_tokens.data();
1029
1030
0
    return ret;
1031
0
  }
1032
1033
  namespace {
1034
1035
  int line_longer_than(bdecode_node const& e, int limit)
1036
0
  {
1037
0
    int line_len = 0;
1038
0
    switch (e.type())
1039
0
    {
1040
0
    case bdecode_node::list_t:
1041
0
      line_len += 4;
1042
0
      if (line_len > limit) return -1;
1043
0
      for (int i = 0; i < e.list_size(); ++i)
1044
0
      {
1045
0
        int const ret = line_longer_than(e.list_at(i), limit - line_len);
1046
0
        if (ret == -1) return -1;
1047
0
        line_len += ret + 2;
1048
0
      }
1049
0
      break;
1050
0
    case bdecode_node::dict_t:
1051
0
      line_len += 4;
1052
0
      if (line_len > limit) return -1;
1053
0
      for (int i = 0; i < e.dict_size(); ++i)
1054
0
      {
1055
0
        line_len += 4 + int(e.dict_at(i).first.size());
1056
0
        if (line_len > limit) return -1;
1057
0
        int const ret = line_longer_than(e.dict_at(i).second, limit - line_len);
1058
0
        if (ret == -1) return -1;
1059
0
        line_len += ret + 1;
1060
0
      }
1061
0
      break;
1062
0
    case bdecode_node::string_t:
1063
0
      line_len += 3 + e.string_length();
1064
0
      break;
1065
0
    case bdecode_node::int_t:
1066
0
    {
1067
0
      std::int64_t val = e.int_value();
1068
0
      while (val > 0)
1069
0
      {
1070
0
        ++line_len;
1071
0
        val /= 10;
1072
0
      }
1073
0
      line_len += 2;
1074
0
    }
1075
0
    break;
1076
0
    case bdecode_node::none_t:
1077
0
      line_len += 4;
1078
0
      break;
1079
0
    }
1080
1081
0
    if (line_len > limit) return -1;
1082
0
    return line_len;
1083
0
  }
1084
1085
  void print_string(std::string& ret, string_view str, bool single_line)
1086
0
  {
1087
0
    int const len = int(str.size());
1088
0
    bool printable = true;
1089
0
    for (int i = 0; i < len; ++i)
1090
0
    {
1091
0
      char const c = str[std::size_t(i)];
1092
0
      if (c >= 32 && c < 127) continue;
1093
0
      printable = false;
1094
0
      break;
1095
0
    }
1096
0
    ret += "'";
1097
0
    if (printable)
1098
0
    {
1099
0
      if (single_line && len > 30)
1100
0
      {
1101
0
        ret.append(str.data(), 14);
1102
0
        ret += "...";
1103
0
        ret.append(str.data() + len - 14, 14);
1104
0
      }
1105
0
      else
1106
0
        ret.append(str.data(), std::size_t(len));
1107
0
      ret += "'";
1108
0
      return;
1109
0
    }
1110
0
    if (single_line && len > 32)
1111
0
    {
1112
0
      aux::escape_string(ret, str.data(), 25);
1113
0
      ret += "...";
1114
0
      aux::escape_string(ret, str.data() + len - 4, 4);
1115
0
    }
1116
0
    else
1117
0
    {
1118
0
      aux::escape_string(ret, str.data(), len);
1119
0
    }
1120
0
    ret += "'";
1121
0
  }
1122
1123
}
1124
1125
  std::string print_entry(bdecode_node const& e
1126
    , bool single_line, int indent)
1127
0
  {
1128
0
    char indent_str[200];
1129
0
    using std::memset;
1130
0
    memset(indent_str, ' ', 200);
1131
0
    indent_str[0] = ',';
1132
0
    indent_str[1] = '\n';
1133
0
    indent_str[199] = 0;
1134
0
    if (indent < 197 && indent >= 0) indent_str[indent + 2] = 0;
1135
0
    std::string ret;
1136
0
    switch (e.type())
1137
0
    {
1138
0
      case bdecode_node::none_t: return "none";
1139
0
      case bdecode_node::int_t:
1140
0
      {
1141
0
        char str[100];
1142
0
        std::snprintf(str, sizeof(str), "%" PRId64, e.int_value());
1143
0
        return str;
1144
0
      }
1145
0
      case bdecode_node::string_t:
1146
0
      {
1147
0
        print_string(ret, e.string_value(), single_line);
1148
0
        return ret;
1149
0
      }
1150
0
      case bdecode_node::list_t:
1151
0
      {
1152
0
        ret += '[';
1153
0
        bool one_liner = line_longer_than(e, 200) != -1 || single_line;
1154
1155
0
        if (!one_liner) ret += indent_str + 1;
1156
0
        for (int i = 0; i < e.list_size(); ++i)
1157
0
        {
1158
0
          if (i == 0 && one_liner) ret += ' ';
1159
0
          ret += print_entry(e.list_at(i), single_line, indent + 2);
1160
0
          if (i < e.list_size() - 1) ret += (one_liner ? ", " : indent_str);
1161
0
          else ret += (one_liner ? " " : indent_str + 1);
1162
0
        }
1163
0
        ret += ']';
1164
0
        return ret;
1165
0
      }
1166
0
      case bdecode_node::dict_t:
1167
0
      {
1168
0
        ret += '{';
1169
0
        bool one_liner = line_longer_than(e, 200) != -1 || single_line;
1170
1171
0
        if (!one_liner) ret += indent_str + 1;
1172
0
        for (int i = 0; i < e.dict_size(); ++i)
1173
0
        {
1174
0
          if (i == 0 && one_liner) ret += ' ';
1175
0
          std::pair<string_view, bdecode_node> ent = e.dict_at(i);
1176
0
          print_string(ret, ent.first, true);
1177
0
          ret += ": ";
1178
0
          ret += print_entry(ent.second, single_line, indent + 2);
1179
0
          if (i < e.dict_size() - 1) ret += (one_liner ? ", " : indent_str);
1180
0
          else ret += (one_liner ? " " : indent_str + 1);
1181
0
        }
1182
0
        ret += '}';
1183
0
        return ret;
1184
0
      }
1185
0
    }
1186
0
    return ret;
1187
0
  }
1188
}