Coverage Report

Created: 2024-09-23 06:29

/src/abseil-cpp/absl/strings/internal/str_split_internal.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2017 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
16
// This file declares INTERNAL parts of the Split API that are inline/templated
17
// or otherwise need to be available at compile time. The main abstractions
18
// defined in here are
19
//
20
//   - ConvertibleToStringView
21
//   - SplitIterator<>
22
//   - Splitter<>
23
//
24
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25
// absl/strings/str_split.h.
26
//
27
// IWYU pragma: private, include "absl/strings/str_split.h"
28
29
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31
32
#include <array>
33
#include <cstddef>
34
#include <initializer_list>
35
#include <iterator>
36
#include <tuple>
37
#include <type_traits>
38
#include <utility>
39
#include <vector>
40
41
#include "absl/base/macros.h"
42
#include "absl/base/port.h"
43
#include "absl/meta/type_traits.h"
44
#include "absl/strings/string_view.h"
45
46
#ifdef _GLIBCXX_DEBUG
47
#include "absl/strings/internal/stl_type_traits.h"
48
#endif  // _GLIBCXX_DEBUG
49
50
namespace absl {
51
ABSL_NAMESPACE_BEGIN
52
namespace strings_internal {
53
54
// This class is implicitly constructible from everything that absl::string_view
55
// is implicitly constructible from, except for rvalue strings.  This means it
56
// can be used as a function parameter in places where passing a temporary
57
// string might cause memory lifetime issues.
58
class ConvertibleToStringView {
59
 public:
60
  // Converting constructors, deliberately non-explicit (hence the NOLINTs).
  // Each one initializes value_, an absl::string_view, from `s`.
  ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
61
0
      : value_(s) {}
62
0
  ConvertibleToStringView(char* s) : value_(s) {}  // NOLINT(runtime/explicit)
63
  ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
64
14.8M
      : value_(s) {}
65
  ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
66
0
      : value_(s) {}
67
68
  // Disable conversion from rvalue strings: these overloads are deleted, so
  // passing an rvalue std::string (const or non-const) does not compile.
69
  ConvertibleToStringView(std::string&& s) = delete;
70
  ConvertibleToStringView(const std::string&& s) = delete;
71
72
14.8M
  // Returns the view stored by whichever constructor was used.
  absl::string_view value() const { return value_; }
73
74
 private:
75
  // The only data member; set once in the constructor's initializer list.
  absl::string_view value_;
76
};
77
78
// An iterator that enumerates the parts of a string from a Splitter. The text
79
// to be split, the Delimiter, and the Predicate are all taken from the given
80
// Splitter object. Iterators may only be compared if they refer to the same
81
// Splitter instance.
82
//
83
// This class is NOT part of the public splitting API.
84
template <typename Splitter>
85
class SplitIterator {
86
 public:
87
  using iterator_category = std::input_iterator_tag;
88
  using value_type = absl::string_view;
89
  using difference_type = ptrdiff_t;
90
  using pointer = const value_type*;
91
  using reference = const value_type&;
92
93
  // kInitState: parts are still being produced. kLastState: the part just
  // extracted reached the end of the text. kEndState: iteration is over.
  enum State { kInitState, kLastState, kEndState };
94
  // Copies the splitter's delimiter and predicate, then either parks the
  // iterator at the end of the text (kEndState, or text with null data()) or
  // advances to the first part via operator++.
  SplitIterator(State state, const Splitter* splitter)
95
      : pos_(0),
96
        state_(state),
97
        splitter_(splitter),
98
        delimiter_(splitter->delimiter()),
99
6.24M
        predicate_(splitter->predicate()) {
100
    // Hack to maintain backward compatibility. This one block makes it so an
101
    // empty absl::string_view whose .data() happens to be nullptr behaves
102
    // *differently* from an otherwise empty absl::string_view whose .data() is
103
    // not nullptr. This is an undesirable difference in general, but this
104
    // behavior is maintained to avoid breaking existing code that happens to
105
    // depend on this old behavior/bug. Perhaps it will be fixed one day. The
106
    // difference in behavior is as follows:
107
    //   Split(absl::string_view(""), '-');  // {""}
108
    //   Split(absl::string_view(), '-');    // {}
109
6.24M
    if (splitter_->text().data() == nullptr) {
110
0
      state_ = kEndState;
111
0
      pos_ = splitter_->text().size();
112
0
      return;
113
0
    }
114
115
6.24M
    if (state_ == kEndState) {
116
0
      pos_ = splitter_->text().size();
117
6.24M
    } else {
118
6.24M
      ++(*this);
119
6.24M
    }
120
6.24M
  }
121
122
28.8M
  // True once iteration has finished, i.e. state_ is kEndState.
  bool at_end() const { return state_ == kEndState; }
123
124
0
  // Both accessors expose the current part, curr_.
  reference operator*() const { return curr_; }
125
32.7M
  pointer operator->() const { return &curr_; }
126
127
22.6M
  // Advances to the next part for which predicate_ returns true, or switches
  // to kEndState when the previously extracted part was the last one.
  SplitIterator& operator++() {
128
22.6M
    do {
129
22.6M
      if (state_ == kLastState) {
130
6.24M
        state_ = kEndState;
131
6.24M
        return *this;
132
6.24M
      }
133
16.3M
      const absl::string_view text = splitter_->text();
134
16.3M
      const absl::string_view d = delimiter_.Find(text, pos_);
135
16.3M
      // A delimiter result positioned at the very end of the text marks the
      // part extracted below as the final one.
      if (d.data() == text.data() + text.size()) state_ = kLastState;
136
16.3M
      // curr_ is the text from pos_ up to the start of d; pos_ then moves
      // past both curr_ and d.
      curr_ = text.substr(pos_,
137
16.3M
                          static_cast<size_t>(d.data() - (text.data() + pos_)));
138
16.3M
      pos_ += curr_.size() + d.size();
139
16.3M
    } while (!predicate_(curr_));
140
16.3M
    return *this;
141
22.6M
  }
142
143
  // Post-increment: returns a copy taken before advancing.
  SplitIterator operator++(int) {
144
    SplitIterator old(*this);
145
    ++(*this);
146
    return old;
147
  }
148
149
0
  // Equality looks at state_ and pos_ only; splitter_ is not examined.
  friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
150
0
    return a.state_ == b.state_ && a.pos_ == b.pos_;
151
0
  }
152
153
0
  friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
154
0
    return !(a == b);
155
0
  }
156
157
 private:
158
  // Offset into splitter_->text() at which the next delimiter search starts.
  size_t pos_;
159
  State state_;
160
  // The part most recently extracted by operator++.
  absl::string_view curr_;
161
  // Not owned; supplies the text on every increment.
  const Splitter* splitter_;
162
  // Copies of the splitter's delimiter and predicate, made in the constructor.
  typename Splitter::DelimiterType delimiter_;
163
  typename Splitter::PredicateType predicate_;
164
};
165
166
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
167
// Primary template: used whenever the specialization below does not apply.
template <typename T, typename = void>
168
struct HasMappedType : std::false_type {};
169
// Specialization selected when `typename T::mapped_type` names a type.
template <typename T>
170
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
171
    : std::true_type {};
172
173
// HasValueType<T>::value is true iff there exists a type T::value_type.
174
// Primary template: used whenever the specialization below does not apply.
template <typename T, typename = void>
175
struct HasValueType : std::false_type {};
176
// Specialization selected when `typename T::value_type` names a type.
template <typename T>
177
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
178
};
179
180
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
181
// Primary template: used whenever the specialization below does not apply.
template <typename T, typename = void>
182
struct HasConstIterator : std::false_type {};
183
// Specialization selected when `typename T::const_iterator` names a type.
template <typename T>
184
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
185
    : std::true_type {};
186
187
// HasEmplace<T>::value is true iff there exists a method T::emplace().
188
// Primary template: used whenever the specialization below does not apply.
template <typename T, typename = void>
189
struct HasEmplace : std::false_type {};
190
// Specialization selected when the expression `std::declval<T>().emplace()`,
// a call with no arguments, is well-formed.
template <typename T>
191
struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
192
    : std::true_type {};
193
194
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
195
// details below in Splitter<> where this is used.
196
// Fallback overload (declaration only); its return type is std::false_type.
std::false_type IsInitializerListDispatch(...);  // default: No
197
// Overload taking a pointer to some std::initializer_list<T>; its return type
// is std::true_type.
template <typename T>
198
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
199
// Inherits from the return type of whichever overload a null T* selects.
template <typename T>
200
struct IsInitializerList
201
    : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
202
203
// SplitterIsConvertibleTo<C>::value is true iff the specified condition
204
// is true for type 'C'.
205
//
206
// Restricts conversion to container-like types (by testing for the presence of
207
// a const_iterator member type) and also disables conversion to an
208
// std::initializer_list (which also has a const_iterator). Otherwise, code
209
// compiled in C++11 will get an error due to ambiguous conversion paths (in
210
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
211
// or an std::initializer_list<T>).
212
213
// Primary template: false for every <has_value_type, has_mapped_type>
// combination that is not specialized below.
template <typename C, bool has_value_type, bool has_mapped_type>
214
struct SplitterIsConvertibleToImpl : std::false_type {};
215
216
// Has value_type but no mapped_type: true iff C::value_type is constructible
// from absl::string_view.
template <typename C>
217
struct SplitterIsConvertibleToImpl<C, true, false>
218
    : std::is_constructible<typename C::value_type, absl::string_view> {};
219
220
// Has both value_type and mapped_type: true iff C::key_type and
// C::mapped_type are each constructible from absl::string_view.
template <typename C>
221
struct SplitterIsConvertibleToImpl<C, true, true>
222
    : absl::conjunction<
223
          std::is_constructible<typename C::key_type, absl::string_view>,
224
          std::is_constructible<typename C::mapped_type, absl::string_view>> {};
225
226
// Forwards to SplitterIsConvertibleToImpl. The first bool argument is true
// when C is not an std::initializer_list and has both value_type and
// const_iterator (plus, under _GLIBCXX_DEBUG, when
// IsStrictlyBaseOfAndConvertibleToSTLContainer<C> is false). The second bool
// argument is HasMappedType<C>::value.
template <typename C>
227
struct SplitterIsConvertibleTo
228
    : SplitterIsConvertibleToImpl<
229
          C,
230
#ifdef _GLIBCXX_DEBUG
231
          !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
232
#endif  // _GLIBCXX_DEBUG
233
              !IsInitializerList<
234
                  typename std::remove_reference<C>::type>::value &&
235
              HasValueType<C>::value && HasConstIterator<C>::value,
236
          HasMappedType<C>::value> {
237
};
238
239
// Primary template: false unless the specialization below applies.
template <typename StringType, typename Container, typename = void>
240
struct ShouldUseLifetimeBound : std::false_type {};
241
242
// True when StringType is std::string and Container::value_type is
// absl::string_view. Splitter uses this to choose between its two
// `operator Container()` overloads.
template <typename StringType, typename Container>
243
struct ShouldUseLifetimeBound<
244
    StringType, Container,
245
    std::enable_if_t<
246
        std::is_same<StringType, std::string>::value &&
247
        std::is_same<typename Container::value_type, absl::string_view>::value>>
248
    : std::true_type {};
249
250
// True when StringType is std::string and at least one of First/Second is
// absl::string_view. Splitter uses this to choose between its two
// `operator std::pair<First, Second>()` overloads.
template <typename StringType, typename First, typename Second>
251
using ShouldUseLifetimeBoundForPair = std::integral_constant<
252
    bool, std::is_same<StringType, std::string>::value &&
253
              (std::is_same<First, absl::string_view>::value ||
254
               std::is_same<Second, absl::string_view>::value)>;
255
256
257
// This class implements the range that is returned by absl::StrSplit(). This
258
// class has templated conversion operators that allow it to be implicitly
259
// converted to a variety of types that the caller may have specified on the
260
// left-hand side of an assignment.
261
//
262
// The main interface for interacting with this class is through its implicit
263
// conversion operators. However, this class may also be used like a container
264
// in that it has .begin() and .end() member functions. It may also be used
265
// within a range-for loop.
266
//
267
// Output containers can be collections of any type that is constructible from
268
// an absl::string_view.
269
//
270
// A Predicate functor may be supplied. This predicate will be used to filter
271
// the split strings: only strings for which the predicate returns true will be
272
// kept. A Predicate object is any unary functor that takes an absl::string_view
273
// and returns bool.
274
//
275
// The StringType parameter can be either string_view or string, depending on
276
// whether the Splitter refers to a string stored elsewhere, or if the string
277
// resides inside the Splitter itself.
278
template <typename Delimiter, typename Predicate, typename StringType>
279
class Splitter {
280
 public:
281
  using DelimiterType = Delimiter;
282
  using PredicateType = Predicate;
283
  using const_iterator = strings_internal::SplitIterator<Splitter>;
284
  using value_type = typename std::iterator_traits<const_iterator>::value_type;
285
286
  // Takes the text, delimiter and predicate by value and moves each into the
  // corresponding member.
  Splitter(StringType input_text, Delimiter d, Predicate p)
287
      : text_(std::move(input_text)),
288
        delimiter_(std::move(d)),
289
6.24M
        predicate_(std::move(p)) {}
290
291
22.6M
  // Read-only accessors. text() returns text_ as an absl::string_view; the
  // other two return references to the stored delimiter and predicate.
  absl::string_view text() const { return text_; }
292
6.24M
  const Delimiter& delimiter() const { return delimiter_; }
293
6.24M
  const Predicate& predicate() const { return predicate_; }
294
295
  // Range functions that iterate the split substrings as absl::string_view
296
  // objects. These methods enable a Splitter to be used in a range-based for
297
  // loop.
298
6.24M
  const_iterator begin() const { return {const_iterator::kInitState, this}; }
299
0
  const_iterator end() const { return {const_iterator::kEndState, this}; }
300
301
  // An implicit conversion operator that is restricted to only those containers
302
  // that the splitter is convertible to.
303
  // This overload is enabled when ShouldUseLifetimeBound<StringType,
  // Container> is true; it carries ABSL_ATTRIBUTE_LIFETIME_BOUND.
  template <
304
      typename Container,
305
      std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
306
                           SplitterIsConvertibleTo<Container>::value,
307
                       std::nullptr_t> = nullptr>
308
  // NOLINTNEXTLINE(google-explicit-constructor)
309
  operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
310
    return ConvertToContainer<Container, typename Container::value_type,
311
                              HasMappedType<Container>::value>()(*this);
312
  }
313
314
  // This overload is enabled when ShouldUseLifetimeBound<StringType,
  // Container> is false; same body, without the lifetime-bound attribute.
  template <
315
      typename Container,
316
      std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
317
                           SplitterIsConvertibleTo<Container>::value,
318
                       std::nullptr_t> = nullptr>
319
  // NOLINTNEXTLINE(google-explicit-constructor)
320
6.24M
  operator Container() const {
321
6.24M
    return ConvertToContainer<Container, typename Container::value_type,
322
6.24M
                              HasMappedType<Container>::value>()(*this);
323
6.24M
  }
Unexecuted instantiation: absl::strings_internal::Splitter<absl::ByChar, absl::AllowEmpty, std::__1::basic_string_view<char, std::__1::char_traits<char> > >::operator std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > ><std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, (decltype(nullptr))0>() const
absl::strings_internal::Splitter<absl::ByChar, absl::AllowEmpty, std::__1::basic_string_view<char, std::__1::char_traits<char> > >::operator std::__1::vector<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::allocator<std::__1::basic_string_view<char, std::__1::char_traits<char> > > ><std::__1::vector<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::allocator<std::__1::basic_string_view<char, std::__1::char_traits<char> > > >, (decltype(nullptr))0>() const
Line
Count
Source
320
6.24M
  operator Container() const {
321
6.24M
    return ConvertToContainer<Container, typename Container::value_type,
322
6.24M
                              HasMappedType<Container>::value>()(*this);
323
6.24M
  }
324
325
  // Returns a pair with its .first and .second members set to the first two
326
  // strings returned by the begin() iterator. Either/both of .first and .second
327
  // will be constructed with empty strings if the iterator doesn't have a
328
  // corresponding value.
329
  template <typename First, typename Second,
330
            std::enable_if_t<
331
                ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
332
                std::nullptr_t> = nullptr>
333
  // NOLINTNEXTLINE(google-explicit-constructor)
334
  operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
335
    return ConvertToPair<First, Second>();
336
  }
337
338
  // Same pair conversion for the case where ShouldUseLifetimeBoundForPair is
  // false; no lifetime-bound attribute is applied.
  template <typename First, typename Second,
339
            std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
340
                                                            Second>::value,
341
                             std::nullptr_t> = nullptr>
342
  // NOLINTNEXTLINE(google-explicit-constructor)
343
  operator std::pair<First, Second>() const {
344
    return ConvertToPair<First, Second>();
345
  }
346
347
 private:
348
  // Shared implementation of the two std::pair conversions. Reads at most the
  // first two parts; `first`/`second` stay empty when a part is missing, and
  // any later parts are not consumed.
  template <typename First, typename Second>
349
  std::pair<First, Second> ConvertToPair() const {
350
    absl::string_view first, second;
351
    auto it = begin();
352
    if (it != end()) {
353
      first = *it;
354
      if (++it != end()) {
355
        second = *it;
356
      }
357
    }
358
    return {First(first), Second(second)};
359
  }
360
361
  // ConvertToContainer is a functor converting a Splitter to the requested
362
  // Container of ValueType. It is specialized below to optimize splitting to
363
  // certain combinations of Container and ValueType.
364
  //
365
  // This base template handles the generic case of storing the split results in
366
  // the requested non-map-like container and converting the split substrings to
367
  // the requested type.
368
  template <typename Container, typename ValueType, bool is_map = false>
369
  struct ConvertToContainer {
370
    Container operator()(const Splitter& splitter) const {
371
      Container c;
372
      // Each part is converted to ValueType and written through std::inserter.
      auto it = std::inserter(c, c.end());
373
      for (const auto& sp : splitter) {
374
        *it++ = ValueType(sp);
375
      }
376
      return c;
377
    }
378
  };
379
380
  // Partial specialization for a std::vector<absl::string_view>.
381
  //
382
  // Optimized for the common case of splitting to a
383
  // std::vector<absl::string_view>. In this case we first split the results to
384
  // a small array of absl::string_view on the stack, to reduce reallocations.
385
  template <typename A>
386
  struct ConvertToContainer<std::vector<absl::string_view, A>,
387
                            absl::string_view, false> {
388
    std::vector<absl::string_view, A> operator()(
389
6.24M
        const Splitter& splitter) const {
390
6.24M
      // Pointer/length pair used as the element type of the staging array;
      // it converts to absl::string_view when the batch is inserted into `v`.
      struct raw_view {
391
6.24M
        const char* data;
392
6.24M
        size_t size;
393
16.3M
        operator absl::string_view() const {  // NOLINT(runtime/explicit)
394
16.3M
          return {data, size};
395
16.3M
        }
396
6.24M
      };
397
6.24M
      std::vector<absl::string_view, A> v;
398
6.24M
      std::array<raw_view, 16> ar;
399
12.7M
      // Outer loop: one batch per pass. The inner loop fills `ar` with up to
      // ar.size() parts, then the filled prefix is appended to `v` at once.
      for (auto it = splitter.begin(); !it.at_end();) {
400
6.48M
        size_t index = 0;
401
16.3M
        do {
402
16.3M
          ar[index].data = it->data();
403
16.3M
          ar[index].size = it->size();
404
16.3M
          ++it;
405
16.3M
        } while (++index != ar.size() && !it.at_end());
406
        // We static_cast index to a signed type to work around overzealous
407
        // compiler warnings about signedness.
408
6.48M
        v.insert(v.end(), ar.begin(),
409
6.48M
                 ar.begin() + static_cast<ptrdiff_t>(index));
410
6.48M
      }
411
6.24M
      return v;
412
6.24M
    }
413
  };
414
415
  // Partial specialization for a std::vector<std::string>.
416
  //
417
  // Optimized for the common case of splitting to a std::vector<std::string>.
418
  // In this case we first split the results to a std::vector<absl::string_view>
419
  // so the returned std::vector<std::string> can have space reserved to avoid
420
  // std::string moves.
421
  template <typename A>
422
  struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
423
0
    std::vector<std::string, A> operator()(const Splitter& splitter) const {
424
0
      // Goes through the Splitter's own conversion to
      // std::vector<absl::string_view>, then builds the strings from that
      // range in a single constructor call.
      const std::vector<absl::string_view> v = splitter;
425
0
      return std::vector<std::string, A>(v.begin(), v.end());
426
0
    }
427
  };
428
429
  // Partial specialization for containers of pairs (e.g., maps).
430
  //
431
  // The algorithm is to insert a new pair into the map for each even-numbered
432
  // item, with the even-numbered item as the key with a default-constructed
433
  // value. Each odd-numbered item will then be assigned to the last pair's
434
  // value.
435
  template <typename Container, typename First, typename Second>
436
  struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
437
    using iterator = typename Container::iterator;
438
439
    Container operator()(const Splitter& splitter) const {
440
      Container m;
441
      iterator it;
442
      // `insert` alternates on every part: true means the part is a key to
      // insert, false means it is the value for the entry `it` refers to.
      bool insert = true;
443
      for (const absl::string_view sv : splitter) {
444
        if (insert) {
445
          it = InsertOrEmplace(&m, sv);
446
        } else {
447
          it->second = Second(sv);
448
        }
449
        insert = !insert;
450
      }
451
      return m;
452
    }
453
454
    // Inserts the key and an empty value into the map, returning an iterator to
455
    // the inserted item. We use emplace() if available, otherwise insert().
456
    template <typename M>
457
    static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
458
        M* m, absl::string_view key) {
459
      // Use piecewise_construct to support old versions of gcc in which pair
460
      // constructor can't otherwise construct string from string_view.
461
      return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
462
                               std::tuple<>()));
463
    }
464
    template <typename M>
465
    static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
466
        M* m, absl::string_view key) {
467
      return ToIter(m->insert(std::make_pair(First(key), Second(""))));
468
    }
469
470
    // Accept either a std::pair<iterator, bool> or a plain iterator and
    // return the iterator in both cases.
    static iterator ToIter(std::pair<iterator, bool> pair) {
471
      return pair.first;
472
    }
473
    static iterator ToIter(iterator iter) { return iter; }
474
  };
475
476
  // All three are stored by value; they are filled in by the constructor.
  StringType text_;
477
  Delimiter delimiter_;
478
  Predicate predicate_;
479
};
480
481
}  // namespace strings_internal
482
ABSL_NAMESPACE_END
483
}  // namespace absl
484
485
#endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_