Coverage Report

Created: 2023-09-25 06:27

/src/abseil-cpp/absl/strings/internal/str_split_internal.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2017 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
16
// This file declares INTERNAL parts of the Split API that are inline/templated
17
// or otherwise need to be available at compile time. The main abstractions
18
// defined in here are
19
//
20
//   - ConvertibleToStringView
21
//   - SplitIterator<>
22
//   - Splitter<>
23
//
24
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25
// absl/strings/str_split.h.
26
//
27
// IWYU pragma: private, include "absl/strings/str_split.h"
28
29
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31
32
#include <array>
33
#include <initializer_list>
34
#include <iterator>
35
#include <tuple>
36
#include <type_traits>
37
#include <utility>
38
#include <vector>
39
40
#include "absl/base/macros.h"
41
#include "absl/base/port.h"
42
#include "absl/meta/type_traits.h"
43
#include "absl/strings/string_view.h"
44
45
#ifdef _GLIBCXX_DEBUG
46
#include "absl/strings/internal/stl_type_traits.h"
47
#endif  // _GLIBCXX_DEBUG
48
49
namespace absl {
50
ABSL_NAMESPACE_BEGIN
51
namespace strings_internal {
52
53
// This class is implicitly constructible from everything that absl::string_view
54
// is implicitly constructible from, except for rvalue strings.  This means it
55
// can be used as a function parameter in places where passing a temporary
56
// string might cause memory lifetime issues.
57
class ConvertibleToStringView {
58
 public:
59
  ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
60
0
      : value_(s) {}
61
0
  ConvertibleToStringView(char* s) : value_(s) {}  // NOLINT(runtime/explicit)
62
  ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
63
13.6M
      : value_(s) {}
64
  ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
65
0
      : value_(s) {}
66
67
  // Disable conversion from rvalue strings.
68
  ConvertibleToStringView(std::string&& s) = delete;
69
  ConvertibleToStringView(const std::string&& s) = delete;
70
71
13.6M
  absl::string_view value() const { return value_; }
72
73
 private:
74
  absl::string_view value_;
75
};
76
77
// An iterator that enumerates the parts of a string from a Splitter. The text
78
// to be split, the Delimiter, and the Predicate are all taken from the given
79
// Splitter object. Iterators may only be compared if they refer to the same
80
// Splitter instance.
81
//
82
// This class is NOT part of the public splitting API.
83
template <typename Splitter>
84
class SplitIterator {
85
 public:
86
  using iterator_category = std::input_iterator_tag;
87
  using value_type = absl::string_view;
88
  using difference_type = ptrdiff_t;
89
  using pointer = const value_type*;
90
  using reference = const value_type&;
91
92
  enum State { kInitState, kLastState, kEndState };
93
  SplitIterator(State state, const Splitter* splitter)
94
      : pos_(0),
95
        state_(state),
96
        splitter_(splitter),
97
        delimiter_(splitter->delimiter()),
98
5.98M
        predicate_(splitter->predicate()) {
99
    // Hack to maintain backward compatibility. This one block makes it so an
100
    // empty absl::string_view whose .data() happens to be nullptr behaves
101
    // *differently* from an otherwise empty absl::string_view whose .data() is
102
    // not nullptr. This is an undesirable difference in general, but this
103
    // behavior is maintained to avoid breaking existing code that happens to
104
    // depend on this old behavior/bug. Perhaps it will be fixed one day. The
105
    // difference in behavior is as follows:
106
    //   Split(absl::string_view(""), '-');  // {""}
107
    //   Split(absl::string_view(), '-');    // {}
108
5.98M
    if (splitter_->text().data() == nullptr) {
109
0
      state_ = kEndState;
110
0
      pos_ = splitter_->text().size();
111
0
      return;
112
0
    }
113
114
5.98M
    if (state_ == kEndState) {
115
0
      pos_ = splitter_->text().size();
116
5.98M
    } else {
117
5.98M
      ++(*this);
118
5.98M
    }
119
5.98M
  }
120
121
26.9M
  bool at_end() const { return state_ == kEndState; }
122
123
  reference operator*() const { return curr_; }
124
30.0M
  pointer operator->() const { return &curr_; }
125
126
21.0M
  SplitIterator& operator++() {
127
21.0M
    do {
128
21.0M
      if (state_ == kLastState) {
129
5.98M
        state_ = kEndState;
130
5.98M
        return *this;
131
5.98M
      }
132
15.0M
      const absl::string_view text = splitter_->text();
133
15.0M
      const absl::string_view d = delimiter_.Find(text, pos_);
134
15.0M
      if (d.data() == text.data() + text.size()) state_ = kLastState;
135
15.0M
      curr_ = text.substr(pos_,
136
15.0M
                          static_cast<size_t>(d.data() - (text.data() + pos_)));
137
15.0M
      pos_ += curr_.size() + d.size();
138
15.0M
    } while (!predicate_(curr_));
139
15.0M
    return *this;
140
21.0M
  }
141
142
  SplitIterator operator++(int) {
143
    SplitIterator old(*this);
144
    ++(*this);
145
    return old;
146
  }
147
148
  friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
149
    return a.state_ == b.state_ && a.pos_ == b.pos_;
150
  }
151
152
  friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
153
    return !(a == b);
154
  }
155
156
 private:
157
  size_t pos_;
158
  State state_;
159
  absl::string_view curr_;
160
  const Splitter* splitter_;
161
  typename Splitter::DelimiterType delimiter_;
162
  typename Splitter::PredicateType predicate_;
163
};
164
165
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
166
template <typename T, typename = void>
167
struct HasMappedType : std::false_type {};
168
template <typename T>
169
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
170
    : std::true_type {};
171
172
// HasValueType<T>::value is true iff there exists a type T::value_type.
173
template <typename T, typename = void>
174
struct HasValueType : std::false_type {};
175
template <typename T>
176
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
177
};
178
179
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
180
template <typename T, typename = void>
181
struct HasConstIterator : std::false_type {};
182
template <typename T>
183
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
184
    : std::true_type {};
185
186
// HasEmplace<T>::value is true iff there exists a method T::emplace().
187
template <typename T, typename = void>
188
struct HasEmplace : std::false_type {};
189
template <typename T>
190
struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
191
    : std::true_type {};
192
193
// IsInitializerList<T>::value is true exactly when T is a specialization of
// std::initializer_list. The answer is obtained by overload resolution on a
// null T*: the pointer-to-initializer_list overload is chosen when it matches,
// and the variadic overload otherwise. The dispatch functions are only ever
// named inside decltype, so they are declared but never defined.
template <typename Element>
std::true_type IsInitializerListDispatch(std::initializer_list<Element>*);
std::false_type IsInitializerListDispatch(...);  // fallback: not a match

template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
201
202
// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
203
// is true for type 'C'.
204
//
205
// Restricts conversion to container-like types (by testing for the presence of
206
// a const_iterator member type) and also to disable conversion to an
207
// std::initializer_list (which also has a const_iterator). Otherwise, code
208
// compiled in C++11 will get an error due to ambiguous conversion paths (in
209
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
210
// or an std::initializer_list<T>).
211
212
template <typename C, bool has_value_type, bool has_mapped_type>
213
struct SplitterIsConvertibleToImpl : std::false_type {};
214
215
template <typename C>
216
struct SplitterIsConvertibleToImpl<C, true, false>
217
    : std::is_constructible<typename C::value_type, absl::string_view> {};
218
219
template <typename C>
220
struct SplitterIsConvertibleToImpl<C, true, true>
221
    : absl::conjunction<
222
          std::is_constructible<typename C::key_type, absl::string_view>,
223
          std::is_constructible<typename C::mapped_type, absl::string_view>> {};
224
225
template <typename C>
226
struct SplitterIsConvertibleTo
227
    : SplitterIsConvertibleToImpl<
228
          C,
229
#ifdef _GLIBCXX_DEBUG
230
          !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
231
#endif  // _GLIBCXX_DEBUG
232
              !IsInitializerList<
233
                  typename std::remove_reference<C>::type>::value &&
234
              HasValueType<C>::value && HasConstIterator<C>::value,
235
          HasMappedType<C>::value> {
236
};
237
238
template <typename StringType, typename Container, typename = void>
239
struct ShouldUseLifetimeBound : std::false_type {};
240
241
template <typename StringType, typename Container>
242
struct ShouldUseLifetimeBound<
243
    StringType, Container,
244
    std::enable_if_t<
245
        std::is_same<StringType, std::string>::value &&
246
        std::is_same<typename Container::value_type, absl::string_view>::value>>
247
    : std::true_type {};
248
249
template <typename StringType, typename First, typename Second>
250
using ShouldUseLifetimeBoundForPair = std::integral_constant<
251
    bool, std::is_same<StringType, std::string>::value &&
252
              (std::is_same<First, absl::string_view>::value ||
253
               std::is_same<Second, absl::string_view>::value)>;
254
255
256
// This class implements the range that is returned by absl::StrSplit(). This
257
// class has templated conversion operators that allow it to be implicitly
258
// converted to a variety of types that the caller may have specified on the
259
// left-hand side of an assignment.
260
//
261
// The main interface for interacting with this class is through its implicit
262
// conversion operators. However, this class may also be used like a container
263
// in that it has .begin() and .end() member functions. It may also be used
264
// within a range-for loop.
265
//
266
// Output containers can be collections of any type that is constructible from
267
// an absl::string_view.
268
//
269
// A Predicate functor may be supplied. This predicate will be used to filter
270
// the split strings: only strings for which the predicate returns true will be
271
// kept. A Predicate object is any unary functor that takes an absl::string_view
272
// and returns bool.
273
//
274
// The StringType parameter can be either string_view or string, depending on
275
// whether the Splitter refers to a string stored elsewhere, or if the string
276
// resides inside the Splitter itself.
277
template <typename Delimiter, typename Predicate, typename StringType>
278
class Splitter {
279
 public:
280
  using DelimiterType = Delimiter;
281
  using PredicateType = Predicate;
282
  using const_iterator = strings_internal::SplitIterator<Splitter>;
283
  using value_type = typename std::iterator_traits<const_iterator>::value_type;
284
285
  Splitter(StringType input_text, Delimiter d, Predicate p)
286
      : text_(std::move(input_text)),
287
        delimiter_(std::move(d)),
288
5.98M
        predicate_(std::move(p)) {}
289
290
21.0M
  absl::string_view text() const { return text_; }
291
5.98M
  const Delimiter& delimiter() const { return delimiter_; }
292
5.98M
  const Predicate& predicate() const { return predicate_; }
293
294
  // Range functions that iterate the split substrings as absl::string_view
295
  // objects. These methods enable a Splitter to be used in a range-based for
296
  // loop.
297
5.98M
  const_iterator begin() const { return {const_iterator::kInitState, this}; }
298
  const_iterator end() const { return {const_iterator::kEndState, this}; }
299
300
  // An implicit conversion operator that is restricted to only those containers
301
  // that the splitter is convertible to.
302
  template <
303
      typename Container,
304
      std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
305
                           SplitterIsConvertibleTo<Container>::value,
306
                       std::nullptr_t> = nullptr>
307
  // NOLINTNEXTLINE(google-explicit-constructor)
308
  operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
309
    return ConvertToContainer<Container, typename Container::value_type,
310
                              HasMappedType<Container>::value>()(*this);
311
  }
312
313
  template <
314
      typename Container,
315
      std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
316
                           SplitterIsConvertibleTo<Container>::value,
317
                       std::nullptr_t> = nullptr>
318
  // NOLINTNEXTLINE(google-explicit-constructor)
319
5.98M
  operator Container() const {
320
5.98M
    return ConvertToContainer<Container, typename Container::value_type,
321
5.98M
                              HasMappedType<Container>::value>()(*this);
322
5.98M
  }
Unexecuted instantiation: absl::strings_internal::Splitter<absl::ByChar, absl::AllowEmpty, absl::string_view>::operator std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > ><std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, (decltype(nullptr))0>() const
absl::strings_internal::Splitter<absl::ByChar, absl::AllowEmpty, absl::string_view>::operator std::__1::vector<absl::string_view, std::__1::allocator<absl::string_view> ><std::__1::vector<absl::string_view, std::__1::allocator<absl::string_view> >, (decltype(nullptr))0>() const
Line
Count
Source
319
5.98M
  operator Container() const {
320
5.98M
    return ConvertToContainer<Container, typename Container::value_type,
321
5.98M
                              HasMappedType<Container>::value>()(*this);
322
5.98M
  }
323
324
  // Returns a pair with its .first and .second members set to the first two
325
  // strings returned by the begin() iterator. Either/both of .first and .second
326
  // will be constructed with empty strings if the iterator doesn't have a
327
  // corresponding value.
328
  template <typename First, typename Second,
329
            std::enable_if_t<
330
                ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
331
                std::nullptr_t> = nullptr>
332
  // NOLINTNEXTLINE(google-explicit-constructor)
333
  operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
334
    return ConvertToPair<First, Second>();
335
  }
336
337
  template <typename First, typename Second,
338
            std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
339
                                                            Second>::value,
340
                             std::nullptr_t> = nullptr>
341
  // NOLINTNEXTLINE(google-explicit-constructor)
342
  operator std::pair<First, Second>() const {
343
    return ConvertToPair<First, Second>();
344
  }
345
346
 private:
347
  template <typename First, typename Second>
348
  std::pair<First, Second> ConvertToPair() const {
349
    absl::string_view first, second;
350
    auto it = begin();
351
    if (it != end()) {
352
      first = *it;
353
      if (++it != end()) {
354
        second = *it;
355
      }
356
    }
357
    return {First(first), Second(second)};
358
  }
359
360
  // ConvertToContainer is a functor converting a Splitter to the requested
361
  // Container of ValueType. It is specialized below to optimize splitting to
362
  // certain combinations of Container and ValueType.
363
  //
364
  // This base template handles the generic case of storing the split results in
365
  // the requested non-map-like container and converting the split substrings to
366
  // the requested type.
367
  template <typename Container, typename ValueType, bool is_map = false>
368
  struct ConvertToContainer {
369
    Container operator()(const Splitter& splitter) const {
370
      Container c;
371
      auto it = std::inserter(c, c.end());
372
      for (const auto& sp : splitter) {
373
        *it++ = ValueType(sp);
374
      }
375
      return c;
376
    }
377
  };
378
379
  // Partial specialization for a std::vector<absl::string_view>.
380
  //
381
  // Optimized for the common case of splitting to a
382
  // std::vector<absl::string_view>. In this case we first split the results to
383
  // a small array of absl::string_view on the stack, to reduce reallocations.
384
  template <typename A>
385
  struct ConvertToContainer<std::vector<absl::string_view, A>,
386
                            absl::string_view, false> {
387
    std::vector<absl::string_view, A> operator()(
388
5.98M
        const Splitter& splitter) const {
389
5.98M
      struct raw_view {
390
5.98M
        const char* data;
391
5.98M
        size_t size;
392
15.0M
        operator absl::string_view() const {  // NOLINT(runtime/explicit)
393
15.0M
          return {data, size};
394
15.0M
        }
395
5.98M
      };
396
5.98M
      std::vector<absl::string_view, A> v;
397
5.98M
      std::array<raw_view, 16> ar;
398
12.1M
      for (auto it = splitter.begin(); !it.at_end();) {
399
6.17M
        size_t index = 0;
400
15.0M
        do {
401
15.0M
          ar[index].data = it->data();
402
15.0M
          ar[index].size = it->size();
403
15.0M
          ++it;
404
15.0M
        } while (++index != ar.size() && !it.at_end());
405
6.17M
        v.insert(v.end(), ar.begin(), ar.begin() + index);
406
6.17M
      }
407
5.98M
      return v;
408
5.98M
    }
409
  };
410
411
  // Partial specialization for a std::vector<std::string>.
412
  //
413
  // Optimized for the common case of splitting to a std::vector<std::string>.
414
  // In this case we first split the results to a std::vector<absl::string_view>
415
  // so the returned std::vector<std::string> can have space reserved to avoid
416
  // std::string moves.
417
  template <typename A>
418
  struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
419
0
    std::vector<std::string, A> operator()(const Splitter& splitter) const {
420
0
      const std::vector<absl::string_view> v = splitter;
421
0
      return std::vector<std::string, A>(v.begin(), v.end());
422
0
    }
423
  };
424
425
  // Partial specialization for containers of pairs (e.g., maps).
426
  //
427
  // The algorithm is to insert a new pair into the map for each even-numbered
428
  // item, with the even-numbered item as the key with a default-constructed
429
  // value. Each odd-numbered item will then be assigned to the last pair's
430
  // value.
431
  template <typename Container, typename First, typename Second>
432
  struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
433
    using iterator = typename Container::iterator;
434
435
    Container operator()(const Splitter& splitter) const {
436
      Container m;
437
      iterator it;
438
      bool insert = true;
439
      for (const absl::string_view sv : splitter) {
440
        if (insert) {
441
          it = InsertOrEmplace(&m, sv);
442
        } else {
443
          it->second = Second(sv);
444
        }
445
        insert = !insert;
446
      }
447
      return m;
448
    }
449
450
    // Inserts the key and an empty value into the map, returning an iterator to
451
    // the inserted item. We use emplace() if available, otherwise insert().
452
    template <typename M>
453
    static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
454
        M* m, absl::string_view key) {
455
      // Use piecewise_construct to support old versions of gcc in which pair
456
      // constructor can't otherwise construct string from string_view.
457
      return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
458
                               std::tuple<>()));
459
    }
460
    template <typename M>
461
    static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
462
        M* m, absl::string_view key) {
463
      return ToIter(m->insert(std::make_pair(First(key), Second(""))));
464
    }
465
466
    static iterator ToIter(std::pair<iterator, bool> pair) {
467
      return pair.first;
468
    }
469
    static iterator ToIter(iterator iter) { return iter; }
470
  };
471
472
  StringType text_;
473
  Delimiter delimiter_;
474
  Predicate predicate_;
475
};
476
477
}  // namespace strings_internal
478
ABSL_NAMESPACE_END
479
}  // namespace absl
480
481
#endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_