Coverage Report

Created: 2025-08-25 06:55

/src/abseil-cpp/absl/strings/internal/str_split_internal.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2017 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
16
// This file declares INTERNAL parts of the Split API that are inline/templated
17
// or otherwise need to be available at compile time. The main abstractions
18
// defined in here are
19
//
20
//   - ConvertibleToStringView
21
//   - SplitIterator<>
22
//   - Splitter<>
23
//
24
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25
// absl/strings/str_split.h.
26
//
27
// IWYU pragma: private, include "absl/strings/str_split.h"
28
29
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31
32
#include <array>
33
#include <cassert>
34
#include <cstddef>
35
#include <initializer_list>
36
#include <iterator>
37
#include <tuple>
38
#include <type_traits>
39
#include <utility>
40
#include <vector>
41
42
#include "absl/base/macros.h"
43
#include "absl/base/port.h"
44
#include "absl/meta/type_traits.h"
45
#include "absl/strings/string_view.h"
46
47
#ifdef _GLIBCXX_DEBUG
48
#include "absl/strings/internal/stl_type_traits.h"
49
#endif  // _GLIBCXX_DEBUG
50
51
namespace absl {
52
ABSL_NAMESPACE_BEGIN
53
namespace strings_internal {
54
55
// This class is implicitly constructible from everything that absl::string_view
56
// is implicitly constructible from, except for rvalue strings.  This means it
57
// can be used as a function parameter in places where passing a temporary
58
// string might cause memory lifetime issues.
//
// The only state is an absl::string_view (value_), so an instance refers to
// the caller's character data rather than holding a copy of it.
59
class ConvertibleToStringView {
60
 public:
61
  // From a C string. The assert is a debug-only non-null check; note that it
  // runs in the constructor body, after value_ was already initialized from s.
  ConvertibleToStringView(const char* s)  // NOLINT(runtime/explicit)
62
0
      : value_(s) {
63
0
    assert(s != nullptr);
64
0
  }
65
0
  // Same as above for a mutable char pointer.
  ConvertibleToStringView(char* s) : value_(s) {  // NOLINT(runtime/explicit)
66
0
    assert(s != nullptr);
67
0
  }
  // From an existing view; the view is stored as-is.
68
  ConvertibleToStringView(absl::string_view s)     // NOLINT(runtime/explicit)
69
6.79M
      : value_(s) {}
  // From an lvalue std::string; value_ is initialized from s.
70
  ConvertibleToStringView(const std::string& s)  // NOLINT(runtime/explicit)
71
0
      : value_(s) {}
72
73
  // Disable conversion from rvalue strings.
  // Both the non-const and the const rvalue-reference overloads are deleted.
74
  ConvertibleToStringView(std::string&& s) = delete;
75
  ConvertibleToStringView(const std::string&& s) = delete;
76
77
6.79M
  // Returns the wrapped view.
  absl::string_view value() const { return value_; }
78
79
 private:
80
  absl::string_view value_;
81
};
82
83
// An iterator that enumerates the parts of a string from a Splitter. The text
84
// to be split, the Delimiter, and the Predicate are all taken from the given
85
// Splitter object. Iterators may only be compared if they refer to the same
86
// Splitter instance.
87
//
88
// This class is NOT part of the public splitting API.
//
// The iterator keeps its own copies of the delimiter and the predicate, a
// pointer to the Splitter, the current piece (curr_) and the offset in the
// text at which the next search starts (pos_).
89
template <typename Splitter>
90
class SplitIterator {
91
 public:
92
  using iterator_category = std::input_iterator_tag;
93
  using value_type = absl::string_view;
94
  using difference_type = ptrdiff_t;
95
  using pointer = const value_type*;
96
  using reference = const value_type&;
97
98
  // kInitState: pieces may still follow the current one.
  // kLastState: curr_ is the final piece; the next increment moves to
  //             kEndState without searching again.
  // kEndState:  past-the-end; at_end() returns true.
  enum State { kInitState, kLastState, kEndState };
  // Builds an iterator over `splitter`. With any state other than kEndState
  // the iterator is immediately advanced to the first accepted piece; with
  // kEndState it is positioned at the end of the text.
99
  SplitIterator(State state, const Splitter* splitter)
100
1.99M
      : pos_(0),
101
1.99M
        state_(state),
102
1.99M
        splitter_(splitter),
103
1.99M
        delimiter_(splitter->delimiter()),
104
1.99M
        predicate_(splitter->predicate()) {
105
    // Hack to maintain backward compatibility. This one block makes it so an
106
    // empty absl::string_view whose .data() happens to be nullptr behaves
107
    // *differently* from an otherwise empty absl::string_view whose .data() is
108
    // not nullptr. This is an undesirable difference in general, but this
109
    // behavior is maintained to avoid breaking existing code that happens to
110
    // depend on this old behavior/bug. Perhaps it will be fixed one day. The
111
    // difference in behavior is as follows:
112
    //   Split(absl::string_view(""), '-');  // {""}
113
    //   Split(absl::string_view(), '-');    // {}
114
1.99M
    if (splitter_->text().data() == nullptr) {
115
0
      state_ = kEndState;
116
0
      pos_ = splitter_->text().size();
117
0
      return;
118
0
    }
119
120
1.99M
    if (state_ == kEndState) {
121
0
      // End iterator: pos_ is set to the text size, matching the value used
      // by the null-data branch above.
      pos_ = splitter_->text().size();
122
1.99M
    } else {
123
1.99M
      // Begin iterator: load the first piece into curr_.
      ++(*this);
124
1.99M
    }
125
1.99M
  }
126
127
10.6M
  // True once the iterator has moved past the last piece.
  bool at_end() const { return state_ == kEndState; }
128
129
0
  // Both accessors expose curr_, the piece produced by the latest increment.
  reference operator*() const { return curr_; }
130
13.2M
  pointer operator->() const { return &curr_; }
131
132
8.64M
  // Advances to the next piece accepted by the predicate, or to kEndState if
  // the previous piece was the last one.
  SplitIterator& operator++() {
133
8.64M
    do {
134
8.64M
      if (state_ == kLastState) {
135
1.99M
        state_ = kEndState;
136
1.99M
        return *this;
137
1.99M
      }
138
6.64M
      const absl::string_view text = splitter_->text();
139
6.64M
      // d is the view returned by the delimiter's search starting at pos_.
      const absl::string_view d = delimiter_.Find(text, pos_);
140
6.64M
      // A result that begins at the very end of the text marks this piece as
      // the last one. NOTE(review): presumably this is how a Delimiter
      // reports "not found" -- confirm against the Delimiter contract.
      if (d.data() == text.data() + text.size()) state_ = kLastState;
141
6.64M
      // The piece is everything from pos_ up to where d begins.
      curr_ = text.substr(pos_,
142
6.64M
                          static_cast<size_t>(d.data() - (text.data() + pos_)));
143
6.64M
      // Skip over the piece and the delimiter match that followed it.
      pos_ += curr_.size() + d.size();
144
6.64M
    } while (!predicate_(curr_));  // Rejected pieces are skipped.
145
6.64M
    return *this;
146
8.64M
  }
147
148
  // Post-increment: returns a copy taken before advancing.
  SplitIterator operator++(int) {
149
    SplitIterator old(*this);
150
    ++(*this);
151
    return old;
152
  }
153
154
0
  // Equality looks only at state_ and pos_; splitter_ and curr_ are not
  // compared.
  friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
155
0
    return a.state_ == b.state_ && a.pos_ == b.pos_;
156
0
  }
157
158
0
  friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
159
0
    return !(a == b);
160
0
  }
161
162
 private:
163
  size_t pos_;
164
  State state_;
165
  absl::string_view curr_;
166
  const Splitter* splitter_;
167
  typename Splitter::DelimiterType delimiter_;
168
  typename Splitter::PredicateType predicate_;
169
};
170
171
// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
// In this file it is the switch between map-like and non-map-like handling of
// a conversion target.
172
// Primary template: the fallback, used when the specialization below does not
// apply.
template <typename T, typename = void>
173
struct HasMappedType : std::false_type {};
174
// Specialization that is viable only when T::mapped_type names a type.
template <typename T>
175
struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
176
    : std::true_type {};
177
178
// HasValueType<T>::value is true iff there exists a type T::value_type.
178
// Primary template: the fallback, used when the specialization below does not
// apply.
template <typename T, typename = void>
179
struct HasValueType : std::false_type {};
180
// Specialization that is viable only when T::value_type names a type.
template <typename T>
181
struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
182
};
184
185
// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
185
// Primary template: the fallback, used when the specialization below does not
// apply.
template <typename T, typename = void>
186
struct HasConstIterator : std::false_type {};
187
// Specialization that is viable only when T::const_iterator names a type.
template <typename T>
188
struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
189
    : std::true_type {};
191
192
// HasEmplace<T>::value is true iff the expression
// std::declval<T>().emplace() -- a call to emplace with no arguments -- is
// well-formed.
192
// Primary template: the fallback, used when the specialization below does not
// apply.
template <typename T, typename = void>
193
struct HasEmplace : std::false_type {};
194
// Specialization that is viable only when the zero-argument call compiles.
template <typename T>
195
struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>>
196
    : std::true_type {};
198
199
// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
200
// details below in Splitter<> where this is used.
//
// Detection works by overload resolution on a null T*: the templated overload
// accepts a pointer to any std::initializer_list, and the variadic overload
// catches every other type. The two functions are only declared; they are
// named inside decltype and their return type supplies the base class.
201
std::false_type IsInitializerListDispatch(...);  // default: No
202
template <typename T>
203
std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
204
template <typename T>
205
struct IsInitializerList
206
    : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
207
208
// SplitterIsConvertibleTo<C>::value is true iff the specified condition
209
// is true for type 'C'.
210
//
211
// Restricts conversion to container-like types (by testing for the presence of
212
// a const_iterator member type) and also to disable conversion to an
213
// std::initializer_list (which also has a const_iterator). Otherwise, code
214
// compiled in C++11 will get an error due to ambiguous conversion paths (in
215
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
216
// or an std::initializer_list<T>).
217
218
// Primary template: any combination not matched by a specialization below
// (in particular has_value_type == false) is not convertible.
template <typename C, bool has_value_type, bool has_mapped_type>
219
struct SplitterIsConvertibleToImpl : std::false_type {};
220
221
// Non-map container: its value_type must be constructible from
// absl::string_view.
template <typename C>
222
struct SplitterIsConvertibleToImpl<C, true, false>
223
    : std::is_constructible<typename C::value_type, absl::string_view> {};
224
225
// Map-like container: both key_type and mapped_type must be constructible
// from absl::string_view.
template <typename C>
226
struct SplitterIsConvertibleToImpl<C, true, true>
227
    : absl::conjunction<
228
          std::is_constructible<typename C::key_type, absl::string_view>,
229
          std::is_constructible<typename C::mapped_type, absl::string_view>> {};
230
231
// The second template argument passed to the Impl folds together every
// container-shape requirement: not an initializer_list, has value_type, has
// const_iterator (plus one extra exclusion when _GLIBCXX_DEBUG is defined).
// The third argument selects the map-like specialization.
template <typename C>
232
struct SplitterIsConvertibleTo
233
    : SplitterIsConvertibleToImpl<
234
          C,
235
#ifdef _GLIBCXX_DEBUG
236
          !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
237
#endif  // _GLIBCXX_DEBUG
238
              !IsInitializerList<
239
                  typename std::remove_reference<C>::type>::value &&
240
              HasValueType<C>::value && HasConstIterator<C>::value,
241
          HasMappedType<C>::value> {
242
};
243
244
// ShouldUseLifetimeBound<StringType, Container>::value is true iff StringType
// is std::string and Container::value_type is absl::string_view. Splitter
// uses it to choose between two otherwise identical container conversion
// operators, one of which carries ABSL_ATTRIBUTE_LIFETIME_BOUND.
//
// Primary template: the fallback, also used when Container has no value_type.
template <typename StringType, typename Container, typename = void>
245
struct ShouldUseLifetimeBound : std::false_type {};
246
247
// Specialization enabled only when both is_same conditions hold.
template <typename StringType, typename Container>
248
struct ShouldUseLifetimeBound<
249
    StringType, Container,
250
    std::enable_if_t<
251
        std::is_same<StringType, std::string>::value &&
252
        std::is_same<typename Container::value_type, absl::string_view>::value>>
253
    : std::true_type {};
254
255
// Pair counterpart of the lifetime-bound test: true iff StringType is
// std::string and at least one of First / Second is absl::string_view.
template <typename StringType, typename First, typename Second>
256
using ShouldUseLifetimeBoundForPair = std::integral_constant<
257
    bool, std::is_same<StringType, std::string>::value &&
258
              (std::is_same<First, absl::string_view>::value ||
259
               std::is_same<Second, absl::string_view>::value)>;
260
261
// Array counterpart of the lifetime-bound test: true iff StringType is
// std::string and ElementType is absl::string_view. Size is accepted as a
// template parameter but does not take part in the condition.
template <typename StringType, typename ElementType, std::size_t Size>
262
using ShouldUseLifetimeBoundForArray = std::integral_constant<
263
    bool, std::is_same<StringType, std::string>::value &&
264
              std::is_same<ElementType, absl::string_view>::value>;
265
266
// This class implements the range that is returned by absl::StrSplit(). This
266
// class has templated conversion operators that allow it to be implicitly
267
// converted to a variety of types that the caller may have specified on the
268
// left-hand side of an assignment.
269
//
270
// The main interface for interacting with this class is through its implicit
271
// conversion operators. However, this class may also be used like a container
272
// in that it has .begin() and .end() member functions. It may also be used
273
// within a range-for loop.
274
//
275
// Output containers can be collections of any type that is constructible from
276
// an absl::string_view.
277
//
278
// A Predicate functor may be supplied. This predicate will be used to filter
279
// the split strings: only strings for which the predicate returns true will be
280
// kept. A Predicate object is any unary functor that takes an absl::string_view
281
// and returns bool.
282
//
283
// The StringType parameter can be either string_view or string, depending on
284
// whether the Splitter refers to a string stored elsewhere, or if the string
285
// resides inside the Splitter itself.
286
template <typename Delimiter, typename Predicate, typename StringType>
287
class Splitter {
288
 public:
289
  using DelimiterType = Delimiter;
290
  using PredicateType = Predicate;
291
  using const_iterator = strings_internal::SplitIterator<Splitter>;
292
  using value_type = typename std::iterator_traits<const_iterator>::value_type;
293
294
  // All three arguments are taken by value and moved into the members.
  Splitter(StringType input_text, Delimiter d, Predicate p)
295
1.99M
      : text_(std::move(input_text)),
296
1.99M
        delimiter_(std::move(d)),
297
1.99M
        predicate_(std::move(p)) {}
298
299
8.64M
  // Accessors. text() always hands out a view of text_, whichever StringType
  // is stored.
  absl::string_view text() const { return text_; }
300
1.99M
  const Delimiter& delimiter() const { return delimiter_; }
301
1.99M
  const Predicate& predicate() const { return predicate_; }
302
303
  // Range functions that iterate the split substrings as absl::string_view
304
  // objects. These methods enable a Splitter to be used in a range-based for
305
  // loop.
306
1.99M
  const_iterator begin() const { return {const_iterator::kInitState, this}; }
307
0
  const_iterator end() const { return {const_iterator::kEndState, this}; }
308
309
  // An implicit conversion operator that is restricted to only those containers
310
  // that the splitter is convertible to.
  //
  // There are two overloads with the same body. This one is selected when
  // ShouldUseLifetimeBound is true and carries ABSL_ATTRIBUTE_LIFETIME_BOUND;
  // the other is selected when it is false and has no annotation.
311
  template <
312
      typename Container,
313
      std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value &&
314
                           SplitterIsConvertibleTo<Container>::value,
315
                       std::nullptr_t> = nullptr>
316
  // NOLINTNEXTLINE(google-explicit-constructor)
317
  operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
318
    return ConvertToContainer<Container, typename Container::value_type,
319
                              HasMappedType<Container>::value>()(*this);
320
  }
321
322
  template <
323
      typename Container,
324
      std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value &&
325
                           SplitterIsConvertibleTo<Container>::value,
326
                       std::nullptr_t> = nullptr>
327
  // NOLINTNEXTLINE(google-explicit-constructor)
328
1.99M
  operator Container() const {
329
1.99M
    return ConvertToContainer<Container, typename Container::value_type,
330
1.99M
                              HasMappedType<Container>::value>()(*this);
331
1.99M
  }
Unexecuted instantiation: _ZNK4absl16strings_internal8SplitterINS_6ByCharENS_10AllowEmptyENSt3__117basic_string_viewIcNS4_11char_traitsIcEEEEEcvT_INS4_6vectorINS4_12basic_stringIcS7_NS4_9allocatorIcEEEENSE_ISG_EEEETnNS4_9enable_ifIXaantsr22ShouldUseLifetimeBoundIS8_SA_EE5valuesr23SplitterIsConvertibleToISA_EE5valueEDnE4typeELDn0EEEv
_ZNK4absl16strings_internal8SplitterINS_6ByCharENS_10AllowEmptyENSt3__117basic_string_viewIcNS4_11char_traitsIcEEEEEcvT_INS4_6vectorIS8_NS4_9allocatorIS8_EEEETnNS4_9enable_ifIXaantsr22ShouldUseLifetimeBoundIS8_SA_EE5valuesr23SplitterIsConvertibleToISA_EE5valueEDnE4typeELDn0EEEv
Line
Count
Source
329
1.99M
  operator Container() const {
330
1.99M
    return ConvertToContainer<Container, typename Container::value_type,
331
1.99M
                              HasMappedType<Container>::value>()(*this);
332
1.99M
  }
333
334
  // Returns a pair with its .first and .second members set to the first two
335
  // strings returned by the begin() iterator. Either/both of .first and .second
336
  // will be constructed with empty strings if the iterator doesn't have a
337
  // corresponding value.
  //
  // As with the container conversion, the two overloads differ only in the
  // ABSL_ATTRIBUTE_LIFETIME_BOUND annotation, chosen by
  // ShouldUseLifetimeBoundForPair.
338
  template <typename First, typename Second,
339
            std::enable_if_t<
340
                ShouldUseLifetimeBoundForPair<StringType, First, Second>::value,
341
                std::nullptr_t> = nullptr>
342
  // NOLINTNEXTLINE(google-explicit-constructor)
343
  operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
344
    return ConvertToPair<First, Second>();
345
  }
346
347
  template <typename First, typename Second,
348
            std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First,
349
                                                            Second>::value,
350
                             std::nullptr_t> = nullptr>
351
  // NOLINTNEXTLINE(google-explicit-constructor)
352
  operator std::pair<First, Second>() const {
353
    return ConvertToPair<First, Second>();
354
  }
355
356
  // Returns an array with its elements set to the first few strings returned by
357
  // the begin() iterator.  If there is not a corresponding value the empty
358
  // string is used.
  //
  // Again two overloads that differ only in the ABSL_ATTRIBUTE_LIFETIME_BOUND
  // annotation, chosen by ShouldUseLifetimeBoundForArray.
359
  template <typename ElementType, std::size_t Size,
360
            std::enable_if_t<ShouldUseLifetimeBoundForArray<
361
                                 StringType, ElementType, Size>::value,
362
                             std::nullptr_t> = nullptr>
363
  // NOLINTNEXTLINE(google-explicit-constructor)
364
  operator std::array<ElementType, Size>() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
365
    return ConvertToArray<ElementType, Size>();
366
  }
367
368
  template <typename ElementType, std::size_t Size,
369
            std::enable_if_t<!ShouldUseLifetimeBoundForArray<
370
                                 StringType, ElementType, Size>::value,
371
                             std::nullptr_t> = nullptr>
372
  // NOLINTNEXTLINE(google-explicit-constructor)
373
  operator std::array<ElementType, Size>() const {
374
    return ConvertToArray<ElementType, Size>();
375
  }
376
377
 private:
378
  // Shared implementation of the std::array conversions. Copies at most Size
  // pieces; any further pieces are ignored. Slots that receive no piece keep
  // whatever the default-initialization of `a` gave them.
  template <typename ElementType, std::size_t Size>
379
  std::array<ElementType, Size> ConvertToArray() const {
380
    std::array<ElementType, Size> a;
381
    auto it = begin();
382
    for (std::size_t i = 0; i < Size && it != end(); ++i, ++it) {
383
      a[i] = ElementType(*it);
384
    }
385
    return a;
386
  }
387
388
  // Shared implementation of the std::pair conversions. Reads at most two
  // pieces; a missing piece leaves the corresponding default-constructed
  // absl::string_view in place before First/Second are built from it.
  template <typename First, typename Second>
389
  std::pair<First, Second> ConvertToPair() const {
390
    absl::string_view first, second;
391
    auto it = begin();
392
    if (it != end()) {
393
      first = *it;
394
      if (++it != end()) {
395
        second = *it;
396
      }
397
    }
398
    return {First(first), Second(second)};
399
  }
400
401
  // ConvertToContainer is a functor converting a Splitter to the requested
402
  // Container of ValueType. It is specialized below to optimize splitting to
403
  // certain combinations of Container and ValueType.
404
  //
405
  // This base template handles the generic case of storing the split results in
406
  // the requested non-map-like container and converting the split substrings to
407
  // the requested type.
408
  template <typename Container, typename ValueType, bool is_map = false>
409
  struct ConvertToContainer {
410
    Container operator()(const Splitter& splitter) const {
411
      Container c;
412
      // Elements are added through an insert iterator positioned at c.end().
      auto it = std::inserter(c, c.end());
413
      for (const auto& sp : splitter) {
414
        *it++ = ValueType(sp);
415
      }
416
      return c;
417
    }
418
  };
419
420
  // Partial specialization for a std::vector<absl::string_view>.
421
  //
422
  // Optimized for the common case of splitting to a
423
  // std::vector<absl::string_view>. In this case we first split the results to
424
  // a small array of absl::string_view on the stack, to reduce reallocations.
425
  template <typename A>
426
  struct ConvertToContainer<std::vector<absl::string_view, A>,
427
                            absl::string_view, false> {
428
    std::vector<absl::string_view, A> operator()(
429
1.99M
        const Splitter& splitter) const {
430
1.99M
      // Plain pointer/length pair used for the scratch array; it converts to
      // absl::string_view when the batch is inserted into the vector.
      // NOTE(review): presumably chosen so the scratch array needs no
      // per-element initialization -- confirm.
      struct raw_view {
431
1.99M
        const char* data;
432
1.99M
        size_t size;
433
6.64M
        operator absl::string_view() const {  // NOLINT(runtime/explicit)
434
6.64M
          return {data, size};
435
6.64M
        }
436
1.99M
      };
437
1.99M
      std::vector<absl::string_view, A> v;
438
1.99M
      std::array<raw_view, 16> ar;
439
4.15M
      // Outer loop: one iteration per batch of up to ar.size() pieces.
      for (auto it = splitter.begin(); !it.at_end();) {
440
2.16M
        size_t index = 0;
441
6.64M
        // Inner loop: fill the scratch array. The first pass is safe because
        // the outer condition already established !it.at_end().
        do {
442
6.64M
          ar[index].data = it->data();
443
6.64M
          ar[index].size = it->size();
444
6.64M
          ++it;
445
6.64M
        } while (++index != ar.size() && !it.at_end());
446
        // We static_cast index to a signed type to work around overzealous
447
        // compiler warnings about signedness.
448
2.16M
        v.insert(v.end(), ar.begin(),
449
2.16M
                 ar.begin() + static_cast<ptrdiff_t>(index));
450
2.16M
      }
451
1.99M
      return v;
452
1.99M
    }
453
  };
454
455
  // Partial specialization for a std::vector<std::string>.
456
  //
457
  // Optimized for the common case of splitting to a std::vector<std::string>.
458
  // In this case we first split the results to a std::vector<absl::string_view>
459
  // so the returned std::vector<std::string> can have space reserved to avoid
460
  // std::string moves.
461
  template <typename A>
462
  struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
463
0
    std::vector<std::string, A> operator()(const Splitter& splitter) const {
464
0
      // The initialization below goes through the Splitter's own conversion
      // to std::vector<absl::string_view>.
      const std::vector<absl::string_view> v = splitter;
465
0
      return std::vector<std::string, A>(v.begin(), v.end());
466
0
    }
467
  };
468
469
  // Partial specialization for containers of pairs (e.g., maps).
470
  //
471
  // The algorithm is to insert a new pair into the map for each even-numbered
472
  // item, with the even-numbered item as the key with a default-constructed
473
  // value. Each odd-numbered item will then be assigned to the last pair's
474
  // value.
475
  template <typename Container, typename First, typename Second>
476
  struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
477
    using iterator = typename Container::iterator;
478
479
    Container operator()(const Splitter& splitter) const {
480
      Container m;
481
      // `it` is assigned on the first piece (insert starts out true) before
      // the else-branch can read it.
      iterator it;
482
      // Toggles on every piece: true -> the piece is a key, false -> the
      // piece is the value for the entry `it` refers to.
      bool insert = true;
483
      for (const absl::string_view sv : splitter) {
484
        if (insert) {
485
          it = InsertOrEmplace(&m, sv);
486
        } else {
487
          it->second = Second(sv);
488
        }
489
        insert = !insert;
490
      }
491
      return m;
492
    }
493
494
    // Inserts the key and an empty value into the map, returning an iterator to
495
    // the inserted item. We use emplace() if available, otherwise insert().
496
    template <typename M>
497
    static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace(
498
        M* m, absl::string_view key) {
499
      // Use piecewise_construct to support old versions of gcc in which pair
500
      // constructor can't otherwise construct string from string_view.
501
      return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key),
502
                               std::tuple<>()));
503
    }
504
    template <typename M>
505
    static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace(
506
        M* m, absl::string_view key) {
507
      return ToIter(m->insert(std::make_pair(First(key), Second(""))));
508
    }
509
510
    // ToIter normalizes the two result shapes handled here -- a
    // pair<iterator, bool> or a bare iterator -- to just the iterator.
    static iterator ToIter(std::pair<iterator, bool> pair) {
511
      return pair.first;
512
    }
513
    static iterator ToIter(iterator iter) { return iter; }
514
  };
515
516
  StringType text_;
517
  Delimiter delimiter_;
518
  Predicate predicate_;
519
};
520
521
}  // namespace strings_internal
522
ABSL_NAMESPACE_END
523
}  // namespace absl
524
525
#endif  // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_