/src/abseil-cpp/absl/strings/internal/str_split_internal.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2017 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | |
16 | | // This file declares INTERNAL parts of the Split API that are inline/templated |
17 | | // or otherwise need to be available at compile time. The main abstractions |
18 | | // defined in here are |
19 | | // |
20 | | // - ConvertibleToStringView |
21 | | // - SplitIterator<> |
22 | | // - Splitter<> |
23 | | // |
24 | | // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including |
25 | | // absl/strings/str_split.h. |
26 | | // |
27 | | // IWYU pragma: private, include "absl/strings/str_split.h" |
28 | | |
29 | | #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ |
30 | | #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ |
31 | | |
32 | | #include <array> |
33 | | #include <cstddef> |
34 | | #include <initializer_list> |
35 | | #include <iterator> |
36 | | #include <tuple> |
37 | | #include <type_traits> |
38 | | #include <utility> |
39 | | #include <vector> |
40 | | |
41 | | #include "absl/base/macros.h" |
42 | | #include "absl/base/port.h" |
43 | | #include "absl/meta/type_traits.h" |
44 | | #include "absl/strings/string_view.h" |
45 | | |
46 | | #ifdef _GLIBCXX_DEBUG |
47 | | #include "absl/strings/internal/stl_type_traits.h" |
48 | | #endif // _GLIBCXX_DEBUG |
49 | | |
50 | | namespace absl { |
51 | | ABSL_NAMESPACE_BEGIN |
52 | | namespace strings_internal { |
53 | | |
54 | | // This class is implicitly constructible from everything that absl::string_view |
55 | | // is implicitly constructible from, except for rvalue strings. This means it |
56 | | // can be used as a function parameter in places where passing a temporary |
57 | | // string might cause memory lifetime issues. The view is read back via value(). |
58 | | class ConvertibleToStringView { |
59 | | public: /* converting constructors are implicit on purpose, hence the NOLINTs */ |
60 | | ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) |
61 | 0 | : value_(s) {} |
62 | 0 | ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) |
63 | | ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) |
64 | 14.8M | : value_(s) {} |
65 | | ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) |
66 | 0 | : value_(s) {} |
67 | | |
68 | | // Disable conversion from rvalue strings, so value_ never views a temporary std::string. |
69 | | ConvertibleToStringView(std::string&& s) = delete; |
70 | | ConvertibleToStringView(const std::string&& s) = delete; |
71 | | |
72 | 14.8M | absl::string_view value() const { return value_; } |
73 | | |
74 | | private: |
75 | | absl::string_view value_; |
76 | | }; |
77 | | |
78 | | // An iterator that enumerates the parts of a string from a Splitter. The text |
79 | | // to be split, the Delimiter, and the Predicate are all taken from the given |
80 | | // Splitter object. Iterators may only be compared if they refer to the same |
81 | | // Splitter instance, because operator== only looks at state_ and pos_. |
82 | | // |
83 | | // This class is NOT part of the public splitting API. |
84 | | template <typename Splitter> |
85 | | class SplitIterator { |
86 | | public: |
87 | | using iterator_category = std::input_iterator_tag; |
88 | | using value_type = absl::string_view; |
89 | | using difference_type = ptrdiff_t; |
90 | | using pointer = const value_type*; |
91 | | using reference = const value_type&; |
92 | | /* kInitState: more pieces may follow. kLastState: the final piece has been found and the next increment ends iteration. kEndState: iteration is over. */ |
93 | | enum State { kInitState, kLastState, kEndState }; |
94 | | SplitIterator(State state, const Splitter* splitter) |
95 | | : pos_(0), |
96 | | state_(state), |
97 | | splitter_(splitter), |
98 | | delimiter_(splitter->delimiter()), |
99 | 6.24M | predicate_(splitter->predicate()) { |
100 | | // Hack to maintain backward compatibility. This one block makes it so an |
101 | | // empty absl::string_view whose .data() happens to be nullptr behaves |
102 | | // *differently* from an otherwise empty absl::string_view whose .data() is |
103 | | // not nullptr. This is an undesirable difference in general, but this |
104 | | // behavior is maintained to avoid breaking existing code that happens to |
105 | | // depend on this old behavior/bug. Perhaps it will be fixed one day. The |
106 | | // difference in behavior is as follows: |
107 | | // Split(absl::string_view(""), '-'); // {""} |
108 | | // Split(absl::string_view(), '-'); // {} |
109 | 6.24M | if (splitter_->text().data() == nullptr) { |
110 | 0 | state_ = kEndState; |
111 | 0 | pos_ = splitter_->text().size(); |
112 | 0 | return; |
113 | 0 | } |
114 | | |
115 | 6.24M | if (state_ == kEndState) { |
116 | 0 | pos_ = splitter_->text().size(); |
117 | 6.24M | } else { |
118 | 6.24M | ++(*this); /* load the first piece accepted by the predicate, if any */ |
119 | 6.24M | } |
120 | 6.24M | } |
121 | | |
122 | 28.8M | bool at_end() const { return state_ == kEndState; } |
123 | | |
124 | 0 | reference operator*() const { return curr_; } |
125 | 32.7M | pointer operator->() const { return &curr_; } |
126 | | |
127 | 22.6M | SplitIterator& operator++() { |
128 | 22.6M | do { |
129 | 22.6M | if (state_ == kLastState) { |
130 | 6.24M | state_ = kEndState; |
131 | 6.24M | return *this; |
132 | 6.24M | } |
133 | 16.3M | const absl::string_view text = splitter_->text(); |
134 | 16.3M | const absl::string_view d = delimiter_.Find(text, pos_); |
135 | 16.3M | if (d.data() == text.data() + text.size()) state_ = kLastState; /* the match sits at the end of text, so this piece is the last */ |
136 | 16.3M | curr_ = text.substr(pos_, |
137 | 16.3M | static_cast<size_t>(d.data() - (text.data() + pos_))); /* piece = everything from pos_ up to the start of the match */ |
138 | 16.3M | pos_ += curr_.size() + d.size(); /* skip the piece and the matched delimiter */ |
139 | 16.3M | } while (!predicate_(curr_)); /* repeat while the predicate rejects the piece */ |
140 | 16.3M | return *this; |
141 | 22.6M | } |
142 | | |
143 | | SplitIterator operator++(int) { |
144 | | SplitIterator old(*this); |
145 | | ++(*this); |
146 | | return old; |
147 | | } |
148 | | |
149 | 0 | friend bool operator==(const SplitIterator& a, const SplitIterator& b) { |
150 | 0 | return a.state_ == b.state_ && a.pos_ == b.pos_; |
151 | 0 | } |
152 | | |
153 | 0 | friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { |
154 | 0 | return !(a == b); |
155 | 0 | } |
156 | | |
157 | | private: |
158 | | size_t pos_; |
159 | | State state_; |
160 | | absl::string_view curr_; |
161 | | const Splitter* splitter_; |
162 | | typename Splitter::DelimiterType delimiter_; |
163 | | typename Splitter::PredicateType predicate_; |
164 | | }; |
165 | | |
166 | | // HasMappedType<T>::value is true iff there exists a type T::mapped_type. Used below to detect map-like containers. |
167 | | template <typename T, typename = void> |
168 | | struct HasMappedType : std::false_type {}; /* fallback when the specialization below does not apply */ |
169 | | template <typename T> |
170 | | struct HasMappedType<T, absl::void_t<typename T::mapped_type>> |
171 | | : std::true_type {}; /* selected only when T::mapped_type names a type */ |
172 | | |
173 | | // HasValueType<T>::value is true iff there exists a type T::value_type. Consumed by SplitterIsConvertibleTo below. |
174 | | template <typename T, typename = void> |
175 | | struct HasValueType : std::false_type {}; /* fallback when the specialization below does not apply */ |
176 | | template <typename T> |
177 | | struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { /* selected only when T::value_type names a type */ |
178 | | }; |
179 | | |
180 | | // HasConstIterator<T>::value is true iff there exists a type T::const_iterator. Consumed by SplitterIsConvertibleTo below. |
181 | | template <typename T, typename = void> |
182 | | struct HasConstIterator : std::false_type {}; /* fallback when the specialization below does not apply */ |
183 | | template <typename T> |
184 | | struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> |
185 | | : std::true_type {}; /* selected only when T::const_iterator names a type */ |
186 | | |
187 | | // HasEmplace<T>::value is true iff the zero-argument call std::declval<T>().emplace() is well-formed. |
188 | | template <typename T, typename = void> |
189 | | struct HasEmplace : std::false_type {}; /* fallback when the specialization below does not apply */ |
190 | | template <typename T> |
191 | | struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>> |
192 | | : std::true_type {}; /* selected only when the emplace() expression above compiles */ |
193 | | |
194 | | // IsInitializerList<T>::value is true iff T is an std::initializer_list. More |
195 | | // details below in SplitterIsConvertibleTo<> where this is used. |
196 | | std::false_type IsInitializerListDispatch(...); // default: No (declaration only; called inside decltype) |
197 | | template <typename T> |
198 | | std::true_type IsInitializerListDispatch(std::initializer_list<T>*); /* matches a pointer to any std::initializer_list */ |
199 | | template <typename T> |
200 | | struct IsInitializerList |
201 | | : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; /* inherits the return type of whichever overload a T* argument selects */ |
202 | | |
203 | | // SplitterIsConvertibleTo<C>::value is true iff the conditions below hold for |
204 | | // type 'C'. |
205 | | // |
206 | | // Restricts conversion to container-like types (by testing for the presence of |
207 | | // a const_iterator member type) and also disables conversion to an |
208 | | // std::initializer_list (which also has a const_iterator). Otherwise, code |
209 | | // compiled in C++11 will get an error due to ambiguous conversion paths (in |
210 | | // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> |
211 | | // or an std::initializer_list<T>). |
212 | | |
213 | | template <typename C, bool has_value_type, bool has_mapped_type> |
214 | | struct SplitterIsConvertibleToImpl : std::false_type {}; /* primary template: not convertible unless a specialization below matches */ |
215 | | /* C without mapped_type: C::value_type must be constructible from absl::string_view. */ |
216 | | template <typename C> |
217 | | struct SplitterIsConvertibleToImpl<C, true, false> |
218 | | : std::is_constructible<typename C::value_type, absl::string_view> {}; |
219 | | /* C with mapped_type: C::key_type and C::mapped_type must each be constructible from absl::string_view. */ |
220 | | template <typename C> |
221 | | struct SplitterIsConvertibleToImpl<C, true, true> |
222 | | : absl::conjunction< |
223 | | std::is_constructible<typename C::key_type, absl::string_view>, |
224 | | std::is_constructible<typename C::mapped_type, absl::string_view>> {}; |
225 | | /* Feeds SplitterIsConvertibleToImpl: the first bool requires C to be a non-initializer_list type with value_type and const_iterator; the second bool is HasMappedType<C>. Under _GLIBCXX_DEBUG one more exclusion, declared in stl_type_traits.h, is and-ed in. */ |
226 | | template <typename C> |
227 | | struct SplitterIsConvertibleTo |
228 | | : SplitterIsConvertibleToImpl< |
229 | | C, |
230 | | #ifdef _GLIBCXX_DEBUG |
231 | | !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && |
232 | | #endif // _GLIBCXX_DEBUG |
233 | | !IsInitializerList< |
234 | | typename std::remove_reference<C>::type>::value && |
235 | | HasValueType<C>::value && HasConstIterator<C>::value, |
236 | | HasMappedType<C>::value> { |
237 | | }; |
238 | | /* ShouldUseLifetimeBound<StringType, Container>::value is true iff StringType is std::string and Container::value_type is absl::string_view. Splitter uses it to choose the conversion operator annotated with ABSL_ATTRIBUTE_LIFETIME_BOUND. */ |
239 | | template <typename StringType, typename Container, typename = void> |
240 | | struct ShouldUseLifetimeBound : std::false_type {}; |
241 | | |
242 | | template <typename StringType, typename Container> |
243 | | struct ShouldUseLifetimeBound< |
244 | | StringType, Container, |
245 | | std::enable_if_t< |
246 | | std::is_same<StringType, std::string>::value && |
247 | | std::is_same<typename Container::value_type, absl::string_view>::value>> |
248 | | : std::true_type {}; |
249 | | /* Pair counterpart of ShouldUseLifetimeBound: true iff StringType is std::string and at least one of First / Second is absl::string_view. */ |
250 | | template <typename StringType, typename First, typename Second> |
251 | | using ShouldUseLifetimeBoundForPair = std::integral_constant< |
252 | | bool, std::is_same<StringType, std::string>::value && |
253 | | (std::is_same<First, absl::string_view>::value || |
254 | | std::is_same<Second, absl::string_view>::value)>; |
255 | | |
256 | | |
257 | | // This class implements the range that is returned by absl::StrSplit(). This |
258 | | // class has templated conversion operators that allow it to be implicitly |
259 | | // converted to a variety of types that the caller may have specified on the |
260 | | // left-hand side of an assignment. |
261 | | // |
262 | | // The main interface for interacting with this class is through its implicit |
263 | | // conversion operators. However, this class may also be used like a container |
264 | | // in that it has .begin() and .end() member functions. It may also be used |
265 | | // within a range-for loop. |
266 | | // |
267 | | // Output containers can be collections of any type that is constructible from |
268 | | // an absl::string_view. |
269 | | // |
270 | | // A Predicate functor may be supplied. This predicate will be used to filter |
271 | | // the split strings: only strings for which the predicate returns true will be |
272 | | // kept. A Predicate object is any unary functor that takes an absl::string_view |
273 | | // and returns bool. |
274 | | // |
275 | | // The StringType parameter can be either string_view or string, depending on |
276 | | // whether the Splitter refers to a string stored elsewhere, or if the string |
277 | | // resides inside the Splitter itself. |
278 | | template <typename Delimiter, typename Predicate, typename StringType> |
279 | | class Splitter { |
280 | | public: |
281 | | using DelimiterType = Delimiter; |
282 | | using PredicateType = Predicate; |
283 | | using const_iterator = strings_internal::SplitIterator<Splitter>; |
284 | | using value_type = typename std::iterator_traits<const_iterator>::value_type; |
285 | | |
286 | | Splitter(StringType input_text, Delimiter d, Predicate p) |
287 | | : text_(std::move(input_text)), |
288 | | delimiter_(std::move(d)), |
289 | 6.24M | predicate_(std::move(p)) {} |
290 | | |
291 | 22.6M | absl::string_view text() const { return text_; } |
292 | 6.24M | const Delimiter& delimiter() const { return delimiter_; } |
293 | 6.24M | const Predicate& predicate() const { return predicate_; } |
294 | | |
295 | | // Range functions that iterate the split substrings as absl::string_view |
296 | | // objects. These methods enable a Splitter to be used in a range-based for |
297 | | // loop. |
298 | 6.24M | const_iterator begin() const { return {const_iterator::kInitState, this}; } |
299 | 0 | const_iterator end() const { return {const_iterator::kEndState, this}; } |
300 | | |
301 | | // An implicit conversion operator that is restricted to only those containers |
302 | | // that the splitter is convertible to. Selected when ShouldUseLifetimeBound holds. |
303 | | template < |
304 | | typename Container, |
305 | | std::enable_if_t<ShouldUseLifetimeBound<StringType, Container>::value && |
306 | | SplitterIsConvertibleTo<Container>::value, |
307 | | std::nullptr_t> = nullptr> |
308 | | // NOLINTNEXTLINE(google-explicit-constructor) |
309 | | operator Container() const ABSL_ATTRIBUTE_LIFETIME_BOUND { |
310 | | return ConvertToContainer<Container, typename Container::value_type, |
311 | | HasMappedType<Container>::value>()(*this); |
312 | | } |
313 | | /* Same conversion, selected when ShouldUseLifetimeBound does not hold. */ |
314 | | template < |
315 | | typename Container, |
316 | | std::enable_if_t<!ShouldUseLifetimeBound<StringType, Container>::value && |
317 | | SplitterIsConvertibleTo<Container>::value, |
318 | | std::nullptr_t> = nullptr> |
319 | | // NOLINTNEXTLINE(google-explicit-constructor) |
320 | 6.24M | operator Container() const { |
321 | 6.24M | return ConvertToContainer<Container, typename Container::value_type, |
322 | 6.24M | HasMappedType<Container>::value>()(*this); |
323 | 6.24M | } Unexecuted instantiation: absl::strings_internal::Splitter<absl::ByChar, absl::AllowEmpty, std::__1::basic_string_view<char, std::__1::char_traits<char> > >::operator std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > ><std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, (decltype(nullptr))0>() const absl::strings_internal::Splitter<absl::ByChar, absl::AllowEmpty, std::__1::basic_string_view<char, std::__1::char_traits<char> > >::operator std::__1::vector<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::allocator<std::__1::basic_string_view<char, std::__1::char_traits<char> > > ><std::__1::vector<std::__1::basic_string_view<char, std::__1::char_traits<char> >, std::__1::allocator<std::__1::basic_string_view<char, std::__1::char_traits<char> > > >, (decltype(nullptr))0>() const Line | Count | Source | 320 | 6.24M | operator Container() const { | 321 | 6.24M | return ConvertToContainer<Container, typename Container::value_type, | 322 | 6.24M | HasMappedType<Container>::value>()(*this); | 323 | 6.24M | } |
|
324 | | |
325 | | // Returns a pair with its .first and .second members set to the first two |
326 | | // strings returned by the begin() iterator. Either/both of .first and .second |
327 | | // will be constructed with empty strings if the iterator doesn't have a |
328 | | // corresponding value. |
329 | | template <typename First, typename Second, |
330 | | std::enable_if_t< |
331 | | ShouldUseLifetimeBoundForPair<StringType, First, Second>::value, |
332 | | std::nullptr_t> = nullptr> |
333 | | // NOLINTNEXTLINE(google-explicit-constructor) |
334 | | operator std::pair<First, Second>() const ABSL_ATTRIBUTE_LIFETIME_BOUND { |
335 | | return ConvertToPair<First, Second>(); |
336 | | } |
337 | | /* Same pair conversion, selected when ShouldUseLifetimeBoundForPair does not hold. */ |
338 | | template <typename First, typename Second, |
339 | | std::enable_if_t<!ShouldUseLifetimeBoundForPair<StringType, First, |
340 | | Second>::value, |
341 | | std::nullptr_t> = nullptr> |
342 | | // NOLINTNEXTLINE(google-explicit-constructor) |
343 | | operator std::pair<First, Second>() const { |
344 | | return ConvertToPair<First, Second>(); |
345 | | } |
346 | | |
347 | | private: |
348 | | template <typename First, typename Second> |
349 | | std::pair<First, Second> ConvertToPair() const { |
350 | | absl::string_view first, second; /* each stays empty unless the iterator supplies a piece for it */ |
351 | | auto it = begin(); |
352 | | if (it != end()) { |
353 | | first = *it; |
354 | | if (++it != end()) { |
355 | | second = *it; |
356 | | } |
357 | | } |
358 | | return {First(first), Second(second)}; |
359 | | } |
360 | | |
361 | | // ConvertToContainer is a functor converting a Splitter to the requested |
362 | | // Container of ValueType. It is specialized below to optimize splitting to |
363 | | // certain combinations of Container and ValueType. |
364 | | // |
365 | | // This base template handles the generic case of storing the split results in |
366 | | // the requested non-map-like container and converting the split substrings to |
367 | | // the requested type. |
368 | | template <typename Container, typename ValueType, bool is_map = false> |
369 | | struct ConvertToContainer { |
370 | | Container operator()(const Splitter& splitter) const { |
371 | | Container c; |
372 | | auto it = std::inserter(c, c.end()); |
373 | | for (const auto& sp : splitter) { |
374 | | *it++ = ValueType(sp); |
375 | | } |
376 | | return c; |
377 | | } |
378 | | }; |
379 | | |
380 | | // Partial specialization for a std::vector<absl::string_view>. |
381 | | // |
382 | | // Optimized for the common case of splitting to a |
383 | | // std::vector<absl::string_view>. In this case we first split the results to |
384 | | // a small array of absl::string_view on the stack, to reduce reallocations. |
385 | | template <typename A> |
386 | | struct ConvertToContainer<std::vector<absl::string_view, A>, |
387 | | absl::string_view, false> { |
388 | | std::vector<absl::string_view, A> operator()( |
389 | 6.24M | const Splitter& splitter) const { |
390 | 6.24M | struct raw_view { |
391 | 6.24M | const char* data; |
392 | 6.24M | size_t size; |
393 | 16.3M | operator absl::string_view() const { // NOLINT(runtime/explicit) |
394 | 16.3M | return {data, size}; |
395 | 16.3M | } |
396 | 6.24M | }; |
397 | 6.24M | std::vector<absl::string_view, A> v; |
398 | 6.24M | std::array<raw_view, 16> ar; /* stack buffer holding up to 16 pieces per batch */ |
399 | 12.7M | for (auto it = splitter.begin(); !it.at_end();) { |
400 | 6.48M | size_t index = 0; |
401 | 16.3M | do { |
402 | 16.3M | ar[index].data = it->data(); |
403 | 16.3M | ar[index].size = it->size(); |
404 | 16.3M | ++it; |
405 | 16.3M | } while (++index != ar.size() && !it.at_end()); /* until the buffer fills or the pieces run out */ |
406 | | // We static_cast index to a signed type to work around overzealous |
407 | | // compiler warnings about signedness. |
408 | 6.48M | v.insert(v.end(), ar.begin(), |
409 | 6.48M | ar.begin() + static_cast<ptrdiff_t>(index)); |
410 | 6.48M | } |
411 | 6.24M | return v; |
412 | 6.24M | } |
413 | | }; |
414 | | |
415 | | // Partial specialization for a std::vector<std::string>. |
416 | | // |
417 | | // Optimized for the common case of splitting to a std::vector<std::string>. |
418 | | // In this case we first split the results to a std::vector<absl::string_view> |
419 | | // so the returned std::vector<std::string> can have space reserved to avoid |
420 | | // std::string moves. |
421 | | template <typename A> |
422 | | struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { |
423 | 0 | std::vector<std::string, A> operator()(const Splitter& splitter) const { |
424 | 0 | const std::vector<absl::string_view> v = splitter; |
425 | 0 | return std::vector<std::string, A>(v.begin(), v.end()); |
426 | 0 | } |
427 | | }; |
428 | | |
429 | | // Partial specialization for containers of pairs (e.g., maps). |
430 | | // |
431 | | // The algorithm is to insert a new pair into the map for each even-numbered |
432 | | // item, with the even-numbered item as the key with a default-constructed |
433 | | // value. Each odd-numbered item will then be assigned to the last pair's |
434 | | // value. |
435 | | template <typename Container, typename First, typename Second> |
436 | | struct ConvertToContainer<Container, std::pair<const First, Second>, true> { |
437 | | using iterator = typename Container::iterator; |
438 | | |
439 | | Container operator()(const Splitter& splitter) const { |
440 | | Container m; |
441 | | iterator it; /* assigned by the first piece, because insert starts out true */ |
442 | | bool insert = true; |
443 | | for (const absl::string_view sv : splitter) { |
444 | | if (insert) { |
445 | | it = InsertOrEmplace(&m, sv); |
446 | | } else { |
447 | | it->second = Second(sv); |
448 | | } |
449 | | insert = !insert; |
450 | | } |
451 | | return m; |
452 | | } |
453 | | |
454 | | // Inserts the key and an empty value into the map, returning an iterator to |
455 | | // the inserted item. We use emplace() if available, otherwise insert(). |
456 | | template <typename M> |
457 | | static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( |
458 | | M* m, absl::string_view key) { |
459 | | // Use piecewise_construct to support old versions of gcc in which pair |
460 | | // constructor can't otherwise construct string from string_view. |
461 | | return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), |
462 | | std::tuple<>())); |
463 | | } |
464 | | template <typename M> |
465 | | static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( |
466 | | M* m, absl::string_view key) { |
467 | | return ToIter(m->insert(std::make_pair(First(key), Second("")))); |
468 | | } |
469 | | |
470 | | static iterator ToIter(std::pair<iterator, bool> pair) { |
471 | | return pair.first; |
472 | | } |
473 | | static iterator ToIter(iterator iter) { return iter; } |
474 | | }; |
475 | | |
476 | | StringType text_; |
477 | | Delimiter delimiter_; |
478 | | Predicate predicate_; |
479 | | }; |
480 | | |
481 | | } // namespace strings_internal |
482 | | ABSL_NAMESPACE_END |
483 | | } // namespace absl |
484 | | |
485 | | #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ |