Coverage Report

Created: 2024-09-08 06:07

/proc/self/cwd/external/com_google_absl/absl/strings/str_split.h
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright 2017 The Abseil Authors.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
//      https://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
//
16
// -----------------------------------------------------------------------------
17
// File: str_split.h
18
// -----------------------------------------------------------------------------
19
//
20
// This file contains functions for splitting strings. It defines the main
21
// `StrSplit()` function, several delimiters for determining the boundaries on
22
// which to split the string, and predicates for filtering delimited results.
23
// `StrSplit()` adapts the returned collection to the type specified by the
24
// caller.
25
//
26
// Example:
27
//
28
//   // Splits the given string on commas. Returns the results in a
29
//   // vector of strings.
30
//   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
31
//   // Can also use ","
32
//   // v[0] == "a", v[1] == "b", v[2] == "c"
33
//
34
// See StrSplit() below for more information.
35
#ifndef ABSL_STRINGS_STR_SPLIT_H_
36
#define ABSL_STRINGS_STR_SPLIT_H_
37
38
#include <algorithm>
39
#include <cstddef>
40
#include <map>
41
#include <set>
42
#include <string>
43
#include <utility>
44
#include <vector>
45
46
#include "absl/base/internal/raw_logging.h"
47
#include "absl/base/macros.h"
48
#include "absl/strings/internal/str_split_internal.h"
49
#include "absl/strings/string_view.h"
50
#include "absl/strings/strip.h"
51
52
namespace absl {
53
ABSL_NAMESPACE_BEGIN
54
55
//------------------------------------------------------------------------------
56
// Delimiters
57
//------------------------------------------------------------------------------
58
//
59
// `StrSplit()` uses delimiters to define the boundaries between elements in the
60
// provided input. Several `Delimiter` types are defined below. If a string
61
// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
62
// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
63
// were passed a `ByString` delimiter.
64
//
65
// A `Delimiter` is an object with a `Find()` function that knows how to find
66
// the first occurrence of itself in a given `absl::string_view`.
67
//
68
// The following `Delimiter` types are available for use within `StrSplit()`:
69
//
70
//   - `ByString` (default for string arguments)
71
//   - `ByChar` (default for a char argument)
72
//   - `ByAnyChar`
73
//   - `ByLength`
74
//   - `MaxSplits`
75
//
76
// A Delimiter's `Find()` member function will be passed an input `text` that is
77
// to be split and a position (`pos`) to begin searching for the next delimiter
78
// in `text`. The returned absl::string_view should refer to the next occurrence
79
// (after `pos`) of the represented delimiter; this returned absl::string_view
80
// represents the next location where the input `text` should be broken.
81
//
82
// The returned absl::string_view may be zero-length if the Delimiter does not
83
// represent a part of the string (e.g., a fixed-length delimiter). If no
84
// delimiter is found in the input `text`, a zero-length absl::string_view
85
// referring to `text.end()` should be returned (e.g.,
86
// `text.substr(text.size())`). It is important that the returned
87
// absl::string_view always be within the bounds of the input `text` given as an
88
// argument--it must not refer to a string that is physically located outside of
89
// the given string.
90
//
91
// The following example is a simple Delimiter object that is created with a
92
// single char and will look for that char in the text passed to the `Find()`
93
// function:
94
//
95
//   struct SimpleDelimiter {
96
//     const char c_;
97
//     explicit SimpleDelimiter(char c) : c_(c) {}
98
//     absl::string_view Find(absl::string_view text, size_t pos) {
99
//       auto found = text.find(c_, pos);
100
//       if (found == absl::string_view::npos)
101
//         return text.substr(text.size());
102
//
103
//       return text.substr(found, 1);
104
//     }
105
//   };
106
107
// ByString
108
//
109
// A sub-string delimiter. If `StrSplit()` is passed a string in place of a
110
// `Delimiter` object, the string will be implicitly converted into a
111
// `ByString` delimiter.
112
//
113
// Example:
114
//
115
//   // Because a string literal is converted to an `absl::ByString`,
116
//   // the following two splits are equivalent.
117
//
118
//   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
119
//
120
//   using absl::ByString;
121
//   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
122
//                                                ByString(", "));
123
//   // v[0] == "a", v[1] == "b", v[2] == "c"
124
class ByString {
125
 public:
126
  explicit ByString(absl::string_view sp);
127
  absl::string_view Find(absl::string_view text, size_t pos) const;
128
129
 private:
130
  const std::string delimiter_;
131
};
132
133
// ByAsciiWhitespace
134
//
135
// A sub-string delimiter that splits by ASCII whitespace
136
// (space, tab, vertical tab, formfeed, linefeed, or carriage return).
137
// Note: you probably want to use absl::SkipEmpty() as well!
138
//
139
// This class is equivalent to ByAnyChar with ASCII whitespace chars.
140
//
141
// Example:
142
//
143
//   std::vector<std::string> v = absl::StrSplit(
144
//       "a b\tc\n  d  \n", absl::ByAsciiWhitespace(), absl::SkipEmpty());
145
//   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
146
class ByAsciiWhitespace {
147
 public:
148
  absl::string_view Find(absl::string_view text, size_t pos) const;
149
};
150
151
// ByChar
152
//
153
// A single character delimiter. `ByChar` is functionally equivalent to a
154
// 1-char string within a `ByString` delimiter, but slightly more efficient.
155
//
156
// Example:
157
//
158
//   // Because a char literal is converted to an absl::ByChar,
159
//   // the following two splits are equivalent.
160
//   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
161
//   using absl::ByChar;
162
//   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
163
//   // v[0] == "a", v[1] == "b", v[2] == "c"
164
//
165
// `ByChar` is also the default delimiter if a single character is given
166
// as the delimiter to `StrSplit()`. For example, the following calls are
167
// equivalent:
168
//
169
//   std::vector<std::string> v = absl::StrSplit("a-b", '-');
170
//
171
//   using absl::ByChar;
172
//   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
173
//
174
class ByChar {
175
 public:
176
0
  explicit ByChar(char c) : c_(c) {}
177
  absl::string_view Find(absl::string_view text, size_t pos) const;
178
179
 private:
180
  char c_;
181
};
182
183
// ByAnyChar
184
//
185
// A delimiter that will match any of the given byte-sized characters within
186
// its provided string.
187
//
188
// Note: this delimiter works with single-byte string data, but does not work
189
// with variable-width encodings, such as UTF-8.
190
//
191
// Example:
192
//
193
//   using absl::ByAnyChar;
194
//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
195
//   // v[0] == "a", v[1] == "b", v[2] == "c"
196
//
197
// If `ByAnyChar` is given the empty string, it behaves exactly like
198
// `ByString` and matches each individual character in the input string.
199
//
200
class ByAnyChar {
201
 public:
202
  explicit ByAnyChar(absl::string_view sp);
203
  absl::string_view Find(absl::string_view text, size_t pos) const;
204
205
 private:
206
  const std::string delimiters_;
207
};
208
209
// ByLength
210
//
211
// A delimiter for splitting into equal-length strings. The length argument to
212
// the constructor must be greater than 0.
213
//
214
// Note: this delimiter works with single-byte string data, but does not work
215
// with variable-width encodings, such as UTF-8.
216
//
217
// Example:
218
//
219
//   using absl::ByLength;
220
//   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
221
222
//   // v[0] == "123", v[1] == "456", v[2] == "789"
223
//
224
// Note that the string does not have to be a multiple of the fixed split
225
// length. In such a case, the last substring will be shorter.
226
//
227
//   using absl::ByLength;
228
//   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
229
//
230
//   // v[0] == "12", v[1] == "34", v[2] == "5"
231
class ByLength {
232
 public:
233
  explicit ByLength(ptrdiff_t length);
234
  absl::string_view Find(absl::string_view text, size_t pos) const;
235
236
 private:
237
  const ptrdiff_t length_;
238
};
239
240
namespace strings_internal {
241
242
// A traits-like metafunction for selecting the default Delimiter object type
243
// for a particular Delimiter type. The base case simply exposes type Delimiter
244
// itself as the delimiter's Type. However, there are specializations for
245
// string-like objects that map them to the ByString delimiter object.
246
// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
247
// string-like objects (e.g., ",") as delimiter arguments but they will be
248
// treated as if a ByString delimiter was given.
249
// Primary template: a type with no dedicated specialization is already a
// delimiter object, so it is passed through unchanged.
template <typename Delimiter>
struct SelectDelimiter {
  using type = Delimiter;
};
253
254
template <>
255
struct SelectDelimiter<char> {
256
  using type = ByChar;
257
};
258
template <>
259
struct SelectDelimiter<char*> {
260
  using type = ByString;
261
};
262
template <>
263
struct SelectDelimiter<const char*> {
264
  using type = ByString;
265
};
266
template <>
267
struct SelectDelimiter<absl::string_view> {
268
  using type = ByString;
269
};
270
template <>
271
struct SelectDelimiter<std::string> {
272
  using type = ByString;
273
};
274
275
// Wraps another delimiter and sets a max number of matches for that delimiter.
276
template <typename Delimiter>
277
class MaxSplitsImpl {
278
 public:
279
  MaxSplitsImpl(Delimiter delimiter, int limit)
280
      : delimiter_(delimiter), limit_(limit), count_(0) {}
281
  absl::string_view Find(absl::string_view text, size_t pos) {
282
    if (count_++ == limit_) {
283
      return absl::string_view(text.data() + text.size(),
284
                               0);  // No more matches.
285
    }
286
    return delimiter_.Find(text, pos);
287
  }
288
289
 private:
290
  Delimiter delimiter_;
291
  const int limit_;
292
  int count_;
293
};
294
295
}  // namespace strings_internal
296
297
// MaxSplits()
298
//
299
// A delimiter that limits the number of matches which can occur to the passed
300
// `limit`. The last element in the returned collection will contain all
301
// remaining unsplit pieces, which may contain instances of the delimiter.
302
// The collection will contain at most `limit` + 1 elements.
303
// Example:
304
//
305
//   using absl::MaxSplits;
306
//   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
307
//
308
//   // v[0] == "a", v[1] == "b,c"
309
template <typename Delimiter>
310
inline strings_internal::MaxSplitsImpl<
311
    typename strings_internal::SelectDelimiter<Delimiter>::type>
312
MaxSplits(Delimiter delimiter, int limit) {
313
  typedef
314
      typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
315
  return strings_internal::MaxSplitsImpl<DelimiterType>(
316
      DelimiterType(delimiter), limit);
317
}
318
319
//------------------------------------------------------------------------------
320
// Predicates
321
//------------------------------------------------------------------------------
322
//
323
// Predicates filter the results of a `StrSplit()` by determining whether or not
324
// a resultant element is included in the result set. A predicate may be passed
325
// as an optional third argument to the `StrSplit()` function.
326
//
327
// Predicates are unary functions (or functors) that take a single
328
// `absl::string_view` argument and return a bool indicating whether the
329
// argument should be included (`true`) or excluded (`false`).
330
//
331
// Predicates are useful when filtering out empty substrings. By default, empty
332
// substrings may be returned by `StrSplit()`, which is similar to the way split
333
// functions work in other programming languages.
334
335
// AllowEmpty()
336
//
337
// Always returns `true`, indicating that all strings--including empty
338
// strings--should be included in the split output. This predicate is not
339
// strictly needed because this is the default behavior of `StrSplit()`;
340
// however, it might be useful at some call sites to make the intent explicit.
341
//
342
// Example:
343
//
344
//  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
345
//
346
//  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
347
struct AllowEmpty {
348
13.6M
  bool operator()(absl::string_view) const { return true; }
349
};
350
351
// SkipEmpty()
352
//
353
// Returns `false` if the given `absl::string_view` is empty, indicating that
354
// `StrSplit()` should omit the empty string.
355
//
356
// Example:
357
//
358
//   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
359
//
360
//   // v[0] == "a", v[1] == "b"
361
//
362
// Note: `SkipEmpty()` does not consider a string containing only whitespace
363
// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
364
// predicate.
365
struct SkipEmpty {
366
0
  bool operator()(absl::string_view sp) const { return !sp.empty(); }
367
};
368
369
// SkipWhitespace()
370
//
371
// Returns `false` if the given `absl::string_view` is empty *or* contains only
372
// whitespace, indicating that `StrSplit()` should omit the string.
373
//
374
// Example:
375
//
376
//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
377
//                                               ',', SkipWhitespace());
378
//   // v[0] == " a ", v[1] == "b"
379
//
380
//   // SkipEmpty() would return whitespace elements
381
//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
382
//   // v[0] == " a ", v[1] == " ", v[2] == "b"
383
struct SkipWhitespace {
384
0
  bool operator()(absl::string_view sp) const {
385
0
    sp = absl::StripAsciiWhitespace(sp);
386
0
    return !sp.empty();
387
0
  }
388
};
389
390
// SFINAE gate used by the `StrSplit()` overloads that take `StringType&&`:
// names `int` when `T`, ignoring a top-level `const`, is exactly
// `std::string`. For any other `T` the alias has no type, which removes the
// overload from consideration.
template <typename T>
using EnableSplitIfString = typename std::enable_if<
    std::is_same<typename std::remove_const<T>::type, std::string>::value,
    int>::type;
395
396
//------------------------------------------------------------------------------
397
//                                  StrSplit()
398
//------------------------------------------------------------------------------
399
400
// StrSplit()
401
//
402
// Splits a given string based on the provided `Delimiter` object, returning the
403
// elements within the type specified by the caller. Optionally, you may pass a
404
// `Predicate` to `StrSplit()` indicating whether to include or exclude the
405
// resulting element within the final result set. (See the overviews for
406
// Delimiters and Predicates above.)
407
//
408
// Example:
409
//
410
//   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
411
//   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
412
//
413
// You can also provide an explicit `Delimiter` object:
414
//
415
// Example:
416
//
417
//   using absl::ByAnyChar;
418
//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
419
//   // v[0] == "a", v[1] == "b", v[2] == "c"
420
//
421
// See above for more information on delimiters.
422
//
423
// By default, empty strings are included in the result set. You can optionally
424
// include a third `Predicate` argument to apply a test for whether the
425
// resultant element should be included in the result set:
426
//
427
// Example:
428
//
429
//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
430
//                                               ',', SkipWhitespace());
431
//   // v[0] == " a ", v[1] == "b"
432
//
433
// See above for more information on predicates.
434
//
435
//------------------------------------------------------------------------------
436
// StrSplit() Return Types
437
//------------------------------------------------------------------------------
438
//
439
// The `StrSplit()` function adapts the returned collection to the collection
440
// specified by the caller (e.g. `std::vector` above). The returned collections
441
// may contain `std::string`, `absl::string_view` (in which case the original
442
// string being split must ensure that it outlives the collection), or any
443
// object that can be explicitly created from an `absl::string_view`. This
444
// behavior works for:
445
//
446
// 1) All standard STL containers including `std::vector`, `std::list`,
447
//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
448
// 2) `std::pair` (which is not actually a container). See below.
449
//
450
// Example:
451
//
452
//   // The results are returned as `absl::string_view` objects. Note that we
453
//   // have to ensure that the input string outlives any results.
454
//   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
455
//
456
//   // Stores results in a std::set<std::string>, which also performs
457
//   // de-duplication and orders the elements in ascending order.
458
//   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
459
//   // a contains "a", "b", "c" (in that order when iterated)
460
//
461
//   // `StrSplit()` can be used within a range-based for loop, in which case
462
//   // each element will be of type `absl::string_view`.
463
//   std::vector<std::string> v;
464
//   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
465
//     if (sv != "b") v.emplace_back(sv);
466
//   }
467
//   // v[0] == "a", v[1] == "c"
468
//
469
//   // Stores results in a map. The map implementation assumes that the input
470
//   // is provided as a series of key/value pairs. For example, the 0th element
471
//   // resulting from the split will be stored as a key to the 1st element. If
472
//   // an odd number of elements are resolved, the last element is paired with
473
//   // a default-constructed value (e.g., empty string).
474
//   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
475
//   // m["a"] == "b", m["c"] == ""     // last component value equals ""
476
//
477
// Splitting to `std::pair` is an interesting case because it can hold only two
478
// elements and is not a collection type. When splitting to a `std::pair` the
479
// first two split strings become the `std::pair` `.first` and `.second`
480
// members, respectively. The remaining split substrings are discarded. If there
481
// are fewer than two split substrings, the empty string is used for the
482
// corresponding `std::pair` member.
483
//
484
// Example:
485
//
486
//   // Stores first two split strings as the members in a std::pair.
487
//   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
488
//   // p.first == "a", p.second == "b"       // "c" is omitted.
489
//
490
// The `StrSplit()` function can be used multiple times to perform more
491
// complicated splitting logic, such as intelligently parsing key-value pairs.
492
//
493
// Example:
494
//
495
//   // The input string "a=b=c,d=e,f=,g" becomes
496
//   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
497
//   std::map<std::string, std::string> m;
498
//   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
499
//     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
500
//   }
501
//   EXPECT_EQ("b=c", m.find("a")->second);
502
//   EXPECT_EQ("e", m.find("d")->second);
503
//   EXPECT_EQ("", m.find("f")->second);
504
//   EXPECT_EQ("", m.find("g")->second);
505
//
506
// WARNING: Due to a legacy bug that is maintained for backward compatibility,
507
// splitting the following empty string_views produces different results:
508
//
509
//   absl::StrSplit(absl::string_view(""), '-');  // {""}
510
//   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
511
//
512
// Try not to depend on this distinction because the bug may one day be fixed.
513
template <typename Delimiter>
514
strings_internal::Splitter<
515
    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
516
    absl::string_view>
517
1.24k
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
518
1.24k
  using DelimiterType =
519
1.24k
      typename strings_internal::SelectDelimiter<Delimiter>::type;
520
1.24k
  return strings_internal::Splitter<DelimiterType, AllowEmpty,
521
1.24k
                                    absl::string_view>(
522
1.24k
      text.value(), DelimiterType(d), AllowEmpty());
523
1.24k
}
524
525
template <typename Delimiter, typename StringType,
526
          EnableSplitIfString<StringType> = 0>
527
strings_internal::Splitter<
528
    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
529
    std::string>
530
StrSplit(StringType&& text, Delimiter d) {
531
  using DelimiterType =
532
      typename strings_internal::SelectDelimiter<Delimiter>::type;
533
  return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>(
534
      std::move(text), DelimiterType(d), AllowEmpty());
535
}
536
537
template <typename Delimiter, typename Predicate>
538
strings_internal::Splitter<
539
    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
540
    absl::string_view>
541
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
542
         Predicate p) {
543
  using DelimiterType =
544
      typename strings_internal::SelectDelimiter<Delimiter>::type;
545
  return strings_internal::Splitter<DelimiterType, Predicate,
546
                                    absl::string_view>(
547
      text.value(), DelimiterType(std::move(d)), std::move(p));
548
}
549
550
template <typename Delimiter, typename Predicate, typename StringType,
551
          EnableSplitIfString<StringType> = 0>
552
strings_internal::Splitter<
553
    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
554
    std::string>
555
StrSplit(StringType&& text, Delimiter d, Predicate p) {
556
  using DelimiterType =
557
      typename strings_internal::SelectDelimiter<Delimiter>::type;
558
  return strings_internal::Splitter<DelimiterType, Predicate, std::string>(
559
      std::move(text), DelimiterType(d), std::move(p));
560
}
561
562
ABSL_NAMESPACE_END
563
}  // namespace absl
564
565
#endif  // ABSL_STRINGS_STR_SPLIT_H_