Coverage Report

Created: 2025-07-12 07:18

/rust/registry/src/index.crates.io-6f17d22bba15001f/rayon-1.10.0/src/str.rs
Line
Count
Source (jump to first uncovered line)
1
//! Parallel iterator types for [strings][std::str]
2
//!
3
//! You will rarely need to interact with this module directly unless you need
4
//! to name one of the iterator types.
5
//!
6
//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
7
//! reference a `Pattern` trait which is not visible outside this crate.
8
//! This trait is intentionally kept private, for use only by Rayon itself.
9
//! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
10
//! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11
//!
12
//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13
//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
14
//!
15
//! [std::str]: https://doc.rust-lang.org/stable/std/str/
16
17
use crate::iter::plumbing::*;
18
use crate::iter::*;
19
use crate::split_producer::*;
20
21
/// Reports whether `b` can begin a UTF-8 encoded character, i.e. whether it
/// is anything other than a continuation byte (`0b10xx_xxxx`).
/// (same byte test as the one inside `str::is_char_boundary`)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Continuation bytes occupy exactly 0x80..=0xBF; ASCII bytes and
    // multi-byte lead bytes fall outside that range.
    !matches!(b, 0x80..=0xBF)
}
28
29
/// Find the index of a character boundary near the midpoint.
30
#[inline]
31
0
fn find_char_midpoint(chars: &str) -> usize {
32
0
    let mid = chars.len() / 2;
33
0
34
0
    // We want to split near the midpoint, but we need to find an actual
35
0
    // character boundary.  So we look at the raw bytes, first scanning
36
0
    // forward from the midpoint for a boundary, then trying backward.
37
0
    let (left, right) = chars.as_bytes().split_at(mid);
38
0
    match right.iter().copied().position(is_char_boundary) {
39
0
        Some(i) => mid + i,
40
0
        None => left
41
0
            .iter()
42
0
            .copied()
43
0
            .rposition(is_char_boundary)
44
0
            .unwrap_or(0),
45
    }
46
0
}
47
48
/// Try to split a string near the midpoint.
49
#[inline]
50
0
fn split(chars: &str) -> Option<(&str, &str)> {
51
0
    let index = find_char_midpoint(chars);
52
0
    if index > 0 {
53
0
        Some(chars.split_at(index))
54
    } else {
55
0
        None
56
    }
57
0
}
58
59
/// Parallel extensions for strings.
60
pub trait ParallelString {
61
    /// Returns a plain string slice, which is used to implement the rest of
62
    /// the parallel methods.
63
    fn as_parallel_string(&self) -> &str;
64
65
    /// Returns a parallel iterator over the characters of a string.
66
    ///
67
    /// # Examples
68
    ///
69
    /// ```
70
    /// use rayon::prelude::*;
71
    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72
    /// assert_eq!(Some('o'), max);
73
    /// ```
74
0
    fn par_chars(&self) -> Chars<'_> {
75
0
        Chars {
76
0
            chars: self.as_parallel_string(),
77
0
        }
78
0
    }
79
80
    /// Returns a parallel iterator over the characters of a string, with their positions.
81
    ///
82
    /// # Examples
83
    ///
84
    /// ```
85
    /// use rayon::prelude::*;
86
    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87
    /// assert_eq!(Some((1, 'e')), min);
88
    /// ```
89
0
    fn par_char_indices(&self) -> CharIndices<'_> {
90
0
        CharIndices {
91
0
            chars: self.as_parallel_string(),
92
0
        }
93
0
    }
94
95
    /// Returns a parallel iterator over the bytes of a string.
96
    ///
97
    /// Note that multi-byte sequences (for code points greater than `U+007F`)
98
    /// are produced as separate items, but will not be split across threads.
99
    /// If you would prefer an indexed iterator without that guarantee, consider
100
    /// `string.as_bytes().par_iter().copied()` instead.
101
    ///
102
    /// # Examples
103
    ///
104
    /// ```
105
    /// use rayon::prelude::*;
106
    /// let max = "hello".par_bytes().max();
107
    /// assert_eq!(Some(b'o'), max);
108
    /// ```
109
0
    fn par_bytes(&self) -> Bytes<'_> {
110
0
        Bytes {
111
0
            chars: self.as_parallel_string(),
112
0
        }
113
0
    }
114
115
    /// Returns a parallel iterator over a string encoded as UTF-16.
116
    ///
117
    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118
    /// produced as separate items, but will not be split across threads.
119
    ///
120
    /// # Examples
121
    ///
122
    /// ```
123
    /// use rayon::prelude::*;
124
    ///
125
    /// let max = "hello".par_encode_utf16().max();
126
    /// assert_eq!(Some(b'o' as u16), max);
127
    ///
128
    /// let text = "Zażółć gęślą jaźń";
129
    /// let utf8_len = text.len();
130
    /// let utf16_len = text.par_encode_utf16().count();
131
    /// assert!(utf16_len <= utf8_len);
132
    /// ```
133
0
    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134
0
        EncodeUtf16 {
135
0
            chars: self.as_parallel_string(),
136
0
        }
137
0
    }
138
139
    /// Returns a parallel iterator over substrings separated by a
140
    /// given character or predicate, similar to `str::split`.
141
    ///
142
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
144
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
145
    ///
146
    /// # Examples
147
    ///
148
    /// ```
149
    /// use rayon::prelude::*;
150
    /// let total = "1, 2, buckle, 3, 4, door"
151
    ///    .par_split(',')
152
    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
153
    ///    .sum();
154
    /// assert_eq!(10, total);
155
    /// ```
156
0
    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157
0
        Split::new(self.as_parallel_string(), separator)
158
0
    }
159
160
    /// Returns a parallel iterator over substrings separated by a
161
    /// given character or predicate, keeping the matched part as a terminator
162
    /// of the substring similar to `str::split_inclusive`.
163
    ///
164
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
165
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
166
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
167
    ///
168
    /// # Examples
169
    ///
170
    /// ```
171
    /// use rayon::prelude::*;
172
    /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
173
    ///    .par_split_inclusive('\n')
174
    ///    .collect();
175
    /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
176
    /// ```
177
0
    fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
178
0
        SplitInclusive::new(self.as_parallel_string(), separator)
179
0
    }
180
181
    /// Returns a parallel iterator over substrings terminated by a
182
    /// given character or predicate, similar to `str::split_terminator`.
183
    /// It's equivalent to `par_split`, except it doesn't produce an empty
184
    /// substring after a trailing terminator.
185
    ///
186
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
187
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
188
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
189
    ///
190
    /// # Examples
191
    ///
192
    /// ```
193
    /// use rayon::prelude::*;
194
    /// let parts: Vec<_> = "((1 + 3) * 2)"
195
    ///     .par_split_terminator(|c| c == '(' || c == ')')
196
    ///     .collect();
197
    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
198
    /// ```
199
0
    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
200
0
        SplitTerminator::new(self.as_parallel_string(), terminator)
201
0
    }
202
203
    /// Returns a parallel iterator over the lines of a string, ending with an
204
    /// optional carriage return and with a newline (`\r\n` or just `\n`).
205
    /// The final line ending is optional, and line endings are not included in
206
    /// the output strings.
207
    ///
208
    /// # Examples
209
    ///
210
    /// ```
211
    /// use rayon::prelude::*;
212
    /// let lengths: Vec<_> = "hello world\nfizbuzz"
213
    ///     .par_lines()
214
    ///     .map(|l| l.len())
215
    ///     .collect();
216
    /// assert_eq!(vec![11, 7], lengths);
217
    /// ```
218
0
    fn par_lines(&self) -> Lines<'_> {
219
0
        Lines(self.as_parallel_string())
220
0
    }
221
222
    /// Returns a parallel iterator over the sub-slices of a string that are
223
    /// separated by any amount of whitespace.
224
    ///
225
    /// As with `str::split_whitespace`, 'whitespace' is defined according to
226
    /// the terms of the Unicode Derived Core Property `White_Space`.
227
    /// If you only want to split on ASCII whitespace instead, use
228
    /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
229
    ///
230
    /// # Examples
231
    ///
232
    /// ```
233
    /// use rayon::prelude::*;
234
    /// let longest = "which is the longest word?"
235
    ///     .par_split_whitespace()
236
    ///     .max_by_key(|word| word.len());
237
    /// assert_eq!(Some("longest"), longest);
238
    /// ```
239
    ///
240
    /// All kinds of whitespace are considered:
241
    ///
242
    /// ```
243
    /// use rayon::prelude::*;
244
    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
245
    ///     .par_split_whitespace()
246
    ///     .collect();
247
    /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
248
    /// ```
249
    ///
250
    /// If the string is empty or all whitespace, the iterator yields no string slices:
251
    ///
252
    /// ```
253
    /// use rayon::prelude::*;
254
    /// assert_eq!("".par_split_whitespace().count(), 0);
255
    /// assert_eq!("   ".par_split_whitespace().count(), 0);
256
    /// ```
257
0
    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
258
0
        SplitWhitespace(self.as_parallel_string())
259
0
    }
260
261
    /// Returns a parallel iterator over the sub-slices of a string that are
262
    /// separated by any amount of ASCII whitespace.
263
    ///
264
    /// To split by Unicode `White_Space` instead, use
265
    /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
266
    ///
267
    /// # Examples
268
    ///
269
    /// ```
270
    /// use rayon::prelude::*;
271
    /// let longest = "which is the longest word?"
272
    ///     .par_split_ascii_whitespace()
273
    ///     .max_by_key(|word| word.len());
274
    /// assert_eq!(Some("longest"), longest);
275
    /// ```
276
    ///
277
    /// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
278
    ///
279
    /// ```
280
    /// use rayon::prelude::*;
281
    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
282
    ///     .par_split_ascii_whitespace()
283
    ///     .collect();
284
    /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
285
    /// ```
286
    ///
287
    /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
288
    ///
289
    /// ```
290
    /// use rayon::prelude::*;
291
    /// assert_eq!("".par_split_whitespace().count(), 0);
292
    /// assert_eq!("   ".par_split_whitespace().count(), 0);
293
    /// ```
294
0
    fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
295
0
        SplitAsciiWhitespace(self.as_parallel_string())
296
0
    }
297
298
    /// Returns a parallel iterator over substrings that match a
299
    /// given character or predicate, similar to `str::matches`.
300
    ///
301
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
302
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
303
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
304
    ///
305
    /// # Examples
306
    ///
307
    /// ```
308
    /// use rayon::prelude::*;
309
    /// let total = "1, 2, buckle, 3, 4, door"
310
    ///    .par_matches(char::is_numeric)
311
    ///    .map(|s| s.parse::<i32>().expect("digit"))
312
    ///    .sum();
313
    /// assert_eq!(10, total);
314
    /// ```
315
0
    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
316
0
        Matches {
317
0
            chars: self.as_parallel_string(),
318
0
            pattern,
319
0
        }
320
0
    }
321
322
    /// Returns a parallel iterator over substrings that match a given character
323
    /// or predicate, with their positions, similar to `str::match_indices`.
324
    ///
325
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
326
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
327
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
328
    ///
329
    /// # Examples
330
    ///
331
    /// ```
332
    /// use rayon::prelude::*;
333
    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
334
    ///    .par_match_indices(char::is_numeric)
335
    ///    .collect();
336
    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
337
    /// ```
338
0
    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
339
0
        MatchIndices {
340
0
            chars: self.as_parallel_string(),
341
0
            pattern,
342
0
        }
343
0
    }
344
}
345
346
/// `str` is its own parallel string: the accessor hands back `self`
/// unchanged.
impl ParallelString for str {
347
    #[inline]
348
0
    fn as_parallel_string(&self) -> &str {
349
0
        self
350
0
    }
351
}
352
353
// /////////////////////////////////////////////////////////////////////////
354
355
/// We hide the `Pattern` trait in a private module, as its API is not meant
356
/// for general consumption.  If we could have privacy on trait items, then it
357
/// would be nicer to have its basic existence and implementors public while
358
/// keeping all of the methods private.
359
mod private {
360
    use crate::iter::plumbing::Folder;
361
362
    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
363
    /// `std::str::pattern::{Pattern, Searcher}`.
364
    ///
365
    /// Implementing this trait is not permitted outside of `rayon`.
366
    pub trait Pattern: Sized + Sync + Send {
367
        private_decl! {}
368
        /// Search `haystack` from the front; the implementations generated
        /// by `impl_pattern!` forward to `str::find`.
        fn find_in(&self, haystack: &str) -> Option<usize>;
369
        /// Search `haystack` from the back; the implementations generated
        /// by `impl_pattern!` forward to `str::rfind`.
        fn rfind_in(&self, haystack: &str) -> Option<usize>;
370
        /// Whether `haystack` ends with this pattern (`str::ends_with` in
        /// the generated implementations).
        fn is_suffix_of(&self, haystack: &str) -> bool;
371
        /// Feed the pieces of `haystack` separated by this pattern into
        /// `folder`. In the generated implementations, `skip_last` drops
        /// the final piece before folding.
        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
372
        where
373
            F: Folder<&'ch str>;
374
        /// Like `fold_splits`, but based on `str::split_inclusive` in the
        /// generated implementations, so each piece keeps its separator.
        fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
375
        where
376
            F: Folder<&'ch str>;
377
        /// Feed every match of this pattern in `haystack` into `folder`.
        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
378
        where
379
            F: Folder<&'ch str>;
380
        /// Feed every `(index, match)` pair into `folder`; the generated
        /// implementations add `base` to each index.
        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
381
        where
382
            F: Folder<(usize, &'ch str)>;
383
    }
384
}
385
use self::private::Pattern;
386
387
/// Build a closure that shifts the index half of an `(index, value)` pair
/// forward by `base`, leaving the value untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair: (usize, T)| {
        let (i, x) = pair;
        (base + i, x)
    }
}
391
392
/// Expands to the complete set of `Pattern` methods for one implementor.
///
/// Each method forwards to the corresponding `str` operation, searching
/// with the `$pattern` expression, which is written in terms of the
/// caller-supplied `$self` identifier.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, chars: &str) -> Option<usize> {
            str::find(chars, $pattern)
        }

        #[inline]
        fn rfind_in(&$self, chars: &str) -> Option<usize> {
            str::rfind(chars, $pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, chars: &str) -> bool {
            str::ends_with(chars, $pattern)
        }

        fn fold_splits<'ch, F>(&$self, chars: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut pieces = chars.split($pattern);
            if skip_last {
                // Discard the final piece before handing the rest over.
                let _ = pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_inclusive_splits<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let pieces = chars.split_inclusive($pattern);
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let found = chars.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F>(&$self, chars: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices are relative to `chars`; shift them by `base`.
            let found = chars.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    }
}
444
445
/// A single `char` is passed by value to the `str` search methods.
impl Pattern for char {
446
    impl_pattern!(&self => *self);
447
}
448
449
/// A borrowed slice of `char`s; the slice reference itself is copied out
/// of `&self` and handed to the `str` search methods.
impl Pattern for &[char] {
450
    impl_pattern!(&self => *self);
451
}
452
453
// TODO (MSRV 1.75): use `*self` for array patterns too.
454
// - Needs `DoubleEndedSearcher` so `split.next_back()` works.
455
456
/// An owned `char` array, searched through its slice view (see the TODO
/// above for why the array is not used directly).
impl<const N: usize> Pattern for [char; N] {
457
    impl_pattern!(&self => self.as_slice());
458
}
459
460
/// A borrowed `char` array, likewise searched through its slice view.
impl<const N: usize> Pattern for &[char; N] {
461
    impl_pattern!(&self => self.as_slice());
462
}
463
464
/// Any thread-safe `Fn(char) -> bool` predicate; it is passed to the `str`
/// search methods by reference.
impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
465
    impl_pattern!(&self => self);
466
}
467
468
// /////////////////////////////////////////////////////////////////////////
469
470
/// Parallel iterator over the characters of a string
471
#[derive(Debug, Clone)]
472
pub struct Chars<'ch> {
473
    chars: &'ch str,
474
}
475
476
struct CharsProducer<'ch> {
477
    chars: &'ch str,
478
}
479
480
impl<'ch> ParallelIterator for Chars<'ch> {
481
    type Item = char;
482
483
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
484
0
    where
485
0
        C: UnindexedConsumer<Self::Item>,
486
0
    {
487
0
        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
488
0
    }
489
}
490
491
impl<'ch> UnindexedProducer for CharsProducer<'ch> {
492
    type Item = char;
493
494
0
    fn split(self) -> (Self, Option<Self>) {
495
0
        match split(self.chars) {
496
0
            Some((left, right)) => (
497
0
                CharsProducer { chars: left },
498
0
                Some(CharsProducer { chars: right }),
499
0
            ),
500
0
            None => (self, None),
501
        }
502
0
    }
503
504
0
    fn fold_with<F>(self, folder: F) -> F
505
0
    where
506
0
        F: Folder<Self::Item>,
507
0
    {
508
0
        folder.consume_iter(self.chars.chars())
509
0
    }
510
}
511
512
// /////////////////////////////////////////////////////////////////////////
513
514
/// Parallel iterator over the characters of a string, with their positions
515
#[derive(Debug, Clone)]
516
pub struct CharIndices<'ch> {
517
    chars: &'ch str,
518
}
519
520
struct CharIndicesProducer<'ch> {
521
    index: usize,
522
    chars: &'ch str,
523
}
524
525
impl<'ch> ParallelIterator for CharIndices<'ch> {
526
    type Item = (usize, char);
527
528
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
529
0
    where
530
0
        C: UnindexedConsumer<Self::Item>,
531
0
    {
532
0
        let producer = CharIndicesProducer {
533
0
            index: 0,
534
0
            chars: self.chars,
535
0
        };
536
0
        bridge_unindexed(producer, consumer)
537
0
    }
538
}
539
540
impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
541
    type Item = (usize, char);
542
543
0
    fn split(self) -> (Self, Option<Self>) {
544
0
        match split(self.chars) {
545
0
            Some((left, right)) => (
546
0
                CharIndicesProducer {
547
0
                    chars: left,
548
0
                    ..self
549
0
                },
550
0
                Some(CharIndicesProducer {
551
0
                    chars: right,
552
0
                    index: self.index + left.len(),
553
0
                }),
554
0
            ),
555
0
            None => (self, None),
556
        }
557
0
    }
558
559
0
    fn fold_with<F>(self, folder: F) -> F
560
0
    where
561
0
        F: Folder<Self::Item>,
562
0
    {
563
0
        let base = self.index;
564
0
        folder.consume_iter(self.chars.char_indices().map(offset(base)))
565
0
    }
566
}
567
568
// /////////////////////////////////////////////////////////////////////////
569
570
/// Parallel iterator over the bytes of a string
571
#[derive(Debug, Clone)]
572
pub struct Bytes<'ch> {
573
    chars: &'ch str,
574
}
575
576
struct BytesProducer<'ch> {
577
    chars: &'ch str,
578
}
579
580
impl<'ch> ParallelIterator for Bytes<'ch> {
581
    type Item = u8;
582
583
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
584
0
    where
585
0
        C: UnindexedConsumer<Self::Item>,
586
0
    {
587
0
        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
588
0
    }
589
}
590
591
impl<'ch> UnindexedProducer for BytesProducer<'ch> {
592
    type Item = u8;
593
594
0
    fn split(self) -> (Self, Option<Self>) {
595
0
        match split(self.chars) {
596
0
            Some((left, right)) => (
597
0
                BytesProducer { chars: left },
598
0
                Some(BytesProducer { chars: right }),
599
0
            ),
600
0
            None => (self, None),
601
        }
602
0
    }
603
604
0
    fn fold_with<F>(self, folder: F) -> F
605
0
    where
606
0
        F: Folder<Self::Item>,
607
0
    {
608
0
        folder.consume_iter(self.chars.bytes())
609
0
    }
610
}
611
612
// /////////////////////////////////////////////////////////////////////////
613
614
/// Parallel iterator over a string encoded as UTF-16
615
#[derive(Debug, Clone)]
616
pub struct EncodeUtf16<'ch> {
617
    chars: &'ch str,
618
}
619
620
struct EncodeUtf16Producer<'ch> {
621
    chars: &'ch str,
622
}
623
624
impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
625
    type Item = u16;
626
627
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
628
0
    where
629
0
        C: UnindexedConsumer<Self::Item>,
630
0
    {
631
0
        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
632
0
    }
633
}
634
635
impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
636
    type Item = u16;
637
638
0
    fn split(self) -> (Self, Option<Self>) {
639
0
        match split(self.chars) {
640
0
            Some((left, right)) => (
641
0
                EncodeUtf16Producer { chars: left },
642
0
                Some(EncodeUtf16Producer { chars: right }),
643
0
            ),
644
0
            None => (self, None),
645
        }
646
0
    }
647
648
0
    fn fold_with<F>(self, folder: F) -> F
649
0
    where
650
0
        F: Folder<Self::Item>,
651
0
    {
652
0
        folder.consume_iter(self.chars.encode_utf16())
653
0
    }
654
}
655
656
// /////////////////////////////////////////////////////////////////////////
657
658
/// Parallel iterator over substrings separated by a pattern
659
#[derive(Debug, Clone)]
660
pub struct Split<'ch, P: Pattern> {
661
    chars: &'ch str,
662
    separator: P,
663
}
664
665
impl<'ch, P: Pattern> Split<'ch, P> {
666
0
    fn new(chars: &'ch str, separator: P) -> Self {
667
0
        Split { chars, separator }
668
0
    }
669
}
670
671
impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
672
    type Item = &'ch str;
673
674
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
675
0
    where
676
0
        C: UnindexedConsumer<Self::Item>,
677
0
    {
678
0
        let producer = SplitProducer::new(self.chars, &self.separator);
679
0
        bridge_unindexed(producer, consumer)
680
0
    }
681
}
682
683
/// Implement support for `SplitProducer`.
684
impl<'ch, P: Pattern> Fissile<P> for &'ch str {
685
0
    fn length(&self) -> usize {
686
0
        self.len()
687
0
    }
688
689
0
    fn midpoint(&self, end: usize) -> usize {
690
0
        // First find a suitable UTF-8 boundary.
691
0
        find_char_midpoint(&self[..end])
692
0
    }
693
694
0
    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
695
0
        separator.find_in(&self[start..end])
696
0
    }
697
698
0
    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
699
0
        separator.rfind_in(&self[..end])
700
0
    }
701
702
0
    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
703
0
        if INCL {
704
            // include the separator in the left side
705
0
            let separator = self[index..].chars().next().unwrap();
706
0
            self.split_at(index + separator.len_utf8())
707
        } else {
708
0
            let (left, right) = self.split_at(index);
709
0
            let mut right_iter = right.chars();
710
0
            right_iter.next(); // skip the separator
711
0
            (left, right_iter.as_str())
712
        }
713
0
    }
714
715
0
    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
716
0
    where
717
0
        F: Folder<Self>,
718
0
    {
719
0
        if INCL {
720
0
            debug_assert!(!skip_last);
721
0
            separator.fold_inclusive_splits(self, folder)
722
        } else {
723
0
            separator.fold_splits(self, folder, skip_last)
724
        }
725
0
    }
726
}
727
728
// /////////////////////////////////////////////////////////////////////////
729
730
/// Parallel iterator over substrings separated by a pattern
731
#[derive(Debug, Clone)]
732
pub struct SplitInclusive<'ch, P: Pattern> {
733
    chars: &'ch str,
734
    separator: P,
735
}
736
737
impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
738
0
    fn new(chars: &'ch str, separator: P) -> Self {
739
0
        SplitInclusive { chars, separator }
740
0
    }
741
}
742
743
impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
744
    type Item = &'ch str;
745
746
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
747
0
    where
748
0
        C: UnindexedConsumer<Self::Item>,
749
0
    {
750
0
        let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
751
0
        bridge_unindexed(producer, consumer)
752
0
    }
753
}
754
755
// /////////////////////////////////////////////////////////////////////////
756
757
/// Parallel iterator over substrings separated by a terminator pattern
758
#[derive(Debug, Clone)]
759
pub struct SplitTerminator<'ch, P: Pattern> {
760
    chars: &'ch str,
761
    terminator: P,
762
}
763
764
struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
765
    splitter: SplitProducer<'sep, P, &'ch str>,
766
    skip_last: bool,
767
}
768
769
impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
770
0
    fn new(chars: &'ch str, terminator: P) -> Self {
771
0
        SplitTerminator { chars, terminator }
772
0
    }
773
}
774
775
impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
776
0
    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
777
0
        SplitTerminatorProducer {
778
0
            splitter: SplitProducer::new(chars, terminator),
779
0
            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
780
        }
781
0
    }
782
}
783
784
impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
785
    type Item = &'ch str;
786
787
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
788
0
    where
789
0
        C: UnindexedConsumer<Self::Item>,
790
0
    {
791
0
        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
792
0
        bridge_unindexed(producer, consumer)
793
0
    }
794
}
795
796
impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
797
    type Item = &'ch str;
798
799
0
    fn split(mut self) -> (Self, Option<Self>) {
800
0
        let (left, right) = self.splitter.split();
801
0
        self.splitter = left;
802
0
        let right = right.map(|right| {
803
0
            let skip_last = self.skip_last;
804
0
            self.skip_last = false;
805
0
            SplitTerminatorProducer {
806
0
                splitter: right,
807
0
                skip_last,
808
0
            }
809
0
        });
810
0
        (self, right)
811
0
    }
812
813
0
    fn fold_with<F>(self, folder: F) -> F
814
0
    where
815
0
        F: Folder<Self::Item>,
816
0
    {
817
0
        self.splitter.fold_with(folder, self.skip_last)
818
0
    }
819
}
820
821
// /////////////////////////////////////////////////////////////////////////
822
823
/// Parallel iterator over lines in a string.
///
/// Wraps the string slice whose lines are yielded.
824
#[derive(Debug, Clone)]
825
pub struct Lines<'ch>(&'ch str);
826
827
/// Drop a single trailing `'\r'` from `line`, if there is one.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(trimmed) => trimmed,
        None => line,
    }
}
831
832
impl<'ch> ParallelIterator for Lines<'ch> {
833
    type Item = &'ch str;
834
835
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
836
0
    where
837
0
        C: UnindexedConsumer<Self::Item>,
838
0
    {
839
0
        self.0
840
0
            .par_split_terminator('\n')
841
0
            .map(no_carriage_return)
842
0
            .drive_unindexed(consumer)
843
0
    }
844
}
845
846
// /////////////////////////////////////////////////////////////////////////
847
848
/// Parallel iterator yielding the whitespace-separated substrings of a
/// string slice.
///
/// Wraps the borrowed input; the splitting itself happens when the iterator
/// is driven.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
851
852
/// Filter predicate: `true` when the referenced string slice has at least
/// one byte.
#[inline]
fn not_empty(s: &&str) -> bool {
    let text: &str = s;
    !text.is_empty()
}
856
857
impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
858
    type Item = &'ch str;
859
860
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
861
0
    where
862
0
        C: UnindexedConsumer<Self::Item>,
863
0
    {
864
0
        self.0
865
0
            .par_split(char::is_whitespace)
866
0
            .filter(not_empty)
867
0
            .drive_unindexed(consumer)
868
0
    }
869
}
870
871
// /////////////////////////////////////////////////////////////////////////
872
873
/// Parallel iterator yielding the substrings of a string slice that are
/// separated by ASCII whitespace.
///
/// Wraps the borrowed input; the splitting itself happens when the iterator
/// is driven.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
876
877
/// By-value adapter around `char::is_ascii_whitespace`, which takes `&self`.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    char::is_ascii_whitespace(&c)
}
881
882
impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
883
    type Item = &'ch str;
884
885
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
886
0
    where
887
0
        C: UnindexedConsumer<Self::Item>,
888
0
    {
889
0
        self.0
890
0
            .par_split(is_ascii_whitespace)
891
0
            .filter(not_empty)
892
0
            .drive_unindexed(consumer)
893
0
    }
894
}
895
896
// /////////////////////////////////////////////////////////////////////////
897
898
/// Parallel iterator over substrings that match a pattern
899
#[derive(Debug, Clone)]
900
pub struct Matches<'ch, P: Pattern> {
901
    chars: &'ch str,
902
    pattern: P,
903
}
904
905
struct MatchesProducer<'ch, 'pat, P: Pattern> {
906
    chars: &'ch str,
907
    pattern: &'pat P,
908
}
909
910
impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
911
    type Item = &'ch str;
912
913
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
914
0
    where
915
0
        C: UnindexedConsumer<Self::Item>,
916
0
    {
917
0
        let producer = MatchesProducer {
918
0
            chars: self.chars,
919
0
            pattern: &self.pattern,
920
0
        };
921
0
        bridge_unindexed(producer, consumer)
922
0
    }
923
}
924
925
impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
926
    type Item = &'ch str;
927
928
0
    fn split(self) -> (Self, Option<Self>) {
929
0
        match split(self.chars) {
930
0
            Some((left, right)) => (
931
0
                MatchesProducer {
932
0
                    chars: left,
933
0
                    ..self
934
0
                },
935
0
                Some(MatchesProducer {
936
0
                    chars: right,
937
0
                    ..self
938
0
                }),
939
0
            ),
940
0
            None => (self, None),
941
        }
942
0
    }
943
944
0
    fn fold_with<F>(self, folder: F) -> F
945
0
    where
946
0
        F: Folder<Self::Item>,
947
0
    {
948
0
        self.pattern.fold_matches(self.chars, folder)
949
0
    }
950
}
951
952
// /////////////////////////////////////////////////////////////////////////
953
954
/// Parallel iterator over substrings that match a pattern, with their positions
955
#[derive(Debug, Clone)]
956
pub struct MatchIndices<'ch, P: Pattern> {
957
    chars: &'ch str,
958
    pattern: P,
959
}
960
961
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
962
    index: usize,
963
    chars: &'ch str,
964
    pattern: &'pat P,
965
}
966
967
impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
968
    type Item = (usize, &'ch str);
969
970
0
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
971
0
    where
972
0
        C: UnindexedConsumer<Self::Item>,
973
0
    {
974
0
        let producer = MatchIndicesProducer {
975
0
            index: 0,
976
0
            chars: self.chars,
977
0
            pattern: &self.pattern,
978
0
        };
979
0
        bridge_unindexed(producer, consumer)
980
0
    }
981
}
982
983
impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
984
    type Item = (usize, &'ch str);
985
986
0
    fn split(self) -> (Self, Option<Self>) {
987
0
        match split(self.chars) {
988
0
            Some((left, right)) => (
989
0
                MatchIndicesProducer {
990
0
                    chars: left,
991
0
                    ..self
992
0
                },
993
0
                Some(MatchIndicesProducer {
994
0
                    chars: right,
995
0
                    index: self.index + left.len(),
996
0
                    ..self
997
0
                }),
998
0
            ),
999
0
            None => (self, None),
1000
        }
1001
0
    }
1002
1003
0
    fn fold_with<F>(self, folder: F) -> F
1004
0
    where
1005
0
        F: Folder<Self::Item>,
1006
0
    {
1007
0
        self.pattern
1008
0
            .fold_match_indices(self.chars, folder, self.index)
1009
0
    }
1010
}