Coverage Report

Created: 2025-07-11 07:04

/src/unicode-segmentation/src/grapheme.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2
// file at the top-level directory of this distribution and at
3
// http://rust-lang.org/COPYRIGHT.
4
//
5
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8
// option. This file may not be copied, modified, or distributed
9
// except according to those terms.
10
11
use core::cmp;
12
13
use crate::tables::grapheme::GraphemeCat;
14
15
/// External iterator for grapheme clusters and byte offsets.
16
///
17
/// This struct is created by the [`grapheme_indices`] method on the [`UnicodeSegmentation`]
18
/// trait. See its documentation for more.
19
///
20
/// [`grapheme_indices`]: trait.UnicodeSegmentation.html#tymethod.grapheme_indices
21
/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
22
#[derive(Debug, Clone)]
23
pub struct GraphemeIndices<'a> {
24
    start_offset: usize,
25
    iter: Graphemes<'a>,
26
}
27
28
impl<'a> GraphemeIndices<'a> {
29
    #[inline]
30
    /// View the underlying data (the part yet to be iterated) as a slice of the original string.
31
    ///
32
    /// ```rust
33
    /// # use unicode_segmentation::UnicodeSegmentation;
34
    /// let mut iter = "abc".grapheme_indices(true);
35
    /// assert_eq!(iter.as_str(), "abc");
36
    /// iter.next();
37
    /// assert_eq!(iter.as_str(), "bc");
38
    /// iter.next();
39
    /// iter.next();
40
    /// assert_eq!(iter.as_str(), "");
41
    /// ```
42
0
    pub fn as_str(&self) -> &'a str {
43
0
        self.iter.as_str()
44
0
    }
45
}
46
47
impl<'a> Iterator for GraphemeIndices<'a> {
48
    type Item = (usize, &'a str);
49
50
    #[inline]
51
0
    fn next(&mut self) -> Option<(usize, &'a str)> {
52
0
        self.iter
53
0
            .next()
54
0
            .map(|s| (s.as_ptr() as usize - self.start_offset, s))
55
0
    }
56
57
    #[inline]
58
0
    fn size_hint(&self) -> (usize, Option<usize>) {
59
0
        self.iter.size_hint()
60
0
    }
61
}
62
63
impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
64
    #[inline]
65
0
    fn next_back(&mut self) -> Option<(usize, &'a str)> {
66
0
        self.iter
67
0
            .next_back()
68
0
            .map(|s| (s.as_ptr() as usize - self.start_offset, s))
69
0
    }
70
}
71
72
/// External iterator for a string's
73
/// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries).
74
///
75
/// This struct is created by the [`graphemes`] method on the [`UnicodeSegmentation`] trait. See its
76
/// documentation for more.
77
///
78
/// [`graphemes`]: trait.UnicodeSegmentation.html#tymethod.graphemes
79
/// [`UnicodeSegmentation`]: trait.UnicodeSegmentation.html
80
#[derive(Clone, Debug)]
81
pub struct Graphemes<'a> {
82
    string: &'a str,
83
    cursor: GraphemeCursor,
84
    cursor_back: GraphemeCursor,
85
}
86
87
impl<'a> Graphemes<'a> {
88
    #[inline]
89
    /// View the underlying data (the part yet to be iterated) as a slice of the original string.
90
    ///
91
    /// ```rust
92
    /// # use unicode_segmentation::UnicodeSegmentation;
93
    /// let mut iter = "abc".graphemes(true);
94
    /// assert_eq!(iter.as_str(), "abc");
95
    /// iter.next();
96
    /// assert_eq!(iter.as_str(), "bc");
97
    /// iter.next();
98
    /// iter.next();
99
    /// assert_eq!(iter.as_str(), "");
100
    /// ```
101
0
    pub fn as_str(&self) -> &'a str {
102
0
        &self.string[self.cursor.cur_cursor()..self.cursor_back.cur_cursor()]
103
0
    }
104
}
105
106
impl<'a> Iterator for Graphemes<'a> {
107
    type Item = &'a str;
108
109
    #[inline]
110
6.30k
    fn size_hint(&self) -> (usize, Option<usize>) {
111
6.30k
        let slen = self.cursor_back.cur_cursor() - self.cursor.cur_cursor();
112
6.30k
        (cmp::min(slen, 1), Some(slen))
113
6.30k
    }
<unicode_segmentation::grapheme::Graphemes as core::iter::traits::iterator::Iterator>::size_hint
Line
Count
Source
110
6.30k
    fn size_hint(&self) -> (usize, Option<usize>) {
111
6.30k
        let slen = self.cursor_back.cur_cursor() - self.cursor.cur_cursor();
112
6.30k
        (cmp::min(slen, 1), Some(slen))
113
6.30k
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::Graphemes as core::iter::traits::iterator::Iterator>::size_hint
114
115
    #[inline]
116
24.0M
    fn next(&mut self) -> Option<&'a str> {
117
24.0M
        let start = self.cursor.cur_cursor();
118
24.0M
        if start == self.cursor_back.cur_cursor() {
119
2.11k
            return None;
120
24.0M
        }
121
24.0M
        let next = self.cursor.next_boundary(self.string, 0).unwrap().unwrap();
122
24.0M
        Some(&self.string[start..next])
123
24.0M
    }
<unicode_segmentation::grapheme::Graphemes as core::iter::traits::iterator::Iterator>::next
Line
Count
Source
116
24.0M
    fn next(&mut self) -> Option<&'a str> {
117
24.0M
        let start = self.cursor.cur_cursor();
118
24.0M
        if start == self.cursor_back.cur_cursor() {
119
2.11k
            return None;
120
24.0M
        }
121
24.0M
        let next = self.cursor.next_boundary(self.string, 0).unwrap().unwrap();
122
24.0M
        Some(&self.string[start..next])
123
24.0M
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::Graphemes as core::iter::traits::iterator::Iterator>::next
124
}
125
126
impl<'a> DoubleEndedIterator for Graphemes<'a> {
127
    #[inline]
128
0
    fn next_back(&mut self) -> Option<&'a str> {
129
0
        let end = self.cursor_back.cur_cursor();
130
0
        if end == self.cursor.cur_cursor() {
131
0
            return None;
132
0
        }
133
0
        let prev = self
134
0
            .cursor_back
135
0
            .prev_boundary(self.string, 0)
136
0
            .unwrap()
137
0
            .unwrap();
138
0
        Some(&self.string[prev..end])
139
0
    }
140
}
141
142
#[inline]
143
2.11k
pub fn new_graphemes(s: &str, is_extended: bool) -> Graphemes<'_> {
144
2.11k
    let len = s.len();
145
2.11k
    Graphemes {
146
2.11k
        string: s,
147
2.11k
        cursor: GraphemeCursor::new(0, len, is_extended),
148
2.11k
        cursor_back: GraphemeCursor::new(len, len, is_extended),
149
2.11k
    }
150
2.11k
}
unicode_segmentation::grapheme::new_graphemes
Line
Count
Source
143
2.11k
pub fn new_graphemes(s: &str, is_extended: bool) -> Graphemes<'_> {
144
2.11k
    let len = s.len();
145
2.11k
    Graphemes {
146
2.11k
        string: s,
147
2.11k
        cursor: GraphemeCursor::new(0, len, is_extended),
148
2.11k
        cursor_back: GraphemeCursor::new(len, len, is_extended),
149
2.11k
    }
150
2.11k
}
Unexecuted instantiation: unicode_segmentation::grapheme::new_graphemes
151
152
#[inline]
153
0
pub fn new_grapheme_indices(s: &str, is_extended: bool) -> GraphemeIndices<'_> {
154
0
    GraphemeIndices {
155
0
        start_offset: s.as_ptr() as usize,
156
0
        iter: new_graphemes(s, is_extended),
157
0
    }
158
0
}
159
160
/// maybe unify with PairResult?
161
/// An enum describing information about a potential boundary.
162
#[derive(PartialEq, Eq, Clone, Debug)]
163
enum GraphemeState {
164
    /// No information is known.
165
    Unknown,
166
    /// It is known to not be a boundary.
167
    NotBreak,
168
    /// It is known to be a boundary.
169
    Break,
170
    /// The codepoint after it has Indic_Conjunct_Break=Consonant,
171
    /// so this is a boundary unless it is preceded by another
172
    /// InCB=Consonant followed by a sequence consisting of one or more InCB=Linker
173
    /// and zero or more InCB = Extend (in any order).
174
    InCbConsonant,
175
    /// The codepoint after is a Regional Indicator Symbol, so a boundary iff
176
    /// it is preceded by an even number of RIS codepoints. (GB12, GB13)
177
    Regional,
178
    /// The codepoint after is Extended_Pictographic,
179
    /// so whether it's a boundary depends on pre-context according to GB11.
180
    Emoji,
181
}
182
183
/// Cursor-based segmenter for grapheme clusters.
184
///
185
/// This allows working with ropes and other data structures where the string is not contiguous or
186
/// fully known at initialization time.
187
#[derive(Clone, Debug)]
188
pub struct GraphemeCursor {
189
    /// Current cursor position.
190
    offset: usize,
191
    /// Total length of the string.
192
    len: usize,
193
    /// A config flag indicating whether this cursor computes legacy or extended
194
    /// grapheme cluster boundaries (enables GB9a and GB9b if set).
195
    is_extended: bool,
196
    /// Information about the potential boundary at `offset`
197
    state: GraphemeState,
198
    /// Category of codepoint immediately preceding cursor, if known.
199
    cat_before: Option<GraphemeCat>,
200
    /// Category of codepoint immediately after cursor, if known.
201
    cat_after: Option<GraphemeCat>,
202
    /// If set, at least one more codepoint immediately preceding this offset
203
    /// is needed to resolve whether there's a boundary at `offset`.
204
    pre_context_offset: Option<usize>,
205
    /// The number of `InCB=Linker` codepoints preceding `offset`
206
    /// (potentially intermingled with `InCB=Extend`).
207
    incb_linker_count: Option<usize>,
208
    /// The number of RIS codepoints preceding `offset`. If `pre_context_offset`
209
    /// is set, then counts the number of RIS between that and `offset`, otherwise
210
    /// is an accurate count relative to the string.
211
    ris_count: Option<usize>,
212
    /// Set if a call to `prev_boundary` or `next_boundary` was suspended due
213
    /// to needing more input.
214
    resuming: bool,
215
    /// Cached grapheme category and associated scalar value range.
216
    grapheme_cat_cache: (u32, u32, GraphemeCat),
217
}
218
219
/// An error return indicating that not enough content was available in the
220
/// provided chunk to satisfy the query, and that more content must be provided.
221
#[derive(PartialEq, Eq, Debug)]
222
pub enum GraphemeIncomplete {
223
    /// More pre-context is needed. The caller should call `provide_context`
224
    /// with a chunk ending at the offset given, then retry the query. This
225
    /// will only be returned if the `chunk_start` parameter is nonzero.
226
    PreContext(usize),
227
228
    /// When requesting `prev_boundary`, the cursor is moving past the beginning
229
    /// of the current chunk, so the chunk before that is requested. This will
230
    /// only be returned if the `chunk_start` parameter is nonzero.
231
    PrevChunk,
232
233
    /// When requesting `next_boundary`, the cursor is moving past the end of the
234
    /// current chunk, so the chunk after that is requested. This will only be
235
    /// returned if the chunk ends before the `len` parameter provided on
236
    /// creation of the cursor.
237
    NextChunk, // requesting chunk following the one given
238
239
    /// An error returned when the chunk given does not contain the cursor position.
240
    InvalidOffset,
241
}
242
243
// An enum describing the result from lookup of a pair of categories.
244
#[derive(PartialEq, Eq)]
245
enum PairResult {
246
    /// definitely not a break
247
    NotBreak,
248
    /// definitely a break
249
    Break,
250
    /// a break iff not in extended mode
251
    Extended,
252
    /// a break unless in extended mode and preceded by
253
    /// a sequence of 0 or more InCB=Extend and one or more
254
    /// InCB = Linker (in any order),
255
    /// preceded by another InCB=Consonant
256
    InCbConsonant,
257
    /// a break if preceded by an even number of RIS
258
    Regional,
259
    /// a break unless the ZWJ is preceded by emoji base and (Extend)*
260
    Emoji,
261
}
262
263
#[inline]
264
24.1M
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
265
    use self::PairResult::*;
266
    use crate::tables::grapheme::GraphemeCat::*;
267
24.1M
    match (before, after) {
268
9.48k
        (GC_CR, GC_LF) => NotBreak,                                 // GB3
269
10.6M
        (GC_Control | GC_CR | GC_LF, _) => Break,                   // GB4
270
273k
        (_, GC_Control | GC_CR | GC_LF) => Break,                   // GB5
271
1.11k
        (GC_L, GC_L | GC_V | GC_LV | GC_LVT) => NotBreak,           // GB6
272
1.58k
        (GC_LV | GC_V, GC_V | GC_T) => NotBreak,                    // GB7
273
530
        (GC_LVT | GC_T, GC_T) => NotBreak,                          // GB8
274
53.8k
        (_, GC_Extend | GC_ZWJ) => NotBreak,                        // GB9
275
1.47k
        (_, GC_SpacingMark) => Extended,                            // GB9a
276
2.40k
        (GC_Prepend, _) => Extended,                                // GB9b
277
8.83k
        (_, GC_InCB_Consonant) => InCbConsonant,                    // GB9c
278
10.2k
        (GC_ZWJ, GC_Extended_Pictographic) => Emoji,                // GB11
279
1.87k
        (GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
280
13.0M
        (_, _) => Break,                                            // GB999
281
    }
282
24.0M
}
unicode_segmentation::grapheme::check_pair
Line
Count
Source
264
24.1M
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
265
    use self::PairResult::*;
266
    use crate::tables::grapheme::GraphemeCat::*;
267
24.1M
    match (before, after) {
268
9.48k
        (GC_CR, GC_LF) => NotBreak,                                 // GB3
269
10.6M
        (GC_Control | GC_CR | GC_LF, _) => Break,                   // GB4
270
273k
        (_, GC_Control | GC_CR | GC_LF) => Break,                   // GB5
271
1.11k
        (GC_L, GC_L | GC_V | GC_LV | GC_LVT) => NotBreak,           // GB6
272
1.58k
        (GC_LV | GC_V, GC_V | GC_T) => NotBreak,                    // GB7
273
530
        (GC_LVT | GC_T, GC_T) => NotBreak,                          // GB8
274
53.8k
        (_, GC_Extend | GC_ZWJ) => NotBreak,                        // GB9
275
1.47k
        (_, GC_SpacingMark) => Extended,                            // GB9a
276
2.40k
        (GC_Prepend, _) => Extended,                                // GB9b
277
8.83k
        (_, GC_InCB_Consonant) => InCbConsonant,                    // GB9c
278
10.2k
        (GC_ZWJ, GC_Extended_Pictographic) => Emoji,                // GB11
279
1.87k
        (GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
280
13.0M
        (_, _) => Break,                                            // GB999
281
    }
282
24.0M
}
Unexecuted instantiation: unicode_segmentation::grapheme::check_pair
283
284
impl GraphemeCursor {
285
    /// Create a new cursor. The string and initial offset are given at creation
286
    /// time, but the contents of the string are not. The `is_extended` parameter
287
    /// controls whether extended grapheme clusters are selected.
288
    ///
289
    /// The `offset` parameter must be on a codepoint boundary.
290
    ///
291
    /// ```rust
292
    /// # use unicode_segmentation::GraphemeCursor;
293
    /// let s = "हिन्दी";
294
    /// let mut legacy = GraphemeCursor::new(0, s.len(), false);
295
    /// assert_eq!(legacy.next_boundary(s, 0), Ok(Some("ह".len())));
296
    /// let mut extended = GraphemeCursor::new(0, s.len(), true);
297
    /// assert_eq!(extended.next_boundary(s, 0), Ok(Some("हि".len())));
298
    /// ```
299
4.23k
    pub fn new(offset: usize, len: usize, is_extended: bool) -> GraphemeCursor {
300
4.23k
        let state = if offset == 0 || offset == len {
301
4.23k
            GraphemeState::Break
302
        } else {
303
0
            GraphemeState::Unknown
304
        };
305
4.23k
        GraphemeCursor {
306
4.23k
            offset,
307
4.23k
            len,
308
4.23k
            state,
309
4.23k
            is_extended,
310
4.23k
            cat_before: None,
311
4.23k
            cat_after: None,
312
4.23k
            pre_context_offset: None,
313
4.23k
            incb_linker_count: None,
314
4.23k
            ris_count: None,
315
4.23k
            resuming: false,
316
4.23k
            grapheme_cat_cache: (0, 0, GraphemeCat::GC_Control),
317
4.23k
        }
318
4.23k
    }
319
320
24.1M
    fn grapheme_category(&mut self, ch: char) -> GraphemeCat {
321
        use crate::tables::grapheme as gr;
322
        use crate::tables::grapheme::GraphemeCat::*;
323
324
24.1M
        if ch <= '\u{7e}' {
325
            // Special-case optimization for ascii, except U+007F.  This
326
            // improves performance even for many primarily non-ascii texts,
327
            // due to use of punctuation and white space characters from the
328
            // ascii range.
329
23.8M
            if ch >= '\u{20}' {
330
13.1M
                GC_Any
331
10.7M
            } else if ch == '\n' {
332
97.8k
                GC_LF
333
10.6M
            } else if ch == '\r' {
334
1.60M
                GC_CR
335
            } else {
336
9.02M
                GC_Control
337
            }
338
        } else {
339
            // If this char isn't within the cached range, update the cache to the
340
            // range that includes it.
341
332k
            if (ch as u32) < self.grapheme_cat_cache.0 || (ch as u32) > self.grapheme_cat_cache.1 {
342
130k
                self.grapheme_cat_cache = gr::grapheme_category(ch);
343
201k
            }
344
332k
            self.grapheme_cat_cache.2
345
        }
346
24.1M
    }
347
348
    // Not sure I'm gonna keep this, the advantage over new() seems thin.
349
350
    /// Set the cursor to a new location in the same string.
351
    ///
352
    /// ```rust
353
    /// # use unicode_segmentation::GraphemeCursor;
354
    /// let s = "abcd";
355
    /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
356
    /// assert_eq!(cursor.cur_cursor(), 0);
357
    /// cursor.set_cursor(2);
358
    /// assert_eq!(cursor.cur_cursor(), 2);
359
    /// ```
360
0
    pub fn set_cursor(&mut self, offset: usize) {
361
0
        if offset != self.offset {
362
0
            self.offset = offset;
363
0
            self.state = if offset == 0 || offset == self.len {
364
0
                GraphemeState::Break
365
            } else {
366
0
                GraphemeState::Unknown
367
            };
368
            // reset state derived from text around cursor
369
0
            self.cat_before = None;
370
0
            self.cat_after = None;
371
0
            self.incb_linker_count = None;
372
0
            self.ris_count = None;
373
0
        }
374
0
    }
375
376
    #[inline]
377
    /// The current offset of the cursor. Equal to the last value provided to
378
    /// `new()` or `set_cursor()`, or returned from `next_boundary()` or
379
    /// `prev_boundary()`.
380
    ///
381
    /// ```rust
382
    /// # use unicode_segmentation::GraphemeCursor;
383
    /// // Two flags (🇷🇸🇮🇴), each flag is two RIS codepoints, each RIS is 4 bytes.
384
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
385
    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
386
    /// assert_eq!(cursor.cur_cursor(), 4);
387
    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
388
    /// assert_eq!(cursor.cur_cursor(), 8);
389
    /// ```
390
48.1M
    pub fn cur_cursor(&self) -> usize {
391
48.1M
        self.offset
392
48.1M
    }
<unicode_segmentation::grapheme::GraphemeCursor>::cur_cursor
Line
Count
Source
390
48.1M
    pub fn cur_cursor(&self) -> usize {
391
48.1M
        self.offset
392
48.1M
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::cur_cursor
393
394
    /// Provide additional pre-context when it is needed to decide a boundary.
395
    /// The end of the chunk must coincide with the value given in the
396
    /// `GraphemeIncomplete::PreContext` request.
397
    ///
398
    /// ```rust
399
    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
400
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
401
    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
402
    /// // Not enough pre-context to decide if there's a boundary between the two flags.
403
    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(8)));
404
    /// // Provide one more Regional Indicator Symbol of pre-context
405
    /// cursor.provide_context(&flags[4..8], 4);
406
    /// // Still not enough context to decide.
407
    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Err(GraphemeIncomplete::PreContext(4)));
408
    /// // Provide additional requested context.
409
    /// cursor.provide_context(&flags[0..4], 0);
410
    /// // That's enough to decide (it always is when context goes to the start of the string)
411
    /// assert_eq!(cursor.is_boundary(&flags[8..], 8), Ok(true));
412
    /// ```
413
0
    pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
414
        use crate::tables::grapheme as gr;
415
0
        assert!(chunk_start.saturating_add(chunk.len()) == self.pre_context_offset.unwrap());
416
0
        self.pre_context_offset = None;
417
0
        if self.is_extended && chunk_start + chunk.len() == self.offset {
418
0
            let ch = chunk.chars().next_back().unwrap();
419
0
            if self.grapheme_category(ch) == gr::GC_Prepend {
420
0
                self.decide(false); // GB9b
421
0
                return;
422
0
            }
423
0
        }
424
0
        match self.state {
425
0
            GraphemeState::InCbConsonant => self.handle_incb_consonant(chunk, chunk_start),
426
0
            GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
427
0
            GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
428
            _ => {
429
0
                if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
430
0
                    let ch = chunk.chars().next_back().unwrap();
431
0
                    self.cat_before = Some(self.grapheme_category(ch));
432
0
                }
433
            }
434
        }
435
0
    }
436
437
    #[inline]
438
24.1M
    fn decide(&mut self, is_break: bool) {
439
24.1M
        self.state = if is_break {
440
24.0M
            GraphemeState::Break
441
        } else {
442
75.4k
            GraphemeState::NotBreak
443
        };
444
24.1M
    }
<unicode_segmentation::grapheme::GraphemeCursor>::decide
Line
Count
Source
438
24.1M
    fn decide(&mut self, is_break: bool) {
439
24.1M
        self.state = if is_break {
440
24.0M
            GraphemeState::Break
441
        } else {
442
75.4k
            GraphemeState::NotBreak
443
        };
444
24.1M
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::decide
445
446
    #[inline]
447
24.1M
    fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
448
24.1M
        self.decide(is_break);
449
24.1M
        Ok(is_break)
450
24.1M
    }
<unicode_segmentation::grapheme::GraphemeCursor>::decision
Line
Count
Source
447
24.1M
    fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
448
24.1M
        self.decide(is_break);
449
24.1M
        Ok(is_break)
450
24.1M
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::decision
451
452
    #[inline]
453
19.1k
    fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
454
19.1k
        if self.state == GraphemeState::Break {
455
15.4k
            Ok(true)
456
3.69k
        } else if self.state == GraphemeState::NotBreak {
457
3.69k
            Ok(false)
458
0
        } else if let Some(pre_context_offset) = self.pre_context_offset {
459
0
            Err(GraphemeIncomplete::PreContext(pre_context_offset))
460
        } else {
461
0
            unreachable!("inconsistent state");
462
        }
463
19.1k
    }
<unicode_segmentation::grapheme::GraphemeCursor>::is_boundary_result
Line
Count
Source
453
19.1k
    fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
454
19.1k
        if self.state == GraphemeState::Break {
455
15.4k
            Ok(true)
456
3.69k
        } else if self.state == GraphemeState::NotBreak {
457
3.69k
            Ok(false)
458
0
        } else if let Some(pre_context_offset) = self.pre_context_offset {
459
0
            Err(GraphemeIncomplete::PreContext(pre_context_offset))
460
        } else {
461
0
            unreachable!("inconsistent state");
462
        }
463
19.1k
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::is_boundary_result
464
465
    /// For handling rule GB9c:
466
    ///
467
    /// There's an `InCB=Consonant` after this, and we need to look back
468
    /// to verify whether there should be a break.
469
    ///
470
    /// Seek backward to find an `InCB=Linker` preceded by an `InCB=Consonant`
471
    /// (potentially separated by some number of `InCB=Linker` or `InCB=Extend`).
472
    /// If we find the consonant in question, then there's no break; if we find a consonant
473
    /// with no linker, or a non-linker non-extend non-consonant, or the start of text, there's a break;
474
    /// otherwise we need more context
475
    #[inline]
476
8.83k
    fn handle_incb_consonant(&mut self, chunk: &str, chunk_start: usize) {
477
        use crate::tables::{self, grapheme as gr};
478
479
        // GB9c only applies to extended grapheme clusters
480
8.83k
        if !self.is_extended {
481
0
            self.decide(true);
482
0
            return;
483
8.83k
        }
484
8.83k
485
8.83k
        let mut incb_linker_count = self.incb_linker_count.unwrap_or(0);
486
487
16.1k
        for ch in chunk.chars().rev() {
488
16.1k
            if tables::is_incb_linker(ch) {
489
3.75k
                // We found an InCB linker
490
3.75k
                incb_linker_count += 1;
491
3.75k
                self.incb_linker_count = Some(incb_linker_count);
492
12.3k
            } else if tables::derived_property::InCB_Extend(ch) {
493
3.60k
                // We ignore InCB extends, continue
494
3.60k
            } else {
495
                // Prev character is neither linker nor extend, break suppressed iff it's InCB=Consonant
496
8.76k
                let result = !(self.incb_linker_count.unwrap_or(0) > 0
497
2.09k
                    && self.grapheme_category(ch) == gr::GC_InCB_Consonant);
498
8.76k
                self.decide(result);
499
8.76k
                return;
500
            }
501
        }
502
503
63
        if chunk_start == 0 {
504
63
            // Start of text and we still haven't found a consonant, so break
505
63
            self.decide(true);
506
63
        } else {
507
0
            // We need more context
508
0
            self.pre_context_offset = Some(chunk_start);
509
0
            self.state = GraphemeState::InCbConsonant;
510
0
        }
511
8.83k
    }
<unicode_segmentation::grapheme::GraphemeCursor>::handle_incb_consonant
Line
Count
Source
476
8.83k
    fn handle_incb_consonant(&mut self, chunk: &str, chunk_start: usize) {
477
        use crate::tables::{self, grapheme as gr};
478
479
        // GB9c only applies to extended grapheme clusters
480
8.83k
        if !self.is_extended {
481
0
            self.decide(true);
482
0
            return;
483
8.83k
        }
484
8.83k
485
8.83k
        let mut incb_linker_count = self.incb_linker_count.unwrap_or(0);
486
487
16.1k
        for ch in chunk.chars().rev() {
488
16.1k
            if tables::is_incb_linker(ch) {
489
3.75k
                // We found an InCB linker
490
3.75k
                incb_linker_count += 1;
491
3.75k
                self.incb_linker_count = Some(incb_linker_count);
492
12.3k
            } else if tables::derived_property::InCB_Extend(ch) {
493
3.60k
                // We ignore InCB extends, continue
494
3.60k
            } else {
495
                // Prev character is neither linker nor extend, break suppressed iff it's InCB=Consonant
496
8.76k
                let result = !(self.incb_linker_count.unwrap_or(0) > 0
497
2.09k
                    && self.grapheme_category(ch) == gr::GC_InCB_Consonant);
498
8.76k
                self.decide(result);
499
8.76k
                return;
500
            }
501
        }
502
503
63
        if chunk_start == 0 {
504
63
            // Start of text and we still haven't found a consonant, so break
505
63
            self.decide(true);
506
63
        } else {
507
0
            // We need more context
508
0
            self.pre_context_offset = Some(chunk_start);
509
0
            self.state = GraphemeState::InCbConsonant;
510
0
        }
511
8.83k
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::handle_incb_consonant
512
513
    #[inline]
514
28
    fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
515
        use crate::tables::grapheme as gr;
516
28
        let mut ris_count = self.ris_count.unwrap_or(0);
517
28
        for ch in chunk.chars().rev() {
518
28
            if self.grapheme_category(ch) != gr::GC_Regional_Indicator {
519
0
                self.ris_count = Some(ris_count);
520
0
                self.decide((ris_count % 2) == 0);
521
0
                return;
522
28
            }
523
28
            ris_count += 1;
524
        }
525
28
        self.ris_count = Some(ris_count);
526
28
        if chunk_start == 0 {
527
28
            self.decide((ris_count % 2) == 0);
528
28
        } else {
529
0
            self.pre_context_offset = Some(chunk_start);
530
0
            self.state = GraphemeState::Regional;
531
0
        }
532
28
    }
<unicode_segmentation::grapheme::GraphemeCursor>::handle_regional
Line
Count
Source
514
28
    fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
515
        use crate::tables::grapheme as gr;
516
28
        let mut ris_count = self.ris_count.unwrap_or(0);
517
28
        for ch in chunk.chars().rev() {
518
28
            if self.grapheme_category(ch) != gr::GC_Regional_Indicator {
519
0
                self.ris_count = Some(ris_count);
520
0
                self.decide((ris_count % 2) == 0);
521
0
                return;
522
28
            }
523
28
            ris_count += 1;
524
        }
525
28
        self.ris_count = Some(ris_count);
526
28
        if chunk_start == 0 {
527
28
            self.decide((ris_count % 2) == 0);
528
28
        } else {
529
0
            self.pre_context_offset = Some(chunk_start);
530
0
            self.state = GraphemeState::Regional;
531
0
        }
532
28
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::handle_regional
533
534
    #[inline]
535
10.2k
    fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
536
        use crate::tables::grapheme as gr;
537
10.2k
        let mut iter = chunk.chars().rev();
538
10.2k
        if let Some(ch) = iter.next() {
539
10.2k
            if self.grapheme_category(ch) != gr::GC_ZWJ {
540
0
                self.decide(true);
541
0
                return;
542
10.2k
            }
543
0
        }
544
11.5k
        for ch in iter {
545
11.5k
            match self.grapheme_category(ch) {
546
1.33k
                gr::GC_Extend => (),
547
                gr::GC_Extended_Pictographic => {
548
3.04k
                    self.decide(false);
549
3.04k
                    return;
550
                }
551
                _ => {
552
7.15k
                    self.decide(true);
553
7.15k
                    return;
554
                }
555
            }
556
        }
557
59
        if chunk_start == 0 {
558
59
            self.decide(true);
559
59
        } else {
560
0
            self.pre_context_offset = Some(chunk_start);
561
0
            self.state = GraphemeState::Emoji;
562
0
        }
563
10.2k
    }
<unicode_segmentation::grapheme::GraphemeCursor>::handle_emoji
Line
Count
Source
535
10.2k
    fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
536
        use crate::tables::grapheme as gr;
537
10.2k
        let mut iter = chunk.chars().rev();
538
10.2k
        if let Some(ch) = iter.next() {
539
10.2k
            if self.grapheme_category(ch) != gr::GC_ZWJ {
540
0
                self.decide(true);
541
0
                return;
542
10.2k
            }
543
0
        }
544
11.5k
        for ch in iter {
545
11.5k
            match self.grapheme_category(ch) {
546
1.33k
                gr::GC_Extend => (),
547
                gr::GC_Extended_Pictographic => {
548
3.04k
                    self.decide(false);
549
3.04k
                    return;
550
                }
551
                _ => {
552
7.15k
                    self.decide(true);
553
7.15k
                    return;
554
                }
555
            }
556
        }
557
59
        if chunk_start == 0 {
558
59
            self.decide(true);
559
59
        } else {
560
0
            self.pre_context_offset = Some(chunk_start);
561
0
            self.state = GraphemeState::Emoji;
562
0
        }
563
10.2k
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::handle_emoji
564
565
    #[inline]
566
    /// Determine whether the current cursor location is a grapheme cluster boundary.
567
    /// Only a part of the string need be supplied. If `chunk_start` is nonzero or
568
    /// the length of `chunk` is not equal to `len` on creation, then this method
569
    /// may return `GraphemeIncomplete::PreContext`. The caller should then
570
    /// call `provide_context` with the requested chunk, then retry calling this
571
    /// method.
572
    ///
573
    /// For partial chunks, if the cursor is not at the beginning or end of the
574
    /// string, the chunk should contain at least the codepoint following the cursor.
575
    /// If the string is nonempty, the chunk must be nonempty.
576
    ///
577
    /// All calls should have consistent chunk contents (ie, if a chunk provides
578
    /// content for a given slice, all further chunks covering that slice must have
579
    /// the same content for it).
580
    ///
581
    /// ```rust
582
    /// # use unicode_segmentation::GraphemeCursor;
583
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
584
    /// let mut cursor = GraphemeCursor::new(8, flags.len(), false);
585
    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(true));
586
    /// cursor.set_cursor(12);
587
    /// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
588
    /// ```
589
24.1M
    pub fn is_boundary(
590
24.1M
        &mut self,
591
24.1M
        chunk: &str,
592
24.1M
        chunk_start: usize,
593
24.1M
    ) -> Result<bool, GraphemeIncomplete> {
594
        use crate::tables::grapheme as gr;
595
24.1M
        if self.state == GraphemeState::Break {
596
2.11k
            return Ok(true);
597
24.1M
        }
598
24.1M
        if self.state == GraphemeState::NotBreak {
599
0
            return Ok(false);
600
24.1M
        }
601
24.1M
        if (self.offset < chunk_start || self.offset >= chunk_start.saturating_add(chunk.len()))
602
0
            && (self.offset > chunk_start.saturating_add(chunk.len()) || self.cat_after.is_none())
603
        {
604
0
            return Err(GraphemeIncomplete::InvalidOffset);
605
24.1M
        }
606
24.1M
        if let Some(pre_context_offset) = self.pre_context_offset {
607
0
            return Err(GraphemeIncomplete::PreContext(pre_context_offset));
608
24.1M
        }
609
24.1M
        let offset_in_chunk = self.offset.saturating_sub(chunk_start);
610
24.1M
        if self.cat_after.is_none() {
611
0
            let ch = chunk[offset_in_chunk..].chars().next().unwrap();
612
0
            self.cat_after = Some(self.grapheme_category(ch));
613
24.1M
        }
614
24.1M
        if self.offset == chunk_start {
615
0
            let mut need_pre_context = true;
616
0
            match self.cat_after.unwrap() {
617
0
                gr::GC_InCB_Consonant => self.state = GraphemeState::InCbConsonant,
618
0
                gr::GC_Regional_Indicator => self.state = GraphemeState::Regional,
619
0
                gr::GC_Extended_Pictographic => self.state = GraphemeState::Emoji,
620
0
                _ => need_pre_context = self.cat_before.is_none(),
621
            }
622
0
            if need_pre_context {
623
0
                self.pre_context_offset = Some(chunk_start);
624
0
                return Err(GraphemeIncomplete::PreContext(chunk_start));
625
0
            }
626
24.1M
        }
627
24.1M
        if self.cat_before.is_none() {
628
0
            let ch = chunk[..offset_in_chunk].chars().next_back().unwrap();
629
0
            self.cat_before = Some(self.grapheme_category(ch));
630
24.1M
        }
631
24.1M
        match check_pair(self.cat_before.unwrap(), self.cat_after.unwrap()) {
632
66.5k
            PairResult::NotBreak => self.decision(false),
633
24.0M
            PairResult::Break => self.decision(true),
634
            PairResult::Extended => {
635
3.87k
                let is_extended = self.is_extended;
636
3.87k
                self.decision(!is_extended)
637
            }
638
            PairResult::InCbConsonant => {
639
8.83k
                self.handle_incb_consonant(&chunk[..offset_in_chunk], chunk_start);
640
8.83k
                self.is_boundary_result()
641
            }
642
            PairResult::Regional => {
643
1.87k
                if let Some(ris_count) = self.ris_count {
644
1.85k
                    return self.decision((ris_count % 2) == 0);
645
28
                }
646
28
                self.handle_regional(&chunk[..offset_in_chunk], chunk_start);
647
28
                self.is_boundary_result()
648
            }
649
            PairResult::Emoji => {
650
10.2k
                self.handle_emoji(&chunk[..offset_in_chunk], chunk_start);
651
10.2k
                self.is_boundary_result()
652
            }
653
        }
654
24.1M
    }
<unicode_segmentation::grapheme::GraphemeCursor>::is_boundary
Line
Count
Source
589
24.1M
    pub fn is_boundary(
590
24.1M
        &mut self,
591
24.1M
        chunk: &str,
592
24.1M
        chunk_start: usize,
593
24.1M
    ) -> Result<bool, GraphemeIncomplete> {
594
        use crate::tables::grapheme as gr;
595
24.1M
        if self.state == GraphemeState::Break {
596
2.11k
            return Ok(true);
597
24.1M
        }
598
24.1M
        if self.state == GraphemeState::NotBreak {
599
0
            return Ok(false);
600
24.1M
        }
601
24.1M
        if (self.offset < chunk_start || self.offset >= chunk_start.saturating_add(chunk.len()))
602
0
            && (self.offset > chunk_start.saturating_add(chunk.len()) || self.cat_after.is_none())
603
        {
604
0
            return Err(GraphemeIncomplete::InvalidOffset);
605
24.1M
        }
606
24.1M
        if let Some(pre_context_offset) = self.pre_context_offset {
607
0
            return Err(GraphemeIncomplete::PreContext(pre_context_offset));
608
24.1M
        }
609
24.1M
        let offset_in_chunk = self.offset.saturating_sub(chunk_start);
610
24.1M
        if self.cat_after.is_none() {
611
0
            let ch = chunk[offset_in_chunk..].chars().next().unwrap();
612
0
            self.cat_after = Some(self.grapheme_category(ch));
613
24.1M
        }
614
24.1M
        if self.offset == chunk_start {
615
0
            let mut need_pre_context = true;
616
0
            match self.cat_after.unwrap() {
617
0
                gr::GC_InCB_Consonant => self.state = GraphemeState::InCbConsonant,
618
0
                gr::GC_Regional_Indicator => self.state = GraphemeState::Regional,
619
0
                gr::GC_Extended_Pictographic => self.state = GraphemeState::Emoji,
620
0
                _ => need_pre_context = self.cat_before.is_none(),
621
            }
622
0
            if need_pre_context {
623
0
                self.pre_context_offset = Some(chunk_start);
624
0
                return Err(GraphemeIncomplete::PreContext(chunk_start));
625
0
            }
626
24.1M
        }
627
24.1M
        if self.cat_before.is_none() {
628
0
            let ch = chunk[..offset_in_chunk].chars().next_back().unwrap();
629
0
            self.cat_before = Some(self.grapheme_category(ch));
630
24.1M
        }
631
24.1M
        match check_pair(self.cat_before.unwrap(), self.cat_after.unwrap()) {
632
66.5k
            PairResult::NotBreak => self.decision(false),
633
24.0M
            PairResult::Break => self.decision(true),
634
            PairResult::Extended => {
635
3.87k
                let is_extended = self.is_extended;
636
3.87k
                self.decision(!is_extended)
637
            }
638
            PairResult::InCbConsonant => {
639
8.83k
                self.handle_incb_consonant(&chunk[..offset_in_chunk], chunk_start);
640
8.83k
                self.is_boundary_result()
641
            }
642
            PairResult::Regional => {
643
1.87k
                if let Some(ris_count) = self.ris_count {
644
1.85k
                    return self.decision((ris_count % 2) == 0);
645
28
                }
646
28
                self.handle_regional(&chunk[..offset_in_chunk], chunk_start);
647
28
                self.is_boundary_result()
648
            }
649
            PairResult::Emoji => {
650
10.2k
                self.handle_emoji(&chunk[..offset_in_chunk], chunk_start);
651
10.2k
                self.is_boundary_result()
652
            }
653
        }
654
24.1M
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::is_boundary
655
656
    #[inline]
657
    /// Find the next boundary after the current cursor position. Only a part of
658
    /// the string need be supplied. If the chunk is incomplete, then this
659
    /// method might return `GraphemeIncomplete::PreContext` or
660
    /// `GraphemeIncomplete::NextChunk`. In the former case, the caller should
661
    /// call `provide_context` with the requested chunk, then retry. In the
662
    /// latter case, the caller should provide the chunk following the one
663
    /// given, then retry.
664
    ///
665
    /// See `is_boundary` for expectations on the provided chunk.
666
    ///
667
    /// ```rust
668
    /// # use unicode_segmentation::GraphemeCursor;
669
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
670
    /// let mut cursor = GraphemeCursor::new(4, flags.len(), false);
671
    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(8)));
672
    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(Some(16)));
673
    /// assert_eq!(cursor.next_boundary(flags, 0), Ok(None));
674
    /// ```
675
    ///
676
    /// And an example that uses partial strings:
677
    ///
678
    /// ```rust
679
    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
680
    /// let s = "abcd";
681
    /// let mut cursor = GraphemeCursor::new(0, s.len(), false);
682
    /// assert_eq!(cursor.next_boundary(&s[..2], 0), Ok(Some(1)));
683
    /// assert_eq!(cursor.next_boundary(&s[..2], 0), Err(GraphemeIncomplete::NextChunk));
684
    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(2)));
685
    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(3)));
686
    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
687
    /// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
688
    /// ```
689
24.0M
    pub fn next_boundary(
690
24.0M
        &mut self,
691
24.0M
        chunk: &str,
692
24.0M
        chunk_start: usize,
693
24.0M
    ) -> Result<Option<usize>, GraphemeIncomplete> {
694
24.0M
        if self.offset == self.len {
695
0
            return Ok(None);
696
24.0M
        }
697
24.0M
        let mut iter = chunk[self.offset.saturating_sub(chunk_start)..].chars();
698
24.0M
        let mut ch = match iter.next() {
699
24.0M
            Some(ch) => ch,
700
0
            None => return Err(GraphemeIncomplete::NextChunk),
701
        };
702
        loop {
703
24.1M
            if self.resuming {
704
0
                if self.cat_after.is_none() {
705
0
                    self.cat_after = Some(self.grapheme_category(ch));
706
0
                }
707
            } else {
708
24.1M
                self.offset = self.offset.saturating_add(ch.len_utf8());
709
24.1M
                self.state = GraphemeState::Unknown;
710
24.1M
                self.cat_before = self.cat_after.take();
711
24.1M
                if self.cat_before.is_none() {
712
2.11k
                    self.cat_before = Some(self.grapheme_category(ch));
713
24.1M
                }
714
24.1M
                if crate::tables::is_incb_linker(ch) {
715
7.38k
                    self.incb_linker_count = Some(self.incb_linker_count.map_or(1, |c| c + 1));
<unicode_segmentation::grapheme::GraphemeCursor>::next_boundary::{closure#0}
Line
Count
Source
715
7.28k
                    self.incb_linker_count = Some(self.incb_linker_count.map_or(1, |c| c + 1));
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::next_boundary::{closure#0}
716
24.1M
                } else if !crate::tables::derived_property::InCB_Extend(ch) {
717
24.0M
                    self.incb_linker_count = Some(0);
718
24.0M
                }
719
24.1M
                if self.cat_before.unwrap() == GraphemeCat::GC_Regional_Indicator {
720
4.01k
                    self.ris_count = self.ris_count.map(|c| c + 1);
<unicode_segmentation::grapheme::GraphemeCursor>::next_boundary::{closure#1}
Line
Count
Source
720
3.95k
                    self.ris_count = self.ris_count.map(|c| c + 1);
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::next_boundary::{closure#1}
721
24.1M
                } else {
722
24.1M
                    self.ris_count = Some(0);
723
24.1M
                }
724
24.1M
                if let Some(next_ch) = iter.next() {
725
24.1M
                    ch = next_ch;
726
24.1M
                    self.cat_after = Some(self.grapheme_category(ch));
727
24.1M
                } else if self.offset == self.len {
728
2.11k
                    self.decide(true);
729
2.11k
                } else {
730
0
                    self.resuming = true;
731
0
                    return Err(GraphemeIncomplete::NextChunk);
732
                }
733
            }
734
24.1M
            self.resuming = true;
735
24.1M
            if self.is_boundary(chunk, chunk_start)? {
736
24.0M
                self.resuming = false;
737
24.0M
                return Ok(Some(self.offset));
738
75.4k
            }
739
75.4k
            self.resuming = false;
740
        }
741
24.0M
    }
<unicode_segmentation::grapheme::GraphemeCursor>::next_boundary
Line
Count
Source
689
24.0M
    pub fn next_boundary(
690
24.0M
        &mut self,
691
24.0M
        chunk: &str,
692
24.0M
        chunk_start: usize,
693
24.0M
    ) -> Result<Option<usize>, GraphemeIncomplete> {
694
24.0M
        if self.offset == self.len {
695
0
            return Ok(None);
696
24.0M
        }
697
24.0M
        let mut iter = chunk[self.offset.saturating_sub(chunk_start)..].chars();
698
24.0M
        let mut ch = match iter.next() {
699
24.0M
            Some(ch) => ch,
700
0
            None => return Err(GraphemeIncomplete::NextChunk),
701
        };
702
        loop {
703
24.1M
            if self.resuming {
704
0
                if self.cat_after.is_none() {
705
0
                    self.cat_after = Some(self.grapheme_category(ch));
706
0
                }
707
            } else {
708
24.1M
                self.offset = self.offset.saturating_add(ch.len_utf8());
709
24.1M
                self.state = GraphemeState::Unknown;
710
24.1M
                self.cat_before = self.cat_after.take();
711
24.1M
                if self.cat_before.is_none() {
712
2.11k
                    self.cat_before = Some(self.grapheme_category(ch));
713
24.1M
                }
714
24.1M
                if crate::tables::is_incb_linker(ch) {
715
7.38k
                    self.incb_linker_count = Some(self.incb_linker_count.map_or(1, |c| c + 1));
716
24.1M
                } else if !crate::tables::derived_property::InCB_Extend(ch) {
717
24.0M
                    self.incb_linker_count = Some(0);
718
24.0M
                }
719
24.1M
                if self.cat_before.unwrap() == GraphemeCat::GC_Regional_Indicator {
720
4.01k
                    self.ris_count = self.ris_count.map(|c| c + 1);
721
24.1M
                } else {
722
24.1M
                    self.ris_count = Some(0);
723
24.1M
                }
724
24.1M
                if let Some(next_ch) = iter.next() {
725
24.1M
                    ch = next_ch;
726
24.1M
                    self.cat_after = Some(self.grapheme_category(ch));
727
24.1M
                } else if self.offset == self.len {
728
2.11k
                    self.decide(true);
729
2.11k
                } else {
730
0
                    self.resuming = true;
731
0
                    return Err(GraphemeIncomplete::NextChunk);
732
                }
733
            }
734
24.1M
            self.resuming = true;
735
24.1M
            if self.is_boundary(chunk, chunk_start)? {
736
24.0M
                self.resuming = false;
737
24.0M
                return Ok(Some(self.offset));
738
75.4k
            }
739
75.4k
            self.resuming = false;
740
        }
741
24.0M
    }
Unexecuted instantiation: <unicode_segmentation::grapheme::GraphemeCursor>::next_boundary
742
743
    /// Find the previous boundary after the current cursor position. Only a part
744
    /// of the string need be supplied. If the chunk is incomplete, then this
745
    /// method might return `GraphemeIncomplete::PreContext` or
746
    /// `GraphemeIncomplete::PrevChunk`. In the former case, the caller should
747
    /// call `provide_context` with the requested chunk, then retry. In the
748
    /// latter case, the caller should provide the chunk preceding the one
749
    /// given, then retry.
750
    ///
751
    /// See `is_boundary` for expectations on the provided chunk.
752
    ///
753
    /// ```rust
754
    /// # use unicode_segmentation::GraphemeCursor;
755
    /// let flags = "\u{1F1F7}\u{1F1F8}\u{1F1EE}\u{1F1F4}";
756
    /// let mut cursor = GraphemeCursor::new(12, flags.len(), false);
757
    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(8)));
758
    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(Some(0)));
759
    /// assert_eq!(cursor.prev_boundary(flags, 0), Ok(None));
760
    /// ```
761
    ///
762
    /// And an example that uses partial strings (note the exact return is not
763
    /// guaranteed, and may be `PrevChunk` or `PreContext` arbitrarily):
764
    ///
765
    /// ```rust
766
    /// # use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
767
    /// let s = "abcd";
768
    /// let mut cursor = GraphemeCursor::new(4, s.len(), false);
769
    /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Ok(Some(3)));
770
    /// assert_eq!(cursor.prev_boundary(&s[2..4], 2), Err(GraphemeIncomplete::PrevChunk));
771
    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(2)));
772
    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(1)));
773
    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
774
    /// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
775
    /// ```
776
0
    pub fn prev_boundary(
777
0
        &mut self,
778
0
        chunk: &str,
779
0
        chunk_start: usize,
780
0
    ) -> Result<Option<usize>, GraphemeIncomplete> {
781
0
        if self.offset == 0 {
782
0
            return Ok(None);
783
0
        }
784
0
        if self.offset == chunk_start {
785
0
            return Err(GraphemeIncomplete::PrevChunk);
786
0
        }
787
0
        let mut iter = chunk[..self.offset.saturating_sub(chunk_start)]
788
0
            .chars()
789
0
            .rev();
790
0
        let mut ch = iter.next().unwrap();
791
        loop {
792
0
            if self.offset == chunk_start {
793
0
                self.resuming = true;
794
0
                return Err(GraphemeIncomplete::PrevChunk);
795
0
            }
796
0
            if self.resuming {
797
0
                self.cat_before = Some(self.grapheme_category(ch));
798
0
            } else {
799
0
                self.offset -= ch.len_utf8();
800
0
                self.cat_after = self.cat_before.take();
801
0
                self.state = GraphemeState::Unknown;
802
0
                if let Some(incb_linker_count) = self.incb_linker_count {
803
0
                    self.ris_count = if incb_linker_count > 0 && crate::tables::is_incb_linker(ch) {
804
0
                        Some(incb_linker_count - 1)
805
0
                    } else if crate::tables::derived_property::InCB_Extend(ch) {
806
0
                        Some(incb_linker_count)
807
                    } else {
808
0
                        None
809
                    };
810
0
                }
811
0
                if let Some(ris_count) = self.ris_count {
812
0
                    self.ris_count = if ris_count > 0 {
813
0
                        Some(ris_count - 1)
814
                    } else {
815
0
                        None
816
                    };
817
0
                }
818
0
                if let Some(prev_ch) = iter.next() {
819
0
                    ch = prev_ch;
820
0
                    self.cat_before = Some(self.grapheme_category(ch));
821
0
                } else if self.offset == 0 {
822
0
                    self.decide(true);
823
0
                } else {
824
0
                    self.resuming = true;
825
0
                    self.cat_after = Some(self.grapheme_category(ch));
826
0
                    return Err(GraphemeIncomplete::PrevChunk);
827
                }
828
            }
829
0
            self.resuming = true;
830
0
            if self.is_boundary(chunk, chunk_start)? {
831
0
                self.resuming = false;
832
0
                return Ok(Some(self.offset));
833
0
            }
834
0
            self.resuming = false;
835
        }
836
0
    }
837
}
838
839
#[test]
fn test_grapheme_cursor_ris_precontext() {
    // Three flag pairs; the cursor sits between the second and third pair,
    // while the supplied chunk starts in the middle of the first pair.
    let flags = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}";
    let (head, tail) = (&flags[..4], &flags[4..]);
    let mut cursor = GraphemeCursor::new(8, flags.len(), true);

    // Without the leading indicator the answer cannot be determined yet.
    let first_try = cursor.is_boundary(tail, 4);
    assert_eq!(first_try, Err(GraphemeIncomplete::PreContext(4)));

    cursor.provide_context(head, 0);
    let second_try = cursor.is_boundary(tail, 4);
    assert_eq!(second_try, Ok(true));
}
850
851
#[test]
fn test_grapheme_cursor_chunk_start_require_precontext() {
    // The cursor sits between CR and LF, and the chunk begins at the cursor.
    let crlf = "\r\n";
    let (head, tail) = (&crlf[..1], &crlf[1..]);
    let mut cursor = GraphemeCursor::new(1, crlf.len(), true);

    // With nothing before the cursor in the chunk, pre-context is requested.
    let first_try = cursor.is_boundary(tail, 1);
    assert_eq!(first_try, Err(GraphemeIncomplete::PreContext(1)));

    cursor.provide_context(head, 0);
    let second_try = cursor.is_boundary(tail, 1);
    assert_eq!(second_try, Ok(false));
}
862
863
#[test]
fn test_grapheme_cursor_prev_boundary() {
    // The cursor starts one character into the second chunk.
    let text = "abcd";
    let (front, back) = (&text[..2], &text[2..]);
    let mut cursor = GraphemeCursor::new(3, text.len(), true);

    // Stepping back reaches the chunk start, so the previous chunk is needed.
    let from_back = cursor.prev_boundary(back, 2);
    assert_eq!(from_back, Err(GraphemeIncomplete::PrevChunk));

    let from_front = cursor.prev_boundary(front, 0);
    assert_eq!(from_front, Ok(Some(2)));
}
873
874
#[test]
fn test_grapheme_cursor_prev_boundary_chunk_start() {
    // The cursor starts exactly at the beginning of the second chunk.
    let text = "abcd";
    let (front, back) = (&text[..2], &text[2..]);
    let mut cursor = GraphemeCursor::new(2, text.len(), true);

    // Nothing before the cursor is in this chunk, so the previous one is needed.
    let from_back = cursor.prev_boundary(back, 2);
    assert_eq!(from_back, Err(GraphemeIncomplete::PrevChunk));

    let from_front = cursor.prev_boundary(front, 0);
    assert_eq!(from_front, Ok(Some(1)));
}