Coverage Report

Created: 2025-11-09 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/utf16_iter-1.0.5/src/lib.rs
Line
Count
Source
1
// Copyright Mozilla Foundation
2
//
3
// Licensed under the Apache License (Version 2.0), or the MIT license,
4
// (the "Licenses") at your option. You may not use this file except in
5
// compliance with one of the Licenses. You may obtain copies of the
6
// Licenses at:
7
//
8
//    https://www.apache.org/licenses/LICENSE-2.0
9
//    https://opensource.org/licenses/MIT
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the Licenses is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the Licenses for the specific language governing permissions and
15
// limitations under the Licenses.
16
17
#![no_std]
18
19
//! Provides iteration by `char` over `&[u16]` containing potentially-invalid
20
//! UTF-16 such that errors are replaced with the REPLACEMENT CHARACTER.
21
//!
22
//! The trait `Utf16CharsEx` provides the convenience method `chars()` on
23
//! `u16` slices themselves instead of having to use the more verbose
24
//! `Utf16Chars::new(slice)`.
25
26
mod indices;
27
mod report;
28
29
pub use crate::indices::Utf16CharIndices;
30
pub use crate::report::ErrorReportingUtf16Chars;
31
pub use crate::report::Utf16CharsError;
32
use core::iter::FusedIterator;
33
34
#[inline(always)]
35
0
fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
36
0
    i.wrapping_sub(start) <= (end - start)
37
0
}
38
39
/// Iterator by `char` over `&[u16]` that contains
40
/// potentially-invalid UTF-16. See the crate documentation.
41
#[derive(Debug, Clone)]
42
pub struct Utf16Chars<'a> {
43
    remaining: &'a [u16],
44
}
45
46
impl<'a> Utf16Chars<'a> {
47
    #[inline(always)]
48
    /// Creates the iterator from a `u16` slice.
49
0
    pub fn new(code_units: &'a [u16]) -> Self {
50
0
        Utf16Chars::<'a> {
51
0
            remaining: code_units,
52
0
        }
53
0
    }
54
55
    /// Views the current remaining data in the iterator as a subslice
56
    /// of the original slice.
57
    #[inline(always)]
58
0
    pub fn as_slice(&self) -> &'a [u16] {
59
0
        self.remaining
60
0
    }
61
62
    #[inline(never)]
63
0
    fn surrogate_next(&mut self, surrogate_base: u16, first: u16) -> char {
64
0
        if surrogate_base <= (0xDBFF - 0xD800) {
65
0
            if let Some((&low, tail_tail)) = self.remaining.split_first() {
66
0
                if in_inclusive_range16(low, 0xDC00, 0xDFFF) {
67
0
                    self.remaining = tail_tail;
68
                    return unsafe {
69
0
                        char::from_u32_unchecked(
70
0
                            (u32::from(first) << 10) + u32::from(low)
71
0
                                - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
72
                        )
73
                    };
74
0
                }
75
0
            }
76
0
        }
77
0
        '\u{FFFD}'
78
0
    }
79
80
    #[inline(never)]
81
0
    fn surrogate_next_back(&mut self, last: u16) -> char {
82
0
        if in_inclusive_range16(last, 0xDC00, 0xDFFF) {
83
0
            if let Some((&high, head_head)) = self.remaining.split_last() {
84
0
                if in_inclusive_range16(high, 0xD800, 0xDBFF) {
85
0
                    self.remaining = head_head;
86
                    return unsafe {
87
0
                        char::from_u32_unchecked(
88
0
                            (u32::from(high) << 10) + u32::from(last)
89
0
                                - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32),
90
                        )
91
                    };
92
0
                }
93
0
            }
94
0
        }
95
0
        '\u{FFFD}'
96
0
    }
97
}
98
99
impl<'a> Iterator for Utf16Chars<'a> {
100
    type Item = char;
101
102
    #[inline(always)]
103
0
    fn next(&mut self) -> Option<char> {
104
        // It might be OK to delegate to `ErrorReportingUtf16Chars`, but since
105
        // the methods are rather small, copypaste is probably clearer. Also,
106
        // copypaste would _not_ be equivalent if any part of this was delegated
107
        // to an `inline(never)` helper. However, previous experimentation indicated
108
        // that such a helper didn't help performance here.
109
0
        let (&first, tail) = self.remaining.split_first()?;
110
0
        self.remaining = tail;
111
0
        let surrogate_base = first.wrapping_sub(0xD800);
112
0
        if surrogate_base > (0xDFFF - 0xD800) {
113
0
            return Some(unsafe { char::from_u32_unchecked(u32::from(first)) });
114
0
        }
115
0
        Some(self.surrogate_next(surrogate_base, first))
116
0
    }
117
}
118
119
impl<'a> DoubleEndedIterator for Utf16Chars<'a> {
120
    #[inline(always)]
121
0
    fn next_back(&mut self) -> Option<char> {
122
0
        let (&last, head) = self.remaining.split_last()?;
123
0
        self.remaining = head;
124
0
        if !in_inclusive_range16(last, 0xD800, 0xDFFF) {
125
0
            return Some(unsafe { char::from_u32_unchecked(u32::from(last)) });
126
0
        }
127
0
        Some(self.surrogate_next_back(last))
128
0
    }
129
}
130
131
impl FusedIterator for Utf16Chars<'_> {}
132
133
/// Convenience trait that adds `chars()` and `char_indices()` methods
134
/// similar to the ones on string slices to `u16` slices.
135
pub trait Utf16CharsEx {
136
    fn chars(&self) -> Utf16Chars<'_>;
137
    fn char_indices(&self) -> Utf16CharIndices<'_>;
138
}
139
140
impl Utf16CharsEx for [u16] {
141
    /// Convenience method for creating a UTF-16 iterator
142
    /// for the slice.
143
    #[inline]
144
0
    fn chars(&self) -> Utf16Chars<'_> {
145
0
        Utf16Chars::new(self)
146
0
    }
Unexecuted instantiation: <[u16] as utf16_iter::Utf16CharsEx>::chars
Unexecuted instantiation: <[u16] as utf16_iter::Utf16CharsEx>::chars
Unexecuted instantiation: <[u16] as utf16_iter::Utf16CharsEx>::chars
147
    /// Convenience method for creating a code unit index and
148
    /// UTF-16 iterator for the slice.
149
    #[inline]
150
0
    fn char_indices(&self) -> Utf16CharIndices<'_> {
151
0
        Utf16CharIndices::new(self)
152
0
    }
153
}
154
155
#[cfg(test)]
156
mod tests {
157
    use crate::Utf16CharsEx;
158
159
    #[test]
160
    fn test_boundaries() {
161
        assert!([0xD7FFu16]
162
            .as_slice()
163
            .chars()
164
            .eq(core::iter::once('\u{D7FF}')));
165
        assert!([0xE000u16]
166
            .as_slice()
167
            .chars()
168
            .eq(core::iter::once('\u{E000}')));
169
        assert!([0xD800u16]
170
            .as_slice()
171
            .chars()
172
            .eq(core::iter::once('\u{FFFD}')));
173
        assert!([0xDFFFu16]
174
            .as_slice()
175
            .chars()
176
            .eq(core::iter::once('\u{FFFD}')));
177
    }
178
179
    #[test]
180
    fn test_unpaired() {
181
        assert!([0xD800u16, 0x0061u16]
182
            .as_slice()
183
            .chars()
184
            .eq([0xFFFDu16, 0x0061u16].as_slice().chars()));
185
        assert!([0xDFFFu16, 0x0061u16]
186
            .as_slice()
187
            .chars()
188
            .eq([0xFFFDu16, 0x0061u16].as_slice().chars()));
189
    }
190
191
    #[test]
192
    fn test_unpaired_rev() {
193
        assert!([0xD800u16, 0x0061u16]
194
            .as_slice()
195
            .chars()
196
            .rev()
197
            .eq([0xFFFDu16, 0x0061u16].as_slice().chars().rev()));
198
        assert!([0xDFFFu16, 0x0061u16]
199
            .as_slice()
200
            .chars()
201
            .rev()
202
            .eq([0xFFFDu16, 0x0061u16].as_slice().chars().rev()));
203
    }
204
205
    #[test]
206
    fn test_paired() {
207
        assert!([0xD83Eu16, 0xDD73u16]
208
            .as_slice()
209
            .chars()
210
            .eq(core::iter::once('🥳')));
211
    }
212
213
    #[test]
214
    fn test_paired_rev() {
215
        assert!([0xD83Eu16, 0xDD73u16]
216
            .as_slice()
217
            .chars()
218
            .rev()
219
            .eq(core::iter::once('🥳')));
220
    }
221
222
    #[test]
223
    fn test_as_slice() {
224
        let mut iter = [0x0061u16, 0x0062u16].as_slice().chars();
225
        let at_start = iter.as_slice();
226
        assert_eq!(iter.next(), Some('a'));
227
        let in_middle = iter.as_slice();
228
        assert_eq!(iter.next(), Some('b'));
229
        let at_end = iter.as_slice();
230
        assert_eq!(at_start.len(), 2);
231
        assert_eq!(in_middle.len(), 1);
232
        assert_eq!(at_end.len(), 0);
233
        assert_eq!(at_start[0], 0x0061u16);
234
        assert_eq!(at_start[1], 0x0062u16);
235
        assert_eq!(in_middle[0], 0x0062u16);
236
    }
237
}