Coverage Report

Created: 2025-11-16 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/unicase-2.8.1/src/unicode/mod.rs
Line
Count
Source
1
use alloc::string::String;
2
use core::cmp::Ordering;
3
use core::hash::{Hash, Hasher};
4
5
use self::map::lookup;
6
mod map;
7
8
#[derive(Clone, Copy, Debug, Default)]
9
pub struct Unicode<S>(pub S);
10
11
impl<S: AsRef<str>> Unicode<S> {
12
0
    pub fn to_folded_case(&self) -> String {
13
0
        self.0.as_ref().chars().flat_map(lookup).collect()
14
0
    }
15
}
16
17
impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
18
    #[inline]
19
106k
    fn eq(&self, other: &Unicode<S2>) -> bool {
20
106k
        let mut left = self.0.as_ref().chars().flat_map(lookup);
21
106k
        let mut right = other.0.as_ref().chars().flat_map(lookup);
22
23
        // inline Iterator::eq since not added until Rust 1.5
24
        loop {
25
1.23M
            let x = match left.next() {
26
90.4k
                None => return right.next().is_none(),
27
1.14M
                Some(val) => val,
28
            };
29
30
1.14M
            let y = match right.next() {
31
10
                None => return false,
32
1.14M
                Some(val) => val,
33
            };
34
35
1.14M
            if x != y {
36
15.7k
                return false;
37
1.13M
            }
38
        }
39
106k
    }
<unicase::unicode::Unicode<pulldown_cmark::strings::CowStr> as core::cmp::PartialEq<unicase::unicode::Unicode<&str>>>::eq
Line
Count
Source
19
5.01k
    fn eq(&self, other: &Unicode<S2>) -> bool {
20
5.01k
        let mut left = self.0.as_ref().chars().flat_map(lookup);
21
5.01k
        let mut right = other.0.as_ref().chars().flat_map(lookup);
22
23
        // inline Iterator::eq since not added until Rust 1.5
24
        loop {
25
6.87k
            let x = match left.next() {
26
386
                None => return right.next().is_none(),
27
6.48k
                Some(val) => val,
28
            };
29
30
6.48k
            let y = match right.next() {
31
8
                None => return false,
32
6.48k
                Some(val) => val,
33
            };
34
35
6.48k
            if x != y {
36
4.62k
                return false;
37
1.85k
            }
38
        }
39
5.01k
    }
<unicase::unicode::Unicode<pulldown_cmark::strings::CowStr> as core::cmp::PartialEq>::eq
Line
Count
Source
19
94.8k
    fn eq(&self, other: &Unicode<S2>) -> bool {
20
94.8k
        let mut left = self.0.as_ref().chars().flat_map(lookup);
21
94.8k
        let mut right = other.0.as_ref().chars().flat_map(lookup);
22
23
        // inline Iterator::eq since not added until Rust 1.5
24
        loop {
25
1.22M
            let x = match left.next() {
26
89.3k
                None => return right.next().is_none(),
27
1.13M
                Some(val) => val,
28
            };
29
30
1.13M
            let y = match right.next() {
31
2
                None => return false,
32
1.13M
                Some(val) => val,
33
            };
34
35
1.13M
            if x != y {
36
5.50k
                return false;
37
1.12M
            }
38
        }
39
94.8k
    }
<unicase::unicode::Unicode<&str> as core::cmp::PartialEq<unicase::unicode::Unicode<pulldown_cmark::strings::CowStr>>>::eq
Line
Count
Source
19
6.41k
    fn eq(&self, other: &Unicode<S2>) -> bool {
20
6.41k
        let mut left = self.0.as_ref().chars().flat_map(lookup);
21
6.41k
        let mut right = other.0.as_ref().chars().flat_map(lookup);
22
23
        // inline Iterator::eq since not added until Rust 1.5
24
        loop {
25
8.65k
            let x = match left.next() {
26
770
                None => return right.next().is_none(),
27
7.88k
                Some(val) => val,
28
            };
29
30
7.88k
            let y = match right.next() {
31
0
                None => return false,
32
7.88k
                Some(val) => val,
33
            };
34
35
7.88k
            if x != y {
36
5.64k
                return false;
37
2.24k
            }
38
        }
39
6.41k
    }
Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::cmp::PartialEq<unicase::unicode::Unicode<_>>>::eq
40
}
41
42
impl<S: AsRef<str>> Eq for Unicode<S> {}
43
44
impl<T: AsRef<str>> PartialOrd for Unicode<T> {
45
    #[inline]
46
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
47
0
        Some(self.cmp(other))
48
0
    }
49
}
50
51
impl<T: AsRef<str>> Ord for Unicode<T> {
52
    #[inline]
53
0
    fn cmp(&self, other: &Self) -> Ordering {
54
0
        let self_chars = self.0.as_ref().chars().flat_map(lookup);
55
0
        let other_chars = other.0.as_ref().chars().flat_map(lookup);
56
0
        self_chars.cmp(other_chars)
57
0
    }
58
}
59
60
impl<S: AsRef<str>> Hash for Unicode<S> {
61
    #[inline]
62
259k
    fn hash<H: Hasher>(&self, hasher: &mut H) {
63
259k
        let mut buf = [0; 4];
64
7.95M
        for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
<unicase::unicode::Unicode<pulldown_cmark::strings::CowStr> as core::hash::Hash>::hash::<std::hash::random::DefaultHasher>::{closure#0}
Line
Count
Source
64
7.87M
        for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::hash::Hash>::hash::<_>::{closure#0}
65
7.95M
            let len = char_to_utf8(c, &mut buf);
66
            // we can't use `write(buf)` because the ASCII variant uses
67
            // `write_u8`. The docs for Hash say that's technically different.
68
            // ¯\_(ツ)_/¯
69
8.46M
            for &b in &buf[..len] {
70
8.46M
                hasher.write_u8(b);
71
8.46M
            }
72
        }
73
        // prefix-freedom
74
259k
        hasher.write_u8(0xFF);
75
259k
    }
<unicase::unicode::Unicode<pulldown_cmark::strings::CowStr> as core::hash::Hash>::hash::<std::hash::random::DefaultHasher>
Line
Count
Source
62
259k
    fn hash<H: Hasher>(&self, hasher: &mut H) {
63
259k
        let mut buf = [0; 4];
64
7.95M
        for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
65
7.95M
            let len = char_to_utf8(c, &mut buf);
66
            // we can't use `write(buf)` because the ASCII variant uses
67
            // `write_u8`. The docs for Hash say that's technically different.
68
            // ¯\_(ツ)_/¯
69
8.46M
            for &b in &buf[..len] {
70
8.46M
                hasher.write_u8(b);
71
8.46M
            }
72
        }
73
        // prefix-freedom
74
259k
        hasher.write_u8(0xFF);
75
259k
    }
Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::hash::Hash>::hash::<_>
76
}
77
78
#[inline]
79
7.95M
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
80
    const TAG_CONT: u8 = 0b1000_0000;
81
    const TAG_TWO_B: u8 = 0b1100_0000;
82
    const TAG_THREE_B: u8 = 0b1110_0000;
83
    const TAG_FOUR_B: u8 = 0b1111_0000;
84
85
7.95M
    let code = c as u32;
86
7.95M
    if code <= 0x7F {
87
7.56M
        dst[0] = code as u8;
88
7.56M
        1
89
389k
    } else if code <= 0x7FF {
90
286k
        dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
91
286k
        dst[1] = (code & 0x3F) as u8 | TAG_CONT;
92
286k
        2
93
103k
    } else if code <= 0xFFFF {
94
85.5k
        dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
95
85.5k
        dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
96
85.5k
        dst[2] = (code & 0x3F) as u8 | TAG_CONT;
97
85.5k
        3
98
    } else {
99
17.6k
        dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
100
17.6k
        dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
101
17.6k
        dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
102
17.6k
        dst[3] = (code & 0x3F) as u8 | TAG_CONT;
103
17.6k
        4
104
    }
105
7.95M
}
unicase::unicode::char_to_utf8
Line
Count
Source
79
7.95M
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
80
    const TAG_CONT: u8 = 0b1000_0000;
81
    const TAG_TWO_B: u8 = 0b1100_0000;
82
    const TAG_THREE_B: u8 = 0b1110_0000;
83
    const TAG_FOUR_B: u8 = 0b1111_0000;
84
85
7.95M
    let code = c as u32;
86
7.95M
    if code <= 0x7F {
87
7.56M
        dst[0] = code as u8;
88
7.56M
        1
89
389k
    } else if code <= 0x7FF {
90
286k
        dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
91
286k
        dst[1] = (code & 0x3F) as u8 | TAG_CONT;
92
286k
        2
93
103k
    } else if code <= 0xFFFF {
94
85.5k
        dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
95
85.5k
        dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
96
85.5k
        dst[2] = (code & 0x3F) as u8 | TAG_CONT;
97
85.5k
        3
98
    } else {
99
17.6k
        dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
100
17.6k
        dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
101
17.6k
        dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
102
17.6k
        dst[3] = (code & 0x3F) as u8 | TAG_CONT;
103
17.6k
        4
104
    }
105
7.95M
}
Unexecuted instantiation: unicase::unicode::char_to_utf8
106
107
// internal mod so that the enum can be 'pub'
108
// thanks privacy-checker :___(
109
mod fold {
110
    #[derive(Clone, Copy)]
111
    pub enum Fold {
112
        Zero,
113
        One(char),
114
        Two(char, char),
115
        Three(char, char, char),
116
    }
117
118
    impl Iterator for Fold {
119
        type Item = char;
120
        #[inline]
121
20.3M
        fn next(&mut self) -> Option<char> {
122
20.3M
            match *self {
123
10.1M
                Fold::Zero => None,
124
10.1M
                Fold::One(one) => {
125
10.1M
                    *self = Fold::Zero;
126
10.1M
                    Some(one)
127
                }
128
91.5k
                Fold::Two(one, two) => {
129
91.5k
                    *self = Fold::One(two);
130
91.5k
                    Some(one)
131
                }
132
10.6k
                Fold::Three(one, two, three) => {
133
10.6k
                    *self = Fold::Two(one, two);
134
10.6k
                    Some(three)
135
                }
136
            }
137
20.3M
        }
<unicase::unicode::fold::Fold as core::iter::traits::iterator::Iterator>::next
Line
Count
Source
121
20.3M
        fn next(&mut self) -> Option<char> {
122
20.3M
            match *self {
123
10.1M
                Fold::Zero => None,
124
10.1M
                Fold::One(one) => {
125
10.1M
                    *self = Fold::Zero;
126
10.1M
                    Some(one)
127
                }
128
91.5k
                Fold::Two(one, two) => {
129
91.5k
                    *self = Fold::One(two);
130
91.5k
                    Some(one)
131
                }
132
10.6k
                Fold::Three(one, two, three) => {
133
10.6k
                    *self = Fold::Two(one, two);
134
10.6k
                    Some(three)
135
                }
136
            }
137
20.3M
        }
Unexecuted instantiation: <unicase::unicode::fold::Fold as core::iter::traits::iterator::Iterator>::next
138
139
        #[inline]
140
0
        fn size_hint(&self) -> (usize, Option<usize>) {
141
0
            match *self {
142
0
                Fold::Zero => (0, Some(0)),
143
0
                Fold::One(..) => (1, Some(1)),
144
0
                Fold::Two(..) => (2, Some(2)),
145
0
                Fold::Three(..) => (3, Some(3)),
146
            }
147
0
        }
148
    }
149
    impl From<(char,)> for Fold {
150
        #[inline]
151
0
        fn from((one,): (char,)) -> Fold {
152
0
            Fold::One(one)
153
0
        }
154
    }
155
156
    impl From<(char, char)> for Fold {
157
        #[inline]
158
0
        fn from((one, two): (char, char)) -> Fold {
159
0
            Fold::Two(one, two)
160
0
        }
161
    }
162
163
    impl From<(char, char, char)> for Fold {
164
        #[inline]
165
0
        fn from((one, two, three): (char, char, char)) -> Fold {
166
0
            Fold::Three(one, two, three)
167
0
        }
168
    }
169
}
170
171
#[cfg(test)]
172
mod tests {
173
    use super::Unicode;
174
175
    macro_rules! eq {
176
        ($left:expr, $right:expr) => {{
177
            assert_eq!(Unicode($left), Unicode($right));
178
        }};
179
    }
180
181
    #[test]
182
    fn test_ascii_folding() {
183
        eq!("foo bar", "FoO BAR");
184
    }
185
186
    #[test]
187
    fn test_simple_case_folding() {
188
        eq!("στιγμας", "στιγμασ");
189
    }
190
191
    #[test]
192
    fn test_full_case_folding() {
193
        eq!("flour", "flour");
194
        eq!("Maße", "MASSE");
195
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
196
    }
197
198
    #[test]
199
    fn test_to_folded_case() {
200
        assert_eq!(Unicode("Maße").to_folded_case(), "masse");
201
    }
202
203
    #[cfg(feature = "nightly")]
204
    #[bench]
205
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
206
        b.bytes = b"foo bar".len() as u64;
207
        b.iter(|| eq!("foo bar", "FoO BAR"));
208
    }
209
210
    #[cfg(feature = "nightly")]
211
    #[bench]
212
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
213
        b.bytes = "στιγμας".len() as u64;
214
        b.iter(|| eq!("στιγμας", "στιγμασ"));
215
    }
216
}