Coverage Report

Created: 2025-02-21 07:11

/rust/registry/src/index.crates.io-6f17d22bba15001f/unicase-2.8.1/src/unicode/mod.rs
Line
Count
Source (jump to first uncovered line)
1
use alloc::string::String;
2
use core::cmp::Ordering;
3
use core::hash::{Hash, Hasher};
4
5
use self::map::lookup;
6
mod map;
7
8
/// Newtype around a string-like value `S`.
///
/// The trait impls in this module compare, order and hash the wrapped text
/// after mapping every `char` through the `lookup` table, rather than using
/// the raw characters.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(
    /// The wrapped string-like value, stored unmodified.
    pub S,
);
10
11
impl<S: AsRef<str>> Unicode<S> {
12
0
    pub fn to_folded_case(&self) -> String {
13
0
        self.0.as_ref().chars().flat_map(lookup).collect()
14
0
    }
15
}
16
17
impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
18
    #[inline]
19
0
    fn eq(&self, other: &Unicode<S2>) -> bool {
20
0
        let mut left = self.0.as_ref().chars().flat_map(lookup);
21
0
        let mut right = other.0.as_ref().chars().flat_map(lookup);
22
23
        // inline Iterator::eq since not added until Rust 1.5
24
        loop {
25
0
            let x = match left.next() {
26
0
                None => return right.next().is_none(),
27
0
                Some(val) => val,
28
            };
29
30
0
            let y = match right.next() {
31
0
                None => return false,
32
0
                Some(val) => val,
33
0
            };
34
0
35
0
            if x != y {
36
0
                return false;
37
0
            }
38
        }
39
0
    }
Unexecuted instantiation: <unicase::unicode::Unicode<&str> as core::cmp::PartialEq>::eq
Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::cmp::PartialEq<unicase::unicode::Unicode<_>>>::eq
40
}
41
42
// Marker impl: `Eq` adds no methods; it declares that the `PartialEq`
// comparison defined for `Unicode` is a full equivalence relation.
impl<S: AsRef<str>> Eq for Unicode<S> {}
43
44
impl<T: AsRef<str>> PartialOrd for Unicode<T> {
45
    #[inline]
46
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
47
0
        Some(self.cmp(other))
48
0
    }
49
}
50
51
impl<T: AsRef<str>> Ord for Unicode<T> {
52
    #[inline]
53
0
    fn cmp(&self, other: &Self) -> Ordering {
54
0
        let self_chars = self.0.as_ref().chars().flat_map(lookup);
55
0
        let other_chars = other.0.as_ref().chars().flat_map(lookup);
56
0
        self_chars.cmp(other_chars)
57
0
    }
58
}
59
60
impl<S: AsRef<str>> Hash for Unicode<S> {
61
    #[inline]
62
0
    fn hash<H: Hasher>(&self, hasher: &mut H) {
63
0
        let mut buf = [0; 4];
64
0
        for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
Unexecuted instantiation: <unicase::unicode::Unicode<&str> as core::hash::Hash>::hash::<siphasher::sip128::SipHasher13>::{closure#0}
Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::hash::Hash>::hash::<_>::{closure#0}
65
0
            let len = char_to_utf8(c, &mut buf);
66
            // we can't use `write(buf)` because the ASCII variant uses
67
            // `write_u8`. The docs for Hash say that's technically different.
68
            // ¯\_(ツ)_/¯
69
0
            for &b in &buf[..len] {
70
0
                hasher.write_u8(b);
71
0
            }
72
        }
73
        // prefix-freedom
74
0
        hasher.write_u8(0xFF);
75
0
    }
Unexecuted instantiation: <unicase::unicode::Unicode<&str> as core::hash::Hash>::hash::<siphasher::sip128::SipHasher13>
Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::hash::Hash>::hash::<_>
76
}
77
78
/// Encodes `c` as UTF-8 into the front of `dst` and returns how many bytes
/// were written (between 1 and 4).
///
/// Callers should read only `dst[..len]`, where `len` is the return value.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // A 4-byte buffer is large enough for any `char`, so the standard
    // library encoder cannot panic here.
    c.encode_utf8(&mut dst[..]).len()
}
Unexecuted instantiation: unicase::unicode::char_to_utf8
Unexecuted instantiation: unicase::unicode::char_to_utf8
106
107
// internal mod so that the enum can be 'pub'
// thanks privacy-checker :___(
mod fold {
    /// A sequence of zero to three chars that is its own iterator.
    ///
    /// Each call to `next` yields the first remaining char and shrinks the
    /// value to the variant holding the rest, so chars come out in the order
    /// they were stored.
    ///
    /// NOTE(review): presumably this is what the `map` lookup table returns
    /// for each input char — confirm against `map.rs`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the leading char, or `None` once the sequence is empty.
        #[inline]
        fn next(&mut self) -> Option<char> {
            // Split into (char to yield, remaining sequence). `Three` must
            // hand out `one` first, exactly like `Two` and `One`; yielding
            // `three` first would rotate every three-char sequence.
            let (head, rest) = match *self {
                Fold::Zero => return None,
                Fold::One(one) => (one, Fold::Zero),
                Fold::Two(one, two) => (one, Fold::One(two)),
                Fold::Three(one, two, three) => (one, Fold::Two(two, three)),
            };
            *self = rest;
            Some(head)
        }

        /// Exact number of chars still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    impl From<(char,)> for Fold {
        /// Wraps a one-element tuple as `Fold::One`.
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    impl From<(char, char)> for Fold {
        /// Wraps a pair as `Fold::Two`, keeping the tuple order.
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    impl From<(char, char, char)> for Fold {
        /// Wraps a triple as `Fold::Three`, keeping the tuple order.
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
170
171
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 (LATIN SMALL LIGATURE FL) is spelled as an escape so that
        // text tools which normalize ligatures cannot silently turn this
        // into the vacuous comparison "flour" == "flour".
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[test]
    fn test_to_folded_case() {
        assert_eq!(Unicode("Maße").to_folded_case(), "masse");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}