/rust/registry/src/index.crates.io-6f17d22bba15001f/unicase-2.8.1/src/unicode/mod.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use alloc::string::String; |
2 | | use core::cmp::Ordering; |
3 | | use core::hash::{Hash, Hasher}; |
4 | | |
5 | | use self::map::lookup; |
6 | | mod map; |
7 | | |
8 | | #[derive(Clone, Copy, Debug, Default)] |
9 | | pub struct Unicode<S>(pub S); |
10 | | |
11 | | impl<S: AsRef<str>> Unicode<S> { |
12 | 0 | pub fn to_folded_case(&self) -> String { |
13 | 0 | self.0.as_ref().chars().flat_map(lookup).collect() |
14 | 0 | } |
15 | | } |
16 | | |
17 | | impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> { |
18 | | #[inline] |
19 | 0 | fn eq(&self, other: &Unicode<S2>) -> bool { |
20 | 0 | let mut left = self.0.as_ref().chars().flat_map(lookup); |
21 | 0 | let mut right = other.0.as_ref().chars().flat_map(lookup); |
22 | | |
23 | | // inline Iterator::eq since not added until Rust 1.5 |
24 | | loop { |
25 | 0 | let x = match left.next() { |
26 | 0 | None => return right.next().is_none(), |
27 | 0 | Some(val) => val, |
28 | | }; |
29 | | |
30 | 0 | let y = match right.next() { |
31 | 0 | None => return false, |
32 | 0 | Some(val) => val, |
33 | 0 | }; |
34 | 0 |
|
35 | 0 | if x != y { |
36 | 0 | return false; |
37 | 0 | } |
38 | | } |
39 | 0 | } Unexecuted instantiation: <unicase::unicode::Unicode<&str> as core::cmp::PartialEq>::eq Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::cmp::PartialEq<unicase::unicode::Unicode<_>>>::eq |
40 | | } |
41 | | |
42 | | impl<S: AsRef<str>> Eq for Unicode<S> {} |
43 | | |
44 | | impl<T: AsRef<str>> PartialOrd for Unicode<T> { |
45 | | #[inline] |
46 | 0 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
47 | 0 | Some(self.cmp(other)) |
48 | 0 | } |
49 | | } |
50 | | |
51 | | impl<T: AsRef<str>> Ord for Unicode<T> { |
52 | | #[inline] |
53 | 0 | fn cmp(&self, other: &Self) -> Ordering { |
54 | 0 | let self_chars = self.0.as_ref().chars().flat_map(lookup); |
55 | 0 | let other_chars = other.0.as_ref().chars().flat_map(lookup); |
56 | 0 | self_chars.cmp(other_chars) |
57 | 0 | } |
58 | | } |
59 | | |
60 | | impl<S: AsRef<str>> Hash for Unicode<S> { |
61 | | #[inline] |
62 | 0 | fn hash<H: Hasher>(&self, hasher: &mut H) { |
63 | 0 | let mut buf = [0; 4]; |
64 | 0 | for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) { Unexecuted instantiation: <unicase::unicode::Unicode<&str> as core::hash::Hash>::hash::<siphasher::sip128::SipHasher13>::{closure#0} Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::hash::Hash>::hash::<_>::{closure#0} |
65 | 0 | let len = char_to_utf8(c, &mut buf); |
66 | | // we can't use `write(buf)` because the ASCII variant uses |
67 | | // `write_u8`. The docs for Hash say that's technically different. |
68 | | // ¯\_(ツ)_/¯ |
69 | 0 | for &b in &buf[..len] { |
70 | 0 | hasher.write_u8(b); |
71 | 0 | } |
72 | | } |
73 | | // prefix-freedom |
74 | 0 | hasher.write_u8(0xFF); |
75 | 0 | } Unexecuted instantiation: <unicase::unicode::Unicode<&str> as core::hash::Hash>::hash::<siphasher::sip128::SipHasher13> Unexecuted instantiation: <unicase::unicode::Unicode<_> as core::hash::Hash>::hash::<_> |
76 | | } |
77 | | |
/// Encodes `c` as UTF-8 into the start of `dst` and returns how many bytes were written (1 to 4).
///
/// Bytes of `dst` past the returned length are left untouched.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // A four-byte buffer is always large enough for any `char`, so the standard
    // encoder cannot panic here.
    c.encode_utf8(dst).len()
}
106 | | |
107 | | // internal mod so that the enum can be 'pub' |
108 | | // thanks privacy-checker :___( |
mod fold {
    /// The outcome of folding a single character: between zero and three `char`s.
    ///
    /// The value is its own iterator. Each call to `next` hands out the first
    /// remaining character and shrinks the value by one variant, ending at `Zero`.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the remaining characters front to back, then `None`.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Emit the FIRST character and keep the last two in order.
                    // Emitting `three` first would scramble every three-character
                    // folding into three, one, two.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of characters still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    impl From<(char,)> for Fold {
        /// Wraps a one-character folding.
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    impl From<(char, char)> for Fold {
        /// Wraps a two-character folding, preserving order.
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    impl From<(char, char, char)> for Fold {
        /// Wraps a three-character folding, preserving order.
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
170 | | |
#[cfg(test)]
mod tests {
    use super::Unicode;

    /// Asserts that two strings are equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => {{
            assert_eq!(Unicode($left), Unicode($right));
        }};
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 (LATIN SMALL LIGATURE FL) folds to the two letters "fl". It is
        // written as an escape so the ligature cannot be silently normalised to
        // plain "fl", which would turn this into a comparison of a string with itself.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[test]
    fn test_to_folded_case() {
        assert_eq!(Unicode("Maße").to_folded_case(), "masse");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}