/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_properties-2.0.1/src/trievalue.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | use crate::bidi::BidiMirroringGlyph; |
6 | | use crate::props::{ |
7 | | BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup, |
8 | | GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory, |
9 | | JoiningType, LineBreak, Script, SentenceBreak, VerticalOrientation, WordBreak, |
10 | | }; |
11 | | use crate::script::ScriptWithExt; |
12 | | use core::convert::TryInto; |
13 | | use core::num::TryFromIntError; |
14 | | use zerovec::ule::{AsULE, RawBytesULE}; |
15 | | |
16 | | use icu_collections::codepointtrie::TrieValue; |
17 | | |
18 | | use core::convert::TryFrom; |
19 | | |
20 | | impl TrieValue for CanonicalCombiningClass { |
21 | | type TryFromU32Error = TryFromIntError; |
22 | | |
23 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
24 | 0 | u8::try_from(i).map(Self) |
25 | 0 | } |
26 | | |
27 | 0 | fn to_u32(self) -> u32 { |
28 | 0 | u32::from(self.0) |
29 | 0 | } |
30 | | } |
31 | | |
32 | | impl TrieValue for BidiClass { |
33 | | type TryFromU32Error = TryFromIntError; |
34 | | |
35 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
36 | 0 | u8::try_from(i).map(Self) |
37 | 0 | } |
38 | | |
39 | 0 | fn to_u32(self) -> u32 { |
40 | 0 | u32::from(self.0) |
41 | 0 | } |
42 | | } |
43 | | |
44 | | impl TrieValue for GeneralCategory { |
45 | | type TryFromU32Error = &'static str; |
46 | | |
47 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
48 | | // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum. |
49 | 0 | GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX)) |
50 | 0 | .ok_or("Cannot parse GeneralCategory from integer") |
51 | 0 | } |
52 | | |
53 | 0 | fn to_u32(self) -> u32 { |
54 | 0 | u32::from(self as u8) |
55 | 0 | } |
56 | | } |
57 | | |
58 | | impl TrieValue for Script { |
59 | | type TryFromU32Error = TryFromIntError; |
60 | | |
61 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
62 | 0 | u16::try_from(i).map(Script) |
63 | 0 | } |
64 | | |
65 | 0 | fn to_u32(self) -> u32 { |
66 | 0 | u32::from(self.0) |
67 | 0 | } |
68 | | } |
69 | | |
70 | | impl TrieValue for HangulSyllableType { |
71 | | type TryFromU32Error = TryFromIntError; |
72 | | |
73 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
74 | 0 | u8::try_from(i).map(Self) |
75 | 0 | } |
76 | | |
77 | 0 | fn to_u32(self) -> u32 { |
78 | 0 | u32::from(self.0) |
79 | 0 | } |
80 | | } |
81 | | |
82 | | impl TrieValue for ScriptWithExt { |
83 | | type TryFromU32Error = TryFromIntError; |
84 | | |
85 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
86 | 0 | u16::try_from(i).map(Self) |
87 | 0 | } |
88 | | |
89 | 0 | fn to_u32(self) -> u32 { |
90 | 0 | u32::from(self.0) |
91 | 0 | } |
92 | | } |
93 | | |
94 | | impl TrieValue for EastAsianWidth { |
95 | | type TryFromU32Error = TryFromIntError; |
96 | | |
97 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
98 | 0 | u8::try_from(i).map(Self) |
99 | 0 | } |
100 | | |
101 | 0 | fn to_u32(self) -> u32 { |
102 | 0 | u32::from(self.0) |
103 | 0 | } |
104 | | } |
105 | | |
106 | | impl TrieValue for LineBreak { |
107 | | type TryFromU32Error = TryFromIntError; |
108 | | |
109 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
110 | 0 | u8::try_from(i).map(Self) |
111 | 0 | } |
112 | | |
113 | 0 | fn to_u32(self) -> u32 { |
114 | 0 | u32::from(self.0) |
115 | 0 | } |
116 | | } |
117 | | |
118 | | impl TrieValue for GraphemeClusterBreak { |
119 | | type TryFromU32Error = TryFromIntError; |
120 | | |
121 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
122 | 0 | u8::try_from(i).map(Self) |
123 | 0 | } |
124 | | |
125 | 0 | fn to_u32(self) -> u32 { |
126 | 0 | u32::from(self.0) |
127 | 0 | } |
128 | | } |
129 | | |
130 | | impl TrieValue for WordBreak { |
131 | | type TryFromU32Error = TryFromIntError; |
132 | | |
133 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
134 | 0 | u8::try_from(i).map(Self) |
135 | 0 | } |
136 | | |
137 | 0 | fn to_u32(self) -> u32 { |
138 | 0 | u32::from(self.0) |
139 | 0 | } |
140 | | } |
141 | | |
142 | | impl TrieValue for SentenceBreak { |
143 | | type TryFromU32Error = TryFromIntError; |
144 | | |
145 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
146 | 0 | u8::try_from(i).map(Self) |
147 | 0 | } |
148 | | |
149 | 0 | fn to_u32(self) -> u32 { |
150 | 0 | u32::from(self.0) |
151 | 0 | } |
152 | | } |
153 | | |
154 | | impl TrieValue for IndicConjunctBreak { |
155 | | type TryFromU32Error = TryFromIntError; |
156 | | |
157 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
158 | 0 | u8::try_from(i).map(Self) |
159 | 0 | } |
160 | | |
161 | 0 | fn to_u32(self) -> u32 { |
162 | 0 | u32::from(self.0) |
163 | 0 | } |
164 | | } |
165 | | |
166 | | impl TrieValue for IndicSyllabicCategory { |
167 | | type TryFromU32Error = TryFromIntError; |
168 | | |
169 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
170 | 0 | u8::try_from(i).map(Self) |
171 | 0 | } |
172 | | |
173 | 0 | fn to_u32(self) -> u32 { |
174 | 0 | u32::from(self.0) |
175 | 0 | } |
176 | | } |
177 | | |
178 | | impl TrieValue for VerticalOrientation { |
179 | | type TryFromU32Error = TryFromIntError; |
180 | | |
181 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
182 | 0 | u8::try_from(i).map(Self) |
183 | 0 | } |
184 | | |
185 | 0 | fn to_u32(self) -> u32 { |
186 | 0 | u32::from(self.0) |
187 | 0 | } |
188 | | } |
189 | | |
190 | | // GCG is not used inside tries, but it is used in the name lookup type, and we want |
191 | | // to squeeze it into a u16 for storage. Its named mask values are specced so we can |
192 | | // do this in code. |
193 | | // |
194 | | // This is done by: |
195 | | // - Single-value masks are translated to their corresponding GeneralCategory values |
196 | | // - we know all of the multi-value masks and we give them special values |
197 | | // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata |
198 | | // |
199 | | // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except |
200 | | // with malformed ICU4X generated data. |
201 | | impl AsULE for GeneralCategoryGroup { |
202 | | type ULE = RawBytesULE<2>; |
203 | 0 | fn to_unaligned(self) -> Self::ULE { |
204 | 0 | let value = gcg_to_packed_u16(self); |
205 | 0 | value.to_unaligned() |
206 | 0 | } |
207 | 0 | fn from_unaligned(ule: Self::ULE) -> Self { |
208 | 0 | let value = ule.as_unsigned_int(); |
209 | 0 | packed_u16_to_gcg(value) |
210 | 0 | } |
211 | | } |
212 | | |
213 | 0 | fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup { |
214 | 0 | match value { |
215 | 0 | 0xFFFF => GeneralCategoryGroup::CasedLetter, |
216 | 0 | 0xFFFE => GeneralCategoryGroup::Letter, |
217 | 0 | 0xFFFD => GeneralCategoryGroup::Mark, |
218 | 0 | 0xFFFC => GeneralCategoryGroup::Number, |
219 | 0 | 0xFFFB => GeneralCategoryGroup::Separator, |
220 | 0 | 0xFFFA => GeneralCategoryGroup::Other, |
221 | 0 | 0xFFF9 => GeneralCategoryGroup::Punctuation, |
222 | 0 | 0xFFF8 => GeneralCategoryGroup::Symbol, |
223 | 0 | v if v < 32 => GeneralCategory::new_from_u8(v as u8) |
224 | 0 | .map(|gc| gc.into()) |
225 | 0 | .unwrap_or(GeneralCategoryGroup(0)), |
226 | | // unknown values produce an empty mask |
227 | 0 | _ => GeneralCategoryGroup(0), |
228 | | } |
229 | 0 | } |
230 | | |
231 | 0 | fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 { |
232 | | // if it's a single property, translate to that property |
233 | 0 | if gcg.0.is_power_of_two() { |
234 | | // inverse operation of a bitshift |
235 | 0 | gcg.0.trailing_zeros() as u16 |
236 | | } else { |
237 | 0 | match gcg { |
238 | 0 | GeneralCategoryGroup::CasedLetter => 0xFFFF, |
239 | 0 | GeneralCategoryGroup::Letter => 0xFFFE, |
240 | 0 | GeneralCategoryGroup::Mark => 0xFFFD, |
241 | 0 | GeneralCategoryGroup::Number => 0xFFFC, |
242 | 0 | GeneralCategoryGroup::Separator => 0xFFFB, |
243 | 0 | GeneralCategoryGroup::Other => 0xFFFA, |
244 | 0 | GeneralCategoryGroup::Punctuation => 0xFFF9, |
245 | 0 | GeneralCategoryGroup::Symbol => 0xFFF8, |
246 | 0 | _ => 0xFF00, // random sentinel value |
247 | | } |
248 | | } |
249 | 0 | } |
250 | | |
251 | | impl TrieValue for GeneralCategoryGroup { |
252 | | type TryFromU32Error = TryFromIntError; |
253 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
254 | | // Even though we're dealing with u32s here, TrieValue is about converting |
255 | | // trie storage types to the actual type. This type will always be a packed u16 |
256 | | // in our case since the names map upcasts from u16 |
257 | 0 | u16::try_from(i).map(packed_u16_to_gcg) |
258 | 0 | } |
259 | | |
260 | 0 | fn to_u32(self) -> u32 { |
261 | 0 | u32::from(gcg_to_packed_u16(self)) |
262 | 0 | } |
263 | | } |
264 | | |
265 | | impl TrieValue for BidiMirroringGlyph { |
266 | | type TryFromU32Error = u32; |
267 | | |
268 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
269 | 0 | let code_point = i & 0x1FFFFF; |
270 | 0 | let mirroring_glyph = if code_point == 0 { |
271 | 0 | None |
272 | | } else { |
273 | 0 | Some(char::try_from_u32(code_point).map_err(|_| i)?) |
274 | | }; |
275 | 0 | let mirrored = ((i >> 21) & 0x1) == 1; |
276 | 0 | let paired_bracket_type = { |
277 | 0 | let value = ((i >> 22) & 0x3) as u8; |
278 | 0 | match value { |
279 | 0 | 0 => crate::bidi::BidiPairedBracketType::None, |
280 | 0 | 1 => crate::bidi::BidiPairedBracketType::Open, |
281 | 0 | 2 => crate::bidi::BidiPairedBracketType::Close, |
282 | 0 | _ => return Err(i), |
283 | | } |
284 | | }; |
285 | 0 | Ok(Self { |
286 | 0 | mirrored, |
287 | 0 | mirroring_glyph, |
288 | 0 | paired_bracket_type, |
289 | 0 | }) |
290 | 0 | } |
291 | | |
292 | 0 | fn to_u32(self) -> u32 { |
293 | 0 | self.mirroring_glyph.unwrap_or_default() as u32 |
294 | 0 | | ((self.mirrored as u32) << 21) |
295 | 0 | | (match self.paired_bracket_type { |
296 | 0 | crate::bidi::BidiPairedBracketType::None => 0, |
297 | 0 | crate::bidi::BidiPairedBracketType::Open => 1, |
298 | 0 | crate::bidi::BidiPairedBracketType::Close => 2, |
299 | | } << 22) |
300 | 0 | } |
301 | | } |
302 | | |
303 | | impl TrieValue for JoiningType { |
304 | | type TryFromU32Error = TryFromIntError; |
305 | | |
306 | 0 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
307 | 0 | u8::try_from(i).map(Self) |
308 | 0 | } |
309 | | |
310 | 0 | fn to_u32(self) -> u32 { |
311 | 0 | u32::from(self.0) |
312 | 0 | } |
313 | | } |