Coverage Report

Created: 2026-04-29 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_properties-2.2.0/src/trievalue.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use crate::bidi::BidiMirroringGlyph;
6
use crate::props::{
7
    BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
8
    GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory,
9
    JoiningGroup, JoiningType, LineBreak, NumericType, Script, SentenceBreak, VerticalOrientation,
10
    WordBreak,
11
};
12
use crate::script::ScriptWithExt;
13
use core::convert::TryInto;
14
use core::num::TryFromIntError;
15
use zerovec::ule::{AsULE, RawBytesULE};
16
17
use icu_collections::codepointtrie::TrieValue;
18
19
use core::convert::TryFrom;
20
21
impl TrieValue for CanonicalCombiningClass {
22
    type TryFromU32Error = TryFromIntError;
23
24
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
25
0
        u8::try_from(i).map(Self)
26
0
    }
27
28
0
    fn to_u32(self) -> u32 {
29
0
        u32::from(self.0)
30
0
    }
31
}
32
33
impl TrieValue for NumericType {
34
    type TryFromU32Error = TryFromIntError;
35
36
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
37
0
        u8::try_from(i).map(Self)
38
0
    }
39
40
0
    fn to_u32(self) -> u32 {
41
0
        u32::from(self.0)
42
0
    }
43
}
44
45
impl TrieValue for BidiClass {
46
    type TryFromU32Error = TryFromIntError;
47
48
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
49
0
        u8::try_from(i).map(Self)
50
0
    }
51
52
0
    fn to_u32(self) -> u32 {
53
0
        u32::from(self.0)
54
0
    }
55
}
56
57
impl TrieValue for GeneralCategory {
58
    type TryFromU32Error = &'static str;
59
60
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
61
        // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
62
0
        GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
63
0
            .ok_or("Cannot parse GeneralCategory from integer")
64
0
    }
65
66
0
    fn to_u32(self) -> u32 {
67
0
        u32::from(self as u8)
68
0
    }
69
}
70
71
impl TrieValue for Script {
72
    type TryFromU32Error = TryFromIntError;
73
74
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
75
0
        u16::try_from(i).map(Script)
76
0
    }
77
78
0
    fn to_u32(self) -> u32 {
79
0
        u32::from(self.0)
80
0
    }
81
}
82
83
impl TrieValue for HangulSyllableType {
84
    type TryFromU32Error = TryFromIntError;
85
86
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
87
0
        u8::try_from(i).map(Self)
88
0
    }
89
90
0
    fn to_u32(self) -> u32 {
91
0
        u32::from(self.0)
92
0
    }
93
}
94
95
impl TrieValue for ScriptWithExt {
96
    type TryFromU32Error = TryFromIntError;
97
98
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
99
0
        u16::try_from(i).map(Self)
100
0
    }
101
102
0
    fn to_u32(self) -> u32 {
103
0
        u32::from(self.0)
104
0
    }
105
}
106
107
impl TrieValue for EastAsianWidth {
108
    type TryFromU32Error = TryFromIntError;
109
110
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
111
0
        u8::try_from(i).map(Self)
112
0
    }
113
114
0
    fn to_u32(self) -> u32 {
115
0
        u32::from(self.0)
116
0
    }
117
}
118
119
impl TrieValue for LineBreak {
120
    type TryFromU32Error = TryFromIntError;
121
122
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
123
0
        u8::try_from(i).map(Self)
124
0
    }
125
126
0
    fn to_u32(self) -> u32 {
127
0
        u32::from(self.0)
128
0
    }
129
}
130
131
impl TrieValue for GraphemeClusterBreak {
132
    type TryFromU32Error = TryFromIntError;
133
134
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
135
0
        u8::try_from(i).map(Self)
136
0
    }
137
138
0
    fn to_u32(self) -> u32 {
139
0
        u32::from(self.0)
140
0
    }
141
}
142
143
impl TrieValue for WordBreak {
144
    type TryFromU32Error = TryFromIntError;
145
146
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
147
0
        u8::try_from(i).map(Self)
148
0
    }
149
150
0
    fn to_u32(self) -> u32 {
151
0
        u32::from(self.0)
152
0
    }
153
}
154
155
impl TrieValue for SentenceBreak {
156
    type TryFromU32Error = TryFromIntError;
157
158
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
159
0
        u8::try_from(i).map(Self)
160
0
    }
161
162
0
    fn to_u32(self) -> u32 {
163
0
        u32::from(self.0)
164
0
    }
165
}
166
167
impl TrieValue for IndicConjunctBreak {
168
    type TryFromU32Error = TryFromIntError;
169
170
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
171
0
        u8::try_from(i).map(Self)
172
0
    }
173
174
0
    fn to_u32(self) -> u32 {
175
0
        u32::from(self.0)
176
0
    }
177
}
178
179
impl TrieValue for IndicSyllabicCategory {
180
    type TryFromU32Error = TryFromIntError;
181
182
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
183
0
        u8::try_from(i).map(Self)
184
0
    }
185
186
0
    fn to_u32(self) -> u32 {
187
0
        u32::from(self.0)
188
0
    }
189
}
190
191
impl TrieValue for VerticalOrientation {
192
    type TryFromU32Error = TryFromIntError;
193
194
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
195
0
        u8::try_from(i).map(Self)
196
0
    }
197
198
0
    fn to_u32(self) -> u32 {
199
0
        u32::from(self.0)
200
0
    }
201
}
202
203
// GCG is not used inside tries, but it is used in the name lookup type, and we want
204
// to squeeze it into a u16 for storage. Its named mask values are specced so we can
205
// do this in code.
206
//
207
// This is done by:
208
// - Single-value masks are translated to their corresponding GeneralCategory values
209
// - we know all of the multi-value masks and we give them special values
210
// - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata
211
//
212
// In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except
213
// with malformed ICU4X generated data.
214
impl AsULE for GeneralCategoryGroup {
215
    type ULE = RawBytesULE<2>;
216
0
    fn to_unaligned(self) -> Self::ULE {
217
0
        let value = gcg_to_packed_u16(self);
218
0
        value.to_unaligned()
219
0
    }
220
0
    fn from_unaligned(ule: Self::ULE) -> Self {
221
0
        let value = ule.as_unsigned_int();
222
0
        packed_u16_to_gcg(value)
223
0
    }
224
}
225
226
0
fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
227
0
    match value {
228
0
        0xFFFF => GeneralCategoryGroup::CasedLetter,
229
0
        0xFFFE => GeneralCategoryGroup::Letter,
230
0
        0xFFFD => GeneralCategoryGroup::Mark,
231
0
        0xFFFC => GeneralCategoryGroup::Number,
232
0
        0xFFFB => GeneralCategoryGroup::Separator,
233
0
        0xFFFA => GeneralCategoryGroup::Other,
234
0
        0xFFF9 => GeneralCategoryGroup::Punctuation,
235
0
        0xFFF8 => GeneralCategoryGroup::Symbol,
236
0
        v if v < 32 => GeneralCategory::new_from_u8(v as u8)
237
0
            .map(|gc| gc.into())
238
0
            .unwrap_or(GeneralCategoryGroup(0)),
239
        // unknown values produce an empty mask
240
0
        _ => GeneralCategoryGroup(0),
241
    }
242
0
}
243
244
0
fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
245
    // if it's a single property, translate to that property
246
0
    if gcg.0.is_power_of_two() {
247
        // inverse operation of a bitshift
248
0
        gcg.0.trailing_zeros() as u16
249
    } else {
250
0
        match gcg {
251
0
            GeneralCategoryGroup::CasedLetter => 0xFFFF,
252
0
            GeneralCategoryGroup::Letter => 0xFFFE,
253
0
            GeneralCategoryGroup::Mark => 0xFFFD,
254
0
            GeneralCategoryGroup::Number => 0xFFFC,
255
0
            GeneralCategoryGroup::Separator => 0xFFFB,
256
0
            GeneralCategoryGroup::Other => 0xFFFA,
257
0
            GeneralCategoryGroup::Punctuation => 0xFFF9,
258
0
            GeneralCategoryGroup::Symbol => 0xFFF8,
259
0
            _ => 0xFF00, // random sentinel value
260
        }
261
    }
262
0
}
263
264
impl TrieValue for GeneralCategoryGroup {
265
    type TryFromU32Error = TryFromIntError;
266
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
267
        // Even though we're dealing with u32s here, TrieValue is about converting
268
        // trie storage types to the actual type. This type will always be a packed u16
269
        // in our case since the names map upcasts from u16
270
0
        u16::try_from(i).map(packed_u16_to_gcg)
271
0
    }
272
273
0
    fn to_u32(self) -> u32 {
274
0
        u32::from(gcg_to_packed_u16(self))
275
0
    }
276
}
277
278
impl TrieValue for BidiMirroringGlyph {
279
    type TryFromU32Error = u32;
280
281
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
282
0
        let code_point = i & 0x1FFFFF;
283
0
        let mirroring_glyph = if code_point == 0 {
284
0
            None
285
        } else {
286
0
            Some(char::try_from_u32(code_point).map_err(|_| i)?)
287
        };
288
0
        let mirrored = ((i >> 21) & 0x1) == 1;
289
0
        let paired_bracket_type = {
290
0
            let value = ((i >> 22) & 0x3) as u8;
291
0
            match value {
292
0
                0 => crate::bidi::BidiPairedBracketType::None,
293
0
                1 => crate::bidi::BidiPairedBracketType::Open,
294
0
                2 => crate::bidi::BidiPairedBracketType::Close,
295
0
                _ => return Err(i),
296
            }
297
        };
298
0
        Ok(Self {
299
0
            mirrored,
300
0
            mirroring_glyph,
301
0
            paired_bracket_type,
302
0
        })
303
0
    }
304
305
0
    fn to_u32(self) -> u32 {
306
0
        self.mirroring_glyph.unwrap_or_default() as u32
307
0
            | ((self.mirrored as u32) << 21)
308
0
            | (match self.paired_bracket_type {
309
0
                crate::bidi::BidiPairedBracketType::None => 0,
310
0
                crate::bidi::BidiPairedBracketType::Open => 1,
311
0
                crate::bidi::BidiPairedBracketType::Close => 2,
312
            } << 22)
313
0
    }
314
}
315
316
impl TrieValue for JoiningType {
317
    type TryFromU32Error = TryFromIntError;
318
319
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
320
0
        u8::try_from(i).map(Self)
321
0
    }
322
323
0
    fn to_u32(self) -> u32 {
324
0
        u32::from(self.0)
325
0
    }
326
}
327
328
impl TrieValue for JoiningGroup {
329
    type TryFromU32Error = TryFromIntError;
330
331
0
    fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
332
0
        u8::try_from(i).map(Self)
333
0
    }
334
0
    fn to_u32(self) -> u32 {
335
0
        u32::from(self.0)
336
0
    }
337
}