/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_properties-2.1.2/src/provider.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | // Provider structs must be stable |
6 | | #![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] |
7 | | |
8 | | //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. |
9 | | //! |
10 | | //! <div class="stab unstable"> |
11 | | //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
12 | | //! including in SemVer minor releases. While the serde representation of data structs is guaranteed |
13 | | //! to be stable, their Rust representation might not be. Use with caution. |
14 | | //! </div> |
15 | | //! |
16 | | //! Read more about data providers: [`icu_provider`] |
17 | | |
18 | | pub mod names; |
19 | | |
20 | | #[cfg(feature = "alloc")] |
21 | | pub use names::{ |
22 | | PropertyNameLongCanonicalCombiningClassV1, PropertyNameShortCanonicalCombiningClassV1, |
23 | | }; |
24 | | |
25 | | pub use names::{ |
26 | | PropertyNameLongBidiClassV1, PropertyNameLongEastAsianWidthV1, |
27 | | PropertyNameLongGeneralCategoryV1, PropertyNameLongGraphemeClusterBreakV1, |
28 | | PropertyNameLongHangulSyllableTypeV1, PropertyNameLongIndicConjunctBreakV1, |
29 | | PropertyNameLongIndicSyllabicCategoryV1, PropertyNameLongJoiningTypeV1, |
30 | | PropertyNameLongLineBreakV1, PropertyNameLongScriptV1, PropertyNameLongSentenceBreakV1, |
31 | | PropertyNameLongVerticalOrientationV1, PropertyNameLongWordBreakV1, |
32 | | PropertyNameParseBidiClassV1, PropertyNameParseCanonicalCombiningClassV1, |
33 | | PropertyNameParseEastAsianWidthV1, PropertyNameParseGeneralCategoryMaskV1, |
34 | | PropertyNameParseGeneralCategoryV1, PropertyNameParseGraphemeClusterBreakV1, |
35 | | PropertyNameParseHangulSyllableTypeV1, PropertyNameParseIndicConjunctBreakV1, |
36 | | PropertyNameParseIndicSyllabicCategoryV1, PropertyNameParseJoiningTypeV1, |
37 | | PropertyNameParseLineBreakV1, PropertyNameParseScriptV1, PropertyNameParseSentenceBreakV1, |
38 | | PropertyNameParseVerticalOrientationV1, PropertyNameParseWordBreakV1, |
39 | | PropertyNameShortBidiClassV1, PropertyNameShortEastAsianWidthV1, |
40 | | PropertyNameShortGeneralCategoryV1, PropertyNameShortGraphemeClusterBreakV1, |
41 | | PropertyNameShortHangulSyllableTypeV1, PropertyNameShortIndicConjunctBreakV1, |
42 | | PropertyNameShortIndicSyllabicCategoryV1, PropertyNameShortJoiningTypeV1, |
43 | | PropertyNameShortLineBreakV1, PropertyNameShortScriptV1, PropertyNameShortSentenceBreakV1, |
44 | | PropertyNameShortVerticalOrientationV1, PropertyNameShortWordBreakV1, |
45 | | }; |
46 | | |
47 | | pub use crate::props::gc::GeneralCategoryULE; |
48 | | use crate::props::*; |
49 | | use crate::script::ScriptWithExt; |
50 | | use core::ops::RangeInclusive; |
51 | | use icu_collections::codepointinvlist::CodePointInversionList; |
52 | | use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; |
53 | | use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; |
54 | | use icu_provider::prelude::*; |
55 | | use zerofrom::ZeroFrom; |
56 | | use zerovec::{VarZeroVec, ZeroSlice}; |
57 | | |
58 | | #[cfg(feature = "compiled_data")] |
59 | | #[derive(Debug)] |
60 | | /// Baked data provider: a unit struct, compiled only with the `compiled_data` feature, on which this file invokes `make_provider!` and the `impl_property_*!` macros. |
61 | | /// |
62 | | /// <div class="stab unstable"> |
63 | | /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
64 | | /// including in SemVer minor releases. In particular, the `DataProvider` implementations are only |
65 | | /// guaranteed to match with this version's `*_unstable` providers. Use with caution. |
66 | | /// </div> |
67 | | pub struct Baked; |
68 | | |
// Anonymous `const _` scope, compiled only with the `compiled_data` feature.
// It glob-imports `icu_properties_data`, declares a local `icu` module that re-exports this
// crate as `properties` and `icu_collections` as `collections`, and then invokes
// `make_provider!` followed by every `impl_property_*!` macro on `Baked`.
// The long/short CanonicalCombiningClass name impls are additionally gated on the `alloc`
// feature, matching the `alloc`-gated re-export of those two markers near the top of the file.
// NOTE(review): `unused_imports` is presumably allowed because not every imported name is
// used under every feature combination — confirm.
69 | | #[cfg(feature = "compiled_data")] |
70 | | #[allow(unused_imports)] |
71 | | const _: () = { |
72 | | use icu_properties_data::*; |
73 | | pub mod icu { |
74 | | pub use crate as properties; |
75 | | pub use icu_collections as collections; |
76 | | } |
77 | | make_provider!(Baked); |
78 | | impl_property_binary_alnum_v1!(Baked); |
79 | | impl_property_binary_alphabetic_v1!(Baked); |
80 | | impl_property_binary_ascii_hex_digit_v1!(Baked); |
81 | | impl_property_binary_basic_emoji_v1!(Baked); |
82 | | impl_property_binary_bidi_control_v1!(Baked); |
83 | | impl_property_binary_bidi_mirrored_v1!(Baked); |
84 | | impl_property_binary_blank_v1!(Baked); |
85 | | impl_property_binary_case_ignorable_v1!(Baked); |
86 | | impl_property_binary_case_sensitive_v1!(Baked); |
87 | | impl_property_binary_cased_v1!(Baked); |
88 | | impl_property_binary_changes_when_casefolded_v1!(Baked); |
89 | | impl_property_binary_changes_when_casemapped_v1!(Baked); |
90 | | impl_property_binary_changes_when_lowercased_v1!(Baked); |
91 | | impl_property_binary_changes_when_nfkc_casefolded_v1!(Baked); |
92 | | impl_property_binary_changes_when_titlecased_v1!(Baked); |
93 | | impl_property_binary_changes_when_uppercased_v1!(Baked); |
94 | | impl_property_binary_dash_v1!(Baked); |
95 | | impl_property_binary_default_ignorable_code_point_v1!(Baked); |
96 | | impl_property_binary_deprecated_v1!(Baked); |
97 | | impl_property_binary_diacritic_v1!(Baked); |
98 | | impl_property_binary_emoji_component_v1!(Baked); |
99 | | impl_property_binary_emoji_modifier_base_v1!(Baked); |
100 | | impl_property_binary_emoji_modifier_v1!(Baked); |
101 | | impl_property_binary_emoji_presentation_v1!(Baked); |
102 | | impl_property_binary_emoji_v1!(Baked); |
103 | | impl_property_binary_extended_pictographic_v1!(Baked); |
104 | | impl_property_binary_extender_v1!(Baked); |
105 | | impl_property_binary_full_composition_exclusion_v1!(Baked); |
106 | | impl_property_binary_graph_v1!(Baked); |
107 | | impl_property_binary_grapheme_base_v1!(Baked); |
108 | | impl_property_binary_grapheme_extend_v1!(Baked); |
109 | | impl_property_binary_grapheme_link_v1!(Baked); |
110 | | impl_property_binary_hex_digit_v1!(Baked); |
111 | | impl_property_binary_hyphen_v1!(Baked); |
112 | | impl_property_binary_id_compat_math_continue_v1!(Baked); |
113 | | impl_property_binary_id_compat_math_start_v1!(Baked); |
114 | | impl_property_binary_id_continue_v1!(Baked); |
115 | | impl_property_binary_id_start_v1!(Baked); |
116 | | impl_property_binary_ideographic_v1!(Baked); |
117 | | impl_property_binary_ids_binary_operator_v1!(Baked); |
118 | | impl_property_binary_ids_trinary_operator_v1!(Baked); |
119 | | impl_property_binary_ids_unary_operator_v1!(Baked); |
120 | | impl_property_binary_join_control_v1!(Baked); |
121 | | impl_property_binary_logical_order_exception_v1!(Baked); |
122 | | impl_property_binary_lowercase_v1!(Baked); |
123 | | impl_property_binary_math_v1!(Baked); |
124 | | impl_property_binary_modifier_combining_mark_v1!(Baked); |
125 | | impl_property_binary_nfc_inert_v1!(Baked); |
126 | | impl_property_binary_nfd_inert_v1!(Baked); |
127 | | impl_property_binary_nfkc_inert_v1!(Baked); |
128 | | impl_property_binary_nfkd_inert_v1!(Baked); |
129 | | impl_property_binary_noncharacter_code_point_v1!(Baked); |
130 | | impl_property_binary_pattern_syntax_v1!(Baked); |
131 | | impl_property_binary_pattern_white_space_v1!(Baked); |
132 | | impl_property_binary_prepended_concatenation_mark_v1!(Baked); |
133 | | impl_property_binary_print_v1!(Baked); |
134 | | impl_property_binary_quotation_mark_v1!(Baked); |
135 | | impl_property_binary_radical_v1!(Baked); |
136 | | impl_property_binary_regional_indicator_v1!(Baked); |
137 | | impl_property_binary_segment_starter_v1!(Baked); |
138 | | impl_property_binary_sentence_terminal_v1!(Baked); |
139 | | impl_property_binary_soft_dotted_v1!(Baked); |
140 | | impl_property_binary_terminal_punctuation_v1!(Baked); |
141 | | impl_property_binary_unified_ideograph_v1!(Baked); |
142 | | impl_property_binary_uppercase_v1!(Baked); |
143 | | impl_property_binary_variation_selector_v1!(Baked); |
144 | | impl_property_binary_white_space_v1!(Baked); |
145 | | impl_property_binary_xdigit_v1!(Baked); |
146 | | impl_property_binary_xid_continue_v1!(Baked); |
147 | | impl_property_binary_xid_start_v1!(Baked); |
148 | | impl_property_enum_bidi_class_v1!(Baked); |
149 | | impl_property_enum_bidi_mirroring_glyph_v1!(Baked); |
150 | | impl_property_enum_canonical_combining_class_v1!(Baked); |
151 | | impl_property_enum_east_asian_width_v1!(Baked); |
152 | | impl_property_enum_general_category_v1!(Baked); |
153 | | impl_property_enum_grapheme_cluster_break_v1!(Baked); |
154 | | impl_property_enum_hangul_syllable_type_v1!(Baked); |
155 | | impl_property_enum_indic_conjunct_break_v1!(Baked); |
156 | | impl_property_enum_indic_syllabic_category_v1!(Baked); |
157 | | impl_property_enum_joining_type_v1!(Baked); |
158 | | impl_property_enum_line_break_v1!(Baked); |
159 | | impl_property_enum_script_v1!(Baked); |
160 | | impl_property_enum_sentence_break_v1!(Baked); |
161 | | impl_property_enum_vertical_orientation_v1!(Baked); |
162 | | impl_property_enum_word_break_v1!(Baked); |
163 | | impl_property_name_long_bidi_class_v1!(Baked); |
164 | | #[cfg(feature = "alloc")] |
165 | | impl_property_name_long_canonical_combining_class_v1!(Baked); |
166 | | impl_property_name_long_east_asian_width_v1!(Baked); |
167 | | impl_property_name_long_general_category_v1!(Baked); |
168 | | impl_property_name_long_grapheme_cluster_break_v1!(Baked); |
169 | | impl_property_name_long_hangul_syllable_type_v1!(Baked); |
170 | | impl_property_name_long_indic_syllabic_category_v1!(Baked); |
171 | | impl_property_name_long_indic_conjunct_break_v1!(Baked); |
172 | | impl_property_name_long_joining_type_v1!(Baked); |
173 | | impl_property_name_long_line_break_v1!(Baked); |
174 | | impl_property_name_long_script_v1!(Baked); |
175 | | impl_property_name_long_sentence_break_v1!(Baked); |
176 | | impl_property_name_long_vertical_orientation_v1!(Baked); |
177 | | impl_property_name_long_word_break_v1!(Baked); |
178 | | impl_property_name_parse_bidi_class_v1!(Baked); |
179 | | impl_property_name_parse_canonical_combining_class_v1!(Baked); |
180 | | impl_property_name_parse_east_asian_width_v1!(Baked); |
181 | | impl_property_name_parse_general_category_mask_v1!(Baked); |
182 | | impl_property_name_parse_general_category_v1!(Baked); |
183 | | impl_property_name_parse_grapheme_cluster_break_v1!(Baked); |
184 | | impl_property_name_parse_hangul_syllable_type_v1!(Baked); |
185 | | impl_property_name_parse_indic_syllabic_category_v1!(Baked); |
186 | | impl_property_name_parse_indic_conjunct_break_v1!(Baked); |
187 | | impl_property_name_parse_joining_type_v1!(Baked); |
188 | | impl_property_name_parse_line_break_v1!(Baked); |
189 | | impl_property_name_parse_script_v1!(Baked); |
190 | | impl_property_name_parse_sentence_break_v1!(Baked); |
191 | | impl_property_name_parse_vertical_orientation_v1!(Baked); |
192 | | impl_property_name_parse_word_break_v1!(Baked); |
193 | | impl_property_name_short_bidi_class_v1!(Baked); |
194 | | #[cfg(feature = "alloc")] |
195 | | impl_property_name_short_canonical_combining_class_v1!(Baked); |
196 | | impl_property_name_short_east_asian_width_v1!(Baked); |
197 | | impl_property_name_short_general_category_v1!(Baked); |
198 | | impl_property_name_short_grapheme_cluster_break_v1!(Baked); |
199 | | impl_property_name_short_hangul_syllable_type_v1!(Baked); |
200 | | impl_property_name_short_indic_syllabic_category_v1!(Baked); |
201 | | impl_property_name_short_indic_conjunct_break_v1!(Baked); |
202 | | impl_property_name_short_joining_type_v1!(Baked); |
203 | | impl_property_name_short_line_break_v1!(Baked); |
204 | | impl_property_name_short_script_v1!(Baked); |
205 | | impl_property_name_short_sentence_break_v1!(Baked); |
206 | | impl_property_name_short_vertical_orientation_v1!(Baked); |
207 | | impl_property_name_short_word_break_v1!(Baked); |
208 | | impl_property_script_with_extensions_v1!(Baked); |
209 | | }; |
210 | | |
211 | | icu_provider::data_marker!( |
212 | | /// Singleton data marker `PropertyBinaryAlnumV1`; its data struct is `PropertyCodePointSet<'static>`. |
213 | | PropertyBinaryAlnumV1, |
214 | | PropertyCodePointSet<'static>, |
215 | | is_singleton = true |
216 | | ); |
217 | | icu_provider::data_marker!( |
218 | | /// Singleton data marker `PropertyBinaryAlphabeticV1`; its data struct is `PropertyCodePointSet<'static>`. |
219 | | PropertyBinaryAlphabeticV1, |
220 | | PropertyCodePointSet<'static>, |
221 | | is_singleton = true |
222 | | ); |
223 | | icu_provider::data_marker!( |
224 | | /// Singleton data marker `PropertyBinaryAsciiHexDigitV1`; its data struct is `PropertyCodePointSet<'static>`. |
225 | | PropertyBinaryAsciiHexDigitV1, |
226 | | PropertyCodePointSet<'static>, |
227 | | is_singleton = true |
228 | | ); |
229 | | icu_provider::data_marker!( |
230 | | /// Singleton data marker `PropertyBinaryBidiControlV1`; its data struct is `PropertyCodePointSet<'static>`. |
231 | | PropertyBinaryBidiControlV1, |
232 | | PropertyCodePointSet<'static>, |
233 | | is_singleton = true |
234 | | ); |
235 | | icu_provider::data_marker!( |
236 | | /// Singleton data marker `PropertyBinaryBidiMirroredV1`; its data struct is `PropertyCodePointSet<'static>`. |
237 | | PropertyBinaryBidiMirroredV1, |
238 | | PropertyCodePointSet<'static>, |
239 | | is_singleton = true |
240 | | ); |
241 | | icu_provider::data_marker!( |
242 | | /// Singleton data marker `PropertyBinaryBlankV1`; its data struct is `PropertyCodePointSet<'static>`. |
243 | | PropertyBinaryBlankV1, |
244 | | PropertyCodePointSet<'static>, |
245 | | is_singleton = true |
246 | | ); |
247 | | icu_provider::data_marker!( |
248 | | /// Singleton data marker `PropertyBinaryCasedV1`; its data struct is `PropertyCodePointSet<'static>`. |
249 | | PropertyBinaryCasedV1, |
250 | | PropertyCodePointSet<'static>, |
251 | | is_singleton = true |
252 | | ); |
253 | | icu_provider::data_marker!( |
254 | | /// Singleton data marker `PropertyBinaryCaseIgnorableV1`; its data struct is `PropertyCodePointSet<'static>`. |
255 | | PropertyBinaryCaseIgnorableV1, |
256 | | PropertyCodePointSet<'static>, |
257 | | is_singleton = true |
258 | | ); |
259 | | icu_provider::data_marker!( |
260 | | /// Singleton data marker `PropertyBinaryCaseSensitiveV1`; its data struct is `PropertyCodePointSet<'static>`. |
261 | | PropertyBinaryCaseSensitiveV1, |
262 | | PropertyCodePointSet<'static>, |
263 | | is_singleton = true |
264 | | ); |
265 | | icu_provider::data_marker!( |
266 | | /// Singleton data marker `PropertyBinaryChangesWhenCasefoldedV1`; its data struct is `PropertyCodePointSet<'static>`. |
267 | | PropertyBinaryChangesWhenCasefoldedV1, |
268 | | PropertyCodePointSet<'static>, |
269 | | is_singleton = true |
270 | | ); |
271 | | icu_provider::data_marker!( |
272 | | /// Singleton data marker `PropertyBinaryChangesWhenCasemappedV1`; its data struct is `PropertyCodePointSet<'static>`. |
273 | | PropertyBinaryChangesWhenCasemappedV1, |
274 | | PropertyCodePointSet<'static>, |
275 | | is_singleton = true |
276 | | ); |
277 | | icu_provider::data_marker!( |
278 | | /// Singleton data marker `PropertyBinaryChangesWhenLowercasedV1`; its data struct is `PropertyCodePointSet<'static>`. |
279 | | PropertyBinaryChangesWhenLowercasedV1, |
280 | | PropertyCodePointSet<'static>, |
281 | | is_singleton = true |
282 | | ); |
283 | | icu_provider::data_marker!( |
284 | | /// Singleton data marker `PropertyBinaryChangesWhenNfkcCasefoldedV1`; its data struct is `PropertyCodePointSet<'static>`. |
285 | | PropertyBinaryChangesWhenNfkcCasefoldedV1, |
286 | | PropertyCodePointSet<'static>, |
287 | | is_singleton = true |
288 | | ); |
289 | | icu_provider::data_marker!( |
290 | | /// Singleton data marker `PropertyBinaryChangesWhenTitlecasedV1`; its data struct is `PropertyCodePointSet<'static>`. |
291 | | PropertyBinaryChangesWhenTitlecasedV1, |
292 | | PropertyCodePointSet<'static>, |
293 | | is_singleton = true |
294 | | ); |
295 | | icu_provider::data_marker!( |
296 | | /// Singleton data marker `PropertyBinaryChangesWhenUppercasedV1`; its data struct is `PropertyCodePointSet<'static>`. |
297 | | PropertyBinaryChangesWhenUppercasedV1, |
298 | | PropertyCodePointSet<'static>, |
299 | | is_singleton = true |
300 | | ); |
301 | | icu_provider::data_marker!( |
302 | | /// Singleton data marker `PropertyBinaryDashV1`; its data struct is `PropertyCodePointSet<'static>`. |
303 | | PropertyBinaryDashV1, |
304 | | PropertyCodePointSet<'static>, |
305 | | is_singleton = true |
306 | | ); |
307 | | icu_provider::data_marker!( |
308 | | /// Singleton data marker `PropertyBinaryDefaultIgnorableCodePointV1`; its data struct is `PropertyCodePointSet<'static>`. |
309 | | PropertyBinaryDefaultIgnorableCodePointV1, |
310 | | PropertyCodePointSet<'static>, |
311 | | is_singleton = true |
312 | | ); |
313 | | icu_provider::data_marker!( |
314 | | /// Singleton data marker `PropertyBinaryDeprecatedV1`; its data struct is `PropertyCodePointSet<'static>`. |
315 | | PropertyBinaryDeprecatedV1, |
316 | | PropertyCodePointSet<'static>, |
317 | | is_singleton = true |
318 | | ); |
319 | | icu_provider::data_marker!( |
320 | | /// Singleton data marker `PropertyBinaryDiacriticV1`; its data struct is `PropertyCodePointSet<'static>`. |
321 | | PropertyBinaryDiacriticV1, |
322 | | PropertyCodePointSet<'static>, |
323 | | is_singleton = true |
324 | | ); |
325 | | icu_provider::data_marker!( |
326 | | /// Singleton data marker `PropertyBinaryEmojiComponentV1`; its data struct is `PropertyCodePointSet<'static>`. |
327 | | PropertyBinaryEmojiComponentV1, |
328 | | PropertyCodePointSet<'static>, |
329 | | is_singleton = true |
330 | | ); |
331 | | icu_provider::data_marker!( |
332 | | /// Singleton data marker `PropertyBinaryEmojiModifierBaseV1`; its data struct is `PropertyCodePointSet<'static>`. |
333 | | PropertyBinaryEmojiModifierBaseV1, |
334 | | PropertyCodePointSet<'static>, |
335 | | is_singleton = true |
336 | | ); |
337 | | icu_provider::data_marker!( |
338 | | /// Singleton data marker `PropertyBinaryEmojiModifierV1`; its data struct is `PropertyCodePointSet<'static>`. |
339 | | PropertyBinaryEmojiModifierV1, |
340 | | PropertyCodePointSet<'static>, |
341 | | is_singleton = true |
342 | | ); |
343 | | icu_provider::data_marker!( |
344 | | /// Singleton data marker `PropertyBinaryEmojiPresentationV1`; its data struct is `PropertyCodePointSet<'static>`. |
345 | | PropertyBinaryEmojiPresentationV1, |
346 | | PropertyCodePointSet<'static>, |
347 | | is_singleton = true |
348 | | ); |
349 | | icu_provider::data_marker!( |
350 | | /// Singleton data marker `PropertyBinaryEmojiV1`; its data struct is `PropertyCodePointSet<'static>`. |
351 | | PropertyBinaryEmojiV1, |
352 | | PropertyCodePointSet<'static>, |
353 | | is_singleton = true |
354 | | ); |
355 | | icu_provider::data_marker!( |
356 | | /// Singleton data marker `PropertyBinaryExtendedPictographicV1`; its data struct is `PropertyCodePointSet<'static>`. |
357 | | PropertyBinaryExtendedPictographicV1, |
358 | | PropertyCodePointSet<'static>, |
359 | | is_singleton = true |
360 | | ); |
361 | | icu_provider::data_marker!( |
362 | | /// Singleton data marker `PropertyBinaryExtenderV1`; its data struct is `PropertyCodePointSet<'static>`. |
363 | | PropertyBinaryExtenderV1, |
364 | | PropertyCodePointSet<'static>, |
365 | | is_singleton = true |
366 | | ); |
367 | | icu_provider::data_marker!( |
368 | | /// Singleton data marker `PropertyBinaryFullCompositionExclusionV1`; its data struct is `PropertyCodePointSet<'static>`. |
369 | | PropertyBinaryFullCompositionExclusionV1, |
370 | | PropertyCodePointSet<'static>, |
371 | | is_singleton = true |
372 | | ); |
373 | | icu_provider::data_marker!( |
374 | | /// Singleton data marker `PropertyBinaryGraphemeBaseV1`; its data struct is `PropertyCodePointSet<'static>`. |
375 | | PropertyBinaryGraphemeBaseV1, |
376 | | PropertyCodePointSet<'static>, |
377 | | is_singleton = true |
378 | | ); |
379 | | icu_provider::data_marker!( |
380 | | /// Singleton data marker `PropertyBinaryGraphemeExtendV1`; its data struct is `PropertyCodePointSet<'static>`. |
381 | | PropertyBinaryGraphemeExtendV1, |
382 | | PropertyCodePointSet<'static>, |
383 | | is_singleton = true |
384 | | ); |
385 | | icu_provider::data_marker!( |
386 | | /// Singleton data marker `PropertyBinaryGraphemeLinkV1`; its data struct is `PropertyCodePointSet<'static>`. |
387 | | PropertyBinaryGraphemeLinkV1, |
388 | | PropertyCodePointSet<'static>, |
389 | | is_singleton = true |
390 | | ); |
391 | | icu_provider::data_marker!( |
392 | | /// Singleton data marker `PropertyBinaryGraphV1`; its data struct is `PropertyCodePointSet<'static>`. |
393 | | PropertyBinaryGraphV1, |
394 | | PropertyCodePointSet<'static>, |
395 | | is_singleton = true |
396 | | ); |
397 | | icu_provider::data_marker!( |
398 | | /// Singleton data marker `PropertyBinaryHexDigitV1`; its data struct is `PropertyCodePointSet<'static>`. |
399 | | PropertyBinaryHexDigitV1, |
400 | | PropertyCodePointSet<'static>, |
401 | | is_singleton = true |
402 | | ); |
403 | | icu_provider::data_marker!( |
404 | | /// Singleton data marker `PropertyBinaryHyphenV1`; its data struct is `PropertyCodePointSet<'static>`. |
405 | | PropertyBinaryHyphenV1, |
406 | | PropertyCodePointSet<'static>, |
407 | | is_singleton = true |
408 | | ); |
409 | | icu_provider::data_marker!( |
410 | | /// Singleton data marker `PropertyBinaryIdCompatMathContinueV1`; its data struct is `PropertyCodePointSet<'static>`. |
411 | | PropertyBinaryIdCompatMathContinueV1, |
412 | | PropertyCodePointSet<'static>, |
413 | | is_singleton = true |
414 | | ); |
415 | | icu_provider::data_marker!( |
416 | | /// Singleton data marker `PropertyBinaryIdCompatMathStartV1`; its data struct is `PropertyCodePointSet<'static>`. |
417 | | PropertyBinaryIdCompatMathStartV1, |
418 | | PropertyCodePointSet<'static>, |
419 | | is_singleton = true |
420 | | ); |
421 | | icu_provider::data_marker!( |
422 | | /// Singleton data marker `PropertyBinaryIdContinueV1`; its data struct is `PropertyCodePointSet<'static>`. |
423 | | PropertyBinaryIdContinueV1, |
424 | | PropertyCodePointSet<'static>, |
425 | | is_singleton = true |
426 | | ); |
427 | | icu_provider::data_marker!( |
428 | | /// Singleton data marker `PropertyBinaryIdeographicV1`; its data struct is `PropertyCodePointSet<'static>`. |
429 | | PropertyBinaryIdeographicV1, |
430 | | PropertyCodePointSet<'static>, |
431 | | is_singleton = true |
432 | | ); |
433 | | icu_provider::data_marker!( |
434 | | /// Singleton data marker `PropertyBinaryIdsBinaryOperatorV1`; its data struct is `PropertyCodePointSet<'static>`. |
435 | | PropertyBinaryIdsBinaryOperatorV1, |
436 | | PropertyCodePointSet<'static>, |
437 | | is_singleton = true |
438 | | ); |
439 | | icu_provider::data_marker!( |
440 | | /// Singleton data marker `PropertyBinaryIdStartV1`; its data struct is `PropertyCodePointSet<'static>`. |
441 | | PropertyBinaryIdStartV1, |
442 | | PropertyCodePointSet<'static>, |
443 | | is_singleton = true |
444 | | ); |
445 | | icu_provider::data_marker!( |
446 | | /// Singleton data marker `PropertyBinaryIdsTrinaryOperatorV1`; its data struct is `PropertyCodePointSet<'static>`. |
447 | | PropertyBinaryIdsTrinaryOperatorV1, |
448 | | PropertyCodePointSet<'static>, |
449 | | is_singleton = true |
450 | | ); |
451 | | icu_provider::data_marker!( |
452 | | /// Singleton data marker `PropertyBinaryIdsUnaryOperatorV1`; its data struct is `PropertyCodePointSet<'static>`. |
453 | | PropertyBinaryIdsUnaryOperatorV1, |
454 | | PropertyCodePointSet<'static>, |
455 | | is_singleton = true |
456 | | ); |
457 | | icu_provider::data_marker!( |
458 | | /// Singleton data marker `PropertyBinaryJoinControlV1`; its data struct is `PropertyCodePointSet<'static>`. |
459 | | PropertyBinaryJoinControlV1, |
460 | | PropertyCodePointSet<'static>, |
461 | | is_singleton = true |
462 | | ); |
463 | | icu_provider::data_marker!( |
464 | | /// Singleton data marker `PropertyBinaryLogicalOrderExceptionV1`; its data struct is `PropertyCodePointSet<'static>`. |
465 | | PropertyBinaryLogicalOrderExceptionV1, |
466 | | PropertyCodePointSet<'static>, |
467 | | is_singleton = true |
468 | | ); |
469 | | icu_provider::data_marker!( |
470 | | /// Singleton data marker `PropertyBinaryLowercaseV1`; its data struct is `PropertyCodePointSet<'static>`. |
471 | | PropertyBinaryLowercaseV1, |
472 | | PropertyCodePointSet<'static>, |
473 | | is_singleton = true |
474 | | ); |
475 | | icu_provider::data_marker!( |
476 | | /// Singleton data marker `PropertyBinaryMathV1`; its data struct is `PropertyCodePointSet<'static>`. |
477 | | PropertyBinaryMathV1, |
478 | | PropertyCodePointSet<'static>, |
479 | | is_singleton = true |
480 | | ); |
481 | | icu_provider::data_marker!( |
482 | | /// Singleton data marker `PropertyBinaryModifierCombiningMarkV1`; its data struct is `PropertyCodePointSet<'static>`. |
483 | | PropertyBinaryModifierCombiningMarkV1, |
484 | | PropertyCodePointSet<'static>, |
485 | | is_singleton = true |
486 | | ); |
487 | | icu_provider::data_marker!( |
488 | | /// Singleton data marker `PropertyBinaryNfcInertV1`; its data struct is `PropertyCodePointSet<'static>`. |
489 | | PropertyBinaryNfcInertV1, |
490 | | PropertyCodePointSet<'static>, |
491 | | is_singleton = true |
492 | | ); |
493 | | icu_provider::data_marker!( |
494 | | /// Singleton data marker `PropertyBinaryNfdInertV1`; its data struct is `PropertyCodePointSet<'static>`. |
495 | | PropertyBinaryNfdInertV1, |
496 | | PropertyCodePointSet<'static>, |
497 | | is_singleton = true |
498 | | ); |
499 | | icu_provider::data_marker!( |
500 | | /// Singleton data marker `PropertyBinaryNfkcInertV1`; its data struct is `PropertyCodePointSet<'static>`. |
501 | | PropertyBinaryNfkcInertV1, |
502 | | PropertyCodePointSet<'static>, |
503 | | is_singleton = true |
504 | | ); |
505 | | icu_provider::data_marker!( |
506 | | /// Singleton data marker `PropertyBinaryNfkdInertV1`; its data struct is `PropertyCodePointSet<'static>`. |
507 | | PropertyBinaryNfkdInertV1, |
508 | | PropertyCodePointSet<'static>, |
509 | | is_singleton = true |
510 | | ); |
511 | | icu_provider::data_marker!( |
512 | | /// Singleton data marker `PropertyBinaryNoncharacterCodePointV1`; its data struct is `PropertyCodePointSet<'static>`. |
513 | | PropertyBinaryNoncharacterCodePointV1, |
514 | | PropertyCodePointSet<'static>, |
515 | | is_singleton = true |
516 | | ); |
517 | | icu_provider::data_marker!( |
518 | | /// Singleton data marker `PropertyBinaryPatternSyntaxV1`; its data struct is `PropertyCodePointSet<'static>`. |
519 | | PropertyBinaryPatternSyntaxV1, |
520 | | PropertyCodePointSet<'static>, |
521 | | is_singleton = true |
522 | | ); |
523 | | icu_provider::data_marker!( |
524 | | /// Singleton data marker `PropertyBinaryPatternWhiteSpaceV1`; its data struct is `PropertyCodePointSet<'static>`. |
525 | | PropertyBinaryPatternWhiteSpaceV1, |
526 | | PropertyCodePointSet<'static>, |
527 | | is_singleton = true |
528 | | ); |
529 | | icu_provider::data_marker!( |
530 | | /// Singleton data marker `PropertyBinaryPrependedConcatenationMarkV1`; its data struct is `PropertyCodePointSet<'static>`. |
531 | | PropertyBinaryPrependedConcatenationMarkV1, |
532 | | PropertyCodePointSet<'static>, |
533 | | is_singleton = true |
534 | | ); |
535 | | icu_provider::data_marker!( |
536 | | /// Singleton data marker `PropertyBinaryPrintV1`; its data struct is `PropertyCodePointSet<'static>`. |
537 | | PropertyBinaryPrintV1, |
538 | | PropertyCodePointSet<'static>, |
539 | | is_singleton = true |
540 | | ); |
541 | | icu_provider::data_marker!( |
542 | | /// Singleton data marker `PropertyBinaryQuotationMarkV1`; its data struct is `PropertyCodePointSet<'static>`. |
543 | | PropertyBinaryQuotationMarkV1, |
544 | | PropertyCodePointSet<'static>, |
545 | | is_singleton = true |
546 | | ); |
547 | | icu_provider::data_marker!( |
548 | | /// Singleton data marker `PropertyBinaryRadicalV1`; its data struct is `PropertyCodePointSet<'static>`. |
549 | | PropertyBinaryRadicalV1, |
550 | | PropertyCodePointSet<'static>, |
551 | | is_singleton = true |
552 | | ); |
553 | | icu_provider::data_marker!( |
554 | | /// Singleton data marker `PropertyBinaryRegionalIndicatorV1`; its data struct is `PropertyCodePointSet<'static>`. |
555 | | PropertyBinaryRegionalIndicatorV1, |
556 | | PropertyCodePointSet<'static>, |
557 | | is_singleton = true |
558 | | ); |
559 | | icu_provider::data_marker!( |
560 | | /// Singleton data marker `PropertyBinarySegmentStarterV1`; its data struct is `PropertyCodePointSet<'static>`. |
561 | | PropertyBinarySegmentStarterV1, |
562 | | PropertyCodePointSet<'static>, |
563 | | is_singleton = true |
564 | | ); |
565 | | icu_provider::data_marker!( |
566 | | /// Singleton data marker `PropertyBinarySentenceTerminalV1`; its data struct is `PropertyCodePointSet<'static>`. |
567 | | PropertyBinarySentenceTerminalV1, |
568 | | PropertyCodePointSet<'static>, |
569 | | is_singleton = true |
570 | | ); |
571 | | icu_provider::data_marker!( |
572 | | /// Singleton data marker `PropertyBinarySoftDottedV1`; its data struct is `PropertyCodePointSet<'static>`. |
573 | | PropertyBinarySoftDottedV1, |
574 | | PropertyCodePointSet<'static>, |
575 | | is_singleton = true |
576 | | ); |
577 | | icu_provider::data_marker!( |
578 | | /// Singleton data marker `PropertyBinaryTerminalPunctuationV1`; its data struct is `PropertyCodePointSet<'static>`. |
579 | | PropertyBinaryTerminalPunctuationV1, |
580 | | PropertyCodePointSet<'static>, |
581 | | is_singleton = true |
582 | | ); |
583 | | icu_provider::data_marker!( |
584 | | /// Singleton data marker `PropertyBinaryUnifiedIdeographV1`; its data struct is `PropertyCodePointSet<'static>`. |
585 | | PropertyBinaryUnifiedIdeographV1, |
586 | | PropertyCodePointSet<'static>, |
587 | | is_singleton = true |
588 | | ); |
589 | | icu_provider::data_marker!( |
590 | | /// Singleton data marker `PropertyBinaryUppercaseV1`; its data struct is `PropertyCodePointSet<'static>`. |
591 | | PropertyBinaryUppercaseV1, |
592 | | PropertyCodePointSet<'static>, |
593 | | is_singleton = true |
594 | | ); |
595 | | icu_provider::data_marker!( |
596 | | /// Singleton data marker `PropertyBinaryVariationSelectorV1`; its data struct is `PropertyCodePointSet<'static>`. |
597 | | PropertyBinaryVariationSelectorV1, |
598 | | PropertyCodePointSet<'static>, |
599 | | is_singleton = true |
600 | | ); |
601 | | icu_provider::data_marker!( |
602 | | /// Singleton data marker `PropertyBinaryWhiteSpaceV1`; its data struct is `PropertyCodePointSet<'static>`. |
603 | | PropertyBinaryWhiteSpaceV1, |
604 | | PropertyCodePointSet<'static>, |
605 | | is_singleton = true |
606 | | ); |
607 | | icu_provider::data_marker!( |
608 | | /// Singleton data marker `PropertyBinaryXdigitV1`; its data struct is `PropertyCodePointSet<'static>`. |
609 | | PropertyBinaryXdigitV1, |
610 | | PropertyCodePointSet<'static>, |
611 | | is_singleton = true |
612 | | ); |
613 | | icu_provider::data_marker!( |
614 | | /// Singleton data marker `PropertyBinaryXidContinueV1`; its data struct is `PropertyCodePointSet<'static>`. |
615 | | PropertyBinaryXidContinueV1, |
616 | | PropertyCodePointSet<'static>, |
617 | | is_singleton = true |
618 | | ); |
619 | | icu_provider::data_marker!( |
620 | | /// Singleton data marker `PropertyBinaryXidStartV1`; its data struct is `PropertyCodePointSet<'static>`. |
621 | | PropertyBinaryXidStartV1, |
622 | | PropertyCodePointSet<'static>, |
623 | | is_singleton = true |
624 | | ); |
625 | | icu_provider::data_marker!( |
626 | | /// Data marker for the 'BidiClass' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, BidiClass>`) |
627 | | PropertyEnumBidiClassV1, |
628 | | PropertyCodePointMap<'static, crate::props::BidiClass>, |
629 | | is_singleton = true, |
630 | | ); |
631 | | icu_provider::data_marker!( |
632 | | /// Data marker for the 'CanonicalCombiningClass' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, CanonicalCombiningClass>`) |
633 | | PropertyEnumCanonicalCombiningClassV1, |
634 | | PropertyCodePointMap<'static, crate::props::CanonicalCombiningClass>, |
635 | | is_singleton = true, |
636 | | ); |
637 | | icu_provider::data_marker!( |
638 | | /// Data marker for the 'EastAsianWidth' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, EastAsianWidth>`) |
639 | | PropertyEnumEastAsianWidthV1, |
640 | | PropertyCodePointMap<'static, crate::props::EastAsianWidth>, |
641 | | is_singleton = true, |
642 | | ); |
643 | | icu_provider::data_marker!( |
644 | | /// Data marker for the 'GeneralCategory' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, GeneralCategory>`) |
645 | | PropertyEnumGeneralCategoryV1, |
646 | | PropertyCodePointMap<'static, crate::props::GeneralCategory>, |
647 | | is_singleton = true, |
648 | | ); |
649 | | icu_provider::data_marker!( |
650 | | /// Data marker for the 'GraphemeClusterBreak' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, GraphemeClusterBreak>`) |
651 | | PropertyEnumGraphemeClusterBreakV1, |
652 | | PropertyCodePointMap<'static, crate::props::GraphemeClusterBreak>, |
653 | | is_singleton = true, |
654 | | ); |
655 | | icu_provider::data_marker!( |
656 | | /// Data marker for the 'HangulSyllableType' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, HangulSyllableType>`) |
657 | | PropertyEnumHangulSyllableTypeV1, |
658 | | PropertyCodePointMap<'static, crate::props::HangulSyllableType>, |
659 | | is_singleton = true, |
660 | | ); |
661 | | icu_provider::data_marker!( |
662 | | /// Data marker for the 'IndicConjunctBreak' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, IndicConjunctBreak>`) |
663 | | PropertyEnumIndicConjunctBreakV1, |
664 | | PropertyCodePointMap<'static, crate::props::IndicConjunctBreak>, |
665 | | is_singleton = true, |
666 | | ); |
667 | | icu_provider::data_marker!( |
668 | | /// Data marker for the 'IndicSyllabicCategory' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, IndicSyllabicCategory>`) |
669 | | PropertyEnumIndicSyllabicCategoryV1, |
670 | | PropertyCodePointMap<'static, crate::props::IndicSyllabicCategory>, |
671 | | is_singleton = true, |
672 | | ); |
673 | | icu_provider::data_marker!( |
674 | | /// Data marker for the 'JoiningType' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, JoiningType>`) |
675 | | PropertyEnumJoiningTypeV1, |
676 | | PropertyCodePointMap<'static, crate::props::JoiningType>, |
677 | | is_singleton = true, |
678 | | ); |
679 | | icu_provider::data_marker!( |
680 | | /// Data marker for the 'LineBreak' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, LineBreak>`) |
681 | | PropertyEnumLineBreakV1, |
682 | | PropertyCodePointMap<'static, crate::props::LineBreak>, |
683 | | is_singleton = true, |
684 | | ); |
685 | | icu_provider::data_marker!( |
686 | | /// Data marker for the 'Script' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, Script>`) |
687 | | PropertyEnumScriptV1, |
688 | | PropertyCodePointMap<'static, crate::props::Script>, |
689 | | is_singleton = true, |
690 | | ); |
691 | | icu_provider::data_marker!( |
692 | | /// Data marker for the 'SentenceBreak' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, SentenceBreak>`) |
693 | | PropertyEnumSentenceBreakV1, |
694 | | PropertyCodePointMap<'static, crate::props::SentenceBreak>, |
695 | | is_singleton = true, |
696 | | ); |
697 | | icu_provider::data_marker!( |
698 | | /// Data marker for the 'VerticalOrientation' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, VerticalOrientation>`) |
699 | | PropertyEnumVerticalOrientationV1, |
700 | | PropertyCodePointMap<'static, crate::props::VerticalOrientation>, |
701 | | is_singleton = true, |
702 | | ); |
703 | | icu_provider::data_marker!( |
704 | | /// Data marker for the 'WordBreak' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, WordBreak>`) |
705 | | PropertyEnumWordBreakV1, |
706 | | PropertyCodePointMap<'static, crate::props::WordBreak>, |
707 | | is_singleton = true, |
708 | | ); |
709 | | icu_provider::data_marker!( |
710 | | /// Data marker for the 'BidiMirroringGlyph' Unicode property (singleton; data struct is `PropertyCodePointMap<'static, crate::bidi::BidiMirroringGlyph>`) |
711 | | PropertyEnumBidiMirroringGlyphV1, |
712 | | PropertyCodePointMap<'static, crate::bidi::BidiMirroringGlyph>, |
713 | | is_singleton = true, |
714 | | ); |
715 | | icu_provider::data_marker!( |
716 | | /// Singleton data marker `PropertyBinaryBasicEmojiV1`; its data struct is `PropertyUnicodeSet<'static>` rather than the `PropertyCodePointSet` used by the other `PropertyBinary*` markers. |
717 | | PropertyBinaryBasicEmojiV1, |
718 | | PropertyUnicodeSet<'static>, |
719 | | is_singleton = true |
720 | | ); |
721 | | icu_provider::data_marker!( |
722 | | /// Singleton data marker `PropertyScriptWithExtensionsV1`; its data struct is `ScriptWithExtensionsProperty<'static>`. |
723 | | PropertyScriptWithExtensionsV1, |
724 | | ScriptWithExtensionsProperty<'static>, |
725 | | is_singleton = true |
726 | | ); |
727 | | |
728 | | /// All data markers defined in this module, as [`DataMarkerInfo`] values. |
729 | | pub const MARKERS: &[DataMarkerInfo] = &[ |
730 | | PropertyNameLongBidiClassV1::INFO, |
731 | | #[cfg(feature = "alloc")] |
732 | | PropertyNameLongCanonicalCombiningClassV1::INFO, |
733 | | PropertyNameLongEastAsianWidthV1::INFO, |
734 | | PropertyNameLongGeneralCategoryV1::INFO, |
735 | | PropertyNameLongGraphemeClusterBreakV1::INFO, |
736 | | PropertyNameLongHangulSyllableTypeV1::INFO, |
737 | | PropertyNameLongIndicSyllabicCategoryV1::INFO, |
738 | | PropertyNameLongIndicConjunctBreakV1::INFO, |
739 | | PropertyNameLongJoiningTypeV1::INFO, |
740 | | PropertyNameLongLineBreakV1::INFO, |
741 | | PropertyNameLongScriptV1::INFO, |
742 | | PropertyNameLongSentenceBreakV1::INFO, |
743 | | PropertyNameLongVerticalOrientationV1::INFO, |
744 | | PropertyNameLongWordBreakV1::INFO, |
745 | | PropertyNameParseBidiClassV1::INFO, |
746 | | PropertyNameParseCanonicalCombiningClassV1::INFO, |
747 | | PropertyNameParseEastAsianWidthV1::INFO, |
748 | | PropertyNameParseGeneralCategoryMaskV1::INFO, |
749 | | PropertyNameParseGeneralCategoryV1::INFO, |
750 | | PropertyNameParseGraphemeClusterBreakV1::INFO, |
751 | | PropertyNameParseHangulSyllableTypeV1::INFO, |
752 | | PropertyNameParseIndicSyllabicCategoryV1::INFO, |
753 | | PropertyNameParseIndicConjunctBreakV1::INFO, |
754 | | PropertyNameParseJoiningTypeV1::INFO, |
755 | | PropertyNameParseLineBreakV1::INFO, |
756 | | PropertyNameParseScriptV1::INFO, |
757 | | PropertyNameParseSentenceBreakV1::INFO, |
758 | | PropertyNameParseVerticalOrientationV1::INFO, |
759 | | PropertyNameParseWordBreakV1::INFO, |
760 | | PropertyNameShortBidiClassV1::INFO, |
761 | | #[cfg(feature = "alloc")] |
762 | | PropertyNameShortCanonicalCombiningClassV1::INFO, |
763 | | PropertyNameShortEastAsianWidthV1::INFO, |
764 | | PropertyNameShortGeneralCategoryV1::INFO, |
765 | | PropertyNameShortGraphemeClusterBreakV1::INFO, |
766 | | PropertyNameShortHangulSyllableTypeV1::INFO, |
767 | | PropertyNameShortIndicSyllabicCategoryV1::INFO, |
768 | | PropertyNameShortIndicConjunctBreakV1::INFO, |
769 | | PropertyNameShortJoiningTypeV1::INFO, |
770 | | PropertyNameShortLineBreakV1::INFO, |
771 | | PropertyNameShortScriptV1::INFO, |
772 | | PropertyNameShortSentenceBreakV1::INFO, |
773 | | PropertyNameShortVerticalOrientationV1::INFO, |
774 | | PropertyNameShortWordBreakV1::INFO, |
775 | | PropertyBinaryAlnumV1::INFO, |
776 | | PropertyBinaryAlphabeticV1::INFO, |
777 | | PropertyBinaryAsciiHexDigitV1::INFO, |
778 | | PropertyBinaryBidiControlV1::INFO, |
779 | | PropertyBinaryBidiMirroredV1::INFO, |
780 | | PropertyBinaryBlankV1::INFO, |
781 | | PropertyBinaryCasedV1::INFO, |
782 | | PropertyBinaryCaseIgnorableV1::INFO, |
783 | | PropertyBinaryCaseSensitiveV1::INFO, |
784 | | PropertyBinaryChangesWhenCasefoldedV1::INFO, |
785 | | PropertyBinaryChangesWhenCasemappedV1::INFO, |
786 | | PropertyBinaryChangesWhenLowercasedV1::INFO, |
787 | | PropertyBinaryChangesWhenNfkcCasefoldedV1::INFO, |
788 | | PropertyBinaryChangesWhenTitlecasedV1::INFO, |
789 | | PropertyBinaryChangesWhenUppercasedV1::INFO, |
790 | | PropertyBinaryDashV1::INFO, |
791 | | PropertyBinaryDefaultIgnorableCodePointV1::INFO, |
792 | | PropertyBinaryDeprecatedV1::INFO, |
793 | | PropertyBinaryDiacriticV1::INFO, |
794 | | PropertyBinaryEmojiComponentV1::INFO, |
795 | | PropertyBinaryEmojiModifierBaseV1::INFO, |
796 | | PropertyBinaryEmojiModifierV1::INFO, |
797 | | PropertyBinaryEmojiPresentationV1::INFO, |
798 | | PropertyBinaryEmojiV1::INFO, |
799 | | PropertyBinaryExtendedPictographicV1::INFO, |
800 | | PropertyBinaryExtenderV1::INFO, |
801 | | PropertyBinaryFullCompositionExclusionV1::INFO, |
802 | | PropertyBinaryGraphemeBaseV1::INFO, |
803 | | PropertyBinaryGraphemeExtendV1::INFO, |
804 | | PropertyBinaryGraphemeLinkV1::INFO, |
805 | | PropertyBinaryGraphV1::INFO, |
806 | | PropertyBinaryHexDigitV1::INFO, |
807 | | PropertyBinaryHyphenV1::INFO, |
808 | | PropertyBinaryIdCompatMathContinueV1::INFO, |
809 | | PropertyBinaryIdCompatMathStartV1::INFO, |
810 | | PropertyBinaryIdContinueV1::INFO, |
811 | | PropertyBinaryIdeographicV1::INFO, |
812 | | PropertyBinaryIdsBinaryOperatorV1::INFO, |
813 | | PropertyBinaryIdStartV1::INFO, |
814 | | PropertyBinaryIdsTrinaryOperatorV1::INFO, |
815 | | PropertyBinaryIdsUnaryOperatorV1::INFO, |
816 | | PropertyBinaryJoinControlV1::INFO, |
817 | | PropertyBinaryLogicalOrderExceptionV1::INFO, |
818 | | PropertyBinaryLowercaseV1::INFO, |
819 | | PropertyBinaryMathV1::INFO, |
820 | | PropertyBinaryModifierCombiningMarkV1::INFO, |
821 | | PropertyBinaryNfcInertV1::INFO, |
822 | | PropertyBinaryNfdInertV1::INFO, |
823 | | PropertyBinaryNfkcInertV1::INFO, |
824 | | PropertyBinaryNfkdInertV1::INFO, |
825 | | PropertyBinaryNoncharacterCodePointV1::INFO, |
826 | | PropertyBinaryPatternSyntaxV1::INFO, |
827 | | PropertyBinaryPatternWhiteSpaceV1::INFO, |
828 | | PropertyBinaryPrependedConcatenationMarkV1::INFO, |
829 | | PropertyBinaryPrintV1::INFO, |
830 | | PropertyBinaryQuotationMarkV1::INFO, |
831 | | PropertyBinaryRadicalV1::INFO, |
832 | | PropertyBinaryRegionalIndicatorV1::INFO, |
833 | | PropertyBinarySegmentStarterV1::INFO, |
834 | | PropertyBinarySentenceTerminalV1::INFO, |
835 | | PropertyBinarySoftDottedV1::INFO, |
836 | | PropertyBinaryTerminalPunctuationV1::INFO, |
837 | | PropertyBinaryUnifiedIdeographV1::INFO, |
838 | | PropertyBinaryUppercaseV1::INFO, |
839 | | PropertyBinaryVariationSelectorV1::INFO, |
840 | | PropertyBinaryWhiteSpaceV1::INFO, |
841 | | PropertyBinaryXdigitV1::INFO, |
842 | | PropertyBinaryXidContinueV1::INFO, |
843 | | PropertyBinaryXidStartV1::INFO, |
844 | | PropertyEnumBidiClassV1::INFO, |
845 | | PropertyEnumCanonicalCombiningClassV1::INFO, |
846 | | PropertyEnumEastAsianWidthV1::INFO, |
847 | | PropertyEnumGeneralCategoryV1::INFO, |
848 | | PropertyEnumGraphemeClusterBreakV1::INFO, |
849 | | PropertyEnumHangulSyllableTypeV1::INFO, |
850 | | PropertyEnumIndicConjunctBreakV1::INFO, |
851 | | PropertyEnumIndicSyllabicCategoryV1::INFO, |
852 | | PropertyEnumJoiningTypeV1::INFO, |
853 | | PropertyEnumLineBreakV1::INFO, |
854 | | PropertyEnumScriptV1::INFO, |
855 | | PropertyEnumSentenceBreakV1::INFO, |
856 | | PropertyEnumVerticalOrientationV1::INFO, |
857 | | PropertyEnumWordBreakV1::INFO, |
858 | | PropertyEnumBidiMirroringGlyphV1::INFO, |
859 | | PropertyBinaryBasicEmojiV1::INFO, |
860 | | PropertyScriptWithExtensionsV1::INFO, |
861 | | ]; |
862 | | |
863 | | /// A set of characters which share a particular property value. |
864 | | /// |
865 | | /// This data enum is extensible; more backends may be added in the future. |
866 | | /// Old data can be used with newer code, but not vice versa. |
867 | | /// |
868 | | /// <div class="stab unstable"> |
869 | | /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
870 | | /// including in SemVer minor releases. While the serde representation of data structs is guaranteed |
871 | | /// to be stable, their Rust representation might not be. Use with caution. |
872 | | /// </div> |
873 | | #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] |
874 | | #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] |
875 | | #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] |
876 | | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] |
877 | | #[non_exhaustive] |
878 | | pub enum PropertyCodePointSet<'data> { |
879 | | /// The set of characters, represented as an inversion list. |
880 | | InversionList(#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionList<'data>), |
881 | | // NOTE: new variants must be added BELOW the existing ones, because |
882 | | // serde serializes an enum based on the variant name and its index in the enum: |
883 | | // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant |
884 | | } |
885 | | |
886 | | icu_provider::data_struct!( |
887 | | PropertyCodePointSet<'_>, |
888 | | #[cfg(feature = "datagen")] |
889 | | ); |
890 | | |
891 | | // See `CodePointSetData` for documentation of these functions; each one dispatches on the backing variant |
892 | | impl<'data> PropertyCodePointSet<'data> { |
893 | | #[inline] |
894 | | pub(crate) fn contains(&self, ch: char) -> bool { |
895 | | match *self { |
896 | | Self::InversionList(ref l) => l.contains(ch), |
897 | | } |
898 | | } |
899 | | |
900 | | #[inline] |
901 | | pub(crate) fn contains32(&self, ch: u32) -> bool { |
902 | | match *self { |
903 | | Self::InversionList(ref l) => l.contains32(ch), |
904 | | } |
905 | | } |
906 | | |
907 | | #[inline] |
908 | | pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { |
909 | | match *self { |
910 | | Self::InversionList(ref l) => l.iter_ranges(), |
911 | | } |
912 | | } |
913 | | |
914 | | #[inline] |
915 | | pub(crate) fn iter_ranges_complemented( |
916 | | &self, |
917 | | ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { |
918 | | match *self { |
919 | | Self::InversionList(ref l) => l.iter_ranges_complemented(), |
920 | | } |
921 | | } |
922 | | |
923 | | #[inline] |
924 | 0 | pub(crate) fn from_code_point_inversion_list(l: CodePointInversionList<'static>) -> Self { |
925 | 0 | Self::InversionList(l) |
926 | 0 | } |
927 | | |
928 | | #[inline] |
929 | 0 | pub(crate) fn as_code_point_inversion_list( |
930 | 0 | &'_ self, |
931 | 0 | ) -> Option<&'_ CodePointInversionList<'data>> { |
932 | 0 | match *self { |
933 | 0 | Self::InversionList(ref l) => Some(l), |
934 | | // Any future backing variant that cannot produce a `CodePointInversionList` in O(1) time should return `None` here |
935 | | } |
936 | 0 | } |
937 | | |
938 | | #[inline] |
939 | 0 | pub(crate) fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { |
940 | 0 | match *self { |
941 | 0 | Self::InversionList(ref t) => ZeroFrom::zero_from(t), |
942 | | } |
943 | 0 | } |
944 | | } |
945 | | |
946 | | /// A map efficiently storing data about individual characters. |
947 | | /// |
948 | | /// This data enum is extensible; more backends may be added in the future. |
949 | | /// Old data can be used with newer code, but not vice versa. |
950 | | /// |
951 | | /// <div class="stab unstable"> |
952 | | /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
953 | | /// including in SemVer minor releases. While the serde representation of data structs is guaranteed |
954 | | /// to be stable, their Rust representation might not be. Use with caution. |
955 | | /// </div> |
956 | | #[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] |
957 | | #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] |
958 | | #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] |
959 | | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] |
960 | | #[non_exhaustive] |
961 | | pub enum PropertyCodePointMap<'data, T: TrieValue> { |
962 | | /// A code point trie storing the data. |
963 | | CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>), |
964 | | // NOTE: new variants must be added BELOW the existing ones, because |
965 | | // serde serializes an enum based on the variant name and its index in the enum: |
966 | | // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant |
967 | | } |
968 | | |
969 | | icu_provider::data_struct!( |
970 | | <T: TrieValue> PropertyCodePointMap<'_, T>, |
971 | | #[cfg(feature = "datagen")] |
972 | | ); |
973 | | |
974 | | // See `CodePointMapData` for documentation of these functions; each one dispatches on the backing variant |
975 | | impl<'data, T: TrieValue> PropertyCodePointMap<'data, T> { |
976 | | #[inline] |
977 | | pub(crate) fn get32(&self, ch: u32) -> T { |
978 | | match *self { |
979 | | Self::CodePointTrie(ref t) => t.get32(ch), |
980 | | } |
981 | | } |
982 | | |
983 | | #[inline] |
984 | 0 | pub(crate) fn get(&self, c: char) -> T { |
985 | 0 | match *self { |
986 | 0 | Self::CodePointTrie(ref t) => t.get(c), |
987 | | } |
988 | 0 | } Unexecuted instantiation: <icu_properties::provider::PropertyCodePointMap<icu_properties::props::JoiningType>>::get Unexecuted instantiation: <icu_properties::provider::PropertyCodePointMap<icu_properties::props::BidiClass>>::get Unexecuted instantiation: <icu_properties::provider::PropertyCodePointMap<icu_properties::props::gc::GeneralCategory>>::get |
989 | | |
990 | | #[inline] |
991 | | #[cfg(feature = "alloc")] |
992 | | pub(crate) fn try_into_converted<P>( |
993 | | self, |
994 | | ) -> Result<PropertyCodePointMap<'data, P>, zerovec::ule::UleError> |
995 | | where |
996 | | P: TrieValue, |
997 | | { |
998 | | match self { |
999 | | Self::CodePointTrie(t) => t |
1000 | | .try_into_converted() |
1001 | | .map(PropertyCodePointMap::CodePointTrie), |
1002 | | } |
1003 | | } |
1004 | | |
1005 | | #[inline] |
1006 | | #[cfg(feature = "alloc")] |
1007 | | pub(crate) fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> { |
1008 | | match *self { |
1009 | | Self::CodePointTrie(ref t) => t.get_set_for_value(value), |
1010 | | } |
1011 | | } |
1012 | | |
1013 | | #[inline] |
1014 | | pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = CodePointMapRange<T>> + '_ { |
1015 | | match *self { |
1016 | | Self::CodePointTrie(ref t) => t.iter_ranges(), |
1017 | | } |
1018 | | } |
1019 | | #[inline] |
1020 | 0 | pub(crate) fn iter_ranges_mapped<'a, U: Eq + 'a>( |
1021 | 0 | &'a self, |
1022 | 0 | map: impl FnMut(T) -> U + Copy + 'a, |
1023 | 0 | ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { |
1024 | 0 | match *self { |
1025 | 0 | Self::CodePointTrie(ref t) => t.iter_ranges_mapped(map), |
1026 | | } |
1027 | 0 | } |
1028 | | |
1029 | | #[inline] |
1030 | | pub(crate) fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { |
1031 | | Self::CodePointTrie(trie) |
1032 | | } |
1033 | | |
1034 | | #[inline] |
1035 | | pub(crate) fn as_code_point_trie(&self) -> Option<&CodePointTrie<'data, T>> { |
1036 | | match *self { |
1037 | | Self::CodePointTrie(ref t) => Some(t), |
1038 | | // Any future backing variant that cannot produce a `CodePointTrie` in O(1) time should return `None` here |
1039 | | } |
1040 | | } |
1041 | | |
1042 | | #[inline] |
1043 | | pub(crate) fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { |
1044 | | match *self { |
1045 | | Self::CodePointTrie(ref t) => ZeroFrom::zero_from(t), |
1046 | | } |
1047 | | } |
1048 | | } |
1049 | | |
1050 | | /// A set of characters and strings which share a particular property value. |
1051 | | /// |
1052 | | /// <div class="stab unstable"> |
1053 | | /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
1054 | | /// including in SemVer minor releases. While the serde representation of data structs is guaranteed |
1055 | | /// to be stable, their Rust representation might not be. Use with caution. |
1056 | | /// </div> |
1057 | | #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] |
1058 | | #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] |
1059 | | #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] |
1060 | | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] |
1061 | | #[non_exhaustive] |
1062 | | pub enum PropertyUnicodeSet<'data> { |
1063 | | /// A set storing its characters in an inversion list and its strings in a list. |
1064 | | CPInversionListStrList( |
1065 | | #[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>, |
1066 | | ), |
1067 | | // NOTE: new variants must be added BELOW the existing ones, because |
1068 | | // serde serializes an enum based on the variant name and its index in the enum: |
1069 | | // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant |
1070 | | } |
1071 | | |
1072 | | icu_provider::data_struct!( |
1073 | | PropertyUnicodeSet<'_>, |
1074 | | #[cfg(feature = "datagen")] |
1075 | | ); |
1076 | | |
1077 | | impl<'data> PropertyUnicodeSet<'data> { |
1078 | | #[inline] |
1079 | | pub(crate) fn contains_str(&self, s: &str) -> bool { |
1080 | | match *self { |
1081 | | Self::CPInversionListStrList(ref l) => l.contains_str(s), |
1082 | | } |
1083 | | } |
1084 | | |
1085 | | #[inline] |
1086 | | pub(crate) fn contains32(&self, cp: u32) -> bool { |
1087 | | match *self { |
1088 | | Self::CPInversionListStrList(ref l) => l.contains32(cp), |
1089 | | } |
1090 | | } |
1091 | | |
1092 | | #[inline] |
1093 | | pub(crate) fn contains(&self, ch: char) -> bool { |
1094 | | match *self { |
1095 | | Self::CPInversionListStrList(ref l) => l.contains(ch), |
1096 | | } |
1097 | | } |
1098 | | |
1099 | | #[inline] |
1100 | 0 | pub(crate) fn from_code_point_inversion_list_string_list( |
1101 | 0 | l: CodePointInversionListAndStringList<'static>, |
1102 | 0 | ) -> Self { |
1103 | 0 | Self::CPInversionListStrList(l) |
1104 | 0 | } |
1105 | | |
1106 | | #[inline] |
1107 | 0 | pub(crate) fn as_code_point_inversion_list_string_list( |
1108 | 0 | &'_ self, |
1109 | 0 | ) -> Option<&'_ CodePointInversionListAndStringList<'data>> { |
1110 | 0 | match *self { |
1111 | 0 | Self::CPInversionListStrList(ref l) => Some(l), |
1112 | | // Any future backing variant that cannot produce a `CodePointInversionListAndStringList` in O(1) time should return `None` here |
1113 | | } |
1114 | 0 | } |
1115 | | |
1116 | | #[inline] |
1117 | 0 | pub(crate) fn to_code_point_inversion_list_string_list( |
1118 | 0 | &self, |
1119 | 0 | ) -> CodePointInversionListAndStringList<'_> { |
1120 | 0 | match *self { |
1121 | 0 | Self::CPInversionListStrList(ref t) => ZeroFrom::zero_from(t), |
1122 | | } |
1123 | 0 | } |
1124 | | } |
1125 | | |
1126 | | /// A struct that efficiently stores `Script` and `Script_Extensions` property data. |
1127 | | /// |
1128 | | /// <div class="stab unstable"> |
1129 | | /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
1130 | | /// including in SemVer minor releases. While the serde representation of data structs is guaranteed |
1131 | | /// to be stable, their Rust representation might not be. Use with caution. |
1132 | | /// </div> |
1133 | | #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] |
1134 | | #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] |
1135 | | #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] |
1136 | | #[cfg_attr(feature = "serde", derive(serde::Deserialize))] |
1137 | | pub struct ScriptWithExtensionsProperty<'data> { |
1138 | | /// The trie of `ScriptWithExt` values. These values assume a 12-bit layout: the 2 |
1139 | | /// high-order bits (11..10) indicate how to deduce the Script value and the |
1140 | | /// Script_Extensions value, nearly matching the representation |
1141 | | /// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h): |
1142 | | /// |
1143 | | /// | High order 2 bits value | Script | Script_Extensions | |
1144 | | /// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------| |
1145 | | /// | 3 | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits | |
1146 | | /// | 2 | Script=Inherited | Entire sub-array, index given by lower 10 bits | |
1147 | | /// | 1 | Script=Common | Entire sub-array, index given by lower 10 bits | |
1148 | | /// | 0 | Value in lower 10 bits | `[ Script value ]` single-element array | |
1149 | | /// |
1150 | | /// When the lower 10 bits of the value are used as an index, that index is |
1151 | | /// an index into the outer-level vector of the nested `extensions` structure. |
1152 | | #[cfg_attr(feature = "serde", serde(borrow))] |
1153 | | pub trie: CodePointTrie<'data, ScriptWithExt>, |
1154 | | |
1155 | | /// This companion structure stores Script_Extensions values, which are |
1156 | | /// themselves arrays / vectors. This structure only stores the values for |
1157 | | /// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. The |
1158 | | /// sub-vector represents the Script_Extensions array value for a code point, |
1159 | | /// and may also indicate the Script value, as described for the `trie` field. |
1160 | | #[cfg_attr(feature = "serde", serde(borrow))] |
1161 | | pub extensions: VarZeroVec<'data, ZeroSlice<Script>>, |
1162 | | } |
1163 | | |
1164 | | icu_provider::data_struct!( |
1165 | | ScriptWithExtensionsProperty<'_>, |
1166 | | #[cfg(feature = "datagen")] |
1167 | | ); |