/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_properties-2.1.1/src/props.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | //! This module defines all available properties. |
6 | | //! |
7 | | //! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1] |
8 | | //! and implement [`EnumeratedProperty`]. |
9 | | //! |
10 | | //! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData), |
11 | | //! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData). |
12 | | //! |
13 | | //! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or |
14 | | //! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser), |
15 | | //! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort) |
16 | | //! can be constructed. |
17 | | //! |
18 | | //! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums) |
19 | | |
20 | | pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty}; |
21 | | |
22 | | pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType}; |
23 | | |
24 | | /// See [`test_enumerated_property_completeness`] for usage. |
25 | | /// Example input: |
26 | | /// ```ignore |
27 | | /// impl EastAsianWidth { |
28 | | /// pub const Neutral: EastAsianWidth = EastAsianWidth(0); |
29 | | /// pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); |
30 | | /// ... |
31 | | /// } |
32 | | /// ``` |
33 | | /// Produces `pub const ALL_VALUES: &[EastAsianWidth] = &[EastAsianWidth::Neutral, ...];` |
34 | | /// and a `From<EastAsianWidth> for u16` impl that casts the first field of the struct to u16. |
35 | | macro_rules! create_const_array { |
36 | | ( |
37 | | $ ( #[$meta:meta] )* |
38 | | impl $enum_ty:ident { |
39 | | $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )* |
40 | | } |
41 | | ) => { |
42 | | $( #[$meta] )* |
43 | | impl $enum_ty { |
44 | | $( |
45 | | $(#[$const_meta])* |
46 | | $v const $i: $t = $e; |
47 | | )* |
48 | | |
49 | | /// All possible values of this enum in the Unicode version |
50 | | /// from this ICU4X release. |
51 | | pub const ALL_VALUES: &'static [$enum_ty] = &[ |
52 | | $($enum_ty::$i),* |
53 | | ]; |
54 | | } |
55 | | |
56 | | #[cfg(feature = "datagen")] |
57 | | impl databake::Bake for $enum_ty { |
58 | | fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { |
59 | | env.insert("icu_properties"); |
60 | | match *self { |
61 | | $( |
62 | | Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i), |
63 | | )* |
64 | | Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)), |
65 | | } |
66 | | } |
67 | | } |
68 | | |
69 | | |
70 | | impl From<$enum_ty> for u16 { |
71 | 0 | fn from(other: $enum_ty) -> Self { |
72 | 0 | other.0 as u16 |
73 | 0 | } Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::LineBreak>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::GraphemeClusterBreak>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::HangulSyllableType>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::EastAsianWidth>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::WordBreak>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::IndicConjunctBreak>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::SentenceBreak>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::CanonicalCombiningClass>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::IndicSyllabicCategory>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::JoiningType>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::VerticalOrientation>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::Script>>::from Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::BidiClass>>::from |
74 | | } |
75 | | } |
76 | | } |
77 | | |
78 | | pub use crate::code_point_map::EnumeratedProperty; |
79 | | |
80 | | macro_rules! make_enumerated_property { |
81 | | ( |
82 | | name: $name:literal; |
83 | | short_name: $short_name:literal; |
84 | | ident: $value_ty:path; |
85 | | data_marker: $data_marker:ty; |
86 | | singleton: $singleton:ident; |
87 | | $(ule_ty: $ule_ty:ty;)? |
88 | | ) => { |
89 | | impl crate::private::Sealed for $value_ty {} |
90 | | |
91 | | impl EnumeratedProperty for $value_ty { |
92 | | type DataMarker = $data_marker; |
93 | | #[cfg(feature = "compiled_data")] |
94 | | const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> = |
95 | | crate::provider::Baked::$singleton; |
96 | | const NAME: &'static [u8] = $name.as_bytes(); |
97 | | const SHORT_NAME: &'static [u8] = $short_name.as_bytes(); |
98 | | } |
99 | | |
100 | | $( |
101 | | impl zerovec::ule::AsULE for $value_ty { |
102 | | type ULE = $ule_ty; |
103 | | |
104 | 0 | fn to_unaligned(self) -> Self::ULE { |
105 | 0 | self.0.to_unaligned() |
106 | 0 | } Unexecuted instantiation: <icu_properties::props::Script as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::LineBreak as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::GraphemeClusterBreak as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::HangulSyllableType as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::EastAsianWidth as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::WordBreak as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::CanonicalCombiningClass as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::IndicConjunctBreak as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::SentenceBreak as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::IndicSyllabicCategory as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::JoiningType as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::VerticalOrientation as zerovec::ule::AsULE>::to_unaligned Unexecuted instantiation: <icu_properties::props::BidiClass as zerovec::ule::AsULE>::to_unaligned |
107 | 9.73M | fn from_unaligned(unaligned: Self::ULE) -> Self { |
108 | 9.73M | Self(zerovec::ule::AsULE::from_unaligned(unaligned)) |
109 | 9.73M | } Unexecuted instantiation: <icu_properties::props::Script as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::LineBreak as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::GraphemeClusterBreak as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::HangulSyllableType as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::EastAsianWidth as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::WordBreak as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::CanonicalCombiningClass as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::IndicConjunctBreak as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::SentenceBreak as zerovec::ule::AsULE>::from_unaligned Unexecuted instantiation: <icu_properties::props::IndicSyllabicCategory as zerovec::ule::AsULE>::from_unaligned <icu_properties::props::JoiningType as zerovec::ule::AsULE>::from_unaligned Line | Count | Source | 107 | 417k | fn from_unaligned(unaligned: Self::ULE) -> Self { | 108 | 417k | Self(zerovec::ule::AsULE::from_unaligned(unaligned)) | 109 | 417k | } |
Unexecuted instantiation: <icu_properties::props::VerticalOrientation as zerovec::ule::AsULE>::from_unaligned <icu_properties::props::BidiClass as zerovec::ule::AsULE>::from_unaligned Line | Count | Source | 107 | 9.31M | fn from_unaligned(unaligned: Self::ULE) -> Self { | 108 | 9.31M | Self(zerovec::ule::AsULE::from_unaligned(unaligned)) | 109 | 9.31M | } |
|
110 | | } |
111 | | )? |
112 | | }; |
113 | | } |
114 | | |
115 | | /// Enumerated property Bidi_Class |
116 | | /// |
117 | | /// These are the categories required by the Unicode Bidirectional Algorithm. |
118 | | /// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values). |
119 | | /// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9). |
120 | | /// |
121 | | /// # Example |
122 | | /// |
123 | | /// ``` |
124 | | /// use icu::properties::{props::BidiClass, CodePointMapData}; |
125 | | /// |
126 | | /// assert_eq!( |
127 | | /// CodePointMapData::<BidiClass>::new().get('y'), |
128 | | /// BidiClass::LeftToRight |
129 | | /// ); // U+0079 |
130 | | /// assert_eq!( |
131 | | /// CodePointMapData::<BidiClass>::new().get('ع'), |
132 | | /// BidiClass::ArabicLetter |
133 | | /// ); // U+0639 |
134 | | /// ``` |
135 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
136 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
137 | | #[allow(clippy::exhaustive_structs)] // newtype |
138 | | #[repr(transparent)] |
139 | | pub struct BidiClass(pub(crate) u8); |
140 | | |
141 | | impl BidiClass { |
142 | | /// Returns an ICU4C `UBidiClass` value. |
143 | 9.27M | pub const fn to_icu4c_value(self) -> u8 { |
144 | 9.27M | self.0 |
145 | 9.27M | } |
146 | | /// Constructor from an ICU4C `UBidiClass` value. |
147 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
148 | 0 | Self(value) |
149 | 0 | } |
150 | | } |
151 | | |
152 | | create_const_array! { |
153 | | #[allow(non_upper_case_globals)] |
154 | | impl BidiClass { |
155 | | /// (`L`) any strong left-to-right character |
156 | | pub const LeftToRight: BidiClass = BidiClass(0); |
157 | | /// (`R`) any strong right-to-left (non-Arabic-type) character |
158 | | pub const RightToLeft: BidiClass = BidiClass(1); |
159 | | /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit |
160 | | pub const EuropeanNumber: BidiClass = BidiClass(2); |
161 | | /// (`ES`) plus and minus signs |
162 | | pub const EuropeanSeparator: BidiClass = BidiClass(3); |
163 | | /// (`ET`) a terminator in a numeric format context, includes currency signs |
164 | | pub const EuropeanTerminator: BidiClass = BidiClass(4); |
165 | | /// (`AN`) any Arabic-Indic digit |
166 | | pub const ArabicNumber: BidiClass = BidiClass(5); |
167 | | /// (`CS`) commas, colons, and slashes |
168 | | pub const CommonSeparator: BidiClass = BidiClass(6); |
169 | | /// (`B`) various newline characters |
170 | | pub const ParagraphSeparator: BidiClass = BidiClass(7); |
171 | | /// (`S`) various segment-related control codes |
172 | | pub const SegmentSeparator: BidiClass = BidiClass(8); |
173 | | /// (`WS`) spaces |
174 | | pub const WhiteSpace: BidiClass = BidiClass(9); |
175 | | /// (`ON`) most other symbols and punctuation marks |
176 | | pub const OtherNeutral: BidiClass = BidiClass(10); |
177 | | /// (`LRE`) U+202A: the LR embedding control |
178 | | pub const LeftToRightEmbedding: BidiClass = BidiClass(11); |
179 | | /// (`LRO`) U+202D: the LR override control |
180 | | pub const LeftToRightOverride: BidiClass = BidiClass(12); |
181 | | /// (`AL`) any strong right-to-left (Arabic-type) character |
182 | | pub const ArabicLetter: BidiClass = BidiClass(13); |
183 | | /// (`RLE`) U+202B: the RL embedding control |
184 | | pub const RightToLeftEmbedding: BidiClass = BidiClass(14); |
185 | | /// (`RLO`) U+202E: the RL override control |
186 | | pub const RightToLeftOverride: BidiClass = BidiClass(15); |
187 | | /// (`PDF`) U+202C: terminates an embedding or override control |
188 | | pub const PopDirectionalFormat: BidiClass = BidiClass(16); |
189 | | /// (`NSM`) any nonspacing mark |
190 | | pub const NonspacingMark: BidiClass = BidiClass(17); |
191 | | /// (`BN`) most format characters, control codes, or noncharacters |
192 | | pub const BoundaryNeutral: BidiClass = BidiClass(18); |
193 | | /// (`FSI`) U+2068: the first strong isolate control |
194 | | pub const FirstStrongIsolate: BidiClass = BidiClass(19); |
195 | | /// (`LRI`) U+2066: the LR isolate control |
196 | | pub const LeftToRightIsolate: BidiClass = BidiClass(20); |
197 | | /// (`RLI`) U+2067: the RL isolate control |
198 | | pub const RightToLeftIsolate: BidiClass = BidiClass(21); |
199 | | /// (`PDI`) U+2069: terminates an isolate control |
200 | | pub const PopDirectionalIsolate: BidiClass = BidiClass(22); |
201 | | } |
202 | | } |
203 | | |
204 | | make_enumerated_property! { |
205 | | name: "Bidi_Class"; |
206 | | short_name: "bc"; |
207 | | ident: BidiClass; |
208 | | data_marker: crate::provider::PropertyEnumBidiClassV1; |
209 | | singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1; |
210 | | ule_ty: u8; |
211 | | } |
212 | | |
213 | | // This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props |
214 | | pub(crate) mod gc { |
215 | | /// Enumerated property General_Category. |
216 | | /// |
217 | | /// General_Category specifies the most general classification of a code point, usually |
218 | | /// determined based on the primary characteristic of the assigned character. For example, is the |
219 | | /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type? |
220 | | /// |
221 | | /// GeneralCategory only supports specific subcategories (e.g. `UppercaseLetter`). |
222 | | /// It does not support grouped categories (e.g. `Letter`). For grouped categories, use [`GeneralCategoryGroup`]( |
223 | | /// crate::props::GeneralCategoryGroup). |
224 | | /// |
225 | | /// # Example |
226 | | /// |
227 | | /// ``` |
228 | | /// use icu::properties::{props::GeneralCategory, CodePointMapData}; |
229 | | /// |
230 | | /// assert_eq!( |
231 | | /// CodePointMapData::<GeneralCategory>::new().get('木'), |
232 | | /// GeneralCategory::OtherLetter |
233 | | /// ); // U+6728 |
234 | | /// assert_eq!( |
235 | | /// CodePointMapData::<GeneralCategory>::new().get('🎃'), |
236 | | /// GeneralCategory::OtherSymbol |
237 | | /// ); // U+1F383 JACK-O-LANTERN |
238 | | /// ``` |
239 | | #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] |
240 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
241 | | #[cfg_attr(feature = "datagen", derive(databake::Bake))] |
242 | | #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] |
243 | | #[allow(clippy::exhaustive_enums)] // this type is stable |
244 | | #[zerovec::make_ule(GeneralCategoryULE)] |
245 | | #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))] |
246 | | #[repr(u8)] |
247 | | pub enum GeneralCategory { |
248 | | /// (`Cn`) A reserved unassigned code point or a noncharacter |
249 | | Unassigned = 0, |
250 | | |
251 | | /// (`Lu`) An uppercase letter |
252 | | UppercaseLetter = 1, |
253 | | /// (`Ll`) A lowercase letter |
254 | | LowercaseLetter = 2, |
255 | | /// (`Lt`) A digraphic letter, with first part uppercase |
256 | | TitlecaseLetter = 3, |
257 | | /// (`Lm`) A modifier letter |
258 | | ModifierLetter = 4, |
259 | | /// (`Lo`) Other letters, including syllables and ideographs |
260 | | OtherLetter = 5, |
261 | | |
262 | | /// (`Mn`) A nonspacing combining mark (zero advance width) |
263 | | NonspacingMark = 6, |
264 | | /// (`Mc`) A spacing combining mark (positive advance width) |
265 | | SpacingMark = 8, |
266 | | /// (`Me`) An enclosing combining mark |
267 | | EnclosingMark = 7, |
268 | | |
269 | | /// (`Nd`) A decimal digit |
270 | | DecimalNumber = 9, |
271 | | /// (`Nl`) A letterlike numeric character |
272 | | LetterNumber = 10, |
273 | | /// (`No`) A numeric character of other type |
274 | | OtherNumber = 11, |
275 | | |
276 | | /// (`Zs`) A space character (of various non-zero widths) |
277 | | SpaceSeparator = 12, |
278 | | /// (`Zl`) U+2028 LINE SEPARATOR only |
279 | | LineSeparator = 13, |
280 | | /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only |
281 | | ParagraphSeparator = 14, |
282 | | |
283 | | /// (`Cc`) A C0 or C1 control code |
284 | | Control = 15, |
285 | | /// (`Cf`) A format control character |
286 | | Format = 16, |
287 | | /// (`Co`) A private-use character |
288 | | PrivateUse = 17, |
289 | | /// (`Cs`) A surrogate code point |
290 | | Surrogate = 18, |
291 | | |
292 | | /// (`Pd`) A dash or hyphen punctuation mark |
293 | | DashPunctuation = 19, |
294 | | /// (`Ps`) An opening punctuation mark (of a pair) |
295 | | OpenPunctuation = 20, |
296 | | /// (`Pe`) A closing punctuation mark (of a pair) |
297 | | ClosePunctuation = 21, |
298 | | /// (`Pc`) A connecting punctuation mark, like a tie |
299 | | ConnectorPunctuation = 22, |
300 | | /// (`Pi`) An initial quotation mark |
301 | | InitialPunctuation = 28, |
302 | | /// (`Pf`) A final quotation mark |
303 | | FinalPunctuation = 29, |
304 | | /// (`Po`) A punctuation mark of other type |
305 | | OtherPunctuation = 23, |
306 | | |
307 | | /// (`Sm`) A symbol of mathematical use |
308 | | MathSymbol = 24, |
309 | | /// (`Sc`) A currency sign |
310 | | CurrencySymbol = 25, |
311 | | /// (`Sk`) A non-letterlike modifier symbol |
312 | | ModifierSymbol = 26, |
313 | | /// (`So`) A symbol of other type |
314 | | OtherSymbol = 27, |
315 | | } |
316 | | } |
317 | | |
318 | | pub use gc::GeneralCategory; |
319 | | |
320 | | impl GeneralCategory { |
321 | | /// All possible values of this enum |
322 | | pub const ALL_VALUES: &'static [GeneralCategory] = &[ |
323 | | GeneralCategory::Unassigned, |
324 | | GeneralCategory::UppercaseLetter, |
325 | | GeneralCategory::LowercaseLetter, |
326 | | GeneralCategory::TitlecaseLetter, |
327 | | GeneralCategory::ModifierLetter, |
328 | | GeneralCategory::OtherLetter, |
329 | | GeneralCategory::NonspacingMark, |
330 | | GeneralCategory::SpacingMark, |
331 | | GeneralCategory::EnclosingMark, |
332 | | GeneralCategory::DecimalNumber, |
333 | | GeneralCategory::LetterNumber, |
334 | | GeneralCategory::OtherNumber, |
335 | | GeneralCategory::SpaceSeparator, |
336 | | GeneralCategory::LineSeparator, |
337 | | GeneralCategory::ParagraphSeparator, |
338 | | GeneralCategory::Control, |
339 | | GeneralCategory::Format, |
340 | | GeneralCategory::PrivateUse, |
341 | | GeneralCategory::Surrogate, |
342 | | GeneralCategory::DashPunctuation, |
343 | | GeneralCategory::OpenPunctuation, |
344 | | GeneralCategory::ClosePunctuation, |
345 | | GeneralCategory::ConnectorPunctuation, |
346 | | GeneralCategory::InitialPunctuation, |
347 | | GeneralCategory::FinalPunctuation, |
348 | | GeneralCategory::OtherPunctuation, |
349 | | GeneralCategory::MathSymbol, |
350 | | GeneralCategory::CurrencySymbol, |
351 | | GeneralCategory::ModifierSymbol, |
352 | | GeneralCategory::OtherSymbol, |
353 | | ]; |
354 | | } |
355 | | |
356 | | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)] |
357 | | /// Error value for `impl TryFrom<u8> for GeneralCategory`. |
358 | | #[non_exhaustive] |
359 | | pub struct GeneralCategoryOutOfBoundsError; |
360 | | |
361 | | impl TryFrom<u8> for GeneralCategory { |
362 | | type Error = GeneralCategoryOutOfBoundsError; |
363 | | /// Construct this [`GeneralCategory`] from an integer, returning |
364 | | /// an error if it is out of bounds |
365 | 0 | fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> { |
366 | 0 | GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError) |
367 | 0 | } |
368 | | } |
369 | | |
370 | | make_enumerated_property! { |
371 | | name: "General_Category"; |
372 | | short_name: "gc"; |
373 | | ident: GeneralCategory; |
374 | | data_marker: crate::provider::PropertyEnumGeneralCategoryV1; |
375 | | singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1; |
376 | | } |
377 | | |
378 | | /// Groupings of multiple General_Category property values. |
379 | | /// |
380 | | /// Instances of `GeneralCategoryGroup` represent the defined multi-category |
381 | | /// values that are useful for users in certain contexts, such as regex. In |
382 | | /// other words, unlike [`GeneralCategory`], this supports groups of general |
383 | | /// categories: for example, `Letter` is the union of `UppercaseLetter`, |
384 | | /// `LowercaseLetter`, etc. |
385 | | /// |
386 | | /// See <https://www.unicode.org/reports/tr44/> . |
387 | | /// |
388 | | /// The values of the associated constants correspond to the `U_GC_XX_MASK` constants in ICU4C: |
389 | | /// each single-category constant is the bit `1 << (category as u32)`, and each grouped constant |
390 | | /// (for example `Letter`) is the bitwise OR of the bits of its member categories. |
391 | | /// |
392 | | /// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C. |
393 | | #[derive(Copy, Clone, PartialEq, Debug, Eq)] |
394 | | #[allow(clippy::exhaustive_structs)] // newtype |
395 | | #[repr(transparent)] |
396 | | pub struct GeneralCategoryGroup(pub(crate) u32); |
397 | | |
398 | | impl crate::private::Sealed for GeneralCategoryGroup {} |
399 | | |
400 | | use GeneralCategory as GC; |
401 | | use GeneralCategoryGroup as GCG; |
402 | | |
403 | | #[allow(non_upper_case_globals)] |
404 | | impl GeneralCategoryGroup { |
405 | | /// (`Lu`) An uppercase letter |
406 | | pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32)); |
407 | | /// (`Ll`) A lowercase letter |
408 | | pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32)); |
409 | | /// (`Lt`) A digraphic letter, with first part uppercase |
410 | | pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32)); |
411 | | /// (`Lm`) A modifier letter |
412 | | pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32)); |
413 | | /// (`Lo`) Other letters, including syllables and ideographs |
414 | | pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32)); |
415 | | /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter |
416 | | pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32)) |
417 | | | (1 << (GC::LowercaseLetter as u32)) |
418 | | | (1 << (GC::TitlecaseLetter as u32))); |
419 | | /// (`L`) The union of all letter categories |
420 | | pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32)) |
421 | | | (1 << (GC::LowercaseLetter as u32)) |
422 | | | (1 << (GC::TitlecaseLetter as u32)) |
423 | | | (1 << (GC::ModifierLetter as u32)) |
424 | | | (1 << (GC::OtherLetter as u32))); |
425 | | |
426 | | /// (`Mn`) A nonspacing combining mark (zero advance width) |
427 | | pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32)); |
428 | | /// (`Me`) An enclosing combining mark |
429 | | pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32)); |
430 | | /// (`Mc`) A spacing combining mark (positive advance width) |
431 | | pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32)); |
432 | | /// (`M`) The union of all mark categories |
433 | | pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32)) |
434 | | | (1 << (GC::EnclosingMark as u32)) |
435 | | | (1 << (GC::SpacingMark as u32))); |
436 | | |
437 | | /// (`Nd`) A decimal digit |
438 | | pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32)); |
439 | | /// (`Nl`) A letterlike numeric character |
440 | | pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32)); |
441 | | /// (`No`) A numeric character of other type |
442 | | pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32)); |
443 | | /// (`N`) The union of all number categories |
444 | | pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32)) |
445 | | | (1 << (GC::LetterNumber as u32)) |
446 | | | (1 << (GC::OtherNumber as u32))); |
447 | | |
448 | | /// (`Zs`) A space character (of various non-zero widths) |
449 | | pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32)); |
450 | | /// (`Zl`) U+2028 LINE SEPARATOR only |
451 | | pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32)); |
452 | | /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only |
453 | | pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32)); |
454 | | /// (`Z`) The union of all separator categories |
455 | | pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32)) |
456 | | | (1 << (GC::LineSeparator as u32)) |
457 | | | (1 << (GC::ParagraphSeparator as u32))); |
458 | | |
459 | | /// (`Cc`) A C0 or C1 control code |
460 | | pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32)); |
461 | | /// (`Cf`) A format control character |
462 | | pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32)); |
463 | | /// (`Co`) A private-use character |
464 | | pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32)); |
465 | | /// (`Cs`) A surrogate code point |
466 | | pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32)); |
467 | | /// (`Cn`) A reserved unassigned code point or a noncharacter |
468 | | pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32)); |
469 | | /// (`C`) The union of all control code, reserved, and unassigned categories |
470 | | pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32)) |
471 | | | (1 << (GC::Format as u32)) |
472 | | | (1 << (GC::PrivateUse as u32)) |
473 | | | (1 << (GC::Surrogate as u32)) |
474 | | | (1 << (GC::Unassigned as u32))); |
475 | | |
476 | | /// (`Pd`) A dash or hyphen punctuation mark |
477 | | pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32)); |
478 | | /// (`Ps`) An opening punctuation mark (of a pair) |
479 | | pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32)); |
480 | | /// (`Pe`) A closing punctuation mark (of a pair) |
481 | | pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32)); |
482 | | /// (`Pc`) A connecting punctuation mark, like a tie |
483 | | pub const ConnectorPunctuation: GeneralCategoryGroup = |
484 | | GCG(1 << (GC::ConnectorPunctuation as u32)); |
485 | | /// (`Pi`) An initial quotation mark |
486 | | pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32)); |
487 | | /// (`Pf`) A final quotation mark |
488 | | pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32)); |
489 | | /// (`Po`) A punctuation mark of other type |
490 | | pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32)); |
491 | | /// (`P`) The union of all punctuation categories |
492 | | pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32)) |
493 | | | (1 << (GC::OpenPunctuation as u32)) |
494 | | | (1 << (GC::ClosePunctuation as u32)) |
495 | | | (1 << (GC::ConnectorPunctuation as u32)) |
496 | | | (1 << (GC::OtherPunctuation as u32)) |
497 | | | (1 << (GC::InitialPunctuation as u32)) |
498 | | | (1 << (GC::FinalPunctuation as u32))); |
499 | | |
500 | | /// (`Sm`) A symbol of mathematical use |
501 | | pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32)); |
502 | | /// (`Sc`) A currency sign |
503 | | pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32)); |
504 | | /// (`Sk`) A non-letterlike modifier symbol |
505 | | pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32)); |
506 | | /// (`So`) A symbol of other type |
507 | | pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32)); |
508 | | /// (`S`) The union of all symbol categories |
509 | | pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32)) |
510 | | | (1 << (GC::CurrencySymbol as u32)) |
511 | | | (1 << (GC::ModifierSymbol as u32)) |
512 | | | (1 << (GC::OtherSymbol as u32))); |
513 | | |
514 | | const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1; |
515 | | |
516 | | /// Return whether the given [`GeneralCategory`] value belongs to this multi-value category. |
517 | | /// |
518 | | /// ``` |
519 | | /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; |
520 | | /// use icu::properties::CodePointMapData; |
521 | | /// |
522 | | /// let gc = CodePointMapData::<GeneralCategory>::new(); |
523 | | /// |
524 | | /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter); |
525 | | /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A'))); |
526 | | /// |
527 | | /// // U+0B1E ORIYA LETTER NYA |
528 | | /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter); |
529 | | /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ'))); |
530 | | /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ'))); |
531 | | /// |
532 | | /// // U+0301 COMBINING ACUTE ACCENT |
533 | | /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark); |
534 | | /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}'))); |
535 | | /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}'))); |
536 | | /// |
537 | | /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber); |
538 | | /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0'))); |
539 | | /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0'))); |
540 | | /// |
541 | | /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation); |
542 | | /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('('))); |
543 | | /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('('))); |
544 | | /// |
545 | | /// // U+2713 CHECK MARK |
546 | | /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol); |
547 | | /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓'))); |
548 | | /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓'))); |
549 | | /// |
550 | | /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator); |
551 | | /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' '))); |
552 | | /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' '))); |
553 | | /// |
554 | | /// // U+E007F CANCEL TAG |
555 | | /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format); |
556 | | /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}'))); |
557 | | /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}'))); |
558 | | /// ``` |
559 | 0 | pub const fn contains(self, val: GeneralCategory) -> bool { |
560 | 0 | 0 != (1 << (val as u32)) & self.0 |
561 | 0 | } |
562 | | |
563 | | /// Returns the complement of this group: the group whose mask is `!self.0` restricted to the bits of `Self::ALL` |
564 | | /// |
565 | | /// # Example |
566 | | /// |
567 | | /// ```rust |
568 | | /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; |
569 | | /// |
570 | | /// let letter = GeneralCategoryGroup::Letter; |
571 | | /// let not_letter = letter.complement(); |
572 | | /// |
573 | | /// assert!(not_letter.contains(GeneralCategory::MathSymbol)); |
574 | | /// assert!(!letter.contains(GeneralCategory::MathSymbol)); |
575 | | /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation)); |
576 | | /// assert!(!letter.contains(GeneralCategory::OtherPunctuation)); |
577 | | /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter)); |
578 | | /// assert!(letter.contains(GeneralCategory::UppercaseLetter)); |
579 | | /// ``` |
580 | 0 | pub const fn complement(self) -> Self { |
581 | | // `!self.0` also sets bits that lie outside `Self::ALL`; AND-ing with |
582 | | // `Self::ALL` clears them so the resulting mask stays in-range |
583 | 0 | GeneralCategoryGroup(!self.0 & Self::ALL) |
584 | 0 | } |
585 | | |
586 | | /// Returns the group representing all `GeneralCategory` values (its mask is `Self::ALL`) |
587 | | /// |
588 | | /// # Example |
589 | | /// |
590 | | /// ```rust |
591 | | /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; |
592 | | /// |
593 | | /// let all = GeneralCategoryGroup::all(); |
594 | | /// |
595 | | /// assert!(all.contains(GeneralCategory::MathSymbol)); |
596 | | /// assert!(all.contains(GeneralCategory::OtherPunctuation)); |
597 | | /// assert!(all.contains(GeneralCategory::UppercaseLetter)); |
598 | | /// ``` |
599 | 0 | pub const fn all() -> Self { |
600 | 0 | Self(Self::ALL) |
601 | 0 | } |
602 | | |
603 | | /// Returns the empty group (mask `0`), for which `contains` is false for every `GeneralCategory` |
604 | | /// |
605 | | /// # Example |
606 | | /// |
607 | | /// ```rust |
608 | | /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; |
609 | | /// |
610 | | /// let empty = GeneralCategoryGroup::empty(); |
611 | | /// |
612 | | /// assert!(!empty.contains(GeneralCategory::MathSymbol)); |
613 | | /// assert!(!empty.contains(GeneralCategory::OtherPunctuation)); |
614 | | /// assert!(!empty.contains(GeneralCategory::UppercaseLetter)); |
615 | | /// ``` |
616 | 0 | pub const fn empty() -> Self { |
617 | 0 | Self(0) |
618 | 0 | } |
619 | | |
620 | | /// Returns the union of `self` and `other`: the bitwise OR of the two masks, so a value is contained if either group contains it |
621 | | /// |
622 | | /// # Example |
623 | | /// |
624 | | /// ```rust |
625 | | /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; |
626 | | /// |
627 | | /// let letter = GeneralCategoryGroup::Letter; |
628 | | /// let symbol = GeneralCategoryGroup::Symbol; |
629 | | /// let union = letter.union(symbol); |
630 | | /// |
631 | | /// assert!(union.contains(GeneralCategory::MathSymbol)); |
632 | | /// assert!(!union.contains(GeneralCategory::OtherPunctuation)); |
633 | | /// assert!(union.contains(GeneralCategory::UppercaseLetter)); |
634 | | /// ``` |
635 | 0 | pub const fn union(self, other: Self) -> Self { |
636 | 0 | Self(self.0 | other.0) |
637 | 0 | } |
638 | | |
639 | | /// Returns the intersection of `self` and `other`: the bitwise AND of the two masks, so a value is contained only if both groups contain it |
640 | | /// |
641 | | /// # Example |
642 | | /// |
643 | | /// ```rust |
644 | | /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; |
645 | | /// |
646 | | /// let letter = GeneralCategoryGroup::Letter; |
647 | | /// let lu = GeneralCategoryGroup::UppercaseLetter; |
648 | | /// let intersection = letter.intersection(lu); |
649 | | /// |
650 | | /// assert!(!intersection.contains(GeneralCategory::MathSymbol)); |
651 | | /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation)); |
652 | | /// assert!(intersection.contains(GeneralCategory::UppercaseLetter)); |
653 | | /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter)); |
654 | | /// ``` |
655 | 0 | pub const fn intersection(self, other: Self) -> Self { |
656 | 0 | Self(self.0 & other.0) |
657 | 0 | } |
658 | | } |
659 | | |
660 | | impl From<GeneralCategory> for GeneralCategoryGroup { |
661 | 0 | fn from(subcategory: GeneralCategory) -> Self { |
662 | 0 | GeneralCategoryGroup(1 << (subcategory as u32)) |
663 | 0 | } |
664 | | } |
665 | | impl From<u32> for GeneralCategoryGroup { |
666 | 0 | fn from(mask: u32) -> Self { |
667 | | // Clear every bit of the caller-supplied mask that is not set in `Self::ALL`, |
668 | | // so an arbitrary `u32` can never yield an out-of-range group mask |
669 | 0 | GeneralCategoryGroup(mask & Self::ALL) |
670 | 0 | } |
671 | | } |
672 | | impl From<GeneralCategoryGroup> for u32 { |
673 | 0 | fn from(group: GeneralCategoryGroup) -> Self { |
674 | 0 | group.0 |
675 | 0 | } |
676 | | } |
677 | | |
678 | | /// Enumerated property Script. |
679 | | /// |
680 | | /// This is used with both the Script and Script_Extensions Unicode properties. |
681 | | /// Each character is assigned a single Script, but characters that are used in |
682 | | /// a particular subset of scripts will be in more than one Script_Extensions set. |
683 | | /// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the |
684 | | /// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to |
685 | | /// determine whether a code point belongs to a certain script, you should use |
686 | | /// [`ScriptWithExtensionsBorrowed::has_script`]. |
687 | | /// |
688 | | /// For more information, see UAX #24: <https://www.unicode.org/reports/tr24/>. |
689 | | /// See `UScriptCode` in ICU4C. |
690 | | /// |
691 | | /// # Example |
692 | | /// |
693 | | /// ``` |
694 | | /// use icu::properties::{CodePointMapData, props::Script}; |
695 | | /// |
696 | | /// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728 |
697 | | /// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN |
698 | | /// ``` |
699 | | /// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script |
700 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
701 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
702 | | #[allow(clippy::exhaustive_structs)] // newtype |
703 | | #[repr(transparent)] |
704 | | pub struct Script(pub(crate) u16); |
705 | | |
706 | | impl Script { |
707 | | /// Returns the wrapped `u16`, i.e. this script's ICU4C `UScriptCode` value. |
708 | 0 | pub const fn to_icu4c_value(self) -> u16 { |
709 | 0 | self.0 |
710 | 0 | } |
711 | | /// Constructor from an ICU4C `UScriptCode` value; the value is stored as-is, without validation. |
712 | 0 | pub const fn from_icu4c_value(value: u16) -> Self { |
713 | 0 | Self(value) |
714 | 0 | } |
715 | | } |
716 | | |
717 | | create_const_array! { |
718 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
719 | | #[allow(non_upper_case_globals)] |
720 | | impl Script { |
721 | | pub const Adlam: Script = Script(167); |
722 | | pub const Ahom: Script = Script(161); |
723 | | pub const AnatolianHieroglyphs: Script = Script(156); |
724 | | pub const Arabic: Script = Script(2); |
725 | | pub const Armenian: Script = Script(3); |
726 | | pub const Avestan: Script = Script(117); |
727 | | pub const Balinese: Script = Script(62); |
728 | | pub const Bamum: Script = Script(130); |
729 | | pub const BassaVah: Script = Script(134); |
730 | | pub const Batak: Script = Script(63); |
731 | | pub const Bengali: Script = Script(4); |
732 | | pub const BeriaErfe: Script = Script(208); |
733 | | pub const Bhaiksuki: Script = Script(168); |
734 | | pub const Bopomofo: Script = Script(5); |
735 | | pub const Brahmi: Script = Script(65); |
736 | | pub const Braille: Script = Script(46); |
737 | | pub const Buginese: Script = Script(55); |
738 | | pub const Buhid: Script = Script(44); |
739 | | pub const CanadianAboriginal: Script = Script(40); |
740 | | pub const Carian: Script = Script(104); |
741 | | pub const CaucasianAlbanian: Script = Script(159); |
742 | | pub const Chakma: Script = Script(118); |
743 | | pub const Cham: Script = Script(66); |
744 | | pub const Cherokee: Script = Script(6); |
745 | | pub const Chisoi: Script = Script(209); |
746 | | pub const Chorasmian: Script = Script(189); |
747 | | pub const Common: Script = Script(0); |
748 | | pub const Coptic: Script = Script(7); |
749 | | pub const Cuneiform: Script = Script(101); |
750 | | pub const Cypriot: Script = Script(47); |
751 | | pub const CyproMinoan: Script = Script(193); |
752 | | pub const Cyrillic: Script = Script(8); |
753 | | pub const Deseret: Script = Script(9); |
754 | | pub const Devanagari: Script = Script(10); |
755 | | pub const DivesAkuru: Script = Script(190); |
756 | | pub const Dogra: Script = Script(178); |
757 | | pub const Duployan: Script = Script(135); |
758 | | pub const EgyptianHieroglyphs: Script = Script(71); |
759 | | pub const Elbasan: Script = Script(136); |
760 | | pub const Elymaic: Script = Script(185); |
761 | | pub const Ethiopian: Script = Script(11); |
762 | | pub const Georgian: Script = Script(12); |
763 | | pub const Glagolitic: Script = Script(56); |
764 | | pub const Gothic: Script = Script(13); |
765 | | pub const Grantha: Script = Script(137); |
766 | | pub const Greek: Script = Script(14); |
767 | | pub const Gujarati: Script = Script(15); |
768 | | pub const GunjalaGondi: Script = Script(179); |
769 | | pub const Gurmukhi: Script = Script(16); |
770 | | pub const Han: Script = Script(17); |
771 | | pub const Hangul: Script = Script(18); |
772 | | pub const HanifiRohingya: Script = Script(182); |
773 | | pub const Hanunoo: Script = Script(43); |
774 | | pub const Hatran: Script = Script(162); |
775 | | pub const Hebrew: Script = Script(19); |
776 | | pub const Hiragana: Script = Script(20); |
777 | | pub const ImperialAramaic: Script = Script(116); |
778 | | pub const Inherited: Script = Script(1); |
779 | | pub const InscriptionalPahlavi: Script = Script(122); |
780 | | pub const InscriptionalParthian: Script = Script(125); |
781 | | pub const Javanese: Script = Script(78); |
782 | | pub const Kaithi: Script = Script(120); |
783 | | pub const Kannada: Script = Script(21); |
784 | | pub const Katakana: Script = Script(22); |
785 | | pub const Kawi: Script = Script(198); |
786 | | pub const KayahLi: Script = Script(79); |
787 | | pub const Kharoshthi: Script = Script(57); |
788 | | pub const KhitanSmallScript: Script = Script(191); |
789 | | pub const Khmer: Script = Script(23); |
790 | | pub const Khojki: Script = Script(157); |
791 | | pub const Khudawadi: Script = Script(145); |
792 | | pub const Lao: Script = Script(24); |
793 | | pub const Latin: Script = Script(25); |
794 | | pub const Lepcha: Script = Script(82); |
795 | | pub const Limbu: Script = Script(48); |
796 | | pub const LinearA: Script = Script(83); |
797 | | pub const LinearB: Script = Script(49); |
798 | | pub const Lisu: Script = Script(131); |
799 | | pub const Lycian: Script = Script(107); |
800 | | pub const Lydian: Script = Script(108); |
801 | | pub const Mahajani: Script = Script(160); |
802 | | pub const Makasar: Script = Script(180); |
803 | | pub const Malayalam: Script = Script(26); |
804 | | pub const Mandaic: Script = Script(84); |
805 | | pub const Manichaean: Script = Script(121); |
806 | | pub const Marchen: Script = Script(169); |
807 | | pub const MasaramGondi: Script = Script(175); |
808 | | pub const Medefaidrin: Script = Script(181); |
809 | | pub const MeeteiMayek: Script = Script(115); |
810 | | pub const MendeKikakui: Script = Script(140); |
811 | | pub const MeroiticCursive: Script = Script(141); |
812 | | pub const MeroiticHieroglyphs: Script = Script(86); |
813 | | pub const Miao: Script = Script(92); |
814 | | pub const Modi: Script = Script(163); |
815 | | pub const Mongolian: Script = Script(27); |
816 | | pub const Mro: Script = Script(149); |
817 | | pub const Multani: Script = Script(164); |
818 | | pub const Myanmar: Script = Script(28); |
819 | | pub const Nabataean: Script = Script(143); |
820 | | pub const NagMundari: Script = Script(199); |
821 | | pub const Nandinagari: Script = Script(187); |
822 | | pub const Nastaliq: Script = Script(200); |
823 | | pub const Newa: Script = Script(170); |
824 | | pub const NewTaiLue: Script = Script(59); |
825 | | pub const Nko: Script = Script(87); |
826 | | pub const Nushu: Script = Script(150); |
827 | | pub const NyiakengPuachueHmong: Script = Script(186); |
828 | | pub const Ogham: Script = Script(29); |
829 | | pub const OlChiki: Script = Script(109); |
830 | | pub const OldHungarian: Script = Script(76); |
831 | | pub const OldItalic: Script = Script(30); |
832 | | pub const OldNorthArabian: Script = Script(142); |
833 | | pub const OldPermic: Script = Script(89); |
834 | | pub const OldPersian: Script = Script(61); |
835 | | pub const OldSogdian: Script = Script(184); |
836 | | pub const OldSouthArabian: Script = Script(133); |
837 | | pub const OldTurkic: Script = Script(88); |
838 | | pub const OldUyghur: Script = Script(194); |
839 | | pub const Oriya: Script = Script(31); |
840 | | pub const Osage: Script = Script(171); |
841 | | pub const Osmanya: Script = Script(50); |
842 | | pub const PahawhHmong: Script = Script(75); |
843 | | pub const Palmyrene: Script = Script(144); |
844 | | pub const PauCinHau: Script = Script(165); |
845 | | pub const PhagsPa: Script = Script(90); |
846 | | pub const Phoenician: Script = Script(91); |
847 | | pub const PsalterPahlavi: Script = Script(123); |
848 | | pub const Rejang: Script = Script(110); |
849 | | pub const Runic: Script = Script(32); |
850 | | pub const Samaritan: Script = Script(126); |
851 | | pub const Saurashtra: Script = Script(111); |
852 | | pub const Sharada: Script = Script(151); |
853 | | pub const Shavian: Script = Script(51); |
854 | | pub const Siddham: Script = Script(166); |
855 | | pub const Sidetic: Script = Script(210); |
856 | | pub const SignWriting: Script = Script(112); |
857 | | pub const Sinhala: Script = Script(33); |
858 | | pub const Sogdian: Script = Script(183); |
859 | | pub const SoraSompeng: Script = Script(152); |
860 | | pub const Soyombo: Script = Script(176); |
861 | | pub const Sundanese: Script = Script(113); |
862 | | pub const SylotiNagri: Script = Script(58); |
863 | | pub const Syriac: Script = Script(34); |
864 | | pub const Tagalog: Script = Script(42); |
865 | | pub const Tagbanwa: Script = Script(45); |
866 | | pub const TaiLe: Script = Script(52); |
867 | | pub const TaiTham: Script = Script(106); |
868 | | pub const TaiViet: Script = Script(127); |
869 | | pub const TaiYo: Script = Script(211); |
870 | | pub const Takri: Script = Script(153); |
871 | | pub const Tamil: Script = Script(35); |
872 | | pub const Tangsa: Script = Script(195); |
873 | | pub const Tangut: Script = Script(154); |
874 | | pub const Telugu: Script = Script(36); |
875 | | pub const Thaana: Script = Script(37); |
876 | | pub const Thai: Script = Script(38); |
877 | | pub const Tibetan: Script = Script(39); |
878 | | pub const Tifinagh: Script = Script(60); |
879 | | pub const Tirhuta: Script = Script(158); |
880 | | pub const TolongSiki: Script = Script(212); |
881 | | pub const Toto: Script = Script(196); |
882 | | pub const Ugaritic: Script = Script(53); |
883 | | pub const Unknown: Script = Script(103); |
884 | | pub const Vai: Script = Script(99); |
885 | | pub const Vithkuqi: Script = Script(197); |
886 | | pub const Wancho: Script = Script(188); |
887 | | pub const WarangCiti: Script = Script(146); |
888 | | pub const Yezidi: Script = Script(192); |
889 | | pub const Yi: Script = Script(41); |
890 | | pub const ZanabazarSquare: Script = Script(177); |
891 | | } |
892 | | } |
893 | | |
894 | | make_enumerated_property! { |
895 | | name: "Script"; |
896 | | short_name: "sc"; |
897 | | ident: Script; |
898 | | data_marker: crate::provider::PropertyEnumScriptV1; |
899 | | singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1; |
900 | | ule_ty: <u16 as zerovec::ule::AsULE>::ULE; |
901 | | } |
902 | | |
903 | | /// Enumerated property Hangul_Syllable_Type |
904 | | /// |
905 | | /// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose |
906 | | /// arbitrary Hangul syllables. This property provides that ontology of Hangul code points. |
907 | | /// |
908 | | /// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html). |
909 | | /// |
910 | | /// # Example |
911 | | /// |
912 | | /// ``` |
913 | | /// use icu::properties::{props::HangulSyllableType, CodePointMapData}; |
914 | | /// |
915 | | /// assert_eq!( |
916 | | /// CodePointMapData::<HangulSyllableType>::new().get('ᄀ'), |
917 | | /// HangulSyllableType::LeadingJamo |
918 | | /// ); // U+1100 |
919 | | /// assert_eq!( |
920 | | /// CodePointMapData::<HangulSyllableType>::new().get('가'), |
921 | | /// HangulSyllableType::LeadingVowelSyllable |
922 | | /// ); // U+AC00 |
923 | | /// ``` |
924 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
925 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
926 | | #[allow(clippy::exhaustive_structs)] // newtype |
927 | | #[repr(transparent)] |
928 | | pub struct HangulSyllableType(pub(crate) u8); |
929 | | |
930 | | impl HangulSyllableType { |
931 | | /// Returns the wrapped `u8`, i.e. this value's ICU4C `UHangulSyllableType` value. |
932 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
933 | 0 | self.0 |
934 | 0 | } |
935 | | /// Constructor from an ICU4C `UHangulSyllableType` value; the value is stored as-is, without validation. |
936 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
937 | 0 | Self(value) |
938 | 0 | } |
939 | | } |
940 | | |
941 | | create_const_array! { |
942 | | #[allow(non_upper_case_globals)] |
943 | | impl HangulSyllableType { |
944 | | /// (`NA`) not applicable (e.g. not a Hangul code point). |
945 | | pub const NotApplicable: HangulSyllableType = HangulSyllableType(0); |
946 | | /// (`L`) a conjoining leading consonant Jamo. |
947 | | pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1); |
948 | | /// (`V`) a conjoining vowel Jamo. |
949 | | pub const VowelJamo: HangulSyllableType = HangulSyllableType(2); |
950 | | /// (`T`) a conjoining trailing consonant Jamo. |
951 | | pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3); |
952 | | /// (`LV`) a precomposed syllable with a leading consonant and a vowel. |
953 | | pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4); |
954 | | /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant. |
955 | | pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5); |
956 | | } |
957 | | } |
958 | | |
959 | | make_enumerated_property! { |
960 | | name: "Hangul_Syllable_Type"; |
961 | | short_name: "hst"; |
962 | | ident: HangulSyllableType; |
963 | | data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1; |
964 | | singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1; |
965 | | ule_ty: u8; |
966 | | |
967 | | } |
968 | | |
969 | | /// Enumerated property East_Asian_Width. |
970 | | /// |
971 | | /// See "Definition" in UAX #11 for the summary of each property value: |
972 | | /// <https://www.unicode.org/reports/tr11/#Definitions> |
973 | | /// |
974 | | /// # Example |
975 | | /// |
976 | | /// ``` |
977 | | /// use icu::properties::{props::EastAsianWidth, CodePointMapData}; |
978 | | /// |
979 | | /// assert_eq!( |
980 | | /// CodePointMapData::<EastAsianWidth>::new().get('\u{FF71}'), |
981 | | /// EastAsianWidth::Halfwidth |
982 | | /// ); // U+FF71: Halfwidth Katakana Letter A |
983 | | /// assert_eq!( |
984 | | /// CodePointMapData::<EastAsianWidth>::new().get('ア'), |
985 | | /// EastAsianWidth::Wide |
986 | | /// ); //U+30A2: Katakana Letter A |
987 | | /// ``` |
988 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
989 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
990 | | #[allow(clippy::exhaustive_structs)] // newtype |
991 | | #[repr(transparent)] |
992 | | pub struct EastAsianWidth(pub(crate) u8); |
993 | | |
994 | | impl EastAsianWidth { |
995 | | /// Returns the wrapped `u8`, i.e. this value's ICU4C `UEastAsianWidth` value. |
996 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
997 | 0 | self.0 |
998 | 0 | } |
999 | | /// Constructor from an ICU4C `UEastAsianWidth` value; the value is stored as-is, without validation. |
1000 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1001 | 0 | Self(value) |
1002 | 0 | } |
1003 | | } |
1004 | | |
1005 | | create_const_array! { |
1006 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1007 | | #[allow(non_upper_case_globals)] |
1008 | | impl EastAsianWidth { |
1009 | | pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N" |
1010 | | pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A" |
1011 | | pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H" |
1012 | | pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F" |
1013 | | pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na" |
1014 | | pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W" |
1015 | | } |
1016 | | } |
1017 | | |
1018 | | make_enumerated_property! { |
1019 | | name: "East_Asian_Width"; |
1020 | | short_name: "ea"; |
1021 | | ident: EastAsianWidth; |
1022 | | data_marker: crate::provider::PropertyEnumEastAsianWidthV1; |
1023 | | singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1; |
1024 | | ule_ty: u8; |
1025 | | } |
1026 | | |
1027 | | /// Enumerated property Line_Break. |
1028 | | /// |
1029 | | /// See "Line Breaking Properties" in UAX #14 for the summary of each property |
1030 | | /// value: <https://www.unicode.org/reports/tr14/#Properties> |
1031 | | /// |
1032 | | /// The numeric value is compatible with `ULineBreak` in ICU4C. |
1033 | | /// |
1034 | | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
1035 | | /// |
1036 | | /// # Example |
1037 | | /// |
1038 | | /// ``` |
1039 | | /// use icu::properties::{props::LineBreak, CodePointMapData}; |
1040 | | /// |
1041 | | /// assert_eq!( |
1042 | | /// CodePointMapData::<LineBreak>::new().get(')'), |
1043 | | /// LineBreak::CloseParenthesis |
1044 | | /// ); // U+0029: Right Parenthesis |
1045 | | /// assert_eq!( |
1046 | | /// CodePointMapData::<LineBreak>::new().get('ぁ'), |
1047 | | /// LineBreak::ConditionalJapaneseStarter |
1048 | | /// ); //U+3041: Hiragana Letter Small A |
1049 | | /// ``` |
1050 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1051 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1052 | | #[allow(clippy::exhaustive_structs)] // newtype |
1053 | | #[repr(transparent)] |
1054 | | pub struct LineBreak(pub(crate) u8); |
1055 | | |
1056 | | impl LineBreak { |
1057 | | /// Returns the wrapped `u8`, i.e. this value's ICU4C `ULineBreak` value. |
1058 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1059 | 0 | self.0 |
1060 | 0 | } |
1061 | | /// Constructor from an ICU4C `ULineBreak` value; the value is stored as-is, without validation. |
1062 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1063 | 0 | Self(value) |
1064 | 0 | } |
1065 | | } |
1066 | | |
1067 | | create_const_array! { |
1068 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1069 | | #[allow(non_upper_case_globals)] |
1070 | | impl LineBreak { |
1071 | | pub const Unknown: LineBreak = LineBreak(0); // name="XX" |
1072 | | pub const Ambiguous: LineBreak = LineBreak(1); // name="AI" |
1073 | | pub const Alphabetic: LineBreak = LineBreak(2); // name="AL" |
1074 | | pub const BreakBoth: LineBreak = LineBreak(3); // name="B2" |
1075 | | pub const BreakAfter: LineBreak = LineBreak(4); // name="BA" |
1076 | | pub const BreakBefore: LineBreak = LineBreak(5); // name="BB" |
1077 | | pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK" |
1078 | | pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB" |
1079 | | pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL" |
1080 | | pub const CombiningMark: LineBreak = LineBreak(9); // name="CM" |
1081 | | pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR" |
1082 | | pub const Exclamation: LineBreak = LineBreak(11); // name="EX" |
1083 | | pub const Glue: LineBreak = LineBreak(12); // name="GL" |
1084 | | pub const Hyphen: LineBreak = LineBreak(13); // name="HY" |
1085 | | pub const Ideographic: LineBreak = LineBreak(14); // name="ID" |
1086 | | pub const Inseparable: LineBreak = LineBreak(15); // name="IN" |
1087 | | pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS" |
1088 | | pub const LineFeed: LineBreak = LineBreak(17); // name="LF" |
1089 | | pub const Nonstarter: LineBreak = LineBreak(18); // name="NS" |
1090 | | pub const Numeric: LineBreak = LineBreak(19); // name="NU" |
1091 | | pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP" |
1092 | | pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO" |
1093 | | pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR" |
1094 | | pub const Quotation: LineBreak = LineBreak(23); // name="QU" |
1095 | | pub const ComplexContext: LineBreak = LineBreak(24); // name="SA" |
1096 | | pub const Surrogate: LineBreak = LineBreak(25); // name="SG" |
1097 | | pub const Space: LineBreak = LineBreak(26); // name="SP" |
1098 | | pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY" |
1099 | | pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW" |
1100 | | pub const NextLine: LineBreak = LineBreak(29); // name="NL" |
1101 | | pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ" |
1102 | | pub const H2: LineBreak = LineBreak(31); // name="H2" |
1103 | | pub const H3: LineBreak = LineBreak(32); // name="H3" |
1104 | | pub const JL: LineBreak = LineBreak(33); // name="JL" |
1105 | | pub const JT: LineBreak = LineBreak(34); // name="JT" |
1106 | | pub const JV: LineBreak = LineBreak(35); // name="JV" |
1107 | | pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP" |
1108 | | pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ" |
1109 | | pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL" |
1110 | | pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI" |
1111 | | pub const EBase: LineBreak = LineBreak(40); // name="EB" |
1112 | | pub const EModifier: LineBreak = LineBreak(41); // name="EM" |
1113 | | pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ" |
1114 | | |
1115 | | // Added in ICU 74: |
1116 | | pub const Aksara: LineBreak = LineBreak(43); // name="AK" |
1117 | | pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP" |
1118 | | pub const AksaraStart: LineBreak = LineBreak(45); // name="AS" |
1119 | | pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF" |
1120 | | pub const Virama: LineBreak = LineBreak(47); // name="VI" |
1121 | | |
1122 | | // Added in ICU 78: |
1123 | | pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH" |
1124 | | } |
1125 | | } |
1126 | | |
1127 | | make_enumerated_property! { |
1128 | | name: "Line_Break"; |
1129 | | short_name: "lb"; |
1130 | | ident: LineBreak; |
1131 | | data_marker: crate::provider::PropertyEnumLineBreakV1; |
1132 | | singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1; |
1133 | | ule_ty: u8; |
1134 | | } |
1135 | | |
1136 | | /// Enumerated property Grapheme_Cluster_Break. |
1137 | | /// |
1138 | | /// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the |
1139 | | /// summary of each property value: |
1140 | | /// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table> |
1141 | | /// |
1142 | | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
1143 | | /// |
1144 | | /// # Example |
1145 | | /// |
1146 | | /// ``` |
1147 | | /// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData}; |
1148 | | /// |
1149 | | /// assert_eq!( |
1150 | | /// CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'), |
1151 | | /// GraphemeClusterBreak::RegionalIndicator |
1152 | | /// ); // U+1F1E6: Regional Indicator Symbol Letter A |
1153 | | /// assert_eq!( |
1154 | | /// CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'), |
1155 | | /// GraphemeClusterBreak::SpacingMark |
1156 | | /// ); //U+0E33: Thai Character Sara Am |
1157 | | /// ``` |
1158 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1159 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1160 | | #[allow(clippy::exhaustive_structs)] // this type is stable |
1161 | | #[repr(transparent)] |
1162 | | pub struct GraphemeClusterBreak(pub(crate) u8); |
1163 | | |
1164 | | impl GraphemeClusterBreak { |
1165 | | /// Returns the wrapped `u8`, i.e. this value's ICU4C `UGraphemeClusterBreak` value. |
1166 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1167 | 0 | self.0 |
1168 | 0 | } |
1169 | | /// Constructor from an ICU4C `UGraphemeClusterBreak` value; the value is stored as-is, without validation. |
1170 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1171 | 0 | Self(value) |
1172 | 0 | } |
1173 | | } |
1174 | | |
1175 | | create_const_array! { |
1176 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1177 | | #[allow(non_upper_case_globals)] |
1178 | | impl GraphemeClusterBreak { |
1179 | | pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX" |
1180 | | pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN" |
1181 | | pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR" |
1182 | | pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX" |
1183 | | pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L" |
1184 | | pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF" |
1185 | | pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV" |
1186 | | pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT" |
1187 | | pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T" |
1188 | | pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V" |
1189 | | pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM" |
1190 | | pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP" |
1191 | | pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI" |
1192 | | /// This value is obsolete and unused. |
1193 | | pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB" |
1194 | | /// This value is obsolete and unused. |
1195 | | pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG" |
1196 | | /// This value is obsolete and unused. |
1197 | | pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM" |
1198 | | /// This value is obsolete and unused. |
1199 | | pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ" |
1200 | | pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ" |
1201 | | } |
1202 | | } |
1203 | | |
1204 | | make_enumerated_property! { |
1205 | | name: "Grapheme_Cluster_Break"; |
1206 | | short_name: "GCB"; |
1207 | | ident: GraphemeClusterBreak; |
1208 | | data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1; |
1209 | | singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1; |
1210 | | ule_ty: u8; |
1211 | | } |
1212 | | |
1213 | | /// Enumerated property Word_Break. |
1214 | | /// |
1215 | | /// See "Default Word Boundary Specification" in UAX #29 for the summary of |
1216 | | /// each property value: |
1217 | | /// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>. |
1218 | | /// |
1219 | | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
1220 | | /// |
1221 | | /// # Example |
1222 | | /// |
1223 | | /// ``` |
1224 | | /// use icu::properties::{props::WordBreak, CodePointMapData}; |
1225 | | /// |
1226 | | /// assert_eq!( |
1227 | | /// CodePointMapData::<WordBreak>::new().get('.'), |
1228 | | /// WordBreak::MidNumLet |
1229 | | /// ); // U+002E: Full Stop |
1230 | | /// assert_eq!( |
1231 | | /// CodePointMapData::<WordBreak>::new().get('\u{FF0C}'), |
1232 | | /// WordBreak::MidNum |
1233 | | /// ); // U+FF0C: Fullwidth Comma |
1234 | | /// ``` |
1235 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1236 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1237 | | #[allow(clippy::exhaustive_structs)] // newtype |
1238 | | #[repr(transparent)] |
1239 | | pub struct WordBreak(pub(crate) u8); |
1240 | | |
1241 | | impl WordBreak { |
1242 | | /// Returns the wrapped `u8`, i.e. this value's ICU4C `UWordBreak` value. |
1243 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1244 | 0 | self.0 |
1245 | 0 | } |
1246 | | /// Constructor from an ICU4C `UWordBreak` value; the value is stored as-is, without validation. |
1247 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1248 | 0 | Self(value) |
1249 | 0 | } |
1250 | | } |
1251 | | |
1252 | | create_const_array! { |
1253 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1254 | | #[allow(non_upper_case_globals)] |
1255 | | impl WordBreak { |
1256 | | pub const Other: WordBreak = WordBreak(0); // name="XX" |
1257 | | pub const ALetter: WordBreak = WordBreak(1); // name="LE" |
1258 | | pub const Format: WordBreak = WordBreak(2); // name="FO" |
1259 | | pub const Katakana: WordBreak = WordBreak(3); // name="KA" |
1260 | | pub const MidLetter: WordBreak = WordBreak(4); // name="ML" |
1261 | | pub const MidNum: WordBreak = WordBreak(5); // name="MN" |
1262 | | pub const Numeric: WordBreak = WordBreak(6); // name="NU" |
1263 | | pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX" |
1264 | | pub const CR: WordBreak = WordBreak(8); // name="CR" |
1265 | | pub const Extend: WordBreak = WordBreak(9); // name="Extend" |
1266 | | pub const LF: WordBreak = WordBreak(10); // name="LF" |
1267 | | pub const MidNumLet: WordBreak = WordBreak(11); // name="MB" |
1268 | | pub const Newline: WordBreak = WordBreak(12); // name="NL" |
1269 | | pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI" |
1270 | | pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL" |
1271 | | pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ" |
1272 | | pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ" |
1273 | | /// This value is obsolete and unused. |
1274 | | pub const EBase: WordBreak = WordBreak(17); // name="EB" |
1275 | | /// This value is obsolete and unused. |
1276 | | pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG" |
1277 | | /// This value is obsolete and unused. |
1278 | | pub const EModifier: WordBreak = WordBreak(19); // name="EM" |
1279 | | /// This value is obsolete and unused. |
1280 | | pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ" |
1281 | | pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ" |
1282 | | pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace" |
1283 | | } |
1284 | | } |
1285 | | |
1286 | | make_enumerated_property! { |
1287 | | name: "Word_Break"; |
1288 | | short_name: "WB"; |
1289 | | ident: WordBreak; |
1290 | | data_marker: crate::provider::PropertyEnumWordBreakV1; |
1291 | | singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1; |
1292 | | ule_ty: u8; |
1293 | | } |
1294 | | |
1295 | | /// Enumerated property Sentence_Break. |
1296 | | /// |
1297 | | /// See "Default Sentence Boundary Specification" in UAX #29 for the summary of |
1298 | | /// each property value: |
1299 | | /// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>. |
1300 | | /// |
1301 | | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
1302 | | /// |
1303 | | /// # Example |
1304 | | /// |
1305 | | /// ``` |
1306 | | /// use icu::properties::{props::SentenceBreak, CodePointMapData}; |
1307 | | /// |
1308 | | /// assert_eq!( |
1309 | | /// CodePointMapData::<SentenceBreak>::new().get('\u{FF19}'), |
1310 | | /// SentenceBreak::Numeric |
1311 | | /// ); // U+FF19: Fullwidth Digit Nine |
1312 | | /// assert_eq!( |
1313 | | /// CodePointMapData::<SentenceBreak>::new().get(','), |
1314 | | /// SentenceBreak::SContinue |
1315 | | /// ); // U+002C: Comma |
1316 | | /// ``` |
1317 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1318 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1319 | | #[allow(clippy::exhaustive_structs)] // newtype |
1320 | | #[repr(transparent)] |
1321 | | pub struct SentenceBreak(pub(crate) u8); |
1322 | | |
1323 | | impl SentenceBreak { |
1324 | | /// Returns an ICU4C `USentenceBreak` value. |
1325 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1326 | 0 | self.0 |
1327 | 0 | } |
1328 | | /// Constructor from an ICU4C `USentenceBreak` value. |
1329 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1330 | 0 | Self(value) |
1331 | 0 | } |
1332 | | } |
1333 | | |
1334 | | create_const_array! { |
1335 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1336 | | #[allow(non_upper_case_globals)] |
1337 | | impl SentenceBreak { |
1338 | | pub const Other: SentenceBreak = SentenceBreak(0); // name="XX" |
1339 | | pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT" |
1340 | | pub const Close: SentenceBreak = SentenceBreak(2); // name="CL" |
1341 | | pub const Format: SentenceBreak = SentenceBreak(3); // name="FO" |
1342 | | pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO" |
1343 | | pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU" |
1344 | | pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE" |
1345 | | pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE" |
1346 | | pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP" |
1347 | | pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST" |
1348 | | pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP" |
1349 | | pub const CR: SentenceBreak = SentenceBreak(11); // name="CR" |
1350 | | pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX" |
1351 | | pub const LF: SentenceBreak = SentenceBreak(13); // name="LF" |
1352 | | pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC" |
1353 | | } |
1354 | | } |
1355 | | |
1356 | | make_enumerated_property! { |
1357 | | name: "Sentence_Break"; |
1358 | | short_name: "SB"; |
1359 | | ident: SentenceBreak; |
1360 | | data_marker: crate::provider::PropertyEnumSentenceBreakV1; |
1361 | | singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1; |
1362 | | ule_ty: u8; |
1363 | | } |
1364 | | |
1365 | | /// Property Canonical_Combining_Class. |
1366 | | /// See UAX #15: |
1367 | | /// <https://www.unicode.org/reports/tr15/>. |
1368 | | /// |
1369 | | /// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API |
1370 | | /// to look up the Canonical_Combining_Class property by scalar value. |
1371 | | /// |
1372 | | /// **Note:** `icu::normalizer::properties::CanonicalCombiningClassMap` is the preferred API |
1373 | | /// for that lookup; the example below goes through the generic `CodePointMapData` map. |
1374 | | /// |
1375 | | /// # Example |
1376 | | /// |
1377 | | /// ``` |
1378 | | /// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData}; |
1379 | | /// |
1380 | | /// assert_eq!( |
1381 | | /// CodePointMapData::<CanonicalCombiningClass>::new().get('a'), |
1382 | | /// CanonicalCombiningClass::NotReordered |
1383 | | /// ); // U+0061: LATIN SMALL LETTER A |
1384 | | /// assert_eq!( |
1385 | | /// CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'), |
1386 | | /// CanonicalCombiningClass::Above |
1387 | | /// ); // U+0301: COMBINING ACUTE ACCENT |
1388 | | /// ``` |
1389 | | // |
1390 | | // NOTE: The Pernosco debugger has special knowledge |
1391 | | // of this struct. Please do not change the bit layout |
1392 | | // or the crate-module-qualified name of this struct |
1393 | | // without coordination. |
1394 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1395 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1396 | | #[allow(clippy::exhaustive_structs)] // newtype |
1397 | | #[repr(transparent)] |
1398 | | pub struct CanonicalCombiningClass(pub(crate) u8); |
1399 | | |
1400 | | impl CanonicalCombiningClass { |
1401 | | /// Returns an ICU4C `UCanonicalCombiningClass` value. |
1402 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1403 | 0 | self.0 |
1404 | 0 | } |
1405 | | /// Constructor from an ICU4C `UCanonicalCombiningClass` value. |
1406 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1407 | 0 | Self(value) |
1408 | 0 | } |
1409 | | } |
1410 | | |
1411 | | create_const_array! { |
1412 | | // These constant names come from PropertyValueAliases.txt |
1413 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1414 | | #[allow(non_upper_case_globals)] |
1415 | | impl CanonicalCombiningClass { |
1416 | | pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR" |
1417 | | pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV" |
1418 | | pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR" |
1419 | | pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK" |
1420 | | pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV" |
1421 | | pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR" |
1422 | | pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10" |
1423 | | pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11" |
1424 | | pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12" |
1425 | | pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13" |
1426 | | pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14" |
1427 | | pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15" |
1428 | | pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16" |
1429 | | pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17" |
1430 | | pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18" |
1431 | | pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19" |
1432 | | pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20" |
1433 | | pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21" |
1434 | | pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22" |
1435 | | pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23" |
1436 | | pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24" |
1437 | | pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25" |
1438 | | pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26" |
1439 | | pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27" |
1440 | | pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28" |
1441 | | pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29" |
1442 | | pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30" |
1443 | | pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31" |
1444 | | pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32" |
1445 | | pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33" |
1446 | | pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34" |
1447 | | pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35" |
1448 | | pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36" |
1449 | | pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84" |
1450 | | pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91" |
1451 | | pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103" |
1452 | | pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107" |
1453 | | pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118" |
1454 | | pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122" |
1455 | | pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129" |
1456 | | pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130" |
1457 | | pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132" |
1458 | | pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED |
1459 | | pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL" |
1460 | | pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB" |
1461 | | pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA" |
1462 | | pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR" |
1463 | | pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL" |
1464 | | pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B" |
1465 | | pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR" |
1466 | | pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L" |
1467 | | pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R" |
1468 | | pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL" |
1469 | | pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A" |
1470 | | pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR" |
1471 | | pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB" |
1472 | | pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA" |
1473 | | pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS" |
1474 | | } |
1475 | | } |
1476 | | |
1477 | | make_enumerated_property! { |
1478 | | name: "Canonical_Combining_Class"; |
1479 | | short_name: "ccc"; |
1480 | | ident: CanonicalCombiningClass; |
1481 | | data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1; |
1482 | | singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1; |
1483 | | ule_ty: u8; |
1484 | | } |
1485 | | |
1486 | | /// Property Indic_Conjunct_Break. |
1487 | | /// See UAX #44: |
1488 | | /// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>. |
1489 | | /// |
1490 | | /// # Example |
1491 | | /// |
1492 | | /// ``` |
1493 | | /// use icu::properties::{props::IndicConjunctBreak, CodePointMapData}; |
1494 | | /// |
1495 | | /// assert_eq!( |
1496 | | /// CodePointMapData::<IndicConjunctBreak>::new().get('a'), |
1497 | | /// IndicConjunctBreak::None |
1498 | | /// ); |
1499 | | /// assert_eq!( |
1500 | | /// CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'), |
1501 | | /// IndicConjunctBreak::Linker |
1502 | | /// ); |
1503 | | /// assert_eq!( |
1504 | | /// CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'), |
1505 | | /// IndicConjunctBreak::Consonant |
1506 | | /// ); |
1507 | | /// assert_eq!( |
1508 | | /// CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'), |
1509 | | /// IndicConjunctBreak::Extend |
1510 | | /// ); |
1511 | | /// ``` |
1512 | | #[doc(hidden)] // draft API in ICU4C |
1513 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1514 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1515 | | #[allow(clippy::exhaustive_structs)] // newtype |
1516 | | #[repr(transparent)] |
1517 | | pub struct IndicConjunctBreak(pub(crate) u8); |
1518 | | |
1519 | | impl IndicConjunctBreak { |
1520 | | /// Returns an ICU4C `UIndicConjunctBreak` value. |
1521 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1522 | 0 | self.0 |
1523 | 0 | } |
1524 | | /// Constructor from an ICU4C `UIndicConjunctBreak` value. |
1525 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1526 | 0 | Self(value) |
1527 | 0 | } |
1528 | | } |
1529 | | |
1530 | | create_const_array! { |
1531 | | #[doc(hidden)] // draft API in ICU4C |
1532 | | #[allow(non_upper_case_globals)] |
1533 | | impl IndicConjunctBreak { |
1534 | | pub const None: IndicConjunctBreak = IndicConjunctBreak(0); |
1535 | | pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1); |
1536 | | pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2); |
1537 | | pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3); |
1538 | | } |
1539 | | } |
1540 | | |
1541 | | make_enumerated_property! { |
1542 | | name: "Indic_Conjunct_Break"; |
1543 | | short_name: "InCB"; |
1544 | | ident: IndicConjunctBreak; |
1545 | | data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1; |
1546 | | singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1; |
1547 | | ule_ty: u8; |
1548 | | } |
1549 | | |
1550 | | /// Property Indic_Syllabic_Category. |
1551 | | /// See UAX #44: |
1552 | | /// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>. |
1553 | | /// |
1554 | | /// # Example |
1555 | | /// |
1556 | | /// ``` |
1557 | | /// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData}; |
1558 | | /// |
1559 | | /// assert_eq!( |
1560 | | /// CodePointMapData::<IndicSyllabicCategory>::new().get('a'), |
1561 | | /// IndicSyllabicCategory::Other |
1562 | | /// ); |
1563 | | /// assert_eq!( |
1564 | | /// CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'), |
1565 | | /// IndicSyllabicCategory::Bindu |
1566 | | /// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU |
1567 | | /// ``` |
1568 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1569 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1570 | | #[allow(clippy::exhaustive_structs)] // newtype |
1571 | | #[repr(transparent)] |
1572 | | pub struct IndicSyllabicCategory(pub(crate) u8); |
1573 | | |
1574 | | impl IndicSyllabicCategory { |
1575 | | /// Returns an ICU4C `UIndicSyllabicCategory` value. |
1576 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1577 | 0 | self.0 |
1578 | 0 | } |
1579 | | /// Constructor from an ICU4C `UIndicSyllabicCategory` value. |
1580 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1581 | 0 | Self(value) |
1582 | 0 | } |
1583 | | } |
1584 | | |
1585 | | create_const_array! { |
1586 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1587 | | #[allow(non_upper_case_globals)] |
1588 | | impl IndicSyllabicCategory { |
1589 | | pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0); |
1590 | | pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1); |
1591 | | pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2); |
1592 | | pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3); |
1593 | | pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4); |
1594 | | pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5); |
1595 | | pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6); |
1596 | | pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7); |
1597 | | pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8); |
1598 | | pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9); |
1599 | | pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10); |
1600 | | pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11); |
1601 | | pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12); |
1602 | | pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13); |
1603 | | pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14); |
1604 | | pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15); |
1605 | | pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16); |
1606 | | pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17); |
1607 | | pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18); |
1608 | | pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19); |
1609 | | pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20); |
1610 | | pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21); |
1611 | | pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22); |
1612 | | pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23); |
1613 | | pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24); |
1614 | | pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25); |
1615 | | pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26); |
1616 | | pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27); |
1617 | | pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28); |
1618 | | pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29); |
1619 | | pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30); |
1620 | | pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31); |
1621 | | pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32); |
1622 | | pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33); |
1623 | | pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34); |
1624 | | pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35); |
1625 | | pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36); |
1626 | | } |
1627 | | } |
1628 | | |
1629 | | make_enumerated_property! { |
1630 | | name: "Indic_Syllabic_Category"; |
1631 | | short_name: "InSC"; |
1632 | | ident: IndicSyllabicCategory; |
1633 | | data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1; |
1634 | | singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1; |
1635 | | ule_ty: u8; |
1636 | | } |
1637 | | |
1638 | | /// Enumerated property Joining_Type. |
1639 | | /// |
1640 | | /// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of |
1641 | | /// each property value. |
1642 | | /// |
1643 | | /// # Example |
1644 | | /// |
1645 | | /// ``` |
1646 | | /// use icu::properties::{props::JoiningType, CodePointMapData}; |
1647 | | /// |
1648 | | /// assert_eq!( |
1649 | | /// CodePointMapData::<JoiningType>::new().get('ؠ'), |
1650 | | /// JoiningType::DualJoining |
1651 | | /// ); // U+0620: Arabic Letter Kashmiri Yeh |
1652 | | /// assert_eq!( |
1653 | | /// CodePointMapData::<JoiningType>::new().get('𐫍'), |
1654 | | /// JoiningType::LeftJoining |
1655 | | /// ); // U+10ACD: Manichaean Letter Heth |
1656 | | /// ``` |
1657 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1658 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1659 | | #[allow(clippy::exhaustive_structs)] // newtype |
1660 | | #[repr(transparent)] |
1661 | | pub struct JoiningType(pub(crate) u8); |
1662 | | |
1663 | | impl JoiningType { |
1664 | | /// Returns an ICU4C `UJoiningType` value. |
1665 | 417k | pub const fn to_icu4c_value(self) -> u8 { |
1666 | 417k | self.0 |
1667 | 417k | } |
1668 | | /// Constructor from an ICU4C `UJoiningType` value. |
1669 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1670 | 0 | Self(value) |
1671 | 0 | } |
1672 | | } |
1673 | | |
1674 | | create_const_array! { |
1675 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1676 | | #[allow(non_upper_case_globals)] |
1677 | | impl JoiningType { |
1678 | | pub const NonJoining: JoiningType = JoiningType(0); // name="U" |
1679 | | pub const JoinCausing: JoiningType = JoiningType(1); // name="C" |
1680 | | pub const DualJoining: JoiningType = JoiningType(2); // name="D" |
1681 | | pub const LeftJoining: JoiningType = JoiningType(3); // name="L" |
1682 | | pub const RightJoining: JoiningType = JoiningType(4); // name="R" |
1683 | | pub const Transparent: JoiningType = JoiningType(5); // name="T" |
1684 | | } |
1685 | | } |
1686 | | |
1687 | | make_enumerated_property! { |
1688 | | name: "Joining_Type"; |
1689 | | short_name: "jt"; |
1690 | | ident: JoiningType; |
1691 | | data_marker: crate::provider::PropertyEnumJoiningTypeV1; |
1692 | | singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1; |
1693 | | ule_ty: u8; |
1694 | | } |
1695 | | |
1696 | | /// Property Vertical_Orientation. |
1697 | | /// |
1698 | | /// See UTR #50: |
1699 | | /// <https://www.unicode.org/reports/tr50/#vo> |
1700 | | /// |
1701 | | /// # Example |
1702 | | /// |
1703 | | /// ``` |
1704 | | /// use icu::properties::{props::VerticalOrientation, CodePointMapData}; |
1705 | | /// |
1706 | | /// assert_eq!( |
1707 | | /// CodePointMapData::<VerticalOrientation>::new().get('a'), |
1708 | | /// VerticalOrientation::Rotated |
1709 | | /// ); |
1710 | | /// assert_eq!( |
1711 | | /// CodePointMapData::<VerticalOrientation>::new().get('§'), |
1712 | | /// VerticalOrientation::Upright |
1713 | | /// ); |
1714 | | /// assert_eq!( |
1715 | | /// CodePointMapData::<VerticalOrientation>::new().get32(0x2329), |
1716 | | /// VerticalOrientation::TransformedRotated |
1717 | | /// ); |
1718 | | /// assert_eq!( |
1719 | | /// CodePointMapData::<VerticalOrientation>::new().get32(0x3001), |
1720 | | /// VerticalOrientation::TransformedUpright |
1721 | | /// ); |
1722 | | /// ``` |
1723 | | #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1724 | | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] |
1725 | | #[allow(clippy::exhaustive_structs)] // newtype |
1726 | | #[repr(transparent)] |
1727 | | pub struct VerticalOrientation(pub(crate) u8); |
1728 | | |
1729 | | impl VerticalOrientation { |
1730 | | /// Returns an ICU4C `UVerticalOrientation` value. |
1731 | 0 | pub const fn to_icu4c_value(self) -> u8 { |
1732 | 0 | self.0 |
1733 | 0 | } |
1734 | | /// Constructor from an ICU4C `UVerticalOrientation` value. |
1735 | 0 | pub const fn from_icu4c_value(value: u8) -> Self { |
1736 | 0 | Self(value) |
1737 | 0 | } |
1738 | | } |
1739 | | |
1740 | | create_const_array! { |
1741 | | #[allow(missing_docs)] // These constants don't need individual documentation. |
1742 | | #[allow(non_upper_case_globals)] |
1743 | | impl VerticalOrientation { |
1744 | | pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R" |
1745 | | pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr" |
1746 | | pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu" |
1747 | | pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U" |
1748 | | } |
1749 | | } |
1750 | | |
1751 | | make_enumerated_property! { |
1752 | | name: "Vertical_Orientation"; |
1753 | | short_name: "vo"; |
1754 | | ident: VerticalOrientation; |
1755 | | data_marker: crate::provider::PropertyEnumVerticalOrientationV1; |
1756 | | singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1; |
1757 | | ule_ty: u8; |
1758 | | } |
1759 | | |
1760 | | pub use crate::code_point_set::BinaryProperty; |
1761 | | |
1762 | | macro_rules! make_binary_property { |
1763 | | ( |
1764 | | name: $name:literal; |
1765 | | short_name: $short_name:literal; |
1766 | | ident: $ident:ident; |
1767 | | data_marker: $data_marker:ty; |
1768 | | singleton: $singleton:ident; |
1769 | | $(#[$doc:meta])+ |
1770 | | ) => { |
1771 | | $(#[$doc])+ |
1772 | | #[derive(Debug)] |
1773 | | #[non_exhaustive] |
1774 | | pub struct $ident; |
1775 | | |
1776 | | impl crate::private::Sealed for $ident {} |
1777 | | |
1778 | | impl BinaryProperty for $ident { |
1779 | | type DataMarker = $data_marker; |
1780 | | #[cfg(feature = "compiled_data")] |
1781 | | const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> = |
1782 | | &crate::provider::Baked::$singleton; |
1783 | | const NAME: &'static [u8] = $name.as_bytes(); |
1784 | | const SHORT_NAME: &'static [u8] = $short_name.as_bytes(); |
1785 | | } |
1786 | | }; |
1787 | | } |
1788 | | |
1789 | | make_binary_property! { |
1790 | | name: "ASCII_Hex_Digit"; |
1791 | | short_name: "AHex"; |
1792 | | ident: AsciiHexDigit; |
1793 | | data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1; |
1794 | | singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1; |
1795 | | /// ASCII characters commonly used for the representation of hexadecimal numbers. |
1796 | | /// |
1797 | | /// # Example |
1798 | | /// |
1799 | | /// ``` |
1800 | | /// use icu::properties::CodePointSetData; |
1801 | | /// use icu::properties::props::AsciiHexDigit; |
1802 | | /// |
1803 | | /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>(); |
1804 | | /// |
1805 | | /// assert!(ascii_hex_digit.contains('3')); |
1806 | | /// assert!(!ascii_hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
1807 | | /// assert!(ascii_hex_digit.contains('A')); |
1808 | | /// assert!(!ascii_hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
1809 | | /// ``` |
1810 | | } |
1811 | | |
1812 | | make_binary_property! { |
1813 | | name: "Alnum"; |
1814 | | short_name: "Alnum"; |
1815 | | ident: Alnum; |
1816 | | data_marker: crate::provider::PropertyBinaryAlnumV1; |
1817 | | singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1; |
1818 | | /// Characters with the `Alphabetic` or `Decimal_Number` property. |
1819 | | /// |
1820 | | /// This is defined for POSIX compatibility. |
1821 | | } |
1822 | | |
1823 | | make_binary_property! { |
1824 | | name: "Alphabetic"; |
1825 | | short_name: "Alpha"; |
1826 | | ident: Alphabetic; |
1827 | | data_marker: crate::provider::PropertyBinaryAlphabeticV1; |
1828 | | singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1; |
1829 | | /// Alphabetic characters. |
1830 | | /// |
1831 | | /// # Example |
1832 | | /// |
1833 | | /// ``` |
1834 | | /// use icu::properties::CodePointSetData; |
1835 | | /// use icu::properties::props::Alphabetic; |
1836 | | /// |
1837 | | /// let alphabetic = CodePointSetData::new::<Alphabetic>(); |
1838 | | /// |
1839 | | /// assert!(!alphabetic.contains('3')); |
1840 | | /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
1841 | | /// assert!(alphabetic.contains('A')); |
1842 | | /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
1843 | | /// ``` |
1844 | | |
1845 | | } |
1846 | | |
1847 | | make_binary_property! { |
1848 | | name: "Bidi_Control"; |
1849 | | short_name: "Bidi_C"; |
1850 | | ident: BidiControl; |
1851 | | data_marker: crate::provider::PropertyBinaryBidiControlV1; |
1852 | | singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1; |
1853 | | /// Format control characters which have specific functions in the Unicode Bidirectional |
1854 | | /// Algorithm. |
1855 | | /// |
1856 | | /// # Example |
1857 | | /// |
1858 | | /// ``` |
1859 | | /// use icu::properties::CodePointSetData; |
1860 | | /// use icu::properties::props::BidiControl; |
1861 | | /// |
1862 | | /// let bidi_control = CodePointSetData::new::<BidiControl>(); |
1863 | | /// |
1864 | | /// assert!(bidi_control.contains('\u{200F}')); // RIGHT-TO-LEFT MARK |
1865 | | /// assert!(!bidi_control.contains('ش')); // U+0634 ARABIC LETTER SHEEN |
1866 | | /// ``` |
1867 | | |
1868 | | } |
1869 | | |
1870 | | make_binary_property! { |
1871 | | name: "Bidi_Mirrored"; |
1872 | | short_name: "Bidi_M"; |
1873 | | ident: BidiMirrored; |
1874 | | data_marker: crate::provider::PropertyBinaryBidiMirroredV1; |
1875 | | singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1; |
1876 | | /// Characters that are mirrored in bidirectional text. |
1877 | | /// |
1878 | | /// # Example |
1879 | | /// |
1880 | | /// ``` |
1881 | | /// use icu::properties::CodePointSetData; |
1882 | | /// use icu::properties::props::BidiMirrored; |
1883 | | /// |
1884 | | /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>(); |
1885 | | /// |
1886 | | /// assert!(bidi_mirrored.contains('[')); // U+005B LEFT SQUARE BRACKET |
1887 | | /// assert!(bidi_mirrored.contains(']')); // U+005D RIGHT SQUARE BRACKET |
1888 | | /// assert!(bidi_mirrored.contains('∑')); // U+2211 N-ARY SUMMATION |
1889 | | /// assert!(!bidi_mirrored.contains('ཉ')); // U+0F49 TIBETAN LETTER NYA |
1890 | | /// ``` |
1891 | | |
1892 | | } |
1893 | | |
1894 | | make_binary_property! { |
1895 | | name: "Blank"; |
1896 | | short_name: "Blank"; |
1897 | | ident: Blank; |
1898 | | data_marker: crate::provider::PropertyBinaryBlankV1; |
1899 | | singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1; |
1900 | | /// Horizontal whitespace characters. |
1901 | | |
1902 | | } |
1903 | | |
1904 | | make_binary_property! { |
1905 | | name: "Cased"; |
1906 | | short_name: "Cased"; |
1907 | | ident: Cased; |
1908 | | data_marker: crate::provider::PropertyBinaryCasedV1; |
1909 | | singleton: SINGLETON_PROPERTY_BINARY_CASED_V1; |
1910 | | /// Uppercase, lowercase, and titlecase characters (the Unicode `Cased` property). |
1911 | | /// |
1912 | | /// # Example |
1913 | | /// |
1914 | | /// ``` |
1915 | | /// use icu::properties::CodePointSetData; |
1916 | | /// use icu::properties::props::Cased; |
1917 | | /// |
1918 | | /// let cased = CodePointSetData::new::<Cased>(); |
1919 | | /// |
1920 | | /// assert!(cased.contains('Ꙡ')); // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE |
1921 | | /// assert!(!cased.contains('ދ')); // U+078B THAANA LETTER DHAALU |
1922 | | /// ``` |
1923 | | |
1924 | | } |
1925 | | |
1926 | | make_binary_property! { |
1927 | | name: "Case_Ignorable"; |
1928 | | short_name: "CI"; |
1929 | | ident: CaseIgnorable; |
1930 | | data_marker: crate::provider::PropertyBinaryCaseIgnorableV1; |
1931 | | singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1; |
1932 | | /// Characters which are ignored for casing purposes. |
1933 | | /// |
1934 | | /// # Example |
1935 | | /// |
1936 | | /// ``` |
1937 | | /// use icu::properties::CodePointSetData; |
1938 | | /// use icu::properties::props::CaseIgnorable; |
1939 | | /// |
1940 | | /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>(); |
1941 | | /// |
1942 | | /// assert!(case_ignorable.contains(':')); // U+003A COLON |
1943 | | /// assert!(!case_ignorable.contains('λ')); // U+03BB GREEK SMALL LETTER LAMBDA |
1944 | | /// ``` |
1945 | | |
1946 | | } |
1947 | | |
1948 | | make_binary_property! { |
1949 | | name: "Full_Composition_Exclusion"; |
1950 | | short_name: "Comp_Ex"; |
1951 | | ident: FullCompositionExclusion; |
1952 | | data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1; |
1953 | | singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1; |
1954 | | /// Characters that are excluded from composition (the Unicode `Full_Composition_Exclusion` property). |
1955 | | /// |
1956 | | /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt> |
1957 | | |
1958 | | } |
1959 | | |
1960 | | make_binary_property! { |
1961 | | name: "Changes_When_Casefolded"; |
1962 | | short_name: "CWCF"; |
1963 | | ident: ChangesWhenCasefolded; |
1964 | | data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1; |
1965 | | singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1; |
1966 | | /// Characters whose normalized forms are not stable under case folding (the Unicode `Changes_When_Casefolded` property). |
1967 | | /// |
1968 | | /// # Example |
1969 | | /// |
1970 | | /// ``` |
1971 | | /// use icu::properties::CodePointSetData; |
1972 | | /// use icu::properties::props::ChangesWhenCasefolded; |
1973 | | /// |
1974 | | /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>(); |
1975 | | /// |
1976 | | /// assert!(changes_when_casefolded.contains('ß')); // U+00DF LATIN SMALL LETTER SHARP S |
1977 | | /// assert!(!changes_when_casefolded.contains('ᜉ')); // U+1709 TAGALOG LETTER PA |
1978 | | /// ``` |
1979 | | |
1980 | | } |
1981 | | |
1982 | | make_binary_property! { |
1983 | | name: "Changes_When_Casemapped"; |
1984 | | short_name: "CWCM"; |
1985 | | ident: ChangesWhenCasemapped; |
1986 | | data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1; |
1987 | | singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1; |
1988 | | /// Characters which may change when they undergo case mapping (the Unicode `Changes_When_Casemapped` property). |
1989 | | |
1990 | | } |
1991 | | |
1992 | | make_binary_property! { |
1993 | | name: "Changes_When_NFKC_Casefolded"; |
1994 | | short_name: "CWKCF"; |
1995 | | ident: ChangesWhenNfkcCasefolded; |
1996 | | data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1; |
1997 | | singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1; |
1998 | | /// Characters which are not identical to their `NFKC_Casefold` mapping. |
1999 | | /// |
2000 | | /// # Example |
2001 | | /// |
2002 | | /// ``` |
2003 | | /// use icu::properties::CodePointSetData; |
2004 | | /// use icu::properties::props::ChangesWhenNfkcCasefolded; |
2005 | | /// |
2006 | | /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>(); |
2007 | | /// |
2008 | | /// assert!(changes_when_nfkc_casefolded.contains('🄵')); // U+1F135 SQUARED LATIN CAPITAL LETTER F |
2009 | | /// assert!(!changes_when_nfkc_casefolded.contains('f')); // U+0066 LATIN SMALL LETTER F |
2010 | | /// ``` |
2011 | | |
2012 | | } |
2013 | | |
2014 | | make_binary_property! { |
2015 | | name: "Changes_When_Lowercased"; |
2016 | | short_name: "CWL"; |
2017 | | ident: ChangesWhenLowercased; |
2018 | | data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1; |
2019 | | singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1; |
2020 | | /// Characters whose normalized forms are not stable under a `toLowercase` mapping (the Unicode `Changes_When_Lowercased` property). |
2021 | | /// |
2022 | | /// # Example |
2023 | | /// |
2024 | | /// ``` |
2025 | | /// use icu::properties::CodePointSetData; |
2026 | | /// use icu::properties::props::ChangesWhenLowercased; |
2027 | | /// |
2028 | | /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>(); |
2029 | | /// |
2030 | | /// assert!(changes_when_lowercased.contains('Ⴔ')); // U+10B4 GEORGIAN CAPITAL LETTER PHAR |
2031 | | /// assert!(!changes_when_lowercased.contains('ფ')); // U+10E4 GEORGIAN LETTER PHAR |
2032 | | /// ``` |
2033 | | |
2034 | | } |
2035 | | |
2036 | | make_binary_property! { |
2037 | | name: "Changes_When_Titlecased"; |
2038 | | short_name: "CWT"; |
2039 | | ident: ChangesWhenTitlecased; |
2040 | | data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1; |
2041 | | singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1; |
2042 | | /// Characters whose normalized forms are not stable under a `toTitlecase` mapping. |
2043 | | /// |
2044 | | /// # Example |
2045 | | /// |
2046 | | /// ``` |
2047 | | /// use icu::properties::CodePointSetData; |
2048 | | /// use icu::properties::props::ChangesWhenTitlecased; |
2049 | | /// |
2050 | | /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>(); |
2051 | | /// |
2052 | | /// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE |
2053 | | /// assert!(!changes_when_titlecased.contains('Æ')); // U+00C6 LATIN CAPITAL LETTER AE |
2054 | | /// ``` |
2055 | | |
2056 | | } |
2057 | | |
2058 | | make_binary_property! { |
2059 | | name: "Changes_When_Uppercased"; |
2060 | | short_name: "CWU"; |
2061 | | ident: ChangesWhenUppercased; |
2062 | | data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1; |
2063 | | singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1; |
2064 | | /// Characters whose normalized forms are not stable under a `toUppercase` mapping (the Unicode `Changes_When_Uppercased` property). |
2065 | | /// |
2066 | | /// # Example |
2067 | | /// |
2068 | | /// ``` |
2069 | | /// use icu::properties::CodePointSetData; |
2070 | | /// use icu::properties::props::ChangesWhenUppercased; |
2071 | | /// |
2072 | | /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>(); |
2073 | | /// |
2074 | | /// assert!(changes_when_uppercased.contains('ւ')); // U+0582 ARMENIAN SMALL LETTER YIWN |
2075 | | /// assert!(!changes_when_uppercased.contains('Ւ')); // U+0552 ARMENIAN CAPITAL LETTER YIWN |
2076 | | /// ``` |
2077 | | |
2078 | | } |
2079 | | |
2080 | | make_binary_property! { |
2081 | | name: "Dash"; |
2082 | | short_name: "Dash"; |
2083 | | ident: Dash; |
2084 | | data_marker: crate::provider::PropertyBinaryDashV1; |
2085 | | singleton: SINGLETON_PROPERTY_BINARY_DASH_V1; |
2086 | | /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus |
2087 | | /// their compatibility equivalents. |
2088 | | /// |
2089 | | /// # Example |
2090 | | /// |
2091 | | /// ``` |
2092 | | /// use icu::properties::CodePointSetData; |
2093 | | /// use icu::properties::props::Dash; |
2094 | | /// |
2095 | | /// let dash = CodePointSetData::new::<Dash>(); |
2096 | | /// |
2097 | | /// assert!(dash.contains('⸺')); // U+2E3A TWO-EM DASH |
2098 | | /// assert!(dash.contains('-')); // U+002D HYPHEN-MINUS |
2099 | | /// assert!(!dash.contains('=')); // U+003D EQUALS SIGN |
2100 | | /// ``` |
2101 | | |
2102 | | } |
2103 | | |
2104 | | make_binary_property! { |
2105 | | name: "Deprecated"; |
2106 | | short_name: "Dep"; |
2107 | | ident: Deprecated; |
2108 | | data_marker: crate::provider::PropertyBinaryDeprecatedV1; |
2109 | | singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1; |
2110 | | /// Deprecated characters. |
2111 | | /// |
2112 | | /// No characters will ever be removed from the standard, but the |
2113 | | /// usage of deprecated characters is strongly discouraged. |
2114 | | /// |
2115 | | /// # Example |
2116 | | /// |
2117 | | /// ``` |
2118 | | /// use icu::properties::CodePointSetData; |
2119 | | /// use icu::properties::props::Deprecated; |
2120 | | /// |
2121 | | /// let deprecated = CodePointSetData::new::<Deprecated>(); |
2122 | | /// |
2123 | | /// assert!(deprecated.contains('ឣ')); // U+17A3 KHMER INDEPENDENT VOWEL QAQ |
2124 | | /// assert!(!deprecated.contains('A')); // U+0041 LATIN CAPITAL LETTER A |
2125 | | /// ``` |
2126 | | |
2127 | | } |
2128 | | |
2129 | | make_binary_property! { |
2130 | | name: "Default_Ignorable_Code_Point"; |
2131 | | short_name: "DI"; |
2132 | | ident: DefaultIgnorableCodePoint; |
2133 | | data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1; |
2134 | | singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1; |
2135 | | /// For programmatic determination of default ignorable code points. |
2136 | | /// |
2137 | | /// New characters that |
2138 | | /// should be ignored in rendering (unless explicitly supported) will be assigned in these |
2139 | | /// ranges, permitting programs to correctly handle the default rendering of such |
2140 | | /// characters when not otherwise supported. |
2141 | | /// |
2142 | | /// # Example |
2143 | | /// |
2144 | | /// ``` |
2145 | | /// use icu::properties::CodePointSetData; |
2146 | | /// use icu::properties::props::DefaultIgnorableCodePoint; |
2147 | | /// |
2148 | | /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>(); |
2149 | | /// |
2150 | | /// assert!(default_ignorable_code_point.contains('\u{180B}')); // U+180B MONGOLIAN FREE VARIATION SELECTOR ONE |
2151 | | /// assert!(!default_ignorable_code_point.contains('E')); // U+0045 LATIN CAPITAL LETTER E |
2152 | | /// ``` |
2153 | | |
2154 | | } |
2155 | | |
2156 | | make_binary_property! { |
2157 | | name: "Diacritic"; |
2158 | | short_name: "Dia"; |
2159 | | ident: Diacritic; |
2160 | | data_marker: crate::provider::PropertyBinaryDiacriticV1; |
2161 | | singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1; |
2162 | | /// Characters that linguistically modify the meaning of another character to which they apply. |
2163 | | /// |
2164 | | /// # Example |
2165 | | /// |
2166 | | /// ``` |
2167 | | /// use icu::properties::CodePointSetData; |
2168 | | /// use icu::properties::props::Diacritic; |
2169 | | /// |
2170 | | /// let diacritic = CodePointSetData::new::<Diacritic>(); |
2171 | | /// |
2172 | | /// assert!(diacritic.contains('\u{05B3}')); // U+05B3 HEBREW POINT HATAF QAMATS |
2173 | | /// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF |
2174 | | /// ``` |
2175 | | |
2176 | | } |
2177 | | |
2178 | | make_binary_property! { |
2179 | | name: "Emoji_Modifier_Base"; |
2180 | | short_name: "EBase"; |
2181 | | ident: EmojiModifierBase; |
2182 | | data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1; |
2183 | | singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1; |
2184 | | /// Characters that can serve as a base for emoji modifiers (the Unicode `Emoji_Modifier_Base` property). |
2185 | | /// |
2186 | | /// # Example |
2187 | | /// |
2188 | | /// ``` |
2189 | | /// use icu::properties::CodePointSetData; |
2190 | | /// use icu::properties::props::EmojiModifierBase; |
2191 | | /// |
2192 | | /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>(); |
2193 | | /// |
2194 | | /// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST |
2195 | | /// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN |
2196 | | /// ``` |
2197 | | |
2198 | | } |
2199 | | |
2200 | | make_binary_property! { |
2201 | | name: "Emoji_Component"; |
2202 | | short_name: "EComp"; |
2203 | | ident: EmojiComponent; |
2204 | | data_marker: crate::provider::PropertyBinaryEmojiComponentV1; |
2205 | | singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1; |
2206 | | /// Characters used in emoji sequences that normally do not appear on emoji keyboards as |
2207 | | /// separate choices, such as base characters for emoji keycaps. |
2208 | | /// |
2209 | | /// # Example |
2210 | | /// |
2211 | | /// ``` |
2212 | | /// use icu::properties::CodePointSetData; |
2213 | | /// use icu::properties::props::EmojiComponent; |
2214 | | /// |
2215 | | /// let emoji_component = CodePointSetData::new::<EmojiComponent>(); |
2216 | | /// |
2217 | | /// assert!(emoji_component.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
2218 | | /// assert!(emoji_component.contains('\u{20E3}')); // U+20E3 COMBINING ENCLOSING KEYCAP |
2219 | | /// assert!(emoji_component.contains('7')); // U+0037 DIGIT SEVEN |
2220 | | /// assert!(!emoji_component.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
2221 | | /// ``` |
2222 | | |
2223 | | } |
2224 | | |
2225 | | make_binary_property! { |
2226 | | name: "Emoji_Modifier"; |
2227 | | short_name: "EMod"; |
2228 | | ident: EmojiModifier; |
2229 | | data_marker: crate::provider::PropertyBinaryEmojiModifierV1; |
2230 | | singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1; |
2231 | | /// Characters that are emoji modifiers. |
2232 | | /// |
2233 | | /// # Example |
2234 | | /// |
2235 | | /// ``` |
2236 | | /// use icu::properties::CodePointSetData; |
2237 | | /// use icu::properties::props::EmojiModifier; |
2238 | | /// |
2239 | | /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>(); |
2240 | | /// |
2241 | | /// assert!(emoji_modifier.contains('\u{1F3FD}')); // U+1F3FD EMOJI MODIFIER FITZPATRICK TYPE-4 |
2242 | | /// assert!(!emoji_modifier.contains('\u{200C}')); // U+200C ZERO WIDTH NON-JOINER |
2243 | | /// ``` |
2244 | | |
2245 | | } |
2246 | | |
2247 | | make_binary_property! { |
2248 | | name: "Emoji"; |
2249 | | short_name: "Emoji"; |
2250 | | ident: Emoji; |
2251 | | data_marker: crate::provider::PropertyBinaryEmojiV1; |
2252 | | singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1; |
2253 | | /// Characters that are emoji. |
2254 | | /// |
2255 | | /// # Example |
2256 | | /// |
2257 | | /// ``` |
2258 | | /// use icu::properties::CodePointSetData; |
2259 | | /// use icu::properties::props::Emoji; |
2260 | | /// |
2261 | | /// let emoji = CodePointSetData::new::<Emoji>(); |
2262 | | /// |
2263 | | /// assert!(emoji.contains('🔥')); // U+1F525 FIRE |
2264 | | /// assert!(!emoji.contains('V')); // U+0056 LATIN CAPITAL LETTER V |
2265 | | /// ``` |
2266 | | |
2267 | | } |
2268 | | |
2269 | | make_binary_property! { |
2270 | | name: "Emoji_Presentation"; |
2271 | | short_name: "EPres"; |
2272 | | ident: EmojiPresentation; |
2273 | | data_marker: crate::provider::PropertyBinaryEmojiPresentationV1; |
2274 | | singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1; |
2275 | | /// Characters that have emoji presentation by default (the Unicode `Emoji_Presentation` property). |
2276 | | /// |
2277 | | /// # Example |
2278 | | /// |
2279 | | /// ``` |
2280 | | /// use icu::properties::CodePointSetData; |
2281 | | /// use icu::properties::props::EmojiPresentation; |
2282 | | /// |
2283 | | /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>(); |
2284 | | /// |
2285 | | /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON |
2286 | | /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL |
2287 | | /// ``` |
2288 | | |
2289 | | } |
2290 | | |
2291 | | make_binary_property! { |
2292 | | name: "Extender"; |
2293 | | short_name: "Ext"; |
2294 | | ident: Extender; |
2295 | | data_marker: crate::provider::PropertyBinaryExtenderV1; |
2296 | | singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1; |
2297 | | /// Characters whose principal function is to extend the value of a preceding alphabetic |
2298 | | /// character or to extend the shape of adjacent characters (the Unicode `Extender` property). |
2299 | | /// |
2300 | | /// # Example |
2301 | | /// |
2302 | | /// ``` |
2303 | | /// use icu::properties::CodePointSetData; |
2304 | | /// use icu::properties::props::Extender; |
2305 | | /// |
2306 | | /// let extender = CodePointSetData::new::<Extender>(); |
2307 | | /// |
2308 | | /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK |
2309 | | /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK |
2310 | | /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT |
2311 | | /// ``` |
2312 | | |
2313 | | } |
2314 | | |
2315 | | make_binary_property! { |
2316 | | name: "Extended_Pictographic"; |
2317 | | short_name: "ExtPict"; |
2318 | | ident: ExtendedPictographic; |
2319 | | data_marker: crate::provider::PropertyBinaryExtendedPictographicV1; |
2320 | | singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1; |
2321 | | /// Pictographic symbols, as well as reserved ranges in blocks largely associated with |
2322 | | /// emoji characters. |
2323 | | /// |
2324 | | /// # Example |
2325 | | /// |
2326 | | /// ``` |
2327 | | /// use icu::properties::CodePointSetData; |
2328 | | /// use icu::properties::props::ExtendedPictographic; |
2329 | | /// |
2330 | | /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>(); |
2331 | | /// |
2332 | | /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT |
2333 | | /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E |
2334 | | /// ``` |
2335 | | |
2336 | | } |
2337 | | |
2338 | | make_binary_property! { |
2339 | | name: "Graph"; |
2340 | | short_name: "Graph"; |
2341 | | ident: Graph; |
2342 | | data_marker: crate::provider::PropertyBinaryGraphV1; |
2343 | | singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1; |
2344 | | /// Visible characters. |
2345 | | /// |
2346 | | /// This is defined for POSIX compatibility. |
2347 | | |
2348 | | } |
2349 | | |
2350 | | make_binary_property! { |
2351 | | name: "Grapheme_Base"; |
2352 | | short_name: "Gr_Base"; |
2353 | | ident: GraphemeBase; |
2354 | | data_marker: crate::provider::PropertyBinaryGraphemeBaseV1; |
2355 | | singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1; |
2356 | | /// Property used together with the definition of Standard Korean Syllable Block to define |
2357 | | /// "Grapheme base". |
2358 | | /// |
2359 | | /// See D58 in Chapter 3, Conformance in the Unicode Standard. |
2360 | | /// |
2361 | | /// # Example |
2362 | | /// |
2363 | | /// ``` |
2364 | | /// use icu::properties::CodePointSetData; |
2365 | | /// use icu::properties::props::GraphemeBase; |
2366 | | /// |
2367 | | /// let grapheme_base = CodePointSetData::new::<GraphemeBase>(); |
2368 | | /// |
2369 | | /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
2370 | | /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
2371 | | /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA (a `GraphemeExtend` character) |
2372 | | /// ``` |
2373 | | |
2374 | | } |
2375 | | |
2376 | | make_binary_property! { |
2377 | | name: "Grapheme_Extend"; |
2378 | | short_name: "Gr_Ext"; |
2379 | | ident: GraphemeExtend; |
2380 | | data_marker: crate::provider::PropertyBinaryGraphemeExtendV1; |
2381 | | singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1; |
2382 | | /// Property used to define "Grapheme extender". |
2383 | | /// |
2384 | | /// See D59 in Chapter 3, Conformance in the |
2385 | | /// Unicode Standard. |
2386 | | /// |
2387 | | /// # Example |
2388 | | /// |
2389 | | /// ``` |
2390 | | /// use icu::properties::CodePointSetData; |
2391 | | /// use icu::properties::props::GraphemeExtend; |
2392 | | /// |
2393 | | /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>(); |
2394 | | /// |
2395 | | /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
2396 | | /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I (a `GraphemeBase` character) |
2397 | | /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
2398 | | /// ``` |
2399 | | |
2400 | | } |
2401 | | |
2402 | | make_binary_property! { |
2403 | | name: "Grapheme_Link"; |
2404 | | short_name: "Gr_Link"; |
2405 | | ident: GraphemeLink; |
2406 | | data_marker: crate::provider::PropertyBinaryGraphemeLinkV1; |
2407 | | singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1; |
2408 | | /// Deprecated property (Unicode `Grapheme_Link`). |
2409 | | /// |
2410 | | /// Formerly proposed for programmatic determination of grapheme |
2411 | | /// cluster boundaries. |
2412 | | } |
2413 | | |
2414 | | make_binary_property! { |
2415 | | name: "Hex_Digit"; |
2416 | | short_name: "Hex"; |
2417 | | ident: HexDigit; |
2418 | | data_marker: crate::provider::PropertyBinaryHexDigitV1; |
2419 | | singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1; |
2420 | | /// Characters commonly used for the representation of hexadecimal numbers, plus their |
2421 | | /// compatibility equivalents. |
2422 | | /// |
2423 | | /// # Example |
2424 | | /// |
2425 | | /// ``` |
2426 | | /// use icu::properties::CodePointSetData; |
2427 | | /// use icu::properties::props::HexDigit; |
2428 | | /// |
2429 | | /// let hex_digit = CodePointSetData::new::<HexDigit>(); |
2430 | | /// |
2431 | | /// assert!(hex_digit.contains('0')); // U+0030 DIGIT ZERO |
2432 | | /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
2433 | | /// assert!(hex_digit.contains('f')); // U+0066 LATIN SMALL LETTER F |
2434 | | /// assert!(hex_digit.contains('f')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F |
2435 | | /// assert!(hex_digit.contains('F')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F |
2436 | | /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
2437 | | /// ``` |
2438 | | } |
2439 | | |
2440 | | make_binary_property! { |
2441 | | name: "Hyphen"; |
2442 | | short_name: "Hyphen"; |
2443 | | ident: Hyphen; |
2444 | | data_marker: crate::provider::PropertyBinaryHyphenV1; |
2445 | | singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1; |
2446 | | /// Deprecated property (Unicode `Hyphen`). |
2447 | | /// |
2448 | | /// Dashes which are used to mark connections between pieces of |
2449 | | /// words, plus the Katakana middle dot. |
2450 | | } |
2451 | | |
2452 | | make_binary_property! { |
2453 | | name: "ID_Compat_Math_Continue"; |
2454 | | short_name: "ID_Compat_Math_Continue"; |
2455 | | ident: IdCompatMathContinue; |
2456 | | data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1; |
2457 | | singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1; |
2458 | | /// The Unicode `ID_Compat_Math_Continue` property. |
2459 | | } |
2460 | | |
2461 | | make_binary_property! { |
2462 | | name: "ID_Compat_Math_Start"; |
2463 | | short_name: "ID_Compat_Math_Start"; |
2464 | | ident: IdCompatMathStart; |
2465 | | data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1; |
2466 | | singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1; |
2467 | | /// The Unicode `ID_Compat_Math_Start` property. |
2468 | | } |
2469 | | |
2470 | | make_binary_property! { |
2471 | | name: "Id_Continue"; |
2472 | | short_name: "IDC"; |
2473 | | ident: IdContinue; |
2474 | | data_marker: crate::provider::PropertyBinaryIdContinueV1; |
2475 | | singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1; |
2476 | | /// Characters that can come after the first character in an identifier. |
2477 | | /// |
2478 | | /// If using NFKC to |
2479 | | /// fold differences between characters, use [`XidContinue`] instead. See |
2480 | | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
2481 | | /// more details. |
2482 | | /// |
2483 | | /// # Example |
2484 | | /// |
2485 | | /// ``` |
2486 | | /// use icu::properties::CodePointSetData; |
2487 | | /// use icu::properties::props::IdContinue; |
2488 | | /// |
2489 | | /// let id_continue = CodePointSetData::new::<IdContinue>(); |
2490 | | /// |
2491 | | /// assert!(id_continue.contains('x')); // U+0078 LATIN SMALL LETTER X |
2492 | | /// assert!(id_continue.contains('1')); // U+0031 DIGIT ONE |
2493 | | /// assert!(id_continue.contains('_')); // U+005F LOW LINE |
2494 | | /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
2495 | | /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
2496 | | /// assert!(id_continue.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
2497 | | /// ``` |
2498 | | } |
2499 | | |
2500 | | make_binary_property! { |
2501 | | name: "Ideographic"; |
2502 | | short_name: "Ideo"; |
2503 | | ident: Ideographic; |
2504 | | data_marker: crate::provider::PropertyBinaryIdeographicV1; |
2505 | | singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1; |
2506 | | /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese) |
2507 | | /// ideographs, or related siniform ideographs. |
2508 | | /// |
2509 | | /// # Example |
2510 | | /// |
2511 | | /// ``` |
2512 | | /// use icu::properties::CodePointSetData; |
2513 | | /// use icu::properties::props::Ideographic; |
2514 | | /// |
2515 | | /// let ideographic = CodePointSetData::new::<Ideographic>(); |
2516 | | /// |
2517 | | /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
2518 | | /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB |
2519 | | /// ``` |
2520 | | } |
2521 | | |
2522 | | make_binary_property! { |
2523 | | name: "Id_Start"; |
2524 | | short_name: "IDS"; |
2525 | | ident: IdStart; |
2526 | | data_marker: crate::provider::PropertyBinaryIdStartV1; |
2527 | | singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1; |
2528 | | /// Characters that can begin an identifier. |
2529 | | /// |
2530 | | /// If using NFKC to fold differences between |
2531 | | /// characters, use [`XidStart`] instead. See [`Unicode Standard Annex |
2532 | | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
2533 | | /// |
2534 | | /// # Example |
2535 | | /// |
2536 | | /// ``` |
2537 | | /// use icu::properties::CodePointSetData; |
2538 | | /// use icu::properties::props::IdStart; |
2539 | | /// |
2540 | | /// let id_start = CodePointSetData::new::<IdStart>(); |
2541 | | /// |
2542 | | /// assert!(id_start.contains('x')); // U+0078 LATIN SMALL LETTER X |
2543 | | /// assert!(!id_start.contains('1')); // U+0031 DIGIT ONE |
2544 | | /// assert!(!id_start.contains('_')); // U+005F LOW LINE |
2545 | | /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA |
2546 | | /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
2547 | | /// assert!(id_start.contains('\u{FC5E}')); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
2548 | | /// ``` |
2549 | | } |
2550 | | |
2551 | | make_binary_property! { |
2552 | | name: "Ids_Binary_Operator"; |
2553 | | short_name: "IDSB"; |
2554 | | ident: IdsBinaryOperator; |
2555 | | data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1; |
2556 | | singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1; |
2557 | | /// Characters used in Ideographic Description Sequences. |
2558 | | /// |
2559 | | /// # Example |
2560 | | /// |
2561 | | /// ``` |
2562 | | /// use icu::properties::CodePointSetData; |
2563 | | /// use icu::properties::props::IdsBinaryOperator; |
2564 | | /// |
2565 | | /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>(); |
2566 | | /// |
2567 | | /// assert!(ids_binary_operator.contains('\u{2FF5}')); // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
2568 | | /// assert!(!ids_binary_operator.contains('\u{3006}')); // U+3006 IDEOGRAPHIC CLOSING MARK |
2569 | | /// ``` |
2570 | | } |
2571 | | |
2572 | | make_binary_property! { |
2573 | | name: "Ids_Trinary_Operator"; |
2574 | | short_name: "IDST"; |
2575 | | ident: IdsTrinaryOperator; |
2576 | | data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1; |
2577 | | singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1; |
2578 | | /// Characters used in Ideographic Description Sequences. |
2579 | | /// |
2580 | | /// # Example |
2581 | | /// |
2582 | | /// ``` |
2583 | | /// use icu::properties::CodePointSetData; |
2584 | | /// use icu::properties::props::IdsTrinaryOperator; |
2585 | | /// |
2586 | | /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>(); |
2587 | | /// |
2588 | | /// assert!(ids_trinary_operator.contains('\u{2FF2}')); // U+2FF2 IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT |
2589 | | /// assert!(ids_trinary_operator.contains('\u{2FF3}')); // U+2FF3 IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW |
2590 | | /// assert!(!ids_trinary_operator.contains('\u{2FF4}')); // U+2FF4 IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND |
2591 | | /// assert!(!ids_trinary_operator.contains('\u{2FF5}')); // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
2592 | | /// assert!(!ids_trinary_operator.contains('\u{3006}')); // U+3006 IDEOGRAPHIC CLOSING MARK |
2593 | | /// ``` |
2594 | | } |
2595 | | |
2596 | | make_binary_property! { |
2597 | | name: "IDS_Unary_Operator"; |
2598 | | short_name: "IDSU"; |
2599 | | ident: IdsUnaryOperator; |
2600 | | data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1; |
2601 | | singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1; |
2602 | | /// The Unicode `IDS_Unary_Operator` property. |
2603 | | } |
2604 | | |
2605 | | make_binary_property! { |
2606 | | name: "Join_Control"; |
2607 | | short_name: "Join_C"; |
2608 | | ident: JoinControl; |
2609 | | data_marker: crate::provider::PropertyBinaryJoinControlV1; |
2610 | | singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1; |
2611 | | /// Format control characters which have specific functions for control of cursive joining |
2612 | | /// and ligation. |
2613 | | /// |
2614 | | /// # Example |
2615 | | /// |
2616 | | /// ``` |
2617 | | /// use icu::properties::CodePointSetData; |
2618 | | /// use icu::properties::props::JoinControl; |
2619 | | /// |
2620 | | /// let join_control = CodePointSetData::new::<JoinControl>(); |
2621 | | /// |
2622 | | /// assert!(join_control.contains('\u{200C}')); // U+200C ZERO WIDTH NON-JOINER |
2623 | | /// assert!(join_control.contains('\u{200D}')); // U+200D ZERO WIDTH JOINER |
2624 | | /// assert!(!join_control.contains('\u{200E}')); // U+200E LEFT-TO-RIGHT MARK |
2625 | | /// ``` |
2626 | | } |
2627 | | |
2628 | | make_binary_property! { |
2629 | | name: "Logical_Order_Exception"; |
2630 | | short_name: "LOE"; |
2631 | | ident: LogicalOrderException; |
2632 | | data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1; |
2633 | | singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1; |
2634 | | /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao (the Unicode `Logical_Order_Exception` property). |
2635 | | /// |
2636 | | /// # Example |
2637 | | /// |
2638 | | /// ``` |
2639 | | /// use icu::properties::CodePointSetData; |
2640 | | /// use icu::properties::props::LogicalOrderException; |
2641 | | /// |
2642 | | /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>(); |
2643 | | /// |
2644 | | /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI |
2645 | | /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A |
2646 | | /// ``` |
2647 | | } |
2648 | | |
2649 | | make_binary_property! { |
2650 | | name: "Lowercase"; |
2651 | | short_name: "Lower"; |
2652 | | ident: Lowercase; |
2653 | | data_marker: crate::provider::PropertyBinaryLowercaseV1; |
2654 | | singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1; |
2655 | | /// Characters with the `Lowercase` property, such as `a`. |
2656 | | /// |
2657 | | /// # Example |
2658 | | /// |
2659 | | /// ``` |
2660 | | /// use icu::properties::CodePointSetData; |
2661 | | /// use icu::properties::props::Lowercase; |
2662 | | /// |
2663 | | /// let lowercase = CodePointSetData::new::<Lowercase>(); |
2664 | | /// |
2665 | | /// assert!(lowercase.contains('a')); |
2666 | | /// assert!(!lowercase.contains('A')); |
2667 | | /// ``` |
2668 | | } |
2669 | | |
2670 | | make_binary_property! { |
2671 | | name: "Math"; |
2672 | | short_name: "Math"; |
2673 | | ident: Math; |
2674 | | data_marker: crate::provider::PropertyBinaryMathV1; |
2675 | | singleton: SINGLETON_PROPERTY_BINARY_MATH_V1; |
2676 | | /// Characters used in mathematical notation. |
2677 | | /// |
2678 | | /// # Example |
2679 | | /// |
2680 | | /// ``` |
2681 | | /// use icu::properties::CodePointSetData; |
2682 | | /// use icu::properties::props::Math; |
2683 | | /// |
2684 | | /// let math = CodePointSetData::new::<Math>(); |
2685 | | /// |
2686 | | /// assert!(math.contains('=')); |
2687 | | /// assert!(math.contains('+')); |
2688 | | /// assert!(!math.contains('-')); // U+002D HYPHEN-MINUS |
2689 | | /// assert!(math.contains('−')); // U+2212 MINUS SIGN |
2690 | | /// assert!(!math.contains('/')); // U+002F SOLIDUS |
2691 | | /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH |
2692 | | /// ``` |
2693 | | } |
2694 | | |
2695 | | make_binary_property! { |
2696 | | name: "Modifier_Combining_Mark"; |
2697 | | short_name: "MCM"; |
2698 | | ident: ModifierCombiningMark; |
2699 | | data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1; |
2700 | | singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1; |
2701 | | /// Characters with the `Modifier_Combining_Mark` (`MCM`) property. |
2702 | | } |
2703 | | |
2704 | | make_binary_property! { |
2705 | | name: "Noncharacter_Code_Point"; |
2706 | | short_name: "NChar"; |
2707 | | ident: NoncharacterCodePoint; |
2708 | | data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1; |
2709 | | singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1; |
2710 | | /// Code points permanently reserved for internal use. |
2711 | | /// |
2712 | | /// # Example |
2713 | | /// |
2714 | | /// ``` |
2715 | | /// use icu::properties::CodePointSetData; |
2716 | | /// use icu::properties::props::NoncharacterCodePoint; |
2717 | | /// |
2718 | | /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>(); |
2719 | | /// |
2720 | | /// assert!(noncharacter_code_point.contains('\u{FDD0}')); |
2721 | | /// assert!(noncharacter_code_point.contains('\u{FFFF}')); |
2722 | | /// assert!(!noncharacter_code_point.contains('\u{10000}')); // LINEAR B SYLLABLE B008 A |
2723 | | /// ``` |
2724 | | } |
2725 | | |
2726 | | make_binary_property! { |
2727 | | name: "NFC_Inert"; |
2728 | | short_name: "NFC_Inert"; |
2729 | | ident: NfcInert; |
2730 | | data_marker: crate::provider::PropertyBinaryNfcInertV1; |
2731 | | singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1; |
2732 | | /// Characters that are inert under NFC (Normalization Form C), i.e., they do not interact with adjacent characters. |
2733 | | } |
2734 | | |
2735 | | make_binary_property! { |
2736 | | name: "NFD_Inert"; |
2737 | | short_name: "NFD_Inert"; |
2738 | | ident: NfdInert; |
2739 | | data_marker: crate::provider::PropertyBinaryNfdInertV1; |
2740 | | singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1; |
2741 | | /// Characters that are inert under NFD (Normalization Form D), i.e., they do not interact with adjacent characters. |
2742 | | } |
2743 | | |
2744 | | make_binary_property! { |
2745 | | name: "NFKC_Inert"; |
2746 | | short_name: "NFKC_Inert"; |
2747 | | ident: NfkcInert; |
2748 | | data_marker: crate::provider::PropertyBinaryNfkcInertV1; |
2749 | | singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1; |
2750 | | /// Characters that are inert under NFKC (Normalization Form KC), i.e., they do not interact with adjacent characters. |
2751 | | } |
2752 | | |
2753 | | make_binary_property! { |
2754 | | name: "NFKD_Inert"; |
2755 | | short_name: "NFKD_Inert"; |
2756 | | ident: NfkdInert; |
2757 | | data_marker: crate::provider::PropertyBinaryNfkdInertV1; |
2758 | | singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1; |
2759 | | /// Characters that are inert under NFKD (Normalization Form KD), i.e., they do not interact with adjacent characters. |
2760 | | } |
2761 | | |
2762 | | make_binary_property! { |
2763 | | name: "Pattern_Syntax"; |
2764 | | short_name: "Pat_Syn"; |
2765 | | ident: PatternSyntax; |
2766 | | data_marker: crate::provider::PropertyBinaryPatternSyntaxV1; |
2767 | | singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1; |
2768 | | /// Characters used as syntax in patterns (such as regular expressions). |
2769 | | /// |
2770 | | /// See |
2771 | | /// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html) for |
2772 | | /// more details. |
2773 | | /// |
2774 | | /// # Example |
2775 | | /// |
2776 | | /// ``` |
2777 | | /// use icu::properties::CodePointSetData; |
2778 | | /// use icu::properties::props::PatternSyntax; |
2779 | | /// |
2780 | | /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>(); |
2781 | | /// |
2782 | | /// assert!(pattern_syntax.contains('{')); |
2783 | | /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW |
2784 | | /// assert!(!pattern_syntax.contains('0')); |
2785 | | /// ``` |
2786 | | } |
2787 | | |
2788 | | make_binary_property! { |
2789 | | name: "Pattern_White_Space"; |
2790 | | short_name: "Pat_WS"; |
2791 | | ident: PatternWhiteSpace; |
2792 | | data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1; |
2793 | | singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1; |
2794 | | /// Characters used as whitespace in patterns (such as regular expressions). |
2795 | | /// |
2796 | | /// See |
2797 | | /// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html) for |
2798 | | /// more details. |
2799 | | /// |
2800 | | /// # Example |
2801 | | /// |
2802 | | /// ``` |
2803 | | /// use icu::properties::CodePointSetData; |
2804 | | /// use icu::properties::props::PatternWhiteSpace; |
2805 | | /// |
2806 | | /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>(); |
2807 | | /// |
2808 | | /// assert!(pattern_white_space.contains(' ')); |
2809 | | /// assert!(pattern_white_space.contains('\u{2029}')); // PARAGRAPH SEPARATOR |
2810 | | /// assert!(pattern_white_space.contains('\u{000A}')); // LINE FEED (LF) |
2811 | | /// assert!(!pattern_white_space.contains('\u{00A0}')); // NO-BREAK SPACE |
2812 | | /// ``` |
2813 | | } |
2814 | | |
2815 | | make_binary_property! { |
2816 | | name: "Prepended_Concatenation_Mark"; |
2817 | | short_name: "PCM"; |
2818 | | ident: PrependedConcatenationMark; |
2819 | | data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1; |
2820 | | singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1; |
2821 | | /// A small class of visible format controls that precede, and then span, a sequence of |
2822 | | /// other characters (usually digits). |
2823 | | } |
2824 | | |
2825 | | make_binary_property! { |
2826 | | name: "Print"; |
2827 | | short_name: "Print"; |
2828 | | ident: Print; |
2829 | | data_marker: crate::provider::PropertyBinaryPrintV1; |
2830 | | singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1; |
2831 | | /// Printable characters (visible characters and whitespace). |
2832 | | /// |
2833 | | /// This property is defined for POSIX compatibility. |
2834 | | } |
2835 | | |
2836 | | make_binary_property! { |
2837 | | name: "Quotation_Mark"; |
2838 | | short_name: "QMark"; |
2839 | | ident: QuotationMark; |
2840 | | data_marker: crate::provider::PropertyBinaryQuotationMarkV1; |
2841 | | singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1; |
2842 | | /// Punctuation characters that function as quotation marks. |
2843 | | /// |
2844 | | /// # Example |
2845 | | /// |
2846 | | /// ``` |
2847 | | /// use icu::properties::CodePointSetData; |
2848 | | /// use icu::properties::props::QuotationMark; |
2849 | | /// |
2850 | | /// let quotation_mark = CodePointSetData::new::<QuotationMark>(); |
2851 | | /// |
2852 | | /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE |
2853 | | /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK |
2854 | | /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN |
2855 | | /// ``` |
2856 | | } |
2857 | | |
2858 | | make_binary_property! { |
2859 | | name: "Radical"; |
2860 | | short_name: "Radical"; |
2861 | | ident: Radical; |
2862 | | data_marker: crate::provider::PropertyBinaryRadicalV1; |
2863 | | singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1; |
2864 | | /// Characters used in the definition of Ideographic Description Sequences. |
2865 | | /// |
2866 | | /// # Example |
2867 | | /// |
2868 | | /// ``` |
2869 | | /// use icu::properties::CodePointSetData; |
2870 | | /// use icu::properties::props::Radical; |
2871 | | /// |
2872 | | /// let radical = CodePointSetData::new::<Radical>(); |
2873 | | /// |
2874 | | /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX |
2875 | | /// assert!(!radical.contains('\u{F95E}')); // CJK COMPATIBILITY IDEOGRAPH-F95E |
2876 | | /// ``` |
2877 | | } |
2878 | | |
2879 | | make_binary_property! { |
2880 | | name: "Regional_Indicator"; |
2881 | | short_name: "RI"; |
2882 | | ident: RegionalIndicator; |
2883 | | data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1; |
2884 | | singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1; |
2885 | | /// Regional indicator characters, `U+1F1E6..U+1F1FF`. |
2886 | | /// |
2887 | | /// # Example |
2888 | | /// |
2889 | | /// ``` |
2890 | | /// use icu::properties::CodePointSetData; |
2891 | | /// use icu::properties::props::RegionalIndicator; |
2892 | | /// |
2893 | | /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>(); |
2894 | | /// |
2895 | | /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
2896 | | /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T |
2897 | | /// assert!(!regional_indicator.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
2898 | | /// ``` |
2899 | | } |
2900 | | |
2901 | | make_binary_property! { |
2902 | | name: "Soft_Dotted"; |
2903 | | short_name: "SD"; |
2904 | | ident: SoftDotted; |
2905 | | data_marker: crate::provider::PropertyBinarySoftDottedV1; |
2906 | | singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1; |
2907 | | /// Characters with a "soft dot", like `i` or `j`. |
2908 | | /// |
2909 | | /// An accent placed on these characters causes |
2910 | | /// the dot to disappear. |
2911 | | /// |
2912 | | /// # Example |
2913 | | /// |
2914 | | /// ``` |
2915 | | /// use icu::properties::CodePointSetData; |
2916 | | /// use icu::properties::props::SoftDotted; |
2917 | | /// |
2918 | | /// let soft_dotted = CodePointSetData::new::<SoftDotted>(); |
2919 | | /// |
2920 | | /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
2921 | | /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I |
2922 | | /// ``` |
2923 | | } |
2924 | | |
2925 | | make_binary_property! { |
2926 | | name: "Segment_Starter"; |
2927 | | short_name: "Segment_Starter"; |
2928 | | ident: SegmentStarter; |
2929 | | data_marker: crate::provider::PropertyBinarySegmentStarterV1; |
2930 | | singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1; |
2931 | | /// Characters that are starters with respect to Unicode normalization and combining |
2932 | | /// character sequences. |
2933 | | } |
2934 | | |
2935 | | make_binary_property! { |
2936 | | name: "Case_Sensitive"; |
2937 | | short_name: "Case_Sensitive"; |
2938 | | ident: CaseSensitive; |
2939 | | data_marker: crate::provider::PropertyBinaryCaseSensitiveV1; |
2940 | | singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1; |
2941 | | /// Characters that are either the source of a case mapping or the target of a case |
2942 | | /// mapping. |
2943 | | } |
2944 | | |
2945 | | make_binary_property! { |
2946 | | name: "Sentence_Terminal"; |
2947 | | short_name: "STerm"; |
2948 | | ident: SentenceTerminal; |
2949 | | data_marker: crate::provider::PropertyBinarySentenceTerminalV1; |
2950 | | singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1; |
2951 | | /// Punctuation characters that generally mark the end of sentences. |
2952 | | /// |
2953 | | /// # Example |
2954 | | /// |
2955 | | /// ``` |
2956 | | /// use icu::properties::CodePointSetData; |
2957 | | /// use icu::properties::props::SentenceTerminal; |
2958 | | /// |
2959 | | /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>(); |
2960 | | /// |
2961 | | /// assert!(sentence_terminal.contains('.')); // U+002E FULL STOP |
2962 | | /// assert!(sentence_terminal.contains('?')); // U+003F QUESTION MARK |
2963 | | /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
2964 | | /// assert!(!sentence_terminal.contains(',')); // U+002C COMMA |
2965 | | /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK |
2966 | | /// ``` |
2967 | | } |
2968 | | |
2969 | | make_binary_property! { |
2970 | | name: "Terminal_Punctuation"; |
2971 | | short_name: "Term"; |
2972 | | ident: TerminalPunctuation; |
2973 | | data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1; |
2974 | | singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1; |
2975 | | /// Punctuation characters that generally mark the end of textual units. |
2976 | | /// |
2977 | | /// # Example |
2978 | | /// |
2979 | | /// ``` |
2980 | | /// use icu::properties::CodePointSetData; |
2981 | | /// use icu::properties::props::TerminalPunctuation; |
2982 | | /// |
2983 | | /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>(); |
2984 | | /// |
2985 | | /// assert!(terminal_punctuation.contains('.')); // U+002E FULL STOP |
2986 | | /// assert!(terminal_punctuation.contains('?')); // U+003F QUESTION MARK |
2987 | | /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
2988 | | /// assert!(terminal_punctuation.contains(',')); // U+002C COMMA (not a `SentenceTerminal`) |
2989 | | /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK |
2990 | | /// ``` |
2991 | | } |
2992 | | |
2993 | | make_binary_property! { |
2994 | | name: "Unified_Ideograph"; |
2995 | | short_name: "UIdeo"; |
2996 | | ident: UnifiedIdeograph; |
2997 | | data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1; |
2998 | | singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1; |
2999 | | /// Characters in the exact set of Unified CJK Ideographs specified by the standard. |
3000 | | /// |
3001 | | /// # Example |
3002 | | /// |
3003 | | /// ``` |
3004 | | /// use icu::properties::CodePointSetData; |
3005 | | /// use icu::properties::props::UnifiedIdeograph; |
3006 | | /// |
3007 | | /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>(); |
3008 | | /// |
3009 | | /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
3010 | | /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728 |
3011 | | /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178 |
3012 | | /// ``` |
3013 | | } |
3014 | | |
3015 | | make_binary_property! { |
3016 | | name: "Uppercase"; |
3017 | | short_name: "Upper"; |
3018 | | ident: Uppercase; |
3019 | | data_marker: crate::provider::PropertyBinaryUppercaseV1; |
3020 | | singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1; |
3021 | | /// Characters with the `Uppercase` property, such as `U`. |
3022 | | /// |
3023 | | /// # Example |
3024 | | /// |
3025 | | /// ``` |
3026 | | /// use icu::properties::CodePointSetData; |
3027 | | /// use icu::properties::props::Uppercase; |
3028 | | /// |
3029 | | /// let uppercase = CodePointSetData::new::<Uppercase>(); |
3030 | | /// |
3031 | | /// assert!(uppercase.contains('U')); |
3032 | | /// assert!(!uppercase.contains('u')); |
3033 | | /// ``` |
3034 | | } |
3035 | | |
3036 | | make_binary_property! { |
3037 | | name: "Variation_Selector"; |
3038 | | short_name: "VS"; |
3039 | | ident: VariationSelector; |
3040 | | data_marker: crate::provider::PropertyBinaryVariationSelectorV1; |
3041 | | singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1; |
3042 | | /// Characters that are variation selectors. |
3043 | | /// |
3044 | | /// # Example |
3045 | | /// |
3046 | | /// ``` |
3047 | | /// use icu::properties::CodePointSetData; |
3048 | | /// use icu::properties::props::VariationSelector; |
3049 | | /// |
3050 | | /// let variation_selector = CodePointSetData::new::<VariationSelector>(); |
3051 | | /// |
3052 | | /// assert!(variation_selector.contains('\u{180D}')); // MONGOLIAN FREE VARIATION SELECTOR THREE |
3053 | | /// assert!(!variation_selector.contains('\u{303E}')); // IDEOGRAPHIC VARIATION INDICATOR |
3054 | | /// assert!(variation_selector.contains('\u{FE0F}')); // VARIATION SELECTOR-16 |
3055 | | /// assert!(!variation_selector.contains('\u{FE10}')); // PRESENTATION FORM FOR VERTICAL COMMA |
3056 | | /// assert!(variation_selector.contains('\u{E01EF}')); // VARIATION SELECTOR-256 |
3057 | | /// ``` |
3058 | | } |
3059 | | |
3060 | | make_binary_property! { |
3061 | | name: "White_Space"; |
3062 | | short_name: "space"; |
3063 | | ident: WhiteSpace; |
3064 | | data_marker: crate::provider::PropertyBinaryWhiteSpaceV1; |
3065 | | singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1; |
3066 | | /// Spaces, separator characters and other control characters which should be treated by |
3067 | | /// programming languages as "white space" for the purpose of parsing elements. |
3068 | | /// |
3069 | | /// # Example |
3070 | | /// |
3071 | | /// ``` |
3072 | | /// use icu::properties::CodePointSetData; |
3073 | | /// use icu::properties::props::WhiteSpace; |
3074 | | /// |
3075 | | /// let white_space = CodePointSetData::new::<WhiteSpace>(); |
3076 | | /// |
3077 | | /// assert!(white_space.contains(' ')); |
3078 | | /// assert!(white_space.contains('\u{000A}')); // LINE FEED (LF) |
3079 | | /// assert!(white_space.contains('\u{00A0}')); // NO-BREAK SPACE |
3080 | | /// assert!(!white_space.contains('\u{200B}')); // ZERO WIDTH SPACE |
3081 | | /// ``` |
3082 | | } |
3083 | | |
3084 | | make_binary_property! { |
3085 | | name: "Xdigit"; |
3086 | | short_name: "Xdigit"; |
3087 | | ident: Xdigit; |
3088 | | data_marker: crate::provider::PropertyBinaryXdigitV1; |
3089 | | singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1; |
3090 | | /// Hexadecimal digits. |
3091 | | /// |
3092 | | /// This property is defined for POSIX compatibility. |
3093 | | } |
3094 | | |
3095 | | make_binary_property! { |
3096 | | name: "XID_Continue"; |
3097 | | short_name: "XIDC"; |
3098 | | ident: XidContinue; |
3099 | | data_marker: crate::provider::PropertyBinaryXidContinueV1; |
3100 | | singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1; |
3101 | | /// Characters that can come after the first character in an identifier. |
3102 | | /// |
3103 | | /// See [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html) |
3104 | | /// for more details. |
3105 | | /// |
3106 | | /// # Example |
3107 | | /// |
3108 | | /// ``` |
3109 | | /// use icu::properties::CodePointSetData; |
3110 | | /// use icu::properties::props::XidContinue; |
3111 | | /// |
3112 | | /// let xid_continue = CodePointSetData::new::<XidContinue>(); |
3113 | | /// |
3114 | | /// assert!(xid_continue.contains('x')); |
3115 | | /// assert!(xid_continue.contains('1')); |
3116 | | /// assert!(xid_continue.contains('_')); |
3117 | | /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
3118 | | /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
3119 | | /// assert!(!xid_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
3120 | | /// ``` |
3121 | | } |
3122 | | |
3123 | | make_binary_property! { |
3124 | | name: "XID_Start"; |
3125 | | short_name: "XIDS"; |
3126 | | ident: XidStart; |
3127 | | data_marker: crate::provider::PropertyBinaryXidStartV1; |
3128 | | singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1; |
3129 | | /// Characters that can begin an identifier. |
3130 | | /// |
3131 | | /// See |
3132 | | /// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/tr31-35.html) for |
3133 | | /// more details. |
3134 | | /// |
3135 | | /// # Example |
3136 | | /// |
3137 | | /// ``` |
3138 | | /// use icu::properties::CodePointSetData; |
3139 | | /// use icu::properties::props::XidStart; |
3140 | | /// |
3141 | | /// let xid_start = CodePointSetData::new::<XidStart>(); |
3142 | | /// |
3143 | | /// assert!(xid_start.contains('x')); |
3144 | | /// assert!(!xid_start.contains('1')); |
3145 | | /// assert!(!xid_start.contains('_')); |
3146 | | /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA |
3147 | | /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
3148 | | /// assert!(!xid_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
3149 | | /// ``` |
3150 | | } |
3151 | | |
3152 | | pub use crate::emoji::EmojiSet; |
3153 | | |
3154 | | // Declares a `#[non_exhaustive]` unit struct and implements `Sealed` and `EmojiSet` for it. |
3155 | | macro_rules! make_emoji_set { |
3156 | |     ( |
3157 | |         ident: $name:ident; |
3158 | |         data_marker: $marker:ty; |
3159 | |         singleton: $baked:ident; |
3160 | |         $(#[$attr:meta])+ |
3161 | |     ) => { |
3162 | |         $(#[$attr])+ |
3163 | |         #[derive(Debug)] |
3164 | |         #[non_exhaustive] |
3165 | |         pub struct $name; |
3166 | |         impl crate::private::Sealed for $name {} |
3167 | |         impl EmojiSet for $name { |
3168 | |             type DataMarker = $marker; |
3169 | |             // Only defined when the `compiled_data` feature is enabled. |
3170 | |             #[cfg(feature = "compiled_data")] |
3171 | |             const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> = |
3172 | |                 &crate::provider::Baked::$baked; |
3173 | |         } |
3174 | |     }; |
3175 | | } |
3176 | | |
3177 | | make_emoji_set! { |
3178 | | ident: BasicEmoji; |
3179 | | data_marker: crate::provider::PropertyBinaryBasicEmojiV1; |
3180 | | singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1; |
3181 | | /// Characters and character sequences intended for general-purpose, independent, direct input. |
3182 | | /// |
3183 | | /// See [Unicode Technical Standard #51](https://unicode.org/reports/tr51/) for more |
3184 | | /// details. |
3185 | | /// |
3186 | | /// # Example |
3187 | | /// |
3188 | | /// ``` |
3189 | | /// use icu::properties::EmojiSetData; |
3190 | | /// use icu::properties::props::BasicEmoji; |
3191 | | /// |
3192 | | /// let basic_emoji = EmojiSetData::new::<BasicEmoji>(); |
3193 | | /// |
3194 | | /// assert!(!basic_emoji.contains('\u{0020}')); // SPACE |
3195 | | /// assert!(!basic_emoji.contains('\n')); |
3196 | | /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY |
3197 | | /// assert!(basic_emoji.contains_str("\u{1F983}")); // the same character, as a string |
3198 | | /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // RAILWAY TRACK + VARIATION SELECTOR-16 |
3199 | | /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3 |
3200 | | /// ``` |
3201 | | } |
3202 | | |
3203 | | #[cfg(test)] |
3204 | | mod test_enumerated_property_completeness { |
3205 | |     use super::*; |
3206 | |     use std::collections::BTreeMap; |
3207 | | |
3208 | |     /// Asserts that the values named in `lookup` match the values listed in `consts`. |
3209 | |     /// |
3210 | |     /// Each constant removes its numeric value from a map built from `lookup`. Constants with |
3211 | |     /// no entry in that map, and entries left in the map afterwards, are listed in the panic |
3212 | |     /// message. |
3213 | |     fn check_enum<'a, T: NamedEnumeratedProperty>( |
3214 | |         lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>, |
3215 | |         consts: impl IntoIterator<Item = &'a T>, |
3216 | |     ) where |
3217 | |         u16: From<T>, |
3218 | |     { |
3219 | |         // Entries from the data, keyed by numeric value; matched entries are removed below. |
3220 | |         let mut unmatched = lookup |
3221 | |             .map |
3222 | |             .iter() |
3223 | |             .map(|(name, value)| (value, (name, "Data"))) |
3224 | |             .collect::<BTreeMap<_, _>>(); |
3225 | | |
3226 | |         let names = crate::PropertyNamesLong::<T>::new(); |
3227 | | |
3228 | |         // Constants first (in iteration order), then whatever data entries were never matched. |
3229 | |         let mut diff = Vec::new(); |
3230 | |         for &constant in consts { |
3231 | |             let value = u16::from(constant) as usize; |
3232 | |             if unmatched.remove(&value).is_none() { |
3233 | |                 let name = names.get(constant).unwrap_or("<unknown>").to_string(); |
3234 | |                 diff.push((value, (name, "Consts"))); |
3235 | |             } |
3236 | |         } |
3237 | |         diff.extend(unmatched); |
3238 | | |
3239 | |         let fmt_diff: String = diff |
3240 | |             .into_iter() |
3241 | |             .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n")) |
3242 | |             .collect(); |
3243 | | |
3244 | |         assert!( |
3245 | |             fmt_diff.is_empty(), |
3246 | |             "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}" |
3247 | |         ); |
3248 | |     } |
3249 | | |
3250 | |     // Declares a `#[test]` named `$test` that runs `check_enum` on the baked name map |
3251 | |     // `$singleton` and on `$property::ALL_VALUES`. |
3252 | |     macro_rules! completeness_test { |
3253 | |         ($test:ident, $singleton:ident, $property:ident) => { |
3254 | |             #[test] |
3255 | |             fn $test() { |
3256 | |                 check_enum( |
3257 | |                     crate::provider::Baked::$singleton, |
3258 | |                     $property::ALL_VALUES, |
3259 | |                 ); |
3260 | |             } |
3261 | |         }; |
3262 | |     } |
3263 | | |
3264 | |     completeness_test!( |
3265 | |         test_ea, |
3266 | |         SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1, |
3267 | |         EastAsianWidth |
3268 | |     ); |
3269 | | |
3270 | |     completeness_test!( |
3271 | |         test_ccc, |
3272 | |         SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1, |
3273 | |         CanonicalCombiningClass |
3274 | |     ); |
3275 | | |
3276 | |     completeness_test!( |
3277 | |         test_jt, |
3278 | |         SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1, |
3279 | |         JoiningType |
3280 | |     ); |
3281 | | |
3282 | |     completeness_test!( |
3283 | |         test_insc, |
3284 | |         SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1, |
3285 | |         IndicSyllabicCategory |
3286 | |     ); |
3287 | | |
3288 | |     completeness_test!( |
3289 | |         test_sb, |
3290 | |         SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1, |
3291 | |         SentenceBreak |
3292 | |     ); |
3293 | | |
3294 | |     completeness_test!( |
3295 | |         test_wb, |
3296 | |         SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1, |
3297 | |         WordBreak |
3298 | |     ); |
3299 | | |
3300 | |     completeness_test!( |
3301 | |         test_bc, |
3302 | |         SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1, |
3303 | |         BidiClass |
3304 | |     ); |
3305 | | |
3306 | |     completeness_test!( |
3307 | |         test_hst, |
3308 | |         SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1, |
3309 | |         HangulSyllableType |
3310 | |     ); |
3311 | | |
3312 | |     completeness_test!( |
3313 | |         test_vo, |
3314 | |         SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1, |
3315 | |         VerticalOrientation |
3316 | |     ); |
3317 | | } |