Coverage Report

Created: 2025-11-16 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_properties-2.1.1/src/props.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
//! This module defines all available properties.
6
//!
7
//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8
//! and implement [`EnumeratedProperty`].
9
//!
10
//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11
//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12
//!
13
//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14
//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15
//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16
//! can be constructed.
17
//!
18
//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20
pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22
pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
24
/// See [`test_enumerated_property_completeness`] for usage.
25
/// Example input:
26
/// ```ignore
27
/// impl EastAsianWidth {
28
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
29
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
30
///     ...
31
/// }
32
/// ```
33
/// Produces `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`
34
/// and an `impl From<EastAsianWidth> for u16` that casts the first field of the struct to u16.
35
macro_rules! create_const_array {
36
    (
37
        $ ( #[$meta:meta] )*
38
        impl $enum_ty:ident {
39
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
40
        }
41
    ) => {
42
        $( #[$meta] )*
43
        impl $enum_ty {
44
            $(
45
                $(#[$const_meta])*
46
                $v const $i: $t = $e;
47
            )*
48
49
            /// All possible values of this enum in the Unicode version
50
            /// from this ICU4X release.
51
            pub const ALL_VALUES: &'static [$enum_ty] = &[
52
                $($enum_ty::$i),*
53
            ];
54
        }
55
56
        #[cfg(feature = "datagen")]
57
        impl databake::Bake for $enum_ty {
58
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
59
                env.insert("icu_properties");
60
                match *self {
61
                    $(
62
                        Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i),
63
                    )*
64
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
65
                }
66
            }
67
        }
68
69
70
        impl From<$enum_ty> for u16  {
71
0
            fn from(other: $enum_ty) -> Self {
72
0
                other.0 as u16
73
0
            }
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::LineBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::GraphemeClusterBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::HangulSyllableType>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::EastAsianWidth>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::WordBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::IndicConjunctBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::SentenceBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::CanonicalCombiningClass>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::IndicSyllabicCategory>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::JoiningType>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::VerticalOrientation>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::Script>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::BidiClass>>::from
74
        }
75
    }
76
}
77
78
pub use crate::code_point_map::EnumeratedProperty;
79
80
macro_rules! make_enumerated_property {
81
    (
82
        name: $name:literal;
83
        short_name: $short_name:literal;
84
        ident: $value_ty:path;
85
        data_marker: $data_marker:ty;
86
        singleton: $singleton:ident;
87
        $(ule_ty: $ule_ty:ty;)?
88
    ) => {
89
        impl crate::private::Sealed for $value_ty {}
90
91
        impl EnumeratedProperty for $value_ty {
92
            type DataMarker = $data_marker;
93
            #[cfg(feature = "compiled_data")]
94
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
95
                crate::provider::Baked::$singleton;
96
            const NAME: &'static [u8] = $name.as_bytes();
97
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
98
        }
99
100
        $(
101
            impl zerovec::ule::AsULE for $value_ty {
102
                type ULE = $ule_ty;
103
104
0
                fn to_unaligned(self) -> Self::ULE {
105
0
                    self.0.to_unaligned()
106
0
                }
Unexecuted instantiation: <icu_properties::props::Script as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::LineBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::GraphemeClusterBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::HangulSyllableType as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::EastAsianWidth as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::WordBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::CanonicalCombiningClass as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::IndicConjunctBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::SentenceBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::IndicSyllabicCategory as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::JoiningType as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::VerticalOrientation as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::BidiClass as zerovec::ule::AsULE>::to_unaligned
107
9.73M
                fn from_unaligned(unaligned: Self::ULE) -> Self {
108
9.73M
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
109
9.73M
                }
Unexecuted instantiation: <icu_properties::props::Script as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::LineBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::GraphemeClusterBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::HangulSyllableType as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::EastAsianWidth as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::WordBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::CanonicalCombiningClass as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::IndicConjunctBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::SentenceBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::IndicSyllabicCategory as zerovec::ule::AsULE>::from_unaligned
<icu_properties::props::JoiningType as zerovec::ule::AsULE>::from_unaligned
Line
Count
Source
107
417k
                fn from_unaligned(unaligned: Self::ULE) -> Self {
108
417k
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
109
417k
                }
Unexecuted instantiation: <icu_properties::props::VerticalOrientation as zerovec::ule::AsULE>::from_unaligned
<icu_properties::props::BidiClass as zerovec::ule::AsULE>::from_unaligned
Line
Count
Source
107
9.31M
                fn from_unaligned(unaligned: Self::ULE) -> Self {
108
9.31M
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
109
9.31M
                }
110
            }
111
        )?
112
    };
113
}
114
115
/// Enumerated property Bidi_Class
116
///
117
/// These are the categories required by the Unicode Bidirectional Algorithm.
118
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
119
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
120
///
121
/// # Example
122
///
123
/// ```
124
/// use icu::properties::{props::BidiClass, CodePointMapData};
125
///
126
/// assert_eq!(
127
///     CodePointMapData::<BidiClass>::new().get('y'),
128
///     BidiClass::LeftToRight
129
/// ); // U+0079
130
/// assert_eq!(
131
///     CodePointMapData::<BidiClass>::new().get('ع'),
132
///     BidiClass::ArabicLetter
133
/// ); // U+0639
134
/// ```
135
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
136
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
137
#[allow(clippy::exhaustive_structs)] // newtype
138
#[repr(transparent)]
139
pub struct BidiClass(pub(crate) u8);
140
141
impl BidiClass {
142
    /// Returns an ICU4C `UBidiClass` value.
143
9.27M
    pub const fn to_icu4c_value(self) -> u8 {
144
9.27M
        self.0
145
9.27M
    }
146
    /// Constructor from an ICU4C `UBidiClass` value.
147
0
    pub const fn from_icu4c_value(value: u8) -> Self {
148
0
        Self(value)
149
0
    }
150
}
151
152
create_const_array! {
153
#[allow(non_upper_case_globals)]
154
impl BidiClass {
155
    /// (`L`) any strong left-to-right character
156
    pub const LeftToRight: BidiClass = BidiClass(0);
157
    /// (`R`) any strong right-to-left (non-Arabic-type) character
158
    pub const RightToLeft: BidiClass = BidiClass(1);
159
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
160
    pub const EuropeanNumber: BidiClass = BidiClass(2);
161
    /// (`ES`) plus and minus signs
162
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
163
    /// (`ET`) a terminator in a numeric format context, includes currency signs
164
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
165
    /// (`AN`) any Arabic-Indic digit
166
    pub const ArabicNumber: BidiClass = BidiClass(5);
167
    /// (`CS`) commas, colons, and slashes
168
    pub const CommonSeparator: BidiClass = BidiClass(6);
169
    /// (`B`) various newline characters
170
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
171
    /// (`S`) various segment-related control codes
172
    pub const SegmentSeparator: BidiClass = BidiClass(8);
173
    /// (`WS`) spaces
174
    pub const WhiteSpace: BidiClass = BidiClass(9);
175
    /// (`ON`) most other symbols and punctuation marks
176
    pub const OtherNeutral: BidiClass = BidiClass(10);
177
    /// (`LRE`) U+202A: the LR embedding control
178
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
179
    /// (`LRO`) U+202D: the LR override control
180
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
181
    /// (`AL`) any strong right-to-left (Arabic-type) character
182
    pub const ArabicLetter: BidiClass = BidiClass(13);
183
    /// (`RLE`) U+202B: the RL embedding control
184
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
185
    /// (`RLO`) U+202E: the RL override control
186
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
187
    /// (`PDF`) U+202C: terminates an embedding or override control
188
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
189
    /// (`NSM`) any nonspacing mark
190
    pub const NonspacingMark: BidiClass = BidiClass(17);
191
    /// (`BN`) most format characters, control codes, or noncharacters
192
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
193
    /// (`FSI`) U+2068: the first strong isolate control
194
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
195
    /// (`LRI`) U+2066: the LR isolate control
196
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
197
    /// (`RLI`) U+2067: the RL isolate control
198
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
199
    /// (`PDI`) U+2069: terminates an isolate control
200
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
201
}
202
}
203
204
make_enumerated_property! {
205
    name: "Bidi_Class";
206
    short_name: "bc";
207
    ident: BidiClass;
208
    data_marker: crate::provider::PropertyEnumBidiClassV1;
209
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
210
    ule_ty: u8;
211
}
212
213
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
214
pub(crate) mod gc {
215
    /// Enumerated property General_Category.
216
    ///
217
    /// General_Category specifies the most general classification of a code point, usually
218
    /// determined based on the primary characteristic of the assigned character. For example, is the
219
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
220
    ///
221
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
222
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
223
    /// crate::props::GeneralCategoryGroup).
224
    ///
225
    /// # Example
226
    ///
227
    /// ```
228
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
229
    ///
230
    /// assert_eq!(
231
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
232
    ///     GeneralCategory::OtherLetter
233
    /// ); // U+6728
234
    /// assert_eq!(
235
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
236
    ///     GeneralCategory::OtherSymbol
237
    /// ); // U+1F383 JACK-O-LANTERN
238
    /// ```
239
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
240
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
241
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
242
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
243
    #[allow(clippy::exhaustive_enums)] // this type is stable
244
    #[zerovec::make_ule(GeneralCategoryULE)]
245
    #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))]
246
    #[repr(u8)]
247
    pub enum GeneralCategory {
248
        /// (`Cn`) A reserved unassigned code point or a noncharacter
249
        Unassigned = 0,
250
251
        /// (`Lu`) An uppercase letter
252
        UppercaseLetter = 1,
253
        /// (`Ll`) A lowercase letter
254
        LowercaseLetter = 2,
255
        /// (`Lt`) A digraphic letter, with first part uppercase
256
        TitlecaseLetter = 3,
257
        /// (`Lm`) A modifier letter
258
        ModifierLetter = 4,
259
        /// (`Lo`) Other letters, including syllables and ideographs
260
        OtherLetter = 5,
261
262
        /// (`Mn`) A nonspacing combining mark (zero advance width)
263
        NonspacingMark = 6,
264
        /// (`Mc`) A spacing combining mark (positive advance width)
265
        SpacingMark = 8,
266
        /// (`Me`) An enclosing combining mark
267
        EnclosingMark = 7,
268
269
        /// (`Nd`) A decimal digit
270
        DecimalNumber = 9,
271
        /// (`Nl`) A letterlike numeric character
272
        LetterNumber = 10,
273
        /// (`No`) A numeric character of other type
274
        OtherNumber = 11,
275
276
        /// (`Zs`) A space character (of various non-zero widths)
277
        SpaceSeparator = 12,
278
        /// (`Zl`) U+2028 LINE SEPARATOR only
279
        LineSeparator = 13,
280
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
281
        ParagraphSeparator = 14,
282
283
        /// (`Cc`) A C0 or C1 control code
284
        Control = 15,
285
        /// (`Cf`) A format control character
286
        Format = 16,
287
        /// (`Co`) A private-use character
288
        PrivateUse = 17,
289
        /// (`Cs`) A surrogate code point
290
        Surrogate = 18,
291
292
        /// (`Pd`) A dash or hyphen punctuation mark
293
        DashPunctuation = 19,
294
        /// (`Ps`) An opening punctuation mark (of a pair)
295
        OpenPunctuation = 20,
296
        /// (`Pe`) A closing punctuation mark (of a pair)
297
        ClosePunctuation = 21,
298
        /// (`Pc`) A connecting punctuation mark, like a tie
299
        ConnectorPunctuation = 22,
300
        /// (`Pi`) An initial quotation mark
301
        InitialPunctuation = 28,
302
        /// (`Pf`) A final quotation mark
303
        FinalPunctuation = 29,
304
        /// (`Po`) A punctuation mark of other type
305
        OtherPunctuation = 23,
306
307
        /// (`Sm`) A symbol of mathematical use
308
        MathSymbol = 24,
309
        /// (`Sc`) A currency sign
310
        CurrencySymbol = 25,
311
        /// (`Sk`) A non-letterlike modifier symbol
312
        ModifierSymbol = 26,
313
        /// (`So`) A symbol of other type
314
        OtherSymbol = 27,
315
    }
316
}
317
318
pub use gc::GeneralCategory;
319
320
impl GeneralCategory {
321
    /// All possible values of this enum
322
    pub const ALL_VALUES: &'static [GeneralCategory] = &[
323
        GeneralCategory::Unassigned,
324
        GeneralCategory::UppercaseLetter,
325
        GeneralCategory::LowercaseLetter,
326
        GeneralCategory::TitlecaseLetter,
327
        GeneralCategory::ModifierLetter,
328
        GeneralCategory::OtherLetter,
329
        GeneralCategory::NonspacingMark,
330
        GeneralCategory::SpacingMark,
331
        GeneralCategory::EnclosingMark,
332
        GeneralCategory::DecimalNumber,
333
        GeneralCategory::LetterNumber,
334
        GeneralCategory::OtherNumber,
335
        GeneralCategory::SpaceSeparator,
336
        GeneralCategory::LineSeparator,
337
        GeneralCategory::ParagraphSeparator,
338
        GeneralCategory::Control,
339
        GeneralCategory::Format,
340
        GeneralCategory::PrivateUse,
341
        GeneralCategory::Surrogate,
342
        GeneralCategory::DashPunctuation,
343
        GeneralCategory::OpenPunctuation,
344
        GeneralCategory::ClosePunctuation,
345
        GeneralCategory::ConnectorPunctuation,
346
        GeneralCategory::InitialPunctuation,
347
        GeneralCategory::FinalPunctuation,
348
        GeneralCategory::OtherPunctuation,
349
        GeneralCategory::MathSymbol,
350
        GeneralCategory::CurrencySymbol,
351
        GeneralCategory::ModifierSymbol,
352
        GeneralCategory::OtherSymbol,
353
    ];
354
}
355
356
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
357
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
358
#[non_exhaustive]
359
pub struct GeneralCategoryOutOfBoundsError;
360
361
impl TryFrom<u8> for GeneralCategory {
362
    type Error = GeneralCategoryOutOfBoundsError;
363
    /// Construct this [`GeneralCategory`] from an integer, returning
364
    /// an error if it is out of bounds
365
0
    fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
366
0
        GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
367
0
    }
368
}
369
370
make_enumerated_property! {
371
    name: "General_Category";
372
    short_name: "gc";
373
    ident: GeneralCategory;
374
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
375
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
376
}
377
378
/// Groupings of multiple General_Category property values.
379
///
380
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
381
/// values that are useful for users in certain contexts, such as regex. In
382
/// other words, unlike [`GeneralCategory`], this supports groups of general
383
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
384
/// `LowercaseLetter`, etc.
385
///
386
/// See <https://www.unicode.org/reports/tr44/> .
387
///
388
/// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C.
389
/// Unlike [`GeneralCategory`], this supports groups of general categories: for example, `Letter`
390
/// is the union of `UppercaseLetter`, `LowercaseLetter`, etc.
391
///
392
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
393
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
394
#[allow(clippy::exhaustive_structs)] // newtype
395
#[repr(transparent)]
396
pub struct GeneralCategoryGroup(pub(crate) u32);
397
398
impl crate::private::Sealed for GeneralCategoryGroup {}
399
400
use GeneralCategory as GC;
401
use GeneralCategoryGroup as GCG;
402
403
#[allow(non_upper_case_globals)]
404
impl GeneralCategoryGroup {
405
    /// (`Lu`) An uppercase letter
406
    pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
407
    /// (`Ll`) A lowercase letter
408
    pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
409
    /// (`Lt`) A digraphic letter, with first part uppercase
410
    pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
411
    /// (`Lm`) A modifier letter
412
    pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
413
    /// (`Lo`) Other letters, including syllables and ideographs
414
    pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
415
    /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
416
    pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
417
        | (1 << (GC::LowercaseLetter as u32))
418
        | (1 << (GC::TitlecaseLetter as u32)));
419
    /// (`L`) The union of all letter categories
420
    pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
421
        | (1 << (GC::LowercaseLetter as u32))
422
        | (1 << (GC::TitlecaseLetter as u32))
423
        | (1 << (GC::ModifierLetter as u32))
424
        | (1 << (GC::OtherLetter as u32)));
425
426
    /// (`Mn`) A nonspacing combining mark (zero advance width)
427
    pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
428
    /// (`Me`) An enclosing combining mark
429
    pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
430
    /// (`Mc`) A spacing combining mark (positive advance width)
431
    pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
432
    /// (`M`) The union of all mark categories
433
    pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
434
        | (1 << (GC::EnclosingMark as u32))
435
        | (1 << (GC::SpacingMark as u32)));
436
437
    /// (`Nd`) A decimal digit
438
    pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
439
    /// (`Nl`) A letterlike numeric character
440
    pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
441
    /// (`No`) A numeric character of other type
442
    pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
443
    /// (`N`) The union of all number categories
444
    pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
445
        | (1 << (GC::LetterNumber as u32))
446
        | (1 << (GC::OtherNumber as u32)));
447
448
    /// (`Zs`) A space character (of various non-zero widths)
449
    pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
450
    /// (`Zl`) U+2028 LINE SEPARATOR only
451
    pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
452
    /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
453
    pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
454
    /// (`Z`) The union of all separator categories
455
    pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
456
        | (1 << (GC::LineSeparator as u32))
457
        | (1 << (GC::ParagraphSeparator as u32)));
458
459
    /// (`Cc`) A C0 or C1 control code
460
    pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
461
    /// (`Cf`) A format control character
462
    pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
463
    /// (`Co`) A private-use character
464
    pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
465
    /// (`Cs`) A surrogate code point
466
    pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
467
    /// (`Cn`) A reserved unassigned code point or a noncharacter
468
    pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
469
    /// (`C`) The union of all control code, reserved, and unassigned categories
470
    pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
471
        | (1 << (GC::Format as u32))
472
        | (1 << (GC::PrivateUse as u32))
473
        | (1 << (GC::Surrogate as u32))
474
        | (1 << (GC::Unassigned as u32)));
475
476
    /// (`Pd`) A dash or hyphen punctuation mark
477
    pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
478
    /// (`Ps`) An opening punctuation mark (of a pair)
479
    pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
480
    /// (`Pe`) A closing punctuation mark (of a pair)
481
    pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
482
    /// (`Pc`) A connecting punctuation mark, like a tie
483
    pub const ConnectorPunctuation: GeneralCategoryGroup =
484
        GCG(1 << (GC::ConnectorPunctuation as u32));
485
    /// (`Pi`) An initial quotation mark
486
    pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
487
    /// (`Pf`) A final quotation mark
488
    pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
489
    /// (`Po`) A punctuation mark of other type
490
    pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
491
    /// (`P`) The union of all punctuation categories
492
    pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
493
        | (1 << (GC::OpenPunctuation as u32))
494
        | (1 << (GC::ClosePunctuation as u32))
495
        | (1 << (GC::ConnectorPunctuation as u32))
496
        | (1 << (GC::OtherPunctuation as u32))
497
        | (1 << (GC::InitialPunctuation as u32))
498
        | (1 << (GC::FinalPunctuation as u32)));
499
500
    /// (`Sm`) A symbol of mathematical use
501
    pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
502
    /// (`Sc`) A currency sign
503
    pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
504
    /// (`Sk`) A non-letterlike modifier symbol
505
    pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
506
    /// (`So`) A symbol of other type
507
    pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
508
    /// (`S`) The union of all symbol categories
509
    pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
510
        | (1 << (GC::CurrencySymbol as u32))
511
        | (1 << (GC::ModifierSymbol as u32))
512
        | (1 << (GC::OtherSymbol as u32)));
513
514
    const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
515
516
    /// Return whether the code point belongs in the provided multi-value category.
517
    ///
518
    /// ```
519
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
520
    /// use icu::properties::CodePointMapData;
521
    ///
522
    /// let gc = CodePointMapData::<GeneralCategory>::new();
523
    ///
524
    /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
525
    /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
526
    ///
527
    /// // U+0B1E ORIYA LETTER NYA
528
    /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
529
    /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
530
    /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
531
    ///
532
    /// // U+0301 COMBINING ACUTE ACCENT
533
    /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
534
    /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
535
    /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
536
    ///
537
    /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
538
    /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
539
    /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
540
    ///
541
    /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
542
    /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
543
    /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
544
    ///
545
    /// // U+2713 CHECK MARK
546
    /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
547
    /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
548
    /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
549
    ///
550
    /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
551
    /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
552
    /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
553
    ///
554
    /// // U+E007F CANCEL TAG
555
    /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
556
    /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
557
    /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
558
    /// ```
559
0
    pub const fn contains(self, val: GeneralCategory) -> bool {
560
0
        0 != (1 << (val as u32)) & self.0
561
0
    }
562
563
    /// Produce a GeneralCategoryGroup that is the inverse of this one
564
    ///
565
    /// # Example
566
    ///
567
    /// ```rust
568
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
569
    ///
570
    /// let letter = GeneralCategoryGroup::Letter;
571
    /// let not_letter = letter.complement();
572
    ///
573
    /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
574
    /// assert!(!letter.contains(GeneralCategory::MathSymbol));
575
    /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
576
    /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
577
    /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
578
    /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
579
    /// ```
580
0
    pub const fn complement(self) -> Self {
581
        // Mask off things not in Self::ALL to guarantee the mask
582
        // values stay in-range
583
0
        GeneralCategoryGroup(!self.0 & Self::ALL)
584
0
    }
585
586
    /// Return the group representing all GeneralCategory values
587
    ///
588
    /// # Example
589
    ///
590
    /// ```rust
591
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
592
    ///
593
    /// let all = GeneralCategoryGroup::all();
594
    ///
595
    /// assert!(all.contains(GeneralCategory::MathSymbol));
596
    /// assert!(all.contains(GeneralCategory::OtherPunctuation));
597
    /// assert!(all.contains(GeneralCategory::UppercaseLetter));
598
    /// ```
599
0
    pub const fn all() -> Self {
600
0
        Self(Self::ALL)
601
0
    }
602
603
    /// Return the empty group
604
    ///
605
    /// # Example
606
    ///
607
    /// ```rust
608
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
609
    ///
610
    /// let empty = GeneralCategoryGroup::empty();
611
    ///
612
    /// assert!(!empty.contains(GeneralCategory::MathSymbol));
613
    /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
614
    /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
615
    /// ```
616
0
    pub const fn empty() -> Self {
617
0
        Self(0)
618
0
    }
619
620
    /// Take the union of two groups
621
    ///
622
    /// # Example
623
    ///
624
    /// ```rust
625
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
626
    ///
627
    /// let letter = GeneralCategoryGroup::Letter;
628
    /// let symbol = GeneralCategoryGroup::Symbol;
629
    /// let union = letter.union(symbol);
630
    ///
631
    /// assert!(union.contains(GeneralCategory::MathSymbol));
632
    /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
633
    /// assert!(union.contains(GeneralCategory::UppercaseLetter));
634
    /// ```
635
0
    pub const fn union(self, other: Self) -> Self {
636
0
        Self(self.0 | other.0)
637
0
    }
638
639
    /// Take the intersection of two groups
640
    ///
641
    /// # Example
642
    ///
643
    /// ```rust
644
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
645
    ///
646
    /// let letter = GeneralCategoryGroup::Letter;
647
    /// let lu = GeneralCategoryGroup::UppercaseLetter;
648
    /// let intersection = letter.intersection(lu);
649
    ///
650
    /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
651
    /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
652
    /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
653
    /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
654
    /// ```
655
0
    pub const fn intersection(self, other: Self) -> Self {
656
0
        Self(self.0 & other.0)
657
0
    }
658
}
659
660
/// Builds the single-category group for a [`GeneralCategory`].
impl From<GeneralCategory> for GeneralCategoryGroup {
661
0
    /// Returns a group whose mask has exactly one bit set: the bit at index
    /// `subcategory as u32`.
    fn from(subcategory: GeneralCategory) -> Self {
662
0
        GeneralCategoryGroup(1 << (subcategory as u32))
663
0
    }
664
}
665
/// Builds a group from a raw `u32` bitmask.
impl From<u32> for GeneralCategoryGroup {
666
0
    /// Keeps only the bits of `mask` that are also set in `Self::ALL`;
    /// all other bits are dropped rather than rejected.
    fn from(mask: u32) -> Self {
667
        // Mask off things not in Self::ALL to guarantee the mask
668
        // values stay in-range
669
0
        GeneralCategoryGroup(mask & Self::ALL)
670
0
    }
671
}
672
/// Exposes the raw bitmask of a [`GeneralCategoryGroup`].
impl From<GeneralCategoryGroup> for u32 {
673
0
    /// Returns the group's underlying `u32` mask unchanged.
    fn from(group: GeneralCategoryGroup) -> Self {
674
0
        group.0
675
0
    }
676
}
677
678
/// Enumerated property Script.
679
///
680
/// This is used with both the Script and Script_Extensions Unicode properties.
681
/// Each character is assigned a single Script, but characters that are used in
682
/// a particular subset of scripts will be in more than one Script_Extensions set.
683
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
684
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to
685
/// determine whether a code point belongs to a certain script, you should use
686
/// [`ScriptWithExtensionsBorrowed::has_script`].
687
///
688
/// For more information, see UAX #24: <https://www.unicode.org/reports/tr24/>.
689
/// See `UScriptCode` in ICU4C.
690
///
691
/// # Example
692
///
693
/// ```
694
/// use icu::properties::{CodePointMapData, props::Script};
695
///
696
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han);  // U+6728
697
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
698
/// ```
699
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
700
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
701
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
702
#[allow(clippy::exhaustive_structs)] // newtype
703
#[repr(transparent)]
704
pub struct Script(pub(crate) u16);
705
706
impl Script {
707
    /// Returns an ICU4C `UScriptCode` value.
    ///
    /// This is the wrapped `u16`, returned unchanged.
708
0
    pub const fn to_icu4c_value(self) -> u16 {
709
0
        self.0
710
0
    }
711
    /// Constructor from an ICU4C `UScriptCode` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
712
0
    pub const fn from_icu4c_value(value: u16) -> Self {
713
0
        Self(value)
714
0
    }
715
}
716
717
create_const_array! {
718
#[allow(missing_docs)] // These constants don't need individual documentation.
719
#[allow(non_upper_case_globals)]
720
impl Script {
721
    pub const Adlam: Script = Script(167);
722
    pub const Ahom: Script = Script(161);
723
    pub const AnatolianHieroglyphs: Script = Script(156);
724
    pub const Arabic: Script = Script(2);
725
    pub const Armenian: Script = Script(3);
726
    pub const Avestan: Script = Script(117);
727
    pub const Balinese: Script = Script(62);
728
    pub const Bamum: Script = Script(130);
729
    pub const BassaVah: Script = Script(134);
730
    pub const Batak: Script = Script(63);
731
    pub const Bengali: Script = Script(4);
732
    pub const BeriaErfe: Script = Script(208);
733
    pub const Bhaiksuki: Script = Script(168);
734
    pub const Bopomofo: Script = Script(5);
735
    pub const Brahmi: Script = Script(65);
736
    pub const Braille: Script = Script(46);
737
    pub const Buginese: Script = Script(55);
738
    pub const Buhid: Script = Script(44);
739
    pub const CanadianAboriginal: Script = Script(40);
740
    pub const Carian: Script = Script(104);
741
    pub const CaucasianAlbanian: Script = Script(159);
742
    pub const Chakma: Script = Script(118);
743
    pub const Cham: Script = Script(66);
744
    pub const Cherokee: Script = Script(6);
745
    pub const Chisoi: Script = Script(209);
746
    pub const Chorasmian: Script = Script(189);
747
    pub const Common: Script = Script(0);
748
    pub const Coptic: Script = Script(7);
749
    pub const Cuneiform: Script = Script(101);
750
    pub const Cypriot: Script = Script(47);
751
    pub const CyproMinoan: Script = Script(193);
752
    pub const Cyrillic: Script = Script(8);
753
    pub const Deseret: Script = Script(9);
754
    pub const Devanagari: Script = Script(10);
755
    pub const DivesAkuru: Script = Script(190);
756
    pub const Dogra: Script = Script(178);
757
    pub const Duployan: Script = Script(135);
758
    pub const EgyptianHieroglyphs: Script = Script(71);
759
    pub const Elbasan: Script = Script(136);
760
    pub const Elymaic: Script = Script(185);
761
    pub const Ethiopian: Script = Script(11);
762
    pub const Georgian: Script = Script(12);
763
    pub const Glagolitic: Script = Script(56);
764
    pub const Gothic: Script = Script(13);
765
    pub const Grantha: Script = Script(137);
766
    pub const Greek: Script = Script(14);
767
    pub const Gujarati: Script = Script(15);
768
    pub const GunjalaGondi: Script = Script(179);
769
    pub const Gurmukhi: Script = Script(16);
770
    pub const Han: Script = Script(17);
771
    pub const Hangul: Script = Script(18);
772
    pub const HanifiRohingya: Script = Script(182);
773
    pub const Hanunoo: Script = Script(43);
774
    pub const Hatran: Script = Script(162);
775
    pub const Hebrew: Script = Script(19);
776
    pub const Hiragana: Script = Script(20);
777
    pub const ImperialAramaic: Script = Script(116);
778
    pub const Inherited: Script = Script(1);
779
    pub const InscriptionalPahlavi: Script = Script(122);
780
    pub const InscriptionalParthian: Script = Script(125);
781
    pub const Javanese: Script = Script(78);
782
    pub const Kaithi: Script = Script(120);
783
    pub const Kannada: Script = Script(21);
784
    pub const Katakana: Script = Script(22);
785
    pub const Kawi: Script = Script(198);
786
    pub const KayahLi: Script = Script(79);
787
    pub const Kharoshthi: Script = Script(57);
788
    pub const KhitanSmallScript: Script = Script(191);
789
    pub const Khmer: Script = Script(23);
790
    pub const Khojki: Script = Script(157);
791
    pub const Khudawadi: Script = Script(145);
792
    pub const Lao: Script = Script(24);
793
    pub const Latin: Script = Script(25);
794
    pub const Lepcha: Script = Script(82);
795
    pub const Limbu: Script = Script(48);
796
    pub const LinearA: Script = Script(83);
797
    pub const LinearB: Script = Script(49);
798
    pub const Lisu: Script = Script(131);
799
    pub const Lycian: Script = Script(107);
800
    pub const Lydian: Script = Script(108);
801
    pub const Mahajani: Script = Script(160);
802
    pub const Makasar: Script = Script(180);
803
    pub const Malayalam: Script = Script(26);
804
    pub const Mandaic: Script = Script(84);
805
    pub const Manichaean: Script = Script(121);
806
    pub const Marchen: Script = Script(169);
807
    pub const MasaramGondi: Script = Script(175);
808
    pub const Medefaidrin: Script = Script(181);
809
    pub const MeeteiMayek: Script = Script(115);
810
    pub const MendeKikakui: Script = Script(140);
811
    pub const MeroiticCursive: Script = Script(141);
812
    pub const MeroiticHieroglyphs: Script = Script(86);
813
    pub const Miao: Script = Script(92);
814
    pub const Modi: Script = Script(163);
815
    pub const Mongolian: Script = Script(27);
816
    pub const Mro: Script = Script(149);
817
    pub const Multani: Script = Script(164);
818
    pub const Myanmar: Script = Script(28);
819
    pub const Nabataean: Script = Script(143);
820
    pub const NagMundari: Script = Script(199);
821
    pub const Nandinagari: Script = Script(187);
822
    pub const Nastaliq: Script = Script(200);
823
    pub const Newa: Script = Script(170);
824
    pub const NewTaiLue: Script = Script(59);
825
    pub const Nko: Script = Script(87);
826
    pub const Nushu: Script = Script(150);
827
    pub const NyiakengPuachueHmong: Script = Script(186);
828
    pub const Ogham: Script = Script(29);
829
    pub const OlChiki: Script = Script(109);
830
    pub const OldHungarian: Script = Script(76);
831
    pub const OldItalic: Script = Script(30);
832
    pub const OldNorthArabian: Script = Script(142);
833
    pub const OldPermic: Script = Script(89);
834
    pub const OldPersian: Script = Script(61);
835
    pub const OldSogdian: Script = Script(184);
836
    pub const OldSouthArabian: Script = Script(133);
837
    pub const OldTurkic: Script = Script(88);
838
    pub const OldUyghur: Script = Script(194);
839
    pub const Oriya: Script = Script(31);
840
    pub const Osage: Script = Script(171);
841
    pub const Osmanya: Script = Script(50);
842
    pub const PahawhHmong: Script = Script(75);
843
    pub const Palmyrene: Script = Script(144);
844
    pub const PauCinHau: Script = Script(165);
845
    pub const PhagsPa: Script = Script(90);
846
    pub const Phoenician: Script = Script(91);
847
    pub const PsalterPahlavi: Script = Script(123);
848
    pub const Rejang: Script = Script(110);
849
    pub const Runic: Script = Script(32);
850
    pub const Samaritan: Script = Script(126);
851
    pub const Saurashtra: Script = Script(111);
852
    pub const Sharada: Script = Script(151);
853
    pub const Shavian: Script = Script(51);
854
    pub const Siddham: Script = Script(166);
855
    pub const Sidetic: Script = Script(210);
856
    pub const SignWriting: Script = Script(112);
857
    pub const Sinhala: Script = Script(33);
858
    pub const Sogdian: Script = Script(183);
859
    pub const SoraSompeng: Script = Script(152);
860
    pub const Soyombo: Script = Script(176);
861
    pub const Sundanese: Script = Script(113);
862
    pub const SylotiNagri: Script = Script(58);
863
    pub const Syriac: Script = Script(34);
864
    pub const Tagalog: Script = Script(42);
865
    pub const Tagbanwa: Script = Script(45);
866
    pub const TaiLe: Script = Script(52);
867
    pub const TaiTham: Script = Script(106);
868
    pub const TaiViet: Script = Script(127);
869
    pub const TaiYo: Script = Script(211);
870
    pub const Takri: Script = Script(153);
871
    pub const Tamil: Script = Script(35);
872
    pub const Tangsa: Script = Script(195);
873
    pub const Tangut: Script = Script(154);
874
    pub const Telugu: Script = Script(36);
875
    pub const Thaana: Script = Script(37);
876
    pub const Thai: Script = Script(38);
877
    pub const Tibetan: Script = Script(39);
878
    pub const Tifinagh: Script = Script(60);
879
    pub const Tirhuta: Script = Script(158);
880
    pub const TolongSiki: Script = Script(212);
881
    pub const Toto: Script = Script(196);
882
    pub const Ugaritic: Script = Script(53);
883
    pub const Unknown: Script = Script(103);
884
    pub const Vai: Script = Script(99);
885
    pub const Vithkuqi: Script = Script(197);
886
    pub const Wancho: Script = Script(188);
887
    pub const WarangCiti: Script = Script(146);
888
    pub const Yezidi: Script = Script(192);
889
    pub const Yi: Script = Script(41);
890
    pub const ZanabazarSquare: Script = Script(177);
891
}
892
}
893
894
make_enumerated_property! {
895
    name: "Script";
896
    short_name: "sc";
897
    ident: Script;
898
    data_marker: crate::provider::PropertyEnumScriptV1;
899
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
900
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
901
}
902
903
/// Enumerated property Hangul_Syllable_Type
904
///
905
/// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
906
/// arbitrary Hangul syllables. This property provides that ontology of Hangul code points.
907
///
908
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
909
///
910
/// # Example
911
///
912
/// ```
913
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
914
///
915
/// assert_eq!(
916
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
917
///     HangulSyllableType::LeadingJamo
918
/// ); // U+1100
919
/// assert_eq!(
920
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
921
///     HangulSyllableType::LeadingVowelSyllable
922
/// ); // U+AC00
923
/// ```
924
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
925
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
926
#[allow(clippy::exhaustive_structs)] // newtype
927
#[repr(transparent)]
928
pub struct HangulSyllableType(pub(crate) u8);
929
930
impl HangulSyllableType {
931
    /// Returns an ICU4C `UHangulSyllableType` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
932
0
    pub const fn to_icu4c_value(self) -> u8 {
933
0
        self.0
934
0
    }
935
    /// Constructor from an ICU4C `UHangulSyllableType` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
936
0
    pub const fn from_icu4c_value(value: u8) -> Self {
937
0
        Self(value)
938
0
    }
939
}
940
941
create_const_array! {
942
#[allow(non_upper_case_globals)]
943
impl HangulSyllableType {
944
    /// (`NA`) not applicable (e.g. not a Hangul code point).
945
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
946
    /// (`L`) a conjoining leading consonant Jamo.
947
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
948
    /// (`V`) a conjoining vowel Jamo.
949
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
950
    /// (`T`) a conjoining trailing consonant Jamo.
951
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
952
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
953
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
954
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
955
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
956
}
957
}
958
959
make_enumerated_property! {
960
    name: "Hangul_Syllable_Type";
961
    short_name: "hst";
962
    ident: HangulSyllableType;
963
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
964
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
965
    ule_ty: u8;
966
967
}
968
969
/// Enumerated property East_Asian_Width.
970
///
971
/// See "Definition" in UAX #11 for the summary of each property value:
972
/// <https://www.unicode.org/reports/tr11/#Definitions>
973
///
974
/// # Example
975
///
976
/// ```
977
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
978
///
979
/// assert_eq!(
980
///     CodePointMapData::<EastAsianWidth>::new().get('\u{FF71}'),
981
///     EastAsianWidth::Halfwidth
982
/// ); // U+FF71: Halfwidth Katakana Letter A
983
/// assert_eq!(
984
///     CodePointMapData::<EastAsianWidth>::new().get('ア'),
985
///     EastAsianWidth::Wide
986
/// ); //U+30A2: Katakana Letter A
987
/// ```
988
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
989
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
990
#[allow(clippy::exhaustive_structs)] // newtype
991
#[repr(transparent)]
992
pub struct EastAsianWidth(pub(crate) u8);
993
994
impl EastAsianWidth {
995
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
996
0
    pub const fn to_icu4c_value(self) -> u8 {
997
0
        self.0
998
0
    }
999
    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
1000
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1001
0
        Self(value)
1002
0
    }
1003
}
1004
1005
create_const_array! {
1006
#[allow(missing_docs)] // These constants don't need individual documentation.
1007
#[allow(non_upper_case_globals)]
1008
impl EastAsianWidth {
1009
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
1010
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
1011
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
1012
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
1013
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
1014
    pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
1015
}
1016
}
1017
1018
make_enumerated_property! {
1019
    name: "East_Asian_Width";
1020
    short_name: "ea";
1021
    ident: EastAsianWidth;
1022
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
1023
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
1024
    ule_ty: u8;
1025
}
1026
1027
/// Enumerated property Line_Break.
1028
///
1029
/// See "Line Breaking Properties" in UAX #14 for the summary of each property
1030
/// value: <https://www.unicode.org/reports/tr14/#Properties>
1031
///
1032
/// The numeric value is compatible with `ULineBreak` in ICU4C.
1033
///
1034
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1035
///
1036
/// # Example
1037
///
1038
/// ```
1039
/// use icu::properties::{props::LineBreak, CodePointMapData};
1040
///
1041
/// assert_eq!(
1042
///     CodePointMapData::<LineBreak>::new().get(')'),
1043
///     LineBreak::CloseParenthesis
1044
/// ); // U+0029: Right Parenthesis
1045
/// assert_eq!(
1046
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
1047
///     LineBreak::ConditionalJapaneseStarter
1048
/// ); //U+3041: Hiragana Letter Small A
1049
/// ```
1050
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1051
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1052
#[allow(clippy::exhaustive_structs)] // newtype
1053
#[repr(transparent)]
1054
pub struct LineBreak(pub(crate) u8);
1055
1056
impl LineBreak {
1057
    /// Returns an ICU4C `ULineBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
1058
0
    pub const fn to_icu4c_value(self) -> u8 {
1059
0
        self.0
1060
0
    }
1061
    /// Constructor from an ICU4C `ULineBreak` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
1062
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1063
0
        Self(value)
1064
0
    }
1065
}
1066
1067
create_const_array! {
1068
#[allow(missing_docs)] // These constants don't need individual documentation.
1069
#[allow(non_upper_case_globals)]
1070
impl LineBreak {
1071
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
1072
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
1073
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
1074
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
1075
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
1076
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
1077
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
1078
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
1079
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
1080
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
1081
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
1082
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
1083
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
1084
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
1085
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
1086
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
1087
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
1088
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
1089
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
1090
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
1091
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
1092
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
1093
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
1094
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
1095
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
1096
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
1097
    pub const Space: LineBreak = LineBreak(26); // name="SP"
1098
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
1099
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
1100
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
1101
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
1102
    pub const H2: LineBreak = LineBreak(31); // name="H2"
1103
    pub const H3: LineBreak = LineBreak(32); // name="H3"
1104
    pub const JL: LineBreak = LineBreak(33); // name="JL"
1105
    pub const JT: LineBreak = LineBreak(34); // name="JT"
1106
    pub const JV: LineBreak = LineBreak(35); // name="JV"
1107
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
1108
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
1109
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
1110
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
1111
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
1112
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
1113
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"
1114
1115
    // Added in ICU 74:
1116
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
1117
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
1118
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
1119
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
1120
    pub const Virama: LineBreak = LineBreak(47); // name="VI"
1121
1122
    // Added in ICU 78:
1123
    pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH"
1124
}
1125
}
1126
1127
make_enumerated_property! {
1128
    name: "Line_Break";
1129
    short_name: "lb";
1130
    ident: LineBreak;
1131
    data_marker: crate::provider::PropertyEnumLineBreakV1;
1132
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
1133
    ule_ty: u8;
1134
}
1135
1136
/// Enumerated property Grapheme_Cluster_Break.
1137
///
1138
/// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
1139
/// summary of each property value:
1140
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
1141
///
1142
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1143
///
1144
/// # Example
1145
///
1146
/// ```
1147
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
1148
///
1149
/// assert_eq!(
1150
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
1151
///     GraphemeClusterBreak::RegionalIndicator
1152
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
1153
/// assert_eq!(
1154
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
1155
///     GraphemeClusterBreak::SpacingMark
1156
/// ); //U+0E33: Thai Character Sara Am
1157
/// ```
1158
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1159
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1160
#[allow(clippy::exhaustive_structs)] // this type is stable
1161
#[repr(transparent)]
1162
pub struct GraphemeClusterBreak(pub(crate) u8);
1163
1164
impl GraphemeClusterBreak {
1165
    /// Returns an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
1166
0
    pub const fn to_icu4c_value(self) -> u8 {
1167
0
        self.0
1168
0
    }
1169
    /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
1170
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1171
0
        Self(value)
1172
0
    }
1173
}
1174
1175
create_const_array! {
1176
#[allow(missing_docs)] // These constants don't need individual documentation.
1177
#[allow(non_upper_case_globals)]
1178
impl GraphemeClusterBreak {
1179
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
1180
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
1181
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
1182
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
1183
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
1184
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
1185
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
1186
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
1187
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
1188
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
1189
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
1190
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
1191
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
1192
    /// This value is obsolete and unused.
1193
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
1194
    /// This value is obsolete and unused.
1195
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
1196
    /// This value is obsolete and unused.
1197
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
1198
    /// This value is obsolete and unused.
1199
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
1200
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
1201
}
1202
}
1203
1204
make_enumerated_property! {
1205
    name: "Grapheme_Cluster_Break";
1206
    short_name: "GCB";
1207
    ident: GraphemeClusterBreak;
1208
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
1209
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
1210
    ule_ty: u8;
1211
}
1212
1213
/// Enumerated property Word_Break.
1214
///
1215
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
1216
/// each property value:
1217
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
1218
///
1219
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1220
///
1221
/// # Example
1222
///
1223
/// ```
1224
/// use icu::properties::{props::WordBreak, CodePointMapData};
1225
///
1226
/// assert_eq!(
1227
///     CodePointMapData::<WordBreak>::new().get('.'),
1228
///     WordBreak::MidNumLet
1229
/// ); // U+002E: Full Stop
1230
/// assert_eq!(
1231
///     CodePointMapData::<WordBreak>::new().get('\u{FF0C}'),
1232
///     WordBreak::MidNum
1233
/// ); // U+FF0C: Fullwidth Comma
1234
/// ```
1235
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1236
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1237
#[allow(clippy::exhaustive_structs)] // newtype
1238
#[repr(transparent)]
1239
pub struct WordBreak(pub(crate) u8);
1240
1241
impl WordBreak {
1242
    /// Returns an ICU4C `UWordBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
1243
0
    pub const fn to_icu4c_value(self) -> u8 {
1244
0
        self.0
1245
0
    }
1246
    /// Constructor from an ICU4C `UWordBreak` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
1247
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1248
0
        Self(value)
1249
0
    }
1250
}
1251
1252
create_const_array! {
1253
#[allow(missing_docs)] // These constants don't need individual documentation.
1254
#[allow(non_upper_case_globals)]
1255
impl WordBreak {
1256
    pub const Other: WordBreak = WordBreak(0); // name="XX"
1257
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
1258
    pub const Format: WordBreak = WordBreak(2); // name="FO"
1259
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
1260
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
1261
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
1262
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
1263
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
1264
    pub const CR: WordBreak = WordBreak(8); // name="CR"
1265
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
1266
    pub const LF: WordBreak = WordBreak(10); // name="LF"
1267
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
1268
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
1269
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
1270
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
1271
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
1272
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
1273
    /// This value is obsolete and unused.
1274
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
1275
    /// This value is obsolete and unused.
1276
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
1277
    /// This value is obsolete and unused.
1278
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
1279
    /// This value is obsolete and unused.
1280
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
1281
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
1282
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
1283
}
1284
}
1285
1286
make_enumerated_property! {
1287
    name: "Word_Break";
1288
    short_name: "WB";
1289
    ident: WordBreak;
1290
    data_marker: crate::provider::PropertyEnumWordBreakV1;
1291
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
1292
    ule_ty: u8;
1293
}
1294
1295
/// Enumerated property Sentence_Break.
1296
///
1297
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
1298
/// each property value:
1299
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
1300
///
1301
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
1302
///
1303
/// # Example
1304
///
1305
/// ```
1306
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
1307
///
1308
/// assert_eq!(
1309
///     CodePointMapData::<SentenceBreak>::new().get('\u{FF19}'),
1310
///     SentenceBreak::Numeric
1311
/// ); // U+FF19: Fullwidth Digit Nine
1312
/// assert_eq!(
1313
///     CodePointMapData::<SentenceBreak>::new().get(','),
1314
///     SentenceBreak::SContinue
1315
/// ); // U+002C: Comma
1316
/// ```
1317
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1318
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1319
#[allow(clippy::exhaustive_structs)] // newtype
1320
#[repr(transparent)]
1321
pub struct SentenceBreak(pub(crate) u8);
1322
1323
impl SentenceBreak {
1324
    /// Returns an ICU4C `USentenceBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
1325
0
    pub const fn to_icu4c_value(self) -> u8 {
1326
0
        self.0
1327
0
    }
1328
    /// Constructor from an ICU4C `USentenceBreak` value.
    ///
    /// The value is wrapped as-is; no validation is performed.
1329
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1330
0
        Self(value)
1331
0
    }
1332
}
1333
1334
create_const_array! {
1335
#[allow(missing_docs)] // These constants don't need individual documentation.
1336
#[allow(non_upper_case_globals)]
1337
impl SentenceBreak {
1338
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
1339
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
1340
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
1341
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
1342
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
1343
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
1344
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
1345
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
1346
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
1347
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
1348
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
1349
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
1350
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
1351
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
1352
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
1353
}
1354
}
1355
1356
make_enumerated_property! {
1357
    name: "Sentence_Break";
1358
    short_name: "SB";
1359
    ident: SentenceBreak;
1360
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
1361
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
1362
    ule_ty: u8;
1363
}
1364
1365
/// Property Canonical_Combining_Class.
1366
/// See UAX #15:
1367
/// <https://www.unicode.org/reports/tr15/>.
///
/// This is an open enum: a `u8` newtype whose named values are exposed as
/// associated constants.
1368
///
1369
/// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API
1370
/// to look up the Canonical_Combining_Class property by scalar value.
1371
///
1372
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the preferred API
1373
/// to look up the Canonical_Combining_Class property by scalar value.
1374
///
1375
/// # Example
1376
///
1377
/// ```
1378
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
1379
///
1380
/// assert_eq!(
1381
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
1382
///     CanonicalCombiningClass::NotReordered
1383
/// ); // U+0061: LATIN SMALL LETTER A
1384
/// assert_eq!(
1385
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
1386
///     CanonicalCombiningClass::Above
1387
/// ); // U+0301: COMBINING ACUTE ACCENT
1388
/// ```
1389
//
1390
// NOTE: The Pernosco debugger has special knowledge
1391
// of this struct. Please do not change the bit layout
1392
// or the crate-module-qualified name of this struct
1393
// without coordination.
1394
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1395
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1396
#[allow(clippy::exhaustive_structs)] // newtype
1397
#[repr(transparent)]
1398
pub struct CanonicalCombiningClass(pub(crate) u8);
1399
1400
impl CanonicalCombiningClass {
1401
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// This is the `u8` wrapped by this newtype, returned unchanged.
1402
0
    pub const fn to_icu4c_value(self) -> u8 {
1403
0
        self.0
1404
0
    }
1405
    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// The value is wrapped as-is; no range check is performed, so the result
    /// is not guaranteed to equal one of the named constants.
1406
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1407
0
        Self(value)
1408
0
    }
1409
}
1410
1411
// Named `Canonical_Combining_Class` values, declared as associated constants.
// The numbering is sparse (for example 1 is followed by 6, and 36 by 84), so
// most `u8` values have no named constant here.
// The impl is wrapped in `create_const_array!`, whose documentation says it
// also produces an `ALL_VALUES` table of (constant name, value as u16) pairs.
create_const_array! {
1412
// These constant names come from PropertyValueAliases.txt
1413
#[allow(missing_docs)] // These constants don't need individual documentation.
1414
#[allow(non_upper_case_globals)]
1415
impl CanonicalCombiningClass {
1416
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
1417
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
1418
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
1419
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
1420
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
1421
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
1422
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
1423
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
1424
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
1425
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
1426
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
1427
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
1428
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
1429
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
1430
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
1431
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
1432
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
1433
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
1434
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
1435
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
1436
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
1437
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
1438
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
1439
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
1440
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
1441
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
1442
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
1443
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
1444
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
1445
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
1446
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
1447
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
1448
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
1449
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
1450
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
1451
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
1452
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
1453
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
1454
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
1455
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
1456
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
1457
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
1458
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
1459
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
1460
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
1461
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
1462
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
1463
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
1464
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
1465
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
1466
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
1467
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
1468
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
1469
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
1470
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
1471
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
1472
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
1473
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
1474
}
1475
}
1476
1477
make_enumerated_property! {
1478
    name: "Canonical_Combining_Class";
1479
    short_name: "ccc";
1480
    ident: CanonicalCombiningClass;
1481
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
1482
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
1483
    ule_ty: u8;
1484
}
1485
1486
/// Property Indic_Conjunct_Break.
1487
/// See UAX #44:
1488
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
///
/// This is an open enum: a `u8` newtype whose named values are exposed as
/// associated constants.
1489
///
1490
/// # Example
1491
///
1492
/// ```
1493
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
1494
///
1495
/// assert_eq!(
1496
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
1497
///     IndicConjunctBreak::None
1498
/// ); // U+0061: LATIN SMALL LETTER A
1499
/// assert_eq!(
1500
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
1501
///     IndicConjunctBreak::Linker
1502
/// ); // U+094D: DEVANAGARI SIGN VIRAMA
1503
/// assert_eq!(
1504
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
1505
///     IndicConjunctBreak::Consonant
1506
/// ); // U+0915: DEVANAGARI LETTER KA
1507
/// assert_eq!(
1508
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
1509
///     IndicConjunctBreak::Extend
1510
/// ); // U+0300: COMBINING GRAVE ACCENT
1511
/// ```
1512
#[doc(hidden)] // draft API in ICU4C
1513
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1514
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1515
#[allow(clippy::exhaustive_structs)] // newtype
1516
#[repr(transparent)]
1517
pub struct IndicConjunctBreak(pub(crate) u8);
1518
1519
impl IndicConjunctBreak {
1520
    /// Returns an ICU4C `UIndicConjunctBreak` value.
    ///
    /// This is the `u8` wrapped by this newtype, returned unchanged.
1521
0
    pub const fn to_icu4c_value(self) -> u8 {
1522
0
        self.0
1523
0
    }
1524
    /// Constructor from an ICU4C `UIndicConjunctBreak` value.
    ///
    /// The value is wrapped as-is; no range check is performed, so the result
    /// is not guaranteed to equal one of the named constants.
1525
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1526
0
        Self(value)
1527
0
    }
1528
}
1529
1530
// Named `Indic_Conjunct_Break` values, declared as associated constants
// numbered contiguously from 0 to 3.
// The impl is wrapped in `create_const_array!`, whose documentation says it
// also produces an `ALL_VALUES` table of (constant name, value as u16) pairs.
create_const_array! {
1531
#[doc(hidden)] // draft API in ICU4C
1532
#[allow(non_upper_case_globals)]
1533
impl IndicConjunctBreak {
1534
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
1535
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
1536
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
1537
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
1538
}
1539
}
1540
1541
make_enumerated_property! {
1542
    name: "Indic_Conjunct_Break";
1543
    short_name: "InCB";
1544
    ident: IndicConjunctBreak;
1545
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
1546
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
1547
    ule_ty: u8;
1548
}
1549
1550
/// Property Indic_Syllabic_Category.
1551
/// See UAX #44:
1552
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// This is an open enum: a `u8` newtype whose named values are exposed as
/// associated constants.
1553
///
1554
/// # Example
1555
///
1556
/// ```
1557
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
1558
///
1559
/// assert_eq!(
1560
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
1561
///     IndicSyllabicCategory::Other
1562
/// ); // U+0061: LATIN SMALL LETTER A
1563
/// assert_eq!(
1564
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
1565
///     IndicSyllabicCategory::Bindu
1566
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
1567
/// ```
1568
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1569
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1570
#[allow(clippy::exhaustive_structs)] // newtype
1571
#[repr(transparent)]
1572
pub struct IndicSyllabicCategory(pub(crate) u8);
1573
1574
impl IndicSyllabicCategory {
1575
    /// Returns an ICU4C `UIndicSyllabicCategory` value.
    ///
    /// This is the `u8` wrapped by this newtype, returned unchanged.
1576
0
    pub const fn to_icu4c_value(self) -> u8 {
1577
0
        self.0
1578
0
    }
1579
    /// Constructor from an ICU4C `UIndicSyllabicCategory` value.
    ///
    /// The value is wrapped as-is; no range check is performed, so the result
    /// is not guaranteed to equal one of the named constants.
1580
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1581
0
        Self(value)
1582
0
    }
1583
}
1584
1585
// Named `Indic_Syllabic_Category` values, declared as associated constants
// numbered contiguously from 0 to 36. After `Other`, the names run
// alphabetically up to `VowelIndependent` (35); `ReorderingKiller` (36) is
// listed last, out of that order.
// The impl is wrapped in `create_const_array!`, whose documentation says it
// also produces an `ALL_VALUES` table of (constant name, value as u16) pairs.
create_const_array! {
1586
#[allow(missing_docs)] // These constants don't need individual documentation.
1587
#[allow(non_upper_case_globals)]
1588
impl IndicSyllabicCategory {
1589
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
1590
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
1591
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
1592
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
1593
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
1594
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
1595
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
1596
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
1597
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
1598
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
1599
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
1600
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
1601
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
1602
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
1603
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
1604
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
1605
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
1606
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
1607
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
1608
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
1609
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
1610
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
1611
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
1612
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
1613
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
1614
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
1615
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
1616
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
1617
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
1618
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
1619
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
1620
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
1621
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
1622
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
1623
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
1624
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
1625
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
1626
}
1627
}
1628
1629
make_enumerated_property! {
1630
    name: "Indic_Syllabic_Category";
1631
    short_name: "InSC";
1632
    ident: IndicSyllabicCategory;
1633
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
1634
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
1635
    ule_ty: u8;
1636
}
1637
1638
/// Enumerated property Joining_Type.
1639
///
1640
/// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of
1641
/// each property value.
///
/// This is an open enum: a `u8` newtype whose named values are exposed as
/// associated constants.
1642
///
1643
/// # Example
1644
///
1645
/// ```
1646
/// use icu::properties::{props::JoiningType, CodePointMapData};
1647
///
1648
/// assert_eq!(
1649
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
1650
///     JoiningType::DualJoining
1651
/// ); // U+0620: Arabic Letter Kashmiri Yeh
1652
/// assert_eq!(
1653
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
1654
///     JoiningType::LeftJoining
1655
/// ); // U+10ACD: Manichaean Letter Heth
1656
/// ```
1657
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1658
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1659
#[allow(clippy::exhaustive_structs)] // newtype
1660
#[repr(transparent)]
1661
pub struct JoiningType(pub(crate) u8);
1662
1663
impl JoiningType {
1664
    /// Returns an ICU4C `UJoiningType` value.
    ///
    /// This is the `u8` wrapped by this newtype, returned unchanged.
1665
417k
    pub const fn to_icu4c_value(self) -> u8 {
1666
417k
        self.0
1667
417k
    }
1668
    /// Constructor from an ICU4C `UJoiningType` value.
    ///
    /// The value is wrapped as-is; no range check is performed, so the result
    /// is not guaranteed to equal one of the named constants.
1669
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1670
0
        Self(value)
1671
0
    }
1672
}
1673
1674
// Named `Joining_Type` values, declared as associated constants numbered
// contiguously from 0 to 5.
// The impl is wrapped in `create_const_array!`, whose documentation says it
// also produces an `ALL_VALUES` table of (constant name, value as u16) pairs.
// NOTE(review): the trailing `name="…"` comments look like the short
// property-value aliases — confirm against PropertyValueAliases.txt.
create_const_array! {
1675
#[allow(missing_docs)] // These constants don't need individual documentation.
1676
#[allow(non_upper_case_globals)]
1677
impl JoiningType {
1678
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
1679
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
1680
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
1681
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
1682
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
1683
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
1684
}
1685
}
1686
1687
make_enumerated_property! {
1688
    name: "Joining_Type";
1689
    short_name: "jt";
1690
    ident: JoiningType;
1691
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
1692
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
1693
    ule_ty: u8;
1694
}
1695
1696
/// Property Vertical_Orientation.
1697
///
1698
/// See UTR #50:
1699
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// This is an open enum: a `u8` newtype whose named values are exposed as
/// associated constants.
1700
///
1701
/// # Example
1702
///
1703
/// ```
1704
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
1705
///
1706
/// assert_eq!(
1707
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
1708
///     VerticalOrientation::Rotated
1709
/// ); // U+0061: LATIN SMALL LETTER A
1710
/// assert_eq!(
1711
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
1712
///     VerticalOrientation::Upright
1713
/// ); // U+00A7: SECTION SIGN
1714
/// assert_eq!(
1715
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
1716
///     VerticalOrientation::TransformedRotated
1717
/// ); // U+2329: LEFT-POINTING ANGLE BRACKET
1718
/// assert_eq!(
1719
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
1720
///     VerticalOrientation::TransformedUpright
1721
/// ); // U+3001: IDEOGRAPHIC COMMA
1722
/// ```
1723
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1724
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1725
#[allow(clippy::exhaustive_structs)] // newtype
1726
#[repr(transparent)]
1727
pub struct VerticalOrientation(pub(crate) u8);
1728
1729
impl VerticalOrientation {
1730
    /// Returns an ICU4C `UVerticalOrientation` value.
    ///
    /// This is the `u8` wrapped by this newtype, returned unchanged.
1731
0
    pub const fn to_icu4c_value(self) -> u8 {
1732
0
        self.0
1733
0
    }
1734
    /// Constructor from an ICU4C `UVerticalOrientation` value.
    ///
    /// The value is wrapped as-is; no range check is performed, so the result
    /// is not guaranteed to equal one of the named constants.
1735
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1736
0
        Self(value)
1737
0
    }
1738
}
1739
1740
// Named `Vertical_Orientation` values, declared as associated constants
// numbered contiguously from 0 to 3.
// The impl is wrapped in `create_const_array!`, whose documentation says it
// also produces an `ALL_VALUES` table of (constant name, value as u16) pairs.
// NOTE(review): the trailing `name="…"` comments look like the short
// property-value aliases — confirm against PropertyValueAliases.txt.
create_const_array! {
1741
#[allow(missing_docs)] // These constants don't need individual documentation.
1742
#[allow(non_upper_case_globals)]
1743
impl VerticalOrientation {
1744
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
1745
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
1746
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
1747
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
1748
}
1749
}
1750
1751
make_enumerated_property! {
1752
    name: "Vertical_Orientation";
1753
    short_name: "vo";
1754
    ident: VerticalOrientation;
1755
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
1756
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
1757
    ule_ty: u8;
1758
}
1759
1760
pub use crate::code_point_set::BinaryProperty;
1761
1762
/// Declares a binary property: a unit marker struct together with its
/// `crate::private::Sealed` and `BinaryProperty` impls.
///
/// Inputs, in order, each terminated by `;` except the last:
/// - `name` / `short_name`: string literals, exposed as the `NAME` and
///   `SHORT_NAME` byte-string constants;
/// - `ident`: the name of the generated struct;
/// - `data_marker`: the type used for `BinaryProperty::DataMarker`;
/// - `singleton`: the identifier resolved on `crate::provider::Baked` for the
///   `SINGLETON` constant;
/// - one or more attributes, forwarded onto the generated struct.
macro_rules! make_binary_property {
1763
    (
1764
        name: $name:literal;
1765
        short_name: $short_name:literal;
1766
        ident: $ident:ident;
1767
        data_marker: $data_marker:ty;
1768
        singleton: $singleton:ident;
1769
            // At least one attribute is required (`+`); the call sites pass
            // their `///` doc comments here.
            $(#[$doc:meta])+
1770
    ) => {
1771
        $(#[$doc])+
1772
        #[derive(Debug)]
1773
        #[non_exhaustive]
1774
        pub struct $ident;
1775
1776
        impl crate::private::Sealed for $ident {}
1777
1778
        impl BinaryProperty for $ident {
1779
        type DataMarker = $data_marker;
1780
            // The baked singleton is only defined when the `compiled_data`
            // feature is enabled.
            #[cfg(feature = "compiled_data")]
1781
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
1782
                &crate::provider::Baked::$singleton;
1783
            // Both names are stored as the bytes of the given literals.
            const NAME: &'static [u8] = $name.as_bytes();
1784
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
1785
        }
1786
    };
1787
}
1788
1789
make_binary_property! {
1790
    name: "ASCII_Hex_Digit";
1791
    short_name: "AHex";
1792
    ident: AsciiHexDigit;
1793
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
1794
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
1795
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
1796
    ///
1797
    /// # Example
1798
    ///
1799
    /// ```
1800
    /// use icu::properties::CodePointSetData;
1801
    /// use icu::properties::props::AsciiHexDigit;
1802
    ///
1803
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
1804
    ///
1805
    /// assert!(ascii_hex_digit.contains('3'));
1806
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
1807
    /// assert!(ascii_hex_digit.contains('A'));
1808
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1809
    /// ```
1810
}
1811
1812
make_binary_property! {
1813
    name: "Alnum";
1814
    short_name: "Alnum";
1815
    ident: Alnum;
1816
    data_marker: crate::provider::PropertyBinaryAlnumV1;
1817
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
1818
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
1819
    ///
1820
    /// This is defined for POSIX compatibility.
1821
}
1822
1823
make_binary_property! {
1824
    name: "Alphabetic";
1825
    short_name: "Alpha";
1826
    ident: Alphabetic;
1827
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
1828
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
1829
    /// Alphabetic characters.
1830
    ///
1831
    /// # Example
1832
    ///
1833
    /// ```
1834
    /// use icu::properties::CodePointSetData;
1835
    /// use icu::properties::props::Alphabetic;
1836
    ///
1837
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
1838
    ///
1839
    /// assert!(!alphabetic.contains('3'));
1840
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
1841
    /// assert!(alphabetic.contains('A'));
1842
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1843
    /// ```
1844
1845
}
1846
1847
make_binary_property! {
1848
    name: "Bidi_Control";
1849
    short_name: "Bidi_C";
1850
    ident: BidiControl;
1851
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
1852
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
1853
    /// Format control characters which have specific functions in the Unicode Bidirectional
1854
    /// Algorithm.
1855
    ///
1856
    /// # Example
1857
    ///
1858
    /// ```
1859
    /// use icu::properties::CodePointSetData;
1860
    /// use icu::properties::props::BidiControl;
1861
    ///
1862
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
1863
    ///
1864
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
1865
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
1866
    /// ```
1867
1868
}
1869
1870
make_binary_property! {
1871
    name: "Bidi_Mirrored";
1872
    short_name: "Bidi_M";
1873
    ident: BidiMirrored;
1874
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
1875
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
1876
    /// Characters that are mirrored in bidirectional text.
1877
    ///
1878
    /// # Example
1879
    ///
1880
    /// ```
1881
    /// use icu::properties::CodePointSetData;
1882
    /// use icu::properties::props::BidiMirrored;
1883
    ///
1884
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
1885
    ///
1886
    /// assert!(bidi_mirrored.contains('['));
1887
    /// assert!(bidi_mirrored.contains(']'));
1888
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
1889
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
1890
    /// ```
1891
1892
}
1893
1894
make_binary_property! {
1895
    name: "Blank";
1896
    short_name: "Blank";
1897
    ident: Blank;
1898
    data_marker: crate::provider::PropertyBinaryBlankV1;
1899
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
1900
    /// Horizontal whitespace characters.
1901
1902
}
1903
1904
make_binary_property! {
1905
    name: "Cased";
1906
    short_name: "Cased";
1907
    ident: Cased;
1908
    data_marker: crate::provider::PropertyBinaryCasedV1;
1909
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
1910
    /// Uppercase, lowercase, and titlecase characters.
1911
    ///
1912
    /// # Example
1913
    ///
1914
    /// ```
1915
    /// use icu::properties::CodePointSetData;
1916
    /// use icu::properties::props::Cased;
1917
    ///
1918
    /// let cased = CodePointSetData::new::<Cased>();
1919
    ///
1920
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
1921
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
1922
    /// ```
1923
1924
}
1925
1926
make_binary_property! {
1927
    name: "Case_Ignorable";
1928
    short_name: "CI";
1929
    ident: CaseIgnorable;
1930
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
1931
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
1932
    /// Characters which are ignored for casing purposes.
1933
    ///
1934
    /// # Example
1935
    ///
1936
    /// ```
1937
    /// use icu::properties::CodePointSetData;
1938
    /// use icu::properties::props::CaseIgnorable;
1939
    ///
1940
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
1941
    ///
1942
    /// assert!(case_ignorable.contains(':'));
1943
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
1944
    /// ```
1945
1946
}
1947
1948
make_binary_property! {
1949
    name: "Full_Composition_Exclusion";
1950
    short_name: "Comp_Ex";
1951
    ident: FullCompositionExclusion;
1952
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
1953
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
1954
    /// Characters that are excluded from composition.
1955
    ///
1956
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
1957
1958
}
1959
1960
make_binary_property! {
1961
    name: "Changes_When_Casefolded";
1962
    short_name: "CWCF";
1963
    ident: ChangesWhenCasefolded;
1964
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
1965
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
1966
    /// Characters whose normalized forms are not stable under case folding.
1967
    ///
1968
    /// # Example
1969
    ///
1970
    /// ```
1971
    /// use icu::properties::CodePointSetData;
1972
    /// use icu::properties::props::ChangesWhenCasefolded;
1973
    ///
1974
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
1975
    ///
1976
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
1977
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
1978
    /// ```
1979
1980
}
1981
1982
make_binary_property! {
1983
    name: "Changes_When_Casemapped";
1984
    short_name: "CWCM";
1985
    ident: ChangesWhenCasemapped;
1986
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
1987
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
1988
    /// Characters which may change when they undergo case mapping.
1989
1990
}
1991
1992
make_binary_property! {
1993
    name: "Changes_When_NFKC_Casefolded";
1994
    short_name: "CWKCF";
1995
    ident: ChangesWhenNfkcCasefolded;
1996
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
1997
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
1998
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
1999
    ///
2000
    /// # Example
2001
    ///
2002
    /// ```
2003
    /// use icu::properties::CodePointSetData;
2004
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
2005
    ///
2006
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
2007
    ///
2008
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
2009
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
2010
    /// ```
2011
2012
}
2013
2014
make_binary_property! {
2015
    name: "Changes_When_Lowercased";
2016
    short_name: "CWL";
2017
    ident: ChangesWhenLowercased;
2018
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
2019
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
2020
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
2021
    ///
2022
    /// # Example
2023
    ///
2024
    /// ```
2025
    /// use icu::properties::CodePointSetData;
2026
    /// use icu::properties::props::ChangesWhenLowercased;
2027
    ///
2028
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
2029
    ///
2030
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
2031
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
2032
    /// ```
2033
2034
}
2035
2036
make_binary_property! {
2037
    name: "Changes_When_Titlecased";
2038
    short_name: "CWT";
2039
    ident: ChangesWhenTitlecased;
2040
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
2041
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
2042
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
2043
    ///
2044
    /// # Example
2045
    ///
2046
    /// ```
2047
    /// use icu::properties::CodePointSetData;
2048
    /// use icu::properties::props::ChangesWhenTitlecased;
2049
    ///
2050
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
2051
    ///
2052
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
2053
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
2054
    /// ```
2055
2056
}
2057
2058
make_binary_property! {
2059
    name: "Changes_When_Uppercased";
2060
    short_name: "CWU";
2061
    ident: ChangesWhenUppercased;
2062
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
2063
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
2064
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
2065
    ///
2066
    /// # Example
2067
    ///
2068
    /// ```
2069
    /// use icu::properties::CodePointSetData;
2070
    /// use icu::properties::props::ChangesWhenUppercased;
2071
    ///
2072
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
2073
    ///
2074
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
2075
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
2076
    /// ```
2077
2078
}
2079
2080
make_binary_property! {
2081
    name: "Dash";
2082
    short_name: "Dash";
2083
    ident: Dash;
2084
    data_marker: crate::provider::PropertyBinaryDashV1;
2085
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
2086
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
2087
    /// their compatibility equivalents.
2088
    ///
2089
    /// # Example
2090
    ///
2091
    /// ```
2092
    /// use icu::properties::CodePointSetData;
2093
    /// use icu::properties::props::Dash;
2094
    ///
2095
    /// let dash = CodePointSetData::new::<Dash>();
2096
    ///
2097
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
2098
    /// assert!(dash.contains('-'));  // U+002D HYPHEN-MINUS
2099
    /// assert!(!dash.contains('='));  // U+003D EQUALS SIGN
2100
    /// ```
2101
2102
}
2103
2104
make_binary_property! {
2105
    name: "Deprecated";
2106
    short_name: "Dep";
2107
    ident: Deprecated;
2108
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
2109
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
2110
    /// Deprecated characters.
2111
    ///
2112
    /// No characters will ever be removed from the standard, but the
2113
    /// usage of deprecated characters is strongly discouraged.
2114
    ///
2115
    /// # Example
2116
    ///
2117
    /// ```
2118
    /// use icu::properties::CodePointSetData;
2119
    /// use icu::properties::props::Deprecated;
2120
    ///
2121
    /// let deprecated = CodePointSetData::new::<Deprecated>();
2122
    ///
2123
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
2124
    /// assert!(!deprecated.contains('A'));
2125
    /// ```
2126
2127
}
2128
2129
make_binary_property! {
2130
    name: "Default_Ignorable_Code_Point";
2131
    short_name: "DI";
2132
    ident: DefaultIgnorableCodePoint;
2133
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
2134
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
2135
    /// For programmatic determination of default ignorable code points.
2136
    ///
2137
    /// New characters that
2138
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
2139
    /// ranges, permitting programs to correctly handle the default rendering of such
2140
    /// characters when not otherwise supported.
2141
    ///
2142
    /// # Example
2143
    ///
2144
    /// ```
2145
    /// use icu::properties::CodePointSetData;
2146
    /// use icu::properties::props::DefaultIgnorableCodePoint;
2147
    ///
2148
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
2149
    ///
2150
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
2151
    /// assert!(!default_ignorable_code_point.contains('E'));
2152
    /// ```
2153
2154
}
2155
2156
make_binary_property! {
2157
    name: "Diacritic";
2158
    short_name: "Dia";
2159
    ident: Diacritic;
2160
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
2161
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
2162
    /// Characters that linguistically modify the meaning of another character to which they apply.
2163
    ///
2164
    /// # Example
2165
    ///
2166
    /// ```
2167
    /// use icu::properties::CodePointSetData;
2168
    /// use icu::properties::props::Diacritic;
2169
    ///
2170
    /// let diacritic = CodePointSetData::new::<Diacritic>();
2171
    ///
2172
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
2173
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
2174
    /// ```
2175
2176
}
2177
2178
make_binary_property! {
2179
    name: "Emoji_Modifier_Base";
2180
    short_name: "EBase";
2181
    ident: EmojiModifierBase;
2182
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
2183
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
2184
    /// Characters that can serve as a base for emoji modifiers.
2185
    ///
2186
    /// # Example
2187
    ///
2188
    /// ```
2189
    /// use icu::properties::CodePointSetData;
2190
    /// use icu::properties::props::EmojiModifierBase;
2191
    ///
2192
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
2193
    ///
2194
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
2195
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
2196
    /// ```
2197
2198
}
2199
2200
make_binary_property! {
2201
    name: "Emoji_Component";
2202
    short_name: "EComp";
2203
    ident: EmojiComponent;
2204
    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
2205
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
2206
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2207
    /// separate choices, such as base characters for emoji keycaps.
2208
    ///
2209
    /// # Example
2210
    ///
2211
    /// ```
2212
    /// use icu::properties::CodePointSetData;
2213
    /// use icu::properties::props::EmojiComponent;
2214
    ///
2215
    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2216
    ///
2217
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2218
    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
2219
    /// assert!(emoji_component.contains('7'));
2220
    /// assert!(!emoji_component.contains('T'));
2221
    /// ```
2222
2223
}
2224
2225
make_binary_property! {
2226
    name: "Emoji_Modifier";
2227
    short_name: "EMod";
2228
    ident: EmojiModifier;
2229
    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
2230
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
2231
    /// Characters that are emoji modifiers.
2232
    ///
2233
    /// # Example
2234
    ///
2235
    /// ```
2236
    /// use icu::properties::CodePointSetData;
2237
    /// use icu::properties::props::EmojiModifier;
2238
    ///
2239
    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2240
    ///
2241
    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
2242
    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2243
    /// ```
2244
2245
}
2246
2247
make_binary_property! {
2248
    name: "Emoji";
2249
    short_name: "Emoji";
2250
    ident: Emoji;
2251
    data_marker: crate::provider::PropertyBinaryEmojiV1;
2252
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
2253
    /// Characters that are emoji.
2254
    ///
2255
    /// # Example
2256
    ///
2257
    /// ```
2258
    /// use icu::properties::CodePointSetData;
2259
    /// use icu::properties::props::Emoji;
2260
    ///
2261
    /// let emoji = CodePointSetData::new::<Emoji>();
2262
    ///
2263
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
2264
    /// assert!(!emoji.contains('V'));
2265
    /// ```
2266
2267
}
2268
2269
make_binary_property! {
2270
    name: "Emoji_Presentation";
2271
    short_name: "EPres";
2272
    ident: EmojiPresentation;
2273
    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2274
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2275
    /// Characters that have emoji presentation by default.
2276
    ///
2277
    /// # Example
2278
    ///
2279
    /// ```
2280
    /// use icu::properties::CodePointSetData;
2281
    /// use icu::properties::props::EmojiPresentation;
2282
    ///
2283
    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2284
    ///
2285
    /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
2286
    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2287
    /// ```
2288
2289
}
2290
2291
make_binary_property! {
2292
    name: "Extender";
2293
    short_name: "Ext";
2294
    ident: Extender;
2295
    data_marker: crate::provider::PropertyBinaryExtenderV1;
2296
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2297
    /// Characters whose principal function is to extend the value of a preceding alphabetic
2298
    /// character or to extend the shape of adjacent characters.
2299
    ///
2300
    /// # Example
2301
    ///
2302
    /// ```
2303
    /// use icu::properties::CodePointSetData;
2304
    /// use icu::properties::props::Extender;
2305
    ///
2306
    /// let extender = CodePointSetData::new::<Extender>();
2307
    ///
2308
    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2309
    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2310
    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2311
    /// ```
2312
2313
}
2314
2315
make_binary_property! {
2316
    name: "Extended_Pictographic";
2317
    short_name: "ExtPict";
2318
    ident: ExtendedPictographic;
2319
    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2320
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2321
    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2322
    /// emoji characters.
2323
    ///
2324
    /// # Example
2325
    ///
2326
    /// ```
2327
    /// use icu::properties::CodePointSetData;
2328
    /// use icu::properties::props::ExtendedPictographic;
2329
    ///
2330
    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2331
    ///
2332
    /// assert!(extended_pictographic.contains('🥳'));  // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2333
    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2334
    /// ```
2335
2336
}
2337
2338
make_binary_property! {
2339
    name: "Graph";
2340
    short_name: "Graph";
2341
    ident: Graph;
2342
    data_marker: crate::provider::PropertyBinaryGraphV1;
2343
    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2344
    /// Visible characters.
2345
    ///
2346
    /// This is defined for POSIX compatibility.
2347
2348
}
2349
2350
make_binary_property! {
2351
    name: "Grapheme_Base";
2352
    short_name: "Gr_Base";
2353
    ident: GraphemeBase;
2354
    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2355
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2356
    /// Property used together with the definition of Standard Korean Syllable Block to define
2357
    /// "Grapheme base".
2358
    ///
2359
    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
2360
    ///
2361
    /// # Example
2362
    ///
2363
    /// ```
2364
    /// use icu::properties::CodePointSetData;
2365
    /// use icu::properties::props::GraphemeBase;
2366
    ///
2367
    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2368
    ///
2369
    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2370
    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2371
    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2372
    /// ```
2373
2374
}
2375
2376
make_binary_property! {
2377
    name: "Grapheme_Extend";
2378
    short_name: "Gr_Ext";
2379
    ident: GraphemeExtend;
2380
    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2381
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2382
    /// Property used to define "Grapheme extender".
2383
    ///
2384
    /// See D59 in Chapter 3, Conformance in the
2385
    /// Unicode Standard.
2386
    ///
2387
    /// # Example
2388
    ///
2389
    /// ```
2390
    /// use icu::properties::CodePointSetData;
2391
    /// use icu::properties::props::GraphemeExtend;
2392
    ///
2393
    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2394
    ///
2395
    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2396
    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2397
    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2398
    /// ```
2399
2400
}
2401
2402
make_binary_property! {
2403
    name: "Grapheme_Link";
2404
    short_name: "Gr_Link";
2405
    ident: GraphemeLink;
2406
    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2407
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2408
    /// Deprecated property.
2409
    ///
2410
    /// Formerly proposed for programmatic determination of grapheme
2411
    /// cluster boundaries.
2412
}
2413
2414
make_binary_property! {
2415
    name: "Hex_Digit";
2416
    short_name: "Hex";
2417
    ident: HexDigit;
2418
    data_marker: crate::provider::PropertyBinaryHexDigitV1;
2419
    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2420
    /// Characters commonly used for the representation of hexadecimal numbers, plus their
2421
    /// compatibility equivalents.
2422
    ///
2423
    /// # Example
2424
    ///
2425
    /// ```
2426
    /// use icu::properties::CodePointSetData;
2427
    /// use icu::properties::props::HexDigit;
2428
    ///
2429
    /// let hex_digit = CodePointSetData::new::<HexDigit>();
2430
    ///
2431
    /// assert!(hex_digit.contains('0'));
2432
    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2433
    /// assert!(hex_digit.contains('f'));
2434
    /// assert!(hex_digit.contains('\u{FF46}'));  // FULLWIDTH LATIN SMALL LETTER F
2435
    /// assert!(hex_digit.contains('\u{FF26}'));  // FULLWIDTH LATIN CAPITAL LETTER F
2436
    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2437
    /// ```
2438
}
2439
2440
make_binary_property! {
2441
    name: "Hyphen";
2442
    short_name: "Hyphen";
2443
    ident: Hyphen;
2444
    data_marker: crate::provider::PropertyBinaryHyphenV1;
2445
    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2446
    /// Deprecated property.
2447
    ///
2448
    /// Dashes which are used to mark connections between pieces of
2449
    /// words, plus the Katakana middle dot.
2450
}
2451
2452
make_binary_property! {
2453
    name: "ID_Compat_Math_Continue";
2454
    short_name: "ID_Compat_Math_Continue";
2455
    ident: IdCompatMathContinue;
2456
    data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1;
2457
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1;
2458
    /// ID_Compat_Math_Continue Property
2459
}
2460
2461
make_binary_property! {
2462
    name: "ID_Compat_Math_Start";
2463
    short_name: "ID_Compat_Math_Start";
2464
    ident: IdCompatMathStart;
2465
    data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1;
2466
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1;
2467
    /// ID_Compat_Math_Start Property
2468
}
2469
2470
make_binary_property! {
2471
    name: "Id_Continue";
2472
    short_name: "IDC";
2473
    ident: IdContinue;
2474
    data_marker: crate::provider::PropertyBinaryIdContinueV1;
2475
    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2476
    /// Characters that can come after the first character in an identifier.
2477
    ///
2478
    /// If using NFKC to
2479
    /// fold differences between characters, use [`XidContinue`] instead.  See
2480
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2481
    /// more details.
2482
    ///
2483
    /// # Example
2484
    ///
2485
    /// ```
2486
    /// use icu::properties::CodePointSetData;
2487
    /// use icu::properties::props::IdContinue;
2488
    ///
2489
    /// let id_continue = CodePointSetData::new::<IdContinue>();
2490
    ///
2491
    /// assert!(id_continue.contains('x'));
2492
    /// assert!(id_continue.contains('1'));
2493
    /// assert!(id_continue.contains('_'));
2494
    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2495
    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2496
    /// assert!(id_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2497
    /// ```
2498
}
2499
2500
make_binary_property! {
2501
    name: "Ideographic";
2502
    short_name: "Ideo";
2503
    ident: Ideographic;
2504
    data_marker: crate::provider::PropertyBinaryIdeographicV1;
2505
    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2506
    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2507
    /// ideographs, or related siniform ideographs.
2508
    ///
2509
    /// # Example
2510
    ///
2511
    /// ```
2512
    /// use icu::properties::CodePointSetData;
2513
    /// use icu::properties::props::Ideographic;
2514
    ///
2515
    /// let ideographic = CodePointSetData::new::<Ideographic>();
2516
    ///
2517
    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2518
    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2519
    /// ```
2520
}
2521
2522
make_binary_property! {
2523
    name: "Id_Start";
2524
    short_name: "IDS";
2525
    ident: IdStart;
2526
    data_marker: crate::provider::PropertyBinaryIdStartV1;
2527
    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2528
    /// Characters that can begin an identifier.
2529
    ///
2530
    /// If using NFKC to fold differences between
2531
    /// characters, use [`XidStart`] instead.  See [`Unicode Standard Annex
2532
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2533
    ///
2534
    /// # Example
2535
    ///
2536
    /// ```
2537
    /// use icu::properties::CodePointSetData;
2538
    /// use icu::properties::props::IdStart;
2539
    ///
2540
    /// let id_start = CodePointSetData::new::<IdStart>();
2541
    ///
2542
    /// assert!(id_start.contains('x'));
2543
    /// assert!(!id_start.contains('1'));
2544
    /// assert!(!id_start.contains('_'));
2545
    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2546
    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2547
    /// assert!(id_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2548
    /// ```
2549
}
2550
2551
make_binary_property! {
2552
    name: "Ids_Binary_Operator";
2553
    short_name: "IDSB";
2554
    ident: IdsBinaryOperator;
2555
    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2556
    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2557
    /// Characters used in Ideographic Description Sequences.
2558
    ///
2559
    /// # Example
2560
    ///
2561
    /// ```
2562
    /// use icu::properties::CodePointSetData;
2563
    /// use icu::properties::props::IdsBinaryOperator;
2564
    ///
2565
    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2566
    ///
2567
    /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2568
    /// assert!(!ids_binary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2569
    /// ```
2570
}
2571
2572
make_binary_property! {
2573
    name: "Ids_Trinary_Operator";
2574
    short_name: "IDST";
2575
    ident: IdsTrinaryOperator;
2576
    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2577
    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2578
    /// Characters used as trinary operators in Ideographic Description Sequences.
2579
    ///
2580
    /// # Example
2581
    ///
2582
    /// ```
2583
    /// use icu::properties::CodePointSetData;
2584
    /// use icu::properties::props::IdsTrinaryOperator;
2585
    ///
2586
    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2587
    ///
2588
    /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2589
    /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2590
    /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
2591
    /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2592
    /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2593
    /// ```
2594
}
2595
2596
make_binary_property! {
2597
    name: "IDS_Unary_Operator";
2598
    short_name: "IDSU";
2599
    ident: IdsUnaryOperator;
2600
    data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1;
2601
    singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1;
2602
    /// IDS_Unary_Operator Property
2603
}
2604
2605
make_binary_property! {
2606
    name: "Join_Control";
2607
    short_name: "Join_C";
2608
    ident: JoinControl;
2609
    data_marker: crate::provider::PropertyBinaryJoinControlV1;
2610
    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2611
    /// Format control characters which have specific functions for control of cursive joining
2612
    /// and ligation.
2613
    ///
2614
    /// # Example
2615
    ///
2616
    /// ```
2617
    /// use icu::properties::CodePointSetData;
2618
    /// use icu::properties::props::JoinControl;
2619
    ///
2620
    /// let join_control = CodePointSetData::new::<JoinControl>();
2621
    ///
2622
    /// assert!(join_control.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2623
    /// assert!(join_control.contains('\u{200D}'));  // ZERO WIDTH JOINER
2624
    /// assert!(!join_control.contains('\u{200E}'));  // LEFT-TO-RIGHT MARK
2625
    /// ```
2626
}
2627
2628
make_binary_property! {
2629
    name: "Logical_Order_Exception";
2630
    short_name: "LOE";
2631
    ident: LogicalOrderException;
2632
    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2633
    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2634
    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
2635
    ///
2636
    /// # Example
2637
    ///
2638
    /// ```
2639
    /// use icu::properties::CodePointSetData;
2640
    /// use icu::properties::props::LogicalOrderException;
2641
    ///
2642
    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2643
    ///
2644
    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2645
    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2646
    /// ```
2647
}
2648
2649
make_binary_property! {
2650
    name: "Lowercase";
2651
    short_name: "Lower";
2652
    ident: Lowercase;
2653
    data_marker: crate::provider::PropertyBinaryLowercaseV1;
2654
    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2655
    /// Lowercase characters.
2656
    ///
2657
    /// # Example
2658
    ///
2659
    /// ```
2660
    /// use icu::properties::CodePointSetData;
2661
    /// use icu::properties::props::Lowercase;
2662
    ///
2663
    /// let lowercase = CodePointSetData::new::<Lowercase>();
2664
    ///
2665
    /// assert!(lowercase.contains('a'));
2666
    /// assert!(!lowercase.contains('A'));
2667
    /// ```
2668
}
2669
2670
make_binary_property! {
2671
    name: "Math";
2672
    short_name: "Math";
2673
    ident: Math;
2674
    data_marker: crate::provider::PropertyBinaryMathV1;
2675
    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2676
    /// Characters used in mathematical notation.
2677
    ///
2678
    /// # Example
2679
    ///
2680
    /// ```
2681
    /// use icu::properties::CodePointSetData;
2682
    /// use icu::properties::props::Math;
2683
    ///
2684
    /// let math = CodePointSetData::new::<Math>();
2685
    ///
2686
    /// assert!(math.contains('='));
2687
    /// assert!(math.contains('+'));
2688
    /// assert!(!math.contains('-'));
2689
    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2690
    /// assert!(!math.contains('/'));
2691
    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2692
    /// ```
2693
}
2694
2695
make_binary_property! {
2696
    name: "Modifier_Combining_Mark";
2697
    short_name: "MCM";
2698
    ident: ModifierCombiningMark;
2699
    data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1;
2700
    singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1;
2701
    /// Modifier_Combining_Mark Property
2702
}
2703
2704
make_binary_property! {
2705
    name: "Noncharacter_Code_Point";
2706
    short_name: "NChar";
2707
    ident: NoncharacterCodePoint;
2708
    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2709
    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2710
    /// Code points permanently reserved for internal use.
2711
    ///
2712
    /// # Example
2713
    ///
2714
    /// ```
2715
    /// use icu::properties::CodePointSetData;
2716
    /// use icu::properties::props::NoncharacterCodePoint;
2717
    ///
2718
    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2719
    ///
2720
    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));  // first of the U+FDD0..U+FDEF noncharacters
2721
    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));  // last code point of the Basic Multilingual Plane
2722
    /// assert!(!noncharacter_code_point.contains('\u{10000}'));  // LINEAR B SYLLABLE B008 A
2723
    /// ```
2724
}
2725
2726
make_binary_property! {
2727
    name: "NFC_Inert";
2728
    short_name: "NFC_Inert";
2729
    ident: NfcInert;
2730
    data_marker: crate::provider::PropertyBinaryNfcInertV1;
2731
    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2732
    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
2733
}
2734
2735
make_binary_property! {
2736
    name: "NFD_Inert";
2737
    short_name: "NFD_Inert";
2738
    ident: NfdInert;
2739
    data_marker: crate::provider::PropertyBinaryNfdInertV1;
2740
    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2741
    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
2742
}
2743
2744
make_binary_property! {
2745
    name: "NFKC_Inert";
2746
    short_name: "NFKC_Inert";
2747
    ident: NfkcInert;
2748
    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2749
    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2750
    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
2751
}
2752
2753
make_binary_property! {
2754
    name: "NFKD_Inert";
2755
    short_name: "NFKD_Inert";
2756
    ident: NfkdInert;
2757
    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2758
    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2759
    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
2760
}
2761
2762
make_binary_property! {
2763
    name: "Pattern_Syntax";
2764
    short_name: "Pat_Syn";
2765
    ident: PatternSyntax;
2766
    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2767
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2768
    /// Characters used as syntax in patterns (such as regular expressions).
2769
    ///
2770
    /// See [`Unicode
2771
    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2772
    /// details.
2773
    ///
2774
    /// # Example
2775
    ///
2776
    /// ```
2777
    /// use icu::properties::CodePointSetData;
2778
    /// use icu::properties::props::PatternSyntax;
2779
    ///
2780
    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2781
    ///
2782
    /// assert!(pattern_syntax.contains('{'));
2783
    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2784
    /// assert!(!pattern_syntax.contains('0'));
2785
    /// ```
2786
}
2787
2788
make_binary_property! {
2789
    name: "Pattern_White_Space";
2790
    short_name: "Pat_WS";
2791
    ident: PatternWhiteSpace;
2792
    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2793
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2794
    /// Characters used as whitespace in patterns (such as regular expressions).
2795
    ///
2796
    /// See
2797
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2798
    /// more details.
2799
    ///
2800
    /// # Example
2801
    ///
2802
    /// ```
2803
    /// use icu::properties::CodePointSetData;
2804
    /// use icu::properties::props::PatternWhiteSpace;
2805
    ///
2806
    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2807
    ///
2808
    /// assert!(pattern_white_space.contains(' '));
2809
    /// assert!(pattern_white_space.contains('\u{2029}'));  // PARAGRAPH SEPARATOR
2810
    /// assert!(pattern_white_space.contains('\u{000A}'));  // NEW LINE
2811
    /// assert!(!pattern_white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
2812
    /// ```
2813
}
2814
2815
make_binary_property! {
2816
    name: "Prepended_Concatenation_Mark";
2817
    short_name: "PCM";
2818
    ident: PrependedConcatenationMark;
2819
    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2820
    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2821
    /// A small class of visible format controls, which precede and then span a sequence of
2822
    /// other characters, usually digits.
2823
}
2824
2825
make_binary_property! {
2826
    name: "Print";
2827
    short_name: "Print";
2828
    ident: Print;
2829
    data_marker: crate::provider::PropertyBinaryPrintV1;
2830
    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2831
    /// Printable characters (visible characters and whitespace).
2832
    ///
2833
    /// This is defined for POSIX compatibility.
2834
}
2835
2836
make_binary_property! {
2837
    name: "Quotation_Mark";
2838
    short_name: "QMark";
2839
    ident: QuotationMark;
2840
    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2841
    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2842
    /// Punctuation characters that function as quotation marks.
2843
    ///
2844
    /// # Example
2845
    ///
2846
    /// ```
2847
    /// use icu::properties::CodePointSetData;
2848
    /// use icu::properties::props::QuotationMark;
2849
    ///
2850
    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2851
    ///
2852
    /// assert!(quotation_mark.contains('\''));
2853
    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2854
    /// assert!(!quotation_mark.contains('<'));
2855
    /// ```
2856
}
2857
2858
make_binary_property! {
2859
    name: "Radical";
2860
    short_name: "Radical";
2861
    ident: Radical;
2862
    data_marker: crate::provider::PropertyBinaryRadicalV1;
2863
    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2864
    /// Characters used in the definition of Ideographic Description Sequences.
2865
    ///
2866
    /// # Example
2867
    ///
2868
    /// ```
2869
    /// use icu::properties::CodePointSetData;
2870
    /// use icu::properties::props::Radical;
2871
    ///
2872
    /// let radical = CodePointSetData::new::<Radical>();
2873
    ///
2874
    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2875
    /// assert!(!radical.contains('\u{F95E}'));  // CJK COMPATIBILITY IDEOGRAPH-F95E
2876
    /// ```
2877
}
2878
2879
make_binary_property! {
    name: "Regional_Indicator";
    short_name: "RI";
    ident: RegionalIndicator;
    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
    /// The regional indicator characters, from `U+1F1E6` through `U+1F1FF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::RegionalIndicator;
    ///
    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
    ///
    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
    /// assert!(!regional_indicator.contains('T'));
    /// ```
}
2900
2901
make_binary_property! {
    name: "Soft_Dotted";
    short_name: "SD";
    ident: SoftDotted;
    data_marker: crate::provider::PropertyBinarySoftDottedV1;
    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
    /// Characters such as i or j that carry a "soft dot".
    ///
    /// Placing an accent on one of these characters makes
    /// the dot disappear.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SoftDotted;
    ///
    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
    ///
    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
    /// ```
}
2924
2925
make_binary_property! {
    name: "Segment_Starter";
    short_name: "Segment_Starter";
    ident: SegmentStarter;
    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
    /// Characters that act as starters with respect to Unicode normalization and combining
    /// character sequences.
}
2934
2935
make_binary_property! {
    name: "Case_Sensitive";
    short_name: "Case_Sensitive";
    ident: CaseSensitive;
    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
    /// Characters that appear as the source of a case mapping, or within the target of
    /// one.
}
2944
2945
make_binary_property! {
    name: "Sentence_Terminal";
    short_name: "STerm";
    ident: SentenceTerminal;
    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
    /// Punctuation characters that typically signal the end of a sentence.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::SentenceTerminal;
    ///
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
    ///
    /// assert!(sentence_terminal.contains('.'));
    /// assert!(sentence_terminal.contains('?'));
    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(!sentence_terminal.contains(','));
    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
    /// ```
}
2968
2969
make_binary_property! {
    name: "Terminal_Punctuation";
    short_name: "Term";
    ident: TerminalPunctuation;
    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
    /// Punctuation characters that typically signal the end of a textual unit.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::TerminalPunctuation;
    ///
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
    ///
    /// assert!(terminal_punctuation.contains('.'));
    /// assert!(terminal_punctuation.contains('?'));
    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
    /// assert!(terminal_punctuation.contains(','));
    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
    /// ```
}
2992
2993
make_binary_property! {
    name: "Unified_Ideograph";
    short_name: "UIdeo";
    ident: UnifiedIdeograph;
    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
    /// Identifies the exact set of characters that the standard treats as Unified CJK
    /// Ideographs.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::UnifiedIdeograph;
    ///
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
    ///
    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178
    /// ```
}
3014
3015
make_binary_property! {
    name: "Uppercase";
    short_name: "Upper";
    ident: Uppercase;
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
    /// Characters that are uppercase.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Uppercase;
    ///
    /// let uppercase = CodePointSetData::new::<Uppercase>();
    ///
    /// assert!(uppercase.contains('U'));
    /// assert!(!uppercase.contains('u'));
    /// ```
}
3035
3036
make_binary_property! {
    name: "Variation_Selector";
    short_name: "VS";
    ident: VariationSelector;
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
    /// Characters that serve as Variation Selectors.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::VariationSelector;
    ///
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
    ///
    /// assert!(variation_selector.contains('\u{180D}'));  // MONGOLIAN FREE VARIATION SELECTOR THREE
    /// assert!(!variation_selector.contains('\u{303E}'));  // IDEOGRAPHIC VARIATION INDICATOR
    /// assert!(variation_selector.contains('\u{FE0F}'));  // VARIATION SELECTOR-16
    /// assert!(!variation_selector.contains('\u{FE10}'));  // PRESENTATION FORM FOR VERTICAL COMMA
    /// assert!(variation_selector.contains('\u{E01EF}'));  // VARIATION SELECTOR-256
    /// ```
}
3059
3060
make_binary_property! {
    name: "White_Space";
    short_name: "space";
    ident: WhiteSpace;
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
    /// Spaces, separators and other control characters that programming languages should
    /// treat as "white space" when parsing elements.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::WhiteSpace;
    ///
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
    ///
    /// assert!(white_space.contains(' '));
    /// assert!(white_space.contains('\u{000A}'));  // NEW LINE
    /// assert!(white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
    /// assert!(!white_space.contains('\u{200B}'));  // ZERO WIDTH SPACE
    /// ```
}
3083
3084
make_binary_property! {
    name: "Xdigit";
    short_name: "Xdigit";
    ident: Xdigit;
    data_marker: crate::provider::PropertyBinaryXdigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
    /// Hexadecimal digit characters.
    ///
    /// This property exists for POSIX compatibility.
}
3094
3095
make_binary_property! {
    name: "XID_Continue";
    short_name: "XIDC";
    ident: XidContinue;
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
    /// Characters allowed in an identifier after its first character.
    ///
    /// More details are given in [`Unicode Standard Annex
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidContinue;
    ///
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
    ///
    /// assert!(xid_continue.contains('x'));
    /// assert!(xid_continue.contains('1'));
    /// assert!(xid_continue.contains('_'));
    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3122
3123
make_binary_property! {
    name: "XID_Start";
    short_name: "XIDS";
    ident: XidStart;
    data_marker: crate::provider::PropertyBinaryXidStartV1;
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
    /// Characters allowed as the first character of an identifier.
    ///
    /// More details are given in [`Unicode Standard Annex
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::XidStart;
    ///
    /// let xid_start = CodePointSetData::new::<XidStart>();
    ///
    /// assert!(xid_start.contains('x'));
    /// assert!(!xid_start.contains('1'));
    /// assert!(!xid_start.contains('_'));
    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
    /// assert!(!xid_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
    /// ```
}
3151
3152
pub use crate::emoji::EmojiSet;
3153
3154
/// Declares a marker type for an emoji property and implements [`EmojiSet`] for it.
///
/// Input fields, in order:
/// - `ident`: name of the generated public unit struct.
/// - `data_marker`: type used as the `EmojiSet::DataMarker` associated type.
/// - `singleton`: name of the constant on `crate::provider::Baked` that backs
///   `EmojiSet::SINGLETON`; that constant is only emitted when the `compiled_data`
///   feature is enabled.
/// - one or more attributes (normally doc comments), forwarded onto the struct.
///
/// The generated struct also gets `Debug`, `#[non_exhaustive]` and an implementation
/// of `crate::private::Sealed`.
macro_rules! make_emoji_set {
    (
        ident: $set:ident;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $set;

        impl crate::private::Sealed for $set {}

        impl EmojiSet for $set {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked;
        }
    };
}
3176
3177
make_emoji_set! {
    ident: BasicEmoji;
    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
    /// Characters and character sequences meant for general-purpose, independent, direct input.
    ///
    /// More details are given in
    /// [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::EmojiSetData;
    /// use icu::properties::props::BasicEmoji;
    ///
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
    ///
    /// assert!(!basic_emoji.contains('\u{0020}'));
    /// assert!(!basic_emoji.contains('\n'));
    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
    /// assert!(basic_emoji.contains_str("\u{1F983}"));
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
    /// ```
}
3202
3203
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use crate::provider::Baked;
    use std::collections::BTreeMap;

    /// Asserts that the baked name-parsing data and the Rust constants list the same values.
    ///
    /// Each entry of `lookup` is keyed by its numeric value and tagged `"Data"`. Each
    /// element of `consts` is converted to its numeric value, labelled with its long
    /// name (`"<unknown>"` when the long-name lookup returns nothing) and tagged
    /// `"Consts"`. Any value present on only one side is listed in the panic message.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        let names = crate::PropertyNamesLong::<T>::new();

        // Everything the data knows about; entries are struck off as constants claim them.
        let mut unclaimed: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        // Constants with no counterpart in the data come first ...
        let mut diff: Vec<_> = consts
            .into_iter()
            .map(|&constant| {
                let long_name = names.get(constant).unwrap_or("<unknown>").to_string();
                (u16::from(constant) as usize, (long_name, "Consts"))
            })
            .filter(|(value, _)| unclaimed.remove(value).is_none())
            .collect();
        // ... followed by data entries that no constant claimed.
        diff.extend(unclaimed);

        let fmt_diff: String = diff
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    /// `East_Asian_Width` constants match the baked data.
    #[test]
    fn test_ea() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
            EastAsianWidth::ALL_VALUES,
        );
    }

    /// `Canonical_Combining_Class` constants match the baked data.
    #[test]
    fn test_ccc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    /// `Joining_Type` constants match the baked data.
    #[test]
    fn test_jt() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
            JoiningType::ALL_VALUES,
        );
    }

    /// `Indic_Syllabic_Category` constants match the baked data.
    #[test]
    fn test_insc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    /// `Sentence_Break` constants match the baked data.
    #[test]
    fn test_sb() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
            SentenceBreak::ALL_VALUES,
        );
    }

    /// `Word_Break` constants match the baked data.
    #[test]
    fn test_wb() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
            WordBreak::ALL_VALUES,
        );
    }

    /// `Bidi_Class` constants match the baked data.
    #[test]
    fn test_bc() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
            BidiClass::ALL_VALUES,
        );
    }

    /// `Hangul_Syllable_Type` constants match the baked data.
    #[test]
    fn test_hst() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
            HangulSyllableType::ALL_VALUES,
        );
    }

    /// `Vertical_Orientation` constants match the baked data.
    #[test]
    fn test_vo() {
        check_enum(
            Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
            VerticalOrientation::ALL_VALUES,
        );
    }
}