Coverage Report

Created: 2026-02-14 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/icu_properties-2.1.2/src/props.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
//! This module defines all available properties.
6
//!
7
//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8
//! and implement [`EnumeratedProperty`].
9
//!
10
//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11
//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12
//!
13
//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14
//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15
//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16
//! can be constructed.
17
//!
18
//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20
pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22
pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
24
/// See [`test_enumerated_property_completeness`] for usage.
25
/// Example input:
26
/// ```ignore
27
/// impl EastAsianWidth {
28
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
29
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
30
///     ...
31
/// }
32
/// ```
33
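/// Produces `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`
34
/// alongside the constants, plus a `From<EastAsianWidth> for u16` impl that casts the first field of the struct to `u16`.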
35
macro_rules! create_const_array {
36
    (
37
        $ ( #[$meta:meta] )*
38
        impl $enum_ty:ident {
39
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
40
        }
41
        #[test]
42
        fn $consts_test:ident();
43
    ) => {
44
        $( #[$meta] )*
45
        impl $enum_ty {
46
            $(
47
                $(#[$const_meta])*
48
                $v const $i: $t = $e;
49
            )*
50
51
            /// All possible values of this enum in the Unicode version
52
            /// from this ICU4X release.
53
            pub const ALL_VALUES: &'static [$enum_ty] = &[
54
                $($enum_ty::$i),*
55
            ];
56
        }
57
58
        #[cfg(feature = "datagen")]
59
        impl databake::Bake for $enum_ty {
60
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
61
                env.insert("icu_properties");
62
                match *self {
63
                    $(
64
                        Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i),
65
                    )*
66
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
67
                }
68
            }
69
        }
70
71
72
        impl From<$enum_ty> for u16  {
73
0
            fn from(other: $enum_ty) -> Self {
74
0
                other.0 as u16
75
0
            }
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::Script>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::LineBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::GraphemeClusterBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::HangulSyllableType>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::EastAsianWidth>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::WordBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::CanonicalCombiningClass>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::IndicConjunctBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::SentenceBreak>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::IndicSyllabicCategory>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::JoiningType>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::VerticalOrientation>>::from
Unexecuted instantiation: <u16 as core::convert::From<icu_properties::props::BidiClass>>::from
76
        }
77
78
        #[test]
79
        fn $consts_test() {
80
            $(
81
                assert_eq!(
82
                    crate::names::PropertyNamesLong::<$enum_ty>::new().get($enum_ty::$i).unwrap()
83
                        // Rust identifiers use camel case
84
                        .replace('_', "")
85
                        // We use Ethiopian
86
                        .replace("Ethiopic", "Ethiopian")
87
                        // Nastaliq is missing a long name?
88
                        .replace("Aran", "Nastaliq")
89
                        // We spell these out
90
                        .replace("LVSyllable", "LeadingVowelSyllable")
91
                        .replace("LVTSyllable", "LeadingVowelTrailingSyllable"),
92
                    stringify!($i)
93
                );
94
            )*
95
        }
96
    }
97
}
98
99
pub use crate::code_point_map::EnumeratedProperty;
100
101
macro_rules! make_enumerated_property {
102
    (
103
        name: $name:literal;
104
        short_name: $short_name:literal;
105
        ident: $value_ty:path;
106
        data_marker: $data_marker:ty;
107
        singleton: $singleton:ident;
108
        $(ule_ty: $ule_ty:ty;)?
109
    ) => {
110
        impl crate::private::Sealed for $value_ty {}
111
112
        impl EnumeratedProperty for $value_ty {
113
            type DataMarker = $data_marker;
114
            #[cfg(feature = "compiled_data")]
115
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
116
                crate::provider::Baked::$singleton;
117
            const NAME: &'static [u8] = $name.as_bytes();
118
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
119
        }
120
121
        $(
122
            impl zerovec::ule::AsULE for $value_ty {
123
                type ULE = $ule_ty;
124
125
0
                fn to_unaligned(self) -> Self::ULE {
126
0
                    self.0.to_unaligned()
127
0
                }
Unexecuted instantiation: <icu_properties::props::Script as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::LineBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::GraphemeClusterBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::HangulSyllableType as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::EastAsianWidth as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::CanonicalCombiningClass as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::IndicConjunctBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::WordBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::SentenceBreak as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::IndicSyllabicCategory as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::JoiningType as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::VerticalOrientation as zerovec::ule::AsULE>::to_unaligned
Unexecuted instantiation: <icu_properties::props::BidiClass as zerovec::ule::AsULE>::to_unaligned
128
0
                fn from_unaligned(unaligned: Self::ULE) -> Self {
129
0
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
130
0
                }
Unexecuted instantiation: <icu_properties::props::Script as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::LineBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::GraphemeClusterBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::HangulSyllableType as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::EastAsianWidth as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::CanonicalCombiningClass as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::IndicConjunctBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::WordBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::SentenceBreak as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::IndicSyllabicCategory as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::JoiningType as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::VerticalOrientation as zerovec::ule::AsULE>::from_unaligned
Unexecuted instantiation: <icu_properties::props::BidiClass as zerovec::ule::AsULE>::from_unaligned
131
            }
132
        )?
133
    };
134
}
135
136
/// Enumerated property Bidi_Class
137
///
138
/// These are the categories required by the Unicode Bidirectional Algorithm.
139
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
140
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
141
///
142
/// # Example
143
///
144
/// ```
145
/// use icu::properties::{props::BidiClass, CodePointMapData};
146
///
147
/// assert_eq!(
148
///     CodePointMapData::<BidiClass>::new().get('y'),
149
///     BidiClass::LeftToRight
150
/// ); // U+0079
151
/// assert_eq!(
152
///     CodePointMapData::<BidiClass>::new().get('ع'),
153
///     BidiClass::ArabicLetter
154
/// ); // U+0639
155
/// ```
156
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
157
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
158
#[allow(clippy::exhaustive_structs)] // newtype
159
#[repr(transparent)]
160
pub struct BidiClass(pub(crate) u8);
161
162
impl BidiClass {
163
    /// Returns an ICU4C `UBidiClass` value.
164
0
    pub const fn to_icu4c_value(self) -> u8 {
165
0
        self.0
166
0
    }
167
    /// Constructor from an ICU4C `UBidiClass` value.
168
0
    pub const fn from_icu4c_value(value: u8) -> Self {
169
0
        Self(value)
170
0
    }
171
}
172
173
create_const_array! {
174
#[allow(non_upper_case_globals)]
175
impl BidiClass {
176
    /// (`L`) any strong left-to-right character
177
    pub const LeftToRight: BidiClass = BidiClass(0);
178
    /// (`R`) any strong right-to-left (non-Arabic-type) character
179
    pub const RightToLeft: BidiClass = BidiClass(1);
180
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
181
    pub const EuropeanNumber: BidiClass = BidiClass(2);
182
    /// (`ES`) plus and minus signs
183
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
184
    /// (`ET`) a terminator in a numeric format context, includes currency signs
185
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
186
    /// (`AN`) any Arabic-Indic digit
187
    pub const ArabicNumber: BidiClass = BidiClass(5);
188
    /// (`CS`) commas, colons, and slashes
189
    pub const CommonSeparator: BidiClass = BidiClass(6);
190
    /// (`B`) various newline characters
191
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
192
    /// (`S`) various segment-related control codes
193
    pub const SegmentSeparator: BidiClass = BidiClass(8);
194
    /// (`WS`) spaces
195
    pub const WhiteSpace: BidiClass = BidiClass(9);
196
    /// (`ON`) most other symbols and punctuation marks
197
    pub const OtherNeutral: BidiClass = BidiClass(10);
198
    /// (`LRE`) U+202A: the LR embedding control
199
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
200
    /// (`LRO`) U+202D: the LR override control
201
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
202
    /// (`AL`) any strong right-to-left (Arabic-type) character
203
    pub const ArabicLetter: BidiClass = BidiClass(13);
204
    /// (`RLE`) U+202B: the RL embedding control
205
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
206
    /// (`RLO`) U+202E: the RL override control
207
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
208
    /// (`PDF`) U+202C: terminates an embedding or override control
209
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
210
    /// (`NSM`) any nonspacing mark
211
    pub const NonspacingMark: BidiClass = BidiClass(17);
212
    /// (`BN`) most format characters, control codes, or noncharacters
213
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
214
    /// (`FSI`) U+2068: the first strong isolate control
215
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
216
    /// (`LRI`) U+2066: the LR isolate control
217
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
218
    /// (`RLI`) U+2067: the RL isolate control
219
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
220
    /// (`PDI`) U+2069: terminates an isolate control
221
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
222
}
223
#[test]
224
fn bidi_props_consts();
225
}
226
227
make_enumerated_property! {
228
    name: "Bidi_Class";
229
    short_name: "bc";
230
    ident: BidiClass;
231
    data_marker: crate::provider::PropertyEnumBidiClassV1;
232
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
233
    ule_ty: u8;
234
}
235
236
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
237
pub(crate) mod gc {
238
    /// Enumerated property General_Category.
239
    ///
240
    /// General_Category specifies the most general classification of a code point, usually
241
    /// determined based on the primary characteristic of the assigned character. For example, is the
242
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
243
    ///
244
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
245
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
246
    /// crate::props::GeneralCategoryGroup).
247
    ///
248
    /// # Example
249
    ///
250
    /// ```
251
    /// use icu::properties::{props::GeneralCategory, CodePointMapData};
252
    ///
253
    /// assert_eq!(
254
    ///     CodePointMapData::<GeneralCategory>::new().get('木'),
255
    ///     GeneralCategory::OtherLetter
256
    /// ); // U+6728
257
    /// assert_eq!(
258
    ///     CodePointMapData::<GeneralCategory>::new().get('🎃'),
259
    ///     GeneralCategory::OtherSymbol
260
    /// ); // U+1F383 JACK-O-LANTERN
261
    /// ```
262
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
263
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
264
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
265
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
266
    #[allow(clippy::exhaustive_enums)] // this type is stable
267
    #[zerovec::make_ule(GeneralCategoryULE)]
268
    #[cfg_attr(not(feature = "alloc"), zerovec::skip_derive(ZeroMapKV))]
269
    #[repr(u8)]
270
    pub enum GeneralCategory {
271
        /// (`Cn`) A reserved unassigned code point or a noncharacter
272
        Unassigned = 0,
273
274
        /// (`Lu`) An uppercase letter
275
        UppercaseLetter = 1,
276
        /// (`Ll`) A lowercase letter
277
        LowercaseLetter = 2,
278
        /// (`Lt`) A digraphic letter, with first part uppercase
279
        TitlecaseLetter = 3,
280
        /// (`Lm`) A modifier letter
281
        ModifierLetter = 4,
282
        /// (`Lo`) Other letters, including syllables and ideographs
283
        OtherLetter = 5,
284
285
        /// (`Mn`) A nonspacing combining mark (zero advance width)
286
        NonspacingMark = 6,
287
        /// (`Mc`) A spacing combining mark (positive advance width)
288
        SpacingMark = 8,
289
        /// (`Me`) An enclosing combining mark
290
        EnclosingMark = 7,
291
292
        /// (`Nd`) A decimal digit
293
        DecimalNumber = 9,
294
        /// (`Nl`) A letterlike numeric character
295
        LetterNumber = 10,
296
        /// (`No`) A numeric character of other type
297
        OtherNumber = 11,
298
299
        /// (`Zs`) A space character (of various non-zero widths)
300
        SpaceSeparator = 12,
301
        /// (`Zl`) U+2028 LINE SEPARATOR only
302
        LineSeparator = 13,
303
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
304
        ParagraphSeparator = 14,
305
306
        /// (`Cc`) A C0 or C1 control code
307
        Control = 15,
308
        /// (`Cf`) A format control character
309
        Format = 16,
310
        /// (`Co`) A private-use character
311
        PrivateUse = 17,
312
        /// (`Cs`) A surrogate code point
313
        Surrogate = 18,
314
315
        /// (`Pd`) A dash or hyphen punctuation mark
316
        DashPunctuation = 19,
317
        /// (`Ps`) An opening punctuation mark (of a pair)
318
        OpenPunctuation = 20,
319
        /// (`Pe`) A closing punctuation mark (of a pair)
320
        ClosePunctuation = 21,
321
        /// (`Pc`) A connecting punctuation mark, like a tie
322
        ConnectorPunctuation = 22,
323
        /// (`Pi`) An initial quotation mark
324
        InitialPunctuation = 28,
325
        /// (`Pf`) A final quotation mark
326
        FinalPunctuation = 29,
327
        /// (`Po`) A punctuation mark of other type
328
        OtherPunctuation = 23,
329
330
        /// (`Sm`) A symbol of mathematical use
331
        MathSymbol = 24,
332
        /// (`Sc`) A currency sign
333
        CurrencySymbol = 25,
334
        /// (`Sk`) A non-letterlike modifier symbol
335
        ModifierSymbol = 26,
336
        /// (`So`) A symbol of other type
337
        OtherSymbol = 27,
338
    }
339
}
340
341
pub use gc::GeneralCategory;
342
343
impl GeneralCategory {
344
    /// All possible values of this enum
345
    pub const ALL_VALUES: &'static [GeneralCategory] = &[
346
        GeneralCategory::Unassigned,
347
        GeneralCategory::UppercaseLetter,
348
        GeneralCategory::LowercaseLetter,
349
        GeneralCategory::TitlecaseLetter,
350
        GeneralCategory::ModifierLetter,
351
        GeneralCategory::OtherLetter,
352
        GeneralCategory::NonspacingMark,
353
        GeneralCategory::SpacingMark,
354
        GeneralCategory::EnclosingMark,
355
        GeneralCategory::DecimalNumber,
356
        GeneralCategory::LetterNumber,
357
        GeneralCategory::OtherNumber,
358
        GeneralCategory::SpaceSeparator,
359
        GeneralCategory::LineSeparator,
360
        GeneralCategory::ParagraphSeparator,
361
        GeneralCategory::Control,
362
        GeneralCategory::Format,
363
        GeneralCategory::PrivateUse,
364
        GeneralCategory::Surrogate,
365
        GeneralCategory::DashPunctuation,
366
        GeneralCategory::OpenPunctuation,
367
        GeneralCategory::ClosePunctuation,
368
        GeneralCategory::ConnectorPunctuation,
369
        GeneralCategory::InitialPunctuation,
370
        GeneralCategory::FinalPunctuation,
371
        GeneralCategory::OtherPunctuation,
372
        GeneralCategory::MathSymbol,
373
        GeneralCategory::CurrencySymbol,
374
        GeneralCategory::ModifierSymbol,
375
        GeneralCategory::OtherSymbol,
376
    ];
377
}
378
379
#[test]
380
fn gc_variants() {
381
    for &variant in GeneralCategory::ALL_VALUES {
382
        assert_eq!(
383
            crate::names::PropertyNamesLong::<GeneralCategory>::new()
384
                .get(variant)
385
                .unwrap()
386
                // Rust identifiers use camel case
387
                .replace('_', ""),
388
            format!("{variant:?}")
389
        );
390
    }
391
}
392
393
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
394
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
395
#[non_exhaustive]
396
pub struct GeneralCategoryOutOfBoundsError;
397
398
impl TryFrom<u8> for GeneralCategory {
399
    type Error = GeneralCategoryOutOfBoundsError;
400
    /// Construct this [`GeneralCategory`] from an integer, returning
401
    /// an error if it is out of bounds
402
0
    fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
403
0
        GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
404
0
    }
405
}
406
407
make_enumerated_property! {
408
    name: "General_Category";
409
    short_name: "gc";
410
    ident: GeneralCategory;
411
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
412
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
413
}
414
415
/// Groupings of multiple General_Category property values.
416
///
417
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
418
/// values that are useful for users in certain contexts, such as regex. In
419
/// other words, unlike [`GeneralCategory`], this supports groups of general
420
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
421
/// `LowercaseLetter`, etc.
422
///
423
/// See <https://www.unicode.org/reports/tr44/> .
424
///
425
/// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C.
426
/// Unlike [`GeneralCategory`], this supports groups of general categories: for example, `Letter`
427
/// is the union of `UppercaseLetter`, `LowercaseLetter`, etc.
428
///
429
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
430
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
431
#[allow(clippy::exhaustive_structs)] // newtype
432
#[repr(transparent)]
433
pub struct GeneralCategoryGroup(pub(crate) u32);
434
435
impl crate::private::Sealed for GeneralCategoryGroup {}
436
437
use GeneralCategory as GC;
438
use GeneralCategoryGroup as GCG;
439
440
#[allow(non_upper_case_globals)]
441
impl GeneralCategoryGroup {
442
    /// (`Lu`) An uppercase letter
443
    pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
444
    /// (`Ll`) A lowercase letter
445
    pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
446
    /// (`Lt`) A digraphic letter, with first part uppercase
447
    pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
448
    /// (`Lm`) A modifier letter
449
    pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
450
    /// (`Lo`) Other letters, including syllables and ideographs
451
    pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
452
    /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
453
    pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
454
        | (1 << (GC::LowercaseLetter as u32))
455
        | (1 << (GC::TitlecaseLetter as u32)));
456
    /// (`L`) The union of all letter categories
457
    pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
458
        | (1 << (GC::LowercaseLetter as u32))
459
        | (1 << (GC::TitlecaseLetter as u32))
460
        | (1 << (GC::ModifierLetter as u32))
461
        | (1 << (GC::OtherLetter as u32)));
462
463
    /// (`Mn`) A nonspacing combining mark (zero advance width)
464
    pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
465
    /// (`Me`) An enclosing combining mark
466
    pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
467
    /// (`Mc`) A spacing combining mark (positive advance width)
468
    pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
469
    /// (`M`) The union of all mark categories
470
    pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
471
        | (1 << (GC::EnclosingMark as u32))
472
        | (1 << (GC::SpacingMark as u32)));
473
474
    /// (`Nd`) A decimal digit
475
    pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
476
    /// (`Nl`) A letterlike numeric character
477
    pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
478
    /// (`No`) A numeric character of other type
479
    pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
480
    /// (`N`) The union of all number categories
481
    pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
482
        | (1 << (GC::LetterNumber as u32))
483
        | (1 << (GC::OtherNumber as u32)));
484
485
    /// (`Zs`) A space character (of various non-zero widths)
486
    pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
487
    /// (`Zl`) U+2028 LINE SEPARATOR only
488
    pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
489
    /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
490
    pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
491
    /// (`Z`) The union of all separator categories
492
    pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
493
        | (1 << (GC::LineSeparator as u32))
494
        | (1 << (GC::ParagraphSeparator as u32)));
495
496
    /// (`Cc`) A C0 or C1 control code
497
    pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
498
    /// (`Cf`) A format control character
499
    pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
500
    /// (`Co`) A private-use character
501
    pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
502
    /// (`Cs`) A surrogate code point
503
    pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
504
    /// (`Cn`) A reserved unassigned code point or a noncharacter
505
    pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
506
    /// (`C`) The union of all control code, reserved, and unassigned categories
507
    pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
508
        | (1 << (GC::Format as u32))
509
        | (1 << (GC::PrivateUse as u32))
510
        | (1 << (GC::Surrogate as u32))
511
        | (1 << (GC::Unassigned as u32)));
512
513
    /// (`Pd`) A dash or hyphen punctuation mark
514
    pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
515
    /// (`Ps`) An opening punctuation mark (of a pair)
516
    pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
517
    /// (`Pe`) A closing punctuation mark (of a pair)
518
    pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
519
    /// (`Pc`) A connecting punctuation mark, like a tie
520
    pub const ConnectorPunctuation: GeneralCategoryGroup =
521
        GCG(1 << (GC::ConnectorPunctuation as u32));
522
    /// (`Pi`) An initial quotation mark
523
    pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
524
    /// (`Pf`) A final quotation mark
525
    pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
526
    /// (`Po`) A punctuation mark of other type
527
    pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
528
    /// (`P`) The union of all punctuation categories
529
    pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
530
        | (1 << (GC::OpenPunctuation as u32))
531
        | (1 << (GC::ClosePunctuation as u32))
532
        | (1 << (GC::ConnectorPunctuation as u32))
533
        | (1 << (GC::OtherPunctuation as u32))
534
        | (1 << (GC::InitialPunctuation as u32))
535
        | (1 << (GC::FinalPunctuation as u32)));
536
537
    /// (`Sm`) A symbol of mathematical use
538
    pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
539
    /// (`Sc`) A currency sign
540
    pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
541
    /// (`Sk`) A non-letterlike modifier symbol
542
    pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
543
    /// (`So`) A symbol of other type
544
    pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
545
    /// (`S`) The union of all symbol categories
546
    pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
547
        | (1 << (GC::CurrencySymbol as u32))
548
        | (1 << (GC::ModifierSymbol as u32))
549
        | (1 << (GC::OtherSymbol as u32)));
550
551
    const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
552
553
    /// Return whether the code point belongs in the provided multi-value category.
554
    ///
555
    /// ```
556
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
557
    /// use icu::properties::CodePointMapData;
558
    ///
559
    /// let gc = CodePointMapData::<GeneralCategory>::new();
560
    ///
561
    /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
562
    /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
563
    ///
564
    /// // U+0B1E ORIYA LETTER NYA
565
    /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
566
    /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
567
    /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
568
    ///
569
    /// // U+0301 COMBINING ACUTE ACCENT
570
    /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
571
    /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
572
    /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
573
    ///
574
    /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
575
    /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
576
    /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
577
    ///
578
    /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
579
    /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
580
    /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
581
    ///
582
    /// // U+2713 CHECK MARK
583
    /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
584
    /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
585
    /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
586
    ///
587
    /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
588
    /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
589
    /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
590
    ///
591
    /// // U+E007F CANCEL TAG
592
    /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
593
    /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
594
    /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
595
    /// ```
596
0
    pub const fn contains(self, val: GeneralCategory) -> bool {
597
0
        0 != (1 << (val as u32)) & self.0
598
0
    }
599
600
    /// Produce a GeneralCategoryGroup that is the inverse of this one
601
    ///
602
    /// # Example
603
    ///
604
    /// ```rust
605
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
606
    ///
607
    /// let letter = GeneralCategoryGroup::Letter;
608
    /// let not_letter = letter.complement();
609
    ///
610
    /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
611
    /// assert!(!letter.contains(GeneralCategory::MathSymbol));
612
    /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
613
    /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
614
    /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
615
    /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
616
    /// ```
617
0
    pub const fn complement(self) -> Self {
618
        // Mask off things not in Self::ALL to guarantee the mask
619
        // values stay in-range
620
0
        GeneralCategoryGroup(!self.0 & Self::ALL)
621
0
    }
622
623
    /// Return the group representing all GeneralCategory values
624
    ///
625
    /// # Example
626
    ///
627
    /// ```rust
628
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
629
    ///
630
    /// let all = GeneralCategoryGroup::all();
631
    ///
632
    /// assert!(all.contains(GeneralCategory::MathSymbol));
633
    /// assert!(all.contains(GeneralCategory::OtherPunctuation));
634
    /// assert!(all.contains(GeneralCategory::UppercaseLetter));
635
    /// ```
636
0
    pub const fn all() -> Self {
637
0
        Self(Self::ALL)
638
0
    }
639
640
    /// Return the empty group
641
    ///
642
    /// # Example
643
    ///
644
    /// ```rust
645
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
646
    ///
647
    /// let empty = GeneralCategoryGroup::empty();
648
    ///
649
    /// assert!(!empty.contains(GeneralCategory::MathSymbol));
650
    /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
651
    /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
652
    /// ```
653
0
    pub const fn empty() -> Self {
654
0
        Self(0)
655
0
    }
656
657
    /// Take the union of two groups
658
    ///
659
    /// # Example
660
    ///
661
    /// ```rust
662
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
663
    ///
664
    /// let letter = GeneralCategoryGroup::Letter;
665
    /// let symbol = GeneralCategoryGroup::Symbol;
666
    /// let union = letter.union(symbol);
667
    ///
668
    /// assert!(union.contains(GeneralCategory::MathSymbol));
669
    /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
670
    /// assert!(union.contains(GeneralCategory::UppercaseLetter));
671
    /// ```
672
0
    pub const fn union(self, other: Self) -> Self {
673
0
        Self(self.0 | other.0)
674
0
    }
675
676
    /// Take the intersection of two groups
677
    ///
678
    /// # Example
679
    ///
680
    /// ```rust
681
    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
682
    ///
683
    /// let letter = GeneralCategoryGroup::Letter;
684
    /// let lu = GeneralCategoryGroup::UppercaseLetter;
685
    /// let intersection = letter.intersection(lu);
686
    ///
687
    /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
688
    /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
689
    /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
690
    /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
691
    /// ```
692
0
    pub const fn intersection(self, other: Self) -> Self {
693
0
        Self(self.0 & other.0)
694
0
    }
695
}
696
697
impl From<GeneralCategory> for GeneralCategoryGroup {
698
0
    fn from(subcategory: GeneralCategory) -> Self {
699
0
        GeneralCategoryGroup(1 << (subcategory as u32))
700
0
    }
701
}
702
impl From<u32> for GeneralCategoryGroup {
703
0
    fn from(mask: u32) -> Self {
704
        // Mask off things not in Self::ALL to guarantee the mask
705
        // values stay in-range
706
0
        GeneralCategoryGroup(mask & Self::ALL)
707
0
    }
708
}
709
impl From<GeneralCategoryGroup> for u32 {
710
0
    fn from(group: GeneralCategoryGroup) -> Self {
711
0
        group.0
712
0
    }
713
}
714
715
/// Enumerated property Script.
///
/// Both the Script and the Script_Extensions Unicode properties use this type.
/// Every character is assigned a single Script, but a character that is used in
/// a particular subset of scripts is in more than one Script_Extensions set.
/// DEVANAGARI DIGIT NINE, for example, has Script=Devanagari but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. To determine whether
/// a code point belongs to a certain script, use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han);  // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);

impl Script {
    /// Returns an ICU4C `UScriptCode` value.
    pub const fn to_icu4c_value(self) -> u16 {
        let Script(code) = self;
        code
    }
    /// Constructor from an ICU4C `UScriptCode` value.
    pub const fn from_icu4c_value(value: u16) -> Self {
        Script(value)
    }
}
753
754
// Open-enum values of `Script`. Each constant wraps the `u16` code that
// `Script::to_icu4c_value` hands back as an ICU4C `UScriptCode`. Entries are
// listed in case-insensitive alphabetical order by name, not by code.
// NOTE(review): the trailing `#[test] fn script_consts();` is input to
// `create_const_array!` — presumably it names the generated completeness
// test; confirm against the macro definition.
create_const_array! {
755
#[allow(missing_docs)] // These constants don't need individual documentation.
756
#[allow(non_upper_case_globals)]
757
impl Script {
758
    pub const Adlam: Script = Script(167);
759
    pub const Ahom: Script = Script(161);
760
    pub const AnatolianHieroglyphs: Script = Script(156);
761
    pub const Arabic: Script = Script(2);
762
    pub const Armenian: Script = Script(3);
763
    pub const Avestan: Script = Script(117);
764
    pub const Balinese: Script = Script(62);
765
    pub const Bamum: Script = Script(130);
766
    pub const BassaVah: Script = Script(134);
767
    pub const Batak: Script = Script(63);
768
    pub const Bengali: Script = Script(4);
769
    pub const BeriaErfe: Script = Script(208);
770
    pub const Bhaiksuki: Script = Script(168);
771
    pub const Bopomofo: Script = Script(5);
772
    pub const Brahmi: Script = Script(65);
773
    pub const Braille: Script = Script(46);
774
    pub const Buginese: Script = Script(55);
775
    pub const Buhid: Script = Script(44);
776
    pub const CanadianAboriginal: Script = Script(40);
777
    pub const Carian: Script = Script(104);
778
    pub const CaucasianAlbanian: Script = Script(159);
779
    pub const Chakma: Script = Script(118);
780
    pub const Cham: Script = Script(66);
781
    pub const Cherokee: Script = Script(6);
782
    pub const Chorasmian: Script = Script(189);
783
    pub const Common: Script = Script(0);
784
    pub const Coptic: Script = Script(7);
785
    pub const Cuneiform: Script = Script(101);
786
    pub const Cypriot: Script = Script(47);
787
    pub const CyproMinoan: Script = Script(193);
788
    pub const Cyrillic: Script = Script(8);
789
    pub const Deseret: Script = Script(9);
790
    pub const Devanagari: Script = Script(10);
791
    pub const DivesAkuru: Script = Script(190);
792
    pub const Dogra: Script = Script(178);
793
    pub const Duployan: Script = Script(135);
794
    pub const EgyptianHieroglyphs: Script = Script(71);
795
    pub const Elbasan: Script = Script(136);
796
    pub const Elymaic: Script = Script(185);
797
    pub const Ethiopian: Script = Script(11);
798
    pub const Garay: Script = Script(201);
799
    pub const Georgian: Script = Script(12);
800
    pub const Glagolitic: Script = Script(56);
801
    pub const Gothic: Script = Script(13);
802
    pub const Grantha: Script = Script(137);
803
    pub const Greek: Script = Script(14);
804
    pub const Gujarati: Script = Script(15);
805
    pub const GunjalaGondi: Script = Script(179);
806
    pub const Gurmukhi: Script = Script(16);
807
    pub const GurungKhema: Script = Script(202);
808
    pub const Han: Script = Script(17);
809
    pub const Hangul: Script = Script(18);
810
    pub const HanifiRohingya: Script = Script(182);
811
    pub const Hanunoo: Script = Script(43);
812
    pub const Hatran: Script = Script(162);
813
    pub const Hebrew: Script = Script(19);
814
    pub const Hiragana: Script = Script(20);
815
    pub const ImperialAramaic: Script = Script(116);
816
    pub const Inherited: Script = Script(1);
817
    pub const InscriptionalPahlavi: Script = Script(122);
818
    pub const InscriptionalParthian: Script = Script(125);
819
    pub const Javanese: Script = Script(78);
820
    pub const Kaithi: Script = Script(120);
821
    pub const Kannada: Script = Script(21);
822
    pub const Katakana: Script = Script(22);
823
    pub const Kawi: Script = Script(198);
824
    pub const KayahLi: Script = Script(79);
825
    pub const Kharoshthi: Script = Script(57);
826
    pub const KhitanSmallScript: Script = Script(191);
827
    pub const Khmer: Script = Script(23);
828
    pub const Khojki: Script = Script(157);
829
    pub const Khudawadi: Script = Script(145);
830
    pub const KiratRai: Script = Script(203);
831
    pub const Lao: Script = Script(24);
832
    pub const Latin: Script = Script(25);
833
    pub const Lepcha: Script = Script(82);
834
    pub const Limbu: Script = Script(48);
835
    pub const LinearA: Script = Script(83);
836
    pub const LinearB: Script = Script(49);
837
    pub const Lisu: Script = Script(131);
838
    pub const Lycian: Script = Script(107);
839
    pub const Lydian: Script = Script(108);
840
    pub const Mahajani: Script = Script(160);
841
    pub const Makasar: Script = Script(180);
842
    pub const Malayalam: Script = Script(26);
843
    pub const Mandaic: Script = Script(84);
844
    pub const Manichaean: Script = Script(121);
845
    pub const Marchen: Script = Script(169);
846
    pub const MasaramGondi: Script = Script(175);
847
    pub const Medefaidrin: Script = Script(181);
848
    pub const MeeteiMayek: Script = Script(115);
849
    pub const MendeKikakui: Script = Script(140);
850
    pub const MeroiticCursive: Script = Script(141);
851
    pub const MeroiticHieroglyphs: Script = Script(86);
852
    pub const Miao: Script = Script(92);
853
    pub const Modi: Script = Script(163);
854
    pub const Mongolian: Script = Script(27);
855
    pub const Mro: Script = Script(149);
856
    pub const Multani: Script = Script(164);
857
    pub const Myanmar: Script = Script(28);
858
    pub const Nabataean: Script = Script(143);
859
    pub const NagMundari: Script = Script(199);
860
    pub const Nandinagari: Script = Script(187);
861
    pub const Nastaliq: Script = Script(200);
862
    pub const Newa: Script = Script(170);
863
    pub const NewTaiLue: Script = Script(59);
864
    pub const Nko: Script = Script(87);
865
    pub const Nushu: Script = Script(150);
866
    pub const NyiakengPuachueHmong: Script = Script(186);
867
    pub const Ogham: Script = Script(29);
868
    pub const OlChiki: Script = Script(109);
869
    pub const OldHungarian: Script = Script(76);
870
    pub const OldItalic: Script = Script(30);
871
    pub const OldNorthArabian: Script = Script(142);
872
    pub const OldPermic: Script = Script(89);
873
    pub const OldPersian: Script = Script(61);
874
    pub const OldSogdian: Script = Script(184);
875
    pub const OldSouthArabian: Script = Script(133);
876
    pub const OldTurkic: Script = Script(88);
877
    pub const OldUyghur: Script = Script(194);
878
    pub const OlOnal: Script = Script(204);
879
    pub const Oriya: Script = Script(31);
880
    pub const Osage: Script = Script(171);
881
    pub const Osmanya: Script = Script(50);
882
    pub const PahawhHmong: Script = Script(75);
883
    pub const Palmyrene: Script = Script(144);
884
    pub const PauCinHau: Script = Script(165);
885
    pub const PhagsPa: Script = Script(90);
886
    pub const Phoenician: Script = Script(91);
887
    pub const PsalterPahlavi: Script = Script(123);
888
    pub const Rejang: Script = Script(110);
889
    pub const Runic: Script = Script(32);
890
    pub const Samaritan: Script = Script(126);
891
    pub const Saurashtra: Script = Script(111);
892
    pub const Sharada: Script = Script(151);
893
    pub const Shavian: Script = Script(51);
894
    pub const Siddham: Script = Script(166);
895
    pub const Sidetic: Script = Script(209);
896
    pub const SignWriting: Script = Script(112);
897
    pub const Sinhala: Script = Script(33);
898
    pub const Sogdian: Script = Script(183);
899
    pub const SoraSompeng: Script = Script(152);
900
    pub const Soyombo: Script = Script(176);
901
    pub const Sundanese: Script = Script(113);
902
    pub const Sunuwar: Script = Script(205);
903
    pub const SylotiNagri: Script = Script(58);
904
    pub const Syriac: Script = Script(34);
905
    pub const Tagalog: Script = Script(42);
906
    pub const Tagbanwa: Script = Script(45);
907
    pub const TaiLe: Script = Script(52);
908
    pub const TaiTham: Script = Script(106);
909
    pub const TaiViet: Script = Script(127);
910
    pub const TaiYo: Script = Script(210);
911
    pub const Takri: Script = Script(153);
912
    pub const Tamil: Script = Script(35);
913
    pub const Tangsa: Script = Script(195);
914
    pub const Tangut: Script = Script(154);
915
    pub const Telugu: Script = Script(36);
916
    pub const Thaana: Script = Script(37);
917
    pub const Thai: Script = Script(38);
918
    pub const Tibetan: Script = Script(39);
919
    pub const Tifinagh: Script = Script(60);
920
    pub const Tirhuta: Script = Script(158);
921
    pub const Todhri: Script = Script(206);
922
    pub const TolongSiki: Script = Script(211);
923
    pub const Toto: Script = Script(196);
924
    pub const TuluTigalari: Script = Script(207);
925
    pub const Ugaritic: Script = Script(53);
926
    pub const Unknown: Script = Script(103);
927
    pub const Vai: Script = Script(99);
928
    pub const Vithkuqi: Script = Script(197);
929
    pub const Wancho: Script = Script(188);
930
    pub const WarangCiti: Script = Script(146);
931
    pub const Yezidi: Script = Script(192);
932
    pub const Yi: Script = Script(41);
933
    pub const ZanabazarSquare: Script = Script(177);
934
}
935
#[test]
936
fn script_consts();
937
}
938
939
impl Script {
940
    // Doesn't actually exist!
941
    #[doc(hidden)]
942
    #[allow(non_upper_case_globals)]
943
    #[deprecated]
944
    // Some high value that ICU4C will not use anytime soon
945
    pub const Chisoi: Script = Self(60_000);
946
}
947
948
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
#[cfg(feature = "compiled_data")]
impl From<Script> for icu_locale_core::subtags::Script {
    /// Converts a property `Script` into a locale script subtag, using `Zzzz`
    /// when the short-name lookup yields nothing.
    fn from(value: Script) -> Self {
        let short_names = crate::PropertyNamesShort::new();
        let fallback = icu_locale_core::subtags::script!("Zzzz");
        short_names.get_locale_script(value).unwrap_or(fallback)
    }
}
957
958
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
#[cfg(feature = "compiled_data")]
impl From<icu_locale_core::subtags::Script> for Script {
    /// Converts a locale script subtag into a property `Script`, using
    /// `Script::Unknown` when the strict name lookup yields nothing.
    fn from(value: icu_locale_core::subtags::Script) -> Self {
        let parser = crate::PropertyParser::new();
        let subtag = value.as_str();
        parser.get_strict(subtag).unwrap_or(Self::Unknown)
    }
}
967
968
// Passes `Script`'s property metadata to `make_enumerated_property!`: the
// long and short property names, the type itself, its data marker, its
// singleton, and the ULE type (unaligned `u16`, matching the `u16` newtype).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl —
// confirm against the macro definition.
make_enumerated_property! {
969
    name: "Script";
970
    short_name: "sc";
971
    ident: Script;
972
    data_marker: crate::provider::PropertyEnumScriptV1;
973
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
974
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
975
}
976
977
/// Enumerated property Hangul_Syllable_Type
///
/// Arbitrary Hangul syllables can be written either with the precomposed Hangul
/// syllables or with the conjoining Jamo that the Unicode standard provides.
/// This property provides that ontology of Hangul code points.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('ᄀ'),
///     HangulSyllableType::LeadingJamo
/// ); // U+1100
/// assert_eq!(
///     CodePointMapData::<HangulSyllableType>::new().get('가'),
///     HangulSyllableType::LeadingVowelSyllable
/// ); // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Returns an ICU4C `UHangulSyllableType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let HangulSyllableType(code) = self;
        code
    }
    /// Constructor from an ICU4C `UHangulSyllableType` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        HangulSyllableType(value)
    }
}
1014
1015
// Open-enum values of `HangulSyllableType`, numbered consecutively from 0.
// The parenthesised tag in each doc comment (`NA`, `L`, `V`, ...) is the
// value's short label.
// NOTE(review): the trailing `#[test] fn hangul_syllable_type_consts();` is
// input to `create_const_array!` — presumably it names the generated
// completeness test; confirm against the macro definition.
create_const_array! {
1016
#[allow(non_upper_case_globals)]
1017
impl HangulSyllableType {
1018
    /// (`NA`) not applicable (e.g. not a Hangul code point).
1019
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
1020
    /// (`L`) a conjoining leading consonant Jamo.
1021
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
1022
    /// (`V`) a conjoining vowel Jamo.
1023
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
1024
    /// (`T`) a conjoining trailing consonant Jamo.
1025
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
1026
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
1027
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
1028
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
1029
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
1030
}
1031
#[test]
1032
fn hangul_syllable_type_consts();
1033
}
1034
1035
// Passes `HangulSyllableType`'s property metadata to
// `make_enumerated_property!`: the long and short property names, the type
// itself, its data marker, its singleton, and the ULE type (`u8`).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl —
// confirm against the macro definition.
make_enumerated_property! {
1036
    name: "Hangul_Syllable_Type";
1037
    short_name: "hst";
1038
    ident: HangulSyllableType;
1039
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
1040
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
1041
    ule_ty: u8;
1042
1043
}
1044
1045
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// The halfwidth character is written as an escape so that it cannot be
/// confused with (or normalized into) its wide counterpart.
///
/// ```
/// use icu::properties::{props::EastAsianWidth, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('\u{FF71}'),
///     EastAsianWidth::Halfwidth
/// ); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(
///     CodePointMapData::<EastAsianWidth>::new().get('ア'),
///     EastAsianWidth::Wide
/// ); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UEastAsianWidth` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1080
1081
// Open-enum values of `EastAsianWidth`, numbered consecutively from 0.
// NOTE(review): the trailing `//name="…"` comments look like the short
// property-value aliases, and `#[test] fn east_asian_width_consts();` is
// input to `create_const_array!` — presumably it names the generated
// completeness test; confirm both against the macro definition.
create_const_array! {
1082
#[allow(missing_docs)] // These constants don't need individual documentation.
1083
#[allow(non_upper_case_globals)]
1084
impl EastAsianWidth {
1085
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
1086
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
1087
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
1088
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
1089
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
1090
    pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
1091
}
1092
#[test]
1093
fn east_asian_width_consts();
1094
}
1095
1096
// Passes `EastAsianWidth`'s property metadata to `make_enumerated_property!`:
// the long and short property names, the type itself, its data marker, its
// singleton, and the ULE type (`u8`).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl —
// confirm against the macro definition.
make_enumerated_property! {
1097
    name: "East_Asian_Width";
1098
    short_name: "ea";
1099
    ident: EastAsianWidth;
1100
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
1101
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
1102
    ule_ty: u8;
1103
}
1104
1105
/// Enumerated property Line_Break.
///
/// Each property value is summarized under "Line Breaking Properties" in
/// UAX #14: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get(')'),
///     LineBreak::CloseParenthesis
/// ); // U+0029: Right Parenthesis
/// assert_eq!(
///     CodePointMapData::<LineBreak>::new().get('ぁ'),
///     LineBreak::ConditionalJapaneseStarter
/// ); //U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Returns an ICU4C `ULineBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let LineBreak(code) = self;
        code
    }
    /// Constructor from an ICU4C `ULineBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        LineBreak(value)
    }
}
1144
1145
// Open-enum values of `LineBreak`, numbered consecutively from 0 in the order
// they were added (see the "Added in ICU …" markers near the end).
// NOTE(review): the trailing `// name="…"` comments look like the short
// property-value aliases, and `#[test] fn line_break_consts();` is input to
// `create_const_array!` — presumably it names the generated completeness
// test; confirm both against the macro definition.
create_const_array! {
1146
#[allow(missing_docs)] // These constants don't need individual documentation.
1147
#[allow(non_upper_case_globals)]
1148
impl LineBreak {
1149
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
1150
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
1151
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
1152
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
1153
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
1154
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
1155
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
1156
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
1157
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
1158
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
1159
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
1160
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
1161
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
1162
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
1163
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
1164
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
1165
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
1166
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
1167
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
1168
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
1169
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
1170
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
1171
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
1172
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
1173
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
1174
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
1175
    pub const Space: LineBreak = LineBreak(26); // name="SP"
1176
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
1177
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
1178
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
1179
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
1180
    pub const H2: LineBreak = LineBreak(31); // name="H2"
1181
    pub const H3: LineBreak = LineBreak(32); // name="H3"
1182
    pub const JL: LineBreak = LineBreak(33); // name="JL"
1183
    pub const JT: LineBreak = LineBreak(34); // name="JT"
1184
    pub const JV: LineBreak = LineBreak(35); // name="JV"
1185
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
1186
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
1187
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
1188
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
1189
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
1190
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
1191
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"
1192
1193
    // Added in ICU 74:
1194
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
1195
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
1196
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
1197
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
1198
    pub const Virama: LineBreak = LineBreak(47); // name="VI"
1199
1200
    // Added in ICU 78:
1201
    pub const UnambiguousHyphen: LineBreak = LineBreak(48); // name="HH"
1202
}
1203
#[test]
1204
fn line_break_consts();
1205
}
1206
1207
// Passes `LineBreak`'s property metadata to `make_enumerated_property!`: the
// long and short property names, the type itself, its data marker, its
// singleton, and the ULE type (`u8`).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl —
// confirm against the macro definition.
make_enumerated_property! {
1208
    name: "Line_Break";
1209
    short_name: "lb";
1210
    ident: LineBreak;
1211
    data_marker: crate::provider::PropertyEnumLineBreakV1;
1212
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
1213
    ule_ty: u8;
1214
}
1215
1216
/// Enumerated property Grapheme_Cluster_Break.
///
/// Each property value is summarized under "Default Grapheme Cluster Boundary
/// Specification" in UAX #29:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'),
///     GraphemeClusterBreak::RegionalIndicator
/// ); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(
///     CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'),
///     GraphemeClusterBreak::SpacingMark
/// ); //U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Returns an ICU4C `UGraphemeClusterBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let GraphemeClusterBreak(code) = self;
        code
    }
    /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        GraphemeClusterBreak(value)
    }
}
1254
1255
// Open-enum values of `GraphemeClusterBreak`, numbered consecutively from 0.
// Values 13 through 16 are kept but documented as obsolete and unused.
// NOTE(review): the trailing `// name="…"` comments look like the short
// property-value aliases, and `#[test] fn gcb_consts();` is input to
// `create_const_array!` — presumably it names the generated completeness
// test; confirm both against the macro definition.
create_const_array! {
1256
#[allow(missing_docs)] // These constants don't need individual documentation.
1257
#[allow(non_upper_case_globals)]
1258
impl GraphemeClusterBreak {
1259
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
1260
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
1261
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
1262
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
1263
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
1264
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
1265
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
1266
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
1267
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
1268
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
1269
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
1270
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
1271
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
1272
    /// This value is obsolete and unused.
1273
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
1274
    /// This value is obsolete and unused.
1275
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
1276
    /// This value is obsolete and unused.
1277
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
1278
    /// This value is obsolete and unused.
1279
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
1280
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
1281
}
1282
#[test]
1283
fn gcb_consts();
1284
}
1285
1286
// Passes `GraphemeClusterBreak`'s property metadata to
// `make_enumerated_property!`: the long and short property names, the type
// itself, its data marker, its singleton, and the ULE type (`u8`).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl —
// confirm against the macro definition.
make_enumerated_property! {
1287
    name: "Grapheme_Cluster_Break";
1288
    short_name: "GCB";
1289
    ident: GraphemeClusterBreak;
1290
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
1291
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
1292
    ule_ty: u8;
1293
}
1294
1295
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// The fullwidth comma is written as an escape so that it cannot be confused
/// with (or normalized into) the ASCII comma.
///
/// ```
/// use icu::properties::{props::WordBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('.'),
///     WordBreak::MidNumLet
/// ); // U+002E: Full Stop
/// assert_eq!(
///     CodePointMapData::<WordBreak>::new().get('\u{FF0C}'),
///     WordBreak::MidNum
/// ); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns an ICU4C `UWordBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UWordBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1333
1334
// Open-enum values of `WordBreak`, numbered consecutively from 0.
// Values 17 through 20 are kept but documented as obsolete and unused.
// NOTE(review): the trailing `// name="…"` comments look like the short
// property-value aliases, and `#[test] fn word_break_consts();` is input to
// `create_const_array!` — presumably it names the generated completeness
// test; confirm both against the macro definition.
create_const_array! {
1335
#[allow(missing_docs)] // These constants don't need individual documentation.
1336
#[allow(non_upper_case_globals)]
1337
impl WordBreak {
1338
    pub const Other: WordBreak = WordBreak(0); // name="XX"
1339
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
1340
    pub const Format: WordBreak = WordBreak(2); // name="FO"
1341
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
1342
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
1343
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
1344
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
1345
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
1346
    pub const CR: WordBreak = WordBreak(8); // name="CR"
1347
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
1348
    pub const LF: WordBreak = WordBreak(10); // name="LF"
1349
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
1350
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
1351
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
1352
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
1353
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
1354
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
1355
    /// This value is obsolete and unused.
1356
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
1357
    /// This value is obsolete and unused.
1358
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
1359
    /// This value is obsolete and unused.
1360
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
1361
    /// This value is obsolete and unused.
1362
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
1363
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
1364
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
1365
}
1366
#[test]
1367
fn word_break_consts();
1368
}
1369
1370
// Passes `WordBreak`'s property metadata to `make_enumerated_property!`: the
// long and short property names, the type itself, its data marker, its
// singleton, and the ULE type (`u8`).
// NOTE(review): presumably this generates the `EnumeratedProperty` impl —
// confirm against the macro definition.
make_enumerated_property! {
1371
    name: "Word_Break";
1372
    short_name: "WB";
1373
    ident: WordBreak;
1374
    data_marker: crate::provider::PropertyEnumWordBreakV1;
1375
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
1376
    ule_ty: u8;
1377
}
1378
1379
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// The fullwidth digit is written as an escape so that it cannot be confused
/// with (or normalized into) the ASCII digit.
///
/// ```
/// use icu::properties::{props::SentenceBreak, CodePointMapData};
///
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get('\u{FF19}'),
///     SentenceBreak::Numeric
/// ); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(
///     CodePointMapData::<SentenceBreak>::new().get(','),
///     SentenceBreak::SContinue
/// ); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `USentenceBreak` value.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1417
1418
create_const_array! {
1419
#[allow(missing_docs)] // These constants don't need individual documentation.
1420
#[allow(non_upper_case_globals)]
1421
impl SentenceBreak {
1422
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
1423
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
1424
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
1425
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
1426
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
1427
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
1428
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
1429
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
1430
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
1431
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
1432
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
1433
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
1434
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
1435
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
1436
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
1437
}
1438
#[test]
1439
fn sentence_break_consts();
1440
}
1441
1442
make_enumerated_property! {
1443
    name: "Sentence_Break";
1444
    short_name: "SB";
1445
    ident: SentenceBreak;
1446
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
1447
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
1448
    ule_ty: u8;
1449
}
1450
1451
/// Property Canonical_Combining_Class.
1452
/// See UAX #15:
1453
/// <https://www.unicode.org/reports/tr15/>.
1454
///
1455
/// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API
1456
/// to look up the Canonical_Combining_Class property by scalar value.
1457
///
1458
/// **Note:** `icu::normalizer::properties::CanonicalCombiningClassMap` is the preferred API
1459
/// to look up the Canonical_Combining_Class property by scalar value.
1460
///
1461
/// # Example
1462
///
1463
/// ```
1464
/// use icu::properties::{props::CanonicalCombiningClass, CodePointMapData};
1465
///
1466
/// assert_eq!(
1467
///     CodePointMapData::<CanonicalCombiningClass>::new().get('a'),
1468
///     CanonicalCombiningClass::NotReordered
1469
/// ); // U+0061: LATIN SMALL LETTER A
1470
/// assert_eq!(
1471
///     CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'),
1472
///     CanonicalCombiningClass::Above
1473
/// ); // U+0301: COMBINING ACUTE ACCENT
1474
/// ```
1475
//
1476
// NOTE: The Pernosco debugger has special knowledge
1477
// of this struct. Please do not change the bit layout
1478
// or the crate-module-qualified name of this struct
1479
// without coordination.
1480
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1481
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1482
#[allow(clippy::exhaustive_structs)] // newtype
1483
#[repr(transparent)]
1484
pub struct CanonicalCombiningClass(pub(crate) u8);
1485
1486
impl CanonicalCombiningClass {
1487
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
1488
0
    pub const fn to_icu4c_value(self) -> u8 {
1489
0
        self.0
1490
0
    }
1491
    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
1492
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1493
0
        Self(value)
1494
0
    }
1495
}
1496
1497
create_const_array! {
1498
// These constant names come from PropertyValueAliases.txt
1499
#[allow(missing_docs)] // These constants don't need individual documentation.
1500
#[allow(non_upper_case_globals)]
1501
impl CanonicalCombiningClass {
1502
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
1503
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
1504
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
1505
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
1506
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
1507
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
1508
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
1509
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
1510
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
1511
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
1512
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
1513
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
1514
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
1515
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
1516
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
1517
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
1518
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
1519
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
1520
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
1521
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
1522
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
1523
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
1524
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
1525
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
1526
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
1527
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
1528
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
1529
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
1530
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
1531
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
1532
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
1533
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
1534
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
1535
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
1536
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
1537
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
1538
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
1539
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
1540
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
1541
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
1542
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
1543
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
1544
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
1545
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
1546
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
1547
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
1548
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
1549
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
1550
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
1551
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
1552
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
1553
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
1554
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
1555
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
1556
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
1557
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
1558
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
1559
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
1560
}
1561
#[test]
1562
fn ccc_consts();
1563
}
1564
1565
make_enumerated_property! {
1566
    name: "Canonical_Combining_Class";
1567
    short_name: "ccc";
1568
    ident: CanonicalCombiningClass;
1569
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
1570
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
1571
    ule_ty: u8;
1572
}
1573
1574
/// Property Indic_Conjunct_Break.
1575
/// See UAX #44:
1576
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
1577
///
1578
/// # Example
1579
///
1580
/// ```
1581
/// use icu::properties::{props::IndicConjunctBreak, CodePointMapData};
1582
///
1583
/// assert_eq!(
1584
///     CodePointMapData::<IndicConjunctBreak>::new().get('a'),
1585
///     IndicConjunctBreak::None
1586
/// );
1587
/// assert_eq!(
1588
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'),
1589
///     IndicConjunctBreak::Linker
1590
/// );
1591
/// assert_eq!(
1592
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'),
1593
///     IndicConjunctBreak::Consonant
1594
/// );
1595
/// assert_eq!(
1596
///     CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'),
1597
///     IndicConjunctBreak::Extend
1598
/// );
1599
/// ```
1600
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1601
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1602
#[allow(clippy::exhaustive_structs)] // newtype
1603
#[repr(transparent)]
1604
pub struct IndicConjunctBreak(pub(crate) u8);
1605
1606
impl IndicConjunctBreak {
1607
    /// Returns an ICU4C `UIndicConjunctBreak` value.
1608
0
    pub const fn to_icu4c_value(self) -> u8 {
1609
0
        self.0
1610
0
    }
1611
    /// Constructor from an ICU4C `UIndicConjunctBreak` value.
1612
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1613
0
        Self(value)
1614
0
    }
1615
}
1616
1617
create_const_array! {
1618
#[allow(missing_docs)] // These constants don't need individual documentation.
1619
#[allow(non_upper_case_globals)]
1620
impl IndicConjunctBreak {
1621
    pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
1622
    pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
1623
    pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
1624
    pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
1625
}
1626
#[test]
1627
fn indic_conjunct_break_consts();
1628
}
1629
1630
make_enumerated_property! {
1631
    name: "Indic_Conjunct_Break";
1632
    short_name: "InCB";
1633
    ident: IndicConjunctBreak;
1634
    data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
1635
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
1636
    ule_ty: u8;
1637
}
1638
1639
/// Property Indic_Syllabic_Category.
1640
/// See UAX #44:
1641
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
1642
///
1643
/// # Example
1644
///
1645
/// ```
1646
/// use icu::properties::{props::IndicSyllabicCategory, CodePointMapData};
1647
///
1648
/// assert_eq!(
1649
///     CodePointMapData::<IndicSyllabicCategory>::new().get('a'),
1650
///     IndicSyllabicCategory::Other
1651
/// );
1652
/// assert_eq!(
1653
///     CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'),
1654
///     IndicSyllabicCategory::Bindu
1655
/// ); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
1656
/// ```
1657
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1658
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1659
#[allow(clippy::exhaustive_structs)] // newtype
1660
#[repr(transparent)]
1661
pub struct IndicSyllabicCategory(pub(crate) u8);
1662
1663
impl IndicSyllabicCategory {
1664
    /// Returns an ICU4C `UIndicSyllabicCategory` value.
1665
0
    pub const fn to_icu4c_value(self) -> u8 {
1666
0
        self.0
1667
0
    }
1668
    /// Constructor from an ICU4C `UIndicSyllabicCategory` value.
1669
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1670
0
        Self(value)
1671
0
    }
1672
}
1673
1674
create_const_array! {
1675
#[allow(missing_docs)] // These constants don't need individual documentation.
1676
#[allow(non_upper_case_globals)]
1677
impl IndicSyllabicCategory {
1678
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
1679
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
1680
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
1681
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
1682
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
1683
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
1684
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
1685
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
1686
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
1687
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
1688
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
1689
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
1690
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
1691
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
1692
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
1693
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(15);
1694
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(16);
1695
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
1696
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
1697
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
1698
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
1699
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
1700
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
1701
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
1702
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
1703
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
1704
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
1705
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
1706
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
1707
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
1708
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
1709
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
1710
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
1711
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
1712
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
1713
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
1714
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
1715
}
1716
#[test]
1717
fn indic_syllabic_category_consts();
1718
}
1719
1720
make_enumerated_property! {
1721
    name: "Indic_Syllabic_Category";
1722
    short_name: "InSC";
1723
    ident: IndicSyllabicCategory;
1724
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
1725
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
1726
    ule_ty: u8;
1727
}
1728
1729
/// Enumerated property Joining_Type.
1730
///
1731
/// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of
1732
/// each property value.
1733
///
1734
/// # Example
1735
///
1736
/// ```
1737
/// use icu::properties::{props::JoiningType, CodePointMapData};
1738
///
1739
/// assert_eq!(
1740
///     CodePointMapData::<JoiningType>::new().get('ؠ'),
1741
///     JoiningType::DualJoining
1742
/// ); // U+0620: Arabic Letter Kashmiri Yeh
1743
/// assert_eq!(
1744
///     CodePointMapData::<JoiningType>::new().get('𐫍'),
1745
///     JoiningType::LeftJoining
1746
/// ); // U+10ACD: Manichaean Letter Heth
1747
/// ```
1748
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1749
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1750
#[allow(clippy::exhaustive_structs)] // newtype
1751
#[repr(transparent)]
1752
pub struct JoiningType(pub(crate) u8);
1753
1754
impl JoiningType {
1755
    /// Returns an ICU4C `UJoiningType` value.
1756
0
    pub const fn to_icu4c_value(self) -> u8 {
1757
0
        self.0
1758
0
    }
1759
    /// Constructor from an ICU4C `UJoiningType` value.
1760
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1761
0
        Self(value)
1762
0
    }
1763
}
1764
1765
create_const_array! {
1766
#[allow(missing_docs)] // These constants don't need individual documentation.
1767
#[allow(non_upper_case_globals)]
1768
impl JoiningType {
1769
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
1770
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
1771
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
1772
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
1773
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
1774
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
1775
}
1776
#[test]
1777
fn joining_type_consts();
1778
}
1779
1780
make_enumerated_property! {
1781
    name: "Joining_Type";
1782
    short_name: "jt";
1783
    ident: JoiningType;
1784
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
1785
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
1786
    ule_ty: u8;
1787
}
1788
1789
/// Property Vertical_Orientation.
1790
///
1791
/// See UTR #50:
1792
/// <https://www.unicode.org/reports/tr50/#vo>
1793
///
1794
/// # Example
1795
///
1796
/// ```
1797
/// use icu::properties::{props::VerticalOrientation, CodePointMapData};
1798
///
1799
/// assert_eq!(
1800
///     CodePointMapData::<VerticalOrientation>::new().get('a'),
1801
///     VerticalOrientation::Rotated
1802
/// );
1803
/// assert_eq!(
1804
///     CodePointMapData::<VerticalOrientation>::new().get('§'),
1805
///     VerticalOrientation::Upright
1806
/// );
1807
/// assert_eq!(
1808
///     CodePointMapData::<VerticalOrientation>::new().get32(0x2329),
1809
///     VerticalOrientation::TransformedRotated
1810
/// );
1811
/// assert_eq!(
1812
///     CodePointMapData::<VerticalOrientation>::new().get32(0x3001),
1813
///     VerticalOrientation::TransformedUpright
1814
/// );
1815
/// ```
1816
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1817
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1818
#[allow(clippy::exhaustive_structs)] // newtype
1819
#[repr(transparent)]
1820
pub struct VerticalOrientation(pub(crate) u8);
1821
1822
impl VerticalOrientation {
1823
    /// Returns an ICU4C `UVerticalOrientation` value.
1824
0
    pub const fn to_icu4c_value(self) -> u8 {
1825
0
        self.0
1826
0
    }
1827
    /// Constructor from an ICU4C `UVerticalOrientation` value.
1828
0
    pub const fn from_icu4c_value(value: u8) -> Self {
1829
0
        Self(value)
1830
0
    }
1831
}
1832
1833
create_const_array! {
1834
#[allow(missing_docs)] // These constants don't need individual documentation.
1835
#[allow(non_upper_case_globals)]
1836
impl VerticalOrientation {
1837
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
1838
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
1839
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
1840
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
1841
}
1842
#[test]
1843
fn vertical_orientation_consts();
1844
}
1845
1846
make_enumerated_property! {
1847
    name: "Vertical_Orientation";
1848
    short_name: "vo";
1849
    ident: VerticalOrientation;
1850
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
1851
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
1852
    ule_ty: u8;
1853
}
1854
1855
pub use crate::code_point_set::BinaryProperty;
1856
1857
macro_rules! make_binary_property {
1858
    (
1859
        name: $name:literal;
1860
        short_name: $short_name:literal;
1861
        ident: $ident:ident;
1862
        data_marker: $data_marker:ty;
1863
        singleton: $singleton:ident;
1864
            $(#[$doc:meta])+
1865
    ) => {
1866
        $(#[$doc])+
1867
        #[derive(Debug)]
1868
        #[non_exhaustive]
1869
        pub struct $ident;
1870
1871
        impl crate::private::Sealed for $ident {}
1872
1873
        impl BinaryProperty for $ident {
1874
        type DataMarker = $data_marker;
1875
            #[cfg(feature = "compiled_data")]
1876
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
1877
                &crate::provider::Baked::$singleton;
1878
            const NAME: &'static [u8] = $name.as_bytes();
1879
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
1880
        }
1881
    };
1882
}
1883
1884
make_binary_property! {
1885
    name: "ASCII_Hex_Digit";
1886
    short_name: "AHex";
1887
    ident: AsciiHexDigit;
1888
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
1889
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
1890
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
1891
    ///
1892
    /// # Example
1893
    ///
1894
    /// ```
1895
    /// use icu::properties::CodePointSetData;
1896
    /// use icu::properties::props::AsciiHexDigit;
1897
    ///
1898
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
1899
    ///
1900
    /// assert!(ascii_hex_digit.contains('3'));
1901
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
1902
    /// assert!(ascii_hex_digit.contains('A'));
1903
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1904
    /// ```
1905
}
1906
1907
make_binary_property! {
1908
    name: "alnum";
1909
    short_name: "alnum";
1910
    ident: Alnum;
1911
    data_marker: crate::provider::PropertyBinaryAlnumV1;
1912
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
1913
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
1914
    ///
1915
    /// This is defined for POSIX compatibility.
1916
}
1917
1918
make_binary_property! {
1919
    name: "Alphabetic";
1920
    short_name: "Alpha";
1921
    ident: Alphabetic;
1922
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
1923
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
1924
    /// Alphabetic characters.
1925
    ///
1926
    /// # Example
1927
    ///
1928
    /// ```
1929
    /// use icu::properties::CodePointSetData;
1930
    /// use icu::properties::props::Alphabetic;
1931
    ///
1932
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
1933
    ///
1934
    /// assert!(!alphabetic.contains('3'));
1935
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
1936
    /// assert!(alphabetic.contains('A'));
1937
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1938
    /// ```
1939
1940
}
1941
1942
make_binary_property! {
1943
    name: "Bidi_Control";
1944
    short_name: "Bidi_C";
1945
    ident: BidiControl;
1946
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
1947
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
1948
    /// Format control characters which have specific functions in the Unicode Bidirectional
1949
    /// Algorithm.
1950
    ///
1951
    /// # Example
1952
    ///
1953
    /// ```
1954
    /// use icu::properties::CodePointSetData;
1955
    /// use icu::properties::props::BidiControl;
1956
    ///
1957
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
1958
    ///
1959
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
1960
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
1961
    /// ```
1962
1963
}
1964
1965
make_binary_property! {
1966
    name: "Bidi_Mirrored";
1967
    short_name: "Bidi_M";
1968
    ident: BidiMirrored;
1969
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
1970
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
1971
    /// Characters that are mirrored in bidirectional text.
1972
    ///
1973
    /// # Example
1974
    ///
1975
    /// ```
1976
    /// use icu::properties::CodePointSetData;
1977
    /// use icu::properties::props::BidiMirrored;
1978
    ///
1979
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
1980
    ///
1981
    /// assert!(bidi_mirrored.contains('['));
1982
    /// assert!(bidi_mirrored.contains(']'));
1983
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
1984
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
1985
    /// ```
1986
1987
}
1988
1989
make_binary_property! {
1990
    name: "blank";
1991
    short_name: "blank";
1992
    ident: Blank;
1993
    data_marker: crate::provider::PropertyBinaryBlankV1;
1994
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
1995
    /// Horizontal whitespace characters.
1996
1997
}
1998
1999
make_binary_property! {
2000
    name: "Cased";
2001
    short_name: "Cased";
2002
    ident: Cased;
2003
    data_marker: crate::provider::PropertyBinaryCasedV1;
2004
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
2005
    /// Uppercase, lowercase, and titlecase characters.
2006
    ///
2007
    /// # Example
2008
    ///
2009
    /// ```
2010
    /// use icu::properties::CodePointSetData;
2011
    /// use icu::properties::props::Cased;
2012
    ///
2013
    /// let cased = CodePointSetData::new::<Cased>();
2014
    ///
2015
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
2016
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
2017
    /// ```
2018
2019
}
2020
2021
make_binary_property! {
2022
    name: "Case_Ignorable";
2023
    short_name: "CI";
2024
    ident: CaseIgnorable;
2025
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
2026
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
2027
    /// Characters which are ignored for casing purposes.
2028
    ///
2029
    /// # Example
2030
    ///
2031
    /// ```
2032
    /// use icu::properties::CodePointSetData;
2033
    /// use icu::properties::props::CaseIgnorable;
2034
    ///
2035
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
2036
    ///
2037
    /// assert!(case_ignorable.contains(':'));
2038
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMBDA
2039
    /// ```
2040
2041
}
2042
2043
make_binary_property! {
2044
    name: "Full_Composition_Exclusion";
2045
    short_name: "Comp_Ex";
2046
    ident: FullCompositionExclusion;
2047
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
2048
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
2049
    /// Characters that are excluded from composition.
2050
    ///
2051
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
2052
2053
}
2054
2055
make_binary_property! {
2056
    name: "Changes_When_Casefolded";
2057
    short_name: "CWCF";
2058
    ident: ChangesWhenCasefolded;
2059
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
2060
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
2061
    /// Characters whose normalized forms are not stable under case folding.
2062
    ///
2063
    /// # Example
2064
    ///
2065
    /// ```
2066
    /// use icu::properties::CodePointSetData;
2067
    /// use icu::properties::props::ChangesWhenCasefolded;
2068
    ///
2069
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
2070
    ///
2071
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
2072
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
2073
    /// ```
2074
2075
}
2076
2077
make_binary_property! {
2078
    name: "Changes_When_Casemapped";
2079
    short_name: "CWCM";
2080
    ident: ChangesWhenCasemapped;
2081
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
2082
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
2083
    /// Characters which may change when they undergo case mapping.
2084
2085
}
2086
2087
make_binary_property! {
2088
    name: "Changes_When_NFKC_Casefolded";
2089
    short_name: "CWKCF";
2090
    ident: ChangesWhenNfkcCasefolded;
2091
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
2092
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
2093
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
2094
    ///
2095
    /// # Example
2096
    ///
2097
    /// ```
2098
    /// use icu::properties::CodePointSetData;
2099
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
2100
    ///
2101
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
2102
    ///
2103
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
2104
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
2105
    /// ```
2106
2107
}
2108
2109
make_binary_property! {
2110
    name: "Changes_When_Lowercased";
2111
    short_name: "CWL";
2112
    ident: ChangesWhenLowercased;
2113
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
2114
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
2115
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
2116
    ///
2117
    /// # Example
2118
    ///
2119
    /// ```
2120
    /// use icu::properties::CodePointSetData;
2121
    /// use icu::properties::props::ChangesWhenLowercased;
2122
    ///
2123
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
2124
    ///
2125
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
2126
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
2127
    /// ```
2128
2129
}
2130
2131
make_binary_property! {
2132
    name: "Changes_When_Titlecased";
2133
    short_name: "CWT";
2134
    ident: ChangesWhenTitlecased;
2135
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
2136
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
2137
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
2138
    ///
2139
    /// # Example
2140
    ///
2141
    /// ```
2142
    /// use icu::properties::CodePointSetData;
2143
    /// use icu::properties::props::ChangesWhenTitlecased;
2144
    ///
2145
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
2146
    ///
2147
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
2148
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
2149
    /// ```
2150
2151
}
2152
2153
make_binary_property! {
2154
    name: "Changes_When_Uppercased";
2155
    short_name: "CWU";
2156
    ident: ChangesWhenUppercased;
2157
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
2158
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
2159
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
2160
    ///
2161
    /// # Example
2162
    ///
2163
    /// ```
2164
    /// use icu::properties::CodePointSetData;
2165
    /// use icu::properties::props::ChangesWhenUppercased;
2166
    ///
2167
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
2168
    ///
2169
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
2170
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
2171
    /// ```
2172
2173
}
2174
2175
make_binary_property! {
2176
    name: "Dash";
2177
    short_name: "Dash";
2178
    ident: Dash;
2179
    data_marker: crate::provider::PropertyBinaryDashV1;
2180
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
2181
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
2182
    /// their compatibility equivalents.
2183
    ///
2184
    /// # Example
2185
    ///
2186
    /// ```
2187
    /// use icu::properties::CodePointSetData;
2188
    /// use icu::properties::props::Dash;
2189
    ///
2190
    /// let dash = CodePointSetData::new::<Dash>();
2191
    ///
2192
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
2193
    /// assert!(dash.contains('-'));  // U+002D
2194
    /// assert!(!dash.contains('='));  // U+003D
2195
    /// ```
2196
2197
}
2198
2199
make_binary_property! {
2200
    name: "Deprecated";
2201
    short_name: "Dep";
2202
    ident: Deprecated;
2203
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
2204
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
2205
    /// Deprecated characters.
2206
    ///
2207
    /// No characters will ever be removed from the standard, but the
2208
    /// usage of deprecated characters is strongly discouraged.
2209
    ///
2210
    /// # Example
2211
    ///
2212
    /// ```
2213
    /// use icu::properties::CodePointSetData;
2214
    /// use icu::properties::props::Deprecated;
2215
    ///
2216
    /// let deprecated = CodePointSetData::new::<Deprecated>();
2217
    ///
2218
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
2219
    /// assert!(!deprecated.contains('A'));
2220
    /// ```
2221
2222
}
2223
2224
make_binary_property! {
2225
    name: "Default_Ignorable_Code_Point";
2226
    short_name: "DI";
2227
    ident: DefaultIgnorableCodePoint;
2228
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
2229
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
2230
    /// For programmatic determination of default ignorable code points.
2231
    ///
2232
    /// New characters that
2233
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
2234
    /// ranges, permitting programs to correctly handle the default rendering of such
2235
    /// characters when not otherwise supported.
2236
    ///
2237
    /// # Example
2238
    ///
2239
    /// ```
2240
    /// use icu::properties::CodePointSetData;
2241
    /// use icu::properties::props::DefaultIgnorableCodePoint;
2242
    ///
2243
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
2244
    ///
2245
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
2246
    /// assert!(!default_ignorable_code_point.contains('E'));
2247
    /// ```
2248
2249
}
2250
2251
make_binary_property! {
2252
    name: "Diacritic";
2253
    short_name: "Dia";
2254
    ident: Diacritic;
2255
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
2256
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
2257
    /// Characters that linguistically modify the meaning of another character to which they apply.
2258
    ///
2259
    /// # Example
2260
    ///
2261
    /// ```
2262
    /// use icu::properties::CodePointSetData;
2263
    /// use icu::properties::props::Diacritic;
2264
    ///
2265
    /// let diacritic = CodePointSetData::new::<Diacritic>();
2266
    ///
2267
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
2268
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
2269
    /// ```
2270
2271
}
2272
2273
make_binary_property! {
2274
    name: "Emoji_Modifier_Base";
2275
    short_name: "EBase";
2276
    ident: EmojiModifierBase;
2277
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
2278
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
2279
    /// Characters that can serve as a base for emoji modifiers.
2280
    ///
2281
    /// # Example
2282
    ///
2283
    /// ```
2284
    /// use icu::properties::CodePointSetData;
2285
    /// use icu::properties::props::EmojiModifierBase;
2286
    ///
2287
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
2288
    ///
2289
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
2290
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
2291
    /// ```
2292
2293
}
2294
2295
make_binary_property! {
2296
    name: "Emoji_Component";
2297
    short_name: "EComp";
2298
    ident: EmojiComponent;
2299
    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
2300
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
2301
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2302
    /// separate choices, such as base characters for emoji keycaps.
2303
    ///
2304
    /// # Example
2305
    ///
2306
    /// ```
2307
    /// use icu::properties::CodePointSetData;
2308
    /// use icu::properties::props::EmojiComponent;
2309
    ///
2310
    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2311
    ///
2312
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2313
    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
2314
    /// assert!(emoji_component.contains('7'));
2315
    /// assert!(!emoji_component.contains('T'));
2316
    /// ```
2317
2318
}
2319
2320
make_binary_property! {
2321
    name: "Emoji_Modifier";
2322
    short_name: "EMod";
2323
    ident: EmojiModifier;
2324
    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
2325
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
2326
    /// Characters that are emoji modifiers.
2327
    ///
2328
    /// # Example
2329
    ///
2330
    /// ```
2331
    /// use icu::properties::CodePointSetData;
2332
    /// use icu::properties::props::EmojiModifier;
2333
    ///
2334
    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2335
    ///
2336
    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
2337
    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2338
    /// ```
2339
2340
}
2341
2342
make_binary_property! {
2343
    name: "Emoji";
2344
    short_name: "Emoji";
2345
    ident: Emoji;
2346
    data_marker: crate::provider::PropertyBinaryEmojiV1;
2347
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
2348
    /// Characters that are emoji.
2349
    ///
2350
    /// # Example
2351
    ///
2352
    /// ```
2353
    /// use icu::properties::CodePointSetData;
2354
    /// use icu::properties::props::Emoji;
2355
    ///
2356
    /// let emoji = CodePointSetData::new::<Emoji>();
2357
    ///
2358
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
2359
    /// assert!(!emoji.contains('V'));
2360
    /// ```
2361
2362
}
2363
2364
make_binary_property! {
2365
    name: "Emoji_Presentation";
2366
    short_name: "EPres";
2367
    ident: EmojiPresentation;
2368
    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2369
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2370
    /// Characters that have emoji presentation by default.
2371
    ///
2372
    /// # Example
2373
    ///
2374
    /// ```
2375
    /// use icu::properties::CodePointSetData;
2376
    /// use icu::properties::props::EmojiPresentation;
2377
    ///
2378
    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2379
    ///
2380
    /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
2381
    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2382
    /// ```
2383
2384
}
2385
2386
make_binary_property! {
2387
    name: "Extender";
2388
    short_name: "Ext";
2389
    ident: Extender;
2390
    data_marker: crate::provider::PropertyBinaryExtenderV1;
2391
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2392
    /// Characters whose principal function is to extend the value of a preceding alphabetic
2393
    /// character or to extend the shape of adjacent characters.
2394
    ///
2395
    /// # Example
2396
    ///
2397
    /// ```
2398
    /// use icu::properties::CodePointSetData;
2399
    /// use icu::properties::props::Extender;
2400
    ///
2401
    /// let extender = CodePointSetData::new::<Extender>();
2402
    ///
2403
    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2404
    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2405
    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2406
    /// ```
2407
2408
}
2409
2410
make_binary_property! {
2411
    name: "Extended_Pictographic";
2412
    short_name: "ExtPict";
2413
    ident: ExtendedPictographic;
2414
    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2415
    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2416
    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2417
    /// emoji characters.
2418
    ///
2419
    /// # Example
2420
    ///
2421
    /// ```
2422
    /// use icu::properties::CodePointSetData;
2423
    /// use icu::properties::props::ExtendedPictographic;
2424
    ///
2425
    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2426
    ///
2427
    /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2428
    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2429
    /// ```
2430
2431
}
2432
2433
make_binary_property! {
2434
    name: "graph";
2435
    short_name: "graph";
2436
    ident: Graph;
2437
    data_marker: crate::provider::PropertyBinaryGraphV1;
2438
    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2439
    /// Visible characters.
2440
    ///
2441
    /// This is defined for POSIX compatibility.
2442
2443
}
2444
2445
make_binary_property! {
2446
    name: "Grapheme_Base";
2447
    short_name: "Gr_Base";
2448
    ident: GraphemeBase;
2449
    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2450
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2451
    /// Property used together with the definition of Standard Korean Syllable Block to define
2452
    /// "Grapheme base".
2453
    ///
2454
    /// See D58 in Chapter 3, Conformance in the Unicode Standard.
2455
    ///
2456
    /// # Example
2457
    ///
2458
    /// ```
2459
    /// use icu::properties::CodePointSetData;
2460
    /// use icu::properties::props::GraphemeBase;
2461
    ///
2462
    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2463
    ///
2464
    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2465
    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2466
    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2467
    /// ```
2468
2469
}
2470
2471
make_binary_property! {
2472
    name: "Grapheme_Extend";
2473
    short_name: "Gr_Ext";
2474
    ident: GraphemeExtend;
2475
    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2476
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2477
    /// Property used to define "Grapheme extender".
2478
    ///
2479
    /// See D59 in Chapter 3, Conformance in the
2480
    /// Unicode Standard.
2481
    ///
2482
    /// # Example
2483
    ///
2484
    /// ```
2485
    /// use icu::properties::CodePointSetData;
2486
    /// use icu::properties::props::GraphemeExtend;
2487
    ///
2488
    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2489
    ///
2490
    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2491
    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2492
    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2493
    /// ```
2494
2495
}
2496
2497
make_binary_property! {
2498
    name: "Grapheme_Link";
2499
    short_name: "Gr_Link";
2500
    ident: GraphemeLink;
2501
    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2502
    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2503
    /// Deprecated property.
2504
    ///
2505
    /// Formerly proposed for programmatic determination of grapheme
2506
    /// cluster boundaries.
2507
}
2508
2509
make_binary_property! {
2510
    name: "Hex_Digit";
2511
    short_name: "Hex";
2512
    ident: HexDigit;
2513
    data_marker: crate::provider::PropertyBinaryHexDigitV1;
2514
    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2515
    /// Characters commonly used for the representation of hexadecimal numbers, plus their
2516
    /// compatibility equivalents.
2517
    ///
2518
    /// # Example
2519
    ///
2520
    /// ```
2521
    /// use icu::properties::CodePointSetData;
2522
    /// use icu::properties::props::HexDigit;
2523
    ///
2524
    /// let hex_digit = CodePointSetData::new::<HexDigit>();
2525
    ///
2526
    /// assert!(hex_digit.contains('0'));
2527
    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2528
    /// assert!(hex_digit.contains('f'));
2529
    /// assert!(hex_digit.contains('\u{FF46}'));  // FULLWIDTH LATIN SMALL LETTER F
2530
    /// assert!(hex_digit.contains('\u{FF26}'));  // FULLWIDTH LATIN CAPITAL LETTER F
2531
    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2532
    /// ```
2533
}
2534
2535
make_binary_property! {
2536
    name: "Hyphen";
2537
    short_name: "Hyphen";
2538
    ident: Hyphen;
2539
    data_marker: crate::provider::PropertyBinaryHyphenV1;
2540
    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2541
    /// Deprecated property.
2542
    ///
2543
    /// Dashes which are used to mark connections between pieces of
2544
    /// words, plus the Katakana middle dot.
2545
}
2546
2547
make_binary_property! {
2548
    name: "ID_Compat_Math_Continue";
2549
    short_name: "ID_Compat_Math_Continue";
2550
    ident: IdCompatMathContinue;
2551
    data_marker: crate::provider::PropertyBinaryIdCompatMathContinueV1;
2552
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_CONTINUE_V1;
2553
    /// ID_Compat_Math_Continue Property
2554
}
2555
2556
make_binary_property! {
2557
    name: "ID_Compat_Math_Start";
2558
    short_name: "ID_Compat_Math_Start";
2559
    ident: IdCompatMathStart;
2560
    data_marker: crate::provider::PropertyBinaryIdCompatMathStartV1;
2561
    singleton: SINGLETON_PROPERTY_BINARY_ID_COMPAT_MATH_START_V1;
2562
    /// ID_Compat_Math_Start Property
2563
}
2564
2565
make_binary_property! {
2566
    name: "ID_Continue";
2567
    short_name: "IDC";
2568
    ident: IdContinue;
2569
    data_marker: crate::provider::PropertyBinaryIdContinueV1;
2570
    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2571
    /// Characters that can come after the first character in an identifier.
2572
    ///
2573
    /// If using NFKC to
2574
    /// fold differences between characters, use [`XidContinue`] instead.  See
2575
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2576
    /// more details.
2577
    ///
2578
    /// # Example
2579
    ///
2580
    /// ```
2581
    /// use icu::properties::CodePointSetData;
2582
    /// use icu::properties::props::IdContinue;
2583
    ///
2584
    /// let id_continue = CodePointSetData::new::<IdContinue>();
2585
    ///
2586
    /// assert!(id_continue.contains('x'));
2587
    /// assert!(id_continue.contains('1'));
2588
    /// assert!(id_continue.contains('_'));
2589
    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2590
    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2591
    /// assert!(id_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2592
    /// ```
2593
}
2594
2595
make_binary_property! {
2596
    name: "Ideographic";
2597
    short_name: "Ideo";
2598
    ident: Ideographic;
2599
    data_marker: crate::provider::PropertyBinaryIdeographicV1;
2600
    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2601
    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2602
    /// ideographs, or related siniform ideographs.
2603
    ///
2604
    /// # Example
2605
    ///
2606
    /// ```
2607
    /// use icu::properties::CodePointSetData;
2608
    /// use icu::properties::props::Ideographic;
2609
    ///
2610
    /// let ideographic = CodePointSetData::new::<Ideographic>();
2611
    ///
2612
    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2613
    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2614
    /// ```
2615
}
2616
2617
make_binary_property! {
2618
    name: "ID_Start";
2619
    short_name: "IDS";
2620
    ident: IdStart;
2621
    data_marker: crate::provider::PropertyBinaryIdStartV1;
2622
    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2623
    /// Characters that can begin an identifier.
2624
    ///
2625
    /// If using NFKC to fold differences between
2626
    /// characters, use [`XidStart`] instead.  See [`Unicode Standard Annex
2627
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2628
    ///
2629
    /// # Example
2630
    ///
2631
    /// ```
2632
    /// use icu::properties::CodePointSetData;
2633
    /// use icu::properties::props::IdStart;
2634
    ///
2635
    /// let id_start = CodePointSetData::new::<IdStart>();
2636
    ///
2637
    /// assert!(id_start.contains('x'));
2638
    /// assert!(!id_start.contains('1'));
2639
    /// assert!(!id_start.contains('_'));
2640
    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2641
    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2642
    /// assert!(id_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2643
    /// ```
2644
}
2645
2646
make_binary_property! {
2647
    name: "IDS_Binary_Operator";
2648
    short_name: "IDSB";
2649
    ident: IdsBinaryOperator;
2650
    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2651
    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2652
    /// Characters used in Ideographic Description Sequences.
2653
    ///
2654
    /// # Example
2655
    ///
2656
    /// ```
2657
    /// use icu::properties::CodePointSetData;
2658
    /// use icu::properties::props::IdsBinaryOperator;
2659
    ///
2660
    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2661
    ///
2662
    /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2663
    /// assert!(!ids_binary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2664
    /// ```
2665
}
2666
2667
make_binary_property! {
2668
    name: "IDS_Trinary_Operator";
2669
    short_name: "IDST";
2670
    ident: IdsTrinaryOperator;
2671
    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2672
    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2673
    /// Characters used in Ideographic Description Sequences.
2674
    ///
2675
    /// # Example
2676
    ///
2677
    /// ```
2678
    /// use icu::properties::CodePointSetData;
2679
    /// use icu::properties::props::IdsTrinaryOperator;
2680
    ///
2681
    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2682
    ///
2683
    /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2684
    /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2685
    /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));
2686
    /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2687
    /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // IDEOGRAPHIC CLOSING MARK
2688
    /// ```
2689
}
2690
2691
make_binary_property! {
2692
    name: "IDS_Unary_Operator";
2693
    short_name: "IDSU";
2694
    ident: IdsUnaryOperator;
2695
    data_marker: crate::provider::PropertyBinaryIdsUnaryOperatorV1;
2696
    singleton: SINGLETON_PROPERTY_BINARY_IDS_UNARY_OPERATOR_V1;
2697
    /// IDS_Unary_Operator Property
2698
}
2699
2700
make_binary_property! {
2701
    name: "Join_Control";
2702
    short_name: "Join_C";
2703
    ident: JoinControl;
2704
    data_marker: crate::provider::PropertyBinaryJoinControlV1;
2705
    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2706
    /// Format control characters which have specific functions for control of cursive joining
2707
    /// and ligation.
2708
    ///
2709
    /// # Example
2710
    ///
2711
    /// ```
2712
    /// use icu::properties::CodePointSetData;
2713
    /// use icu::properties::props::JoinControl;
2714
    ///
2715
    /// let join_control = CodePointSetData::new::<JoinControl>();
2716
    ///
2717
    /// assert!(join_control.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
2718
    /// assert!(join_control.contains('\u{200D}'));  // ZERO WIDTH JOINER
2719
    /// assert!(!join_control.contains('\u{200E}'));
2720
    /// ```
2721
}
2722
2723
make_binary_property! {
2724
    name: "Logical_Order_Exception";
2725
    short_name: "LOE";
2726
    ident: LogicalOrderException;
2727
    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2728
    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2729
    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
2730
    ///
2731
    /// # Example
2732
    ///
2733
    /// ```
2734
    /// use icu::properties::CodePointSetData;
2735
    /// use icu::properties::props::LogicalOrderException;
2736
    ///
2737
    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2738
    ///
2739
    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2740
    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2741
    /// ```
2742
}
2743
2744
make_binary_property! {
2745
    name: "Lowercase";
2746
    short_name: "Lower";
2747
    ident: Lowercase;
2748
    data_marker: crate::provider::PropertyBinaryLowercaseV1;
2749
    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2750
    /// Lowercase characters.
2751
    ///
2752
    /// # Example
2753
    ///
2754
    /// ```
2755
    /// use icu::properties::CodePointSetData;
2756
    /// use icu::properties::props::Lowercase;
2757
    ///
2758
    /// let lowercase = CodePointSetData::new::<Lowercase>();
2759
    ///
2760
    /// assert!(lowercase.contains('a'));
2761
    /// assert!(!lowercase.contains('A'));
2762
    /// ```
2763
}
2764
2765
make_binary_property! {
2766
    name: "Math";
2767
    short_name: "Math";
2768
    ident: Math;
2769
    data_marker: crate::provider::PropertyBinaryMathV1;
2770
    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2771
    /// Characters used in mathematical notation.
2772
    ///
2773
    /// # Example
2774
    ///
2775
    /// ```
2776
    /// use icu::properties::CodePointSetData;
2777
    /// use icu::properties::props::Math;
2778
    ///
2779
    /// let math = CodePointSetData::new::<Math>();
2780
    ///
2781
    /// assert!(math.contains('='));
2782
    /// assert!(math.contains('+'));
2783
    /// assert!(!math.contains('-'));
2784
    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2785
    /// assert!(!math.contains('/'));
2786
    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2787
    /// ```
2788
}
2789
2790
make_binary_property! {
2791
    name: "Modifier_Combining_Mark";
2792
    short_name: "MCM";
2793
    ident: ModifierCombiningMark;
2794
    data_marker: crate::provider::PropertyBinaryModifierCombiningMarkV1;
2795
    singleton: SINGLETON_PROPERTY_BINARY_MODIFIER_COMBINING_MARK_V1;
2796
    /// Modifier_Combining_Mark Property
2797
}
2798
2799
make_binary_property! {
2800
    name: "Noncharacter_Code_Point";
2801
    short_name: "NChar";
2802
    ident: NoncharacterCodePoint;
2803
    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2804
    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2805
    /// Code points permanently reserved for internal use.
2806
    ///
2807
    /// # Example
2808
    ///
2809
    /// ```
2810
    /// use icu::properties::CodePointSetData;
2811
    /// use icu::properties::props::NoncharacterCodePoint;
2812
    ///
2813
    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2814
    ///
2815
    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));
2816
    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));
2817
    /// assert!(!noncharacter_code_point.contains('\u{10000}'));
2818
    /// ```
2819
}
2820
2821
make_binary_property! {
2822
    name: "NFC_Inert";
2823
    short_name: "nfcinert";
2824
    ident: NfcInert;
2825
    data_marker: crate::provider::PropertyBinaryNfcInertV1;
2826
    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2827
    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
2828
}
2829
2830
make_binary_property! {
2831
    name: "NFD_Inert";
2832
    short_name: "nfdinert";
2833
    ident: NfdInert;
2834
    data_marker: crate::provider::PropertyBinaryNfdInertV1;
2835
    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2836
    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
2837
}
2838
2839
make_binary_property! {
2840
    name: "NFKC_Inert";
2841
    short_name: "nfkcinert";
2842
    ident: NfkcInert;
2843
    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2844
    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2845
    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
2846
}
2847
2848
make_binary_property! {
2849
    name: "NFKD_Inert";
2850
    short_name: "nfkdinert";
2851
    ident: NfkdInert;
2852
    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2853
    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2854
    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
2855
}
2856
2857
make_binary_property! {
2858
    name: "Pattern_Syntax";
2859
    short_name: "Pat_Syn";
2860
    ident: PatternSyntax;
2861
    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2862
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2863
    /// Characters used as syntax in patterns (such as regular expressions).
2864
    ///
2865
    /// See [`Unicode
2866
    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2867
    /// details.
2868
    ///
2869
    /// # Example
2870
    ///
2871
    /// ```
2872
    /// use icu::properties::CodePointSetData;
2873
    /// use icu::properties::props::PatternSyntax;
2874
    ///
2875
    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2876
    ///
2877
    /// assert!(pattern_syntax.contains('{'));
2878
    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2879
    /// assert!(!pattern_syntax.contains('0'));
2880
    /// ```
2881
}
2882
2883
make_binary_property! {
2884
    name: "Pattern_White_Space";
2885
    short_name: "Pat_WS";
2886
    ident: PatternWhiteSpace;
2887
    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2888
    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2889
    /// Characters used as whitespace in patterns (such as regular expressions).
2890
    ///
2891
    /// See
2892
    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2893
    /// more details.
2894
    ///
2895
    /// # Example
2896
    ///
2897
    /// ```
2898
    /// use icu::properties::CodePointSetData;
2899
    /// use icu::properties::props::PatternWhiteSpace;
2900
    ///
2901
    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2902
    ///
2903
    /// assert!(pattern_white_space.contains(' '));
2904
    /// assert!(pattern_white_space.contains('\u{2029}'));  // PARAGRAPH SEPARATOR
2905
    /// assert!(pattern_white_space.contains('\u{000A}'));  // NEW LINE
2906
    /// assert!(!pattern_white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
2907
    /// ```
2908
}
2909
2910
make_binary_property! {
2911
    name: "Prepended_Concatenation_Mark";
2912
    short_name: "PCM";
2913
    ident: PrependedConcatenationMark;
2914
    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2915
    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2916
    /// A small class of visible format controls, which precede and then span a sequence of
2917
    /// other characters, usually digits.
2918
}
2919
2920
make_binary_property! {
2921
    name: "print";
2922
    short_name: "print";
2923
    ident: Print;
2924
    data_marker: crate::provider::PropertyBinaryPrintV1;
2925
    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2926
    /// Printable characters (visible characters and whitespace).
2927
    ///
2928
    /// This is defined for POSIX compatibility.
2929
}
2930
2931
// Declares the binary property `Quotation_Mark` (short name `QMark`) under the identifier
// `QuotationMark`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
2932
    name: "Quotation_Mark";
2933
    short_name: "QMark";
2934
    ident: QuotationMark;
2935
    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2936
    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2937
    /// Punctuation characters that function as quotation marks.
2938
    ///
2939
    /// # Example
2940
    ///
2941
    /// ```
2942
    /// use icu::properties::CodePointSetData;
2943
    /// use icu::properties::props::QuotationMark;
2944
    ///
2945
    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2946
    ///
2947
    /// assert!(quotation_mark.contains('\''));
2948
    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2949
    /// assert!(!quotation_mark.contains('<'));
2950
    /// ```
2951
}
2952
2953
// Declares the binary property `Radical` (long and short name are both `Radical`) under the
// identifier `Radical`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
2954
    name: "Radical";
2955
    short_name: "Radical";
2956
    ident: Radical;
2957
    data_marker: crate::provider::PropertyBinaryRadicalV1;
2958
    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2959
    /// Characters used in the definition of Ideographic Description Sequences.
2960
    ///
2961
    /// # Example
2962
    ///
2963
    /// ```
2964
    /// use icu::properties::CodePointSetData;
2965
    /// use icu::properties::props::Radical;
2966
    ///
2967
    /// let radical = CodePointSetData::new::<Radical>();
2968
    ///
2969
    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2970
    /// assert!(!radical.contains('丹'));  // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
2971
    /// ```
2972
}
2973
2974
// Declares the binary property `Regional_Indicator` (short name `RI`) under the identifier
// `RegionalIndicator`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
2975
    name: "Regional_Indicator";
2976
    short_name: "RI";
2977
    ident: RegionalIndicator;
2978
    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
2979
    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
2980
    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2981
    ///
2982
    /// # Example
2983
    ///
2984
    /// ```
2985
    /// use icu::properties::CodePointSetData;
2986
    /// use icu::properties::props::RegionalIndicator;
2987
    ///
2988
    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2989
    ///
2990
    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2991
    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2992
    /// assert!(!regional_indicator.contains('T'));
2993
    /// ```
2994
}
2995
2996
// Declares the binary property `Soft_Dotted` (short name `SD`) under the identifier
// `SoftDotted`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
2997
    name: "Soft_Dotted";
2998
    short_name: "SD";
2999
    ident: SoftDotted;
3000
    data_marker: crate::provider::PropertyBinarySoftDottedV1;
3001
    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
3002
    /// Characters with a "soft dot", like i or j.
3003
    ///
3004
    /// An accent placed on these characters causes
3005
    /// the dot to disappear.
3006
    ///
3007
    /// # Example
3008
    ///
3009
    /// ```
3010
    /// use icu::properties::CodePointSetData;
3011
    /// use icu::properties::props::SoftDotted;
3012
    ///
3013
    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
3014
    ///
3015
    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
3016
    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
3017
    /// ```
3018
}
3019
3020
// Declares the binary property `Segment_Starter` (short name `segstart`) under the identifier
// `SegmentStarter`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3021
    name: "Segment_Starter";
3022
    short_name: "segstart";
3023
    ident: SegmentStarter;
3024
    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
3025
    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
3026
    /// Characters that are starters in terms of Unicode normalization and combining character
3027
    /// sequences.
3028
}
3029
3030
// Declares the binary property `Case_Sensitive` (short name `Sensitive`) under the identifier
// `CaseSensitive`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3031
    name: "Case_Sensitive";
3032
    short_name: "Sensitive";
3033
    ident: CaseSensitive;
3034
    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
3035
    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
3036
    /// Characters that are either the source of a case mapping or in the target of a case
3037
    /// mapping.
3038
}
3039
3040
// Declares the binary property `Sentence_Terminal` (short name `STerm`) under the identifier
// `SentenceTerminal`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3041
    name: "Sentence_Terminal";
3042
    short_name: "STerm";
3043
    ident: SentenceTerminal;
3044
    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
3045
    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
3046
    /// Punctuation characters that generally mark the end of sentences.
3047
    ///
3048
    /// # Example
3049
    ///
3050
    /// ```
3051
    /// use icu::properties::CodePointSetData;
3052
    /// use icu::properties::props::SentenceTerminal;
3053
    ///
3054
    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
3055
    ///
3056
    /// assert!(sentence_terminal.contains('.'));
3057
    /// assert!(sentence_terminal.contains('?'));
3058
    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
3059
    /// assert!(!sentence_terminal.contains(','));
3060
    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
3061
    /// ```
3062
}
3063
3064
// Declares the binary property `Terminal_Punctuation` (short name `Term`) under the identifier
// `TerminalPunctuation`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3065
    name: "Terminal_Punctuation";
3066
    short_name: "Term";
3067
    ident: TerminalPunctuation;
3068
    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
3069
    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
3070
    /// Punctuation characters that generally mark the end of textual units.
3071
    ///
3072
    /// # Example
3073
    ///
3074
    /// ```
3075
    /// use icu::properties::CodePointSetData;
3076
    /// use icu::properties::props::TerminalPunctuation;
3077
    ///
3078
    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
3079
    ///
3080
    /// assert!(terminal_punctuation.contains('.'));
3081
    /// assert!(terminal_punctuation.contains('?'));
3082
    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
3083
    /// assert!(terminal_punctuation.contains(','));
3084
    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
3085
    /// ```
3086
}
3087
3088
// Declares the binary property `Unified_Ideograph` (short name `UIdeo`) under the identifier
// `UnifiedIdeograph`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3089
    name: "Unified_Ideograph";
3090
    short_name: "UIdeo";
3091
    ident: UnifiedIdeograph;
3092
    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
3093
    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
3094
    /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
3095
    ///
3096
    /// # Example
3097
    ///
3098
    /// ```
3099
    /// use icu::properties::CodePointSetData;
3100
    /// use icu::properties::props::UnifiedIdeograph;
3101
    ///
3102
    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
3103
    ///
3104
    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
3105
    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
3106
    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178
3107
    /// ```
3108
}
3109
3110
// Declares the binary property `Uppercase` (short name `Upper`) under the identifier
// `Uppercase`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3111
    name: "Uppercase";
3112
    short_name: "Upper";
3113
    ident: Uppercase;
3114
    data_marker: crate::provider::PropertyBinaryUppercaseV1;
3115
    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
3116
    /// Uppercase characters.
3117
    ///
3118
    /// # Example
3119
    ///
3120
    /// ```
3121
    /// use icu::properties::CodePointSetData;
3122
    /// use icu::properties::props::Uppercase;
3123
    ///
3124
    /// let uppercase = CodePointSetData::new::<Uppercase>();
3125
    ///
3126
    /// assert!(uppercase.contains('U'));
3127
    /// assert!(!uppercase.contains('u'));
3128
    /// ```
3129
}
3130
3131
// Declares the binary property `Variation_Selector` (short name `VS`) under the identifier
// `VariationSelector`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3132
    name: "Variation_Selector";
3133
    short_name: "VS";
3134
    ident: VariationSelector;
3135
    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
3136
    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
3137
    /// Characters that are Variation Selectors.
3138
    ///
3139
    /// # Example
3140
    ///
3141
    /// ```
3142
    /// use icu::properties::CodePointSetData;
3143
    /// use icu::properties::props::VariationSelector;
3144
    ///
3145
    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
3146
    ///
3147
    /// assert!(variation_selector.contains('\u{180D}'));  // MONGOLIAN FREE VARIATION SELECTOR THREE
3148
    /// assert!(!variation_selector.contains('\u{303E}'));  // IDEOGRAPHIC VARIATION INDICATOR
3149
    /// assert!(variation_selector.contains('\u{FE0F}'));  // VARIATION SELECTOR-16
3150
    /// assert!(!variation_selector.contains('\u{FE10}'));  // PRESENTATION FORM FOR VERTICAL COMMA
3151
    /// assert!(variation_selector.contains('\u{E01EF}'));  // VARIATION SELECTOR-256
3152
    /// ```
3153
}
3154
3155
// Declares the binary property `White_Space` (short name `WSpace`) under the identifier
// `WhiteSpace`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3156
    name: "White_Space";
3157
    short_name: "WSpace";
3158
    ident: WhiteSpace;
3159
    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
3160
    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
3161
    /// Spaces, separator characters and other control characters which should be treated by
3162
    /// programming languages as "white space" for the purpose of parsing elements.
3163
    ///
3164
    /// # Example
3165
    ///
3166
    /// ```
3167
    /// use icu::properties::CodePointSetData;
3168
    /// use icu::properties::props::WhiteSpace;
3169
    ///
3170
    /// let white_space = CodePointSetData::new::<WhiteSpace>();
3171
    ///
3172
    /// assert!(white_space.contains(' '));
3173
    /// assert!(white_space.contains('\u{000A}'));  // NEW LINE
3174
    /// assert!(white_space.contains('\u{00A0}'));  // NO-BREAK SPACE
3175
    /// assert!(!white_space.contains('\u{200B}'));  // ZERO WIDTH SPACE
3176
    /// ```
3177
}
3178
3179
// Declares the binary property `xdigit` (long and short name are both `xdigit`) under the
// identifier `Xdigit`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3180
    name: "xdigit";
3181
    short_name: "xdigit";
3182
    ident: Xdigit;
3183
    data_marker: crate::provider::PropertyBinaryXdigitV1;
3184
    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
3185
    /// Hexadecimal digits.
3186
    ///
3187
    /// This is defined for POSIX compatibility.
3188
}
3189
3190
// Declares the binary property `XID_Continue` (short name `XIDC`) under the identifier
// `XidContinue`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3191
    name: "XID_Continue";
3192
    short_name: "XIDC";
3193
    ident: XidContinue;
3194
    data_marker: crate::provider::PropertyBinaryXidContinueV1;
3195
    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
3196
    /// Characters that can come after the first character in an identifier.
3197
    ///
3198
    /// See [`Unicode Standard Annex
3199
    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
3200
    ///
3201
    /// # Example
3202
    ///
3203
    /// ```
3204
    /// use icu::properties::CodePointSetData;
3205
    /// use icu::properties::props::XidContinue;
3206
    ///
3207
    /// let xid_continue = CodePointSetData::new::<XidContinue>();
3208
    ///
3209
    /// assert!(xid_continue.contains('x'));
3210
    /// assert!(xid_continue.contains('1'));
3211
    /// assert!(xid_continue.contains('_'));
3212
    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
3213
    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
3214
    /// assert!(!xid_continue.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
3215
    /// ```
3216
}
3217
3218
// Declares the binary property `XID_Start` (short name `XIDS`) under the identifier
// `XidStart`.
// NOTE(review): `make_binary_property!` is defined outside this excerpt; presumably the `///`
// lines below become the rustdoc of the generated item — confirm against the macro.
make_binary_property! {
3219
    name: "XID_Start";
3220
    short_name: "XIDS";
3221
    ident: XidStart;
3222
    data_marker: crate::provider::PropertyBinaryXidStartV1;
3223
    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
3224
    /// Characters that can begin an identifier.
3225
    ///
3226
    /// See [`Unicode
3227
    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
3228
    /// details.
3229
    ///
3230
    /// # Example
3231
    ///
3232
    /// ```
3233
    /// use icu::properties::CodePointSetData;
3234
    /// use icu::properties::props::XidStart;
3235
    ///
3236
    /// let xid_start = CodePointSetData::new::<XidStart>();
3237
    ///
3238
    /// assert!(xid_start.contains('x'));
3239
    /// assert!(!xid_start.contains('1'));
3240
    /// assert!(!xid_start.contains('_'));
3241
    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
3242
    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
3243
    /// assert!(!xid_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
3244
    /// ```
3245
}
3246
3247
pub use crate::emoji::EmojiSet;
3248
3249
// Generates a sealed unit-struct marker type for an emoji string property and implements
// `EmojiSet` for it.
//
// Fields, in this exact order and each terminated by `;`:
// - `name`: long property name (string literal), exposed as `EmojiSet::NAME` bytes;
// - `short_name`: short property name (string literal), exposed as `EmojiSet::SHORT_NAME` bytes;
// - `ident`: identifier of the struct to create;
// - `data_marker`: type used for `EmojiSet::DataMarker`;
// - `singleton`: name of the constant on `crate::provider::Baked` that backs
//   `EmojiSet::SINGLETON` (only emitted with the `compiled_data` feature);
// followed by one or more attributes (normally `///` docs) that are copied onto the struct.
macro_rules! make_emoji_set {
    (
        name: $long_name:literal;
        short_name: $abbreviation:literal;
        ident: $marker:ident;
        data_marker: $marker_ty:ty;
        singleton: $baked_const:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[non_exhaustive]
        #[derive(Debug)]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl EmojiSet for $marker {
            const NAME: &'static [u8] = str::as_bytes($long_name);
            const SHORT_NAME: &'static [u8] = str::as_bytes($abbreviation);
            type DataMarker = $marker_ty;
            // The baked data only exists when the crate is built with `compiled_data`.
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked_const;
        }
    }
}
3275
3276
// Declares the emoji string property `Basic_Emoji` (long and short name are both
// `Basic_Emoji`). Via `make_emoji_set!` this produces the unit struct `BasicEmoji` implementing
// `EmojiSet`, and the `///` lines below are attached to that struct as its documentation.
make_emoji_set! {
3277
    name: "Basic_Emoji";
3278
    short_name: "Basic_Emoji";
3279
    ident: BasicEmoji;
3280
    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
3281
    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
3282
    /// Characters and character sequences intended for general-purpose, independent, direct input.
3283
    ///
3284
    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
3285
    /// details.
3286
    ///
3287
    /// # Example
3288
    ///
3289
    /// ```
3290
    /// use icu::properties::EmojiSetData;
3291
    /// use icu::properties::props::BasicEmoji;
3292
    ///
3293
    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
3294
    ///
3295
    /// assert!(!basic_emoji.contains('\u{0020}'));
3296
    /// assert!(!basic_emoji.contains('\n'));
3297
    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
3298
    /// assert!(basic_emoji.contains_str("\u{1F983}"));
3299
    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
3300
    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
3301
    /// ```
3302
}
3303
3304
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Asserts that the values listed in `lookup` and the values yielded by `consts` are the
    /// same set.
    ///
    /// Every mismatch is reported in the panic message as `<source>:\t<name> = <value>`, where
    /// `<source>` is `Consts` for a constant whose value is absent from `lookup`, and `Data`
    /// for a `lookup` entry that no constant matched.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        let long_names = crate::PropertyNamesLong::<T>::new();

        // Everything the data knows about, keyed by numeric value. Entries are removed as
        // matching constants are found, so whatever is left over exists only in the data.
        let mut unmatched_data: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let mut diff = Vec::new();
        for constant in consts {
            let value = u16::from(*constant) as usize;
            if unmatched_data.remove(&value).is_none() {
                let name = long_names
                    .get(*constant)
                    .unwrap_or("<unknown>")
                    .to_string();
                diff.push((value, (name, "Consts")));
            }
        }
        diff.extend(unmatched_data);

        let fmt_diff: String = diff
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    /// Emits one `#[test]` function named `$test_name` that runs [`check_enum`] on the given
    /// baked singleton and the `ALL_VALUES` of the given property type.
    macro_rules! completeness_test {
        ($test_name:ident, $singleton:ident, $property:ident) => {
            #[test]
            fn $test_name() {
                check_enum(
                    crate::provider::Baked::$singleton,
                    $property::ALL_VALUES,
                );
            }
        };
    }

    completeness_test!(
        test_ea,
        SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
        EastAsianWidth
    );
    completeness_test!(
        test_ccc,
        SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
        CanonicalCombiningClass
    );
    completeness_test!(
        test_jt,
        SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
        JoiningType
    );
    completeness_test!(
        test_insc,
        SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
        IndicSyllabicCategory
    );
    completeness_test!(
        test_sb,
        SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
        SentenceBreak
    );
    completeness_test!(
        test_wb,
        SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
        WordBreak
    );
    completeness_test!(
        test_bc,
        SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
        BidiClass
    );
    completeness_test!(
        test_hst,
        SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
        HangulSyllableType
    );
    completeness_test!(
        test_vo,
        SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
        VerticalOrientation
    );
}