/rust/registry/src/index.crates.io-6f17d22bba15001f/icu_properties-1.5.1/src/sets.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | //! The functions in this module return a [`CodePointSetData`] containing |
6 | | //! the set of characters with a particular Unicode property. |
7 | | //! |
8 | | //! The descriptions of most properties are taken from [`TR44`], the documentation for the |
9 | | //! Unicode Character Database. Some properties are instead defined in [`TR18`], the |
10 | | //! documentation for Unicode regular expressions. In particular, Annex C of this document |
11 | | //! defines properties for POSIX compatibility. |
12 | | //! |
13 | | //! [`CodePointSetData`]: crate::sets::CodePointSetData |
14 | | //! [`TR44`]: https://www.unicode.org/reports/tr44 |
15 | | //! [`TR18`]: https://www.unicode.org/reports/tr18 |
16 | | |
17 | | use crate::error::PropertiesError; |
18 | | use crate::provider::*; |
19 | | use crate::*; |
20 | | use core::iter::FromIterator; |
21 | | use core::ops::RangeInclusive; |
22 | | use icu_collections::codepointinvlist::CodePointInversionList; |
23 | | use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; |
24 | | use icu_provider::prelude::*; |
25 | | |
26 | | // |
27 | | // CodePointSet* structs, impls, & macros |
28 | | // (a set with only code points) |
29 | | // |
30 | | |
31 | | /// A wrapper around code point set data. It is returned by APIs that return Unicode |
32 | | /// property data in a set-like form, ex: a set of code points sharing the same |
33 | | /// value for a Unicode property. Access its data via the borrowed version, |
34 | | /// [`CodePointSetDataBorrowed`]. |
35 | | #[derive(Debug)] |
36 | | pub struct CodePointSetData { |
37 | | data: DataPayload<ErasedSetlikeMarker>, |
38 | | } |
39 | | |
40 | | /// Private marker type for CodePointSetData |
41 | | /// to work for all set properties at once |
42 | | #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] |
43 | | pub(crate) struct ErasedSetlikeMarker; |
44 | | impl DataMarker for ErasedSetlikeMarker { |
45 | | type Yokeable = PropertyCodePointSetV1<'static>; |
46 | | } |
47 | | |
48 | | impl CodePointSetData { |
49 | | /// Construct a borrowed version of this type that can be queried. |
50 | | /// |
51 | | /// This owned version is returned by functions that use a runtime data provider. |
52 | | #[inline] |
53 | 0 | pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> { |
54 | 0 | CodePointSetDataBorrowed { |
55 | 0 | set: self.data.get(), |
56 | 0 | } |
57 | 0 | } Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::as_borrowed Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::as_borrowed |
58 | | |
59 | | /// Construct a new one from loaded data |
60 | | /// |
61 | | /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead |
62 | 0 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
63 | 0 | where |
64 | 0 | M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>, |
65 | 0 | { |
66 | 0 | Self { data: data.cast() } |
67 | 0 | } Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::DashV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::MathV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::AlnumV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::BlankV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::CasedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::EmojiV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::GraphV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::PrintV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::HyphenV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::XdigitV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::IdStartV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::RadicalV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ExtenderV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::HexDigitV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::NfcInertV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::NfdInertV1Marker> Unexecuted instantiation: 
<icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::XidStartV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::DiacriticV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::LowercaseV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::NfkcInertV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::NfkdInertV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::UppercaseV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::AlphabeticV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::DeprecatedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::IdContinueV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::SoftDottedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::WhiteSpaceV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::BidiControlV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::IdeographicV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::JoinControlV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::XidContinueV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::BidiMirroredV1Marker> Unexecuted 
instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::GraphemeBaseV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::GraphemeLinkV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::AsciiHexDigitV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::CaseIgnorableV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::CaseSensitiveV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::EmojiModifierV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::PatternSyntaxV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::QuotationMarkV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::EmojiComponentV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::GraphemeExtendV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::SegmentStarterV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::SentenceTerminalV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::UnifiedIdeographV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::EmojiModifierBaseV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::EmojiPresentationV1Marker> Unexecuted instantiation: 
<icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::IdsBinaryOperatorV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::PatternWhiteSpaceV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::RegionalIndicatorV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::VariationSelectorV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::IdsTrinaryOperatorV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::TerminalPunctuationV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ExtendedPictographicV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ChangesWhenCasefoldedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ChangesWhenCasemappedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ChangesWhenLowercasedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ChangesWhenTitlecasedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ChangesWhenUppercasedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::LogicalOrderExceptionV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::NoncharacterCodePointV1Marker> Unexecuted instantiation: 
<icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::FullCompositionExclusionV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::ChangesWhenNfkcCasefoldedV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::DefaultIgnorableCodePointV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::provider::PrependedConcatenationMarkV1Marker> Unexecuted instantiation: <icu_properties::sets::CodePointSetData>::from_data::<icu_properties::sets::ErasedSetlikeMarker> |
68 | | |
69 | | /// Construct a new [`CodePointSetData`] from an owned [`CodePointInversionList`] |
70 | 0 | pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self { |
71 | 0 | let set = PropertyCodePointSetV1::from_code_point_inversion_list(set); |
72 | 0 | CodePointSetData::from_data(DataPayload::<ErasedSetlikeMarker>::from_owned(set)) |
73 | 0 | } |
74 | | |
75 | | /// Convert this type to a [`CodePointInversionList`] as a borrowed value. |
76 | | /// |
77 | | /// The data backing this is extensible and supports multiple implementations. |
78 | | /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be |
79 | | /// added, and users may select which at data generation time. |
80 | | /// |
81 | | /// This method returns an `Option` in order to return `None` when the backing data provider |
82 | | /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time |
83 | | /// constraint. |
84 | 0 | pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> { |
85 | 0 | self.data.get().as_code_point_inversion_list() |
86 | 0 | } |
87 | | |
88 | | /// Convert this type to a [`CodePointInversionList`], borrowing if possible, |
89 | | /// otherwise allocating a new [`CodePointInversionList`]. |
90 | | /// |
91 | | /// The data backing this is extensible and supports multiple implementations. |
92 | | /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be |
93 | | /// added, and users may select which at data generation time. |
94 | | /// |
95 | | /// The performance of the conversion to this specific return type will vary |
96 | | /// depending on the data structure that is backing `self`. |
97 | 0 | pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { |
98 | 0 | self.data.get().to_code_point_inversion_list() |
99 | 0 | } |
100 | | } |
101 | | |
102 | | /// A borrowed wrapper around code point set data, returned by |
103 | | /// [`CodePointSetData::as_borrowed()`]. More efficient to query. |
104 | | #[derive(Clone, Copy, Debug)] |
105 | | pub struct CodePointSetDataBorrowed<'a> { |
106 | | set: &'a PropertyCodePointSetV1<'a>, |
107 | | } |
108 | | |
109 | | impl CodePointSetDataBorrowed<'static> { |
110 | | /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`]. |
111 | | /// |
112 | | /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some |
113 | | /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`]. |
114 | 0 | pub const fn static_to_owned(self) -> CodePointSetData { |
115 | 0 | CodePointSetData { |
116 | 0 | data: DataPayload::from_static_ref(self.set), |
117 | 0 | } |
118 | 0 | } |
119 | | } |
120 | | |
121 | | impl<'a> CodePointSetDataBorrowed<'a> { |
122 | | /// Check if the set contains a character |
123 | | /// |
124 | | /// ```rust |
125 | | /// use icu::properties::sets; |
126 | | /// |
127 | | /// let alphabetic = sets::alphabetic(); |
128 | | /// |
129 | | /// assert!(!alphabetic.contains('3')); |
130 | | /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
131 | | /// assert!(alphabetic.contains('A')); |
132 | | /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
133 | | /// ``` |
134 | | #[inline] |
135 | 0 | pub fn contains(self, ch: char) -> bool { |
136 | 0 | self.set.contains(ch) |
137 | 0 | } |
138 | | |
139 | | /// Check if the set contains a character as a UTF32 code unit |
140 | | /// |
141 | | /// ```rust |
142 | | /// use icu::properties::sets; |
143 | | /// |
144 | | /// let alphabetic = sets::alphabetic(); |
145 | | /// |
146 | | /// assert!(!alphabetic.contains32(0x0A69)); // U+0A69 GURMUKHI DIGIT THREE |
147 | | /// assert!(alphabetic.contains32(0x00C4)); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
148 | | /// ``` |
149 | | #[inline] |
150 | 0 | pub fn contains32(self, ch: u32) -> bool { |
151 | 0 | self.set.contains32(ch) |
152 | 0 | } Unexecuted instantiation: <icu_properties::sets::CodePointSetDataBorrowed>::contains32 Unexecuted instantiation: <icu_properties::sets::CodePointSetDataBorrowed>::contains32 |
153 | | |
154 | | /// Yields an [`Iterator`] returning the ranges of the code points that are |
155 | | /// included in the [`CodePointSetData`] |
156 | | /// |
157 | | /// Ranges are returned as [`RangeInclusive`], which is inclusive of its |
158 | | /// `end` bound value. An end-inclusive behavior matches the ICU4C/J |
159 | | /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. |
160 | | /// |
161 | | /// # Example |
162 | | /// |
163 | | /// ``` |
164 | | /// use icu::properties::sets; |
165 | | /// |
166 | | /// let alphabetic = sets::alphabetic(); |
167 | | /// let mut ranges = alphabetic.iter_ranges(); |
168 | | /// |
169 | | /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' |
170 | | /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' |
171 | | /// ``` |
172 | | #[inline] |
173 | 0 | pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
174 | 0 | self.set.iter_ranges() |
175 | 0 | } Unexecuted instantiation: <icu_properties::sets::CodePointSetDataBorrowed>::iter_ranges Unexecuted instantiation: <icu_properties::sets::CodePointSetDataBorrowed>::iter_ranges |
176 | | |
177 | | /// Yields an [`Iterator`] returning the ranges of the code points that are |
178 | | /// *not* included in the [`CodePointSetData`] |
179 | | /// |
180 | | /// Ranges are returned as [`RangeInclusive`], which is inclusive of its |
181 | | /// `end` bound value. An end-inclusive behavior matches the ICU4C/J |
182 | | /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. |
183 | | /// |
184 | | /// # Example |
185 | | /// |
186 | | /// ``` |
187 | | /// use icu::properties::sets; |
188 | | /// |
189 | | /// let alphabetic = sets::alphabetic(); |
190 | | /// let mut ranges = alphabetic.iter_ranges_complemented(); |
191 | | /// |
192 | | /// assert_eq!(Some(0x0000..=0x0040), ranges.next()); // everything before 'A' |
193 | | /// assert_eq!(Some(0x005B..=0x0060), ranges.next()); // between 'Z' and 'a' |
194 | | /// ``` |
195 | | #[inline] |
196 | 0 | pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
197 | 0 | self.set.iter_ranges_complemented() |
198 | 0 | } Unexecuted instantiation: <icu_properties::sets::CodePointSetDataBorrowed>::iter_ranges_complemented Unexecuted instantiation: <icu_properties::sets::CodePointSetDataBorrowed>::iter_ranges_complemented |
199 | | } |
200 | | |
201 | | // |
202 | | // UnicodeSet* structs, impls, & macros |
203 | | // (a set with code points + strings) |
204 | | // |
205 | | |
206 | | /// A wrapper around `UnicodeSet` data (characters and strings) |
207 | | #[derive(Debug)] |
208 | | pub struct UnicodeSetData { |
209 | | data: DataPayload<ErasedUnicodeSetlikeMarker>, |
210 | | } |
211 | | |
212 | | #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] |
213 | | pub(crate) struct ErasedUnicodeSetlikeMarker; |
214 | | impl DataMarker for ErasedUnicodeSetlikeMarker { |
215 | | type Yokeable = PropertyUnicodeSetV1<'static>; |
216 | | } |
217 | | |
218 | | impl UnicodeSetData { |
219 | | /// Construct a borrowed version of this type that can be queried. |
220 | | /// |
221 | | /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it |
222 | | /// up front. |
223 | | #[inline] |
224 | 0 | pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> { |
225 | 0 | UnicodeSetDataBorrowed { |
226 | 0 | set: self.data.get(), |
227 | 0 | } |
228 | 0 | } Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::as_borrowed Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::as_borrowed |
229 | | |
230 | | /// Construct a new one from loaded data |
231 | | /// |
232 | | /// Typically it is preferable to use getters instead |
233 | 0 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
234 | 0 | where |
235 | 0 | M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>, |
236 | 0 | { |
237 | 0 | Self { data: data.cast() } |
238 | 0 | } Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::provider::BasicEmojiV1Marker> Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::sets::ErasedUnicodeSetlikeMarker> Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::provider::ExemplarCharactersMainV1Marker> Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::provider::ExemplarCharactersIndexV1Marker> Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::provider::ExemplarCharactersNumbersV1Marker> Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::provider::ExemplarCharactersAuxiliaryV1Marker> Unexecuted instantiation: <icu_properties::sets::UnicodeSetData>::from_data::<icu_properties::provider::ExemplarCharactersPunctuationV1Marker> |
239 | | |
240 | | /// Construct a new [`UnicodeSetData`] from an owned [`CodePointInversionListAndStringList`] |
241 | 0 | pub fn from_code_point_inversion_list_string_list( |
242 | 0 | set: CodePointInversionListAndStringList<'static>, |
243 | 0 | ) -> Self { |
244 | 0 | let set = PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set); |
245 | 0 | UnicodeSetData::from_data(DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(set)) |
246 | 0 | } |
247 | | |
248 | | /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value. |
249 | | /// |
250 | | /// The data backing this is extensible and supports multiple implementations. |
251 | | /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be |
252 | | /// added, and users may select which at data generation time. |
253 | | /// |
254 | | /// This method returns an `Option` in order to return `None` when the backing data provider |
255 | | /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time |
256 | | /// constraint. |
257 | 0 | pub fn as_code_point_inversion_list_string_list( |
258 | 0 | &self, |
259 | 0 | ) -> Option<&CodePointInversionListAndStringList<'_>> { |
260 | 0 | self.data.get().as_code_point_inversion_list_string_list() |
261 | 0 | } |
262 | | |
263 | | /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible, |
264 | | /// otherwise allocating a new [`CodePointInversionListAndStringList`]. |
265 | | /// |
266 | | /// The data backing this is extensible and supports multiple implementations. |
267 | | /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be |
268 | | /// added, and users may select which at data generation time. |
269 | | /// |
270 | | /// The performance of the conversion to this specific return type will vary |
271 | | /// depending on the data structure that is backing `self`. |
272 | 0 | pub fn to_code_point_inversion_list_string_list( |
273 | 0 | &self, |
274 | 0 | ) -> CodePointInversionListAndStringList<'_> { |
275 | 0 | self.data.get().to_code_point_inversion_list_string_list() |
276 | 0 | } |
277 | | } |
278 | | |
279 | | /// A borrowed wrapper around Unicode set data (code points and strings), returned by |
280 | | /// [`UnicodeSetData::as_borrowed()`]. More efficient to query. |
281 | | #[derive(Clone, Copy, Debug)] |
282 | | pub struct UnicodeSetDataBorrowed<'a> { |
283 | | set: &'a PropertyUnicodeSetV1<'a>, |
284 | | } |
285 | | |
286 | | impl<'a> UnicodeSetDataBorrowed<'a> { |
287 | | /// Check if the set contains the string. Strings consisting of one character |
288 | | /// are treated as a character/code point. |
289 | | /// |
290 | | /// This matches ICU behavior for ICU's `UnicodeSet`. |
291 | | #[inline] |
292 | 0 | pub fn contains(self, s: &str) -> bool { |
293 | 0 | self.set.contains(s) |
294 | 0 | } Unexecuted instantiation: <icu_properties::sets::UnicodeSetDataBorrowed>::contains Unexecuted instantiation: <icu_properties::sets::UnicodeSetDataBorrowed>::contains |
295 | | |
296 | | /// Check if the set contains a character as a UTF32 code unit |
297 | | #[inline] |
298 | 0 | pub fn contains32(&self, cp: u32) -> bool { |
299 | 0 | self.set.contains32(cp) |
300 | 0 | } Unexecuted instantiation: <icu_properties::sets::UnicodeSetDataBorrowed>::contains32 Unexecuted instantiation: <icu_properties::sets::UnicodeSetDataBorrowed>::contains32 |
301 | | |
302 | | /// Check if the set contains the code point corresponding to the Rust character. |
303 | | #[inline] |
304 | 0 | pub fn contains_char(&self, ch: char) -> bool { |
305 | 0 | self.set.contains_char(ch) |
306 | 0 | } |
307 | | } |
308 | | |
309 | | impl UnicodeSetDataBorrowed<'static> { |
310 | | /// Cheaply converts a [`UnicodeSetDataBorrowed<'static>`] into a [`UnicodeSetData`]. |
311 | | /// |
312 | | /// Note: Due to branching and indirection, using [`UnicodeSetData`] might inhibit some |
313 | | /// compile-time optimizations that are possible with [`UnicodeSetDataBorrowed`]. |
314 | 0 | pub const fn static_to_owned(self) -> UnicodeSetData { |
315 | 0 | UnicodeSetData { |
316 | 0 | data: DataPayload::from_static_ref(self.set), |
317 | 0 | } |
318 | 0 | } |
319 | | } |
320 | | |
321 | 0 | pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError> |
322 | 0 | where |
323 | 0 | M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>, |
324 | 0 | P: DataProvider<M> + ?Sized, |
325 | 0 | { |
326 | 0 | Ok(provider |
327 | 0 | .load(Default::default()) |
328 | 0 | .and_then(DataResponse::take_payload) |
329 | 0 | .map(CodePointSetData::from_data)?) |
330 | 0 | } Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::DashV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::MathV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::AlnumV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::BlankV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::CasedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::EmojiV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::GraphV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::PrintV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::HyphenV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::XdigitV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::IdStartV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::RadicalV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ExtenderV1Marker, 
icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::HexDigitV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::NfcInertV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::NfdInertV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::XidStartV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::DiacriticV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::LowercaseV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::NfkcInertV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::NfkdInertV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::UppercaseV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::AlphabeticV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::DeprecatedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::IdContinueV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: 
icu_properties::sets::load_set_data::<icu_properties::provider::SoftDottedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::WhiteSpaceV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::BidiControlV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::IdeographicV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::JoinControlV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::XidContinueV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::BidiMirroredV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::GraphemeBaseV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::GraphemeLinkV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::AsciiHexDigitV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::CaseIgnorableV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::CaseSensitiveV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: 
icu_properties::sets::load_set_data::<icu_properties::provider::EmojiModifierV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::PatternSyntaxV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::QuotationMarkV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::EmojiComponentV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::GraphemeExtendV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::SegmentStarterV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::SentenceTerminalV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::UnifiedIdeographV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::EmojiModifierBaseV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::EmojiPresentationV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::IdsBinaryOperatorV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::PatternWhiteSpaceV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: 
icu_properties::sets::load_set_data::<icu_properties::provider::RegionalIndicatorV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::VariationSelectorV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::IdsTrinaryOperatorV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::TerminalPunctuationV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ExtendedPictographicV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ChangesWhenCasefoldedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ChangesWhenCasemappedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ChangesWhenLowercasedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ChangesWhenTitlecasedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ChangesWhenUppercasedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::LogicalOrderExceptionV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::NoncharacterCodePointV1Marker, icu_provider_adapters::empty::EmptyDataProvider> 
Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::FullCompositionExclusionV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::ChangesWhenNfkcCasefoldedV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::DefaultIgnorableCodePointV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<icu_properties::provider::PrependedConcatenationMarkV1Marker, icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_set_data::<_, _> |
331 | | |
332 | | // |
333 | | // Binary property getter fns |
334 | | // (data as code point sets) |
335 | | // |
336 | | |
337 | | macro_rules! make_code_point_set_property { |
338 | | ( |
339 | | // currently unused |
340 | | property: $property:expr; |
341 | | // currently unused |
342 | | marker: $marker_name:ident; |
343 | | keyed_data_marker: $keyed_data_marker:ty; |
344 | | func: |
345 | | $(#[$doc:meta])+ |
346 | | $cvis:vis const fn $constname:ident() => $singleton_name:ident; |
347 | | $vis:vis fn $funcname:ident(); |
348 | | ) => { |
349 | | #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")] |
350 | | /// |
351 | | /// Note that this will return an owned version of the data. Functionality is available on |
352 | | /// the borrowed version, accessible through [`CodePointSetData::as_borrowed`]. |
353 | 0 | $vis fn $funcname( |
354 | 0 | provider: &(impl DataProvider<$keyed_data_marker> + ?Sized) |
355 | 0 | ) -> Result<CodePointSetData, PropertiesError> { |
356 | 0 | load_set_data(provider) |
357 | 0 | } Unexecuted instantiation: icu_properties::sets::load_alnum::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_blank::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_cased::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_emoji::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_graph::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_print::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_hyphen::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_xdigit::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_radical::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_extender::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_id_start::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_diacritic::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_hex_digit::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_lowercase::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_nfc_inert::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_nfd_inert::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_uppercase::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: 
icu_properties::sets::load_xid_start::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_alphabetic::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_deprecated::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_nfkc_inert::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_nfkd_inert::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_id_continue::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_ideographic::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_soft_dotted::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_white_space::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_bidi_control::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_join_control::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_xid_continue::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_bidi_mirrored::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_grapheme_base::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_grapheme_link::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_case_ignorable::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_case_sensitive::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: 
icu_properties::sets::load_emoji_modifier::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_pattern_syntax::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_quotation_mark::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_ascii_hex_digit::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_emoji_component::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_grapheme_extend::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_segment_starter::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_sentence_terminal::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_unified_ideograph::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_emoji_presentation::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_regional_indicator::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_variation_selector::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_emoji_modifier_base::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_ids_binary_operator::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_pattern_white_space::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_ids_trinary_operator::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: 
icu_properties::sets::load_terminal_punctuation::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_extended_pictographic::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_changes_when_casefolded::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_changes_when_casemapped::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_changes_when_lowercased::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_changes_when_titlecased::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_changes_when_uppercased::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_logical_order_exception::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_noncharacter_code_point::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_full_composition_exclusion::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_changes_when_nfkc_casefolded::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_default_ignorable_code_point::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_prepended_concatenation_mark::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_dash::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_math::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_ascii_hex_digit::<_> Unexecuted instantiation: 
icu_properties::sets::load_alnum::<_> Unexecuted instantiation: icu_properties::sets::load_alphabetic::<_> Unexecuted instantiation: icu_properties::sets::load_bidi_control::<_> Unexecuted instantiation: icu_properties::sets::load_bidi_mirrored::<_> Unexecuted instantiation: icu_properties::sets::load_blank::<_> Unexecuted instantiation: icu_properties::sets::load_cased::<_> Unexecuted instantiation: icu_properties::sets::load_case_ignorable::<_> Unexecuted instantiation: icu_properties::sets::load_full_composition_exclusion::<_> Unexecuted instantiation: icu_properties::sets::load_changes_when_casefolded::<_> Unexecuted instantiation: icu_properties::sets::load_changes_when_casemapped::<_> Unexecuted instantiation: icu_properties::sets::load_changes_when_nfkc_casefolded::<_> Unexecuted instantiation: icu_properties::sets::load_changes_when_lowercased::<_> Unexecuted instantiation: icu_properties::sets::load_changes_when_titlecased::<_> Unexecuted instantiation: icu_properties::sets::load_changes_when_uppercased::<_> Unexecuted instantiation: icu_properties::sets::load_dash::<_> Unexecuted instantiation: icu_properties::sets::load_deprecated::<_> Unexecuted instantiation: icu_properties::sets::load_default_ignorable_code_point::<_> Unexecuted instantiation: icu_properties::sets::load_diacritic::<_> Unexecuted instantiation: icu_properties::sets::load_emoji_modifier_base::<_> Unexecuted instantiation: icu_properties::sets::load_emoji_component::<_> Unexecuted instantiation: icu_properties::sets::load_emoji_modifier::<_> Unexecuted instantiation: icu_properties::sets::load_emoji::<_> Unexecuted instantiation: icu_properties::sets::load_emoji_presentation::<_> Unexecuted instantiation: icu_properties::sets::load_extender::<_> Unexecuted instantiation: icu_properties::sets::load_extended_pictographic::<_> Unexecuted instantiation: icu_properties::sets::load_graph::<_> Unexecuted instantiation: icu_properties::sets::load_grapheme_base::<_> Unexecuted instantiation: 
icu_properties::sets::load_grapheme_extend::<_> Unexecuted instantiation: icu_properties::sets::load_grapheme_link::<_> Unexecuted instantiation: icu_properties::sets::load_hex_digit::<_> Unexecuted instantiation: icu_properties::sets::load_hyphen::<_> Unexecuted instantiation: icu_properties::sets::load_id_continue::<_> Unexecuted instantiation: icu_properties::sets::load_ideographic::<_> Unexecuted instantiation: icu_properties::sets::load_id_start::<_> Unexecuted instantiation: icu_properties::sets::load_ids_binary_operator::<_> Unexecuted instantiation: icu_properties::sets::load_ids_trinary_operator::<_> Unexecuted instantiation: icu_properties::sets::load_join_control::<_> Unexecuted instantiation: icu_properties::sets::load_logical_order_exception::<_> Unexecuted instantiation: icu_properties::sets::load_lowercase::<_> Unexecuted instantiation: icu_properties::sets::load_math::<_> Unexecuted instantiation: icu_properties::sets::load_noncharacter_code_point::<_> Unexecuted instantiation: icu_properties::sets::load_nfc_inert::<_> Unexecuted instantiation: icu_properties::sets::load_nfd_inert::<_> Unexecuted instantiation: icu_properties::sets::load_nfkc_inert::<_> Unexecuted instantiation: icu_properties::sets::load_nfkd_inert::<_> Unexecuted instantiation: icu_properties::sets::load_pattern_syntax::<_> Unexecuted instantiation: icu_properties::sets::load_pattern_white_space::<_> Unexecuted instantiation: icu_properties::sets::load_prepended_concatenation_mark::<_> Unexecuted instantiation: icu_properties::sets::load_print::<_> Unexecuted instantiation: icu_properties::sets::load_quotation_mark::<_> Unexecuted instantiation: icu_properties::sets::load_radical::<_> Unexecuted instantiation: icu_properties::sets::load_regional_indicator::<_> Unexecuted instantiation: icu_properties::sets::load_soft_dotted::<_> Unexecuted instantiation: icu_properties::sets::load_segment_starter::<_> Unexecuted instantiation: icu_properties::sets::load_case_sensitive::<_> 
Unexecuted instantiation: icu_properties::sets::load_sentence_terminal::<_> Unexecuted instantiation: icu_properties::sets::load_terminal_punctuation::<_> Unexecuted instantiation: icu_properties::sets::load_unified_ideograph::<_> Unexecuted instantiation: icu_properties::sets::load_uppercase::<_> Unexecuted instantiation: icu_properties::sets::load_variation_selector::<_> Unexecuted instantiation: icu_properties::sets::load_white_space::<_> Unexecuted instantiation: icu_properties::sets::load_xdigit::<_> Unexecuted instantiation: icu_properties::sets::load_xid_continue::<_> Unexecuted instantiation: icu_properties::sets::load_xid_start::<_> |
358 | | |
359 | | $(#[$doc])* |
360 | | #[cfg(feature = "compiled_data")] |
361 | 0 | $cvis const fn $constname() -> CodePointSetDataBorrowed<'static> { |
362 | 0 | CodePointSetDataBorrowed { |
363 | 0 | set: crate::provider::Baked::$singleton_name, |
364 | 0 | } |
365 | 0 | } Unexecuted instantiation: icu_properties::sets::ascii_hex_digit Unexecuted instantiation: icu_properties::sets::alnum Unexecuted instantiation: icu_properties::sets::alphabetic Unexecuted instantiation: icu_properties::sets::bidi_control Unexecuted instantiation: icu_properties::sets::bidi_mirrored Unexecuted instantiation: icu_properties::sets::blank Unexecuted instantiation: icu_properties::sets::cased Unexecuted instantiation: icu_properties::sets::case_ignorable Unexecuted instantiation: icu_properties::sets::full_composition_exclusion Unexecuted instantiation: icu_properties::sets::changes_when_casefolded Unexecuted instantiation: icu_properties::sets::changes_when_casemapped Unexecuted instantiation: icu_properties::sets::changes_when_nfkc_casefolded Unexecuted instantiation: icu_properties::sets::changes_when_lowercased Unexecuted instantiation: icu_properties::sets::changes_when_titlecased Unexecuted instantiation: icu_properties::sets::changes_when_uppercased Unexecuted instantiation: icu_properties::sets::dash Unexecuted instantiation: icu_properties::sets::deprecated Unexecuted instantiation: icu_properties::sets::default_ignorable_code_point Unexecuted instantiation: icu_properties::sets::diacritic Unexecuted instantiation: icu_properties::sets::emoji_modifier_base Unexecuted instantiation: icu_properties::sets::emoji_component Unexecuted instantiation: icu_properties::sets::emoji_modifier Unexecuted instantiation: icu_properties::sets::emoji Unexecuted instantiation: icu_properties::sets::emoji_presentation Unexecuted instantiation: icu_properties::sets::extender Unexecuted instantiation: icu_properties::sets::extended_pictographic Unexecuted instantiation: icu_properties::sets::graph Unexecuted instantiation: icu_properties::sets::grapheme_base Unexecuted instantiation: icu_properties::sets::grapheme_extend Unexecuted instantiation: icu_properties::sets::grapheme_link Unexecuted instantiation: icu_properties::sets::hex_digit Unexecuted 
instantiation: icu_properties::sets::hyphen Unexecuted instantiation: icu_properties::sets::id_continue Unexecuted instantiation: icu_properties::sets::ideographic Unexecuted instantiation: icu_properties::sets::id_start Unexecuted instantiation: icu_properties::sets::ids_binary_operator Unexecuted instantiation: icu_properties::sets::ids_trinary_operator Unexecuted instantiation: icu_properties::sets::join_control Unexecuted instantiation: icu_properties::sets::logical_order_exception Unexecuted instantiation: icu_properties::sets::lowercase Unexecuted instantiation: icu_properties::sets::math Unexecuted instantiation: icu_properties::sets::noncharacter_code_point Unexecuted instantiation: icu_properties::sets::nfc_inert Unexecuted instantiation: icu_properties::sets::nfd_inert Unexecuted instantiation: icu_properties::sets::nfkc_inert Unexecuted instantiation: icu_properties::sets::nfkd_inert Unexecuted instantiation: icu_properties::sets::pattern_syntax Unexecuted instantiation: icu_properties::sets::pattern_white_space Unexecuted instantiation: icu_properties::sets::prepended_concatenation_mark Unexecuted instantiation: icu_properties::sets::print Unexecuted instantiation: icu_properties::sets::quotation_mark Unexecuted instantiation: icu_properties::sets::radical Unexecuted instantiation: icu_properties::sets::regional_indicator Unexecuted instantiation: icu_properties::sets::soft_dotted Unexecuted instantiation: icu_properties::sets::segment_starter Unexecuted instantiation: icu_properties::sets::case_sensitive Unexecuted instantiation: icu_properties::sets::sentence_terminal Unexecuted instantiation: icu_properties::sets::terminal_punctuation Unexecuted instantiation: icu_properties::sets::unified_ideograph Unexecuted instantiation: icu_properties::sets::uppercase Unexecuted instantiation: icu_properties::sets::variation_selector Unexecuted instantiation: icu_properties::sets::white_space Unexecuted instantiation: icu_properties::sets::xdigit Unexecuted 
instantiation: icu_properties::sets::xid_continue Unexecuted instantiation: icu_properties::sets::xid_start |
366 | | } |
367 | | } |
368 | | |
369 | | make_code_point_set_property! { |
370 | | property: "ASCII_Hex_Digit"; |
371 | | marker: AsciiHexDigitProperty; |
372 | | keyed_data_marker: AsciiHexDigitV1Marker; |
373 | | func: |
374 | | /// ASCII characters commonly used for the representation of hexadecimal numbers |
375 | | /// |
376 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
377 | | /// |
378 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
379 | | /// |
380 | | /// # Example |
381 | | /// |
382 | | /// ``` |
383 | | /// use icu::properties::sets; |
384 | | /// |
385 | | /// let ascii_hex_digit = sets::ascii_hex_digit(); |
386 | | /// |
387 | | /// assert!(ascii_hex_digit.contains('3')); |
388 | | /// assert!(!ascii_hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
389 | | /// assert!(ascii_hex_digit.contains('A')); |
390 | | /// assert!(!ascii_hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
391 | | /// ``` |
392 | | pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1; |
393 | | pub fn load_ascii_hex_digit(); |
394 | | } |
395 | | |
396 | | make_code_point_set_property! { |
397 | | property: "Alnum"; |
398 | | marker: AlnumProperty; |
399 | | keyed_data_marker: AlnumV1Marker; |
400 | | func: |
401 | | /// Characters with the Alphabetic or Decimal_Number property. |
402 | | /// This is defined for POSIX compatibility. |
403 | | |
404 | | pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1; |
405 | | pub fn load_alnum(); |
406 | | } |
407 | | |
408 | | make_code_point_set_property! { |
409 | | property: "Alphabetic"; |
410 | | marker: AlphabeticProperty; |
411 | | keyed_data_marker: AlphabeticV1Marker; |
412 | | func: |
413 | | /// Alphabetic characters |
414 | | /// |
415 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
416 | | /// |
417 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
418 | | /// |
419 | | /// # Example |
420 | | /// |
421 | | /// ``` |
422 | | /// use icu::properties::sets; |
423 | | /// |
424 | | /// let alphabetic = sets::alphabetic(); |
425 | | /// |
426 | | /// assert!(!alphabetic.contains('3')); |
427 | | /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
428 | | /// assert!(alphabetic.contains('A')); |
429 | | /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
430 | | /// ``` |
431 | | |
432 | | pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1; |
433 | | pub fn load_alphabetic(); |
434 | | } |
435 | | |
436 | | make_code_point_set_property! { |
437 | | property: "Bidi_Control"; |
438 | | marker: BidiControlProperty; |
439 | | keyed_data_marker: BidiControlV1Marker; |
440 | | func: |
441 | | /// Format control characters which have specific functions in the Unicode Bidirectional |
442 | | /// Algorithm |
443 | | /// |
444 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
445 | | /// |
446 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
447 | | /// |
448 | | /// # Example |
449 | | /// |
450 | | /// ``` |
451 | | /// use icu::properties::sets; |
452 | | /// |
453 | | /// let bidi_control = sets::bidi_control(); |
454 | | /// |
455 | | /// assert!(bidi_control.contains32(0x200F)); // RIGHT-TO-LEFT MARK |
456 | | /// assert!(!bidi_control.contains('ش')); // U+0634 ARABIC LETTER SHEEN |
457 | | /// ``` |
458 | | |
459 | | pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1; |
460 | | pub fn load_bidi_control(); |
461 | | } |
462 | | |
463 | | make_code_point_set_property! { |
464 | | property: "Bidi_Mirrored"; |
465 | | marker: BidiMirroredProperty; |
466 | | keyed_data_marker: BidiMirroredV1Marker; |
467 | | func: |
468 | | /// Characters that are mirrored in bidirectional text |
469 | | /// |
470 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
471 | | /// |
472 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
473 | | /// |
474 | | /// # Example |
475 | | /// |
476 | | /// ``` |
477 | | /// use icu::properties::sets; |
478 | | /// |
479 | | /// let bidi_mirrored = sets::bidi_mirrored(); |
480 | | /// |
481 | | /// assert!(bidi_mirrored.contains('[')); |
482 | | /// assert!(bidi_mirrored.contains(']')); |
483 | | /// assert!(bidi_mirrored.contains('∑')); // U+2211 N-ARY SUMMATION |
484 | | /// assert!(!bidi_mirrored.contains('ཉ')); // U+0F49 TIBETAN LETTER NYA |
485 | | /// ``` |
486 | | |
487 | | pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1; |
488 | | pub fn load_bidi_mirrored(); |
489 | | } |
490 | | |
491 | | make_code_point_set_property! { |
492 | | property: "Blank"; |
493 | | marker: BlankProperty; |
494 | | keyed_data_marker: BlankV1Marker; |
495 | | func: |
496 | | /// Horizontal whitespace characters |
497 | | |
498 | | pub const fn blank() => SINGLETON_PROPS_BLANK_V1; |
499 | | pub fn load_blank(); |
500 | | } |
501 | | |
502 | | make_code_point_set_property! { |
503 | | property: "Cased"; |
504 | | marker: CasedProperty; |
505 | | keyed_data_marker: CasedV1Marker; |
506 | | func: |
507 | | /// Uppercase, lowercase, and titlecase characters |
508 | | /// |
509 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
510 | | /// |
511 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
512 | | /// |
513 | | /// # Example |
514 | | /// |
515 | | /// ``` |
516 | | /// use icu::properties::sets; |
517 | | /// |
518 | | /// let cased = sets::cased(); |
519 | | /// |
520 | | /// assert!(cased.contains('Ꙡ')); // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE |
521 | | /// assert!(!cased.contains('ދ')); // U+078B THAANA LETTER DHAALU |
522 | | /// ``` |
523 | | |
524 | | pub const fn cased() => SINGLETON_PROPS_CASED_V1; |
525 | | pub fn load_cased(); |
526 | | } |
527 | | |
528 | | make_code_point_set_property! { |
529 | | property: "Case_Ignorable"; |
530 | | marker: CaseIgnorableProperty; |
531 | | keyed_data_marker: CaseIgnorableV1Marker; |
532 | | func: |
533 | | /// Characters which are ignored for casing purposes |
534 | | /// |
535 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
536 | | /// |
537 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
538 | | /// |
539 | | /// # Example |
540 | | /// |
541 | | /// ``` |
542 | | /// use icu::properties::sets; |
543 | | /// |
544 | | /// let case_ignorable = sets::case_ignorable(); |
545 | | /// |
546 | | /// assert!(case_ignorable.contains(':')); |
547 | | /// assert!(!case_ignorable.contains('λ')); // U+03BB GREEK SMALL LETTER LAMDA |
548 | | /// ``` |
549 | | |
550 | | pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1; |
551 | | pub fn load_case_ignorable(); |
552 | | } |
553 | | |
554 | | make_code_point_set_property! { |
555 | | property: "Full_Composition_Exclusion"; |
556 | | marker: FullCompositionExclusionProperty; |
557 | | keyed_data_marker: FullCompositionExclusionV1Marker; |
558 | | func: |
559 | | /// Characters that are excluded from composition. |
560 | | /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt> |
561 | | |
562 | | pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1; |
563 | | pub fn load_full_composition_exclusion(); |
564 | | } |
565 | | |
566 | | make_code_point_set_property! { |
567 | | property: "Changes_When_Casefolded"; |
568 | | marker: ChangesWhenCasefoldedProperty; |
569 | | keyed_data_marker: ChangesWhenCasefoldedV1Marker; |
570 | | func: |
571 | | /// Characters whose normalized forms are not stable under case folding |
572 | | /// |
573 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
574 | | /// |
575 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
576 | | /// |
577 | | /// # Example |
578 | | /// |
579 | | /// ``` |
580 | | /// use icu::properties::sets; |
581 | | /// |
582 | | /// let changes_when_casefolded = sets::changes_when_casefolded(); |
583 | | /// |
584 | | /// assert!(changes_when_casefolded.contains('ß')); // U+00DF LATIN SMALL LETTER SHARP S |
585 | | /// assert!(!changes_when_casefolded.contains('ᜉ')); // U+1709 TAGALOG LETTER PA |
586 | | /// ``` |
587 | | |
588 | | pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1; |
589 | | pub fn load_changes_when_casefolded(); |
590 | | } |
591 | | |
592 | | make_code_point_set_property! { |
593 | | property: "Changes_When_Casemapped"; |
594 | | marker: ChangesWhenCasemappedProperty; |
595 | | keyed_data_marker: ChangesWhenCasemappedV1Marker; |
596 | | func: |
597 | | /// Characters which may change when they undergo case mapping |
598 | | |
599 | | pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1; |
600 | | pub fn load_changes_when_casemapped(); |
601 | | } |
602 | | |
603 | | make_code_point_set_property! { |
604 | | property: "Changes_When_NFKC_Casefolded"; |
605 | | marker: ChangesWhenNfkcCasefoldedProperty; |
606 | | keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker; |
607 | | func: |
608 | | /// Characters which are not identical to their NFKC_Casefold mapping |
609 | | /// |
610 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
611 | | /// |
612 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
613 | | /// |
614 | | /// # Example |
615 | | /// |
616 | | /// ``` |
617 | | /// use icu::properties::sets; |
618 | | /// |
619 | | /// let changes_when_nfkc_casefolded = sets::changes_when_nfkc_casefolded(); |
620 | | /// |
621 | | /// assert!(changes_when_nfkc_casefolded.contains('🄵')); // U+1F135 SQUARED LATIN CAPITAL LETTER F |
622 | | /// assert!(!changes_when_nfkc_casefolded.contains('f')); |
623 | | /// ``` |
624 | | |
625 | | pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1; |
626 | | pub fn load_changes_when_nfkc_casefolded(); |
627 | | } |
628 | | |
629 | | make_code_point_set_property! { |
630 | | property: "Changes_When_Lowercased"; |
631 | | marker: ChangesWhenLowercasedProperty; |
632 | | keyed_data_marker: ChangesWhenLowercasedV1Marker; |
633 | | func: |
634 | | /// Characters whose normalized forms are not stable under a toLowercase mapping |
635 | | /// |
636 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
637 | | /// |
638 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
639 | | /// |
640 | | /// # Example |
641 | | /// |
642 | | /// ``` |
643 | | /// use icu::properties::sets; |
644 | | /// |
645 | | /// let changes_when_lowercased = sets::changes_when_lowercased(); |
646 | | /// |
647 | | /// assert!(changes_when_lowercased.contains('Ⴔ')); // U+10B4 GEORGIAN CAPITAL LETTER PHAR |
648 | | /// assert!(!changes_when_lowercased.contains('ფ')); // U+10E4 GEORGIAN LETTER PHAR |
649 | | /// ``` |
650 | | |
651 | | pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1; |
652 | | pub fn load_changes_when_lowercased(); |
653 | | } |
654 | | |
655 | | make_code_point_set_property! { |
656 | | property: "Changes_When_Titlecased"; |
657 | | marker: ChangesWhenTitlecasedProperty; |
658 | | keyed_data_marker: ChangesWhenTitlecasedV1Marker; |
659 | | func: |
660 | | /// Characters whose normalized forms are not stable under a toTitlecase mapping |
661 | | /// |
662 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
663 | | /// |
664 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
665 | | /// |
666 | | /// # Example |
667 | | /// |
668 | | /// ``` |
669 | | /// use icu::properties::sets; |
670 | | /// |
671 | | /// let changes_when_titlecased = sets::changes_when_titlecased(); |
672 | | /// |
673 | | /// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE |
674 | | /// assert!(!changes_when_titlecased.contains('Æ')); // U+00C6 LATIN CAPITAL LETTER AE |
675 | | /// ``` |
676 | | |
677 | | pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1; |
678 | | pub fn load_changes_when_titlecased(); |
679 | | } |
680 | | |
681 | | make_code_point_set_property! { |
682 | | property: "Changes_When_Uppercased"; |
683 | | marker: ChangesWhenUppercasedProperty; |
684 | | keyed_data_marker: ChangesWhenUppercasedV1Marker; |
685 | | func: |
686 | | /// Characters whose normalized forms are not stable under a toUppercase mapping |
687 | | /// |
688 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
689 | | /// |
690 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
691 | | /// |
692 | | /// # Example |
693 | | /// |
694 | | /// ``` |
695 | | /// use icu::properties::sets; |
696 | | /// |
697 | | /// let changes_when_uppercased = sets::changes_when_uppercased(); |
698 | | /// |
699 | | /// assert!(changes_when_uppercased.contains('ւ')); // U+0582 ARMENIAN SMALL LETTER YIWN |
700 | | /// assert!(!changes_when_uppercased.contains('Ւ')); // U+0552 ARMENIAN CAPITAL LETTER YIWN |
701 | | /// ``` |
702 | | |
703 | | pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1; |
704 | | pub fn load_changes_when_uppercased(); |
705 | | } |
706 | | |
707 | | make_code_point_set_property! { |
708 | | property: "Dash"; |
709 | | marker: DashProperty; |
710 | | keyed_data_marker: DashV1Marker; |
711 | | func: |
712 | | /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus |
713 | | /// their compatibility equivalents |
714 | | /// |
715 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
716 | | /// |
717 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
718 | | /// |
719 | | /// # Example |
720 | | /// |
721 | | /// ``` |
722 | | /// use icu::properties::sets; |
723 | | /// |
724 | | /// let dash = sets::dash(); |
725 | | /// |
726 | | /// assert!(dash.contains('⸺')); // U+2E3A TWO-EM DASH |
727 | | /// assert!(dash.contains('-')); // U+002D HYPHEN-MINUS |
728 | | /// assert!(!dash.contains('=')); // U+003D EQUALS SIGN |
729 | | /// ``` |
730 | | |
731 | | pub const fn dash() => SINGLETON_PROPS_DASH_V1; |
732 | | pub fn load_dash(); |
733 | | } |
734 | | |
735 | | make_code_point_set_property! { |
736 | | property: "Deprecated"; |
737 | | marker: DeprecatedProperty; |
738 | | keyed_data_marker: DeprecatedV1Marker; |
739 | | func: |
740 | | /// Deprecated characters. No characters will ever be removed from the standard, but the |
741 | | /// usage of deprecated characters is strongly discouraged. |
742 | | /// |
743 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
744 | | /// |
745 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
746 | | /// |
747 | | /// # Example |
748 | | /// |
749 | | /// ``` |
750 | | /// use icu::properties::sets; |
751 | | /// |
752 | | /// let deprecated = sets::deprecated(); |
753 | | /// |
754 | | /// assert!(deprecated.contains('ឣ')); // U+17A3 KHMER INDEPENDENT VOWEL QAQ |
755 | | /// assert!(!deprecated.contains('A')); |
756 | | /// ``` |
757 | | |
758 | | pub const fn deprecated() => SINGLETON_PROPS_DEP_V1; |
759 | | pub fn load_deprecated(); |
760 | | } |
761 | | |
762 | | make_code_point_set_property! { |
763 | | property: "Default_Ignorable_Code_Point"; |
764 | | marker: DefaultIgnorableCodePointProperty; |
765 | | keyed_data_marker: DefaultIgnorableCodePointV1Marker; |
766 | | func: |
767 | | /// For programmatic determination of default ignorable code points. New characters that |
768 | | /// should be ignored in rendering (unless explicitly supported) will be assigned in these |
769 | | /// ranges, permitting programs to correctly handle the default rendering of such |
770 | | /// characters when not otherwise supported. |
771 | | /// |
772 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
773 | | /// |
774 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
775 | | /// |
776 | | /// # Example |
777 | | /// |
778 | | /// ``` |
779 | | /// use icu::properties::sets; |
780 | | /// |
781 | | /// let default_ignorable_code_point = sets::default_ignorable_code_point(); |
782 | | /// |
783 | | /// assert!(default_ignorable_code_point.contains32(0x180B)); // MONGOLIAN FREE VARIATION SELECTOR ONE |
784 | | /// assert!(!default_ignorable_code_point.contains('E')); |
785 | | /// ``` |
786 | | |
787 | | pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1; |
788 | | pub fn load_default_ignorable_code_point(); |
789 | | } |
790 | | |
791 | | make_code_point_set_property! { |
792 | | property: "Diacritic"; |
793 | | marker: DiacriticProperty; |
794 | | keyed_data_marker: DiacriticV1Marker; |
795 | | func: |
796 | | /// Characters that linguistically modify the meaning of another character to which they apply |
797 | | /// |
798 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
799 | | /// |
800 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
801 | | /// |
802 | | /// # Example |
803 | | /// |
804 | | /// ``` |
805 | | /// use icu::properties::sets; |
806 | | /// |
807 | | /// let diacritic = sets::diacritic(); |
808 | | /// |
809 | | /// assert!(diacritic.contains('\u{05B3}')); // HEBREW POINT HATAF QAMATS |
810 | | /// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF |
811 | | /// ``` |
812 | | |
813 | | pub const fn diacritic() => SINGLETON_PROPS_DIA_V1; |
814 | | pub fn load_diacritic(); |
815 | | } |
816 | | |
817 | | make_code_point_set_property! { |
818 | | property: "Emoji_Modifier_Base"; |
819 | | marker: EmojiModifierBaseProperty; |
820 | | keyed_data_marker: EmojiModifierBaseV1Marker; |
821 | | func: |
822 | | /// Characters that can serve as a base for emoji modifiers |
823 | | /// |
824 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
825 | | /// |
826 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
827 | | /// |
828 | | /// # Example |
829 | | /// |
830 | | /// ``` |
831 | | /// use icu::properties::sets; |
832 | | /// |
833 | | /// let emoji_modifier_base = sets::emoji_modifier_base(); |
834 | | /// |
835 | | /// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST |
836 | | /// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN |
837 | | /// ``` |
838 | | |
839 | | pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1; |
840 | | pub fn load_emoji_modifier_base(); |
841 | | } |
842 | | |
843 | | make_code_point_set_property! { |
844 | | property: "Emoji_Component"; |
845 | | marker: EmojiComponentProperty; |
846 | | keyed_data_marker: EmojiComponentV1Marker; |
847 | | func: |
848 | | /// Characters used in emoji sequences that normally do not appear on emoji keyboards as |
849 | | /// separate choices, such as base characters for emoji keycaps |
850 | | /// |
851 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
852 | | /// |
853 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
854 | | /// |
855 | | /// # Example |
856 | | /// |
857 | | /// ``` |
858 | | /// use icu::properties::sets; |
859 | | /// |
860 | | /// let emoji_component = sets::emoji_component(); |
861 | | /// |
862 | | /// assert!(emoji_component.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
863 | | /// assert!(emoji_component.contains32(0x20E3)); // COMBINING ENCLOSING KEYCAP |
864 | | /// assert!(emoji_component.contains('7')); |
865 | | /// assert!(!emoji_component.contains('T')); |
866 | | /// ``` |
867 | | |
868 | | pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1; |
869 | | pub fn load_emoji_component(); |
870 | | } |
871 | | |
872 | | make_code_point_set_property! { |
873 | | property: "Emoji_Modifier"; |
874 | | marker: EmojiModifierProperty; |
875 | | keyed_data_marker: EmojiModifierV1Marker; |
876 | | func: |
877 | | /// Characters that are emoji modifiers |
878 | | /// |
879 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
880 | | /// |
881 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
882 | | /// |
883 | | /// # Example |
884 | | /// |
885 | | /// ``` |
886 | | /// use icu::properties::sets; |
887 | | /// |
888 | | /// let emoji_modifier = sets::emoji_modifier(); |
889 | | /// |
890 | | /// assert!(emoji_modifier.contains32(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4 |
891 | | /// assert!(!emoji_modifier.contains32(0x200C)); // ZERO WIDTH NON-JOINER |
892 | | /// ``` |
893 | | |
894 | | pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1; |
895 | | pub fn load_emoji_modifier(); |
896 | | } |
897 | | |
898 | | make_code_point_set_property! { |
899 | | property: "Emoji"; |
900 | | marker: EmojiProperty; |
901 | | keyed_data_marker: EmojiV1Marker; |
902 | | func: |
903 | | /// Characters that are emoji |
904 | | /// |
905 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
906 | | /// |
907 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
908 | | /// |
909 | | /// # Example |
910 | | /// |
911 | | /// ``` |
912 | | /// use icu::properties::sets; |
913 | | /// |
914 | | /// let emoji = sets::emoji(); |
915 | | /// |
916 | | /// assert!(emoji.contains('🔥')); // U+1F525 FIRE |
917 | | /// assert!(!emoji.contains('V')); |
918 | | /// ``` |
919 | | |
920 | | pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1; |
921 | | pub fn load_emoji(); |
922 | | } |
923 | | |
924 | | make_code_point_set_property! { |
925 | | property: "Emoji_Presentation"; |
926 | | marker: EmojiPresentationProperty; |
927 | | keyed_data_marker: EmojiPresentationV1Marker; |
928 | | func: |
929 | | /// Characters that have emoji presentation by default |
930 | | /// |
931 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
932 | | /// |
933 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
934 | | /// |
935 | | /// # Example |
936 | | /// |
937 | | /// ``` |
938 | | /// use icu::properties::sets; |
939 | | /// |
940 | | /// let emoji_presentation = sets::emoji_presentation(); |
941 | | /// |
942 | | /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON |
943 | | /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL |
944 | | /// ``` |
945 | | |
946 | | pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1; |
947 | | pub fn load_emoji_presentation(); |
948 | | } |
949 | | |
950 | | make_code_point_set_property! { |
951 | | property: "Extender"; |
952 | | marker: ExtenderProperty; |
953 | | keyed_data_marker: ExtenderV1Marker; |
954 | | func: |
955 | | /// Characters whose principal function is to extend the value of a preceding alphabetic |
956 | | /// character or to extend the shape of adjacent characters. |
957 | | /// |
958 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
959 | | /// |
960 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
961 | | /// |
962 | | /// # Example |
963 | | /// |
964 | | /// ``` |
965 | | /// use icu::properties::sets; |
966 | | /// |
967 | | /// let extender = sets::extender(); |
968 | | /// |
969 | | /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK |
970 | | /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK |
971 | | /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT |
972 | | /// ``` |
973 | | |
974 | | pub const fn extender() => SINGLETON_PROPS_EXT_V1; |
975 | | pub fn load_extender(); |
976 | | } |
977 | | |
978 | | make_code_point_set_property! { |
979 | | property: "Extended_Pictographic"; |
980 | | marker: ExtendedPictographicProperty; |
981 | | keyed_data_marker: ExtendedPictographicV1Marker; |
982 | | func: |
983 | | /// Pictographic symbols, as well as reserved ranges in blocks largely associated with |
984 | | /// emoji characters |
985 | | /// |
986 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
987 | | /// |
988 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
989 | | /// |
990 | | /// # Example |
991 | | /// |
992 | | /// ``` |
993 | | /// use icu::properties::sets; |
994 | | /// |
995 | | /// let extended_pictographic = sets::extended_pictographic(); |
996 | | /// |
997 | | /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT |
998 | | /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E |
999 | | /// ``` |
1000 | | |
1001 | | pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1; |
1002 | | pub fn load_extended_pictographic(); |
1003 | | } |
1004 | | |
1005 | | make_code_point_set_property! { |
1006 | | property: "Graph"; |
1007 | | marker: GraphProperty; |
1008 | | keyed_data_marker: GraphV1Marker; |
1009 | | func: |
1010 | | /// Visible characters. |
1011 | | /// This is defined for POSIX compatibility. |
1012 | | |
1013 | | pub const fn graph() => SINGLETON_PROPS_GRAPH_V1; |
1014 | | pub fn load_graph(); |
1015 | | } |
1016 | | |
1017 | | make_code_point_set_property! { |
1018 | | property: "Grapheme_Base"; |
1019 | | marker: GraphemeBaseProperty; |
1020 | | keyed_data_marker: GraphemeBaseV1Marker; |
1021 | | func: |
1022 | | /// Property used together with the definition of Standard Korean Syllable Block to define |
1023 | | /// "Grapheme base". See D58 in Chapter 3, Conformance in the Unicode Standard. |
1024 | | /// |
1025 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1026 | | /// |
1027 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1028 | | /// |
1029 | | /// # Example |
1030 | | /// |
1031 | | /// ``` |
1032 | | /// use icu::properties::sets; |
1033 | | /// |
1034 | | /// let grapheme_base = sets::grapheme_base(); |
1035 | | /// |
1036 | | /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
1037 | | /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
1038 | | /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
1039 | | /// ``` |
1040 | | |
1041 | | pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1; |
1042 | | pub fn load_grapheme_base(); |
1043 | | } |
1044 | | |
1045 | | make_code_point_set_property! { |
1046 | | property: "Grapheme_Extend"; |
1047 | | marker: GraphemeExtendProperty; |
1048 | | keyed_data_marker: GraphemeExtendV1Marker; |
1049 | | func: |
1050 | | /// Property used to define "Grapheme extender". See D59 in Chapter 3, Conformance in the |
1051 | | /// Unicode Standard. |
1052 | | /// |
1053 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1054 | | /// |
1055 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1056 | | /// |
1057 | | /// # Example |
1058 | | /// |
1059 | | /// ``` |
1060 | | /// use icu::properties::sets; |
1061 | | /// |
1062 | | /// let grapheme_extend = sets::grapheme_extend(); |
1063 | | /// |
1064 | | /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
1065 | | /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
1066 | | /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
1067 | | /// ``` |
1068 | | |
1069 | | pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1; |
1070 | | pub fn load_grapheme_extend(); |
1071 | | } |
1072 | | |
1073 | | make_code_point_set_property! { |
1074 | | property: "Grapheme_Link"; |
1075 | | marker: GraphemeLinkProperty; |
1076 | | keyed_data_marker: GraphemeLinkV1Marker; |
1077 | | func: |
1078 | | /// Deprecated property. Formerly proposed for programmatic determination of grapheme |
1079 | | /// cluster boundaries. |
1080 | | |
1081 | | pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1; |
1082 | | pub fn load_grapheme_link(); |
1083 | | } |
1084 | | |
1085 | | make_code_point_set_property! { |
1086 | | property: "Hex_Digit"; |
1087 | | marker: HexDigitProperty; |
1088 | | keyed_data_marker: HexDigitV1Marker; |
1089 | | func: |
1090 | | /// Characters commonly used for the representation of hexadecimal numbers, plus their |
1091 | | /// compatibility equivalents |
1092 | | /// |
1093 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1094 | | /// |
1095 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1096 | | /// |
1097 | | /// # Example |
1098 | | /// |
1099 | | /// ``` |
1100 | | /// use icu::properties::sets; |
1101 | | /// |
1102 | | /// let hex_digit = sets::hex_digit(); |
1103 | | /// |
1104 | | /// assert!(hex_digit.contains('0')); |
1105 | | /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
1106 | | /// assert!(hex_digit.contains('f')); |
1107 | | /// assert!(hex_digit.contains('ｆ')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F |
1108 | | /// assert!(hex_digit.contains('Ｆ')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F |
1109 | | /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
1110 | | /// ``` |
1111 | | |
1112 | | pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1; |
1113 | | pub fn load_hex_digit(); |
1114 | | } |
1115 | | |
1116 | | make_code_point_set_property! { |
1117 | | property: "Hyphen"; |
1118 | | marker: HyphenProperty; |
1119 | | keyed_data_marker: HyphenV1Marker; |
1120 | | func: |
1121 | | /// Deprecated property. Dashes which are used to mark connections between pieces of |
1122 | | /// words, plus the Katakana middle dot. |
1123 | | |
1124 | | pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1; |
1125 | | pub fn load_hyphen(); |
1126 | | } |
1127 | | |
1128 | | make_code_point_set_property! { |
1129 | | property: "Id_Continue"; |
1130 | | marker: IdContinueProperty; |
1131 | | keyed_data_marker: IdContinueV1Marker; |
1132 | | func: |
1133 | | /// Characters that can come after the first character in an identifier. If using NFKC to |
1134 | | /// fold differences between characters, use [`load_xid_continue`] instead. See |
1135 | | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
1136 | | /// more details. |
1137 | | /// |
1138 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1139 | | /// |
1140 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1141 | | /// |
1142 | | /// # Example |
1143 | | /// |
1144 | | /// ``` |
1145 | | /// use icu::properties::sets; |
1146 | | /// |
1147 | | /// let id_continue = sets::id_continue(); |
1148 | | /// |
1149 | | /// assert!(id_continue.contains('x')); |
1150 | | /// assert!(id_continue.contains('1')); |
1151 | | /// assert!(id_continue.contains('_')); |
1152 | | /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
1153 | | /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1154 | | /// assert!(id_continue.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1155 | | /// ``` |
1156 | | |
1157 | | pub const fn id_continue() => SINGLETON_PROPS_IDC_V1; |
1158 | | pub fn load_id_continue(); |
1159 | | } |
1160 | | |
1161 | | make_code_point_set_property! { |
1162 | | property: "Ideographic"; |
1163 | | marker: IdeographicProperty; |
1164 | | keyed_data_marker: IdeographicV1Marker; |
1165 | | func: |
1166 | | /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese) |
1167 | | /// ideographs, or related siniform ideographs |
1168 | | /// |
1169 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1170 | | /// |
1171 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1172 | | /// |
1173 | | /// # Example |
1174 | | /// |
1175 | | /// ``` |
1176 | | /// use icu::properties::sets; |
1177 | | /// |
1178 | | /// let ideographic = sets::ideographic(); |
1179 | | /// |
1180 | | /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
1181 | | /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB |
1182 | | /// ``` |
1183 | | |
1184 | | pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1; |
1185 | | pub fn load_ideographic(); |
1186 | | } |
1187 | | |
1188 | | make_code_point_set_property! { |
1189 | | property: "Id_Start"; |
1190 | | marker: IdStartProperty; |
1191 | | keyed_data_marker: IdStartV1Marker; |
1192 | | func: |
1193 | | /// Characters that can begin an identifier. If using NFKC to fold differences between |
1194 | | /// characters, use [`load_xid_start`] instead. See [`Unicode Standard Annex |
1195 | | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
1196 | | /// |
1197 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1198 | | /// |
1199 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1200 | | /// |
1201 | | /// # Example |
1202 | | /// |
1203 | | /// ``` |
1204 | | /// use icu::properties::sets; |
1205 | | /// |
1206 | | /// let id_start = sets::id_start(); |
1207 | | /// |
1208 | | /// assert!(id_start.contains('x')); |
1209 | | /// assert!(!id_start.contains('1')); |
1210 | | /// assert!(!id_start.contains('_')); |
1211 | | /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA |
1212 | | /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1213 | | /// assert!(id_start.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1214 | | /// ``` |
1215 | | |
1216 | | pub const fn id_start() => SINGLETON_PROPS_IDS_V1; |
1217 | | pub fn load_id_start(); |
1218 | | } |
1219 | | |
1220 | | make_code_point_set_property! { |
1221 | | property: "Ids_Binary_Operator"; |
1222 | | marker: IdsBinaryOperatorProperty; |
1223 | | keyed_data_marker: IdsBinaryOperatorV1Marker; |
1224 | | func: |
1225 | | /// Characters used in Ideographic Description Sequences |
1226 | | /// |
1227 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1228 | | /// |
1229 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1230 | | /// |
1231 | | /// # Example |
1232 | | /// |
1233 | | /// ``` |
1234 | | /// use icu::properties::sets; |
1235 | | /// |
1236 | | /// let ids_binary_operator = sets::ids_binary_operator(); |
1237 | | /// |
1238 | | /// assert!(ids_binary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
1239 | | /// assert!(!ids_binary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK |
1240 | | /// ``` |
1241 | | |
1242 | | pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1; |
1243 | | pub fn load_ids_binary_operator(); |
1244 | | } |
1245 | | |
1246 | | make_code_point_set_property! { |
1247 | | property: "Ids_Trinary_Operator"; |
1248 | | marker: IdsTrinaryOperatorProperty; |
1249 | | keyed_data_marker: IdsTrinaryOperatorV1Marker; |
1250 | | func: |
1251 | | /// Characters used in Ideographic Description Sequences |
1252 | | /// |
1253 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1254 | | /// |
1255 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1256 | | /// |
1257 | | /// # Example |
1258 | | /// |
1259 | | /// ``` |
1260 | | /// use icu::properties::sets; |
1261 | | /// |
1262 | | /// let ids_trinary_operator = sets::ids_trinary_operator(); |
1263 | | /// |
1264 | | /// assert!(ids_trinary_operator.contains32(0x2FF2)); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT |
1265 | | /// assert!(ids_trinary_operator.contains32(0x2FF3)); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW |
1266 | | /// assert!(!ids_trinary_operator.contains32(0x2FF4)); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND |
1267 | | /// assert!(!ids_trinary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
1268 | | /// assert!(!ids_trinary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK |
1269 | | /// ``` |
1270 | | |
1271 | | pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1; |
1272 | | pub fn load_ids_trinary_operator(); |
1273 | | } |
1274 | | |
1275 | | make_code_point_set_property! { |
1276 | | property: "Join_Control"; |
1277 | | marker: JoinControlProperty; |
1278 | | keyed_data_marker: JoinControlV1Marker; |
1279 | | func: |
1280 | | /// Format control characters which have specific functions for control of cursive joining |
1281 | | /// and ligation |
1282 | | /// |
1283 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1284 | | /// |
1285 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1286 | | /// |
1287 | | /// # Example |
1288 | | /// |
1289 | | /// ``` |
1290 | | /// use icu::properties::sets; |
1291 | | /// |
1292 | | /// let join_control = sets::join_control(); |
1293 | | /// |
1294 | | /// assert!(join_control.contains32(0x200C)); // ZERO WIDTH NON-JOINER |
1295 | | /// assert!(join_control.contains32(0x200D)); // ZERO WIDTH JOINER |
1296 | | /// assert!(!join_control.contains32(0x200E)); // LEFT-TO-RIGHT MARK |
1297 | | /// ``` |
1298 | | |
1299 | | pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1; |
1300 | | pub fn load_join_control(); |
1301 | | } |
1302 | | |
1303 | | make_code_point_set_property! { |
1304 | | property: "Logical_Order_Exception"; |
1305 | | marker: LogicalOrderExceptionProperty; |
1306 | | keyed_data_marker: LogicalOrderExceptionV1Marker; |
1307 | | func: |
1308 | | /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao |
1309 | | /// |
1310 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1311 | | /// |
1312 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1313 | | /// |
1314 | | /// # Example |
1315 | | /// |
1316 | | /// ``` |
1317 | | /// use icu::properties::sets; |
1318 | | /// |
1319 | | /// let logical_order_exception = sets::logical_order_exception(); |
1320 | | /// |
1321 | | /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI |
1322 | | /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A |
1323 | | /// ``` |
1324 | | |
1325 | | pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1; |
1326 | | pub fn load_logical_order_exception(); |
1327 | | } |
1328 | | |
1329 | | make_code_point_set_property! { |
1330 | | property: "Lowercase"; |
1331 | | marker: LowercaseProperty; |
1332 | | keyed_data_marker: LowercaseV1Marker; |
1333 | | func: |
1334 | | /// Lowercase characters |
1335 | | /// |
1336 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1337 | | /// |
1338 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1339 | | /// |
1340 | | /// # Example |
1341 | | /// |
1342 | | /// ``` |
1343 | | /// use icu::properties::sets; |
1344 | | /// |
1345 | | /// let lowercase = sets::lowercase(); |
1346 | | /// |
1347 | | /// assert!(lowercase.contains('a')); |
1348 | | /// assert!(!lowercase.contains('A')); |
1349 | | /// ``` |
1350 | | |
1351 | | pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1; |
1352 | | pub fn load_lowercase(); |
1353 | | } |
1354 | | |
1355 | | make_code_point_set_property! { |
1356 | | property: "Math"; |
1357 | | marker: MathProperty; |
1358 | | keyed_data_marker: MathV1Marker; |
1359 | | func: |
1360 | | /// Characters used in mathematical notation |
1361 | | /// |
1362 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1363 | | /// |
1364 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1365 | | /// |
1366 | | /// # Example |
1367 | | /// |
1368 | | /// ``` |
1369 | | /// use icu::properties::sets; |
1370 | | /// |
1371 | | /// let math = sets::math(); |
1372 | | /// |
1373 | | /// assert!(math.contains('=')); |
1374 | | /// assert!(math.contains('+')); |
1375 | | /// assert!(!math.contains('-')); |
1376 | | /// assert!(math.contains('−')); // U+2212 MINUS SIGN |
1377 | | /// assert!(!math.contains('/')); |
1378 | | /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH |
1379 | | /// ``` |
1380 | | |
1381 | | pub const fn math() => SINGLETON_PROPS_MATH_V1; |
1382 | | pub fn load_math(); |
1383 | | } |
1384 | | |
1385 | | make_code_point_set_property! { |
1386 | | property: "Noncharacter_Code_Point"; |
1387 | | marker: NoncharacterCodePointProperty; |
1388 | | keyed_data_marker: NoncharacterCodePointV1Marker; |
1389 | | func: |
1390 | | /// Code points permanently reserved for internal use |
1391 | | /// |
1392 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1393 | | /// |
1394 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1395 | | /// |
1396 | | /// # Example |
1397 | | /// |
1398 | | /// ``` |
1399 | | /// use icu::properties::sets; |
1400 | | /// |
1401 | | /// let noncharacter_code_point = sets::noncharacter_code_point(); |
1402 | | /// |
1403 | | /// assert!(noncharacter_code_point.contains32(0xFDD0)); // first of the U+FDD0..U+FDEF noncharacters |
1404 | | /// assert!(noncharacter_code_point.contains32(0xFFFF)); // last code point of the BMP |
1405 | | /// assert!(!noncharacter_code_point.contains32(0x10000)); // LINEAR B SYLLABLE B008 A |
1406 | | /// ``` |
1407 | | |
1408 | | pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1; |
1409 | | pub fn load_noncharacter_code_point(); |
1410 | | } |
1411 | | |
1412 | | make_code_point_set_property! { |
1413 | | property: "NFC_Inert"; |
1414 | | marker: NfcInertProperty; |
1415 | | keyed_data_marker: NfcInertV1Marker; |
1416 | | func: |
1417 | | /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters |
1418 | | |
1419 | | pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1; |
1420 | | pub fn load_nfc_inert(); |
1421 | | } |
1422 | | |
1423 | | make_code_point_set_property! { |
1424 | | property: "NFD_Inert"; |
1425 | | marker: NfdInertProperty; |
1426 | | keyed_data_marker: NfdInertV1Marker; |
1427 | | func: |
1428 | | /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters |
1429 | | |
1430 | | pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1; |
1431 | | pub fn load_nfd_inert(); |
1432 | | } |
1433 | | |
1434 | | make_code_point_set_property! { |
1435 | | property: "NFKC_Inert"; |
1436 | | marker: NfkcInertProperty; |
1437 | | keyed_data_marker: NfkcInertV1Marker; |
1438 | | func: |
1439 | | /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters |
1440 | | |
1441 | | pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1; |
1442 | | pub fn load_nfkc_inert(); |
1443 | | } |
1444 | | |
1445 | | make_code_point_set_property! { |
1446 | | property: "NFKD_Inert"; |
1447 | | marker: NfkdInertProperty; |
1448 | | keyed_data_marker: NfkdInertV1Marker; |
1449 | | func: |
1450 | | /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters |
1451 | | |
1452 | | pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1; |
1453 | | pub fn load_nfkd_inert(); |
1454 | | } |
1455 | | |
1456 | | make_code_point_set_property! { |
1457 | | property: "Pattern_Syntax"; |
1458 | | marker: PatternSyntaxProperty; |
1459 | | keyed_data_marker: PatternSyntaxV1Marker; |
1460 | | func: |
1461 | | /// Characters used as syntax in patterns (such as regular expressions). See [`Unicode |
1462 | | /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more |
1463 | | /// details. |
1464 | | /// |
1465 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1466 | | /// |
1467 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1468 | | /// |
1469 | | /// # Example |
1470 | | /// |
1471 | | /// ``` |
1472 | | /// use icu::properties::sets; |
1473 | | /// |
1474 | | /// let pattern_syntax = sets::pattern_syntax(); |
1475 | | /// |
1476 | | /// assert!(pattern_syntax.contains('{')); // U+007B LEFT CURLY BRACKET |
1477 | | /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW |
1478 | | /// assert!(!pattern_syntax.contains('0')); // U+0030 DIGIT ZERO |
1479 | | /// ``` |
1480 | |
1481 | | pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1; |
1482 | | pub fn load_pattern_syntax(); |
1483 | | } |
1484 | | |
1485 | | make_code_point_set_property! { |
1486 | | property: "Pattern_White_Space"; |
1487 | | marker: PatternWhiteSpaceProperty; |
1488 | | keyed_data_marker: PatternWhiteSpaceV1Marker; |
1489 | | func: |
1490 | | /// Characters used as whitespace in patterns (such as regular expressions). See |
1491 | | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
1492 | | /// more details. |
1493 | | /// |
1494 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1495 | | /// |
1496 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1497 | | /// |
1498 | | /// # Example |
1499 | | /// |
1500 | | /// ``` |
1501 | | /// use icu::properties::sets; |
1502 | | /// |
1503 | | /// let pattern_white_space = sets::pattern_white_space(); |
1504 | | /// |
1505 | | /// assert!(pattern_white_space.contains(' ')); // U+0020 SPACE |
1506 | | /// assert!(pattern_white_space.contains32(0x2029)); // U+2029 PARAGRAPH SEPARATOR |
1507 | | /// assert!(pattern_white_space.contains32(0x000A)); // U+000A LINE FEED (NEW LINE) |
1508 | | /// assert!(!pattern_white_space.contains32(0x00A0)); // U+00A0 NO-BREAK SPACE |
1509 | | /// ``` |
1510 | |
1511 | | pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1; |
1512 | | pub fn load_pattern_white_space(); |
1513 | | } |
1514 | | |
1515 | | make_code_point_set_property! { |
1516 | | property: "Prepended_Concatenation_Mark"; |
1517 | | marker: PrependedConcatenationMarkProperty; |
1518 | | keyed_data_marker: PrependedConcatenationMarkV1Marker; |
1519 | | func: |
1520 | | /// A small class of visible format controls, which precede and then span a sequence of |
1521 | | /// other characters, usually digits. These are also known as "subtending marks". |
1522 | |
1523 | | pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1; |
1524 | | pub fn load_prepended_concatenation_mark(); |
1525 | | } |
1526 | | |
1527 | | make_code_point_set_property! { |
1528 | | property: "Print"; |
1529 | | marker: PrintProperty; |
1530 | | keyed_data_marker: PrintV1Marker; |
1531 | | func: |
1532 | | /// Printable characters (visible characters and whitespace). |
1533 | | /// This is defined for POSIX compatibility; see Annex C of <https://www.unicode.org/reports/tr18>. |
1534 | |
1535 | | pub const fn print() => SINGLETON_PROPS_PRINT_V1; |
1536 | | pub fn load_print(); |
1537 | | } |
1538 | | |
1539 | | make_code_point_set_property! { |
1540 | | property: "Quotation_Mark"; |
1541 | | marker: QuotationMarkProperty; |
1542 | | keyed_data_marker: QuotationMarkV1Marker; |
1543 | | func: |
1544 | | /// Punctuation characters that function as quotation marks. |
1545 | | /// |
1546 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1547 | | /// |
1548 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1549 | | /// |
1550 | | /// # Example |
1551 | | /// |
1552 | | /// ``` |
1553 | | /// use icu::properties::sets; |
1554 | | /// |
1555 | | /// let quotation_mark = sets::quotation_mark(); |
1556 | | /// |
1557 | | /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE |
1558 | | /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK |
1559 | | /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN |
1560 | | /// ``` |
1561 | |
1562 | | pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1; |
1563 | | pub fn load_quotation_mark(); |
1564 | | } |
1565 | | |
1566 | | make_code_point_set_property! { |
1567 | | property: "Radical"; |
1568 | | marker: RadicalProperty; |
1569 | | keyed_data_marker: RadicalV1Marker; |
1570 | | func: |
1571 | | /// Characters used in the definition of Ideographic Description Sequences. |
1572 | | /// |
1573 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1574 | | /// |
1575 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1576 | | /// |
1577 | | /// # Example |
1578 | | /// |
1579 | | /// ``` |
1580 | | /// use icu::properties::sets; |
1581 | | /// |
1582 | | /// let radical = sets::radical(); |
1583 | | /// |
1584 | | /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX |
1585 | | /// assert!(!radical.contains('丹')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E |
1586 | | /// ``` |
1587 | |
1588 | | pub const fn radical() => SINGLETON_PROPS_RADICAL_V1; |
1589 | | pub fn load_radical(); |
1590 | | } |
1591 | | |
1592 | | make_code_point_set_property! { |
1593 | | property: "Regional_Indicator"; |
1594 | | marker: RegionalIndicatorProperty; |
1595 | | keyed_data_marker: RegionalIndicatorV1Marker; |
1596 | | func: |
1597 | | /// Regional indicator characters, U+1F1E6..U+1F1FF. |
1598 | | /// |
1599 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1600 | | /// |
1601 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1602 | | /// |
1603 | | /// # Example |
1604 | | /// |
1605 | | /// ``` |
1606 | | /// use icu::properties::sets; |
1607 | | /// |
1608 | | /// let regional_indicator = sets::regional_indicator(); |
1609 | | /// |
1610 | | /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
1611 | | /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T |
1612 | | /// assert!(!regional_indicator.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
1613 | | /// ``` |
1614 | |
1615 | | pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1; |
1616 | | pub fn load_regional_indicator(); |
1617 | | } |
1618 | | |
1619 | | make_code_point_set_property! { |
1620 | | property: "Soft_Dotted"; |
1621 | | marker: SoftDottedProperty; |
1622 | | keyed_data_marker: SoftDottedV1Marker; |
1623 | | func: |
1624 | | /// Characters with a "soft dot", like i or j. An accent placed on these characters causes |
1625 | | /// the dot to disappear. |
1626 | | /// |
1627 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1628 | | /// |
1629 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1630 | | /// |
1631 | | /// # Example |
1632 | | /// |
1633 | | /// ``` |
1634 | | /// use icu::properties::sets; |
1635 | | /// |
1636 | | /// let soft_dotted = sets::soft_dotted(); |
1637 | | /// |
1638 | | /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
1639 | | /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I |
1640 | | /// ``` |
1641 | |
1642 | | pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1; |
1643 | | pub fn load_soft_dotted(); |
1644 | | } |
1645 | | |
1646 | | make_code_point_set_property! { |
1647 | | property: "Segment_Starter"; |
1648 | | marker: SegmentStarterProperty; |
1649 | | keyed_data_marker: SegmentStarterV1Marker; |
1650 | | func: |
1651 | | /// Characters that are starters in terms of Unicode normalization and combining character |
1652 | | /// sequences. |
1653 | |
1654 | | pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1; |
1655 | | pub fn load_segment_starter(); |
1656 | | } |
1657 | | |
1658 | | make_code_point_set_property! { |
1659 | | property: "Case_Sensitive"; |
1660 | | marker: CaseSensitiveProperty; |
1661 | | keyed_data_marker: CaseSensitiveV1Marker; |
1662 | | func: |
1663 | | /// Characters that are either the source of a case mapping or in the target of a case |
1664 | | /// mapping. |
1665 | |
1666 | | pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1; |
1667 | | pub fn load_case_sensitive(); |
1668 | | } |
1669 | | |
1670 | | make_code_point_set_property! { |
1671 | | property: "Sentence_Terminal"; |
1672 | | marker: SentenceTerminalProperty; |
1673 | | keyed_data_marker: SentenceTerminalV1Marker; |
1674 | | func: |
1675 | | /// Punctuation characters that generally mark the end of sentences. |
1676 | | /// |
1677 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1678 | | /// |
1679 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1680 | | /// |
1681 | | /// # Example |
1682 | | /// |
1683 | | /// ``` |
1684 | | /// use icu::properties::sets; |
1685 | | /// |
1686 | | /// let sentence_terminal = sets::sentence_terminal(); |
1687 | | /// |
1688 | | /// assert!(sentence_terminal.contains('.')); // U+002E FULL STOP |
1689 | | /// assert!(sentence_terminal.contains('?')); // U+003F QUESTION MARK |
1690 | | /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
1691 | | /// assert!(!sentence_terminal.contains(',')); // U+002C COMMA |
1692 | | /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK |
1693 | | /// ``` |
1694 | |
1695 | | pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1; |
1696 | | pub fn load_sentence_terminal(); |
1697 | | } |
1698 | | |
1699 | | make_code_point_set_property! { |
1700 | | property: "Terminal_Punctuation"; |
1701 | | marker: TerminalPunctuationProperty; |
1702 | | keyed_data_marker: TerminalPunctuationV1Marker; |
1703 | | func: |
1704 | | /// Punctuation characters that generally mark the end of textual units. |
1705 | | /// |
1706 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1707 | | /// |
1708 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1709 | | /// |
1710 | | /// # Example |
1711 | | /// |
1712 | | /// ``` |
1713 | | /// use icu::properties::sets; |
1714 | | /// |
1715 | | /// let terminal_punctuation = sets::terminal_punctuation(); |
1716 | | /// |
1717 | | /// assert!(terminal_punctuation.contains('.')); // U+002E FULL STOP |
1718 | | /// assert!(terminal_punctuation.contains('?')); // U+003F QUESTION MARK |
1719 | | /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
1720 | | /// assert!(terminal_punctuation.contains(',')); // U+002C COMMA |
1721 | | /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK |
1722 | | /// ``` |
1723 | |
1724 | | pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1; |
1725 | | pub fn load_terminal_punctuation(); |
1726 | | } |
1727 | | |
1728 | | make_code_point_set_property! { |
1729 | | property: "Unified_Ideograph"; |
1730 | | marker: UnifiedIdeographProperty; |
1731 | | keyed_data_marker: UnifiedIdeographV1Marker; |
1732 | | func: |
1733 | | /// A property which specifies the exact set of Unified CJK Ideographs in the standard. |
1734 | | /// |
1735 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1736 | | /// |
1737 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1738 | | /// |
1739 | | /// # Example |
1740 | | /// |
1741 | | /// ``` |
1742 | | /// use icu::properties::sets; |
1743 | | /// |
1744 | | /// let unified_ideograph = sets::unified_ideograph(); |
1745 | | /// |
1746 | | /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
1747 | | /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728 |
1748 | | /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178 |
1749 | | /// ``` |
1750 | |
1751 | | pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1; |
1752 | | pub fn load_unified_ideograph(); |
1753 | | } |
1754 | | |
1755 | | make_code_point_set_property! { |
1756 | | property: "Uppercase"; |
1757 | | marker: UppercaseProperty; |
1758 | | keyed_data_marker: UppercaseV1Marker; |
1759 | | func: |
1760 | | /// Uppercase characters. |
1761 | | /// |
1762 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1763 | | /// |
1764 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1765 | | /// |
1766 | | /// # Example |
1767 | | /// |
1768 | | /// ``` |
1769 | | /// use icu::properties::sets; |
1770 | | /// |
1771 | | /// let uppercase = sets::uppercase(); |
1772 | | /// |
1773 | | /// assert!(uppercase.contains('U')); // U+0055 LATIN CAPITAL LETTER U |
1774 | | /// assert!(!uppercase.contains('u')); // U+0075 LATIN SMALL LETTER U |
1775 | | /// ``` |
1776 | |
1777 | | pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1; |
1778 | | pub fn load_uppercase(); |
1779 | | } |
1780 | | |
1781 | | make_code_point_set_property! { |
1782 | | property: "Variation_Selector"; |
1783 | | marker: VariationSelectorProperty; |
1784 | | keyed_data_marker: VariationSelectorV1Marker; |
1785 | | func: |
1786 | | /// Characters that are Variation Selectors. |
1787 | | /// |
1788 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1789 | | /// |
1790 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1791 | | /// |
1792 | | /// # Example |
1793 | | /// |
1794 | | /// ``` |
1795 | | /// use icu::properties::sets; |
1796 | | /// |
1797 | | /// let variation_selector = sets::variation_selector(); |
1798 | | /// |
1799 | | /// assert!(variation_selector.contains32(0x180D)); // U+180D MONGOLIAN FREE VARIATION SELECTOR THREE |
1800 | | /// assert!(!variation_selector.contains32(0x303E)); // U+303E IDEOGRAPHIC VARIATION INDICATOR |
1801 | | /// assert!(variation_selector.contains32(0xFE0F)); // U+FE0F VARIATION SELECTOR-16 |
1802 | | /// assert!(!variation_selector.contains32(0xFE10)); // U+FE10 PRESENTATION FORM FOR VERTICAL COMMA |
1803 | | /// assert!(variation_selector.contains32(0xE01EF)); // U+E01EF VARIATION SELECTOR-256 |
1804 | | /// ``` |
1805 | |
1806 | | pub const fn variation_selector() => SINGLETON_PROPS_VS_V1; |
1807 | | pub fn load_variation_selector(); |
1808 | | } |
1809 | | |
1810 | | make_code_point_set_property! { |
1811 | | property: "White_Space"; |
1812 | | marker: WhiteSpaceProperty; |
1813 | | keyed_data_marker: WhiteSpaceV1Marker; |
1814 | | func: |
1815 | | /// Spaces, separator characters and other control characters which should be treated by |
1816 | | /// programming languages as "white space" for the purpose of parsing elements. |
1817 | | /// |
1818 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1819 | | /// |
1820 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1821 | | /// |
1822 | | /// # Example |
1823 | | /// |
1824 | | /// ``` |
1825 | | /// use icu::properties::sets; |
1826 | | /// |
1827 | | /// let white_space = sets::white_space(); |
1828 | | /// |
1829 | | /// assert!(white_space.contains(' ')); // U+0020 SPACE |
1830 | | /// assert!(white_space.contains32(0x000A)); // U+000A LINE FEED (NEW LINE) |
1831 | | /// assert!(white_space.contains32(0x00A0)); // U+00A0 NO-BREAK SPACE |
1832 | | /// assert!(!white_space.contains32(0x200B)); // U+200B ZERO WIDTH SPACE |
1833 | | /// ``` |
1834 | |
1835 | | pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1; |
1836 | | pub fn load_white_space(); |
1837 | | } |
1838 | | |
1839 | | make_code_point_set_property! { |
1840 | | property: "Xdigit"; |
1841 | | marker: XdigitProperty; |
1842 | | keyed_data_marker: XdigitV1Marker; |
1843 | | func: |
1844 | | /// Hexadecimal digits. |
1845 | | /// This is defined for POSIX compatibility; see Annex C of <https://www.unicode.org/reports/tr18>. |
1846 | |
1847 | | pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1; |
1848 | | pub fn load_xdigit(); |
1849 | | } |
1850 | | |
1851 | | make_code_point_set_property! { |
1852 | | property: "XID_Continue"; |
1853 | | marker: XidContinueProperty; |
1854 | | keyed_data_marker: XidContinueV1Marker; |
1855 | | func: |
1856 | | /// Characters that can come after the first character in an identifier. See [`Unicode Standard Annex |
1857 | | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
1858 | | /// |
1859 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1860 | | /// |
1861 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1862 | | /// |
1863 | | /// # Example |
1864 | | /// |
1865 | | /// ``` |
1866 | | /// use icu::properties::sets; |
1867 | | /// |
1868 | | /// let xid_continue = sets::xid_continue(); |
1869 | | /// |
1870 | | /// assert!(xid_continue.contains('x')); // U+0078 LATIN SMALL LETTER X |
1871 | | /// assert!(xid_continue.contains('1')); // U+0031 DIGIT ONE |
1872 | | /// assert!(xid_continue.contains('_')); // U+005F LOW LINE |
1873 | | /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
1874 | | /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1875 | | /// assert!(!xid_continue.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1876 | | /// ``` |
1877 | |
1878 | | pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1; |
1879 | | pub fn load_xid_continue(); |
1880 | | } |
1881 | | |
1882 | | make_code_point_set_property! { |
1883 | | property: "XID_Start"; |
1884 | | marker: XidStartProperty; |
1885 | | keyed_data_marker: XidStartV1Marker; |
1886 | | func: |
1887 | | /// Characters that can begin an identifier. See [`Unicode |
1888 | | /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more |
1889 | | /// details. |
1890 | | /// |
1891 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1892 | | /// |
1893 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1894 | | /// |
1895 | | /// # Example |
1896 | | /// |
1897 | | /// ``` |
1898 | | /// use icu::properties::sets; |
1899 | | /// |
1900 | | /// let xid_start = sets::xid_start(); |
1901 | | /// |
1902 | | /// assert!(xid_start.contains('x')); // U+0078 LATIN SMALL LETTER X |
1903 | | /// assert!(!xid_start.contains('1')); // U+0031 DIGIT ONE |
1904 | | /// assert!(!xid_start.contains('_')); // U+005F LOW LINE |
1905 | | /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA |
1906 | | /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1907 | | /// assert!(!xid_start.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1908 | | /// ``` |
1909 | |
1910 | | pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1; |
1911 | | pub fn load_xid_start(); |
1912 | | } |
1913 | | |
1914 | | // |
1915 | | // Binary property getter fns |
1916 | | // (data as sets of strings + code points) |
1917 | | // |
1918 | | |
// Generates the pair of getters for one binary property whose data is a set of
// strings and code points:
//
// * `$funcname(provider)` loads the property data from a caller-supplied
//   `DataProvider<$keyed_data_marker>` and wraps it in a `UnicodeSetData`.
// * `$constname()` returns a `UnicodeSetDataBorrowed<'static>` backed by the baked
//   `$singleton`; it only exists when the `compiled_data` feature is enabled.
//
// The doc attributes written in the invocation are attached to `$constname`, while
// `$funcname` gets a generated one-line doc that points back at `$constname`.
macro_rules! make_unicode_set_property {
    (
        // accepted for symmetry with the invocation syntax, but currently unused
        property: $property:expr;
        // accepted for symmetry with the invocation syntax, but currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<UnicodeSetData, PropertiesError> {
            // Any `DataError` from loading or from a missing payload is converted
            // into a `PropertiesError` by `?`.
            let payload = provider
                .load(Default::default())
                .and_then(DataResponse::take_payload)?;
            Ok(UnicodeSetData::from_data(payload))
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> UnicodeSetDataBorrowed<'static> {
            let baked = crate::provider::Baked::$singleton;
            UnicodeSetDataBorrowed { set: baked }
        }
    }
}
1946 | | |
1947 | | make_unicode_set_property! { |
1948 | | property: "Basic_Emoji"; |
1949 | | marker: BasicEmojiProperty; |
1950 | | keyed_data_marker: BasicEmojiV1Marker; |
1951 | | func: |
1952 | | /// Characters and character sequences intended for general-purpose, independent, direct input. |
1953 | | /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more |
1954 | | /// details. |
1955 | | /// |
1956 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1957 | | /// |
1958 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1959 | | /// |
1960 | | /// # Example |
1961 | | /// |
1962 | | /// ``` |
1963 | | /// use icu::properties::sets; |
1964 | | /// |
1965 | | /// let basic_emoji = sets::basic_emoji(); |
1966 | | /// |
1967 | | /// assert!(!basic_emoji.contains32(0x0020)); // U+0020 SPACE |
1968 | | /// assert!(!basic_emoji.contains_char('\n')); // U+000A LINE FEED |
1969 | | /// assert!(basic_emoji.contains_char('🦃')); // U+1F983 TURKEY |
1970 | | /// assert!(basic_emoji.contains("\u{1F983}")); // the same code point, looked up as a string |
1971 | | /// assert!(basic_emoji.contains("\u{1F6E4}\u{FE0F}")); // railway track |
1972 | | /// assert!(!basic_emoji.contains("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3 |
1973 | | /// ``` |
1974 | | pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1; |
1975 | | pub fn load_basic_emoji(); |
1976 | | } |
1977 | | |
1978 | | // |
1979 | | // Enumerated property getter fns |
1980 | | // |
1981 | | |
1982 | | /// A version of [`for_general_category_group()`] that uses custom data provided by a [`DataProvider`]. |
1983 | | /// |
1984 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1985 | 0 | pub fn load_for_general_category_group( |
1986 | 0 | provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized), |
1987 | 0 | enum_val: GeneralCategoryGroup, |
1988 | 0 | ) -> Result<CodePointSetData, PropertiesError> { |
1989 | 0 | let gc_map_payload = maps::load_general_category(provider)?; |
1990 | 0 | let gc_map = gc_map_payload.as_borrowed(); |
1991 | 0 | let matching_gc_ranges = gc_map |
1992 | 0 | .iter_ranges() |
1993 | 0 | .filter(|cpm_range| (1 << cpm_range.value as u32) & enum_val.0 != 0) Unexecuted instantiation: icu_properties::sets::load_for_general_category_group::<icu_provider_adapters::empty::EmptyDataProvider>::{closure#0} Unexecuted instantiation: icu_properties::sets::load_for_general_category_group::<_>::{closure#0} |
1994 | 0 | .map(|cpm_range| cpm_range.range); Unexecuted instantiation: icu_properties::sets::load_for_general_category_group::<icu_provider_adapters::empty::EmptyDataProvider>::{closure#1} Unexecuted instantiation: icu_properties::sets::load_for_general_category_group::<_>::{closure#1} |
1995 | 0 | let set = CodePointInversionList::from_iter(matching_gc_ranges); |
1996 | 0 | Ok(CodePointSetData::from_code_point_inversion_list(set)) |
1997 | 0 | } Unexecuted instantiation: icu_properties::sets::load_for_general_category_group::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_for_general_category_group::<_> |
1998 | | |
/// Returns a [`CodePointSetData`] for a value or a grouping of values of the
/// General_Category property. See [`GeneralCategoryGroup`].
///
/// The set is built from the compiled General_Category map: every code point range whose
/// category has its bit set in `enum_val` is included.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    let category_map = maps::general_category();
    let group_mask = enum_val.0;

    // Each General_Category value owns one bit of the group mask; keep only the
    // ranges whose category bit is present in the requested group.
    let member_ranges = category_map.iter_ranges().filter_map(|entry| {
        let category_bit = 1u32 << (entry.value as u32);
        (category_bit & group_mask != 0).then_some(entry.range)
    });

    let inversion_list = CodePointInversionList::from_iter(member_ranges);
    CodePointSetData::from_code_point_inversion_list(inversion_list)
}
2013 | | |
/// Returns the compiled-data set for a property specified as a string, as long as it is a
/// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
///
/// This handles every property required by ECMA-262 `/u` regular expressions, except for:
///
/// - `Script` and `General_Category`: handle these directly with [`maps::load_script()`] and
///   [`maps::load_general_category()`], using property values parsed via
///   [`Script::get_name_to_enum_mapper()`] and [`GeneralCategory::get_name_to_enum_mapper()`]
///   if necessary.
/// - `Script_Extensions`: handle this directly using APIs from [`crate::script`], like
///   [`script::load_script_with_extensions_unstable()`].
/// - `General_Category` mask values: handle these alongside `General_Category` using
///   [`GeneralCategoryGroup`], with property values parsed via
///   [`GeneralCategoryGroup::get_name_to_enum_mapper()`] if necessary.
/// - `Any`, `Assigned`, and `ASCII` pseudoproperties: handle these using their equivalent sets:
///    - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
///    - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
///    - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
/// - `General_Category` property values can themselves be treated like properties using a
///   shorthand in ECMA-262; simply create the corresponding `GeneralCategory` set.
///
/// # Errors
///
/// Returns [`PropertiesError::UnexpectedPropertyName`] when `name` is not recognized as an
/// ECMA-262 property name, or when it names a property that is not one of the binary
/// properties handled here.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::load_for_ecma262("Emoji").expect("loading data failed");
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
///
/// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
    use crate::runtime::UnicodeProperty;

    // Reject names the ECMA-262 parser does not know before dispatching.
    let parsed = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;

    let set = match parsed {
        UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
        UnicodeProperty::Alphabetic => alphabetic(),
        UnicodeProperty::BidiControl => bidi_control(),
        UnicodeProperty::BidiMirrored => bidi_mirrored(),
        UnicodeProperty::CaseIgnorable => case_ignorable(),
        UnicodeProperty::Cased => cased(),
        UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
        UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
        UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
        UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
        UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
        UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
        UnicodeProperty::Dash => dash(),
        UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
        UnicodeProperty::Deprecated => deprecated(),
        UnicodeProperty::Diacritic => diacritic(),
        UnicodeProperty::Emoji => emoji(),
        UnicodeProperty::EmojiComponent => emoji_component(),
        UnicodeProperty::EmojiModifier => emoji_modifier(),
        UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
        UnicodeProperty::EmojiPresentation => emoji_presentation(),
        UnicodeProperty::ExtendedPictographic => extended_pictographic(),
        UnicodeProperty::Extender => extender(),
        UnicodeProperty::GraphemeBase => grapheme_base(),
        UnicodeProperty::GraphemeExtend => grapheme_extend(),
        UnicodeProperty::HexDigit => hex_digit(),
        UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
        UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
        UnicodeProperty::IdContinue => id_continue(),
        UnicodeProperty::IdStart => id_start(),
        UnicodeProperty::Ideographic => ideographic(),
        UnicodeProperty::JoinControl => join_control(),
        UnicodeProperty::LogicalOrderException => logical_order_exception(),
        UnicodeProperty::Lowercase => lowercase(),
        UnicodeProperty::Math => math(),
        UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
        UnicodeProperty::PatternSyntax => pattern_syntax(),
        UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
        UnicodeProperty::QuotationMark => quotation_mark(),
        UnicodeProperty::Radical => radical(),
        UnicodeProperty::RegionalIndicator => regional_indicator(),
        UnicodeProperty::SentenceTerminal => sentence_terminal(),
        UnicodeProperty::SoftDotted => soft_dotted(),
        UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
        UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
        UnicodeProperty::Uppercase => uppercase(),
        UnicodeProperty::VariationSelector => variation_selector(),
        UnicodeProperty::WhiteSpace => white_space(),
        UnicodeProperty::XidContinue => xid_continue(),
        UnicodeProperty::XidStart => xid_start(),
        // A name that parsed but has no binary set getter listed above.
        _ => return Err(PropertiesError::UnexpectedPropertyName),
    };
    Ok(set)
}
2110 | | |
// Invokes the `icu_provider` constructor-generation macro for the `load_for_ecma262`
// family, passing `locale: skip`, a `name: &str` argument, and a
// `Result<CodePointSetData, PropertiesError>` result type.
// NOTE(review): `load_for_ecma262` and `load_for_ecma262_unstable` are both written by hand
// in this file, so presumably `#[cfg(skip)]` keeps the macro from emitting the compiled-data
// constructor and only the `_with_any_provider` / `_with_buffer_provider` wrappers are
// generated here — confirm against the macro definition in `icu_provider`.
2111 | | icu_provider::gen_any_buffer_data_constructors!( |
2112 | | locale: skip, |
2113 | | name: &str, |
2114 | | result: Result<CodePointSetData, PropertiesError>, |
2115 | | #[cfg(skip)] |
2116 | | functions: [ |
2117 | | load_for_ecma262, |
2118 | | load_for_ecma262_with_any_provider, |
2119 | | load_for_ecma262_with_buffer_provider, |
2120 | | load_for_ecma262_unstable, |
2121 | | ] |
2122 | | ); |
2123 | | |
2124 | | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)] |
2125 | 0 | pub fn load_for_ecma262_unstable<P>( |
2126 | 0 | provider: &P, |
2127 | 0 | name: &str, |
2128 | 0 | ) -> Result<CodePointSetData, PropertiesError> |
2129 | 0 | where |
2130 | 0 | P: ?Sized |
2131 | 0 | + DataProvider<AsciiHexDigitV1Marker> |
2132 | 0 | + DataProvider<AlphabeticV1Marker> |
2133 | 0 | + DataProvider<BidiControlV1Marker> |
2134 | 0 | + DataProvider<BidiMirroredV1Marker> |
2135 | 0 | + DataProvider<CaseIgnorableV1Marker> |
2136 | 0 | + DataProvider<CasedV1Marker> |
2137 | 0 | + DataProvider<ChangesWhenCasefoldedV1Marker> |
2138 | 0 | + DataProvider<ChangesWhenCasemappedV1Marker> |
2139 | 0 | + DataProvider<ChangesWhenLowercasedV1Marker> |
2140 | 0 | + DataProvider<ChangesWhenNfkcCasefoldedV1Marker> |
2141 | 0 | + DataProvider<ChangesWhenTitlecasedV1Marker> |
2142 | 0 | + DataProvider<ChangesWhenUppercasedV1Marker> |
2143 | 0 | + DataProvider<DashV1Marker> |
2144 | 0 | + DataProvider<DefaultIgnorableCodePointV1Marker> |
2145 | 0 | + DataProvider<DeprecatedV1Marker> |
2146 | 0 | + DataProvider<DiacriticV1Marker> |
2147 | 0 | + DataProvider<EmojiV1Marker> |
2148 | 0 | + DataProvider<EmojiComponentV1Marker> |
2149 | 0 | + DataProvider<EmojiModifierV1Marker> |
2150 | 0 | + DataProvider<EmojiModifierBaseV1Marker> |
2151 | 0 | + DataProvider<EmojiPresentationV1Marker> |
2152 | 0 | + DataProvider<ExtendedPictographicV1Marker> |
2153 | 0 | + DataProvider<ExtenderV1Marker> |
2154 | 0 | + DataProvider<GraphemeBaseV1Marker> |
2155 | 0 | + DataProvider<GraphemeExtendV1Marker> |
2156 | 0 | + DataProvider<HexDigitV1Marker> |
2157 | 0 | + DataProvider<IdsBinaryOperatorV1Marker> |
2158 | 0 | + DataProvider<IdsTrinaryOperatorV1Marker> |
2159 | 0 | + DataProvider<IdContinueV1Marker> |
2160 | 0 | + DataProvider<IdStartV1Marker> |
2161 | 0 | + DataProvider<IdeographicV1Marker> |
2162 | 0 | + DataProvider<JoinControlV1Marker> |
2163 | 0 | + DataProvider<LogicalOrderExceptionV1Marker> |
2164 | 0 | + DataProvider<LowercaseV1Marker> |
2165 | 0 | + DataProvider<MathV1Marker> |
2166 | 0 | + DataProvider<NoncharacterCodePointV1Marker> |
2167 | 0 | + DataProvider<PatternSyntaxV1Marker> |
2168 | 0 | + DataProvider<PatternWhiteSpaceV1Marker> |
2169 | 0 | + DataProvider<QuotationMarkV1Marker> |
2170 | 0 | + DataProvider<RadicalV1Marker> |
2171 | 0 | + DataProvider<RegionalIndicatorV1Marker> |
2172 | 0 | + DataProvider<SentenceTerminalV1Marker> |
2173 | 0 | + DataProvider<SoftDottedV1Marker> |
2174 | 0 | + DataProvider<TerminalPunctuationV1Marker> |
2175 | 0 | + DataProvider<UnifiedIdeographV1Marker> |
2176 | 0 | + DataProvider<UppercaseV1Marker> |
2177 | 0 | + DataProvider<VariationSelectorV1Marker> |
2178 | 0 | + DataProvider<WhiteSpaceV1Marker> |
2179 | 0 | + DataProvider<XidContinueV1Marker> |
2180 | 0 | + DataProvider<XidStartV1Marker>, |
2181 | 0 | { |
2182 | | use crate::runtime::UnicodeProperty; |
2183 | | |
2184 | 0 | let prop = if let Some(prop) = UnicodeProperty::parse_ecma262_name(name) { |
2185 | 0 | prop |
2186 | | } else { |
2187 | 0 | return Err(PropertiesError::UnexpectedPropertyName); |
2188 | | }; |
2189 | 0 | match prop { |
2190 | 0 | UnicodeProperty::AsciiHexDigit => load_ascii_hex_digit(provider), |
2191 | 0 | UnicodeProperty::Alphabetic => load_alphabetic(provider), |
2192 | 0 | UnicodeProperty::BidiControl => load_bidi_control(provider), |
2193 | 0 | UnicodeProperty::BidiMirrored => load_bidi_mirrored(provider), |
2194 | 0 | UnicodeProperty::CaseIgnorable => load_case_ignorable(provider), |
2195 | 0 | UnicodeProperty::Cased => load_cased(provider), |
2196 | 0 | UnicodeProperty::ChangesWhenCasefolded => load_changes_when_casefolded(provider), |
2197 | 0 | UnicodeProperty::ChangesWhenCasemapped => load_changes_when_casemapped(provider), |
2198 | 0 | UnicodeProperty::ChangesWhenLowercased => load_changes_when_lowercased(provider), |
2199 | 0 | UnicodeProperty::ChangesWhenNfkcCasefolded => load_changes_when_nfkc_casefolded(provider), |
2200 | 0 | UnicodeProperty::ChangesWhenTitlecased => load_changes_when_titlecased(provider), |
2201 | 0 | UnicodeProperty::ChangesWhenUppercased => load_changes_when_uppercased(provider), |
2202 | 0 | UnicodeProperty::Dash => load_dash(provider), |
2203 | 0 | UnicodeProperty::DefaultIgnorableCodePoint => load_default_ignorable_code_point(provider), |
2204 | 0 | UnicodeProperty::Deprecated => load_deprecated(provider), |
2205 | 0 | UnicodeProperty::Diacritic => load_diacritic(provider), |
2206 | 0 | UnicodeProperty::Emoji => load_emoji(provider), |
2207 | 0 | UnicodeProperty::EmojiComponent => load_emoji_component(provider), |
2208 | 0 | UnicodeProperty::EmojiModifier => load_emoji_modifier(provider), |
2209 | 0 | UnicodeProperty::EmojiModifierBase => load_emoji_modifier_base(provider), |
2210 | 0 | UnicodeProperty::EmojiPresentation => load_emoji_presentation(provider), |
2211 | 0 | UnicodeProperty::ExtendedPictographic => load_extended_pictographic(provider), |
2212 | 0 | UnicodeProperty::Extender => load_extender(provider), |
2213 | 0 | UnicodeProperty::GraphemeBase => load_grapheme_base(provider), |
2214 | 0 | UnicodeProperty::GraphemeExtend => load_grapheme_extend(provider), |
2215 | 0 | UnicodeProperty::HexDigit => load_hex_digit(provider), |
2216 | 0 | UnicodeProperty::IdsBinaryOperator => load_ids_binary_operator(provider), |
2217 | 0 | UnicodeProperty::IdsTrinaryOperator => load_ids_trinary_operator(provider), |
2218 | 0 | UnicodeProperty::IdContinue => load_id_continue(provider), |
2219 | 0 | UnicodeProperty::IdStart => load_id_start(provider), |
2220 | 0 | UnicodeProperty::Ideographic => load_ideographic(provider), |
2221 | 0 | UnicodeProperty::JoinControl => load_join_control(provider), |
2222 | 0 | UnicodeProperty::LogicalOrderException => load_logical_order_exception(provider), |
2223 | 0 | UnicodeProperty::Lowercase => load_lowercase(provider), |
2224 | 0 | UnicodeProperty::Math => load_math(provider), |
2225 | 0 | UnicodeProperty::NoncharacterCodePoint => load_noncharacter_code_point(provider), |
2226 | 0 | UnicodeProperty::PatternSyntax => load_pattern_syntax(provider), |
2227 | 0 | UnicodeProperty::PatternWhiteSpace => load_pattern_white_space(provider), |
2228 | 0 | UnicodeProperty::QuotationMark => load_quotation_mark(provider), |
2229 | 0 | UnicodeProperty::Radical => load_radical(provider), |
2230 | 0 | UnicodeProperty::RegionalIndicator => load_regional_indicator(provider), |
2231 | 0 | UnicodeProperty::SentenceTerminal => load_sentence_terminal(provider), |
2232 | 0 | UnicodeProperty::SoftDotted => load_soft_dotted(provider), |
2233 | 0 | UnicodeProperty::TerminalPunctuation => load_terminal_punctuation(provider), |
2234 | 0 | UnicodeProperty::UnifiedIdeograph => load_unified_ideograph(provider), |
2235 | 0 | UnicodeProperty::Uppercase => load_uppercase(provider), |
2236 | 0 | UnicodeProperty::VariationSelector => load_variation_selector(provider), |
2237 | 0 | UnicodeProperty::WhiteSpace => load_white_space(provider), |
2238 | 0 | UnicodeProperty::XidContinue => load_xid_continue(provider), |
2239 | 0 | UnicodeProperty::XidStart => load_xid_start(provider), |
2240 | 0 | _ => Err(PropertiesError::UnexpectedPropertyName), |
2241 | | } |
2242 | 0 | } Unexecuted instantiation: icu_properties::sets::load_for_ecma262_unstable::<icu_provider_adapters::empty::EmptyDataProvider> Unexecuted instantiation: icu_properties::sets::load_for_ecma262_unstable::<_> |
2243 | | |
// Unit tests for code point sets, written against the `icu::properties` paths imported inside each test.
2244 | | #[cfg(test)] |
2245 | | mod tests { |
2246 | | |
// The set for GeneralCategoryGroup::Number must contain the digit five from three scripts
// and must not contain the letter 'A'.
2247 | | #[test] |
2248 | | fn test_general_category() { |
2249 | | use icu::properties::sets; |
2250 | | use icu::properties::GeneralCategoryGroup; |
2251 | | |
2252 | | let digits_data = sets::for_general_category_group(GeneralCategoryGroup::Number); |
2253 | | let digits = digits_data.as_borrowed(); |
2254 | | |
2255 | | assert!(digits.contains('5')); |
2256 | | assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE |
2257 | | assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE |
2258 | | |
2259 | | assert!(!digits.contains('A')); |
2260 | | } |
2261 | | |
// The set obtained from the script map for Script::Thai must contain a Thai letter and a Thai digit,
// and must not contain 'A' or U+0E3F (the baht sign).
2262 | | #[test] |
2263 | | fn test_script() { |
2264 | | use icu::properties::maps; |
2265 | | use icu::properties::Script; |
2266 | | |
2267 | | let thai_data = maps::script().get_set_for_value(Script::Thai); |
2268 | | let thai = thai_data.as_borrowed(); |
2269 | | |
2270 | | assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI |
2271 | | assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO |
2272 | | |
2273 | | assert!(!thai.contains('A')); |
2274 | | assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT |
2275 | | } |
2276 | | |
// For each GeneralCategoryGroup below, the group's set must equal the union of the sets of the
// listed GeneralCategory values; the comparison is made on the inversion-list vectors.
2277 | | #[test] |
2278 | | fn test_gc_groupings() { |
2279 | | use icu::properties::{maps, sets}; |
2280 | | use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
2281 | | use icu_collections::codepointinvlist::CodePointInversionListBuilder; |
2282 | | |
2283 | | let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| { |
2284 | | let category_set = sets::for_general_category_group(category); |
2285 | | let category_set = category_set |
2286 | | .as_code_point_inversion_list() |
2287 | | .expect("The data should be valid"); |
2288 | | |
// Accumulate every range of every subcategory's set into a single builder.
2289 | | let mut builder = CodePointInversionListBuilder::new(); |
2290 | | for subcategory in subcategories { |
2291 | | let gc_set_data = &maps::general_category().get_set_for_value(*subcategory); |
2292 | | let gc_set = gc_set_data.as_borrowed(); |
2293 | | for range in gc_set.iter_ranges() { |
2294 | | builder.add_range32(&range); |
2295 | | } |
2296 | | } |
2297 | | let combined_set = builder.build(); |
// Print the group and its subcategories so the test output identifies which comparison ran.
2298 | | println!("{category:?} {subcategories:?}"); |
2299 | | assert_eq!( |
2300 | | category_set.get_inversion_list_vec(), |
2301 | | combined_set.get_inversion_list_vec() |
2302 | | ); |
2303 | | }; |
2304 | | |
2305 | | test_group( |
2306 | | GeneralCategoryGroup::Letter, |
2307 | | &[ |
2308 | | GeneralCategory::UppercaseLetter, |
2309 | | GeneralCategory::LowercaseLetter, |
2310 | | GeneralCategory::TitlecaseLetter, |
2311 | | GeneralCategory::ModifierLetter, |
2312 | | GeneralCategory::OtherLetter, |
2313 | | ], |
2314 | | ); |
2315 | | test_group( |
2316 | | GeneralCategoryGroup::Other, |
2317 | | &[ |
2318 | | GeneralCategory::Control, |
2319 | | GeneralCategory::Format, |
2320 | | GeneralCategory::Unassigned, |
2321 | | GeneralCategory::PrivateUse, |
2322 | | GeneralCategory::Surrogate, |
2323 | | ], |
2324 | | ); |
2325 | | test_group( |
2326 | | GeneralCategoryGroup::Mark, |
2327 | | &[ |
2328 | | GeneralCategory::SpacingMark, |
2329 | | GeneralCategory::EnclosingMark, |
2330 | | GeneralCategory::NonspacingMark, |
2331 | | ], |
2332 | | ); |
2333 | | test_group( |
2334 | | GeneralCategoryGroup::Number, |
2335 | | &[ |
2336 | | GeneralCategory::DecimalNumber, |
2337 | | GeneralCategory::LetterNumber, |
2338 | | GeneralCategory::OtherNumber, |
2339 | | ], |
2340 | | ); |
2341 | | test_group( |
2342 | | GeneralCategoryGroup::Punctuation, |
2343 | | &[ |
2344 | | GeneralCategory::ConnectorPunctuation, |
2345 | | GeneralCategory::DashPunctuation, |
2346 | | GeneralCategory::ClosePunctuation, |
2347 | | GeneralCategory::FinalPunctuation, |
2348 | | GeneralCategory::InitialPunctuation, |
2349 | | GeneralCategory::OtherPunctuation, |
2350 | | GeneralCategory::OpenPunctuation, |
2351 | | ], |
2352 | | ); |
2353 | | test_group( |
2354 | | GeneralCategoryGroup::Symbol, |
2355 | | &[ |
2356 | | GeneralCategory::CurrencySymbol, |
2357 | | GeneralCategory::ModifierSymbol, |
2358 | | GeneralCategory::MathSymbol, |
2359 | | GeneralCategory::OtherSymbol, |
2360 | | ], |
2361 | | ); |
2362 | | test_group( |
2363 | | GeneralCategoryGroup::Separator, |
2364 | | &[ |
2365 | | GeneralCategory::LineSeparator, |
2366 | | GeneralCategory::ParagraphSeparator, |
2367 | | GeneralCategory::SpaceSeparator, |
2368 | | ], |
2369 | | ); |
2370 | | } |
2371 | | |
// The GeneralCategory::Surrogate set is queried with contains32 and raw u32 values, because
// surrogate code points are not valid Rust `char` values; 'A' must not be in the set.
2372 | | #[test] |
2373 | | fn test_gc_surrogate() { |
2374 | | use icu::properties::maps; |
2375 | | use icu::properties::GeneralCategory; |
2376 | | |
2377 | | let surrogates_data = |
2378 | | maps::general_category().get_set_for_value(GeneralCategory::Surrogate); |
2379 | | let surrogates = surrogates_data.as_borrowed(); |
2380 | | |
2381 | | assert!(surrogates.contains32(0xd800)); |
2382 | | assert!(surrogates.contains32(0xd900)); |
2383 | | assert!(surrogates.contains32(0xdfff)); |
2384 | | |
2385 | | assert!(!surrogates.contains('A')); |
2386 | | } |
2387 | | } |