Coverage Report

Created: 2025-06-16 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/icu_casemap-1.5.1/src/casemapper.rs
Line
Count
Source (jump to first uncovered line)
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use crate::internals::{CaseMapLocale, FoldOptions, FullCaseWriteable, StringAndWriteable};
6
use crate::provider::data::MappingKind;
7
use crate::provider::CaseMapV1;
8
use crate::provider::CaseMapV1Marker;
9
use crate::set::ClosureSink;
10
use crate::titlecase::{LeadingAdjustment, TitlecaseOptions, TrailingCase};
11
use alloc::string::String;
12
use icu_locid::LanguageIdentifier;
13
use icu_provider::prelude::*;
14
use writeable::Writeable;
15
16
/// A struct with the ability to convert characters and strings to uppercase or lowercase,
17
/// or fold them to a normalized form for case-insensitive comparison.
18
///
19
/// # Examples
20
///
21
/// ```rust
22
/// use icu::casemap::CaseMapper;
23
/// use icu::locid::langid;
24
///
25
/// let cm = CaseMapper::new();
26
///
27
/// assert_eq!(
28
///     cm.uppercase_to_string("hello world", &langid!("und")),
29
///     "HELLO WORLD"
30
/// );
31
/// assert_eq!(
32
///     cm.lowercase_to_string("Γειά σου Κόσμε", &langid!("und")),
33
///     "γειά σου κόσμε"
34
/// );
35
/// ```
36
#[derive(Clone, Debug)]
37
pub struct CaseMapper {
38
    pub(crate) data: DataPayload<CaseMapV1Marker>,
39
}
40
41
#[cfg(feature = "compiled_data")]
42
impl Default for CaseMapper {
43
0
    fn default() -> Self {
44
0
        Self::new()
45
0
    }
46
}
47
48
impl AsRef<CaseMapper> for CaseMapper {
49
0
    fn as_ref(&self) -> &CaseMapper {
50
0
        self
51
0
    }
52
}
53
54
impl CaseMapper {
55
    /// Creates a [`CaseMapper`] using compiled data.
56
    ///
57
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
58
    ///
59
    /// [📚 Help choosing a constructor](icu_provider::constructors)
60
    ///
61
    /// # Examples
62
    ///
63
    /// ```rust
64
    /// use icu::casemap::CaseMapper;
65
    /// use icu::locid::langid;
66
    ///
67
    /// let cm = CaseMapper::new();
68
    ///
69
    /// assert_eq!(
70
    ///     cm.uppercase_to_string("hello world", &langid!("und")),
71
    ///     "HELLO WORLD"
72
    /// );
73
    /// ```
74
    #[cfg(feature = "compiled_data")]
75
0
    pub const fn new() -> Self {
76
0
        Self {
77
0
            data: DataPayload::from_static_ref(crate::provider::Baked::SINGLETON_PROPS_CASEMAP_V1),
78
0
        }
79
0
    }
80
81
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError,
82
    #[cfg(skip)]
83
    functions: [
84
        new,
85
        try_new_with_any_provider,
86
        try_new_with_buffer_provider,
87
        try_new_unstable,
88
        Self,
89
    ]);
90
91
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
92
0
    pub fn try_new_unstable<P>(provider: &P) -> Result<CaseMapper, DataError>
93
0
    where
94
0
        P: DataProvider<CaseMapV1Marker> + ?Sized,
95
0
    {
96
0
        let data = provider.load(Default::default())?.take_payload()?;
97
0
        Ok(Self { data })
98
0
    }
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>>
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::try_new_unstable::<_>
99
100
    /// Returns the full lowercase mapping of the given string as a [`Writeable`].
101
    /// This function is context and language sensitive. Callers should pass the text's language
102
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
103
    /// `Default::default()` for the root locale.
104
    ///
105
    /// See [`Self::lowercase_to_string()`] for the equivalent convenience function that returns a String,
106
    /// as well as for an example.
107
0
    pub fn lowercase<'a>(
108
0
        &'a self,
109
0
        src: &'a str,
110
0
        langid: &LanguageIdentifier,
111
0
    ) -> impl Writeable + 'a {
112
0
        self.data.get().full_helper_writeable::<false>(
113
0
            src,
114
0
            CaseMapLocale::from_langid(langid),
115
0
            MappingKind::Lower,
116
0
            TrailingCase::default(),
117
0
        )
118
0
    }
119
120
    /// Returns the full uppercase mapping of the given string as a [`Writeable`].
121
    /// This function is context and language sensitive. Callers should pass the text's language
122
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
123
    /// `Default::default()` for the root locale.
124
    ///
125
    /// See [`Self::uppercase_to_string()`] for the equivalent convenience function that returns a String,
126
    /// as well as for an example.
127
0
    pub fn uppercase<'a>(
128
0
        &'a self,
129
0
        src: &'a str,
130
0
        langid: &LanguageIdentifier,
131
0
    ) -> impl Writeable + 'a {
132
0
        self.data.get().full_helper_writeable::<false>(
133
0
            src,
134
0
            CaseMapLocale::from_langid(langid),
135
0
            MappingKind::Upper,
136
0
            TrailingCase::default(),
137
0
        )
138
0
    }
139
140
    /// Returns the full titlecase mapping of the given string as a [`Writeable`], treating
141
    /// the string as a single segment (and thus only titlecasing the beginning of it). Performs
142
    /// the specified leading adjustment behavior from the options without loading additional data.
143
    ///
144
    /// This should typically be used as a lower-level helper to construct the titlecasing operation desired
145
    /// by the application, for example one can titlecase on a per-word basis by mixing this with
146
    /// a `WordSegmenter`.
147
    ///
148
    /// This function is context and language sensitive. Callers should pass the text's language
149
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
150
    /// `Default::default()` for the root locale.
151
    ///
152
    /// This function performs "adjust to cased" leading adjustment behavior when [`LeadingAdjustment::Auto`] or [`LeadingAdjustment::ToCased`]
153
    /// is set. Auto mode is not able to pick the "adjust to letter/number/symbol" behavior as this type does not load
154
    /// the data to do so, use [`TitlecaseMapper`] if such behavior is desired. See
155
    /// the docs of [`TitlecaseMapper`] for more information on what this means. There is no difference between
156
    /// the behavior of this function and the equivalent ones on [`TitlecaseMapper`] when the leading adjustment mode
157
    /// is [`LeadingAdjustment::None`].
158
    ///
159
    /// See [`Self::titlecase_segment_with_only_case_data_to_string()`] for the equivalent convenience function that returns a String,
160
    /// as well as for an example.
161
    ///
162
    /// [`TitlecaseMapper`]: crate::TitlecaseMapper
163
0
    pub fn titlecase_segment_with_only_case_data<'a>(
164
0
        &'a self,
165
0
        src: &'a str,
166
0
        langid: &LanguageIdentifier,
167
0
        options: TitlecaseOptions,
168
0
    ) -> impl Writeable + 'a {
169
0
        self.titlecase_segment_with_adjustment(src, langid, options, |data, ch| data.is_cased(ch))
170
0
    }
171
172
    /// Helper to support different leading adjustment behaviors,
173
    /// `char_is_lead` is a function that returns true for a character that is allowed to be the
174
    /// first relevant character in a titlecasing string, when `leading_adjustment != None`
175
    ///
176
    /// We return a concrete type instead of `impl Trait` so the return value can be mixed with that of other calls
177
    /// to this function with different closures
178
0
    pub(crate) fn titlecase_segment_with_adjustment<'a>(
179
0
        &'a self,
180
0
        src: &'a str,
181
0
        langid: &LanguageIdentifier,
182
0
        options: TitlecaseOptions,
183
0
        char_is_lead: impl Fn(&CaseMapV1, char) -> bool,
184
0
    ) -> StringAndWriteable<FullCaseWriteable<'a, true>> {
185
0
        let data = self.data.get();
186
0
        let (head, rest) = match options.leading_adjustment {
187
            LeadingAdjustment::Auto | LeadingAdjustment::ToCased => {
188
0
                let first_cased = src.char_indices().find(|(_i, ch)| char_is_lead(data, *ch));
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_adjustment::<<icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment::{closure#0}>::{closure#0}
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_adjustment::<<icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment::{closure#1}>::{closure#0}
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_adjustment::<<icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_only_case_data::{closure#0}>::{closure#0}
189
0
                if let Some((first_cased, _ch)) = first_cased {
190
0
                    (
191
0
                        src.get(..first_cased).unwrap_or(""),
192
0
                        src.get(first_cased..).unwrap_or(""),
193
0
                    )
194
                } else {
195
0
                    (src, "")
196
                }
197
            }
198
0
            LeadingAdjustment::None => ("", src),
199
        };
200
0
        let writeable = data.full_helper_writeable::<true>(
201
0
            rest,
202
0
            CaseMapLocale::from_langid(langid),
203
0
            MappingKind::Title,
204
0
            options.trailing_case,
205
0
        );
206
0
        StringAndWriteable {
207
0
            string: head,
208
0
            writeable,
209
0
        }
210
0
    }
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_adjustment::<<icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment::{closure#0}>
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_adjustment::<<icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment::{closure#1}>
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_adjustment::<<icu_casemap::casemapper::CaseMapper>::titlecase_segment_with_only_case_data::{closure#0}>
211
    /// Case-folds the characters in the given string as a [`Writeable`].
212
    /// This function is locale-independent and context-insensitive.
213
    ///
214
    /// Can be used to test if two strings are case-insensitively equivalent.
215
    ///
216
    /// See [`Self::fold_string()`] for the equivalent convenience function that returns a String,
217
    /// as well as for an example.
218
0
    pub fn fold<'a>(&'a self, src: &'a str) -> impl Writeable + 'a {
219
0
        self.data.get().full_helper_writeable::<false>(
220
0
            src,
221
0
            CaseMapLocale::Root,
222
0
            MappingKind::Fold,
223
0
            TrailingCase::default(),
224
0
        )
225
0
    }
226
227
    /// Case-folds the characters in the given string as a [`Writeable`],
228
    /// using Turkic (T) mappings for dotted/dotless I.
229
    /// This function is locale-independent and context-insensitive.
230
    ///
231
    /// Can be used to test if two strings are case-insensitively equivalent.
232
    ///
233
    /// See [`Self::fold_turkic_string()`] for the equivalent convenience function that returns a String,
234
    /// as well as for an example.
235
0
    pub fn fold_turkic<'a>(&'a self, src: &'a str) -> impl Writeable + 'a {
236
0
        self.data.get().full_helper_writeable::<false>(
237
0
            src,
238
0
            CaseMapLocale::Turkish,
239
0
            MappingKind::Fold,
240
0
            TrailingCase::default(),
241
0
        )
242
0
    }
243
244
    /// Returns the full lowercase mapping of the given string as a String.
245
    ///
246
    /// This function is context and language sensitive. Callers should pass the text's language
247
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
248
    /// `Default::default()` for the root locale.
249
    ///
250
    /// See [`Self::lowercase()`] for the equivalent lower-level function that returns a [`Writeable`]
251
    ///
252
    /// # Examples
253
    ///
254
    /// ```rust
255
    /// use icu::casemap::CaseMapper;
256
    /// use icu::locid::langid;
257
    ///
258
    /// let cm = CaseMapper::new();
259
    /// let root = langid!("und");
260
    ///
261
    /// assert_eq!(cm.lowercase_to_string("hEllO WorLd", &root), "hello world");
262
    /// assert_eq!(cm.lowercase_to_string("Γειά σου Κόσμε", &root), "γειά σου κόσμε");
263
    /// assert_eq!(cm.lowercase_to_string("नमस्ते दुनिया", &root), "नमस्ते दुनिया");
264
    /// assert_eq!(cm.lowercase_to_string("Привет мир", &root), "привет мир");
265
    ///
266
    /// // Some behavior is language-sensitive
267
    /// assert_eq!(cm.lowercase_to_string("CONSTANTINOPLE", &root), "constantinople");
268
    /// assert_eq!(cm.lowercase_to_string("CONSTANTINOPLE", &langid!("tr")), "constantınople");
269
    /// ```
270
0
    pub fn lowercase_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
271
0
        self.lowercase(src, langid).write_to_string().into_owned()
272
0
    }
273
274
    /// Returns the full uppercase mapping of the given string as a String.
275
    ///
276
    /// This function is context and language sensitive. Callers should pass the text's language
277
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
278
    /// `Default::default()` for the root locale.
279
    ///
280
    /// See [`Self::uppercase()`] for the equivalent lower-level function that returns a [`Writeable`]
281
    ///
282
    /// # Examples
283
    ///
284
    /// ```rust
285
    /// use icu::casemap::CaseMapper;
286
    /// use icu::locid::langid;
287
    ///
288
    /// let cm = CaseMapper::new();
289
    /// let root = langid!("und");
290
    ///
291
    /// assert_eq!(cm.uppercase_to_string("hEllO WorLd", &root), "HELLO WORLD");
292
    /// assert_eq!(cm.uppercase_to_string("Γειά σου Κόσμε", &root), "ΓΕΙΆ ΣΟΥ ΚΌΣΜΕ");
293
    /// assert_eq!(cm.uppercase_to_string("नमस्ते दुनिया", &root), "नमस्ते दुनिया");
294
    /// assert_eq!(cm.uppercase_to_string("Привет мир", &root), "ПРИВЕТ МИР");
295
    ///
296
    /// // Some behavior is language-sensitive
297
    /// assert_eq!(cm.uppercase_to_string("istanbul", &root), "ISTANBUL");
298
    /// assert_eq!(cm.uppercase_to_string("istanbul", &langid!("tr")), "İSTANBUL"); // Turkish dotted i
299
    ///
300
    /// assert_eq!(cm.uppercase_to_string("և Երևանի", &root), "ԵՒ ԵՐԵՒԱՆԻ");
301
    /// assert_eq!(cm.uppercase_to_string("և Երևանի", &langid!("hy")), "ԵՎ ԵՐԵՎԱՆԻ"); // Eastern Armenian ech-yiwn ligature
302
    /// ```
303
0
    pub fn uppercase_to_string(&self, src: &str, langid: &LanguageIdentifier) -> String {
304
0
        self.uppercase(src, langid).write_to_string().into_owned()
305
0
    }
306
307
    /// Returns the full titlecase mapping of the given string as a String, treating
308
    /// the string as a single segment (and thus only titlecasing the beginning of it). Performs
309
    /// the specified leading adjustment behavior from the options without loading additional data.
310
    ///
311
    /// Note that [`TitlecaseMapper`] has better behavior, most users should consider using
312
    /// it instead. This method primarily exists for people who care about the amount of data being loaded.
313
    ///
314
    /// This should typically be used as a lower-level helper to construct the titlecasing operation desired
315
    /// by the application, for example one can titlecase on a per-word basis by mixing this with
316
    /// a `WordSegmenter`.
317
    ///
318
    /// This function is context and language sensitive. Callers should pass the text's language
319
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
320
    /// `Default::default()` for the root locale.
321
    ///
322
    /// This function performs "adjust to cased" leading adjustment behavior when [`LeadingAdjustment::Auto`] or [`LeadingAdjustment::ToCased`]
323
    /// is set. Auto mode is not able to pick the "adjust to letter/number/symbol" behavior as this type does not load
324
    /// the data to do so, use [`TitlecaseMapper`] if such behavior is desired. See
325
    /// the docs of [`TitlecaseMapper`] for more information on what this means. There is no difference between
326
    /// the behavior of this function and the equivalent ones on [`TitlecaseMapper`] when the leading adjustment mode
327
    /// is [`LeadingAdjustment::None`].
328
    ///
329
    /// See [`Self::titlecase_segment_with_only_case_data()`] for the equivalent lower-level function that returns a [`Writeable`]
330
    ///
331
    /// # Examples
332
    ///
333
    /// ```rust
334
    /// use icu::casemap::CaseMapper;
335
    /// use icu::locid::langid;
336
    ///
337
    /// let cm = CaseMapper::new();
338
    /// let root = langid!("und");
339
    ///
340
    /// let default_options = Default::default();
341
    ///
342
    /// // note that the subsequent words are not titlecased, this function assumes
343
    /// // that the entire string is a single segment and only titlecases at the beginning.
344
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("hEllO WorLd", &root, default_options), "Hello world");
345
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("Γειά σου Κόσμε", &root, default_options), "Γειά σου κόσμε");
346
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("नमस्ते दुनिया", &root, default_options), "नमस्ते दुनिया");
347
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("Привет мир", &root, default_options), "Привет мир");
348
    ///
349
    /// // Some behavior is language-sensitive
350
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("istanbul", &root, default_options), "Istanbul");
351
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("istanbul", &langid!("tr"), default_options), "İstanbul"); // Turkish dotted i
352
    ///
353
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("և Երևանի", &root, default_options), "Եւ երևանի");
354
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("և Երևանի", &langid!("hy"), default_options), "Եվ երևանի"); // Eastern Armenian ech-yiwn ligature
355
    ///
356
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("ijkdijk", &root, default_options), "Ijkdijk");
357
    /// assert_eq!(cm.titlecase_segment_with_only_case_data_to_string("ijkdijk", &langid!("nl"), default_options), "IJkdijk"); // Dutch IJ digraph
358
    /// ```
359
    ///
360
    /// [`TitlecaseMapper`]: crate::TitlecaseMapper
361
0
    pub fn titlecase_segment_with_only_case_data_to_string(
362
0
        &self,
363
0
        src: &str,
364
0
        langid: &LanguageIdentifier,
365
0
        options: TitlecaseOptions,
366
0
    ) -> String {
367
0
        self.titlecase_segment_with_only_case_data(src, langid, options)
368
0
            .write_to_string()
369
0
            .into_owned()
370
0
    }
371
372
    /// Case-folds the characters in the given string as a String.
373
    /// This function is locale-independent and context-insensitive.
374
    ///
375
    /// Can be used to test if two strings are case-insensitively equivalent.
376
    ///
377
    /// See [`Self::fold()`] for the equivalent lower-level function that returns a [`Writeable`]
378
    ///
379
    /// # Examples
380
    ///
381
    /// ```rust
382
    /// use icu::casemap::CaseMapper;
383
    ///
384
    /// let cm = CaseMapper::new();
385
    ///
386
    /// // Check if two strings are equivalent case insensitively
387
    /// assert_eq!(cm.fold_string("hEllO WorLd"), cm.fold_string("HELLO worlD"));
388
    ///
389
    /// assert_eq!(cm.fold_string("hEllO WorLd"), "hello world");
390
    /// assert_eq!(cm.fold_string("Γειά σου Κόσμε"), "γειά σου κόσμε");
391
    /// assert_eq!(cm.fold_string("नमस्ते दुनिया"), "नमस्ते दुनिया");
392
    /// assert_eq!(cm.fold_string("Привет мир"), "привет мир");
393
    /// ```
394
0
    pub fn fold_string(&self, src: &str) -> String {
395
0
        self.fold(src).write_to_string().into_owned()
396
0
    }
397
398
    /// Case-folds the characters in the given string as a String,
399
    /// using Turkic (T) mappings for dotted/dotless I.
400
    /// This function is locale-independent and context-insensitive.
401
    ///
402
    /// Can be used to test if two strings are case-insensitively equivalent.
403
    ///
404
    /// See [`Self::fold_turkic()`] for the equivalent lower-level function that returns a [`Writeable`]
405
    ///
406
    /// # Examples
407
    ///
408
    /// ```rust
409
    /// use icu::casemap::CaseMapper;
410
    ///
411
    /// let cm = CaseMapper::new();
412
    ///
413
    /// // Check if two strings are equivalent case insensitively
414
    /// assert_eq!(cm.fold_turkic_string("İstanbul"), cm.fold_turkic_string("iSTANBUL"));
415
    ///
416
    /// assert_eq!(cm.fold_turkic_string("İstanbul not Constantinople"), "istanbul not constantinople");
417
    /// assert_eq!(cm.fold_turkic_string("Istanbul not Constantınople"), "ıstanbul not constantınople");
418
    ///
419
    /// assert_eq!(cm.fold_turkic_string("hEllO WorLd"), "hello world");
420
    /// assert_eq!(cm.fold_turkic_string("Γειά σου Κόσμε"), "γειά σου κόσμε");
421
    /// assert_eq!(cm.fold_turkic_string("नमस्ते दुनिया"), "नमस्ते दुनिया");
422
    /// assert_eq!(cm.fold_turkic_string("Привет мир"), "привет мир");
423
    /// ```
424
0
    pub fn fold_turkic_string(&self, src: &str) -> String {
425
0
        self.fold_turkic(src).write_to_string().into_owned()
426
0
    }
427
428
    /// Adds all simple case mappings and the full case folding for `c` to `set`.
429
    /// Also adds special case closure mappings.
430
    ///
431
    /// Identical to [`CaseMapCloser::add_case_closure_to()`], see docs there for more information.
432
    /// This method is duplicated so that one does not need to load extra unfold data
433
    /// if they only need this and not also [`CaseMapCloser::add_string_case_closure_to()`].
434
    ///
435
    ///
436
    /// # Examples
437
    ///
438
    /// ```rust
439
    /// use icu::casemap::CaseMapper;
440
    /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
441
    ///
442
    /// let cm = CaseMapper::new();
443
    /// let mut builder = CodePointInversionListBuilder::new();
444
    /// cm.add_case_closure_to('s', &mut builder);
445
    ///
446
    /// let set = builder.build();
447
    ///
448
    /// assert!(set.contains('S'));
449
    /// assert!(set.contains('ſ'));
450
    /// assert!(!set.contains('s')); // does not contain itself
451
    /// ```
452
    ///
453
    /// [`CaseMapCloser::add_case_closure_to()`]: crate::CaseMapCloser::add_case_closure_to
454
    /// [`CaseMapCloser::add_string_case_closure_to()`]: crate::CaseMapCloser::add_string_case_closure_to
455
0
    pub fn add_case_closure_to<S: ClosureSink>(&self, c: char, set: &mut S) {
456
0
        self.data.get().add_case_closure_to(c, set);
457
0
    }
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::add_case_closure_to::<icu_collections::codepointinvlist::builder::CodePointInversionListBuilder>
Unexecuted instantiation: <icu_casemap::casemapper::CaseMapper>::add_case_closure_to::<_>
458
459
    /// Returns the lowercase mapping of the given `char`.
460
    /// This function only implements simple and common mappings. Full mappings,
461
    /// which can map one `char` to a string, are not included.
462
    /// For full mappings, use [`CaseMapper::lowercase`].
463
    ///
464
    /// # Examples
465
    ///
466
    /// ```rust
467
    /// use icu::casemap::CaseMapper;
468
    ///
469
    /// let cm = CaseMapper::new();
470
    ///
471
    /// assert_eq!(cm.simple_lowercase('C'), 'c');
472
    /// assert_eq!(cm.simple_lowercase('c'), 'c');
473
    /// assert_eq!(cm.simple_lowercase('Ć'), 'ć');
474
    /// assert_eq!(cm.simple_lowercase('Γ'), 'γ');
475
    /// ```
476
0
    pub fn simple_lowercase(&self, c: char) -> char {
477
0
        self.data.get().simple_lower(c)
478
0
    }
479
480
    /// Returns the uppercase mapping of the given `char`.
481
    /// This function only implements simple and common mappings. Full mappings,
482
    /// which can map one `char` to a string, are not included.
483
    /// For full mappings, use [`CaseMapper::uppercase`].
484
    ///
485
    /// # Examples
486
    ///
487
    /// ```rust
488
    /// use icu::casemap::CaseMapper;
489
    ///
490
    /// let cm = CaseMapper::new();
491
    ///
492
    /// assert_eq!(cm.simple_uppercase('c'), 'C');
493
    /// assert_eq!(cm.simple_uppercase('C'), 'C');
494
    /// assert_eq!(cm.simple_uppercase('ć'), 'Ć');
495
    /// assert_eq!(cm.simple_uppercase('γ'), 'Γ');
496
    ///
497
    /// assert_eq!(cm.simple_uppercase('dz'), 'DZ');
498
    /// ```
499
0
    pub fn simple_uppercase(&self, c: char) -> char {
500
0
        self.data.get().simple_upper(c)
501
0
    }
502
503
    /// Returns the titlecase mapping of the given `char`.
504
    /// This function only implements simple and common mappings. Full mappings,
505
    /// which can map one `char` to a string, are not included.
506
    ///
507
    /// # Examples
508
    ///
509
    /// ```rust
510
    /// use icu::casemap::CaseMapper;
511
    ///
512
    /// let cm = CaseMapper::new();
513
    ///
514
    /// assert_eq!(cm.simple_titlecase('dz'), 'Dz');
515
    ///
516
    /// assert_eq!(cm.simple_titlecase('c'), 'C');
517
    /// assert_eq!(cm.simple_titlecase('C'), 'C');
518
    /// assert_eq!(cm.simple_titlecase('ć'), 'Ć');
519
    /// assert_eq!(cm.simple_titlecase('γ'), 'Γ');
520
    /// ```
521
0
    pub fn simple_titlecase(&self, c: char) -> char {
522
0
        self.data.get().simple_title(c)
523
0
    }
524
525
    /// Returns the simple case folding of the given char.
526
    /// For full mappings, use [`CaseMapper::fold`].
527
    ///
528
    /// This function can be used to perform caseless matches on
529
    /// individual characters.
530
    /// > *Note:* With Unicode 15.0 data, there are three
531
    /// > pairs of characters for which equivalence under this
532
    /// > function is inconsistent with equivalence of the
533
    /// > one-character strings under [`CaseMapper::fold`].
534
    /// > This is resolved in Unicode 15.1 and later.
535
    ///
536
    /// For compatibility applications where simple case folding
537
    /// of strings is required, this function can be applied to
538
    /// each character of a string.  Note that the resulting
539
    /// equivalence relation is different from that obtained
540
    /// by [`CaseMapper::fold`]:
541
    /// The strings "Straße" and "STRASSE" are distinct
542
    /// under simple case folding, but are equivalent under
543
    /// default (full) case folding.
544
    ///
545
    /// # Examples
546
    ///
547
    /// ```rust
548
    /// use icu::casemap::CaseMapper;
549
    ///
550
    /// let cm = CaseMapper::new();
551
    ///
552
    /// // perform case insensitive checks
553
    /// assert_eq!(cm.simple_fold('σ'), cm.simple_fold('ς'));
554
    /// assert_eq!(cm.simple_fold('Σ'), cm.simple_fold('ς'));
555
    ///
556
    /// assert_eq!(cm.simple_fold('c'), 'c');
557
    /// assert_eq!(cm.simple_fold('Ć'), 'ć');
558
    /// assert_eq!(cm.simple_fold('Γ'), 'γ');
559
    /// assert_eq!(cm.simple_fold('ς'), 'σ');
560
    ///
561
    /// assert_eq!(cm.simple_fold('ß'), 'ß');
562
    /// assert_eq!(cm.simple_fold('I'), 'i');
563
    /// assert_eq!(cm.simple_fold('İ'), 'İ');
564
    /// assert_eq!(cm.simple_fold('ı'), 'ı');
565
    /// ```
566
0
    pub fn simple_fold(&self, c: char) -> char {
567
0
        self.data.get().simple_fold(c, FoldOptions::default())
568
0
    }
569
570
    /// Returns the simple case folding of the given char, using Turkic (T) mappings for
571
    /// dotted/dotless i. This function does not fold `i` and `I` to the same character. Instead,
572
    /// `I` will fold to `ı`, and `İ` will fold to `i`. Otherwise, this is the same as
573
    /// [`CaseMapper::simple_fold()`].
574
    ///
575
    /// You can use the case folding to perform Turkic caseless matches on characters
576
    /// provided they don't full-casefold to strings. To avoid that situation,
577
    /// convert to a string and use [`CaseMapper::fold_turkic`].
578
    ///
579
    ///
580
    /// # Examples
581
    ///
582
    /// ```rust
583
    /// use icu::casemap::CaseMapper;
584
    ///
585
    /// let cm = CaseMapper::new();
586
    ///
587
    /// assert_eq!(cm.simple_fold_turkic('I'), 'ı');
588
    /// assert_eq!(cm.simple_fold_turkic('İ'), 'i');
589
    /// ```
590
0
    pub fn simple_fold_turkic(&self, c: char) -> char {
591
0
        self.data
592
0
            .get()
593
0
            .simple_fold(c, FoldOptions::with_turkic_mappings())
594
0
    }
595
}
596
597
#[cfg(test)]
598
mod tests {
599
    use super::*;
600
    use icu_locid::langid;
601
602
    #[test]
603
    /// Tests for SpecialCasing.txt. Some of the special cases are data-driven, some are code-driven
604
    fn test_special_cases() {
605
        let cm = CaseMapper::new();
606
        let root = langid!("und");
607
        let default_options = Default::default();
608
609
        // Ligatures
610
611
        // U+FB00 LATIN SMALL LIGATURE FF
612
        assert_eq!(cm.uppercase_to_string("ff", &root), "FF");
613
        // U+FB05 LATIN SMALL LIGATURE LONG S T
614
        assert_eq!(cm.uppercase_to_string("ſt", &root), "ST");
615
616
        // No corresponding uppercased character
617
618
        // U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
619
        assert_eq!(cm.uppercase_to_string("ʼn", &root), "ʼN");
620
621
        // U+1F50 GREEK SMALL LETTER UPSILON WITH PSILI
622
        assert_eq!(cm.uppercase_to_string("ὐ", &root), "Υ̓");
623
        // U+1FF6 GREEK SMALL LETTER OMEGA WITH PERISPOMENI
624
        assert_eq!(cm.uppercase_to_string("ῶ", &root), "Ω͂");
625
626
        // YPOGEGRAMMENI / PROSGEGRAMMENI special cases
627
628
        // E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
629
        assert_eq!(
630
            cm.uppercase_to_string("α\u{0313}\u{0345}", &root),
631
            "Α\u{0313}Ι"
632
        );
633
        // but the YPOGEGRAMMENI should not titlecase
634
        assert_eq!(
635
            cm.titlecase_segment_with_only_case_data_to_string(
636
                "α\u{0313}\u{0345}",
637
                &root,
638
                default_options
639
            ),
640
            "Α\u{0313}\u{0345}"
641
        );
642
643
        // U+1F80 GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
644
        assert_eq!(
645
            cm.titlecase_segment_with_only_case_data_to_string("ᾀ", &root, default_options),
646
            "ᾈ"
647
        );
648
        assert_eq!(cm.uppercase_to_string("ᾀ", &root), "ἈΙ");
649
650
        // U+1FFC GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
651
        assert_eq!(cm.lowercase_to_string("ῼ", &root), "ῳ");
652
        assert_eq!(
653
            cm.titlecase_segment_with_only_case_data_to_string("ῼ", &root, default_options),
654
            "ῼ"
655
        );
656
        assert_eq!(cm.uppercase_to_string("ῼ", &root), "ΩΙ");
657
658
        // U+1F98 GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
659
        assert_eq!(cm.lowercase_to_string("ᾘ", &root), "ᾐ");
660
        assert_eq!(
661
            cm.titlecase_segment_with_only_case_data_to_string("ᾘ", &root, default_options),
662
            "ᾘ"
663
        );
664
        assert_eq!(cm.uppercase_to_string("ᾘ", &root), "ἨΙ");
665
666
        // U+1FB2 GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
667
        assert_eq!(cm.lowercase_to_string("ᾲ", &root), "ᾲ");
668
        assert_eq!(
669
            cm.titlecase_segment_with_only_case_data_to_string("ᾲ", &root, default_options),
670
            "Ὰ\u{345}"
671
        );
672
        assert_eq!(cm.uppercase_to_string("ᾲ", &root), "ᾺΙ");
673
674
        // Final sigma test
675
        // U+03A3 GREEK CAPITAL LETTER SIGMA in Final_Sigma context
676
        assert_eq!(cm.lowercase_to_string("ΙΙΙΣ", &root), "ιιις");
677
678
        // Turkish / Azeri
679
        let tr = langid!("tr");
680
        let az = langid!("az");
681
        // U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
682
        assert_eq!(cm.lowercase_to_string("İ", &tr), "i");
683
        assert_eq!(cm.lowercase_to_string("İ", &az), "i");
684
        assert_eq!(
685
            cm.titlecase_segment_with_only_case_data_to_string("İ", &tr, default_options),
686
            "İ"
687
        );
688
        assert_eq!(
689
            cm.titlecase_segment_with_only_case_data_to_string("İ", &az, default_options),
690
            "İ"
691
        );
692
        assert_eq!(cm.uppercase_to_string("İ", &tr), "İ");
693
        assert_eq!(cm.uppercase_to_string("İ", &az), "İ");
694
695
        // U+0049 LATIN CAPITAL LETTER I and U+0307 COMBINING DOT ABOVE
696
        assert_eq!(cm.lowercase_to_string("I\u{0307}", &tr), "i");
697
        assert_eq!(cm.lowercase_to_string("I\u{0307}", &az), "i");
698
        assert_eq!(
699
            cm.titlecase_segment_with_only_case_data_to_string("I\u{0307}", &tr, default_options),
700
            "I\u{0307}"
701
        );
702
        assert_eq!(
703
            cm.titlecase_segment_with_only_case_data_to_string("I\u{0307}", &az, default_options),
704
            "I\u{0307}"
705
        );
706
        assert_eq!(cm.uppercase_to_string("I\u{0307}", &tr), "I\u{0307}");
707
        assert_eq!(cm.uppercase_to_string("I\u{0307}", &az), "I\u{0307}");
708
709
        // U+0049 LATIN CAPITAL LETTER I
710
        assert_eq!(cm.lowercase_to_string("I", &tr), "ı");
711
        assert_eq!(cm.lowercase_to_string("I", &az), "ı");
712
        assert_eq!(
713
            cm.titlecase_segment_with_only_case_data_to_string("I", &tr, default_options),
714
            "I"
715
        );
716
        assert_eq!(
717
            cm.titlecase_segment_with_only_case_data_to_string("I", &az, default_options),
718
            "I"
719
        );
720
        assert_eq!(cm.uppercase_to_string("I", &tr), "I");
721
        assert_eq!(cm.uppercase_to_string("I", &az), "I");
722
723
        // U+0069 LATIN SMALL LETTER I
724
        assert_eq!(cm.lowercase_to_string("i", &tr), "i");
725
        assert_eq!(cm.lowercase_to_string("i", &az), "i");
726
        assert_eq!(
727
            cm.titlecase_segment_with_only_case_data_to_string("i", &tr, default_options),
728
            "İ"
729
        );
730
        assert_eq!(
731
            cm.titlecase_segment_with_only_case_data_to_string("i", &az, default_options),
732
            "İ"
733
        );
734
        assert_eq!(cm.uppercase_to_string("i", &tr), "İ");
735
        assert_eq!(cm.uppercase_to_string("i", &az), "İ");
736
    }
737
738
    /// Checks that Cherokee letters case-fold to the capital form.
    ///
    /// Both U+13A0 (capital) and U+AB70 (small) are expected to fold to
    /// U+13A0 under every folding entry point exercised here: per-character
    /// and whole-string, default and Turkic.
    #[test]
    fn test_cherokee_case_folding() {
        let mapper = CaseMapper::new();

        // Capital first, then small; the capital is also the expected result.
        let folded_char = 'Ꭰ';
        let char_inputs = ['Ꭰ', 'ꭰ'];
        let folded_str = "Ꭰ";
        let str_inputs = ["Ꭰ", "ꭰ"];

        // Per-character folding.
        for ch in char_inputs {
            assert_eq!(mapper.simple_fold(ch), folded_char);
        }
        for ch in char_inputs {
            assert_eq!(mapper.simple_fold_turkic(ch), folded_char);
        }

        // Whole-string folding.
        for text in str_inputs {
            assert_eq!(mapper.fold_string(text), folded_str);
        }
        for text in str_inputs {
            assert_eq!(mapper.fold_turkic_string(text), folded_str);
        }
    }
750
}