Coverage Report

Created: 2025-06-16 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/icu_casemap-1.5.1/src/titlecase.rs
Line
Count
Source (jump to first uncovered line)
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
//! Titlecasing-specific options and the [`TitlecaseMapper`] type.
6
use crate::provider::CaseMapV1Marker;
7
use crate::CaseMapper;
8
use alloc::string::String;
9
use icu_locid::LanguageIdentifier;
10
use icu_properties::maps::CodePointMapData;
11
use icu_properties::provider::GeneralCategoryV1Marker;
12
use icu_properties::{GeneralCategory, GeneralCategoryGroup, PropertiesError};
13
use icu_provider::prelude::*;
14
use writeable::Writeable;
15
16
/// How to handle the rest of the string once the beginning of the
17
/// string has been titlecased.
18
///
19
/// # Examples
20
///
21
/// ```rust
22
/// use icu::casemap::titlecase::{TitlecaseOptions, TrailingCase};
23
/// use icu::casemap::TitlecaseMapper;
24
/// use icu::locid::langid;
25
///
26
/// let cm = TitlecaseMapper::new();
27
/// let root = langid!("und");
28
///
29
/// let default_options = Default::default();
30
/// let mut preserve_case: TitlecaseOptions = Default::default();
31
/// preserve_case.trailing_case = TrailingCase::Unchanged;
32
///
33
/// // The rest of the string is lowercased by default, and preserved with `TrailingCase::Unchanged`:
34
/// assert_eq!(
35
///     cm.titlecase_segment_to_string("spOngeBoB", &root, default_options),
36
///     "Spongebob"
37
/// );
38
/// assert_eq!(
39
///     cm.titlecase_segment_to_string("spOngeBoB", &root, preserve_case),
40
///     "SpOngeBoB"
41
/// );
42
/// ```
43
#[non_exhaustive]
44
#[derive(Copy, Clone, Default, PartialEq, Eq, Hash, Debug)]
45
pub enum TrailingCase {
46
    /// Preserve the casing of the rest of the string ("spoNgEBoB" -> "SpoNgEBoB")
47
    Unchanged,
48
    /// Lowercase the rest of the string ("spoNgEBoB" -> "Spongebob")
49
    #[default]
50
    Lower,
51
}
52
53
/// Where to start casing the string.
54
///
55
/// [`TitlecaseMapper`] by default performs "leading adjustment", where it searches for the first "relevant" character
56
/// in the string before initializing the actual titlecasing. For example, it will skip punctuation at the beginning
57
/// of a string, allowing for strings like `'twas` or `«hello»` to be appropriately titlecased.
58
///
59
/// Opinions on exactly what is a "relevant" character may differ. In "adjust to cased" mode the first cased character is considered "relevant",
60
/// whereas in the "auto" mode, it is the first character that is a letter, number, symbol, or private use character. This means
61
/// that the strings `49ers` and `«丰(abc)»` will titlecase in "adjust to cased" mode to `49Ers` and `«丰(Abc)»`, whereas in the "auto" mode they stay unchanged.
62
/// This difference largely matters for things that mix numbers and letters, or mix writing systems, within a single segment.
63
///
64
/// # Examples
65
///
66
/// ```rust
67
/// use icu::casemap::titlecase::{LeadingAdjustment, TitlecaseOptions};
68
/// use icu::casemap::TitlecaseMapper;
69
/// use icu::locid::langid;
70
///
71
/// let cm = TitlecaseMapper::new();
72
/// let root = langid!("und");
73
///
74
/// let default_options = Default::default(); // leading adjustment set to Auto
75
/// let mut no_adjust: TitlecaseOptions = Default::default();
76
/// let mut adjust_to_cased: TitlecaseOptions = Default::default();
77
/// no_adjust.leading_adjustment = LeadingAdjustment::None;
78
/// adjust_to_cased.leading_adjustment = LeadingAdjustment::ToCased;
79
///
80
/// // Exhibits leading adjustment when set:
81
/// assert_eq!(
82
///     cm.titlecase_segment_to_string("«hello»", &root, default_options),
83
///     "«Hello»"
84
/// );
85
/// assert_eq!(
86
///     cm.titlecase_segment_to_string("«hello»", &root, adjust_to_cased),
87
///     "«Hello»"
88
/// );
89
/// assert_eq!(
90
///     cm.titlecase_segment_to_string("«hello»", &root, no_adjust),
91
///     "«hello»"
92
/// );
93
///
94
/// // Only changed in adjust-to-cased mode:
95
/// assert_eq!(
96
///     cm.titlecase_segment_to_string("丰(abc)", &root, default_options),
97
///     "丰(abc)"
98
/// );
99
/// assert_eq!(
100
///     cm.titlecase_segment_to_string("丰(abc)", &root, adjust_to_cased),
101
///     "丰(Abc)"
102
/// );
103
/// assert_eq!(
104
///     cm.titlecase_segment_to_string("丰(abc)", &root, no_adjust),
105
///     "丰(abc)"
106
/// );
107
///
108
/// // Only changed in adjust-to-cased mode:
109
/// assert_eq!(
110
///     cm.titlecase_segment_to_string("49ers", &root, default_options),
111
///     "49ers"
112
/// );
113
/// assert_eq!(
114
///     cm.titlecase_segment_to_string("49ers", &root, adjust_to_cased),
115
///     "49Ers"
116
/// );
117
/// assert_eq!(
118
///     cm.titlecase_segment_to_string("49ers", &root, no_adjust),
119
///     "49ers"
120
/// );
121
/// ```
122
#[non_exhaustive]
123
#[derive(Copy, Clone, Default, PartialEq, Eq, Hash, Debug)]
124
pub enum LeadingAdjustment {
125
    /// Start titlecasing immediately, even if the character is not one that is relevant for casing
126
    /// ("'twixt" -> "'twixt", "twixt" -> "Twixt")
127
    None,
128
    /// Adjust the string to the first relevant character before beginning to apply casing
129
    /// ("'twixt" -> "'Twixt"). The "relevant" character is picked by the best available algorithm:
130
    /// by default this adjusts to the first letter, number, symbol, or private use character,
131
    /// but if no data is available (e.g. this API is being called via [`CaseMapper::titlecase_segment_with_only_case_data()`]),
132
    /// then it may be equivalent to "adjust to cased".
133
    ///
134
    /// This is the default.
135
    #[default]
136
    Auto,
137
    /// Adjust the string to the first cased character before beginning to apply casing
138
    /// ("'twixt" -> "'Twixt")
139
    ToCased,
140
}
141
142
/// Various options for controlling titlecasing
143
///
144
/// See docs of [`TitlecaseMapper`] for examples.
145
#[non_exhaustive]
146
#[derive(Copy, Clone, Default, PartialEq, Eq, Hash, Debug)]
147
pub struct TitlecaseOptions {
148
    /// How to handle the rest of the string once the head of the
149
    /// string has been titlecased
150
    pub trailing_case: TrailingCase,
151
    /// Whether to start casing at the beginning of the string or at the first
152
    /// relevant character.
153
    pub leading_adjustment: LeadingAdjustment,
154
}
155
156
/// A wrapper around [`CaseMapper`] that performs titlecasing, and is able to load additional data
157
/// to support the non-legacy "leading adjustment" behavior.
158
///
159
///
160
/// By default, [`Self::titlecase_segment()`] and [`Self::titlecase_segment_to_string()`] perform "leading adjustment",
161
/// where they wait till the first relevant character to begin titlecasing. For example, in the string `'twixt`, the apostrophe
162
/// is ignored because the word starts at the first "t", which will get titlecased (producing `'Twixt`). Other punctuation will
163
/// also be ignored, like in the string `«hello»`, which will get titlecased to `«Hello»`.
164
///
165
/// This is a separate type from [`CaseMapper`] because it loads the additional data
166
/// required by [`LeadingAdjustment::Auto`] to perform the best possible leading adjustment.
167
///
168
/// If you are planning on only using [`LeadingAdjustment::None`] or [`LeadingAdjustment::ToCased`], consider using [`CaseMapper`] directly; this
169
/// type will have no additional behavior.
170
///
171
/// # Examples
172
///
173
/// Basic casemapping behavior:
174
///
175
/// ```rust
176
/// use icu::casemap::TitlecaseMapper;
177
/// use icu::locid::langid;
178
///
179
/// let cm = TitlecaseMapper::new();
180
/// let root = langid!("und");
181
///
182
/// let default_options = Default::default();
183
///
184
/// // note that the subsequent words are not titlecased, this function assumes
185
/// // that the entire string is a single segment and only titlecases at the beginning.
186
/// assert_eq!(cm.titlecase_segment_to_string("hEllO WorLd", &root, default_options), "Hello world");
187
/// assert_eq!(cm.titlecase_segment_to_string("Γειά σου Κόσμε", &root, default_options), "Γειά σου κόσμε");
188
/// assert_eq!(cm.titlecase_segment_to_string("नमस्ते दुनिया", &root, default_options), "नमस्ते दुनिया");
189
/// assert_eq!(cm.titlecase_segment_to_string("Привет мир", &root, default_options), "Привет мир");
190
///
191
/// // Some behavior is language-sensitive
192
/// assert_eq!(cm.titlecase_segment_to_string("istanbul", &root, default_options), "Istanbul");
193
/// assert_eq!(cm.titlecase_segment_to_string("istanbul", &langid!("tr"), default_options), "İstanbul"); // Turkish dotted i
194
///
195
/// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &root, default_options), "Եւ երևանի");
196
/// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &langid!("hy"), default_options), "Եվ երևանի"); // Eastern Armenian ech-yiwn ligature
197
///
198
/// assert_eq!(cm.titlecase_segment_to_string("ijkdijk", &root, default_options), "Ijkdijk");
199
/// assert_eq!(cm.titlecase_segment_to_string("ijkdijk", &langid!("nl"), default_options), "IJkdijk"); // Dutch IJ digraph
200
/// ```
201
#[derive(Clone, Debug)]
202
pub struct TitlecaseMapper<CM> {
203
    cm: CM,
204
    gc: CodePointMapData<GeneralCategory>,
205
}
206
207
#[cfg(feature = "compiled_data")]
208
impl Default for TitlecaseMapper<CaseMapper> {
209
0
    fn default() -> Self {
210
0
        Self::new()
211
0
    }
212
}
213
214
impl TitlecaseMapper<CaseMapper> {
215
    /// A constructor which creates a [`TitlecaseMapper`] using compiled data
216
    ///
217
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
218
    ///
219
    /// [📚 Help choosing a constructor](icu_provider::constructors)
220
    #[cfg(feature = "compiled_data")]
221
0
    pub const fn new() -> Self {
222
0
        Self {
223
0
            cm: CaseMapper::new(),
224
0
            gc: icu_properties::maps::general_category().static_to_owned(),
225
0
        }
226
0
    }
227
228
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError,
229
    #[cfg(skip)]
230
    functions: [
231
        new,
232
        try_new_with_any_provider,
233
        try_new_with_buffer_provider,
234
        try_new_unstable,
235
        Self,
236
    ]);
237
238
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
239
0
    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
240
0
    where
241
0
        P: DataProvider<CaseMapV1Marker> + DataProvider<GeneralCategoryV1Marker> + ?Sized,
242
0
    {
243
0
        let cm = CaseMapper::try_new_unstable(provider)?;
244
0
        let gc = icu_properties::maps::load_general_category(provider).map_err(|e| {
245
0
            let PropertiesError::PropDataLoad(e) = e else {
246
0
                unreachable!()
247
            };
248
0
            e
249
0
        })?;
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>>::{closure#0}
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::try_new_unstable::<_>::{closure#0}
250
0
        Ok(Self { cm, gc })
251
0
    }
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>>
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::try_new_unstable::<_>
252
}
253
254
// The bound is `AsRef<CaseMapper>`, so `CM` may be an owned `CaseMapper` or a reference to one.
255
impl<CM: AsRef<CaseMapper>> TitlecaseMapper<CM> {
256
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, casemapper: CM, error: DataError,
257
    #[cfg(skip)]
258
    functions: [
259
        new_with_mapper,
260
        try_new_with_mapper_with_any_provider,
261
        try_new_with_mapper_with_buffer_provider,
262
        try_new_with_mapper_unstable,
263
        Self,
264
    ]);
265
266
    /// A constructor which creates a [`TitlecaseMapper`] from an existing [`CaseMapper`]
267
    /// (either owned or as a reference) and compiled data
268
    ///
269
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
270
    ///
271
    /// [📚 Help choosing a constructor](icu_provider::constructors)
272
    #[cfg(feature = "compiled_data")]
273
0
    pub const fn new_with_mapper(casemapper: CM) -> Self {
274
0
        Self {
275
0
            cm: casemapper,
276
0
            gc: icu_properties::maps::general_category().static_to_owned(),
277
0
        }
278
0
    }
279
280
    /// Construct this object to wrap an existing CaseMapper (or a reference to one), loading additional data as needed.
281
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_mapper)]
282
0
    pub fn try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError>
283
0
    where
284
0
        P: DataProvider<CaseMapV1Marker> + DataProvider<GeneralCategoryV1Marker> + ?Sized,
285
0
    {
286
0
        let gc = icu_properties::maps::load_general_category(provider).map_err(|e| {
287
0
            let PropertiesError::PropDataLoad(e) = e else {
288
0
                unreachable!()
289
            };
290
0
            e
291
0
        })?;
292
0
        Ok(Self { cm: casemapper, gc })
293
0
    }
294
295
    /// Returns the full titlecase mapping of the given string as a [`Writeable`], treating
296
    /// the string as a single segment (and thus only titlecasing the beginning of it).
297
    ///
298
    /// This should typically be used as a lower-level helper to construct the titlecasing operation desired
299
    /// by the application, for example one can titlecase on a per-word basis by mixing this with
300
    /// a `WordSegmenter`.
301
    ///
302
    /// This function is context and language sensitive. Callers should pass the text's language
303
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
304
    /// `Default::default()` for the root locale.
305
    ///
306
    /// See [`Self::titlecase_segment_to_string()`] for the equivalent convenience function that returns a String,
307
    /// as well as for an example.
308
0
    pub fn titlecase_segment<'a>(
309
0
        &'a self,
310
0
        src: &'a str,
311
0
        langid: &LanguageIdentifier,
312
0
        options: TitlecaseOptions,
313
0
    ) -> impl Writeable + 'a {
314
0
        if options.leading_adjustment == LeadingAdjustment::Auto {
315
            // letter, number, symbol, or private use code point
316
            const HEAD_GROUPS: GeneralCategoryGroup = GeneralCategoryGroup::Letter
317
                .union(GeneralCategoryGroup::Number)
318
                .union(GeneralCategoryGroup::Symbol)
319
                .union(GeneralCategoryGroup::PrivateUse);
320
0
            self.cm
321
0
                .as_ref()
322
0
                .titlecase_segment_with_adjustment(src, langid, options, |_data, ch| {
323
0
                    HEAD_GROUPS.contains(self.gc.as_borrowed().get(ch))
324
0
                })
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment::{closure#0}
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<_>>::titlecase_segment::{closure#0}
325
        } else {
326
0
            self.cm
327
0
                .as_ref()
328
0
                .titlecase_segment_with_adjustment(src, langid, options, |data, ch| {
329
0
                    data.is_cased(ch)
330
0
                })
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment::{closure#1}
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<_>>::titlecase_segment::{closure#1}
331
        }
332
0
    }
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<icu_casemap::casemapper::CaseMapper>>::titlecase_segment
Unexecuted instantiation: <icu_casemap::titlecase::TitlecaseMapper<_>>::titlecase_segment
333
334
    /// Returns the full titlecase mapping of the given string as a String, treating
335
    /// the string as a single segment (and thus only titlecasing the beginning of it).
336
    ///
337
    /// This should typically be used as a lower-level helper to construct the titlecasing operation desired
338
    /// by the application, for example one can titlecase on a per-word basis by mixing this with
339
    /// a `WordSegmenter`.
340
    ///
341
    /// This function is context and language sensitive. Callers should pass the text's language
342
    /// as a `LanguageIdentifier` (usually the `id` field of the `Locale`) if available, or
343
    /// `Default::default()` for the root locale.
344
    ///
345
    /// See [`Self::titlecase_segment()`] for the equivalent lower-level function that returns a [`Writeable`].
346
    ///
347
    /// # Examples
348
    ///
349
    /// ```rust
350
    /// use icu::casemap::TitlecaseMapper;
351
    /// use icu::locid::langid;
352
    ///
353
    /// let cm = TitlecaseMapper::new();
354
    /// let root = langid!("und");
355
    ///
356
    /// let default_options = Default::default();
357
    ///
358
    /// // note that the subsequent words are not titlecased, this function assumes
359
    /// // that the entire string is a single segment and only titlecases at the beginning.
360
    /// assert_eq!(cm.titlecase_segment_to_string("hEllO WorLd", &root, default_options), "Hello world");
361
    /// assert_eq!(cm.titlecase_segment_to_string("Γειά σου Κόσμε", &root, default_options), "Γειά σου κόσμε");
362
    /// assert_eq!(cm.titlecase_segment_to_string("नमस्ते दुनिया", &root, default_options), "नमस्ते दुनिया");
363
    /// assert_eq!(cm.titlecase_segment_to_string("Привет мир", &root, default_options), "Привет мир");
364
    ///
365
    /// // Some behavior is language-sensitive
366
    /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &root, default_options), "Istanbul");
367
    /// assert_eq!(cm.titlecase_segment_to_string("istanbul", &langid!("tr"), default_options), "İstanbul"); // Turkish dotted i
368
    ///
369
    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &root, default_options), "Եւ երևանի");
370
    /// assert_eq!(cm.titlecase_segment_to_string("և Երևանի", &langid!("hy"), default_options), "Եվ երևանի"); // Eastern Armenian ech-yiwn ligature
371
    ///
372
    /// assert_eq!(cm.titlecase_segment_to_string("ijkdijk", &root, default_options), "Ijkdijk");
373
    /// assert_eq!(cm.titlecase_segment_to_string("ijkdijk", &langid!("nl"), default_options), "IJkdijk"); // Dutch IJ digraph
374
    /// ```
375
    ///
376
    /// Leading adjustment behaviors:
377
    ///
378
    /// ```rust
379
    /// use icu::casemap::titlecase::{LeadingAdjustment, TitlecaseOptions};
380
    /// use icu::casemap::TitlecaseMapper;
381
    /// use icu::locid::langid;
382
    ///
383
    /// let cm = TitlecaseMapper::new();
384
    /// let root = langid!("und");
385
    ///
386
    /// let default_options = Default::default();
387
    /// let mut no_adjust: TitlecaseOptions = Default::default();
388
    /// no_adjust.leading_adjustment = LeadingAdjustment::None;
389
    ///
390
    /// // Exhibits leading adjustment when set:
391
    /// assert_eq!(
392
    ///     cm.titlecase_segment_to_string("«hello»", &root, default_options),
393
    ///     "«Hello»"
394
    /// );
395
    /// assert_eq!(
396
    ///     cm.titlecase_segment_to_string("«hello»", &root, no_adjust),
397
    ///     "«hello»"
398
    /// );
399
    ///
400
    /// assert_eq!(
401
    ///     cm.titlecase_segment_to_string("'Twas", &root, default_options),
402
    ///     "'Twas"
403
    /// );
404
    /// assert_eq!(
405
    ///     cm.titlecase_segment_to_string("'Twas", &root, no_adjust),
406
    ///     "'twas"
407
    /// );
408
    ///
409
    /// assert_eq!(
410
    ///     cm.titlecase_segment_to_string("", &root, default_options),
411
    ///     ""
412
    /// );
413
    /// assert_eq!(cm.titlecase_segment_to_string("", &root, no_adjust), "");
414
    /// ```
415
    ///
416
    /// Trailing case behaviors:
417
    ///
418
    /// ```rust
419
    /// use icu::casemap::titlecase::{TitlecaseOptions, TrailingCase};
420
    /// use icu::casemap::TitlecaseMapper;
421
    /// use icu::locid::langid;
422
    ///
423
    /// let cm = TitlecaseMapper::new();
424
    /// let root = langid!("und");
425
    ///
426
    /// let default_options = Default::default();
427
    /// let mut preserve_case: TitlecaseOptions = Default::default();
428
    /// preserve_case.trailing_case = TrailingCase::Unchanged;
429
    ///
430
    /// // The rest of the string is lowercased by default, and preserved with `TrailingCase::Unchanged`:
431
    /// assert_eq!(
432
    ///     cm.titlecase_segment_to_string("spOngeBoB", &root, default_options),
433
    ///     "Spongebob"
434
    /// );
435
    /// assert_eq!(
436
    ///     cm.titlecase_segment_to_string("spOngeBoB", &root, preserve_case),
437
    ///     "SpOngeBoB"
438
    /// );
439
    /// ```
440
0
    pub fn titlecase_segment_to_string(
441
0
        &self,
442
0
        src: &str,
443
0
        langid: &LanguageIdentifier,
444
0
        options: TitlecaseOptions,
445
0
    ) -> String {
446
0
        self.titlecase_segment(src, langid, options)
447
0
            .write_to_string()
448
0
            .into_owned()
449
0
    }
450
}