Coverage Report

Created: 2025-07-11 06:39

/rust/registry/src/index.crates.io-6f17d22bba15001f/icu_locid-1.5.0/src/locale.rs
Line
Count
Source (jump to first uncovered line)
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
#[allow(deprecated)]
6
use crate::ordering::SubtagOrderingResult;
7
use crate::parser::{
8
    parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
9
    ParserError, ParserMode, SubtagIterator,
10
};
11
use crate::{extensions, subtags, LanguageIdentifier};
12
use alloc::string::String;
13
use core::cmp::Ordering;
14
use core::str::FromStr;
15
use tinystr::TinyAsciiStr;
16
use writeable::Writeable;
17
18
/// A core struct representing a [`Unicode Locale Identifier`].
19
///
20
/// A locale is made of two parts:
21
///  * Unicode Language Identifier
22
///  * A set of Unicode Extensions
23
///
24
/// [`Locale`] exposes all of the same fields and methods as [`LanguageIdentifier`], and
25
/// on top of that is able to parse, manipulate and serialize unicode extension fields.
26
///
27
///
28
/// # Examples
29
///
30
/// ```
31
/// use icu::locid::{
32
///     extensions::unicode::{key, value},
33
///     locale,
34
///     subtags::{language, region},
35
/// };
36
///
37
/// let loc = locale!("en-US-u-ca-buddhist");
38
///
39
/// assert_eq!(loc.id.language, language!("en"));
40
/// assert_eq!(loc.id.script, None);
41
/// assert_eq!(loc.id.region, Some(region!("US")));
42
/// assert_eq!(loc.id.variants.len(), 0);
43
/// assert_eq!(
44
///     loc.extensions.unicode.keywords.get(&key!("ca")),
45
///     Some(&value!("buddhist"))
46
/// );
47
/// ```
48
///
49
/// # Parsing
50
///
51
/// Unicode recognizes three levels of standard conformance for a locale:
52
///
53
///  * *well-formed* - syntactically correct
54
///  * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
55
///  * *canonical* - valid and no deprecated codes or structure.
56
///
57
/// At the moment parsing normalizes a well-formed locale identifier converting
58
/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
59
///
60
/// Any bogus subtags will cause the parsing to fail with an error.
61
///
62
/// No subtag validation or alias resolution is performed.
63
///
64
/// # Examples
65
///
66
/// ```
67
/// use icu::locid::{subtags::*, Locale};
68
///
69
/// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12"
70
///     .parse()
71
///     .expect("Failed to parse.");
72
///
73
/// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap());
74
/// assert_eq!(loc.id.script, "Latn".parse::<Script>().ok());
75
/// assert_eq!(loc.id.region, "US".parse::<Region>().ok());
76
/// assert_eq!(
77
///     loc.id.variants.get(0),
78
///     "valencia".parse::<Variant>().ok().as_ref()
79
/// );
80
/// ```
81
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier
82
#[derive(Default, PartialEq, Eq, Clone, Hash)]
83
#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
84
pub struct Locale {
85
    /// The basic language/script/region components in the locale identifier along with any variants.
86
    pub id: LanguageIdentifier,
87
    /// Any extensions present in the locale identifier.
88
    pub extensions: extensions::Extensions,
89
}
90
91
#[test]
92
fn test_sizes() {
93
    assert_eq!(core::mem::size_of::<subtags::Language>(), 3);
94
    assert_eq!(core::mem::size_of::<subtags::Script>(), 4);
95
    assert_eq!(core::mem::size_of::<subtags::Region>(), 3);
96
    assert_eq!(core::mem::size_of::<subtags::Variant>(), 8);
97
    assert_eq!(core::mem::size_of::<subtags::Variants>(), 16);
98
    assert_eq!(core::mem::size_of::<LanguageIdentifier>(), 32);
99
100
    assert_eq!(core::mem::size_of::<extensions::transform::Transform>(), 56);
101
    assert_eq!(core::mem::size_of::<Option<LanguageIdentifier>>(), 32);
102
    assert_eq!(core::mem::size_of::<extensions::transform::Fields>(), 24);
103
104
    assert_eq!(core::mem::size_of::<extensions::unicode::Attributes>(), 16);
105
    assert_eq!(core::mem::size_of::<extensions::unicode::Keywords>(), 24);
106
    assert_eq!(core::mem::size_of::<Vec<extensions::other::Other>>(), 24);
107
    assert_eq!(core::mem::size_of::<extensions::private::Private>(), 16);
108
    assert_eq!(core::mem::size_of::<extensions::Extensions>(), 136);
109
110
    assert_eq!(core::mem::size_of::<Locale>(), 168);
111
}
112
113
impl Locale {
114
    /// A constructor which takes a utf8 slice, parses it and
115
    /// produces a well-formed [`Locale`].
116
    ///
117
    /// # Examples
118
    ///
119
    /// ```
120
    /// use icu::locid::Locale;
121
    ///
122
    /// Locale::try_from_bytes(b"en-US-u-hc-h12").unwrap();
123
    /// ```
124
0
    pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> {
125
0
        parse_locale(v)
126
0
    }
127
128
    /// The default undefined locale "und". Same as [`default()`](Default::default()).
129
    ///
130
    /// # Examples
131
    ///
132
    /// ```
133
    /// use icu::locid::Locale;
134
    ///
135
    /// assert_eq!(Locale::default(), Locale::UND);
136
    /// ```
137
    pub const UND: Self = Self {
138
        id: LanguageIdentifier::UND,
139
        extensions: extensions::Extensions::new(),
140
    };
141
142
    /// This is a best-effort operation that performs all available levels of canonicalization.
143
    ///
144
    /// At the moment the operation will normalize casing and the separator, but in the future
145
    /// it may also validate and update from deprecated subtags to canonical ones.
146
    ///
147
    /// # Examples
148
    ///
149
    /// ```
150
    /// use icu::locid::Locale;
151
    ///
152
    /// assert_eq!(
153
    ///     Locale::canonicalize("pL_latn_pl-U-HC-H12").as_deref(),
154
    ///     Ok("pl-Latn-PL-u-hc-h12")
155
    /// );
156
    /// ```
157
0
    pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> {
158
0
        let locale = Self::try_from_bytes(input.as_ref())?;
159
0
        Ok(locale.write_to_string().into_owned())
160
0
    }
Unexecuted instantiation: <icu_locid::locale::Locale>::canonicalize::<&[u8]>
Unexecuted instantiation: <icu_locid::locale::Locale>::canonicalize::<_>
161
162
    /// Compare this [`Locale`] with BCP-47 bytes.
163
    ///
164
    /// The return value is equivalent to what would happen if you first converted this
165
    /// [`Locale`] to a BCP-47 string and then performed a byte comparison.
166
    ///
167
    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
168
    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
169
    ///
170
    /// # Examples
171
    ///
172
    /// ```
173
    /// use icu::locid::Locale;
174
    /// use std::cmp::Ordering;
175
    ///
176
    /// let bcp47_strings: &[&str] = &[
177
    ///     "pl-Latn-PL",
178
    ///     "und",
179
    ///     "und-fonipa",
180
    ///     "und-t-m0-true",
181
    ///     "und-u-ca-hebrew",
182
    ///     "und-u-ca-japanese",
183
    ///     "zh",
184
    /// ];
185
    ///
186
    /// for ab in bcp47_strings.windows(2) {
187
    ///     let a = ab[0];
188
    ///     let b = ab[1];
189
    ///     assert!(a.cmp(b) == Ordering::Less);
190
    ///     let a_loc = a.parse::<Locale>().unwrap();
191
    ///     assert!(a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal);
192
    ///     assert!(a_loc.strict_cmp(b.as_bytes()) == Ordering::Less);
193
    /// }
194
    /// ```
195
0
    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
196
0
        self.writeable_cmp_bytes(other)
197
0
    }
198
199
    #[allow(clippy::type_complexity)]
200
0
    pub(crate) fn as_tuple(
201
0
        &self,
202
0
    ) -> (
203
0
        (
204
0
            subtags::Language,
205
0
            Option<subtags::Script>,
206
0
            Option<subtags::Region>,
207
0
            &subtags::Variants,
208
0
        ),
209
0
        (
210
0
            (
211
0
                &extensions::unicode::Attributes,
212
0
                &extensions::unicode::Keywords,
213
0
            ),
214
0
            (
215
0
                Option<(
216
0
                    subtags::Language,
217
0
                    Option<subtags::Script>,
218
0
                    Option<subtags::Region>,
219
0
                    &subtags::Variants,
220
0
                )>,
221
0
                &extensions::transform::Fields,
222
0
            ),
223
0
            &extensions::private::Private,
224
0
            &[extensions::other::Other],
225
0
        ),
226
0
    ) {
227
0
        (self.id.as_tuple(), self.extensions.as_tuple())
228
0
    }
229
230
    /// Returns an ordering suitable for use in [`BTreeSet`].
231
    ///
232
    /// The ordering may or may not be equivalent to string ordering, and it
233
    /// may or may not be stable across ICU4X releases.
234
    ///
235
    /// [`BTreeSet`]: alloc::collections::BTreeSet
236
0
    pub fn total_cmp(&self, other: &Self) -> Ordering {
237
0
        self.as_tuple().cmp(&other.as_tuple())
238
0
    }
239
240
    /// Compare this [`Locale`] with an iterator of BCP-47 subtags.
241
    ///
242
    /// This function has the same equality semantics as [`Locale::strict_cmp`]. It is intended as
243
    /// a more modular version that allows multiple subtag iterators to be chained together.
244
    ///
245
    /// For an additional example, see [`SubtagOrderingResult`].
246
    ///
247
    /// # Examples
248
    ///
249
    /// ```
250
    /// use icu::locid::locale;
251
    /// use std::cmp::Ordering;
252
    ///
253
    /// let subtags: &[&[u8]] =
254
    ///     &[b"ca", b"ES", b"valencia", b"u", b"ca", b"hebrew"];
255
    ///
256
    /// let loc = locale!("ca-ES-valencia-u-ca-hebrew");
257
    /// assert_eq!(
258
    ///     Ordering::Equal,
259
    ///     loc.strict_cmp_iter(subtags.iter().copied()).end()
260
    /// );
261
    ///
262
    /// let loc = locale!("ca-ES-valencia");
263
    /// assert_eq!(
264
    ///     Ordering::Less,
265
    ///     loc.strict_cmp_iter(subtags.iter().copied()).end()
266
    /// );
267
    ///
268
    /// let loc = locale!("ca-ES-valencia-u-nu-arab");
269
    /// assert_eq!(
270
    ///     Ordering::Greater,
271
    ///     loc.strict_cmp_iter(subtags.iter().copied()).end()
272
    /// );
273
    /// ```
274
    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
275
    #[allow(deprecated)]
276
0
    pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
277
0
    where
278
0
        I: Iterator<Item = &'l [u8]>,
279
0
    {
280
0
        let r = self.for_each_subtag_str(&mut |subtag| {
281
0
            if let Some(other) = subtags.next() {
282
0
                match subtag.as_bytes().cmp(other) {
283
0
                    Ordering::Equal => Ok(()),
284
0
                    not_equal => Err(not_equal),
285
                }
286
            } else {
287
0
                Err(Ordering::Greater)
288
            }
289
0
        });
290
0
        match r {
291
0
            Ok(_) => SubtagOrderingResult::Subtags(subtags),
292
0
            Err(o) => SubtagOrderingResult::Ordering(o),
293
        }
294
0
    }
295
296
    /// Compare this `Locale` with a potentially unnormalized BCP-47 string.
297
    ///
298
    /// The return value is equivalent to what would happen if you first parsed the
299
    /// BCP-47 string to a `Locale` and then performed a structural comparison.
300
    ///
301
    /// # Examples
302
    ///
303
    /// ```
304
    /// use icu::locid::Locale;
305
    ///
306
    /// let bcp47_strings: &[&str] = &[
307
    ///     "pl-LaTn-pL",
308
    ///     "uNd",
309
    ///     "UND-FONIPA",
310
    ///     "UnD-t-m0-TrUe",
311
    ///     "uNd-u-CA-Japanese",
312
    ///     "ZH",
313
    /// ];
314
    ///
315
    /// for a in bcp47_strings {
316
    ///     assert!(a.parse::<Locale>().unwrap().normalizing_eq(a));
317
    /// }
318
    /// ```
319
0
    pub fn normalizing_eq(&self, other: &str) -> bool {
320
        macro_rules! subtag_matches {
321
            ($T:ty, $iter:ident, $expected:expr) => {
322
                $iter
323
                    .next()
324
0
                    .map(|b| <$T>::try_from_bytes(b) == Ok($expected))
Unexecuted instantiation: <icu_locid::locale::Locale>::normalizing_eq::{closure#0}
Unexecuted instantiation: <icu_locid::locale::Locale>::normalizing_eq::{closure#1}
Unexecuted instantiation: <icu_locid::locale::Locale>::normalizing_eq::{closure#2}
Unexecuted instantiation: <icu_locid::locale::Locale>::normalizing_eq::{closure#3}
325
                    .unwrap_or(false)
326
            };
327
        }
328
329
0
        let mut iter = SubtagIterator::new(other.as_bytes());
330
0
        if !subtag_matches!(subtags::Language, iter, self.id.language) {
331
0
            return false;
332
0
        }
333
0
        if let Some(ref script) = self.id.script {
334
0
            if !subtag_matches!(subtags::Script, iter, *script) {
335
0
                return false;
336
0
            }
337
0
        }
338
0
        if let Some(ref region) = self.id.region {
339
0
            if !subtag_matches!(subtags::Region, iter, *region) {
340
0
                return false;
341
0
            }
342
0
        }
343
0
        for variant in self.id.variants.iter() {
344
0
            if !subtag_matches!(subtags::Variant, iter, *variant) {
345
0
                return false;
346
0
            }
347
        }
348
0
        if !self.extensions.is_empty() {
349
0
            match extensions::Extensions::try_from_iter(&mut iter) {
350
0
                Ok(exts) => {
351
0
                    if self.extensions != exts {
352
0
                        return false;
353
0
                    }
354
                }
355
                Err(_) => {
356
0
                    return false;
357
                }
358
            }
359
0
        }
360
0
        iter.next().is_none()
361
0
    }
362
363
    #[doc(hidden)]
364
    #[allow(clippy::type_complexity)]
365
0
    pub const fn try_from_bytes_with_single_variant_single_keyword_unicode_extension(
366
0
        v: &[u8],
367
0
    ) -> Result<
368
0
        (
369
0
            subtags::Language,
370
0
            Option<subtags::Script>,
371
0
            Option<subtags::Region>,
372
0
            Option<subtags::Variant>,
373
0
            Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
374
0
        ),
375
0
        ParserError,
376
0
    > {
377
0
        parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(
378
0
            v,
379
0
            ParserMode::Locale,
380
0
        )
381
0
    }
382
383
0
    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
384
0
    where
385
0
        F: FnMut(&str) -> Result<(), E>,
386
0
    {
387
0
        self.id.for_each_subtag_str(f)?;
388
0
        self.extensions.for_each_subtag_str(f)?;
389
0
        Ok(())
390
0
    }
Unexecuted instantiation: <icu_locid::locale::Locale>::for_each_subtag_str::<core::fmt::Error, <icu_locid::locale::Locale as writeable::Writeable>::write_to<diplomat_runtime::writeable::DiplomatWriteable>::{closure#0}>
Unexecuted instantiation: <icu_locid::locale::Locale>::for_each_subtag_str::<core::fmt::Error, <icu_locid::locale::Locale as writeable::Writeable>::write_to<core::fmt::Formatter>::{closure#0}>
Unexecuted instantiation: <icu_locid::locale::Locale>::for_each_subtag_str::<core::fmt::Error, <icu_locid::locale::Locale as writeable::Writeable>::write_to<writeable::cmp::WriteComparator>::{closure#0}>
Unexecuted instantiation: <icu_locid::locale::Locale>::for_each_subtag_str::<core::fmt::Error, <icu_locid::locale::Locale as writeable::Writeable>::write_to<alloc::string::String>::{closure#0}>
Unexecuted instantiation: <icu_locid::locale::Locale>::for_each_subtag_str::<core::convert::Infallible, <icu_locid::locale::Locale as writeable::Writeable>::writeable_length_hint::{closure#0}>
391
}
392
393
impl FromStr for Locale {
394
    type Err = ParserError;
395
396
0
    fn from_str(source: &str) -> Result<Self, Self::Err> {
397
0
        Self::try_from_bytes(source.as_bytes())
398
0
    }
399
}
400
401
impl From<LanguageIdentifier> for Locale {
402
0
    fn from(id: LanguageIdentifier) -> Self {
403
0
        Self {
404
0
            id,
405
0
            extensions: extensions::Extensions::default(),
406
0
        }
407
0
    }
408
}
409
410
impl From<Locale> for LanguageIdentifier {
411
0
    fn from(loc: Locale) -> Self {
412
0
        loc.id
413
0
    }
414
}
415
416
impl AsRef<LanguageIdentifier> for Locale {
417
    #[inline(always)]
418
0
    fn as_ref(&self) -> &LanguageIdentifier {
419
0
        &self.id
420
0
    }
421
}
422
423
impl AsMut<LanguageIdentifier> for Locale {
424
0
    fn as_mut(&mut self) -> &mut LanguageIdentifier {
425
0
        &mut self.id
426
0
    }
427
}
428
429
impl core::fmt::Debug for Locale {
430
0
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
431
0
        writeable::Writeable::write_to(self, f)
432
0
    }
433
}
434
435
impl_writeable_for_each_subtag_str_no_test!(Locale, selff, selff.extensions.is_empty() => selff.id.write_to_string());
436
437
#[test]
438
fn test_writeable() {
439
    use writeable::assert_writeable_eq;
440
    assert_writeable_eq!(Locale::UND, "und");
441
    assert_writeable_eq!("und-001".parse::<Locale>().unwrap(), "und-001");
442
    assert_writeable_eq!("und-Mymr".parse::<Locale>().unwrap(), "und-Mymr");
443
    assert_writeable_eq!("my-Mymr-MM".parse::<Locale>().unwrap(), "my-Mymr-MM");
444
    assert_writeable_eq!(
445
        "my-Mymr-MM-posix".parse::<Locale>().unwrap(),
446
        "my-Mymr-MM-posix",
447
    );
448
    assert_writeable_eq!(
449
        "zh-macos-posix".parse::<Locale>().unwrap(),
450
        "zh-macos-posix",
451
    );
452
    assert_writeable_eq!(
453
        "my-t-my-d0-zawgyi".parse::<Locale>().unwrap(),
454
        "my-t-my-d0-zawgyi",
455
    );
456
    assert_writeable_eq!(
457
        "ar-SA-u-ca-islamic-civil".parse::<Locale>().unwrap(),
458
        "ar-SA-u-ca-islamic-civil",
459
    );
460
    assert_writeable_eq!(
461
        "en-001-x-foo-bar".parse::<Locale>().unwrap(),
462
        "en-001-x-foo-bar",
463
    );
464
    assert_writeable_eq!("und-t-m0-true".parse::<Locale>().unwrap(), "und-t-m0-true",);
465
}
466
467
/// # Examples
468
///
469
/// ```
470
/// use icu::locid::Locale;
471
/// use icu::locid::{locale, subtags::language};
472
///
473
/// assert_eq!(Locale::from(language!("en")), locale!("en"));
474
/// ```
475
impl From<subtags::Language> for Locale {
476
0
    fn from(language: subtags::Language) -> Self {
477
0
        Self {
478
0
            id: language.into(),
479
0
            ..Default::default()
480
0
        }
481
0
    }
482
}
483
484
/// # Examples
485
///
486
/// ```
487
/// use icu::locid::Locale;
488
/// use icu::locid::{locale, subtags::script};
489
///
490
/// assert_eq!(Locale::from(Some(script!("latn"))), locale!("und-Latn"));
491
/// ```
492
impl From<Option<subtags::Script>> for Locale {
493
0
    fn from(script: Option<subtags::Script>) -> Self {
494
0
        Self {
495
0
            id: script.into(),
496
0
            ..Default::default()
497
0
        }
498
0
    }
499
}
500
501
/// # Examples
502
///
503
/// ```
504
/// use icu::locid::Locale;
505
/// use icu::locid::{locale, subtags::region};
506
///
507
/// assert_eq!(Locale::from(Some(region!("US"))), locale!("und-US"));
508
/// ```
509
impl From<Option<subtags::Region>> for Locale {
510
0
    fn from(region: Option<subtags::Region>) -> Self {
511
0
        Self {
512
0
            id: region.into(),
513
0
            ..Default::default()
514
0
        }
515
0
    }
516
}
517
518
/// # Examples
519
///
520
/// ```
521
/// use icu::locid::Locale;
522
/// use icu::locid::{
523
///     locale,
524
///     subtags::{language, region, script},
525
/// };
526
///
527
/// assert_eq!(
528
///     Locale::from((
529
///         language!("en"),
530
///         Some(script!("Latn")),
531
///         Some(region!("US"))
532
///     )),
533
///     locale!("en-Latn-US")
534
/// );
535
/// ```
536
impl
537
    From<(
538
        subtags::Language,
539
        Option<subtags::Script>,
540
        Option<subtags::Region>,
541
    )> for Locale
542
{
543
0
    fn from(
544
0
        lsr: (
545
0
            subtags::Language,
546
0
            Option<subtags::Script>,
547
0
            Option<subtags::Region>,
548
0
        ),
549
0
    ) -> Self {
550
0
        Self {
551
0
            id: lsr.into(),
552
0
            ..Default::default()
553
0
        }
554
0
    }
555
}