Coverage Report

Created: 2025-08-29 06:18

/rust/registry/src/index.crates.io-6f17d22bba15001f/icu_timezone-1.5.0/src/ids.rs
Line
Count
Source (jump to first uncovered line)
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use alloc::borrow::Cow;
6
use alloc::string::String;
7
use alloc::vec::Vec;
8
use icu_provider::prelude::*;
9
use zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor;
10
11
use crate::{
12
    provider::names::{
13
        Bcp47ToIanaMapV1, Bcp47ToIanaMapV1Marker, IanaToBcp47MapV2, IanaToBcp47MapV2Marker,
14
    },
15
    TimeZoneBcp47Id, TimeZoneError,
16
};
17
18
/// A mapper between IANA time zone identifiers and BCP-47 time zone identifiers.
19
///
20
/// This mapper supports two-way mapping, but it is optimized for the case of IANA to BCP-47.
21
/// It also supports normalizing and canonicalizing the IANA strings.
22
///
23
/// There are approximately 600 IANA identifiers and 450 BCP-47 identifiers.
24
///
25
/// BCP-47 time zone identifiers are 8 ASCII characters or less and currently
26
/// average 5.1 characters long. Current IANA time zone identifiers are less than
27
/// 40 ASCII characters and average 14.2 characters long.
28
///
29
/// These lists grow very slowly; in a typical year, 2-3 new identifiers are added.
30
///
31
/// # Normalization vs Canonicalization
32
///
33
/// Multiple IANA time zone identifiers can refer to the same BCP-47 time zone. For example, the
34
/// following four IANA identifiers all map to `"usind"`:
35
///
36
/// - "America/Fort_Wayne"
37
/// - "America/Indiana/Indianapolis"
38
/// - "America/Indianapolis"
39
/// - "US/East-Indiana"
40
///
41
/// There is only one canonical identifier, which is "America/Indiana/Indianapolis". The
42
/// *canonicalization* operation returns the canonical identifier. You should canonicalize if
43
/// you need to compare time zones for equality. Note that the canonical identifier can change
44
/// over time. For example, the identifier "Europe/Kiev" was renamed to the newly-added
45
/// identifier "Europe/Kyiv" in 2022.
46
///
47
/// The *normalization* operation, on the other hand, keeps the input identifier but normalizes
48
/// the casing. For example, "AMERICA/FORT_WAYNE" normalizes to "America/Fort_Wayne".
49
/// Normalization is a data-driven operation because there are no algorithmic casing rules that
50
/// work for all IANA time zone identifiers.
51
///
52
/// Normalization is a cheap operation, but canonicalization might be expensive, since it might
53
/// require searching over all IANA IDs to find the canonicalization. If you need
54
/// canonicalization that is reliably fast, use [`TimeZoneIdMapperWithFastCanonicalization`].
55
///
56
/// # Examples
57
///
58
/// ```
59
/// use icu::timezone::TimeZoneIdMapper;
60
///
61
/// let mapper = TimeZoneIdMapper::new();
62
/// let mapper = mapper.as_borrowed();
63
///
64
/// // The IANA zone "Australia/Melbourne" is the BCP-47 zone "aumel":
65
/// assert_eq!(
66
///     mapper.iana_to_bcp47("Australia/Melbourne"),
67
///     Some("aumel".parse().unwrap())
68
/// );
69
///
70
/// // Lookup is ASCII-case-insensitive:
71
/// assert_eq!(
72
///     mapper.iana_to_bcp47("australia/melbourne"),
73
///     Some("aumel".parse().unwrap())
74
/// );
75
///
76
/// // The IANA zone "Australia/Victoria" is an alias:
77
/// assert_eq!(
78
///     mapper.iana_to_bcp47("Australia/Victoria"),
79
///     Some("aumel".parse().unwrap())
80
/// );
81
///
82
/// // We can recover the canonical identifier from the mapper:
83
/// assert_eq!(
84
///     mapper.canonicalize_iana("Australia/Victoria").unwrap().0,
85
///     "Australia/Melbourne"
86
/// );
87
/// ```
88
#[derive(Debug, Clone)]
89
pub struct TimeZoneIdMapper {
90
    /// Payload for the IANA-to-BCP-47 map. Queries in this file read its `map`
    /// (case-insensitive trie keyed by IANA ID), `bcp47_ids` (list indexed by the
    /// trie value) and `bcp47_ids_checksum` fields.
    data: DataPayload<IanaToBcp47MapV2Marker>,
91
}
92
93
#[cfg(feature = "compiled_data")]
impl Default for TimeZoneIdMapper {
    /// Builds the mapper from compiled data; identical to [`TimeZoneIdMapper::new()`].
    fn default() -> Self {
        TimeZoneIdMapper::new()
    }
}
99
100
impl TimeZoneIdMapper {
101
    /// Creates a new [`TimeZoneIdMapper`] using compiled data.
102
    ///
103
    /// See [`TimeZoneIdMapper`] for an example.
104
    ///
105
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
106
    ///
107
    /// [📚 Help choosing a constructor](icu_provider::constructors)
108
    #[cfg(feature = "compiled_data")]
109
0
    pub fn new() -> Self {
110
0
        Self {
111
0
            data: DataPayload::from_static_ref(
112
0
                crate::provider::Baked::SINGLETON_TIME_ZONE_IANA_TO_BCP47_V2,
113
0
            ),
114
0
        }
115
0
    }
116
117
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError,
118
        #[cfg(skip)]
119
        functions: [
120
            new,
121
            try_new_with_any_provider,
122
            try_new_with_buffer_provider,
123
            try_new_unstable,
124
            Self,
125
        ]
126
    );
127
128
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
129
0
    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
130
0
    where
131
0
        P: DataProvider<IanaToBcp47MapV2Marker> + ?Sized,
132
0
    {
133
0
        let data = provider.load(Default::default())?.take_payload()?;
134
0
        Ok(Self { data })
135
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapper>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>>
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapper>::try_new_unstable::<_>
136
137
    /// Returns a borrowed version of the mapper that can be queried.
138
    ///
139
    /// This avoids a small potential indirection cost when querying the mapper.
140
0
    pub fn as_borrowed(&self) -> TimeZoneIdMapperBorrowed {
141
0
        TimeZoneIdMapperBorrowed {
142
0
            data: self.data.get(),
143
0
        }
144
0
    }
145
}
146
147
impl AsRef<TimeZoneIdMapper> for TimeZoneIdMapper {
    /// Identity conversion, so an owned mapper satisfies the
    /// `I: AsRef<TimeZoneIdMapper>` bound used by
    /// [`TimeZoneIdMapperWithFastCanonicalization`].
    #[inline]
    fn as_ref(&self) -> &Self {
        self
    }
}
153
154
/// A borrowed wrapper around the time zone ID mapper, returned by
155
/// [`TimeZoneIdMapper::as_borrowed()`]. More efficient to query.
156
#[derive(Debug, Copy, Clone)]
157
pub struct TimeZoneIdMapperBorrowed<'a> {
158
    /// Borrowed IANA-to-BCP-47 map data, obtained from the owning mapper's
    /// payload in `TimeZoneIdMapper::as_borrowed()`.
    data: &'a IanaToBcp47MapV2<'a>,
159
}
160
161
impl<'a> TimeZoneIdMapperBorrowed<'a> {
162
    /// Gets the BCP-47 time zone ID from an IANA time zone ID
163
    /// with a case-insensitive lookup.
164
    ///
165
    /// Returns `None` if the IANA ID is not found.
166
    ///
167
    /// # Examples
168
    ///
169
    /// ```
170
    /// use icu_timezone::TimeZoneBcp47Id;
171
    /// use icu_timezone::TimeZoneIdMapper;
172
    ///
173
    /// let mapper = TimeZoneIdMapper::new();
174
    /// let mapper = mapper.as_borrowed();
175
    ///
176
    /// let result = mapper.iana_to_bcp47("Asia/CALCUTTA").unwrap();
177
    ///
178
    /// assert_eq!(*result, "inccu");
179
    ///
180
    /// // Unknown IANA time zone ID:
181
    /// assert_eq!(mapper.iana_to_bcp47("America/San_Francisco"), None);
182
    /// ```
183
0
    pub fn iana_to_bcp47(&self, iana_id: &str) -> Option<TimeZoneBcp47Id> {
        // A miss in the trie means the IANA ID is unknown.
        let trie_value = self.iana_lookup_quick(iana_id)?;
        // The trie value encodes an index into the BCP-47 ID list.
        self.data.bcp47_ids.get(trie_value.index())
    }
187
188
    /// Same as [`Self::iana_to_bcp47()`] but works with potentially ill-formed UTF-8.
189
0
    pub fn iana_bytes_to_bcp47(&self, iana_id: &[u8]) -> Option<TimeZoneBcp47Id> {
        // The trie is keyed on bytes, so the input does not need to be valid UTF-8.
        let trie_value = self.iana_lookup_quick(iana_id)?;
        // The trie value encodes an index into the BCP-47 ID list.
        self.data.bcp47_ids.get(trie_value.index())
    }
193
194
    /// Normalizes the syntax of an IANA time zone ID.
195
    ///
196
    /// Also returns the BCP-47 time zone ID.
197
    ///
198
    /// Returns `None` if the IANA ID is not found.
199
    ///
200
    /// # Examples
201
    ///
202
    /// ```
203
    /// use icu_timezone::TimeZoneBcp47Id;
204
    /// use icu_timezone::TimeZoneIdMapper;
205
    /// use std::borrow::Cow;
206
    ///
207
    /// let mapper = TimeZoneIdMapper::new();
208
    /// let mapper = mapper.as_borrowed();
209
    ///
210
    /// let result = mapper.normalize_iana("Asia/CALCUTTA").unwrap();
211
    ///
212
    /// assert_eq!(result.0, "Asia/Calcutta");
213
    /// assert!(matches!(result.0, Cow::Owned(_)));
214
    /// assert_eq!(*result.1, "inccu");
215
    ///
216
    /// // Borrows when able:
217
    /// let result = mapper.normalize_iana("America/Chicago").unwrap();
218
    /// assert_eq!(result.0, "America/Chicago");
219
    /// assert!(matches!(result.0, Cow::Borrowed(_)));
220
    ///
221
    /// // Unknown IANA time zone ID:
222
    /// assert_eq!(mapper.normalize_iana("America/San_Francisco"), None);
223
    /// ```
224
0
    pub fn normalize_iana<'s>(&self, iana_id: &'s str) -> Option<(Cow<'s, str>, TimeZoneBcp47Id)> {
225
0
        let (trie_value, string) = self.iana_lookup_with_normalization(iana_id, |_| {})?;
226
0
        let Some(bcp47_id) = self.data.bcp47_ids.get(trie_value.index()) else {
227
0
            debug_assert!(false, "index should be in range");
228
0
            return None;
229
        };
230
0
        Some((string, bcp47_id))
231
0
    }
232
233
    /// Returns the canonical, normalized identifier of the given IANA time zone.
234
    ///
235
    /// Also returns the BCP-47 time zone ID.
236
    ///
237
    /// Returns `None` if the IANA ID is not found.
238
    ///
239
    /// # Examples
240
    ///
241
    /// ```
242
    /// use icu_timezone::TimeZoneBcp47Id;
243
    /// use icu_timezone::TimeZoneIdMapper;
244
    /// use std::borrow::Cow;
245
    ///
246
    /// let mapper = TimeZoneIdMapper::new();
247
    /// let mapper = mapper.as_borrowed();
248
    ///
249
    /// let result = mapper.canonicalize_iana("Asia/CALCUTTA").unwrap();
250
    ///
251
    /// assert_eq!(result.0, "Asia/Kolkata");
252
    /// assert!(matches!(result.0, Cow::Owned(_)));
253
    /// assert_eq!(*result.1, "inccu");
254
    ///
255
    /// // Borrows when able:
256
    /// let result = mapper.canonicalize_iana("America/Chicago").unwrap();
257
    /// assert_eq!(result.0, "America/Chicago");
258
    /// assert!(matches!(result.0, Cow::Borrowed(_)));
259
    ///
260
    /// // Unknown IANA time zone ID:
261
    /// assert_eq!(mapper.canonicalize_iana("America/San_Francisco"), None);
262
    /// ```
263
0
    pub fn canonicalize_iana<'s>(
264
0
        &self,
265
0
        iana_id: &'s str,
266
0
    ) -> Option<(Cow<'s, str>, TimeZoneBcp47Id)> {
267
0
        // Note: We collect the cursors into a stack so that we start probing
268
0
        // nearby the input IANA identifier. This should improve lookup time since
269
0
        // most renames share the same prefix like "Asia" or "Europe".
270
0
        let mut stack = Vec::with_capacity(iana_id.len());
271
0
        let (trie_value, string) = self.iana_lookup_with_normalization(iana_id, |cursor| {
272
0
            stack.push((cursor.clone(), 0, 1));
273
0
        })?;
274
0
        let Some(bcp47_id) = self.data.bcp47_ids.get(trie_value.index()) else {
275
0
            debug_assert!(false, "index should be in range");
276
0
            return None;
277
        };
278
0
        if trie_value.is_canonical() {
279
0
            return Some((string, bcp47_id));
280
0
        }
281
0
        // If we get here, we need to walk the trie to find the canonical IANA ID.
282
0
        let needle = trie_value.to_canonical();
283
0
        let Some(string) = self.iana_search(needle, string.into_owned(), stack) else {
284
0
            debug_assert!(false, "every time zone should have a canonical IANA ID");
285
0
            return None;
286
        };
287
0
        Some((Cow::Owned(string), bcp47_id))
288
0
    }
289
290
    /// Returns the canonical, normalized IANA ID of the given BCP-47 ID.
291
    ///
292
    /// This function performs a linear search over all IANA IDs. If this is problematic, consider one of the
293
    /// following functions instead:
294
    ///
295
    /// 1. [`TimeZoneIdMapperBorrowed::canonicalize_iana()`]
296
    ///    is faster if you have an IANA ID.
297
    /// 2. [`TimeZoneIdMapperWithFastCanonicalizationBorrowed::canonical_iana_from_bcp47()`]
298
    ///    is faster, but it requires loading additional data
299
    ///    (see [`TimeZoneIdMapperWithFastCanonicalization`]).
300
    ///
301
    /// Returns `None` if the BCP-47 ID is not found.
302
    ///
303
    /// # Examples
304
    ///
305
    /// ```
306
    /// use icu_timezone::TimeZoneBcp47Id;
307
    /// use icu_timezone::TimeZoneIdMapper;
308
    /// use std::borrow::Cow;
309
    /// use tinystr::tinystr;
310
    ///
311
    /// let mapper = TimeZoneIdMapper::new();
312
    /// let mapper = mapper.as_borrowed();
313
    ///
314
    /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "inccu"));
315
    /// let result = mapper.find_canonical_iana_from_bcp47(bcp47_id).unwrap();
316
    ///
317
    /// assert_eq!(result, "Asia/Kolkata");
318
    ///
319
    /// // Unknown BCP-47 time zone ID:
320
    /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "ussfo"));
321
    /// assert_eq!(mapper.find_canonical_iana_from_bcp47(bcp47_id), None);
322
    /// ```
323
0
    pub fn find_canonical_iana_from_bcp47(&self, bcp47_id: TimeZoneBcp47Id) -> Option<String> {
324
0
        let index = self.data.bcp47_ids.binary_search(&bcp47_id).ok()?;
325
0
        let stack = alloc::vec![(self.data.map.cursor(), 0, 0)];
326
0
        let needle = IanaTrieValue::canonical_for_index(index);
327
0
        let string = self.iana_search(needle, String::new(), stack)?;
328
0
        Some(string)
329
0
    }
330
331
    /// Queries the data for `iana_id` without recording the normalized string.
332
    /// This is a fast, no-alloc lookup.
333
0
    fn iana_lookup_quick(&self, iana_id: impl AsRef<[u8]>) -> Option<IanaTrieValue> {
334
0
        self.data.map.get(iana_id).map(IanaTrieValue)
335
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_quick::<&[u8]>
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_quick::<&str>
336
337
    /// Queries the data for `iana_id` while keeping track of the normalized string.
338
    /// This is a fast lookup, but it may require allocating memory.
339
0
    fn iana_lookup_with_normalization<'l, 's>(
340
0
        &'l self,
341
0
        iana_id: &'s str,
342
0
        mut cursor_fn: impl FnMut(&ZeroAsciiIgnoreCaseTrieCursor<'l>),
343
0
    ) -> Option<(IanaTrieValue, Cow<'s, str>)> {
344
0
        let mut cursor = self.data.map.cursor();
345
0
        let mut string = Cow::Borrowed(iana_id);
346
0
        let mut i = 0;
347
0
        let trie_value = loop {
348
0
            cursor_fn(&cursor);
349
0
            let Some(&input_byte) = string.as_bytes().get(i) else {
350
0
                break cursor.take_value().map(IanaTrieValue);
351
            };
352
0
            let Some(matched_byte) = cursor.step(input_byte) else {
353
0
                break None;
354
            };
355
0
            if matched_byte != input_byte {
356
                // Safety: we write to input_byte farther down after performing safety checks.
357
0
                let Some(input_byte) = unsafe { string.to_mut().as_bytes_mut() }.get_mut(i) else {
358
0
                    debug_assert!(false, "the same index was just accessed earlier");
359
0
                    break None;
360
                };
361
0
                if !input_byte.is_ascii() {
362
0
                    debug_assert!(false, "non-ASCII input byte: {input_byte}");
363
0
                    break None;
364
0
                }
365
0
                if !matched_byte.is_ascii() {
366
0
                    debug_assert!(false, "non-ASCII matched byte: {matched_byte}");
367
0
                    break None;
368
0
                }
369
0
                // Safety: we just checked that both input_byte and matched_byte are ASCII,
370
0
                // so the buffer remains UTF-8 when we replace one with the other.
371
0
                *input_byte = matched_byte;
372
0
            }
373
0
            i += 1;
374
0
        }?;
375
0
        Some((trie_value, string))
376
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_with_normalization::<<icu_timezone::ids::TimeZoneIdMapperBorrowed>::normalize_iana::{closure#0}>
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_with_normalization::<<icu_timezone::ids::TimeZoneIdMapperBorrowed>::canonicalize_iana::{closure#0}>
377
378
    /// Performs a reverse lookup by walking the trie with an optional start position.
379
    /// This is not a fast operation since it requires a linear search.
380
0
    fn iana_search(
381
0
        &self,
382
0
        needle: IanaTrieValue,
383
0
        mut string: String,
384
0
        mut stack: Vec<(ZeroAsciiIgnoreCaseTrieCursor, usize, usize)>,
385
0
    ) -> Option<String> {
386
        loop {
387
0
            let Some((mut cursor, index, suffix_len)) = stack.pop() else {
388
                // Nothing left in the trie.
389
0
                return None;
390
            };
391
            // Check to see if there is a value at the current node.
392
0
            if let Some(candidate) = cursor.take_value().map(IanaTrieValue) {
393
0
                if candidate == needle {
394
                    // Success! Found what we were looking for.
395
0
                    return Some(string);
396
0
                }
397
0
            }
398
            // Now check for children of the current node.
399
0
            let mut sub_cursor = cursor.clone();
400
0
            if let Some(probe_result) = sub_cursor.probe(index) {
401
                // Found a child. Add the current byte edge to the string.
402
0
                if !probe_result.byte.is_ascii() {
403
0
                    debug_assert!(false, "non-ASCII probe byte: {}", probe_result.byte);
404
0
                    return None;
405
0
                }
406
0
                // Safety: the byte being added is ASCII as guarded above
407
0
                unsafe { string.as_mut_vec().push(probe_result.byte) };
408
0
                // Add the child to the stack, and also add back the current
409
0
                // node if there are more siblings to visit.
410
0
                if index + 1 < probe_result.total_siblings as usize {
411
0
                    stack.push((cursor, index + 1, suffix_len));
412
0
                    stack.push((sub_cursor, 0, 1));
413
0
                } else {
414
0
                    stack.push((sub_cursor, 0, suffix_len + 1));
415
0
                }
416
            } else {
417
                // No more children. Pop this node's bytes from the string.
418
0
                for _ in 0..suffix_len {
419
                    // Safety: we check that the bytes being removed are ASCII
420
0
                    let removed_byte = unsafe { string.as_mut_vec().pop() };
421
0
                    if let Some(removed_byte) = removed_byte {
422
0
                        if !removed_byte.is_ascii() {
423
0
                            debug_assert!(false, "non-ASCII removed byte: {removed_byte}");
424
                            // If we get here for some reason, `string` is not in a valid state,
425
                            // so to be extra safe, we can clear it.
426
0
                            string.clear();
427
0
                            return None;
428
0
                        }
429
                    } else {
430
0
                        debug_assert!(false, "could not remove another byte");
431
0
                        return None;
432
                    }
433
                }
434
            }
435
        }
436
0
    }
437
}
438
439
/// A mapper that supplements [`TimeZoneIdMapper`] with about 8 KB of additional data to
440
/// improve the performance of canonical IANA ID lookup.
441
///
442
/// The data in [`TimeZoneIdMapper`] is optimized for IANA to BCP-47 lookup; the reverse
443
/// requires a linear walk over all ~600 IANA identifiers. The data added here allows for
444
/// constant-time mapping from BCP-47 to IANA.
445
#[derive(Debug, Clone)]
446
pub struct TimeZoneIdMapperWithFastCanonicalization<I> {
447
    /// The wrapped mapper; the impl blocks in this file reach the underlying
    /// [`TimeZoneIdMapper`] through `I: AsRef<TimeZoneIdMapper>`.
    inner: I,
448
    /// Payload for the BCP-47-to-IANA data. Code in this file reads its
    /// `canonical_iana_ids` and `bcp47_ids_checksum` fields.
    data: DataPayload<Bcp47ToIanaMapV1Marker>,
449
}
450
451
#[cfg(feature = "compiled_data")]
impl Default for TimeZoneIdMapperWithFastCanonicalization<TimeZoneIdMapper> {
    /// Builds the mapper from compiled data; identical to
    /// [`TimeZoneIdMapperWithFastCanonicalization::new()`].
    fn default() -> Self {
        TimeZoneIdMapperWithFastCanonicalization::<TimeZoneIdMapper>::new()
    }
}
457
458
impl TimeZoneIdMapperWithFastCanonicalization<TimeZoneIdMapper> {
459
    /// Creates a new [`TimeZoneIdMapperWithFastCanonicalization`] using compiled data.
460
    ///
461
    /// See [`TimeZoneIdMapperWithFastCanonicalization`] for an example.
462
    ///
463
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
464
    ///
465
    /// [📚 Help choosing a constructor](icu_provider::constructors)
466
    #[cfg(feature = "compiled_data")]
467
0
    pub fn new() -> Self {
468
        const _: () = assert!(
469
            crate::provider::Baked::SINGLETON_TIME_ZONE_IANA_TO_BCP47_V2.bcp47_ids_checksum
470
                == crate::provider::Baked::SINGLETON_TIME_ZONE_BCP47_TO_IANA_V1.bcp47_ids_checksum,
471
        );
472
0
        Self {
473
0
            inner: TimeZoneIdMapper {
474
0
                data: DataPayload::from_static_ref(
475
0
                    crate::provider::Baked::SINGLETON_TIME_ZONE_IANA_TO_BCP47_V2,
476
0
                ),
477
0
            },
478
0
            data: DataPayload::from_static_ref(
479
0
                crate::provider::Baked::SINGLETON_TIME_ZONE_BCP47_TO_IANA_V1,
480
0
            ),
481
0
        }
482
0
    }
483
484
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: TimeZoneError,
485
        #[cfg(skip)]
486
        functions: [
487
            new,
488
            try_new_with_any_provider,
489
            try_new_with_buffer_provider,
490
            try_new_unstable,
491
            Self,
492
        ]
493
    );
494
495
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
496
0
    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, TimeZoneError>
497
0
    where
498
0
        P: DataProvider<IanaToBcp47MapV2Marker> + DataProvider<Bcp47ToIanaMapV1Marker> + ?Sized,
499
0
    {
500
0
        let mapper = TimeZoneIdMapper::try_new_unstable(provider)?;
501
0
        Self::try_new_with_mapper_unstable(provider, mapper)
502
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>>
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::try_new_unstable::<_>
503
}
504
505
impl<I> TimeZoneIdMapperWithFastCanonicalization<I>
506
where
507
    I: AsRef<TimeZoneIdMapper>,
508
{
509
    /// Creates a new [`TimeZoneIdMapperWithFastCanonicalization`] using compiled data
510
    /// and a pre-existing [`TimeZoneIdMapper`], which can be borrowed.
511
    ///
512
    /// See [`TimeZoneIdMapperWithFastCanonicalization`] for an example.
513
    ///
514
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
515
    ///
516
    /// [📚 Help choosing a constructor](icu_provider::constructors)
517
    #[cfg(feature = "compiled_data")]
518
0
    pub fn try_new_with_mapper(mapper: I) -> Result<Self, TimeZoneError> {
519
0
        Self {
520
0
            inner: mapper,
521
0
            data: DataPayload::from_static_ref(
522
0
                crate::provider::Baked::SINGLETON_TIME_ZONE_BCP47_TO_IANA_V1,
523
0
            ),
524
0
        }
525
0
        .validated()
526
0
    }
527
528
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, mapper: I, error: TimeZoneError,
529
        #[cfg(skip)]
530
        functions: [
531
            try_new_with_mapper,
532
            try_new_with_mapper_with_any_provider,
533
            try_new_with_mapper_with_buffer_provider,
534
            try_new_with_mapper_unstable,
535
            Self,
536
        ]
537
    );
538
539
    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
540
0
    pub fn try_new_with_mapper_unstable<P>(provider: &P, mapper: I) -> Result<Self, TimeZoneError>
541
0
    where
542
0
        P: DataProvider<IanaToBcp47MapV2Marker> + DataProvider<Bcp47ToIanaMapV1Marker> + ?Sized,
543
0
    {
544
0
        let data = provider.load(Default::default())?.take_payload()?;
545
0
        Self {
546
0
            inner: mapper,
547
0
            data,
548
0
        }
549
0
        .validated()
550
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::try_new_with_mapper_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>>
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<_>>::try_new_with_mapper_unstable::<_>
551
552
0
    fn validated(self) -> Result<Self, TimeZoneError> {
553
0
        if self.inner.as_ref().data.get().bcp47_ids_checksum != self.data.get().bcp47_ids_checksum {
554
0
            return Err(TimeZoneError::MismatchedChecksums);
555
0
        }
556
0
        Ok(self)
557
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::validated
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<_>>::validated
558
559
    /// Gets the inner [`TimeZoneIdMapper`] for performing queries.
560
0
    pub fn inner(&self) -> &TimeZoneIdMapper {
561
0
        self.inner.as_ref()
562
0
    }
563
564
    /// Returns a borrowed version of the mapper that can be queried.
565
    ///
566
    /// This avoids a small potential indirection cost when querying the mapper.
567
0
    pub fn as_borrowed(&self) -> TimeZoneIdMapperWithFastCanonicalizationBorrowed {
568
0
        TimeZoneIdMapperWithFastCanonicalizationBorrowed {
569
0
            inner: self.inner.as_ref().as_borrowed(),
570
0
            data: self.data.get(),
571
0
        }
572
0
    }
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::as_borrowed
Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<_>>::as_borrowed
573
}
574
575
/// A borrowed wrapper around the time zone ID mapper, returned by
576
/// [`TimeZoneIdMapperWithFastCanonicalization::as_borrowed()`]. More efficient to query.
577
#[derive(Debug, Copy, Clone)]
578
pub struct TimeZoneIdMapperWithFastCanonicalizationBorrowed<'a> {
579
    /// Borrowed view of the base IANA-to-BCP-47 mapper.
    inner: TimeZoneIdMapperBorrowed<'a>,
580
    /// Borrowed BCP-47-to-IANA data; its `canonical_iana_ids` list is indexed with
    /// the same index as the inner mapper's `bcp47_ids`.
    data: &'a Bcp47ToIanaMapV1<'a>,
581
}
582
583
impl<'a> TimeZoneIdMapperWithFastCanonicalizationBorrowed<'a> {
584
    /// Gets the inner [`TimeZoneIdMapperBorrowed`] for performing queries.
585
0
    pub fn inner(&self) -> TimeZoneIdMapperBorrowed<'a> {
586
0
        self.inner
587
0
    }
588
589
    /// Returns the canonical, normalized identifier of the given IANA time zone.
590
    ///
591
    /// Also returns the BCP-47 time zone ID.
592
    ///
593
    /// This is a faster version of [`TimeZoneIdMapperBorrowed::canonicalize_iana()`]
594
    /// and it always returns borrowed IANA strings, but it requires loading additional data
595
    /// (see [`TimeZoneIdMapperWithFastCanonicalization`]).
596
    ///
597
    /// Returns `None` if the IANA ID is not found.
598
    ///
599
    /// # Examples
600
    ///
601
    /// ```
602
    /// use icu_timezone::TimeZoneBcp47Id;
603
    /// use icu_timezone::TimeZoneIdMapperWithFastCanonicalization;
604
    /// use std::borrow::Cow;
605
    ///
606
    /// let mapper = TimeZoneIdMapperWithFastCanonicalization::new();
607
    /// let mapper = mapper.as_borrowed();
608
    ///
609
    /// let result = mapper.canonicalize_iana("Asia/CALCUTTA").unwrap();
610
    ///
611
    /// // The IANA ID is always returned as a borrowed string slice:
612
    /// assert_eq!(result.0, "Asia/Kolkata");
613
    /// assert_eq!(*result.1, "inccu");
614
    ///
615
    /// // Unknown IANA time zone ID:
616
    /// assert_eq!(mapper.canonicalize_iana("America/San_Francisco"), None);
617
    /// ```
618
0
    pub fn canonicalize_iana(&self, iana_id: &str) -> Option<(&str, TimeZoneBcp47Id)> {
619
0
        let trie_value = self.inner.iana_lookup_quick(iana_id)?;
620
0
        let Some(bcp47_id) = self.inner.data.bcp47_ids.get(trie_value.index()) else {
621
0
            debug_assert!(false, "index should be in range");
622
0
            return None;
623
        };
624
0
        let Some(string) = self.data.canonical_iana_ids.get(trie_value.index()) else {
625
0
            debug_assert!(false, "index should be in range");
626
0
            return None;
627
        };
628
0
        Some((string, bcp47_id))
629
0
    }
630
631
    /// Returns the canonical, normalized IANA ID of the given BCP-47 ID.
632
    ///
633
    /// This is a faster version of [`TimeZoneIdMapperBorrowed::find_canonical_iana_from_bcp47()`]
634
    /// and it always returns borrowed IANA strings, but it requires loading additional data
635
    /// (see [`TimeZoneIdMapperWithFastCanonicalization`]).
636
    ///
637
    /// Returns `None` if the BCP-47 ID is not found.
638
    ///
639
    /// # Examples
640
    ///
641
    /// ```
642
    /// use icu_timezone::TimeZoneBcp47Id;
643
    /// use icu_timezone::TimeZoneIdMapperWithFastCanonicalization;
644
    /// use std::borrow::Cow;
645
    /// use tinystr::tinystr;
646
    ///
647
    /// let mapper = TimeZoneIdMapperWithFastCanonicalization::new();
648
    /// let mapper = mapper.as_borrowed();
649
    ///
650
    /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "inccu"));
651
    /// let result = mapper.canonical_iana_from_bcp47(bcp47_id).unwrap();
652
    ///
653
    /// // The IANA ID is always returned as a borrowed string slice:
654
    /// assert_eq!(result, "Asia/Kolkata");
655
    ///
656
    /// // Unknown BCP-47 time zone ID:
657
    /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "ussfo"));
658
    /// assert_eq!(mapper.canonical_iana_from_bcp47(bcp47_id), None);
659
    /// ```
660
0
    pub fn canonical_iana_from_bcp47(&self, bcp47_id: TimeZoneBcp47Id) -> Option<&str> {
661
0
        let index = self.inner.data.bcp47_ids.binary_search(&bcp47_id).ok()?;
662
0
        let Some(string) = self.data.canonical_iana_ids.get(index) else {
663
0
            debug_assert!(false, "index should be in range");
664
0
            return None;
665
        };
666
0
        Some(string)
667
0
    }
668
}
669
670
/// A raw value stored in the IANA trie: bit 0 is the "canonical" flag and the
/// remaining upper bits hold an index.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
struct IanaTrieValue(usize);

impl IanaTrieValue {
    /// Mask selecting the lowest bit, which marks a canonical entry.
    const CANONICAL_FLAG: usize = 0x1;

    /// Returns the same value with the canonical flag set.
    #[inline]
    pub(crate) fn to_canonical(self) -> Self {
        let Self(raw) = self;
        Self(raw | Self::CANONICAL_FLAG)
    }

    /// Builds the canonical-flagged value for the given index.
    #[inline]
    pub(crate) fn canonical_for_index(index: usize) -> Self {
        Self((index << 1) | Self::CANONICAL_FLAG)
    }

    /// Extracts the index by discarding the flag bit.
    #[inline]
    pub(crate) fn index(self) -> usize {
        let Self(raw) = self;
        raw >> 1
    }

    /// Reports whether the canonical flag is set.
    #[inline]
    pub(crate) fn is_canonical(self) -> bool {
        let Self(raw) = self;
        raw & Self::CANONICAL_FLAG == Self::CANONICAL_FLAG
    }
}