/rust/registry/src/index.crates.io-6f17d22bba15001f/icu_timezone-1.5.0/src/ids.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | use alloc::borrow::Cow; |
6 | | use alloc::string::String; |
7 | | use alloc::vec::Vec; |
8 | | use icu_provider::prelude::*; |
9 | | use zerotrie::cursor::ZeroAsciiIgnoreCaseTrieCursor; |
10 | | |
11 | | use crate::{ |
12 | | provider::names::{ |
13 | | Bcp47ToIanaMapV1, Bcp47ToIanaMapV1Marker, IanaToBcp47MapV2, IanaToBcp47MapV2Marker, |
14 | | }, |
15 | | TimeZoneBcp47Id, TimeZoneError, |
16 | | }; |
17 | | |
18 | | /// A mapper between IANA time zone identifiers and BCP-47 time zone identifiers. |
19 | | /// |
20 | | /// This mapper supports two-way mapping, but it is optimized for the case of IANA to BCP-47. |
21 | | /// It also supports normalizing and canonicalizing the IANA strings. |
22 | | /// |
23 | | /// There are approximately 600 IANA identifiers and 450 BCP-47 identifiers. |
24 | | /// |
25 | | /// BCP-47 time zone identifiers are 8 ASCII characters or less and currently |
26 | | /// average 5.1 characters long. Current IANA time zone identifiers are less than |
27 | | /// 40 ASCII characters and average 14.2 characters long. |
28 | | /// |
29 | | /// These lists grow very slowly; in a typical year, 2-3 new identifiers are added. |
30 | | /// |
31 | | /// # Normalization vs Canonicalization |
32 | | /// |
33 | | /// Multiple IANA time zone identifiers can refer to the same BCP-47 time zone. For example, the |
34 | | /// following four IANA identifiers all map to `"usind"`: |
35 | | /// |
36 | | /// - "America/Fort_Wayne" |
37 | | /// - "America/Indiana/Indianapolis" |
38 | | /// - "America/Indianapolis" |
39 | | /// - "US/East-Indiana" |
40 | | /// |
41 | | /// There is only one canonical identifier, which is "America/Indiana/Indianapolis". The |
42 | | /// *canonicalization* operation returns the canonical identifier. You should canonicalize if |
43 | | /// you need to compare time zones for equality. Note that the canonical identifier can change |
44 | | /// over time. For example, the identifier "Europe/Kiev" was renamed to the newly-added |
45 | | /// identifier "Europe/Kyiv" in 2022. |
46 | | /// |
47 | | /// The *normalization* operation, on the other hand, keeps the input identifier but normalizes |
48 | | /// the casing. For example, "AMERICA/FORT_WAYNE" normalizes to "America/Fort_Wayne". |
49 | | /// Normalization is a data-driven operation because there are no algorithmic casing rules that |
50 | | /// work for all IANA time zone identifiers. |
51 | | /// |
52 | | /// Normalization is a cheap operation, but canonicalization might be expensive, since it might |
53 | | /// require searching over all IANA IDs to find the canonicalization. If you need |
54 | | /// canonicalization that is reliably fast, use [`TimeZoneIdMapperWithFastCanonicalization`]. |
55 | | /// |
56 | | /// # Examples |
57 | | /// |
58 | | /// ``` |
59 | | /// use icu::timezone::TimeZoneIdMapper; |
60 | | /// |
61 | | /// let mapper = TimeZoneIdMapper::new(); |
62 | | /// let mapper = mapper.as_borrowed(); |
63 | | /// |
64 | | /// // The IANA zone "Australia/Melbourne" is the BCP-47 zone "aumel": |
65 | | /// assert_eq!( |
66 | | /// mapper.iana_to_bcp47("Australia/Melbourne"), |
67 | | /// Some("aumel".parse().unwrap()) |
68 | | /// ); |
69 | | /// |
70 | | /// // Lookup is ASCII-case-insensitive: |
71 | | /// assert_eq!( |
72 | | /// mapper.iana_to_bcp47("australia/melbourne"), |
73 | | /// Some("aumel".parse().unwrap()) |
74 | | /// ); |
75 | | /// |
76 | | /// // The IANA zone "Australia/Victoria" is an alias: |
77 | | /// assert_eq!( |
78 | | /// mapper.iana_to_bcp47("Australia/Victoria"), |
79 | | /// Some("aumel".parse().unwrap()) |
80 | | /// ); |
81 | | /// |
82 | | /// // We can recover the canonical identifier from the mapper: |
83 | | /// assert_eq!( |
84 | | /// mapper.canonicalize_iana("Australia/Victoria").unwrap().0, |
85 | | /// "Australia/Melbourne" |
86 | | /// ); |
87 | | /// ``` |
88 | | #[derive(Debug, Clone)] |
89 | | pub struct TimeZoneIdMapper { |
90 | | data: DataPayload<IanaToBcp47MapV2Marker>, |
91 | | } |
92 | | |
93 | | #[cfg(feature = "compiled_data")] |
94 | | impl Default for TimeZoneIdMapper { |
95 | 0 | fn default() -> Self { |
96 | 0 | Self::new() |
97 | 0 | } |
98 | | } |
99 | | |
100 | | impl TimeZoneIdMapper { |
101 | | /// Creates a new [`TimeZoneIdMapper`] using compiled data. |
102 | | /// |
103 | | /// See [`TimeZoneIdMapper`] for an example. |
104 | | /// |
105 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
106 | | /// |
107 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
108 | | #[cfg(feature = "compiled_data")] |
109 | 0 | pub fn new() -> Self { |
110 | 0 | Self { |
111 | 0 | data: DataPayload::from_static_ref( |
112 | 0 | crate::provider::Baked::SINGLETON_TIME_ZONE_IANA_TO_BCP47_V2, |
113 | 0 | ), |
114 | 0 | } |
115 | 0 | } |
116 | | |
117 | | icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError, |
118 | | #[cfg(skip)] |
119 | | functions: [ |
120 | | new, |
121 | | try_new_with_any_provider, |
122 | | try_new_with_buffer_provider, |
123 | | try_new_unstable, |
124 | | Self, |
125 | | ] |
126 | | ); |
127 | | |
128 | | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] |
129 | 0 | pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError> |
130 | 0 | where |
131 | 0 | P: DataProvider<IanaToBcp47MapV2Marker> + ?Sized, |
132 | 0 | { |
133 | 0 | let data = provider.load(Default::default())?.take_payload()?; |
134 | 0 | Ok(Self { data }) |
135 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapper>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>> Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapper>::try_new_unstable::<_> |
136 | | |
137 | | /// Returns a borrowed version of the mapper that can be queried. |
138 | | /// |
139 | | /// This avoids a small potential indirection cost when querying the mapper. |
140 | 0 | pub fn as_borrowed(&self) -> TimeZoneIdMapperBorrowed { |
141 | 0 | TimeZoneIdMapperBorrowed { |
142 | 0 | data: self.data.get(), |
143 | 0 | } |
144 | 0 | } |
145 | | } |
146 | | |
147 | | impl AsRef<TimeZoneIdMapper> for TimeZoneIdMapper { |
148 | | #[inline] |
149 | 0 | fn as_ref(&self) -> &TimeZoneIdMapper { |
150 | 0 | self |
151 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapper as core::convert::AsRef<icu_timezone::ids::TimeZoneIdMapper>>::as_ref Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapper as core::convert::AsRef<icu_timezone::ids::TimeZoneIdMapper>>::as_ref |
152 | | } |
153 | | |
154 | | /// A borrowed wrapper around the time zone ID mapper, returned by |
155 | | /// [`TimeZoneIdMapper::as_borrowed()`]. More efficient to query. |
156 | | #[derive(Debug, Copy, Clone)] |
157 | | pub struct TimeZoneIdMapperBorrowed<'a> { |
158 | | data: &'a IanaToBcp47MapV2<'a>, |
159 | | } |
160 | | |
161 | | impl<'a> TimeZoneIdMapperBorrowed<'a> { |
162 | | /// Gets the BCP-47 time zone ID from an IANA time zone ID |
163 | | /// with a case-insensitive lookup. |
164 | | /// |
165 | | /// Returns `None` if the IANA ID is not found. |
166 | | /// |
167 | | /// # Examples |
168 | | /// |
169 | | /// ``` |
170 | | /// use icu_timezone::TimeZoneBcp47Id; |
171 | | /// use icu_timezone::TimeZoneIdMapper; |
172 | | /// |
173 | | /// let mapper = TimeZoneIdMapper::new(); |
174 | | /// let mapper = mapper.as_borrowed(); |
175 | | /// |
176 | | /// let result = mapper.iana_to_bcp47("Asia/CALCUTTA").unwrap(); |
177 | | /// |
178 | | /// assert_eq!(*result, "inccu"); |
179 | | /// |
180 | | /// // Unknown IANA time zone ID: |
181 | | /// assert_eq!(mapper.iana_to_bcp47("America/San_Francisco"), None); |
182 | | /// ``` |
183 | 0 | pub fn iana_to_bcp47(&self, iana_id: &str) -> Option<TimeZoneBcp47Id> { |
184 | 0 | self.iana_lookup_quick(iana_id) |
185 | 0 | .and_then(|trie_value| self.data.bcp47_ids.get(trie_value.index())) |
186 | 0 | } |
187 | | |
188 | | /// Same as [`Self::iana_to_bcp47()`] but works with potentially ill-formed UTF-8. |
189 | 0 | pub fn iana_bytes_to_bcp47(&self, iana_id: &[u8]) -> Option<TimeZoneBcp47Id> { |
190 | 0 | self.iana_lookup_quick(iana_id) |
191 | 0 | .and_then(|trie_value| self.data.bcp47_ids.get(trie_value.index())) |
192 | 0 | } |
193 | | |
194 | | /// Normalizes the syntax of an IANA time zone ID. |
195 | | /// |
196 | | /// Also returns the BCP-47 time zone ID. |
197 | | /// |
198 | | /// Returns `None` if the IANA ID is not found. |
199 | | /// |
200 | | /// # Examples |
201 | | /// |
202 | | /// ``` |
203 | | /// use icu_timezone::TimeZoneBcp47Id; |
204 | | /// use icu_timezone::TimeZoneIdMapper; |
205 | | /// use std::borrow::Cow; |
206 | | /// |
207 | | /// let mapper = TimeZoneIdMapper::new(); |
208 | | /// let mapper = mapper.as_borrowed(); |
209 | | /// |
210 | | /// let result = mapper.normalize_iana("Asia/CALCUTTA").unwrap(); |
211 | | /// |
212 | | /// assert_eq!(result.0, "Asia/Calcutta"); |
213 | | /// assert!(matches!(result.0, Cow::Owned(_))); |
214 | | /// assert_eq!(*result.1, "inccu"); |
215 | | /// |
216 | | /// // Borrows when able: |
217 | | /// let result = mapper.normalize_iana("America/Chicago").unwrap(); |
218 | | /// assert_eq!(result.0, "America/Chicago"); |
219 | | /// assert!(matches!(result.0, Cow::Borrowed(_))); |
220 | | /// |
221 | | /// // Unknown IANA time zone ID: |
222 | | /// assert_eq!(mapper.normalize_iana("America/San_Francisco"), None); |
223 | | /// ``` |
224 | 0 | pub fn normalize_iana<'s>(&self, iana_id: &'s str) -> Option<(Cow<'s, str>, TimeZoneBcp47Id)> { |
225 | 0 | let (trie_value, string) = self.iana_lookup_with_normalization(iana_id, |_| {})?; |
226 | 0 | let Some(bcp47_id) = self.data.bcp47_ids.get(trie_value.index()) else { |
227 | 0 | debug_assert!(false, "index should be in range"); |
228 | 0 | return None; |
229 | | }; |
230 | 0 | Some((string, bcp47_id)) |
231 | 0 | } |
232 | | |
233 | | /// Returns the canonical, normalized identifier of the given IANA time zone. |
234 | | /// |
235 | | /// Also returns the BCP-47 time zone ID. |
236 | | /// |
237 | | /// Returns `None` if the IANA ID is not found. |
238 | | /// |
239 | | /// # Examples |
240 | | /// |
241 | | /// ``` |
242 | | /// use icu_timezone::TimeZoneBcp47Id; |
243 | | /// use icu_timezone::TimeZoneIdMapper; |
244 | | /// use std::borrow::Cow; |
245 | | /// |
246 | | /// let mapper = TimeZoneIdMapper::new(); |
247 | | /// let mapper = mapper.as_borrowed(); |
248 | | /// |
249 | | /// let result = mapper.canonicalize_iana("Asia/CALCUTTA").unwrap(); |
250 | | /// |
251 | | /// assert_eq!(result.0, "Asia/Kolkata"); |
252 | | /// assert!(matches!(result.0, Cow::Owned(_))); |
253 | | /// assert_eq!(*result.1, "inccu"); |
254 | | /// |
255 | | /// // Borrows when able: |
256 | | /// let result = mapper.canonicalize_iana("America/Chicago").unwrap(); |
257 | | /// assert_eq!(result.0, "America/Chicago"); |
258 | | /// assert!(matches!(result.0, Cow::Borrowed(_))); |
259 | | /// |
260 | | /// // Unknown IANA time zone ID: |
261 | | /// assert_eq!(mapper.canonicalize_iana("America/San_Francisco"), None); |
262 | | /// ``` |
263 | 0 | pub fn canonicalize_iana<'s>( |
264 | 0 | &self, |
265 | 0 | iana_id: &'s str, |
266 | 0 | ) -> Option<(Cow<'s, str>, TimeZoneBcp47Id)> { |
267 | 0 | // Note: We collect the cursors into a stack so that we start probing |
268 | 0 | // nearby the input IANA identifier. This should improve lookup time since |
269 | 0 | // most renames share the same prefix like "Asia" or "Europe". |
270 | 0 | let mut stack = Vec::with_capacity(iana_id.len()); |
271 | 0 | let (trie_value, string) = self.iana_lookup_with_normalization(iana_id, |cursor| { |
272 | 0 | stack.push((cursor.clone(), 0, 1)); |
273 | 0 | })?; |
274 | 0 | let Some(bcp47_id) = self.data.bcp47_ids.get(trie_value.index()) else { |
275 | 0 | debug_assert!(false, "index should be in range"); |
276 | 0 | return None; |
277 | | }; |
278 | 0 | if trie_value.is_canonical() { |
279 | 0 | return Some((string, bcp47_id)); |
280 | 0 | } |
281 | 0 | // If we get here, we need to walk the trie to find the canonical IANA ID. |
282 | 0 | let needle = trie_value.to_canonical(); |
283 | 0 | let Some(string) = self.iana_search(needle, string.into_owned(), stack) else { |
284 | 0 | debug_assert!(false, "every time zone should have a canonical IANA ID"); |
285 | 0 | return None; |
286 | | }; |
287 | 0 | Some((Cow::Owned(string), bcp47_id)) |
288 | 0 | } |
289 | | |
290 | | /// Returns the canonical, normalized IANA ID of the given BCP-47 ID. |
291 | | /// |
292 | | /// This function performs a linear search over all IANA IDs. If this is problematic, consider one of the |
293 | | /// following functions instead: |
294 | | /// |
295 | | /// 1. [`TimeZoneIdMapperBorrowed::canonicalize_iana()`] |
296 | | /// is faster if you have an IANA ID. |
297 | | /// 2. [`TimeZoneIdMapperWithFastCanonicalizationBorrowed::canonical_iana_from_bcp47()`] |
298 | | /// is faster, but it requires loading additional data |
299 | | /// (see [`TimeZoneIdMapperWithFastCanonicalization`]). |
300 | | /// |
301 | | /// Returns `None` if the BCP-47 ID is not found. |
302 | | /// |
303 | | /// # Examples |
304 | | /// |
305 | | /// ``` |
306 | | /// use icu_timezone::TimeZoneBcp47Id; |
307 | | /// use icu_timezone::TimeZoneIdMapper; |
308 | | /// use std::borrow::Cow; |
309 | | /// use tinystr::tinystr; |
310 | | /// |
311 | | /// let mapper = TimeZoneIdMapper::new(); |
312 | | /// let mapper = mapper.as_borrowed(); |
313 | | /// |
314 | | /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "inccu")); |
315 | | /// let result = mapper.find_canonical_iana_from_bcp47(bcp47_id).unwrap(); |
316 | | /// |
317 | | /// assert_eq!(result, "Asia/Kolkata"); |
318 | | /// |
319 | | /// // Unknown BCP-47 time zone ID: |
320 | | /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "ussfo")); |
321 | | /// assert_eq!(mapper.find_canonical_iana_from_bcp47(bcp47_id), None); |
322 | | /// ``` |
323 | 0 | pub fn find_canonical_iana_from_bcp47(&self, bcp47_id: TimeZoneBcp47Id) -> Option<String> { |
324 | 0 | let index = self.data.bcp47_ids.binary_search(&bcp47_id).ok()?; |
325 | 0 | let stack = alloc::vec![(self.data.map.cursor(), 0, 0)]; |
326 | 0 | let needle = IanaTrieValue::canonical_for_index(index); |
327 | 0 | let string = self.iana_search(needle, String::new(), stack)?; |
328 | 0 | Some(string) |
329 | 0 | } |
330 | | |
331 | | /// Queries the data for `iana_id` without recording the normalized string. |
332 | | /// This is a fast, no-alloc lookup. |
333 | 0 | fn iana_lookup_quick(&self, iana_id: impl AsRef<[u8]>) -> Option<IanaTrieValue> { |
334 | 0 | self.data.map.get(iana_id).map(IanaTrieValue) |
335 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_quick::<&[u8]> Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_quick::<&str> |
336 | | |
337 | | /// Queries the data for `iana_id` while keeping track of the normalized string. |
338 | | /// This is a fast lookup, but it may require allocating memory. |
339 | 0 | fn iana_lookup_with_normalization<'l, 's>( |
340 | 0 | &'l self, |
341 | 0 | iana_id: &'s str, |
342 | 0 | mut cursor_fn: impl FnMut(&ZeroAsciiIgnoreCaseTrieCursor<'l>), |
343 | 0 | ) -> Option<(IanaTrieValue, Cow<'s, str>)> { |
344 | 0 | let mut cursor = self.data.map.cursor(); |
345 | 0 | let mut string = Cow::Borrowed(iana_id); |
346 | 0 | let mut i = 0; |
347 | 0 | let trie_value = loop { |
348 | 0 | cursor_fn(&cursor); |
349 | 0 | let Some(&input_byte) = string.as_bytes().get(i) else { |
350 | 0 | break cursor.take_value().map(IanaTrieValue); |
351 | | }; |
352 | 0 | let Some(matched_byte) = cursor.step(input_byte) else { |
353 | 0 | break None; |
354 | | }; |
355 | 0 | if matched_byte != input_byte { |
356 | | // Safety: we write to input_byte farther down after performing safety checks. |
357 | 0 | let Some(input_byte) = unsafe { string.to_mut().as_bytes_mut() }.get_mut(i) else { |
358 | 0 | debug_assert!(false, "the same index was just accessed earlier"); |
359 | 0 | break None; |
360 | | }; |
361 | 0 | if !input_byte.is_ascii() { |
362 | 0 | debug_assert!(false, "non-ASCII input byte: {input_byte}"); |
363 | 0 | break None; |
364 | 0 | } |
365 | 0 | if !matched_byte.is_ascii() { |
366 | 0 | debug_assert!(false, "non-ASCII matched byte: {matched_byte}"); |
367 | 0 | break None; |
368 | 0 | } |
369 | 0 | // Safety: we just checked that both input_byte and matched_byte are ASCII, |
370 | 0 | // so the buffer remains UTF-8 when we replace one with the other. |
371 | 0 | *input_byte = matched_byte; |
372 | 0 | } |
373 | 0 | i += 1; |
374 | 0 | }?; |
375 | 0 | Some((trie_value, string)) |
376 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_with_normalization::<<icu_timezone::ids::TimeZoneIdMapperBorrowed>::normalize_iana::{closure#0}> Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperBorrowed>::iana_lookup_with_normalization::<<icu_timezone::ids::TimeZoneIdMapperBorrowed>::canonicalize_iana::{closure#0}> |
377 | | |
378 | | /// Performs a reverse lookup by walking the trie with an optional start position. |
379 | | /// This is not a fast operation since it requires a linear search. |
380 | 0 | fn iana_search( |
381 | 0 | &self, |
382 | 0 | needle: IanaTrieValue, |
383 | 0 | mut string: String, |
384 | 0 | mut stack: Vec<(ZeroAsciiIgnoreCaseTrieCursor, usize, usize)>, |
385 | 0 | ) -> Option<String> { |
386 | | loop { |
387 | 0 | let Some((mut cursor, index, suffix_len)) = stack.pop() else { |
388 | | // Nothing left in the trie. |
389 | 0 | return None; |
390 | | }; |
391 | | // Check to see if there is a value at the current node. |
392 | 0 | if let Some(candidate) = cursor.take_value().map(IanaTrieValue) { |
393 | 0 | if candidate == needle { |
394 | | // Success! Found what we were looking for. |
395 | 0 | return Some(string); |
396 | 0 | } |
397 | 0 | } |
398 | | // Now check for children of the current node. |
399 | 0 | let mut sub_cursor = cursor.clone(); |
400 | 0 | if let Some(probe_result) = sub_cursor.probe(index) { |
401 | | // Found a child. Add the current byte edge to the string. |
402 | 0 | if !probe_result.byte.is_ascii() { |
403 | 0 | debug_assert!(false, "non-ASCII probe byte: {}", probe_result.byte); |
404 | 0 | return None; |
405 | 0 | } |
406 | 0 | // Safety: the byte being added is ASCII as guarded above |
407 | 0 | unsafe { string.as_mut_vec().push(probe_result.byte) }; |
408 | 0 | // Add the child to the stack, and also add back the current |
409 | 0 | // node if there are more siblings to visit. |
410 | 0 | if index + 1 < probe_result.total_siblings as usize { |
411 | 0 | stack.push((cursor, index + 1, suffix_len)); |
412 | 0 | stack.push((sub_cursor, 0, 1)); |
413 | 0 | } else { |
414 | 0 | stack.push((sub_cursor, 0, suffix_len + 1)); |
415 | 0 | } |
416 | | } else { |
417 | | // No more children. Pop this node's bytes from the string. |
418 | 0 | for _ in 0..suffix_len { |
419 | | // Safety: we check that the bytes being removed are ASCII |
420 | 0 | let removed_byte = unsafe { string.as_mut_vec().pop() }; |
421 | 0 | if let Some(removed_byte) = removed_byte { |
422 | 0 | if !removed_byte.is_ascii() { |
423 | 0 | debug_assert!(false, "non-ASCII removed byte: {removed_byte}"); |
424 | | // If we get here for some reason, `string` is not in a valid state, |
425 | | // so to be extra safe, we can clear it. |
426 | 0 | string.clear(); |
427 | 0 | return None; |
428 | 0 | } |
429 | | } else { |
430 | 0 | debug_assert!(false, "could not remove another byte"); |
431 | 0 | return None; |
432 | | } |
433 | | } |
434 | | } |
435 | | } |
436 | 0 | } |
437 | | } |
438 | | |
439 | | /// A mapper that supplements [`TimeZoneIdMapper`] with about 8 KB of additional data to |
440 | | /// improve the performance of canonical IANA ID lookup. |
441 | | /// |
442 | | /// The data in [`TimeZoneIdMapper`] is optimized for IANA to BCP-47 lookup; the reverse |
443 | | /// requires a linear walk over all ~600 IANA identifiers. The data added here replaces |
444 | | /// that walk with a binary search over the BCP-47 IDs followed by an indexed lookup. |
445 | | #[derive(Debug, Clone)] |
446 | | pub struct TimeZoneIdMapperWithFastCanonicalization<I> { |
447 | | inner: I, |
448 | | data: DataPayload<Bcp47ToIanaMapV1Marker>, |
449 | | } |
450 | | |
451 | | #[cfg(feature = "compiled_data")] |
452 | | impl Default for TimeZoneIdMapperWithFastCanonicalization<TimeZoneIdMapper> { |
453 | 0 | fn default() -> Self { |
454 | 0 | Self::new() |
455 | 0 | } |
456 | | } |
457 | | |
458 | | impl TimeZoneIdMapperWithFastCanonicalization<TimeZoneIdMapper> { |
459 | | /// Creates a new [`TimeZoneIdMapperWithFastCanonicalization`] using compiled data. |
460 | | /// |
461 | | /// See [`TimeZoneIdMapperWithFastCanonicalizationBorrowed::canonicalize_iana()`] for an example. |
462 | | /// |
463 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
464 | | /// |
465 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
466 | | #[cfg(feature = "compiled_data")] |
467 | 0 | pub fn new() -> Self { |
468 | | const _: () = assert!( |
469 | | crate::provider::Baked::SINGLETON_TIME_ZONE_IANA_TO_BCP47_V2.bcp47_ids_checksum |
470 | | == crate::provider::Baked::SINGLETON_TIME_ZONE_BCP47_TO_IANA_V1.bcp47_ids_checksum, |
471 | | ); |
472 | 0 | Self { |
473 | 0 | inner: TimeZoneIdMapper { |
474 | 0 | data: DataPayload::from_static_ref( |
475 | 0 | crate::provider::Baked::SINGLETON_TIME_ZONE_IANA_TO_BCP47_V2, |
476 | 0 | ), |
477 | 0 | }, |
478 | 0 | data: DataPayload::from_static_ref( |
479 | 0 | crate::provider::Baked::SINGLETON_TIME_ZONE_BCP47_TO_IANA_V1, |
480 | 0 | ), |
481 | 0 | } |
482 | 0 | } |
483 | | |
484 | | icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: TimeZoneError, |
485 | | #[cfg(skip)] |
486 | | functions: [ |
487 | | new, |
488 | | try_new_with_any_provider, |
489 | | try_new_with_buffer_provider, |
490 | | try_new_unstable, |
491 | | Self, |
492 | | ] |
493 | | ); |
494 | | |
495 | | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] |
496 | 0 | pub fn try_new_unstable<P>(provider: &P) -> Result<Self, TimeZoneError> |
497 | 0 | where |
498 | 0 | P: DataProvider<IanaToBcp47MapV2Marker> + DataProvider<Bcp47ToIanaMapV1Marker> + ?Sized, |
499 | 0 | { |
500 | 0 | let mapper = TimeZoneIdMapper::try_new_unstable(provider)?; |
501 | 0 | Self::try_new_with_mapper_unstable(provider, mapper) |
502 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::try_new_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>> Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::try_new_unstable::<_> |
503 | | } |
504 | | |
505 | | impl<I> TimeZoneIdMapperWithFastCanonicalization<I> |
506 | | where |
507 | | I: AsRef<TimeZoneIdMapper>, |
508 | | { |
509 | | /// Creates a new [`TimeZoneIdMapperWithFastCanonicalization`] using compiled data |
510 | | /// and a pre-existing [`TimeZoneIdMapper`], which can be borrowed. |
511 | | /// |
512 | | /// See [`TimeZoneIdMapperWithFastCanonicalizationBorrowed::canonicalize_iana()`] for an example of querying the resulting mapper. |
513 | | /// |
514 | | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
515 | | /// |
516 | | /// [📚 Help choosing a constructor](icu_provider::constructors) |
517 | | #[cfg(feature = "compiled_data")] |
518 | 0 | pub fn try_new_with_mapper(mapper: I) -> Result<Self, TimeZoneError> { |
519 | 0 | Self { |
520 | 0 | inner: mapper, |
521 | 0 | data: DataPayload::from_static_ref( |
522 | 0 | crate::provider::Baked::SINGLETON_TIME_ZONE_BCP47_TO_IANA_V1, |
523 | 0 | ), |
524 | 0 | } |
525 | 0 | .validated() |
526 | 0 | } |
527 | | |
528 | | icu_provider::gen_any_buffer_data_constructors!(locale: skip, mapper: I, error: TimeZoneError, |
529 | | #[cfg(skip)] |
530 | | functions: [ |
531 | | try_new_with_mapper, |
532 | | try_new_with_mapper_with_any_provider, |
533 | | try_new_with_mapper_with_buffer_provider, |
534 | | try_new_with_mapper_unstable, |
535 | | Self, |
536 | | ] |
537 | | ); |
538 | | |
539 | | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] |
540 | 0 | pub fn try_new_with_mapper_unstable<P>(provider: &P, mapper: I) -> Result<Self, TimeZoneError> |
541 | 0 | where |
542 | 0 | P: DataProvider<IanaToBcp47MapV2Marker> + DataProvider<Bcp47ToIanaMapV1Marker> + ?Sized, |
543 | 0 | { |
544 | 0 | let data = provider.load(Default::default())?.take_payload()?; |
545 | 0 | Self { |
546 | 0 | inner: mapper, |
547 | 0 | data, |
548 | 0 | } |
549 | 0 | .validated() |
550 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::try_new_with_mapper_unstable::<icu_provider::any::DowncastingAnyProvider<icu_provider_adapters::empty::EmptyDataProvider>> Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<_>>::try_new_with_mapper_unstable::<_> |
551 | | |
552 | 0 | fn validated(self) -> Result<Self, TimeZoneError> { |
553 | 0 | if self.inner.as_ref().data.get().bcp47_ids_checksum != self.data.get().bcp47_ids_checksum { |
554 | 0 | return Err(TimeZoneError::MismatchedChecksums); |
555 | 0 | } |
556 | 0 | Ok(self) |
557 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::validated Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<_>>::validated |
558 | | |
559 | | /// Gets the inner [`TimeZoneIdMapper`] for performing queries. |
560 | 0 | pub fn inner(&self) -> &TimeZoneIdMapper { |
561 | 0 | self.inner.as_ref() |
562 | 0 | } |
563 | | |
564 | | /// Returns a borrowed version of the mapper that can be queried. |
565 | | /// |
566 | | /// This avoids a small potential indirection cost when querying the mapper. |
567 | 0 | pub fn as_borrowed(&self) -> TimeZoneIdMapperWithFastCanonicalizationBorrowed { |
568 | 0 | TimeZoneIdMapperWithFastCanonicalizationBorrowed { |
569 | 0 | inner: self.inner.as_ref().as_borrowed(), |
570 | 0 | data: self.data.get(), |
571 | 0 | } |
572 | 0 | } Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<icu_timezone::ids::TimeZoneIdMapper>>::as_borrowed Unexecuted instantiation: <icu_timezone::ids::TimeZoneIdMapperWithFastCanonicalization<_>>::as_borrowed |
573 | | } |
574 | | |
575 | | /// A borrowed wrapper around the time zone ID mapper, returned by |
576 | | /// [`TimeZoneIdMapperWithFastCanonicalization::as_borrowed()`]. More efficient to query. |
577 | | #[derive(Debug, Copy, Clone)] |
578 | | pub struct TimeZoneIdMapperWithFastCanonicalizationBorrowed<'a> { |
579 | | inner: TimeZoneIdMapperBorrowed<'a>, |
580 | | data: &'a Bcp47ToIanaMapV1<'a>, |
581 | | } |
582 | | |
583 | | impl<'a> TimeZoneIdMapperWithFastCanonicalizationBorrowed<'a> { |
584 | | /// Gets the inner [`TimeZoneIdMapperBorrowed`] for performing queries. |
585 | 0 | pub fn inner(&self) -> TimeZoneIdMapperBorrowed<'a> { |
586 | 0 | self.inner |
587 | 0 | } |
588 | | |
589 | | /// Returns the canonical, normalized identifier of the given IANA time zone. |
590 | | /// |
591 | | /// Also returns the BCP-47 time zone ID. |
592 | | /// |
593 | | /// This is a faster version of [`TimeZoneIdMapperBorrowed::canonicalize_iana()`] |
594 | | /// and it always returns borrowed IANA strings, but it requires loading additional data |
595 | | /// (see [`TimeZoneIdMapperWithFastCanonicalization`]). |
596 | | /// |
597 | | /// Returns `None` if the IANA ID is not found. |
598 | | /// |
599 | | /// # Examples |
600 | | /// |
601 | | /// ``` |
602 | | /// use icu_timezone::TimeZoneBcp47Id; |
603 | | /// use icu_timezone::TimeZoneIdMapperWithFastCanonicalization; |
604 | | /// use std::borrow::Cow; |
605 | | /// |
606 | | /// let mapper = TimeZoneIdMapperWithFastCanonicalization::new(); |
607 | | /// let mapper = mapper.as_borrowed(); |
608 | | /// |
609 | | /// let result = mapper.canonicalize_iana("Asia/CALCUTTA").unwrap(); |
610 | | /// |
611 | | /// // The canonical IANA ID is returned as a borrowed `&str`: |
612 | | /// assert_eq!(result.0, "Asia/Kolkata"); |
613 | | /// assert_eq!(*result.1, "inccu"); |
614 | | /// |
615 | | /// // Unknown IANA time zone ID: |
616 | | /// assert_eq!(mapper.canonicalize_iana("America/San_Francisco"), None); |
617 | | /// ``` |
618 | 0 | pub fn canonicalize_iana(&self, iana_id: &str) -> Option<(&str, TimeZoneBcp47Id)> { |
619 | 0 | let trie_value = self.inner.iana_lookup_quick(iana_id)?; |
620 | 0 | let Some(bcp47_id) = self.inner.data.bcp47_ids.get(trie_value.index()) else { |
621 | 0 | debug_assert!(false, "index should be in range"); |
622 | 0 | return None; |
623 | | }; |
624 | 0 | let Some(string) = self.data.canonical_iana_ids.get(trie_value.index()) else { |
625 | 0 | debug_assert!(false, "index should be in range"); |
626 | 0 | return None; |
627 | | }; |
628 | 0 | Some((string, bcp47_id)) |
629 | 0 | } |
630 | | |
631 | | /// Returns the canonical, normalized IANA ID of the given BCP-47 ID. |
632 | | /// |
633 | | /// This is a faster version of [`TimeZoneIdMapperBorrowed::find_canonical_iana_from_bcp47()`] |
634 | | /// and it always returns borrowed IANA strings, but it requires loading additional data |
635 | | /// (see [`TimeZoneIdMapperWithFastCanonicalization`]). |
636 | | /// |
637 | | /// Returns `None` if the BCP-47 ID is not found. |
638 | | /// |
639 | | /// # Examples |
640 | | /// |
641 | | /// ``` |
642 | | /// use icu_timezone::TimeZoneBcp47Id; |
643 | | /// use icu_timezone::TimeZoneIdMapperWithFastCanonicalization; |
644 | | /// use std::borrow::Cow; |
645 | | /// use tinystr::tinystr; |
646 | | /// |
647 | | /// let mapper = TimeZoneIdMapperWithFastCanonicalization::new(); |
648 | | /// let mapper = mapper.as_borrowed(); |
649 | | /// |
650 | | /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "inccu")); |
651 | | /// let result = mapper.canonical_iana_from_bcp47(bcp47_id).unwrap(); |
652 | | /// |
653 | | /// // The canonical IANA ID is returned as a borrowed `&str`: |
654 | | /// assert_eq!(result, "Asia/Kolkata"); |
655 | | /// |
656 | | /// // Unknown BCP-47 time zone ID: |
657 | | /// let bcp47_id = TimeZoneBcp47Id(tinystr!(8, "ussfo")); |
658 | | /// assert_eq!(mapper.canonical_iana_from_bcp47(bcp47_id), None); |
659 | | /// ``` |
660 | 0 | pub fn canonical_iana_from_bcp47(&self, bcp47_id: TimeZoneBcp47Id) -> Option<&str> { |
661 | 0 | let index = self.inner.data.bcp47_ids.binary_search(&bcp47_id).ok()?; |
662 | 0 | let Some(string) = self.data.canonical_iana_ids.get(index) else { |
663 | 0 | debug_assert!(false, "index should be in range"); |
664 | 0 | return None; |
665 | | }; |
666 | 0 | Some(string) |
667 | 0 | } |
668 | | } |
669 | | |
670 | | #[derive(Copy, Clone, PartialEq, Eq)] |
671 | | #[repr(transparent)] |
672 | | struct IanaTrieValue(usize); |
673 | | |
674 | | impl IanaTrieValue { |
675 | | #[inline] |
676 | 0 | pub(crate) fn to_canonical(self) -> Self { |
677 | 0 | Self(self.0 | 1) |
678 | 0 | } |
679 | | #[inline] |
680 | 0 | pub(crate) fn canonical_for_index(index: usize) -> Self { |
681 | 0 | Self(index << 1).to_canonical() |
682 | 0 | } |
683 | | #[inline] |
684 | 0 | pub(crate) fn index(self) -> usize { |
685 | 0 | self.0 >> 1 |
686 | 0 | } |
687 | | #[inline] |
688 | 0 | pub(crate) fn is_canonical(self) -> bool { |
689 | 0 | (self.0 & 0x1) != 0 |
690 | 0 | } |
691 | | } |