Coverage Report

Created: 2025-12-08 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jiff-0.2.16/src/shared/tzif.rs
Line
Count
Source
1
use alloc::{string::String, vec};
2
3
use super::{
4
    util::{
5
        array_str::Abbreviation,
6
        error::{err, Error},
7
        escape::{Byte, Bytes},
8
        itime::{IOffset, ITimestamp},
9
    },
10
    PosixTimeZone, TzifDateTime, TzifFixed, TzifIndicator, TzifLocalTimeType,
11
    TzifOwned, TzifTransitionInfo, TzifTransitionKind, TzifTransitions,
12
    TzifTransitionsOwned,
13
};
14
15
// These are Jiff min and max timestamp (in seconds) values.
//
// The TZif parser will clamp timestamps to this range. It's
// not ideal, but Jiff can't handle values outside of this range
// and completely refusing to use TZif data with pathological
// timestamps in typically irrelevant transitions is bad juju.
//
// Ref: https://github.com/BurntSushi/jiff/issues/163
// Ref: https://github.com/BurntSushi/jiff/pull/164
const TIMESTAMP_MIN: i64 = -377705023201;
const TIMESTAMP_MAX: i64 = 253402207200;

// Similarly for offsets, although in this case, if we find
// an offset outside of this range, we do actually error. This
// is because it could result in true incorrect datetimes for
// actual transitions.
//
// But our supported offset range is `-25:59:59..=+25:59:59`.
// There's no real time zone with offsets even close to those
// boundaries.
//
// If there is pathological data that we should ignore, then
// we should wait for a real bug report in order to determine
// the right way to ignore/clamp it.
const OFFSET_MIN: i32 = -93599;
const OFFSET_MAX: i32 = 93599;

// When fattening TZif data, this is the year to go up to.
//
// This year was chosen because it's what the "fat" TZif data generated
// by `zic` uses.
const FATTEN_UP_TO_YEAR: i16 = 2038;

// This is a "sanity" limit on the maximum number of transitions we'll
// add to TZif data when fattening them up.
//
// This is mostly just a defense-in-depth limit to avoid weird cases
// where a pathological POSIX time zone could be defined to create
// many transitions. It's not clear that this is actually possible,
// but I felt a little uneasy doing unbounded work that isn't linearly
// proportional to the input data. So, this limit is put into place for
// reasons of "good sense."
//
// For "normal" cases, there should be at most two transitions per
// year. So this limit permits 300/2=150 years of transition data.
// (Although fattening stops before reaching `FATTEN_UP_TO_YEAR`. See
// above.)
const FATTEN_MAX_TRANSITIONS: usize = 300;
62
63
impl TzifOwned {
64
    /// Parses the given data as a TZif formatted file.
65
    ///
66
    /// The name given is attached to the `Tzif` value returned, but is
67
    /// otherwise not significant.
68
    ///
69
    /// If the given data is not recognized to be valid TZif, then an error is
70
    /// returned.
71
    ///
72
    /// In general, callers may assume that it is safe to pass arbitrary or
73
    /// even untrusted data to this function and count on it not panicking
74
    /// or using resources that aren't limited to a small constant factor of
75
    /// the size of the data itself. That is, callers can reliably limit the
76
    /// resources used by limiting the size of the data given to this parse
77
    /// function.
78
0
    pub(crate) fn parse(
79
0
        name: Option<String>,
80
0
        bytes: &[u8],
81
0
    ) -> Result<TzifOwned, Error> {
82
0
        let original = bytes;
83
0
        let name = name.into();
84
0
        let (header32, rest) = Header::parse(4, bytes)
85
0
            .map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
86
0
        let (mut tzif, rest) = if header32.version == 0 {
87
0
            TzifOwned::parse32(name, header32, rest)?
88
        } else {
89
0
            TzifOwned::parse64(name, header32, rest)?
90
        };
91
0
        tzif.fatten();
92
        // This should come after fattening, because fattening may add new
93
        // transitions and we want to add civil datetimes to those.
94
0
        tzif.add_civil_datetimes_to_transitions();
95
0
        tzif.verify_posix_time_zone_consistency()?;
96
        // Compute the checksum using the entire contents of the TZif data.
97
0
        let tzif_raw_len = (rest.as_ptr() as usize)
98
0
            .checked_sub(original.as_ptr() as usize)
99
0
            .unwrap();
100
0
        let tzif_raw_bytes = &original[..tzif_raw_len];
101
0
        tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes);
102
103
        // Shrink all of our allocs so we don't keep excess capacity around.
104
0
        tzif.fixed.designations.shrink_to_fit();
105
0
        tzif.types.shrink_to_fit();
106
0
        tzif.transitions.timestamps.shrink_to_fit();
107
0
        tzif.transitions.civil_starts.shrink_to_fit();
108
0
        tzif.transitions.civil_ends.shrink_to_fit();
109
0
        tzif.transitions.infos.shrink_to_fit();
110
111
0
        Ok(tzif)
112
0
    }
113
114
0
    fn parse32<'b>(
115
0
        name: Option<String>,
116
0
        header32: Header,
117
0
        bytes: &'b [u8],
118
0
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
119
0
        let mut tzif = TzifOwned {
120
0
            fixed: TzifFixed {
121
0
                name,
122
0
                version: header32.version,
123
0
                // filled in later
124
0
                checksum: 0,
125
0
                designations: String::new(),
126
0
                posix_tz: None,
127
0
            },
128
0
            types: vec![],
129
0
            transitions: TzifTransitions {
130
0
                timestamps: vec![],
131
0
                civil_starts: vec![],
132
0
                civil_ends: vec![],
133
0
                infos: vec![],
134
0
            },
135
0
        };
136
0
        let rest = tzif.parse_transitions(&header32, bytes)?;
137
0
        let rest = tzif.parse_transition_types(&header32, rest)?;
138
0
        let rest = tzif.parse_local_time_types(&header32, rest)?;
139
0
        let rest = tzif.parse_time_zone_designations(&header32, rest)?;
140
0
        let rest = tzif.parse_leap_seconds(&header32, rest)?;
141
0
        let rest = tzif.parse_indicators(&header32, rest)?;
142
0
        Ok((tzif, rest))
143
0
    }
144
145
0
    fn parse64<'b>(
146
0
        name: Option<String>,
147
0
        header32: Header,
148
0
        bytes: &'b [u8],
149
0
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
150
0
        let (_, rest) = try_split_at(
151
            "V1 TZif data block",
152
0
            bytes,
153
0
            header32.data_block_len()?,
154
0
        )?;
155
0
        let (header64, rest) = Header::parse(8, rest)
156
0
            .map_err(|e| err!("failed to parse 64-bit header: {e}"))?;
157
0
        let mut tzif = TzifOwned {
158
0
            fixed: TzifFixed {
159
0
                name,
160
0
                version: header64.version,
161
0
                // filled in later
162
0
                checksum: 0,
163
0
                designations: String::new(),
164
0
                posix_tz: None,
165
0
            },
166
0
            types: vec![],
167
0
            transitions: TzifTransitions {
168
0
                timestamps: vec![],
169
0
                civil_starts: vec![],
170
0
                civil_ends: vec![],
171
0
                infos: vec![],
172
0
            },
173
0
        };
174
0
        let rest = tzif.parse_transitions(&header64, rest)?;
175
0
        let rest = tzif.parse_transition_types(&header64, rest)?;
176
0
        let rest = tzif.parse_local_time_types(&header64, rest)?;
177
0
        let rest = tzif.parse_time_zone_designations(&header64, rest)?;
178
0
        let rest = tzif.parse_leap_seconds(&header64, rest)?;
179
0
        let rest = tzif.parse_indicators(&header64, rest)?;
180
0
        let rest = tzif.parse_footer(&header64, rest)?;
181
        // Note that we don't check that the TZif data is fully valid. It is
182
        // possible for it to contain superfluous information. For example, a
183
        // non-zero local time type that is never referenced by a transition.
184
0
        Ok((tzif, rest))
185
0
    }
186
187
0
    fn parse_transitions<'b>(
188
0
        &mut self,
189
0
        header: &Header,
190
0
        bytes: &'b [u8],
191
0
    ) -> Result<&'b [u8], Error> {
192
0
        let (bytes, rest) = try_split_at(
193
            "transition times data block",
194
0
            bytes,
195
0
            header.transition_times_len()?,
196
0
        )?;
197
0
        let mut it = bytes.chunks_exact(header.time_size);
198
        // RFC 8536 says: "If there are no transitions, local time for all
199
        // timestamps is specified by the TZ string in the footer if present
200
        // and nonempty; otherwise, it is specified by time type 0."
201
        //
202
        // RFC 8536 also says: "Local time for timestamps before the first
203
        // transition is specified by the first time type (time type
204
        // 0)."
205
        //
206
        // So if there are no transitions, pushing this dummy one will result
207
        // in the desired behavior even when it's the only transition.
208
        // Similarly, since this is the minimum timestamp value, it will
209
        // trigger for any times before the first transition found in the TZif
210
        // data.
211
0
        self.transitions.add_with_type_index(TIMESTAMP_MIN, 0);
212
0
        while let Some(chunk) = it.next() {
213
0
            let mut timestamp = if header.is_32bit() {
214
0
                i64::from(from_be_bytes_i32(chunk))
215
            } else {
216
0
                from_be_bytes_i64(chunk)
217
            };
218
0
            if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) {
219
0
                // We really shouldn't error here just because the Unix
220
0
                // timestamp is outside what Jiff supports. Since what Jiff
221
0
                // supports is _somewhat_ arbitrary. But Jiff's supported
222
0
                // range is good enough for all realistic purposes, so we
223
0
                // just clamp an out-of-range Unix timestamp to the Jiff
224
0
                // min or max value.
225
0
                //
226
0
                // This can't result in the sorting order being wrong, but
227
0
                // it can result in a transition that is duplicative with
228
0
                // the dummy transition we inserted above. This should be
229
0
                // fine.
230
0
                let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
231
0
                // only-jiff-start
232
0
                warn!(
233
0
                    "found Unix timestamp {timestamp} that is outside \
234
0
                     Jiff's supported range, clamping to {clamped}",
235
0
                );
236
0
                // only-jiff-end
237
0
                timestamp = clamped;
238
0
            }
239
0
            self.transitions.add(timestamp);
240
        }
241
0
        assert!(it.remainder().is_empty());
242
0
        Ok(rest)
243
0
    }
244
245
0
    fn parse_transition_types<'b>(
246
0
        &mut self,
247
0
        header: &Header,
248
0
        bytes: &'b [u8],
249
0
    ) -> Result<&'b [u8], Error> {
250
0
        let (bytes, rest) = try_split_at(
251
            "transition types data block",
252
0
            bytes,
253
0
            header.transition_types_len()?,
254
0
        )?;
255
        // We skip the first transition because it is our minimum dummy
256
        // transition.
257
0
        for (transition_index, &type_index) in (1..).zip(bytes) {
258
0
            if usize::from(type_index) >= header.tzh_typecnt {
259
0
                return Err(err!(
260
0
                    "found transition type index {type_index},
261
0
                     but there are only {} local time types",
262
0
                    header.tzh_typecnt,
263
0
                ));
264
0
            }
265
0
            self.transitions.infos[transition_index].type_index = type_index;
266
        }
267
0
        Ok(rest)
268
0
    }
269
270
0
    fn parse_local_time_types<'b>(
271
0
        &mut self,
272
0
        header: &Header,
273
0
        bytes: &'b [u8],
274
0
    ) -> Result<&'b [u8], Error> {
275
0
        let (bytes, rest) = try_split_at(
276
            "local time types data block",
277
0
            bytes,
278
0
            header.local_time_types_len()?,
279
0
        )?;
280
0
        let mut it = bytes.chunks_exact(6);
281
0
        while let Some(chunk) = it.next() {
282
0
            let offset = from_be_bytes_i32(&chunk[..4]);
283
0
            if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) {
284
0
                return Err(err!(
285
0
                    "found local time type with out-of-bounds offset: {offset}"
286
0
                ));
287
0
            }
288
0
            let is_dst = chunk[4] == 1;
289
0
            let designation = (chunk[5], chunk[5]);
290
0
            self.types.push(TzifLocalTimeType {
291
0
                offset,
292
0
                is_dst,
293
0
                designation,
294
0
                indicator: TzifIndicator::LocalWall,
295
0
            });
296
        }
297
0
        assert!(it.remainder().is_empty());
298
0
        Ok(rest)
299
0
    }
300
301
0
    fn parse_time_zone_designations<'b>(
302
0
        &mut self,
303
0
        header: &Header,
304
0
        bytes: &'b [u8],
305
0
    ) -> Result<&'b [u8], Error> {
306
0
        let (bytes, rest) = try_split_at(
307
            "time zone designations data block",
308
0
            bytes,
309
0
            header.time_zone_designations_len()?,
310
0
        )?;
311
0
        self.fixed.designations =
312
0
            String::from_utf8(bytes.to_vec()).map_err(|_| {
313
0
                err!(
314
0
                    "time zone designations are not valid UTF-8: {:?}",
315
0
                    Bytes(bytes),
316
                )
317
0
            })?;
318
        // Holy hell, this is brutal. The boundary conditions are crazy.
319
0
        for (i, typ) in self.types.iter_mut().enumerate() {
320
0
            let start = usize::from(typ.designation.0);
321
0
            let Some(suffix) = self.fixed.designations.get(start..) else {
322
0
                return Err(err!(
323
0
                    "local time type {i} has designation index of {start}, \
324
0
                     but cannot be more than {}",
325
0
                    self.fixed.designations.len(),
326
0
                ));
327
            };
328
0
            let Some(len) = suffix.find('\x00') else {
329
0
                return Err(err!(
330
0
                    "local time type {i} has designation index of {start}, \
331
0
                     but could not find NUL terminator after it in \
332
0
                     designations: {:?}",
333
0
                    self.fixed.designations,
334
0
                ));
335
            };
336
0
            let Some(end) = start.checked_add(len) else {
337
0
                return Err(err!(
338
0
                    "local time type {i} has designation index of {start}, \
339
0
                     but its length {len} is too big",
340
0
                ));
341
            };
342
0
            typ.designation.1 = u8::try_from(end).map_err(|_| {
343
0
                err!(
344
0
                    "local time type {i} has designation range of \
345
0
                     {start}..{end}, but end is too big",
346
                )
347
0
            })?;
348
        }
349
0
        Ok(rest)
350
0
    }
351
352
    /// This parses the leap second corrections in the TZif data.
353
    ///
354
    /// Note that we only parse and verify them. We don't actually use them.
355
    /// Jiff effectively ignores leap seconds.
356
0
    fn parse_leap_seconds<'b>(
357
0
        &mut self,
358
0
        header: &Header,
359
0
        bytes: &'b [u8],
360
0
    ) -> Result<&'b [u8], Error> {
361
0
        let (bytes, rest) = try_split_at(
362
            "leap seconds data block",
363
0
            bytes,
364
0
            header.leap_second_len()?,
365
0
        )?;
366
0
        let chunk_len = header
367
0
            .time_size
368
0
            .checked_add(4)
369
0
            .expect("time_size plus 4 fits in usize");
370
0
        let mut it = bytes.chunks_exact(chunk_len);
371
0
        while let Some(chunk) = it.next() {
372
0
            let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size);
373
0
            let occur = if header.is_32bit() {
374
0
                i64::from(from_be_bytes_i32(occur_bytes))
375
            } else {
376
0
                from_be_bytes_i64(occur_bytes)
377
            };
378
0
            if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) {
379
0
                // only-jiff-start
380
0
                warn!(
381
0
                    "leap second occurrence {occur} is \
382
0
                     not in Jiff's supported range"
383
0
                )
384
0
                // only-jiff-end
385
0
            }
386
        }
387
0
        assert!(it.remainder().is_empty());
388
0
        Ok(rest)
389
0
    }
390
391
0
    fn parse_indicators<'b>(
392
0
        &mut self,
393
0
        header: &Header,
394
0
        bytes: &'b [u8],
395
0
    ) -> Result<&'b [u8], Error> {
396
0
        let (std_wall_bytes, rest) = try_split_at(
397
            "standard/wall indicators data block",
398
0
            bytes,
399
0
            header.standard_wall_len()?,
400
0
        )?;
401
0
        let (ut_local_bytes, rest) = try_split_at(
402
            "UT/local indicators data block",
403
0
            rest,
404
0
            header.ut_local_len()?,
405
0
        )?;
406
0
        if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
407
            // This is a weird case, but technically possible only if all
408
            // UT/local indicators are 0. If any are 1, then it's an error,
409
            // because it would require the corresponding std/wall indicator
410
            // to be 1 too. Which it can't be, because there aren't any. So
411
            // we just check that they're all zeros.
412
0
            for (i, &byte) in ut_local_bytes.iter().enumerate() {
413
0
                if byte != 0 {
414
0
                    return Err(err!(
415
0
                        "found UT/local indicator '{byte}' for local time \
416
0
                         type {i}, but it must be 0 since all std/wall \
417
0
                         indicators are 0",
418
0
                    ));
419
0
                }
420
            }
421
0
        } else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() {
422
0
            for (i, &byte) in std_wall_bytes.iter().enumerate() {
423
                // Indexing is OK because Header guarantees that the number of
424
                // indicators is 0 or equal to the number of types.
425
0
                self.types[i].indicator = if byte == 0 {
426
0
                    TzifIndicator::LocalWall
427
0
                } else if byte == 1 {
428
0
                    TzifIndicator::LocalStandard
429
                } else {
430
0
                    return Err(err!(
431
0
                        "found invalid std/wall indicator '{byte}' for \
432
0
                         local time type {i}, it must be 0 or 1",
433
0
                    ));
434
                };
435
            }
436
0
        } else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
437
0
            assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
438
0
            let it = std_wall_bytes.iter().zip(ut_local_bytes);
439
0
            for (i, (&stdwall, &utlocal)) in it.enumerate() {
440
                // Indexing is OK because Header guarantees that the number of
441
                // indicators is 0 or equal to the number of types.
442
0
                self.types[i].indicator = match (stdwall, utlocal) {
443
0
                    (0, 0) => TzifIndicator::LocalWall,
444
0
                    (1, 0) => TzifIndicator::LocalStandard,
445
0
                    (1, 1) => TzifIndicator::UTStandard,
446
                    (0, 1) => {
447
0
                        return Err(err!(
448
0
                            "found illegal ut-wall combination for \
449
0
                             local time type {i}, only local-wall, \
450
0
                             local-standard and ut-standard are allowed",
451
0
                        ))
452
                    }
453
                    _ => {
454
0
                        return Err(err!(
455
0
                            "found illegal std/wall or ut/local value for \
456
0
                             local time type {i}, each must be 0 or 1",
457
0
                        ))
458
                    }
459
                };
460
            }
461
        } else {
462
            // If they're both empty then we don't need to do anything. Every
463
            // local time type record already has the correct default for this
464
            // case set.
465
0
            debug_assert!(std_wall_bytes.is_empty());
466
0
            debug_assert!(ut_local_bytes.is_empty());
467
        }
468
0
        Ok(rest)
469
0
    }
470
471
0
    fn parse_footer<'b>(
472
0
        &mut self,
473
0
        _header: &Header,
474
0
        bytes: &'b [u8],
475
0
    ) -> Result<&'b [u8], Error> {
476
0
        if bytes.is_empty() {
477
0
            return Err(err!(
478
0
                "invalid V2+ TZif footer, expected \\n, \
479
0
                 but found unexpected end of data",
480
0
            ));
481
0
        }
482
0
        if bytes[0] != b'\n' {
483
0
            return Err(err!(
484
0
                "invalid V2+ TZif footer, expected {:?}, but found {:?}",
485
0
                Byte(b'\n'),
486
0
                Byte(bytes[0]),
487
0
            ));
488
0
        }
489
0
        let bytes = &bytes[1..];
490
        // Only scan up to 1KB for a NUL terminator in case we somehow got
491
        // passed a huge block of bytes.
492
0
        let toscan = &bytes[..bytes.len().min(1024)];
493
0
        let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
494
0
            return Err(err!(
495
0
                "invalid V2 TZif footer, could not find {:?} \
496
0
                 terminator in: {:?}",
497
0
                Byte(b'\n'),
498
0
                Bytes(toscan),
499
0
            ));
500
        };
501
0
        let (bytes, rest) = bytes.split_at(nlat);
502
0
        if !bytes.is_empty() {
503
            // We could in theory limit TZ strings to their strict POSIX
504
            // definition here for TZif V2, but I don't think there is any
505
            // harm in allowing the extensions in V2 formatted TZif data. Note
506
            // that the GNU tooling allow it via the `TZ` environment variable
507
            // even though POSIX doesn't specify it. This all seems okay to me
508
            // because the V3+ extension is a strict superset of functionality.
509
0
            let posix_tz =
510
0
                PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?;
511
0
            self.fixed.posix_tz = Some(posix_tz);
512
0
        }
513
0
        Ok(&rest[1..])
514
0
    }
515
516
    /// Validates that the POSIX TZ string we parsed (if one exists) is
517
    /// consistent with the last transition in this time zone. This is
518
    /// required by RFC 8536.
519
    ///
520
    /// RFC 8536 says, "If the string is nonempty and one or more
521
    /// transitions appear in the version 2+ data, the string MUST be
522
    /// consistent with the last version 2+ transition."
523
0
    fn verify_posix_time_zone_consistency(&self) -> Result<(), Error> {
524
        // We need to be a little careful, since we always have at least one
525
        // transition (accounting for the dummy `Timestamp::MIN` transition).
526
        // So if we only have 1 transition and a POSIX TZ string, then we
527
        // should not validate it since it's equivalent to the case of 0
528
        // transitions and a POSIX TZ string.
529
0
        if self.transitions.timestamps.len() <= 1 {
530
0
            return Ok(());
531
0
        }
532
0
        let Some(ref tz) = self.fixed.posix_tz else {
533
0
            return Ok(());
534
        };
535
0
        let last = self
536
0
            .transitions
537
0
            .timestamps
538
0
            .last()
539
0
            .expect("last transition timestamp");
540
0
        let type_index = self
541
0
            .transitions
542
0
            .infos
543
0
            .last()
544
0
            .expect("last transition info")
545
0
            .type_index;
546
0
        let typ = &self.types[usize::from(type_index)];
547
0
        let (ioff, abbrev, is_dst) =
548
0
            tz.to_offset_info(ITimestamp::from_second(*last));
549
0
        if ioff.second != typ.offset {
550
0
            return Err(err!(
551
0
                "expected last transition to have DST offset \
552
0
                 of {expected_offset}, but got {got_offset} \
553
0
                 according to POSIX TZ string {tz}",
554
0
                expected_offset = typ.offset,
555
0
                got_offset = ioff.second,
556
0
                tz = tz,
557
0
            ));
558
0
        }
559
0
        if is_dst != typ.is_dst {
560
0
            return Err(err!(
561
0
                "expected last transition to have is_dst={expected_dst}, \
562
0
                 but got is_dst={got_dst} according to POSIX TZ \
563
0
                 string {tz}",
564
0
                expected_dst = typ.is_dst,
565
0
                got_dst = is_dst,
566
0
                tz = tz,
567
0
            ));
568
0
        }
569
0
        if abbrev != self.designation(&typ) {
570
0
            return Err(err!(
571
0
                "expected last transition to have \
572
0
                 designation={expected_abbrev}, \
573
0
                 but got designation={got_abbrev} according to POSIX TZ \
574
0
                 string {tz}",
575
0
                expected_abbrev = self.designation(&typ),
576
0
                got_abbrev = abbrev,
577
0
                tz = tz,
578
0
            ));
579
0
        }
580
0
        Ok(())
581
0
    }
582
583
    /// Add civil datetimes to our transitions.
584
    ///
585
    /// This isn't strictly necessary, but it speeds up time zone lookups when
586
    /// the input is a civil datetime. It lets us do comparisons directly on
587
    /// the civil datetime as given, instead of needing to convert the civil
588
    /// datetime given to a timestamp first. (Even if we didn't do this, I
589
    /// believe we'd still need at least one additional timestamp that is
590
    /// offset, because TZ lookups for a civil datetime are done in local time,
591
    /// and the timestamps in TZif data are, of course, all in UTC.)
592
0
    fn add_civil_datetimes_to_transitions(&mut self) {
593
0
        fn to_datetime(timestamp: i64, offset: i32) -> TzifDateTime {
594
            use crate::shared::util::itime::{IOffset, ITimestamp};
595
0
            let its = ITimestamp { second: timestamp, nanosecond: 0 };
596
0
            let ioff = IOffset { second: offset };
597
0
            let dt = its.to_datetime(ioff);
598
0
            TzifDateTime::new(
599
0
                dt.date.year,
600
0
                dt.date.month,
601
0
                dt.date.day,
602
0
                dt.time.hour,
603
0
                dt.time.minute,
604
0
                dt.time.second,
605
            )
606
0
        }
607
608
0
        let trans = &mut self.transitions;
609
0
        for i in 0..trans.timestamps.len() {
610
0
            let timestamp = trans.timestamps[i];
611
0
            let offset = {
612
0
                let type_index = trans.infos[i].type_index;
613
0
                self.types[usize::from(type_index)].offset
614
            };
615
0
            let prev_offset = {
616
0
                let type_index = trans.infos[i.saturating_sub(1)].type_index;
617
0
                self.types[usize::from(type_index)].offset
618
            };
619
620
0
            if prev_offset == offset {
621
0
                // Equivalent offsets means there can never be any ambiguity.
622
0
                let start = to_datetime(timestamp, prev_offset);
623
0
                trans.infos[i].kind = TzifTransitionKind::Unambiguous;
624
0
                trans.civil_starts[i] = start;
625
0
            } else if prev_offset < offset {
626
0
                // When the offset of the previous transition is less, that
627
0
                // means there is some non-zero amount of time that is
628
0
                // "skipped" when moving to the next transition. Thus, we have
629
0
                // a gap. The start of the gap is the offset which gets us the
630
0
                // earliest time, i.e., the smaller of the two offsets.
631
0
                trans.infos[i].kind = TzifTransitionKind::Gap;
632
0
                trans.civil_starts[i] = to_datetime(timestamp, prev_offset);
633
0
                trans.civil_ends[i] = to_datetime(timestamp, offset);
634
0
            } else {
635
                // When the offset of the previous transition is greater, that
636
                // means there is some non-zero amount of time that will be
637
                // replayed on a wall clock in this time zone. Thus, we have
638
                // a fold. The start of the gold is the offset which gets us
639
                // the earliest time, i.e., the smaller of the two offsets.
640
0
                assert!(prev_offset > offset);
641
0
                trans.infos[i].kind = TzifTransitionKind::Fold;
642
0
                trans.civil_starts[i] = to_datetime(timestamp, offset);
643
0
                trans.civil_ends[i] = to_datetime(timestamp, prev_offset);
644
            }
645
        }
646
0
    }
647
648
    /// Fatten up this TZif data with additional transitions.
649
    ///
650
    /// These additional transitions often make time zone lookups faster, and
651
    /// they smooth out the performance difference between using "slim" and
652
    /// "fat" tzdbs.
653
0
    fn fatten(&mut self) {
654
        // Note that this is a crate feature for *both* `jiff` and
655
        // `jiff-static`.
656
0
        if !cfg!(feature = "tz-fat") {
657
0
            return;
658
0
        }
659
0
        let Some(posix_tz) = self.fixed.posix_tz.clone() else { return };
660
0
        let last =
661
0
            self.transitions.timestamps.last().expect("last transition");
662
0
        let mut i = 0;
663
0
        let mut prev = ITimestamp::from_second(*last);
664
        loop {
665
0
            if i > FATTEN_MAX_TRANSITIONS {
666
                // only-jiff-start
667
                warn!(
668
                    "fattening TZif data for `{name:?}` somehow generated \
669
                     more than {max} transitions, so giving up to avoid \
670
                     doing too much work",
671
                    name = self.fixed.name,
672
                    max = FATTEN_MAX_TRANSITIONS,
673
                );
674
                // only-jiff-end
675
0
                return;
676
0
            }
677
0
            i += 1;
678
0
            prev = match self.add_transition(&posix_tz, prev) {
679
0
                None => break,
680
0
                Some(next) => next,
681
            };
682
        }
683
0
    }
684
685
    /// If there's a transition strictly after the given timestamp for the
686
    /// given POSIX time zone, then add it to this TZif data.
687
0
    fn add_transition(
688
0
        &mut self,
689
0
        posix_tz: &PosixTimeZone<Abbreviation>,
690
0
        prev: ITimestamp,
691
0
    ) -> Option<ITimestamp> {
692
0
        let (its, ioff, abbrev, is_dst) = posix_tz.next_transition(prev)?;
693
0
        if its.to_datetime(IOffset::UTC).date.year >= FATTEN_UP_TO_YEAR {
694
0
            return None;
695
0
        }
696
0
        let type_index =
697
0
            self.find_or_create_local_time_type(ioff, abbrev, is_dst)?;
698
0
        self.transitions.add_with_type_index(its.second, type_index);
699
0
        Some(its)
700
0
    }
701
702
    /// Look for a local time type matching the data given.
703
    ///
704
    /// If one could not be found, then one is created and its index is
705
    /// returned.
706
    ///
707
    /// If one could not be found and one could not be created (e.g., the index
708
    /// would overflow `u8`), then `None` is returned.
709
0
    fn find_or_create_local_time_type(
710
0
        &mut self,
711
0
        offset: IOffset,
712
0
        abbrev: &str,
713
0
        is_dst: bool,
714
0
    ) -> Option<u8> {
715
0
        for (i, typ) in self.types.iter().enumerate() {
716
0
            if offset.second == typ.offset
717
0
                && abbrev == self.designation(typ)
718
0
                && is_dst == typ.is_dst
719
            {
720
0
                return u8::try_from(i).ok();
721
0
            }
722
        }
723
0
        let i = u8::try_from(self.types.len()).ok()?;
724
0
        let designation = self.find_or_create_designation(abbrev)?;
725
0
        self.types.push(TzifLocalTimeType {
726
0
            offset: offset.second,
727
0
            is_dst,
728
0
            designation,
729
0
            // Not really clear if this is correct, but Jiff
730
0
            // ignores this anyway, so ¯\_(ツ)_/¯.
731
0
            indicator: TzifIndicator::LocalWall,
732
0
        });
733
0
        Some(i)
734
0
    }
735
736
    /// Look for a designation (i.e., time zone abbreviation) matching the data
737
    /// given, and return its range into `self.fixed.designations`.
738
    ///
739
    /// If one could not be found, then one is created and its range is
740
    /// returned.
741
    ///
742
    /// If one could not be found and one could not be created (e.g., the range
743
    /// would overflow `u8`), then `None` is returned.
744
0
    fn find_or_create_designation(
745
0
        &mut self,
746
0
        needle: &str,
747
0
    ) -> Option<(u8, u8)> {
748
0
        let mut start = 0;
749
0
        while let Some(offset) = self.fixed.designations[start..].find('\0') {
750
0
            let end = start + offset;
751
0
            let abbrev = &self.fixed.designations[start..end];
752
0
            if needle == abbrev {
753
0
                return Some((start.try_into().ok()?, end.try_into().ok()?));
754
0
            }
755
0
            start = end + 1;
756
        }
757
758
        // Now we need to add a new abbreviation. This
759
        // should generally only happen for malformed TZif
760
        // data. i.e., TZif data with a POSIX time zone that
761
        // contains an TZ abbreviation that isn't found in
762
        // the TZif's designation list.
763
        //
764
        // And since we're guarding against malformed data,
765
        // the designation list might not end with NUL. If
766
        // not, add one.
767
0
        if !self.fixed.designations.ends_with('\0') {
768
0
            self.fixed.designations.push('\0');
769
0
        }
770
0
        let start = self.fixed.designations.len();
771
0
        self.fixed.designations.push_str(needle);
772
0
        self.fixed.designations.push('\0');
773
0
        let end = self.fixed.designations.len();
774
0
        Some((start.try_into().ok()?, end.try_into().ok()?))
775
0
    }
776
777
0
    fn designation(&self, typ: &TzifLocalTimeType) -> &str {
778
0
        let range =
779
0
            usize::from(typ.designation.0)..usize::from(typ.designation.1);
780
        // OK because we verify that the designation range on every local
781
        // time type is a valid range into `self.designations`.
782
0
        &self.fixed.designations[range]
783
0
    }
784
}
785
786
impl TzifTransitionsOwned {
787
    /// Add a single transition with the given timestamp.
788
    ///
789
    /// This also fills in the other columns (civil starts, civil ends and
790
    /// infos) with sensible default values. It is expected that callers will
791
    /// later fill them in.
792
0
    fn add(&mut self, timestamp: i64) {
793
0
        self.add_with_type_index(timestamp, 0);
794
0
    }
795
796
    /// Like `TzifTransitionsOwned::add`, but let's the caller provide a type
797
    /// index if it is known.
798
0
    fn add_with_type_index(&mut self, timestamp: i64, type_index: u8) {
799
0
        self.timestamps.push(timestamp);
800
0
        self.civil_starts.push(TzifDateTime::ZERO);
801
0
        self.civil_ends.push(TzifDateTime::ZERO);
802
0
        self.infos.push(TzifTransitionInfo {
803
0
            type_index,
804
0
            kind: TzifTransitionKind::Unambiguous,
805
0
        });
806
0
    }
807
}
808
809
/// A single header record from a TZif formatted file.
///
/// Files in the V2+ TZif format carry two of these: the first describes the
/// V1 data block, and the second (which follows the V1 data block) describes
/// a data block that uses 64-bit timestamps. Both headers share one layout,
/// and both encode their counts as 32-bit big-endian integers.
#[derive(Debug)]
struct Header {
    /// How many bytes each timestamp in the data block occupies.
    ///
    /// Always 4 (for V1) or 8 (for the 64-bit header block in V2+).
    time_size: usize,
    /// The version of the file format.
    ///
    /// This is a NUL byte for version 1. Otherwise it is the ASCII byte for
    /// the version number: `0x32` for `2`, `0x33` for `3` or `0x34` for `4`.
    /// Recently generated zoneinfo does not necessarily use the newest
    /// version, since `zic` seems to emit newer versions only when they are
    /// required. For example, `America/New_York` on the original author's
    /// system (as of `2024-03-25`) has version `0x32`, while
    /// `Asia/Jerusalem` has version `0x33`.
    version: u8,
    /// How many UT/local indicators the file stores.
    ///
    /// This is checked to be either `0` or equal to `tzh_typecnt`.
    tzh_ttisutcnt: usize,
    /// How many standard/wall indicators the file stores.
    ///
    /// This is checked to be either `0` or equal to `tzh_typecnt`.
    tzh_ttisstdcnt: usize,
    /// How many leap second entries the file stores.
    tzh_leapcnt: usize,
    /// How many transition time entries the file stores.
    tzh_timecnt: usize,
    /// How many local time type entries the file stores.
    ///
    /// This is checked to be at least `1`.
    tzh_typecnt: usize,
    /// How many bytes of time zone abbreviation strings the file stores.
    ///
    /// This is checked to be at least `1`.
    tzh_charcnt: usize,
}
858
859
impl Header {
860
    /// Parse the header record from the given bytes.
861
    ///
862
    /// Upon success, return the header and all bytes after the header.
863
    ///
864
    /// The given `time_size` must be 4 or 8, corresponding to either the
865
    /// V1 header block or the V2+ header block, respectively.
866
0
    fn parse(
867
0
        time_size: usize,
868
0
        bytes: &[u8],
869
0
    ) -> Result<(Header, &[u8]), Error> {
870
0
        assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
871
0
        if bytes.len() < 44 {
872
0
            return Err(err!("invalid header: too short"));
873
0
        }
874
0
        let (magic, rest) = bytes.split_at(4);
875
0
        if magic != b"TZif" {
876
0
            return Err(err!("invalid header: magic bytes mismatch"));
877
0
        }
878
0
        let (version, rest) = rest.split_at(1);
879
0
        let (_reserved, rest) = rest.split_at(15);
880
881
0
        let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
882
0
        let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
883
0
        let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
884
0
        let (tzh_timecnt_bytes, rest) = rest.split_at(4);
885
0
        let (tzh_typecnt_bytes, rest) = rest.split_at(4);
886
0
        let (tzh_charcnt_bytes, rest) = rest.split_at(4);
887
888
0
        let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
889
0
            .map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
890
0
        let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
891
0
            .map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
892
0
        let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
893
0
            .map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
894
0
        let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
895
0
            .map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
896
0
        let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
897
0
            .map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
898
0
        let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
899
0
            .map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;
900
901
0
        if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
902
0
            return Err(err!(
903
0
                "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
904
0
                 or equal to tzh_typecnt={tzh_typecnt}",
905
0
            ));
906
0
        }
907
0
        if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
908
0
            return Err(err!(
909
0
                "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
910
0
                 or equal to tzh_typecnt={tzh_typecnt}",
911
0
            ));
912
0
        }
913
0
        if tzh_typecnt < 1 {
914
0
            return Err(err!(
915
0
                "expected tzh_typecnt={tzh_typecnt} to be at least 1",
916
0
            ));
917
0
        }
918
0
        if tzh_charcnt < 1 {
919
0
            return Err(err!(
920
0
                "expected tzh_charcnt={tzh_charcnt} to be at least 1",
921
0
            ));
922
0
        }
923
924
0
        let header = Header {
925
0
            time_size,
926
0
            version: version[0],
927
0
            tzh_ttisutcnt,
928
0
            tzh_ttisstdcnt,
929
0
            tzh_leapcnt,
930
0
            tzh_timecnt,
931
0
            tzh_typecnt,
932
0
            tzh_charcnt,
933
0
        };
934
0
        Ok((header, rest))
935
0
    }
936
937
    /// Returns true if this header is for a 32-bit data block.
938
    ///
939
    /// When false, it is guaranteed that this header is for a 64-bit data
940
    /// block.
941
0
    fn is_32bit(&self) -> bool {
942
0
        self.time_size == 4
943
0
    }
944
945
    /// Returns the size of the data block, in bytes, for this header.
946
    ///
947
    /// This returns an error if the arithmetic required to compute the
948
    /// length would overflow.
949
    ///
950
    /// This is useful for, e.g., skipping over the 32-bit V1 data block in
951
    /// V2+ TZif formatted files.
952
0
    fn data_block_len(&self) -> Result<usize, Error> {
953
0
        let a = self.transition_times_len()?;
954
0
        let b = self.transition_types_len()?;
955
0
        let c = self.local_time_types_len()?;
956
0
        let d = self.time_zone_designations_len()?;
957
0
        let e = self.leap_second_len()?;
958
0
        let f = self.standard_wall_len()?;
959
0
        let g = self.ut_local_len()?;
960
0
        a.checked_add(b)
961
0
            .and_then(|z| z.checked_add(c))
962
0
            .and_then(|z| z.checked_add(d))
963
0
            .and_then(|z| z.checked_add(e))
964
0
            .and_then(|z| z.checked_add(f))
965
0
            .and_then(|z| z.checked_add(g))
966
0
            .ok_or_else(|| {
967
0
                err!(
968
0
                    "length of data block in V{} tzfile is too big",
969
                    self.version
970
                )
971
0
            })
972
0
    }
973
974
0
    fn transition_times_len(&self) -> Result<usize, Error> {
975
0
        self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
976
0
            err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
977
0
        })
978
0
    }
979
980
0
    fn transition_types_len(&self) -> Result<usize, Error> {
981
0
        Ok(self.tzh_timecnt)
982
0
    }
983
984
0
    fn local_time_types_len(&self) -> Result<usize, Error> {
985
0
        self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
986
0
            err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
987
0
        })
988
0
    }
989
990
0
    fn time_zone_designations_len(&self) -> Result<usize, Error> {
991
0
        Ok(self.tzh_charcnt)
992
0
    }
993
994
0
    fn leap_second_len(&self) -> Result<usize, Error> {
995
0
        let record_len = self
996
0
            .time_size
997
0
            .checked_add(4)
998
0
            .expect("4-or-8 plus 4 always fits in usize");
999
0
        self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
1000
0
            err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
1001
0
        })
1002
0
    }
1003
1004
0
    fn standard_wall_len(&self) -> Result<usize, Error> {
1005
0
        Ok(self.tzh_ttisstdcnt)
1006
0
    }
1007
1008
0
    fn ut_local_len(&self) -> Result<usize, Error> {
1009
0
        Ok(self.tzh_ttisutcnt)
1010
0
    }
1011
}
1012
1013
/// Splits the given slice of bytes at the index given.
1014
///
1015
/// If the index is out of range (greater than `bytes.len()`) then an error is
1016
/// returned. The error message will include the `what` string given, which is
1017
/// meant to describe the thing being split.
1018
0
fn try_split_at<'b>(
1019
0
    what: &'static str,
1020
0
    bytes: &'b [u8],
1021
0
    at: usize,
1022
0
) -> Result<(&'b [u8], &'b [u8]), Error> {
1023
0
    if at > bytes.len() {
1024
0
        Err(err!(
1025
0
            "expected at least {at} bytes for {what}, \
1026
0
             but found only {} bytes",
1027
0
            bytes.len(),
1028
0
        ))
1029
    } else {
1030
0
        Ok(bytes.split_at(at))
1031
    }
1032
0
}
1033
1034
/// Interprets the given slice as an unsigned 32-bit big endian integer,
1035
/// attempts to convert it to a `usize` and returns it.
1036
///
1037
/// # Panics
1038
///
1039
/// When `bytes.len() != 4`.
1040
///
1041
/// # Errors
1042
///
1043
/// This errors if the `u32` parsed from the given bytes cannot fit in a
1044
/// `usize`.
1045
0
fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
1046
0
    let n = from_be_bytes_u32(bytes);
1047
0
    usize::try_from(n).map_err(|_| {
1048
0
        err!(
1049
0
            "failed to parse integer {n} (too big, max allowed is {}",
1050
            usize::MAX
1051
        )
1052
0
    })
1053
0
}
1054
1055
/// Decodes exactly four bytes as a big endian unsigned 32-bit integer.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
    let array = <[u8; 4]>::try_from(bytes).unwrap();
    u32::from_be_bytes(array)
}
1064
1065
/// Decodes exactly four bytes as a big endian signed 32-bit integer.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
    let array = <[u8; 4]>::try_from(bytes).unwrap();
    i32::from_be_bytes(array)
}
1074
1075
/// Decodes exactly eight bytes as a big endian signed 64-bit integer.
///
/// # Panics
///
/// When `bytes.len() != 8`.
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
    let array = <[u8; 8]>::try_from(bytes).unwrap();
    i64::from_be_bytes(array)
}