/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jiff-0.2.16/src/util/parse.rs

Source
use crate::{
    error::{err, Error},
    util::escape::{Byte, Bytes},
};

/// Converts a slice made up entirely of ASCII digits into an `i64`.
///
/// The whole slice is consumed: every byte has to be in `[0-9]`, and there is
/// no support for a leading sign. A signed type is returned anyway because
/// the rest of this crate works with signed integers, and because a higher
/// level routine that has already parsed a sign can apply it to the value
/// returned here.
///
/// # Errors
///
/// This returns an error when the slice is empty, when any byte in it is not
/// an ASCII digit or when the number does not fit into an `i64`.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
    if bytes.is_empty() {
        return Err(err!("invalid number, no digits found"));
    }
    let mut total: i64 = 0;
    for &byte in bytes {
        // `checked_sub` rejects bytes below `b'0'`, while the guard rejects
        // bytes above `b'9'`.
        let digit = match byte.checked_sub(b'0') {
            Some(value) if value <= 9 => i64::from(value),
            _ => {
                return Err(err!(
                    "invalid digit, expected 0-9 but got {}",
                    Byte(byte),
                ));
            }
        };
        let shifted = total.checked_mul(10);
        total = match shifted.and_then(|t| t.checked_add(digit)) {
            Some(next) => next,
            None => {
                return Err(err!(
                    "number '{}' too big to parse into 64-bit integer",
                    Bytes(bytes),
                ));
            }
        };
    }
    Ok(total)
}

/// Parses an optional `u64` from the leading ASCII digits of `bytes`.
///
/// On success, the parsed value is returned along with whatever input follows
/// the digits that were consumed. When `bytes` does not start with a digit
/// (`[0-9]`), the value is `None` and the input is handed back untouched.
///
/// This is safe to call on untrusted input, since the number of digits
/// examined is bounded no matter how long the run of digits is.
///
/// The value returned may be too big to fit into an `i64`. Callers should
/// handle this. (Indeed, `DurationUnits` handles this case.)
///
/// # Errors
///
/// When the parsed integer cannot fit into a `u64`.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn u64_prefix(bytes: &[u8]) -> Result<(Option<u64>, &[u8]), Error> {
    // Discovered via `u64::MAX.to_string().len()`.
    const MAX_U64_DIGITS: usize = 20;

    // One more digit than `u64::MAX` has is examined, so that a 21 digit
    // number without leading zeros overflows instead of being cut short.
    let digits = bytes
        .iter()
        .take(MAX_U64_DIGITS + 1)
        .take_while(|byte| byte.is_ascii_digit());
    let mut value: u64 = 0;
    let mut consumed = 0;
    for &byte in digits {
        consumed += 1;
        // Cannot underflow since only ASCII digits get this far.
        let digit = u64::from(byte - b'0');
        let grown = value.checked_mul(10).and_then(|v| v.checked_add(digit));
        value = grown.ok_or_else(
            #[inline(never)]
            || {
                err!(
                    "number `{}` too big to parse into 64-bit integer",
                    Bytes(&bytes[..consumed]),
                )
            },
        )?;
    }
    match consumed {
        0 => Ok((None, bytes)),
        _ => Ok((Some(value), &bytes[consumed..])),
    }
}

/// Parses a `u32` fractional number from the beginning to the end of the given
/// slice of ASCII digit characters.
///
/// The fraction's maximum precision is always 9 digits, and the integer
/// returned is always scaled to that precision. That is, when fewer than 9
/// digits are given, the result is what one would get if trailing zeros had
/// been written out. For example, `5` parses to `500_000_000` and `000000001`
/// parses to `1`. This makes it suitable for parsing a fractional amount of
/// seconds with a maximum precision of nanoseconds.
///
/// # Errors
///
/// This returns an error when the slice is empty, when it has more than 9
/// bytes or when any byte in it is not `[0-9]`. Notably, this routine does
/// not permit parsing a negative number.
pub(crate) fn fraction(bytes: &[u8]) -> Result<u32, Error> {
    const MAX_PRECISION: usize = 9;

    if bytes.is_empty() {
        return Err(err!("invalid fraction, no digits found"));
    } else if bytes.len() > MAX_PRECISION {
        return Err(err!(
            "invalid fraction, too many digits \
             (at most {MAX_PRECISION} are allowed)"
        ));
    }
    let mut n: u32 = 0;
    for &byte in bytes {
        // `checked_sub` rejects bytes below `b'0'`, while the guard rejects
        // bytes above `b'9'`.
        let digit = match byte.checked_sub(b'0') {
            Some(digit) if digit <= 9 => u32::from(digit),
            _ => {
                return Err(err!(
                    "invalid fractional digit, expected 0-9 but got {}",
                    Byte(byte),
                ));
            }
        };
        // The length check above caps the value at 999,999,999, which fits
        // into a `u32`. The checked arithmetic is kept as a safety net in
        // case `MAX_PRECISION` is ever raised.
        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
            || {
                err!(
                    "fractional '{}' too big to parse into 32-bit integer",
                    Bytes(bytes),
                )
            },
        )?;
    }
    // Scale up to `MAX_PRECISION` digits, as if the missing trailing digits
    // had been written as zeros.
    for _ in bytes.len()..MAX_PRECISION {
        n = n.checked_mul(10).ok_or_else(|| {
            err!(
                "fractional '{}' too big to parse into 32-bit integer \
                 (too much precision supported)",
                Bytes(bytes)
            )
        })?;
    }
    Ok(n)
}

/// Borrows the given `OsStr` as a `&str`.
///
/// This does what `OsStr::to_str` does, except that a failure is reported as
/// an `Error` whose message includes the offending value, instead of as
/// `None`.
///
/// # Errors
///
/// When the given `OsStr` is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    // The shadowing is deliberate: the error message below formats the
    // `OsStr` and not the generic `O`.
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}

/// Borrows the given `OsStr` as a `&[u8]`.
///
/// The main difference between this and `OsStr::to_str` is that on Unix
/// platforms, the underlying bytes are handed back as-is without any
/// validation. On every other platform (Windows, for example), this does
/// UTF-8 validation and returns an error if the value is not valid UTF-8.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    // The shadowing is deliberate: the error message below formats the
    // `OsStr` and not the generic `O`.
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // It is suspect that UTF-8 validation is done here and then the fact
        // that it was done gets thrown away. A caller that ultimately needs
        // UTF-8 may end up validating a second time. If that turns out to be
        // important, a new API that returns a `&str` can be added. But it
        // probably won't matter because an `OsStr` in this crate is usually
        // just an environment variable.
        os_str.to_str().map(str::as_bytes).ok_or_else(|| {
            err!("environment value {os_str:?} is not valid UTF-8")
        })
    }
}

/// Divides `input` into the part before index `at` and the part from `at`
/// onwards.
///
/// `None` is returned when `at` is beyond the end of `input`. Splitting at
/// exactly `input.len()` is permitted, in which case the second slice is
/// empty.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // `split_at` panics when given an out of bounds index, so it is only
    // called once the index is known to be valid.
    let in_bounds = at <= input.len();
    in_bounds.then(|| input.split_at(at))
}

/// Returns a function that converts two slices to an offset.
///
/// It takes the starting point as input and returns a function that, when
/// given an ending point (greater than or equal to the starting point),
/// subtracts the address of the starting point from the address of the ending
/// point. The result is the offset of the ending point relative to the
/// starting point.
///
/// This is useful as a helper function in parsing routines that use slices
/// but want to report offsets.
///
/// # Panics
///
/// The function returned panics when the ending point begins at an address
/// lower than that of `start`. No check is made that the ending point actually
/// lies within `start`, so callers must pass a suffix slice of `start` in
/// order to get a meaningful offset.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    move |end| {
        // A bare `-` only traps on underflow when overflow checks are
        // enabled, and otherwise wraps around to a huge bogus offset.
        // Checking explicitly makes the failure loud in every build profile.
        (end.as_ptr() as usize)
            .checked_sub(start.as_ptr() as usize)
            .expect("ending point must not begin before the starting point")
    }
}

/// Returns a function that produces the input consumed between two points.
///
/// The starting point is given here. The function returned accepts an ending
/// point (greater than or equal to the starting point) and gives back the
/// slice that begins at the starting point and stops right where the ending
/// point begins.
///
/// This is useful as a helper function in parsing routines.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn slicer<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
    let offset_of = offseter(start);
    // The offset of the ending point is also the length of what came before
    // it.
    move |end| &start[..offset_of(end)]
}

Coverage Report

Created: 2025-12-31 06:50

Line	Count	Source
1		use crate::{
2		error::{err, Error},
3		util::escape::{Byte, Bytes},
4		};
5
6		/// Parses an `i64` number from the beginning to the end of the given slice of
7		/// ASCII digit characters.
8		///
9		/// If any byte in the given slice is not `[0-9]`, then this returns an error.
10		/// Similarly, if the number parsed does not fit into a `i64`, then this
11		/// returns an error. Notably, this routine does not permit parsing a negative
12		/// integer. (We use `i64` because everything in this crate uses signed
13		/// integers, and because a higher level routine might want to parse the sign
14		/// and then apply it to the result of this routine.)
15		#[cfg_attr(feature = "perf-inline", inline(always))]
16	0	pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
17	0	if bytes.is_empty() {
18	0	return Err(err!("invalid number, no digits found"));
19	0	}
20	0	let mut n: i64 = 0;
21	0	for &byte in bytes {
22	0	let digit = match byte.checked_sub(b'0') {
23		None => {
24	0	return Err(err!(
25	0	"invalid digit, expected 0-9 but got {}",
26	0	Byte(byte),
27	0	));
28		}
29	0	Some(digit) if digit > 9 => {
30	0	return Err(err!(
31	0	"invalid digit, expected 0-9 but got {}",
32	0	Byte(byte),
33	0	))
34		}
35	0	Some(digit) => {
36	0	debug_assert!((0..=9).contains(&digit));
37	0	i64::from(digit)
38		}
39		};
40	0	n = n.checked_mul(10).and_then(\|n\| n.checked_add(digit)).ok_or_else(
41	0	\|\| {
42	0	err!(
43	0	"number '{}' too big to parse into 64-bit integer",
44	0	Bytes(bytes),
45		)
46	0	},
47	0	)?;
48		}
49	0	Ok(n)
50	0	}
51
52		/// Parsed an optional `u64` that is a prefix of `bytes`.
53		///
54		/// If no digits (`[0-9]`) were found at the beginning of `bytes`, then `None`
55		/// is returned.
56		///
57		/// Note that this is safe to call on untrusted input. It will not attempt
58		/// to consume more input than could possibly fit into a parsed integer.
59		///
60		/// Since this returns a `u64`, it is possible that an integer that cannot
61		/// fit into an `i64` is returned. Callers should handle this. (Indeed,
62		/// `DurationUnits` handles this case.)
63		///
64		/// # Errors
65		///
66		/// When the parsed integer cannot fit into a `u64`.
67		#[cfg_attr(feature = "perf-inline", inline(always))]
68	0	pub(crate) fn u64_prefix(bytes: &[u8]) -> Result<(Option<u64>, &[u8]), Error> {
69		// Discovered via `u64::MAX.to_string().len()`.
70		const MAX_U64_DIGITS: usize = 20;
71
72	0	let mut digit_count = 0;
73	0	let mut n: u64 = 0;
74	0	while digit_count <= MAX_U64_DIGITS {
75	0	let Some(&byte) = bytes.get(digit_count) else { break };
76	0	if !byte.is_ascii_digit() {
77	0	break;
78	0	}
79	0	digit_count += 1;
80		// OK because we confirmed `byte` is an ASCII digit.
81	0	let digit = u64::from(byte - b'0');
82	0	n = n.checked_mul(10).and_then(\|n\| n.checked_add(digit)).ok_or_else(
83		#[inline(never)]
84	0	\|\| {
85	0	err!(
86	0	"number `{}` too big to parse into 64-bit integer",
87	0	Bytes(&bytes[..digit_count]),
88		)
89	0	},
90	0	)?;
91		}
92	0	if digit_count == 0 {
93	0	return Ok((None, bytes));
94	0	}
95	0	Ok((Some(n), &bytes[digit_count..]))
96	0	}
97
98		/// Parses a `u32` fractional number from the beginning to the end of the given
99		/// slice of ASCII digit characters.
100		///
101		/// The fraction's maximum precision is always 9 digits. The returned integer
102		/// will always be in units of `10^{max_precision}`. For example, this
103		/// will parse a fractional amount of seconds with a maximum precision of
104		/// nanoseconds.
105		///
106		/// If any byte in the given slice is not `[0-9]`, then this returns an error.
107		/// Notably, this routine does not permit parsing a negative integer.
108	0	pub(crate) fn fraction(bytes: &[u8]) -> Result<u32, Error> {
109		const MAX_PRECISION: usize = 9;
110
111	0	if bytes.is_empty() {
112	0	return Err(err!("invalid fraction, no digits found"));
113	0	} else if bytes.len() > MAX_PRECISION {
114	0	return Err(err!(
115	0	"invalid fraction, too many digits \
116	0	(at most {MAX_PRECISION} are allowed"
117	0	));
118	0	}
119	0	let mut n: u32 = 0;
120	0	for &byte in bytes {
121	0	let digit = match byte.checked_sub(b'0') {
122		None => {
123	0	return Err(err!(
124	0	"invalid fractional digit, expected 0-9 but got {}",
125	0	Byte(byte),
126	0	));
127		}
128	0	Some(digit) if digit > 9 => {
129	0	return Err(err!(
130	0	"invalid fractional digit, expected 0-9 but got {}",
131	0	Byte(byte),
132	0	))
133		}
134	0	Some(digit) => {
135	0	debug_assert!((0..=9).contains(&digit));
136	0	u32::from(digit)
137		}
138		};
139	0	n = n.checked_mul(10).and_then(\|n\| n.checked_add(digit)).ok_or_else(
140	0	\|\| {
141	0	err!(
142	0	"fractional '{}' too big to parse into 64-bit integer",
143	0	Bytes(bytes),
144		)
145	0	},
146	0	)?;
147		}
148	0	for _ in bytes.len()..MAX_PRECISION {
149	0	n = n.checked_mul(10).ok_or_else(\|\| {
150	0	err!(
151	0	"fractional '{}' too big to parse into 64-bit integer \
152	0	(too much precision supported)",
153	0	Bytes(bytes)
154		)
155	0	})?;
156		}
157	0	Ok(n)
158	0	}
159
160		/// Parses an `OsStr` into a `&str` when `&[u8]` isn't easily available.
161		///
162		/// This is effectively `OsStr::to_str`, but with a slightly better error
163		/// message.
164		#[cfg(feature = "tzdb-zoneinfo")]
165		pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
166		where
167		O: ?Sized + AsRef<std::ffi::OsStr>,
168		{
169		let os_str = os_str.as_ref();
170		os_str
171		.to_str()
172		.ok_or_else(\|\| err!("environment value {os_str:?} is not valid UTF-8"))
173		}
174
175		/// Parses an `OsStr` into a `&str` when `&[u8]` isn't easily available.
176		///
177		/// The main difference between this and `OsStr::to_str` is that this will
178		/// be a zero-cost conversion on Unix platforms to `&[u8]`. On Windows, this
179		/// will do UTF-8 validation and return an error if it's invalid UTF-8.
180		#[cfg(feature = "tz-system")]
181		pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
182		where
183		O: ?Sized + AsRef<std::ffi::OsStr>,
184		{
185		let os_str = os_str.as_ref();
186		#[cfg(unix)]
187		{
188		use std::os::unix::ffi::OsStrExt;
189		Ok(os_str.as_bytes())
190		}
191		#[cfg(not(unix))]
192		{
193		let string = os_str.to_str().ok_or_else(\|\| {
194		err!("environment value {os_str:?} is not valid UTF-8")
195		})?;
196		// It is suspect that we're doing UTF-8 validation and then throwing
197		// away the fact that we did UTF-8 validation. So this could lead
198		// to an extra UTF-8 check if the caller ultimately needs UTF-8. If
199		// that's important, we can add a new API that returns a `&str`. But it
200		// probably won't matter because an `OsStr` in this crate is usually
201		// just an environment variable.
202		Ok(string.as_bytes())
203		}
204		}
205
206		/// Splits the given input into two slices at the given position.
207		///
208		/// If the position is greater than the length of the slice given, then this
209		/// returns `None`.
210		#[cfg_attr(feature = "perf-inline", inline(always))]
211	0	pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
212	0	if at > input.len() {
213	0	None
214		} else {
215	0	Some(input.split_at(at))
216		}
217	0	}
218
219		/// Returns a function that converts two slices to an offset.
220		///
221		/// It takes the starting point as input and returns a function that, when
222		/// given an ending point (greater than or equal to the starting point), then
223		/// the corresponding pointers are subtracted and an offset relative to the
224		/// starting point is returned.
225		///
226		/// This is useful as a helper function in parsing routines that use slices
227		/// but want to report offsets.
228		///
229		/// # Panics
230		///
231		/// This may panic if the ending point is not a suffix slice of `start`.
232	0	pub(crate) fn offseter<'a>(
233	0	start: &'a [u8],
234	0	) -> impl Fn(&'a [u8]) -> usize + 'a {
235	0	move \|end\| (end.as_ptr() as usize) - (start.as_ptr() as usize)
236	0	}
237
238		/// Returns a function that converts two slices to the slice between them.
239		///
240		/// This takes a starting point as input and returns a function that, when
241		/// given an ending point (greater than or equal to the starting point), it
242		/// returns a slice beginning at the starting point and ending just at the
243		/// ending point.
244		///
245		/// This is useful as a helper function in parsing routines.
246		///
247		/// # Panics
248		///
249		/// This may panic if the ending point is not a suffix slice of `start`.
250	0	pub(crate) fn slicer<'a>(
251	0	start: &'a [u8],
252	0	) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
253	0	let mkoffset = offseter(start);
254	0	move \|end\| {
255	0	let offset = mkoffset(end);
256	0	&start[..offset]
257	0	}
258	0	}