/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jiff-0.2.16/src/util/parse.rs
Line | Count | Source |
1 | | use crate::{ |
2 | | error::{err, Error}, |
3 | | util::escape::{Byte, Bytes}, |
4 | | }; |
5 | | |
6 | | /// Parses an `i64` number from the beginning to the end of the given slice of |
7 | | /// ASCII digit characters. |
8 | | /// |
9 | | /// If any byte in the given slice is not `[0-9]`, then this returns an error. |
10 | | /// Similarly, if the number parsed does not fit into a `i64`, then this |
11 | | /// returns an error. Notably, this routine does not permit parsing a negative |
12 | | /// integer. (We use `i64` because everything in this crate uses signed |
13 | | /// integers, and because a higher level routine might want to parse the sign |
14 | | /// and then apply it to the result of this routine.) |
15 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
16 | 0 | pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> { |
17 | 0 | if bytes.is_empty() { |
18 | 0 | return Err(err!("invalid number, no digits found")); |
19 | 0 | } |
20 | 0 | let mut n: i64 = 0; |
21 | 0 | for &byte in bytes { |
22 | 0 | let digit = match byte.checked_sub(b'0') { |
23 | | None => { |
24 | 0 | return Err(err!( |
25 | 0 | "invalid digit, expected 0-9 but got {}", |
26 | 0 | Byte(byte), |
27 | 0 | )); |
28 | | } |
29 | 0 | Some(digit) if digit > 9 => { |
30 | 0 | return Err(err!( |
31 | 0 | "invalid digit, expected 0-9 but got {}", |
32 | 0 | Byte(byte), |
33 | 0 | )) |
34 | | } |
35 | 0 | Some(digit) => { |
36 | 0 | debug_assert!((0..=9).contains(&digit)); |
37 | 0 | i64::from(digit) |
38 | | } |
39 | | }; |
40 | 0 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
41 | 0 | || { |
42 | 0 | err!( |
43 | 0 | "number '{}' too big to parse into 64-bit integer", |
44 | 0 | Bytes(bytes), |
45 | | ) |
46 | 0 | }, |
47 | 0 | )?; |
48 | | } |
49 | 0 | Ok(n) |
50 | 0 | } |
51 | | |
52 | | /// Parsed an optional `u64` that is a prefix of `bytes`. |
53 | | /// |
54 | | /// If no digits (`[0-9]`) were found at the beginning of `bytes`, then `None` |
55 | | /// is returned. |
56 | | /// |
57 | | /// Note that this is safe to call on untrusted input. It will not attempt |
58 | | /// to consume more input than could possibly fit into a parsed integer. |
59 | | /// |
60 | | /// Since this returns a `u64`, it is possible that an integer that cannot |
61 | | /// fit into an `i64` is returned. Callers should handle this. (Indeed, |
62 | | /// `DurationUnits` handles this case.) |
63 | | /// |
64 | | /// # Errors |
65 | | /// |
66 | | /// When the parsed integer cannot fit into a `u64`. |
67 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
68 | 0 | pub(crate) fn u64_prefix(bytes: &[u8]) -> Result<(Option<u64>, &[u8]), Error> { |
69 | | // Discovered via `u64::MAX.to_string().len()`. |
70 | | const MAX_U64_DIGITS: usize = 20; |
71 | | |
72 | 0 | let mut digit_count = 0; |
73 | 0 | let mut n: u64 = 0; |
74 | 0 | while digit_count <= MAX_U64_DIGITS { |
75 | 0 | let Some(&byte) = bytes.get(digit_count) else { break }; |
76 | 0 | if !byte.is_ascii_digit() { |
77 | 0 | break; |
78 | 0 | } |
79 | 0 | digit_count += 1; |
80 | | // OK because we confirmed `byte` is an ASCII digit. |
81 | 0 | let digit = u64::from(byte - b'0'); |
82 | 0 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
83 | | #[inline(never)] |
84 | 0 | || { |
85 | 0 | err!( |
86 | 0 | "number `{}` too big to parse into 64-bit integer", |
87 | 0 | Bytes(&bytes[..digit_count]), |
88 | | ) |
89 | 0 | }, |
90 | 0 | )?; |
91 | | } |
92 | 0 | if digit_count == 0 { |
93 | 0 | return Ok((None, bytes)); |
94 | 0 | } |
95 | 0 | Ok((Some(n), &bytes[digit_count..])) |
96 | 0 | } |
97 | | |
98 | | /// Parses a `u32` fractional number from the beginning to the end of the given |
99 | | /// slice of ASCII digit characters. |
100 | | /// |
101 | | /// The fraction's maximum precision is always 9 digits. The returned integer |
102 | | /// will always be in units of `10^{max_precision}`. For example, this |
103 | | /// will parse a fractional amount of seconds with a maximum precision of |
104 | | /// nanoseconds. |
105 | | /// |
106 | | /// If any byte in the given slice is not `[0-9]`, then this returns an error. |
107 | | /// Notably, this routine does not permit parsing a negative integer. |
108 | 0 | pub(crate) fn fraction(bytes: &[u8]) -> Result<u32, Error> { |
109 | | const MAX_PRECISION: usize = 9; |
110 | | |
111 | 0 | if bytes.is_empty() { |
112 | 0 | return Err(err!("invalid fraction, no digits found")); |
113 | 0 | } else if bytes.len() > MAX_PRECISION { |
114 | 0 | return Err(err!( |
115 | 0 | "invalid fraction, too many digits \ |
116 | 0 | (at most {MAX_PRECISION} are allowed" |
117 | 0 | )); |
118 | 0 | } |
119 | 0 | let mut n: u32 = 0; |
120 | 0 | for &byte in bytes { |
121 | 0 | let digit = match byte.checked_sub(b'0') { |
122 | | None => { |
123 | 0 | return Err(err!( |
124 | 0 | "invalid fractional digit, expected 0-9 but got {}", |
125 | 0 | Byte(byte), |
126 | 0 | )); |
127 | | } |
128 | 0 | Some(digit) if digit > 9 => { |
129 | 0 | return Err(err!( |
130 | 0 | "invalid fractional digit, expected 0-9 but got {}", |
131 | 0 | Byte(byte), |
132 | 0 | )) |
133 | | } |
134 | 0 | Some(digit) => { |
135 | 0 | debug_assert!((0..=9).contains(&digit)); |
136 | 0 | u32::from(digit) |
137 | | } |
138 | | }; |
139 | 0 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
140 | 0 | || { |
141 | 0 | err!( |
142 | 0 | "fractional '{}' too big to parse into 64-bit integer", |
143 | 0 | Bytes(bytes), |
144 | | ) |
145 | 0 | }, |
146 | 0 | )?; |
147 | | } |
148 | 0 | for _ in bytes.len()..MAX_PRECISION { |
149 | 0 | n = n.checked_mul(10).ok_or_else(|| { |
150 | 0 | err!( |
151 | 0 | "fractional '{}' too big to parse into 64-bit integer \ |
152 | 0 | (too much precision supported)", |
153 | 0 | Bytes(bytes) |
154 | | ) |
155 | 0 | })?; |
156 | | } |
157 | 0 | Ok(n) |
158 | 0 | } |
159 | | |
/// Converts an `OsStr` into a `&str` when `&[u8]` isn't easily available.
///
/// This behaves like `OsStr::to_str`, except that a failed conversion is
/// reported as an `Error` whose message includes the offending value,
/// rather than as a bare `None`.
///
/// # Errors
///
/// When the given value is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}
174 | | |
/// Converts an `OsStr` into a byte slice when `&[u8]` isn't easily available.
///
/// On Unix platforms, this hands back the underlying bytes of the `OsStr`
/// directly, with no validation. On every other platform, the value is
/// first converted via `OsStr::to_str`, and the bytes of the resulting
/// string are returned.
///
/// # Errors
///
/// On non-Unix platforms, when the given value is not valid UTF-8. On Unix
/// platforms, this never returns an error.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    let bytes = {
        use std::os::unix::ffi::OsStrExt;
        os_str.as_bytes()
    };
    // It is suspect that we're doing UTF-8 validation and then throwing
    // away the fact that we did UTF-8 validation. So this could lead
    // to an extra UTF-8 check if the caller ultimately needs UTF-8. If
    // that's important, we can add a new API that returns a `&str`. But it
    // probably won't matter because an `OsStr` in this crate is usually
    // just an environment variable.
    #[cfg(not(unix))]
    let bytes = match os_str.to_str() {
        Some(string) => string.as_bytes(),
        None => {
            return Err(err!(
                "environment value {os_str:?} is not valid UTF-8"
            ));
        }
    };
    Ok(bytes)
}
205 | | |
/// Divides `input` into the part before position `at` and the part from
/// `at` onward.
///
/// Returns `None` when `at` is greater than the length of `input`. A
/// position equal to the length is accepted and yields an empty second
/// slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // `split_at` panics when `at` is out of bounds, so it is only invoked
    // once the position has been checked.
    (at <= input.len()).then(|| input.split_at(at))
}
218 | | |
/// Builds a function that reports how far a slice begins past `start`.
///
/// The returned function takes an ending point and subtracts the address at
/// which `start` begins from the address at which the ending point begins.
/// The ending point is expected to begin at or after the beginning of
/// `start`, and the result is its offset relative to `start`.
///
/// This is useful as a helper function in parsing routines that use slices
/// but want to report offsets.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    // Only the address of `start` is needed, so compute it once up front.
    let base = start.as_ptr() as usize;
    move |end: &'a [u8]| (end.as_ptr() as usize) - base
}
237 | | |
238 | | /// Returns a function that converts two slices to the slice between them. |
239 | | /// |
240 | | /// This takes a starting point as input and returns a function that, when |
241 | | /// given an ending point (greater than or equal to the starting point), it |
242 | | /// returns a slice beginning at the starting point and ending just at the |
243 | | /// ending point. |
244 | | /// |
245 | | /// This is useful as a helper function in parsing routines. |
246 | | /// |
247 | | /// # Panics |
248 | | /// |
249 | | /// This may panic if the ending point is not a suffix slice of `start`. |
250 | 0 | pub(crate) fn slicer<'a>( |
251 | 0 | start: &'a [u8], |
252 | 0 | ) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a { |
253 | 0 | let mkoffset = offseter(start); |
254 | 0 | move |end| { |
255 | 0 | let offset = mkoffset(end); |
256 | 0 | &start[..offset] |
257 | 0 | } |
258 | 0 | } |