/rust/registry/src/index.crates.io-1949cf8c6b5b557f/jiff-0.2.15/src/util/parse.rs
Line | Count | Source |
1 | | use crate::{ |
2 | | error::{err, Error}, |
3 | | util::escape::{Byte, Bytes}, |
4 | | }; |
5 | | |
6 | | /// Parses an `i64` number from the beginning to the end of the given slice of |
7 | | /// ASCII digit characters. |
8 | | /// |
9 | | /// If any byte in the given slice is not `[0-9]`, then this returns an error. |
10 | | /// Similarly, if the number parsed does not fit into a `i64`, then this |
11 | | /// returns an error. Notably, this routine does not permit parsing a negative |
12 | | /// integer. (We use `i64` because everything in this crate uses signed |
13 | | /// integers, and because a higher level routine might want to parse the sign |
14 | | /// and then apply it to the result of this routine.) |
15 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
16 | 0 | pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> { |
17 | 0 | if bytes.is_empty() { |
18 | 0 | return Err(err!("invalid number, no digits found")); |
19 | 0 | } |
20 | 0 | let mut n: i64 = 0; |
21 | 0 | for &byte in bytes { |
22 | 0 | let digit = match byte.checked_sub(b'0') { |
23 | | None => { |
24 | 0 | return Err(err!( |
25 | 0 | "invalid digit, expected 0-9 but got {}", |
26 | 0 | Byte(byte), |
27 | 0 | )); |
28 | | } |
29 | 0 | Some(digit) if digit > 9 => { |
30 | 0 | return Err(err!( |
31 | 0 | "invalid digit, expected 0-9 but got {}", |
32 | 0 | Byte(byte), |
33 | 0 | )) |
34 | | } |
35 | 0 | Some(digit) => { |
36 | 0 | debug_assert!((0..=9).contains(&digit)); |
37 | 0 | i64::from(digit) |
38 | | } |
39 | | }; |
40 | 0 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
41 | 0 | || { |
42 | 0 | err!( |
43 | 0 | "number '{}' too big to parse into 64-bit integer", |
44 | 0 | Bytes(bytes), |
45 | | ) |
46 | 0 | }, |
47 | 0 | )?; |
48 | | } |
49 | 0 | Ok(n) |
50 | 0 | } |
51 | | |
52 | | /// Parses an `i64` fractional number from the beginning to the end of the |
53 | | /// given slice of ASCII digit characters. |
54 | | /// |
55 | | /// The fraction's maximum precision must be provided. The returned integer |
56 | | /// will always be in units of `10^{max_precision}`. For example, to parse a |
57 | | /// fractional amount of seconds with a maximum precision of nanoseconds, then |
58 | | /// use `max_precision=9`. |
59 | | /// |
60 | | /// If any byte in the given slice is not `[0-9]`, then this returns an error. |
61 | | /// Similarly, if the fraction parsed does not fit into a `i64`, then this |
62 | | /// returns an error. Notably, this routine does not permit parsing a negative |
63 | | /// integer. (We use `i64` because everything in this crate uses signed |
64 | | /// integers, and because a higher level routine might want to parse the sign |
65 | | /// and then apply it to the result of this routine.) |
66 | 0 | pub(crate) fn fraction( |
67 | 0 | bytes: &[u8], |
68 | 0 | max_precision: usize, |
69 | 0 | ) -> Result<i64, Error> { |
70 | 0 | if bytes.is_empty() { |
71 | 0 | return Err(err!("invalid fraction, no digits found")); |
72 | 0 | } else if bytes.len() > max_precision { |
73 | 0 | return Err(err!( |
74 | 0 | "invalid fraction, too many digits \ |
75 | 0 | (at most {max_precision} are allowed" |
76 | 0 | )); |
77 | 0 | } |
78 | 0 | let mut n: i64 = 0; |
79 | 0 | for &byte in bytes { |
80 | 0 | let digit = match byte.checked_sub(b'0') { |
81 | | None => { |
82 | 0 | return Err(err!( |
83 | 0 | "invalid fractional digit, expected 0-9 but got {}", |
84 | 0 | Byte(byte), |
85 | 0 | )); |
86 | | } |
87 | 0 | Some(digit) if digit > 9 => { |
88 | 0 | return Err(err!( |
89 | 0 | "invalid fractional digit, expected 0-9 but got {}", |
90 | 0 | Byte(byte), |
91 | 0 | )) |
92 | | } |
93 | 0 | Some(digit) => { |
94 | 0 | debug_assert!((0..=9).contains(&digit)); |
95 | 0 | i64::from(digit) |
96 | | } |
97 | | }; |
98 | 0 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
99 | 0 | || { |
100 | 0 | err!( |
101 | 0 | "fractional '{}' too big to parse into 64-bit integer", |
102 | 0 | Bytes(bytes), |
103 | | ) |
104 | 0 | }, |
105 | 0 | )?; |
106 | | } |
107 | 0 | for _ in bytes.len()..max_precision { |
108 | 0 | n = n.checked_mul(10).ok_or_else(|| { |
109 | 0 | err!( |
110 | 0 | "fractional '{}' too big to parse into 64-bit integer \ |
111 | 0 | (too much precision supported)", |
112 | 0 | Bytes(bytes) |
113 | | ) |
114 | 0 | })?; |
115 | | } |
116 | 0 | Ok(n) |
117 | 0 | } |
118 | | |
/// Borrows the given `OsStr`-like value as a `&str`.
///
/// This behaves like `OsStr::to_str`, except that a failed conversion is
/// reported as an error (with a message that includes the offending value)
/// instead of as `None`.
///
/// # Errors
///
/// Returns an error when the value is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}
133 | | |
/// Borrows the given `OsStr`-like value as a `&[u8]`.
///
/// On Unix platforms the underlying bytes are handed back directly, without
/// any validation. On every other platform the value is first checked to be
/// valid UTF-8, and the bytes of the resulting `&str` are returned.
///
/// # Errors
///
/// On non-Unix platforms, returns an error when the value is not valid
/// UTF-8. On Unix platforms this never fails.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // The UTF-8 validation performed here is discarded once the string
        // is turned back into bytes, so a caller that ultimately wants a
        // `&str` pays for a second check. Should that ever matter, a variant
        // returning `&str` could be added. It is unlikely to, since an
        // `OsStr` in this crate is usually just an environment variable.
        match os_str.to_str() {
            Some(string) => Ok(string.as_bytes()),
            None => {
                Err(err!("environment value {os_str:?} is not valid UTF-8"))
            }
        }
    }
}
164 | | |
/// Divides `input` into the bytes before `at` and the bytes from `at` onward.
///
/// Returns `None` when `at` exceeds the length of `input`. A position equal
/// to the length is permitted and yields an empty second slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // `split_at` panics for an out-of-bounds position, so it is only invoked
    // once the position is known to be in range.
    (at <= input.len()).then(|| input.split_at(at))
}
177 | | |
/// Builds a closure that reports how far a slice lies past `start`.
///
/// The returned closure accepts an ending point and subtracts the address of
/// `start` from the address of that ending point, producing an offset that
/// is relative to `start`. The ending point is expected to begin at or after
/// the beginning of `start`.
///
/// Parsing routines that work by repeatedly shrinking a slice can use this
/// to turn "what is left" back into a position for reporting purposes.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    // The address of the starting point never changes, so compute it once
    // up front instead of on every invocation of the closure.
    let base = start.as_ptr() as usize;
    move |end| {
        let position = end.as_ptr() as usize;
        position - base
    }
}
196 | | |
197 | | /// Returns a function that converts two slices to the slice between them. |
198 | | /// |
199 | | /// This takes a starting point as input and returns a function that, when |
200 | | /// given an ending point (greater than or equal to the starting point), it |
201 | | /// returns a slice beginning at the starting point and ending just at the |
202 | | /// ending point. |
203 | | /// |
204 | | /// This is useful as a helper function in parsing routines. |
205 | | /// |
206 | | /// # Panics |
207 | | /// |
208 | | /// This may panic if the ending point is not a suffix slice of `start`. |
209 | 0 | pub(crate) fn slicer<'a>( |
210 | 0 | start: &'a [u8], |
211 | 0 | ) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a { |
212 | 0 | let mkoffset = offseter(start); |
213 | 0 | move |end| { |
214 | 0 | let offset = mkoffset(end); |
215 | 0 | &start[..offset] |
216 | 0 | } |
217 | 0 | } |