Coverage Report

Created: 2026-04-29 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/lexical-parse-integer-1.0.6/src/algorithm.rs
Line
Count
Source
1
//! Radix-generic, optimized, string-to-integer conversion routines.
2
//!
3
//! These routines are highly optimized: they use various optimizations
4
//! to read multiple digits at-a-time with less multiplication instructions,
5
//! as well as other optimizations to avoid unnecessary compile-time branching.
6
//!
7
//! See [Algorithm](/docs/Algorithm.md) for a more detailed description of
8
//! the algorithm choice here. See [Benchmarks.md](/docs/Benchmarks) for
9
//! recent benchmark data.
10
//!
11
//! These allow implementations of partial and complete parsers
12
//! using a single code-path via macros.
13
//!
14
//! This looks relatively, complex, but it's quite simple. Almost all
15
//! of these branches are resolved at compile-time, so the resulting
16
//! code is quite small while handling all of the internal complexity.
17
//!
18
//! 1. Helpers to process ok and error results for both complete and partial
19
//!    parsers. They have different APIs, and mixing the APIs leads to
20
//!    substantial performance hits.
21
//! 2. Overflow checking on invalid digits for partial parsers, while just
22
//!    returning invalid digits for complete parsers.
23
//! 3. A format-aware sign parser.
24
//! 4. Digit parsing algorithms which explicitly wrap on overflow, for no
25
//!    additional overhead. This has major performance wins for **most**
26
//!    real-world integers, so most valid input will be substantially faster.
27
//! 5. An algorithm to detect if overflow occurred. This is comprehensive, and
28
//!    short-circuits for common cases.
29
//! 6. A parsing algorithm for unsigned integers, always producing positive
30
//!    values. This avoids any unnecessary branching.
31
//! 7. Multi-digit optimizations for larger sizes.
32
33
#![doc(hidden)]
34
35
use lexical_util::digit::char_to_digit_const;
36
use lexical_util::error::Error;
37
use lexical_util::format::NumberFormat;
38
use lexical_util::iterator::{AsBytes, Bytes, DigitsIter, Iter};
39
use lexical_util::num::{as_cast, Integer};
40
use lexical_util::result::Result;
41
42
use crate::Options;
43
44
// HELPERS
45
46
/// Check if we should do multi-digit optimizations
47
19.6k
const fn can_try_parse_multidigits<'a, Iter: DigitsIter<'a>, const FORMAT: u128>(_: &Iter) -> bool {
48
19.6k
    let format = NumberFormat::<FORMAT> {};
49
19.6k
    Iter::IS_CONTIGUOUS && (cfg!(not(feature = "power-of-two")) || format.mantissa_radix() <= 10)
50
19.6k
}
51
52
// Get if digits are required for the format.
53
#[cfg_attr(not(feature = "format"), allow(unused_macros))]
54
macro_rules! required_digits {
55
    () => {
56
        NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
57
            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
58
    };
59
}
60
61
/// Return an value for a complete parser.
62
macro_rules! into_ok_complete {
63
    ($value:expr, $index:expr, $count:expr) => {{
64
        #[cfg(not(feature = "format"))]
65
        return Ok(as_cast($value));
66
67
        #[cfg(feature = "format")]
68
        if required_digits!() && $count == 0 {
69
            into_error!(Empty, $index);
70
        } else {
71
            return Ok(as_cast($value));
72
        }
73
    }};
74
}
75
76
/// Return an value and index for a partial parser.
77
macro_rules! into_ok_partial {
78
    ($value:expr, $index:expr, $count:expr) => {{
79
        #[cfg(not(feature = "format"))]
80
        return Ok((as_cast($value), $index));
81
82
        #[cfg(feature = "format")]
83
        if required_digits!() && $count == 0 {
84
            into_error!(Empty, $index);
85
        } else {
86
            return Ok((as_cast($value), $index));
87
        }
88
    }};
89
}
90
91
/// Return an error for a complete parser upon an invalid digit.
92
macro_rules! invalid_digit_complete {
93
    ($value:expr, $index:expr, $count:expr) => {
94
        // Don't do any overflow checking here: we don't need it.
95
        into_error!(InvalidDigit, $index - 1)
96
    };
97
}
98
99
/// Return a value for a partial parser upon an invalid digit.
100
/// This checks for numeric overflow, and returns the appropriate error.
101
macro_rules! invalid_digit_partial {
102
    ($value:expr, $index:expr, $count:expr) => {
103
        // NOTE: The value is already positive/negative
104
        into_ok_partial!($value, $index - 1, $count)
105
    };
106
}
107
108
/// Return an error, returning the index and the error.
109
macro_rules! into_error {
110
    ($code:ident, $index:expr) => {{
111
        return Err(Error::$code($index));
112
    }};
113
}
114
115
/// Handle an invalid digit if the format feature is enabled.
116
///
117
/// This is because we can have special, non-digit characters near
118
/// the start or internally. If `$is_end` is set to false, there **MUST**
119
/// be elements in the underlying slice after the current iterator.
120
#[cfg(feature = "format")]
121
macro_rules! fmt_invalid_digit {
122
    (
123
        $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
124
    ) => {{
125
        // NOTE: If we have non-contiguous iterators, we could have a skip character
126
        // here at the boundary. This does not affect safety but it does affect
127
        // correctness.
128
        debug_assert!($iter.is_contiguous() || $is_end);
129
130
        let base_suffix = NumberFormat::<FORMAT>::BASE_SUFFIX;
131
        let uncased_base_suffix = NumberFormat::<FORMAT>::CASE_SENSITIVE_BASE_SUFFIX;
132
        // Need to check for a base suffix, if so, return a valid value.
133
        // We can't have a base suffix at the first value (need at least
134
        // 1 digit).
135
        if base_suffix != 0 && $iter.cursor() - $start_index > 1 {
136
            let is_suffix = if uncased_base_suffix {
137
                $c == base_suffix
138
            } else {
139
                $c.eq_ignore_ascii_case(&base_suffix)
140
            };
141
            // NOTE: If we're using the `take_n` optimization where it can't
142
            // be the end, then the iterator cannot be done. So, in that case,
143
            // we need to end. `take_n` also can never be used for non-
144
            // contiguous iterators.
145
            if is_suffix && $is_end && $iter.is_buffer_empty() {
146
                // Break out of the loop, we've finished parsing.
147
                break;
148
            } else if !$iter.is_buffer_empty() {
149
                // Haven't finished parsing, so we're going to call
150
                // `invalid_digit!`. Need to ensure we include the
151
                // base suffix in that.
152
153
                // SAFETY: safe since the iterator is not empty, as checked
154
                // in `$iter.is_buffer_empty()`. Adding in the check hopefully
155
                // will be elided since it's a known constant.
156
                unsafe { $iter.step_unchecked() };
157
            }
158
        }
159
        // Might have handled our base-prefix here.
160
        $invalid_digit!($value, $iter.cursor(), $iter.current_count())
161
    }};
162
}
163
164
/// Just return an invalid digit
165
#[cfg(not(feature = "format"))]
166
macro_rules! fmt_invalid_digit {
167
    (
168
        $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
169
    ) => {{
170
        $invalid_digit!($value, $iter.cursor(), $iter.current_count());
171
    }};
172
}
173
174
/// Parse the sign from the leading digits.
175
///
176
/// This routine does the following:
177
///
178
/// 1. Parses the sign digit.
179
/// 2. Handles if positive signs before integers are not allowed.
180
/// 3. Handles negative signs if the type is unsigned.
181
/// 4. Handles if the sign is required, but missing.
182
/// 5. Handles if the iterator is empty, before or after parsing the sign.
183
/// 6. Handles if the iterator has invalid, leading zeros.
184
///
185
/// Returns if the value is negative, or any values detected when
186
/// validating the input.
187
#[doc(hidden)]
188
#[macro_export]
189
macro_rules! parse_sign {
190
    (
191
        $byte:ident,
192
        $is_signed:expr,
193
        $no_positive:expr,
194
        $required:expr,
195
        $invalid_positive:ident,
196
        $missing:ident
197
    ) => {
198
        // NOTE: `read_if` optimizes poorly since we then match after
199
        match $byte.integer_iter().first() {
200
            Some(&b'+') if !$no_positive => {
201
                // SAFETY: We have at least 1 item left since we peaked a value
202
                unsafe { $byte.step_unchecked() };
203
                Ok(false)
204
            },
205
            Some(&b'+') if $no_positive => Err(Error::$invalid_positive($byte.cursor())),
206
            Some(&b'-') if $is_signed => {
207
                // SAFETY: We have at least 1 item left since we peaked a value
208
                unsafe { $byte.step_unchecked() };
209
                Ok(true)
210
            },
211
            Some(_) if $required => Err(Error::$missing($byte.cursor())),
212
            _ if $required => Err(Error::$missing($byte.cursor())),
213
            _ => Ok(false),
214
        }
215
    };
216
}
217
218
/// Parse the sign from the leading digits.
219
#[cfg_attr(not(feature = "compact"), inline(always))]
220
19.7k
pub fn parse_sign<T: Integer, const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
221
19.7k
    let format = NumberFormat::<FORMAT> {};
222
19.7k
    parse_sign!(
223
        byte,
224
0
        T::IS_SIGNED,
225
0
        format.no_positive_mantissa_sign(),
226
19.7k
        format.required_mantissa_sign(),
227
        InvalidPositiveSign,
228
        MissingSign
229
    )
230
19.7k
}
Unexecuted instantiation: lexical_parse_integer::algorithm::parse_sign::<u8, 0xa0000000000000000000000000c>
lexical_parse_integer::algorithm::parse_sign::<i32, 0xa0000000000000000000000000c>
Line
Count
Source
220
19.7k
pub fn parse_sign<T: Integer, const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
221
19.7k
    let format = NumberFormat::<FORMAT> {};
222
19.7k
    parse_sign!(
223
        byte,
224
0
        T::IS_SIGNED,
225
0
        format.no_positive_mantissa_sign(),
226
19.7k
        format.required_mantissa_sign(),
227
        InvalidPositiveSign,
228
        MissingSign
229
    )
230
19.7k
}
Unexecuted instantiation: lexical_parse_integer::algorithm::parse_sign::<i64, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::parse_sign::<u64, 0xa0000000000000000000000000c>
231
232
// FOUR DIGITS
233
234
/// Determine if 4 bytes, read raw from bytes, are 4 digits for the radix.
235
#[cfg_attr(not(feature = "compact"), inline(always))]
236
0
pub fn is_4digits<const FORMAT: u128>(v: u32) -> bool {
237
0
    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
238
0
    debug_assert!(radix <= 10);
239
240
    // We want to have a wrapping add and sub such that only values from the
241
    // range `[0x30, 0x39]` (or narrower for custom radixes) will not
242
    // overflow into the high bit. This means that the value needs to overflow
243
    // into into `0x80` if the digit is 1 above, or `0x46` for the value `0x39`.
244
    // Likewise, we only valid for `[0x30, 0x38]` for radix 8, so we need
245
    // `0x47`.
246
0
    let add = 0x46 + 10 - radix;
247
0
    let add = add + (add << 8) + (add << 16) + (add << 24);
248
    // This aims to underflow if anything is below the min digit: if we have any
249
    // values under `0x30`, then this underflows and wraps into the high bit.
250
0
    let sub = 0x3030_3030;
251
0
    let a = v.wrapping_add(add);
252
0
    let b = v.wrapping_sub(sub);
253
254
0
    (a | b) & 0x8080_8080 == 0
255
0
}
256
257
/// Parse 4 bytes read from bytes into 4 digits.
258
#[cfg_attr(not(feature = "compact"), inline(always))]
259
0
pub fn parse_4digits<const FORMAT: u128>(mut v: u32) -> u32 {
260
0
    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
261
0
    debug_assert!(radix <= 10);
262
263
    // Normalize our digits to the range `[0, 9]`.
264
0
    v -= 0x3030_3030;
265
    // Scale digits in `0 <= Nn <= 99`.
266
0
    v = (v * radix) + (v >> 8);
267
    // Scale digits in `0 <= Nnnn <= 9999`.
268
0
    v = ((v & 0x0000007f) * radix * radix) + ((v >> 16) & 0x0000007f);
269
270
0
    v
271
0
}
272
273
/// Use a fast-path optimization, where we attempt to parse 4 digits at a time.
274
/// This reduces the number of multiplications necessary to 2, instead of 4.
275
///
276
/// This approach is described in full here:
277
/// <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>
278
#[cfg_attr(not(feature = "compact"), inline(always))]
279
0
pub fn try_parse_4digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
280
0
where
281
0
    T: Integer,
282
0
    Iter: DigitsIter<'a>,
283
{
284
    // Can't do fast optimizations with radixes larger than 10, since
285
    // we no longer have a contiguous ASCII block. Likewise, cannot
286
    // use non-contiguous iterators.
287
0
    debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
288
0
    debug_assert!(Iter::IS_CONTIGUOUS);
289
290
    // Read our digits, validate the input, and check from there.
291
0
    let bytes = u32::from_le(iter.peek_u32()?);
292
0
    if is_4digits::<FORMAT>(bytes) {
293
        // SAFETY: safe since we have at least 4 bytes in the buffer.
294
0
        unsafe { iter.step_by_unchecked(4) };
295
0
        Some(T::as_cast(parse_4digits::<FORMAT>(bytes)))
296
    } else {
297
0
        None
298
    }
299
0
}
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_4digits::<u8, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_4digits::<i32, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_4digits::<i64, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_4digits::<u64, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
300
301
// EIGHT DIGITS
302
303
/// Determine if 8 bytes, read raw from bytes, are 8 digits for the radix.
304
/// See `is_4digits` for the algorithm description.
305
#[cfg_attr(not(feature = "compact"), inline(always))]
306
20.5k
pub fn is_8digits<const FORMAT: u128>(v: u64) -> bool {
307
20.5k
    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
308
20.5k
    debug_assert!(radix <= 10);
309
310
20.5k
    let add = 0x46 + 10 - radix;
311
20.5k
    let add = add + (add << 8) + (add << 16) + (add << 24);
312
20.5k
    let add = (add as u64) | ((add as u64) << 32);
313
    // This aims to underflow if anything is below the min digit: if we have any
314
    // values under `0x30`, then this underflows and wraps into the high bit.
315
20.5k
    let sub = 0x3030_3030_3030_3030;
316
20.5k
    let a = v.wrapping_add(add);
317
20.5k
    let b = v.wrapping_sub(sub);
318
319
20.5k
    (a | b) & 0x8080_8080_8080_8080 == 0
320
20.5k
}
321
322
/// Parse 8 bytes read from bytes into 8 digits.
323
/// Credit for this goes to @aqrit, which further optimizes the
324
/// optimization described by Johnny Lee above.
325
#[cfg_attr(not(feature = "compact"), inline(always))]
326
17.5k
pub fn parse_8digits<const FORMAT: u128>(mut v: u64) -> u64 {
327
17.5k
    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX as u64;
328
17.5k
    debug_assert!(radix <= 10);
329
330
    // Create our masks. Assume the optimizer will do this at compile time.
331
    // It seems like an optimizing compiler **will** do this, so we
332
    // should be safe.
333
17.5k
    let radix2 = radix * radix;
334
17.5k
    let radix4 = radix2 * radix2;
335
17.5k
    let radix6 = radix2 * radix4;
336
17.5k
    let mask = 0x0000_00FF_0000_00FFu64;
337
17.5k
    let mul1 = radix2 + (radix6 << 32);
338
17.5k
    let mul2 = 1 + (radix4 << 32);
339
340
    // Normalize our digits to the base.
341
17.5k
    v -= 0x3030_3030_3030_3030;
342
    // Scale digits in `0 <= Nn <= 99`.
343
17.5k
    v = (v * radix) + (v >> 8);
344
17.5k
    let v1 = (v & mask).wrapping_mul(mul1);
345
17.5k
    let v2 = ((v >> 16) & mask).wrapping_mul(mul2);
346
347
17.5k
    ((v1.wrapping_add(v2) >> 32) as u32) as u64
348
17.5k
}
349
350
/// Use a fast-path optimization, where we attempt to parse 8 digits at a time.
351
/// This reduces the number of multiplications necessary to 3, instead of 8.
352
#[cfg_attr(not(feature = "compact"), inline(always))]
353
27.1k
pub fn try_parse_8digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
354
27.1k
where
355
27.1k
    T: Integer,
356
27.1k
    Iter: DigitsIter<'a>,
357
{
358
    // Can't do fast optimizations with radixes larger than 10, since
359
    // we no longer have a contiguous ASCII block. Likewise, cannot
360
    // use non-contiguous iterators.
361
27.1k
    debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
362
27.1k
    debug_assert!(Iter::IS_CONTIGUOUS);
363
364
    // Read our digits, validate the input, and check from there.
365
27.1k
    let bytes = u64::from_le(iter.peek_u64()?);
366
20.5k
    if is_8digits::<FORMAT>(bytes) {
367
        // SAFETY: safe since we have at least 8 bytes in the buffer.
368
17.5k
        unsafe { iter.step_by_unchecked(8) };
369
17.5k
        Some(T::as_cast(parse_8digits::<FORMAT>(bytes)))
370
    } else {
371
3.02k
        None
372
    }
373
27.1k
}
lexical_parse_integer::algorithm::try_parse_8digits::<u64, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
Line
Count
Source
353
27.1k
pub fn try_parse_8digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
354
27.1k
where
355
27.1k
    T: Integer,
356
27.1k
    Iter: DigitsIter<'a>,
357
{
358
    // Can't do fast optimizations with radixes larger than 10, since
359
    // we no longer have a contiguous ASCII block. Likewise, cannot
360
    // use non-contiguous iterators.
361
27.1k
    debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
362
27.1k
    debug_assert!(Iter::IS_CONTIGUOUS);
363
364
    // Read our digits, validate the input, and check from there.
365
27.1k
    let bytes = u64::from_le(iter.peek_u64()?);
366
20.5k
    if is_8digits::<FORMAT>(bytes) {
367
        // SAFETY: safe since we have at least 8 bytes in the buffer.
368
17.5k
        unsafe { iter.step_by_unchecked(8) };
369
17.5k
        Some(T::as_cast(parse_8digits::<FORMAT>(bytes)))
370
    } else {
371
3.02k
        None
372
    }
373
27.1k
}
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_8digits::<u8, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_8digits::<i32, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::try_parse_8digits::<i64, lexical_util::noskip::DigitsIterator<0xa0000000000000000000000000c>, 0xa0000000000000000000000000c>
374
375
// ONE DIGIT
376
377
/// Run a loop where the integer cannot possibly overflow.
378
///
379
/// If the length of the str is short compared to the range of the type
380
/// we are parsing into, then we can be certain that an overflow will not occur.
381
/// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
382
/// above is a faster (conservative) approximation of this.
383
///
384
/// Consider radix 16 as it has the highest information density per digit and
385
/// will thus overflow the earliest: `u8::MAX` is `ff` - any str of length 2 is
386
/// guaranteed to not overflow. `i8::MAX` is `7f` - only a str of length 1 is
387
/// guaranteed to not overflow.
388
///
389
/// This is based off of [core/num](core).
390
///
391
/// * `value` - The current parsed value.
392
/// * `iter` - An iterator over all bytes in the input.
393
/// * `add_op` - The unchecked add/sub op.
394
/// * `start_index` - The offset where parsing started.
395
/// * `invalid_digit` - Behavior when an invalid digit is found.
396
/// * `is_end` - If iter corresponds to the full input.
397
///
398
/// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1480>
399
macro_rules! parse_1digit_unchecked {
400
    (
401
        $value:ident,
402
        $iter:ident,
403
        $add_op:ident,
404
        $start_index:ident,
405
        $invalid_digit:ident,
406
        $is_end:expr
407
    ) => {{
408
        // This is a slower parsing algorithm, going 1 digit at a time, but doing it in
409
        // an unchecked loop.
410
        let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
411
        while let Some(&c) = $iter.next() {
412
            let digit = match char_to_digit_const(c, radix) {
413
                Some(v) => v,
414
                None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, $is_end),
415
            };
416
            // multiply first since compilers are good at optimizing things out and will do
417
            // a fused mul/add We must do this after getting the digit for
418
            // partial parsers
419
            $value = $value.wrapping_mul(as_cast(radix)).$add_op(as_cast(digit));
420
        }
421
    }};
422
}
423
424
/// Run a loop where the integer could overflow.
425
///
426
/// This is a standard, unoptimized algorithm. This is based off of
427
/// [core/num](core)
428
///
429
/// * `value` - The current parsed value.
430
/// * `iter` - An iterator over all bytes in the input.
431
/// * `add_op` - The checked add/sub op.
432
/// * `start_index` - The offset where parsing started.
433
/// * `invalid_digit` - Behavior when an invalid digit is found.
434
/// * `overflow` - If the error is overflow or underflow.
435
///
436
/// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1505>
437
macro_rules! parse_1digit_checked {
438
    (
439
        $value:ident,
440
        $iter:ident,
441
        $add_op:ident,
442
        $start_index:ident,
443
        $invalid_digit:ident,
444
        $overflow:ident
445
    ) => {{
446
        // This is a slower parsing algorithm, going 1 digit at a time, but doing it in
447
        // an unchecked loop.
448
        let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
449
        while let Some(&c) = $iter.next() {
450
            let digit = match char_to_digit_const(c, radix) {
451
                Some(v) => v,
452
                None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, true),
453
            };
454
            // multiply first since compilers are good at optimizing things out and will do
455
            // a fused mul/add
456
            $value =
457
15.3k
                match $value.checked_mul(as_cast(radix)).and_then(|x| x.$add_op(as_cast(digit))) {
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<u8, 0xa0000000000000000000000000c>::{closure#0}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<u8, 0xa0000000000000000000000000c>::{closure#1}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<i32, 0xa0000000000000000000000000c>::{closure#0}
lexical_parse_integer::algorithm::algorithm_complete::<i32, 0xa0000000000000000000000000c>::{closure#1}
Line
Count
Source
457
15.3k
                match $value.checked_mul(as_cast(radix)).and_then(|x| x.$add_op(as_cast(digit))) {
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<i64, 0xa0000000000000000000000000c>::{closure#0}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<i64, 0xa0000000000000000000000000c>::{closure#1}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<u64, 0xa0000000000000000000000000c>::{closure#0}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<u64, 0xa0000000000000000000000000c>::{closure#1}
458
                    Some(value) => value,
459
                    None => into_error!($overflow, $iter.cursor() - 1),
460
                }
461
        }
462
    }};
463
}
464
465
// OVERALL DIGITS
466
// --------------
467
468
/// Run an unchecked loop where digits are processed incrementally.
469
///
470
/// If the type size is small or there's not many digits, skip multi-digit
471
/// optimizations. Otherwise, if the type size is large and we're not manually
472
/// skipping manual optimizations, then we do this here.
473
///
474
/// * `value` - The current parsed value.
475
/// * `iter` - An iterator over all bytes in the input.
476
/// * `add_op` - The unchecked add/sub op.
477
/// * `start_index` - The offset where parsing started.
478
/// * `invalid_digit` - Behavior when an invalid digit is found.
479
/// * `no_multi_digit` - If to disable multi-digit optimizations.
480
/// * `is_end` - If iter corresponds to the full input.
481
macro_rules! parse_digits_unchecked {
482
    (
483
        $value:ident,
484
        $iter:ident,
485
        $add_op:ident,
486
        $start_index:ident,
487
        $invalid_digit:ident,
488
        $no_multi_digit:expr,
489
        $is_end:expr
490
    ) => {{
491
        let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter);
492
        let use_multi = can_multi && !$no_multi_digit;
493
494
        // these cannot overflow. also, we use at most 3 for a 128-bit float and 1 for a
495
        // 64-bit float NOTE: Miri will complain about this if we use radices >=
496
        // 16 but since they won't go into `try_parse_8digits!` or
497
        // `try_parse_4digits` it will be optimized out and the overflow won't
498
        // matter.
499
        let format = NumberFormat::<FORMAT> {};
500
        if use_multi && T::BITS >= 64 && $iter.buffer_length() >= 8 {
501
            // Try our fast, 8-digit at a time optimizations.
502
            let radix8 = T::from_u32(format.radix8());
503
            while let Some(value) = try_parse_8digits::<T, _, FORMAT>(&mut $iter) {
504
                $value = $value.wrapping_mul(radix8).$add_op(value);
505
            }
506
        } else if use_multi && T::BITS == 32 && $iter.buffer_length() >= 4 {
507
            // Try our fast, 8-digit at a time optimizations.
508
            let radix4 = T::from_u32(format.radix4());
509
            while let Some(value) = try_parse_4digits::<T, _, FORMAT>(&mut $iter) {
510
                $value = $value.wrapping_mul(radix4).$add_op(value);
511
            }
512
        }
513
        parse_1digit_unchecked!($value, $iter, $add_op, $start_index, $invalid_digit, $is_end)
514
    }};
515
}
516
517
/// Run  checked loop where digits are processed with overflow checking.
518
///
519
/// If the type size is small or there's not many digits, skip multi-digit
520
/// optimizations. Otherwise, if the type size is large and we're not manually
521
/// skipping manual optimizations, then we do this here.
522
///
523
/// * `value` - The current parsed value.
524
/// * `iter` - An iterator over all bytes in the input.
525
/// * `add_op` - The checked add/sub op.
526
/// * `add_op_uc` - The unchecked add/sub op for small digit optimizations.
527
/// * `start_index` - The offset where parsing started.
528
/// * `invalid_digit` - Behavior when an invalid digit is found.
529
/// * `overflow` - If the error is overflow or underflow.
530
/// * `no_multi_digit` - If to disable multi-digit optimizations.
531
/// * `overflow_digits` - The number of digits before we need to consider
532
///   checked ops.
533
macro_rules! parse_digits_checked {
534
    (
535
        $value:ident,
536
        $iter:ident,
537
        $add_op:ident,
538
        $add_op_uc:ident,
539
        $start_index:ident,
540
        $invalid_digit:ident,
541
        $overflow:ident,
542
        $no_multi_digit:expr,
543
        $overflow_digits:expr
544
    ) => {{
545
        // Can use the unchecked for the `max_digits` here. If we
546
        // have a non-contiguous iterator, we could have a case like
547
        // 123__456, with no consecutive digit separators allowed. If
548
        // it's broken between the `_` characters, the integer will be
549
        // seen as valid when it isn't.
550
        if cfg!(not(feature = "format")) || $iter.is_contiguous() {
551
            if let Some(mut small) = $iter.take_n($overflow_digits) {
552
                let mut small_iter = small.integer_iter();
553
                parse_digits_unchecked!(
554
                    $value,
555
                    small_iter,
556
                    $add_op_uc,
557
                    $start_index,
558
                    $invalid_digit,
559
                    $no_multi_digit,
560
                    false
561
                );
562
            }
563
        }
564
565
        // NOTE: all our multi-digit optimizations have been done here: skip this
566
        parse_1digit_checked!($value, $iter, $add_op, $start_index, $invalid_digit, $overflow)
567
    }};
568
}
569
570
// ALGORITHM
571
572
/// Generic algorithm for both partial and complete parsers.
573
///
574
/// * `invalid_digit` - Behavior on finding an invalid digit.
575
/// * `into_ok` - Behavior when returning a valid value.
576
/// * `invalid_digit` - Behavior when an invalid digit is found.
577
/// * `no_multi_digit` - If to disable multi-digit optimizations.
578
/// * `is_partial` - If the parser is a partial parser.
579
#[rustfmt::skip]
580
macro_rules! algorithm {
581
($bytes:ident, $into_ok:ident, $invalid_digit:ident, $no_multi_digit:expr) => {{
582
    // WARNING:
583
    // --------
584
    // None of this code can be changed for optimization reasons.
585
    // Do not change it without benchmarking every change.
586
    //  1. You cannot use the `NoSkipIterator` in the loop,
587
    //      you must either return a subslice (indexing)
588
    //      or increment outside of the loop.
589
    //      Failing to do so leads to numerous more, unnecessary
590
    //      conditional move instructions, killing performance.
591
    //  2. Return a 0 or 1 shift, and indexing unchecked outside
592
    //      of the loop is slightly faster.
593
    //  3. Partial and complete parsers cannot be efficiently done
594
    //      together.
595
    //
596
    // If you try to refactor without carefully monitoring benchmarks or
597
    // assembly generation, please log the number of wasted hours: so
598
    //  16 hours so far.
599
600
    // With `step_by_unchecked`, this is sufficiently optimized.
601
    // Removes conditional paths, to, which simplifies maintenance.
602
    // The skip version of the iterator automatically coalesces to
603
    // the no-skip iterator.
604
    let mut byte = $bytes.bytes::<FORMAT>();
605
    let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
606
607
    let is_negative = parse_sign::<T, FORMAT>(&mut byte)?;
608
    let mut iter = byte.integer_iter();
609
    if iter.is_buffer_empty() {
610
        // Our default format **ALWAYS** requires significant digits, however,
611
        // we can have cases where we don
612
        #[cfg(not(feature = "format"))]
613
        into_error!(Empty, iter.cursor());
614
615
        #[cfg(feature = "format")]
616
        if required_digits!() {
617
            into_error!(Empty, iter.cursor());
618
        } else {
619
            $into_ok!(T::ZERO, iter.cursor(), 0)
620
        }
621
    }
622
623
    // Feature-gate a lot of format-only code here to simplify analysis with our branching
624
    // We only want to skip the zeros if have either require a base prefix or we don't
625
    // allow integer leading zeros, since the skip is expensive
626
    #[allow(unused_variables, unused_mut)]
627
    let mut start_index = iter.cursor();
628
    #[cfg_attr(not(feature = "format"), allow(unused_variables))]
629
    let format = NumberFormat::<FORMAT> {};
630
    #[cfg(feature = "format")]
631
    if format.has_base_prefix() || format.no_integer_leading_zeros() {
632
        // Skip any leading zeros. We want to do our check if it can't possibly overflow after.
633
        // For skipping digit-based formats, this approximation is a way over estimate.
634
        // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature
635
        let zeros = iter.skip_zeros();
636
        start_index += zeros;
637
638
        // Now, check to see if we have a valid base prefix.
639
        let mut is_prefix = false;
640
        let base_prefix = format.base_prefix();
641
        if base_prefix != 0 && zeros == 1 {
642
            // Check to see if the next character is the base prefix.
643
            // We must have a format like `0x`, `0d`, `0o`. Note:
644
            if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() {
645
                is_prefix = true;
646
                if iter.is_buffer_empty() {
647
                    into_error!(Empty, iter.cursor());
648
                } else {
649
                    start_index += 1;
650
                }
651
            }
652
        }
653
654
        // If we have a format that doesn't accept leading zeros,
655
        // check if the next value is invalid. It's invalid if the
656
        // first is 0, and the next is not a valid digit.
657
        if !is_prefix && format.no_integer_leading_zeros() && zeros != 0 {
658
            // Cannot have a base prefix and no leading zeros.
659
            let index = iter.cursor() - zeros;
660
            if zeros > 1 {
661
                into_error!(InvalidLeadingZeros, index);
662
            }
663
            // NOTE: Zeros has to be 0 here, so our index == 1 or 2 (depending on sign)
664
            match iter.peek().map(|&c| char_to_digit_const(c, format.radix())) {
665
                // Valid digit, we have an invalid value.
666
                Some(Some(_)) => into_error!(InvalidLeadingZeros, index),
667
                // Have a non-digit character that follows.
668
                Some(None) => $invalid_digit!(<T>::ZERO, iter.cursor() + 1, iter.current_count()),
669
                // No digits following, has to be ok
670
                None => $into_ok!(<T>::ZERO, index, iter.current_count()),
671
            };
672
        }
673
    }
674
675
    // shorter strings cannot possibly overflow so a great optimization
676
    let overflow_digits = T::overflow_digits(radix);
677
    let cannot_overflow = iter.as_slice().len() <= overflow_digits;
678
679
    //  NOTE:
680
    //      Don't add optimizations for 128-bit integers.
681
    //      128-bit multiplication is rather efficient, it's only division
682
    //      that's very slow. Any shortcut optimizations increasing branching,
683
    //      and even if parsing a 64-bit integer is marginally faster, it
684
    //      culminates in **way** slower performance overall for simple
685
    //      integers, and no improvement for large integers.
686
    let mut value = T::ZERO;
687
    if cannot_overflow && is_negative {
688
        parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, $no_multi_digit, true);
689
    } if cannot_overflow {
690
        parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, $no_multi_digit, true);
691
    } else if is_negative {
692
        parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, $no_multi_digit, overflow_digits);
693
    } else {
694
        parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits);
695
    }
696
697
    $into_ok!(value, iter.buffer_length(), iter.current_count())
698
}};
699
}
700
701
/// Algorithm for the complete parser.
702
#[cfg_attr(not(feature = "compact"), inline(always))]
703
19.7k
pub fn algorithm_complete<T, const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<T>
704
19.7k
where
705
19.7k
    T: Integer,
706
{
707
19.7k
    algorithm!(bytes, into_ok_complete, invalid_digit_complete, options.get_no_multi_digit())
708
19.7k
}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<u8, 0xa0000000000000000000000000c>
lexical_parse_integer::algorithm::algorithm_complete::<i32, 0xa0000000000000000000000000c>
Line
Count
Source
703
19.7k
pub fn algorithm_complete<T, const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<T>
704
19.7k
where
705
19.7k
    T: Integer,
706
{
707
19.7k
    algorithm!(bytes, into_ok_complete, invalid_digit_complete, options.get_no_multi_digit())
708
19.7k
}
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<i64, 0xa0000000000000000000000000c>
Unexecuted instantiation: lexical_parse_integer::algorithm::algorithm_complete::<u64, 0xa0000000000000000000000000c>
709
710
/// Algorithm for the partial parser.
711
#[cfg_attr(not(feature = "compact"), inline(always))]
712
pub fn algorithm_partial<T, const FORMAT: u128>(
713
    bytes: &[u8],
714
    options: &Options,
715
) -> Result<(T, usize)>
716
where
717
    T: Integer,
718
{
719
    algorithm!(bytes, into_ok_partial, invalid_digit_partial, options.get_no_multi_digit())
720
}