Coverage Report

Created: 2026-04-01 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/zmij-1.0.20/src/lib.rs
Line
Count
Source
1
//! [![github]](https://github.com/dtolnay/zmij) [![crates-io]](https://crates.io/crates/zmij) [![docs-rs]](https://docs.rs/zmij)
2
//!
3
//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4
//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5
//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6
//!
7
//! <br>
8
//!
9
//! A double-to-string conversion algorithm based on [Schubfach] and [yy].
10
//!
11
//! This Rust implementation is a line-by-line port of Victor Zverovich's
12
//! implementation in C++, <https://github.com/vitaut/zmij>.
13
//!
14
//! [Schubfach]: https://fmt.dev/papers/Schubfach4.pdf
15
//! [yy]: https://github.com/ibireme/c_numconv_benchmark/blob/master/vendor/yy_double/yy_double.c
16
//!
17
//! <br>
18
//!
19
//! # Example
20
//!
21
//! ```
22
//! fn main() {
23
//!     let mut buffer = zmij::Buffer::new();
24
//!     let printed = buffer.format(1.234);
25
//!     assert_eq!(printed, "1.234");
26
//! }
27
//! ```
28
//!
29
//! <br>
30
//!
31
//! ## Performance
32
//!
33
//! The [dtoa-benchmark] compares this library and other Rust floating point
34
//! formatting implementations across a range of precisions. The vertical axis
35
//! in this chart shows nanoseconds taken by a single execution of
36
//! `zmij::Buffer::new().format_finite(value)` so a lower result indicates a
37
//! faster library.
38
//!
39
//! [dtoa-benchmark]: https://github.com/dtolnay/dtoa-benchmark
40
//!
41
//! ![performance](https://raw.githubusercontent.com/dtolnay/zmij/master/dtoa-benchmark.png)
42
43
#![no_std]
44
#![doc(html_root_url = "https://docs.rs/zmij/1.0.20")]
45
#![deny(unsafe_op_in_unsafe_fn)]
46
#![allow(non_camel_case_types, non_snake_case)]
47
#![allow(
48
    clippy::blocks_in_conditions,
49
    clippy::cast_possible_truncation,
50
    clippy::cast_possible_wrap,
51
    clippy::cast_ptr_alignment,
52
    clippy::cast_sign_loss,
53
    clippy::doc_markdown,
54
    clippy::incompatible_msrv,
55
    clippy::items_after_statements,
56
    clippy::many_single_char_names,
57
    clippy::modulo_one,
58
    clippy::must_use_candidate,
59
    clippy::needless_doctest_main,
60
    clippy::never_loop,
61
    clippy::redundant_else,
62
    clippy::similar_names,
63
    clippy::too_many_arguments,
64
    clippy::too_many_lines,
65
    clippy::unreadable_literal,
66
    clippy::used_underscore_items,
67
    clippy::while_immutable_condition,
68
    clippy::wildcard_imports
69
)]
70
71
#[cfg(zmij_no_select_unpredictable)]
72
mod hint;
73
#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
74
mod stdarch_x86;
75
#[cfg(test)]
76
mod tests;
77
mod traits;
78
79
#[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
80
use core::arch::asm;
81
#[cfg(not(zmij_no_select_unpredictable))]
82
use core::hint;
83
use core::mem::{self, MaybeUninit};
84
use core::ptr;
85
use core::slice;
86
use core::str;
87
#[cfg(feature = "no-panic")]
88
use no_panic::no_panic;
89
90
const BUFFER_SIZE: usize = 24;
91
const NAN: &str = "NaN";
92
const INFINITY: &str = "inf";
93
const NEG_INFINITY: &str = "-inf";
94
95
// Returns true_value if lhs < rhs, else false_value, without branching.
96
#[inline]
97
14.5k
fn select_if_less(lhs: u64, rhs: u64, true_value: i64, false_value: i64) -> i64 {
98
14.5k
    hint::select_unpredictable(lhs < rhs, true_value, false_value)
99
14.5k
}
100
101
#[derive(Copy, Clone)]
102
#[cfg_attr(test, derive(Debug, PartialEq))]
103
struct uint128 {
104
    hi: u64,
105
    lo: u64,
106
}
107
108
// Use umul128_hi64 for division.
109
const USE_UMUL128_HI64: bool = cfg!(target_vendor = "apple");
110
111
// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
112
14.5k
const fn umul128(x: u64, y: u64) -> u128 {
113
14.5k
    x as u128 * y as u128
114
14.5k
}
115
116
7.28k
const fn umul128_hi64(x: u64, y: u64) -> u64 {
117
7.28k
    (umul128(x, y) >> 64) as u64
118
7.28k
}
119
120
#[cfg_attr(feature = "no-panic", no_panic)]
121
0
fn umul192_hi128(x_hi: u64, x_lo: u64, y: u64) -> uint128 {
122
0
    let p = umul128(x_hi, y);
123
0
    let lo = (p as u64).wrapping_add((umul128(x_lo, y) >> 64) as u64);
124
0
    uint128 {
125
0
        hi: (p >> 64) as u64 + u64::from(lo < p as u64),
126
0
        lo,
127
0
    }
128
0
}
129
130
// Computes high 64 bits of multiplication of x and y, discards the least
131
// significant bit and rounds to odd, where x = uint128_t(x_hi << 64) | x_lo.
132
#[cfg_attr(feature = "no-panic", no_panic)]
133
12
fn umulhi_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
134
12
where
135
12
    UInt: traits::UInt,
136
{
137
12
    let num_bits = mem::size_of::<UInt>() * 8;
138
12
    if num_bits == 64 {
139
0
        let p = umul192_hi128(x_hi, x_lo, y.into());
140
0
        UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
141
    } else {
142
12
        let p = (umul128(x_hi, y.into()) >> 32) as u64;
143
12
        UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
144
    }
145
12
}
zmij::umulhi_inexact_to_odd::<u32>
Line
Count
Source
133
12
fn umulhi_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
134
12
where
135
12
    UInt: traits::UInt,
136
{
137
12
    let num_bits = mem::size_of::<UInt>() * 8;
138
12
    if num_bits == 64 {
139
0
        let p = umul192_hi128(x_hi, x_lo, y.into());
140
0
        UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
141
    } else {
142
12
        let p = (umul128(x_hi, y.into()) >> 32) as u64;
143
12
        UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
144
    }
145
12
}
Unexecuted instantiation: zmij::umulhi_inexact_to_odd::<u64>
146
147
trait FloatTraits: traits::Float {
148
    const NUM_BITS: i32;
149
    const NUM_SIG_BITS: i32 = Self::MANTISSA_DIGITS as i32 - 1;
150
    const NUM_EXP_BITS: i32 = Self::NUM_BITS - Self::NUM_SIG_BITS - 1;
151
    const EXP_MASK: i32 = (1 << Self::NUM_EXP_BITS) - 1;
152
    const EXP_BIAS: i32 = (1 << (Self::NUM_EXP_BITS - 1)) - 1;
153
    const EXP_OFFSET: i32 = Self::EXP_BIAS + Self::NUM_SIG_BITS;
154
155
    type SigType: traits::UInt;
156
    const IMPLICIT_BIT: Self::SigType;
157
158
    fn to_bits(self) -> Self::SigType;
159
160
22.0k
    fn is_negative(bits: Self::SigType) -> bool {
161
22.0k
        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
162
22.0k
    }
<f64 as zmij::FloatTraits>::is_negative
Line
Count
Source
160
5.08k
    fn is_negative(bits: Self::SigType) -> bool {
161
5.08k
        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
162
5.08k
    }
<f32 as zmij::FloatTraits>::is_negative
Line
Count
Source
160
16.9k
    fn is_negative(bits: Self::SigType) -> bool {
161
16.9k
        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
162
16.9k
    }
163
164
22.0k
    fn get_sig(bits: Self::SigType) -> Self::SigType {
165
22.0k
        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
166
22.0k
    }
<f64 as zmij::FloatTraits>::get_sig
Line
Count
Source
164
5.08k
    fn get_sig(bits: Self::SigType) -> Self::SigType {
165
5.08k
        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
166
5.08k
    }
<f32 as zmij::FloatTraits>::get_sig
Line
Count
Source
164
16.9k
    fn get_sig(bits: Self::SigType) -> Self::SigType {
165
16.9k
        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
166
16.9k
    }
167
168
22.0k
    fn get_exp(bits: Self::SigType) -> i64 {
169
22.0k
        (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
170
22.0k
    }
<f64 as zmij::FloatTraits>::get_exp
Line
Count
Source
168
5.08k
    fn get_exp(bits: Self::SigType) -> i64 {
169
5.08k
        (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
170
5.08k
    }
<f32 as zmij::FloatTraits>::get_exp
Line
Count
Source
168
16.9k
    fn get_exp(bits: Self::SigType) -> i64 {
169
16.9k
        (bits << 1u8 >> (Self::NUM_SIG_BITS + 1)).into() as i64
170
16.9k
    }
171
}
172
173
impl FloatTraits for f32 {
174
    const NUM_BITS: i32 = 32;
175
    const IMPLICIT_BIT: u32 = 1 << Self::NUM_SIG_BITS;
176
177
    type SigType = u32;
178
179
16.9k
    fn to_bits(self) -> Self::SigType {
180
16.9k
        self.to_bits()
181
16.9k
    }
182
}
183
184
impl FloatTraits for f64 {
185
    const NUM_BITS: i32 = 64;
186
    const IMPLICIT_BIT: u64 = 1 << Self::NUM_SIG_BITS;
187
188
    type SigType = u64;
189
190
5.08k
    fn to_bits(self) -> Self::SigType {
191
5.08k
        self.to_bits()
192
5.08k
    }
193
}
194
195
#[repr(C, align(64))]
196
struct Pow10SignificandsTable {
197
    data: [u64; if Self::COMPRESS {
198
        0
199
    } else {
200
        Self::NUM_POW10 * 2
201
    }],
202
}
203
204
impl Pow10SignificandsTable {
205
    const COMPRESS: bool = false;
206
    const SPLIT_TABLES: bool = !Self::COMPRESS && cfg!(target_arch = "aarch64");
207
    const NUM_POW10: usize = 617;
208
209
7.29k
    unsafe fn get_unchecked(&self, dec_exp: i32) -> uint128 {
210
        const DEC_EXP_MIN: i32 = -292;
211
7.29k
        if Self::COMPRESS {
212
0
            let i = dec_exp - DEC_EXP_MIN;
213
            // 672 bytes of data
214
            #[rustfmt::skip]
215
            static POW10S: [u64; 28] = [
216
                0x8000000000000000, 0xa000000000000000, 0xc800000000000000,
217
                0xfa00000000000000, 0x9c40000000000000, 0xc350000000000000,
218
                0xf424000000000000, 0x9896800000000000, 0xbebc200000000000,
219
                0xee6b280000000000, 0x9502f90000000000, 0xba43b74000000000,
220
                0xe8d4a51000000000, 0x9184e72a00000000, 0xb5e620f480000000,
221
                0xe35fa931a0000000, 0x8e1bc9bf04000000, 0xb1a2bc2ec5000000,
222
                0xde0b6b3a76400000, 0x8ac7230489e80000, 0xad78ebc5ac620000,
223
                0xd8d726b7177a8000, 0x878678326eac9000, 0xa968163f0a57b400,
224
                0xd3c21bcecceda100, 0x84595161401484a0, 0xa56fa5b99019a5c8,
225
                0xcecb8f27f4200f3a,
226
            ];
227
228
            #[rustfmt::skip]
229
            static HIGH_PARTS: [uint128; 23] = [
230
                uint128 { hi: 0xaf8e5410288e1b6f, lo: 0x07ecf0ae5ee44dda },
231
                uint128 { hi: 0xb1442798f49ffb4a, lo: 0x99cd11cfdf41779d },
232
                uint128 { hi: 0xb2fe3f0b8599ef07, lo: 0x861fa7e6dcb4aa15 },
233
                uint128 { hi: 0xb4bca50b065abe63, lo: 0x0fed077a756b53aa },
234
                uint128 { hi: 0xb67f6455292cbf08, lo: 0x1a3bc84c17b1d543 },
235
                uint128 { hi: 0xb84687c269ef3bfb, lo: 0x3d5d514f40eea742 },
236
                uint128 { hi: 0xba121a4650e4ddeb, lo: 0x92f34d62616ce413 },
237
                uint128 { hi: 0xbbe226efb628afea, lo: 0x890489f70a55368c },
238
                uint128 { hi: 0xbdb6b8e905cb600f, lo: 0x5400e987bbc1c921 },
239
                uint128 { hi: 0xbf8fdb78849a5f96, lo: 0xde98520472bdd034 },
240
                uint128 { hi: 0xc16d9a0095928a27, lo: 0x75b7053c0f178294 },
241
                uint128 { hi: 0xc350000000000000, lo: 0x0000000000000000 },
242
                uint128 { hi: 0xc5371912364ce305, lo: 0x6c28000000000000 },
243
                uint128 { hi: 0xc722f0ef9d80aad6, lo: 0x424d3ad2b7b97ef6 },
244
                uint128 { hi: 0xc913936dd571c84c, lo: 0x03bc3a19cd1e38ea },
245
                uint128 { hi: 0xcb090c8001ab551c, lo: 0x5cadf5bfd3072cc6 },
246
                uint128 { hi: 0xcd036837130890a1, lo: 0x36dba887c37a8c10 },
247
                uint128 { hi: 0xcf02b2c21207ef2e, lo: 0x94f967e45e03f4bc },
248
                uint128 { hi: 0xd106f86e69d785c7, lo: 0xe13336d701beba52 },
249
                uint128 { hi: 0xd31045a8341ca07c, lo: 0x1ede48111209a051 },
250
                uint128 { hi: 0xd51ea6fa85785631, lo: 0x552a74227f3ea566 },
251
                uint128 { hi: 0xd732290fbacaf133, lo: 0xa97c177947ad4096 },
252
                uint128 { hi: 0xd94ad8b1c7380874, lo: 0x18375281ae7822bc },
253
            ];
254
255
            #[rustfmt::skip]
256
            static FIXUPS: [u32; 20] = [
257
                0x05271b1f, 0x00000c20, 0x00003200, 0x12100020,
258
                0x00000000, 0x06000000, 0xc16409c0, 0xaf26700f,
259
                0xeb987b07, 0x0000000d, 0x00000000, 0x66fbfffe,
260
                0xb74100ec, 0xa0669fe8, 0xedb21280, 0x00000686,
261
                0x0a021200, 0x29b89c20, 0x08bc0eda, 0x00000000,
262
            ];
263
264
0
            let m = unsafe { *POW10S.get_unchecked(((i + 11) % 28) as usize) };
265
0
            let h = unsafe { *HIGH_PARTS.get_unchecked(((i + 11) / 28) as usize) };
266
267
0
            let h1 = umul128_hi64(h.lo, m);
268
269
0
            let c0 = h.lo.wrapping_mul(m);
270
0
            let c1 = h1.wrapping_add(h.hi.wrapping_mul(m));
271
0
            let c2 = u64::from(c1 < h1) + umul128_hi64(h.hi, m);
272
273
0
            let mut result = if (c2 >> 63) != 0 {
274
0
                uint128 { hi: c2, lo: c1 }
275
            } else {
276
0
                uint128 {
277
0
                    hi: (c2 << 1) | (c1 >> 63),
278
0
                    lo: (c1 << 1) | (c0 >> 63),
279
0
                }
280
            };
281
282
0
            result.lo -=
283
0
                u64::from((unsafe { *FIXUPS.get_unchecked((i >> 5) as usize) } >> (i & 31)) & 1);
284
0
            return result;
285
7.29k
        }
286
7.29k
        if !Self::SPLIT_TABLES {
287
7.29k
            let index = ((dec_exp - DEC_EXP_MIN) * 2) as usize;
288
7.29k
            return uint128 {
289
7.29k
                hi: unsafe { *self.data.get_unchecked(index) },
290
7.29k
                lo: unsafe { *self.data.get_unchecked(index + 1) },
291
7.29k
            };
292
0
        }
293
294
        unsafe {
295
            #[cfg_attr(
296
                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
297
                allow(unused_mut)
298
            )]
299
0
            let mut hi = self
300
0
                .data
301
0
                .as_ptr()
302
0
                .offset(Self::NUM_POW10 as isize + DEC_EXP_MIN as isize - 1);
303
            #[cfg_attr(
304
                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
305
                allow(unused_mut)
306
            )]
307
0
            let mut lo = hi.add(Self::NUM_POW10);
308
309
            // Force indexed loads.
310
            #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri)))]
311
0
            asm!("/*{0}{1}*/", inout(reg) hi, inout(reg) lo);
312
0
            uint128 {
313
0
                hi: *hi.offset(-dec_exp as isize),
314
0
                lo: *lo.offset(-dec_exp as isize),
315
0
            }
316
        }
317
7.29k
    }
318
319
    #[cfg(test)]
320
    fn get(&self, dec_exp: i32) -> uint128 {
321
        const DEC_EXP_MIN: i32 = -292;
322
        assert!((DEC_EXP_MIN..DEC_EXP_MIN + Self::NUM_POW10 as i32).contains(&dec_exp));
323
        unsafe { self.get_unchecked(dec_exp) }
324
    }
325
}
326
327
// 128-bit significands of powers of 10 rounded down.
328
// Generation with 192-bit arithmetic and compression by Dougall Johnson.
329
static POW10_SIGNIFICANDS: Pow10SignificandsTable = {
330
    let mut data = [0; if Pow10SignificandsTable::COMPRESS {
331
        0
332
    } else {
333
        Pow10SignificandsTable::NUM_POW10 * 2
334
    }];
335
336
    struct uint192 {
337
        w0: u64, // least significant
338
        w1: u64,
339
        w2: u64, // most significant
340
    }
341
342
    // First element, rounded up to cancel out rounding down in the
343
    // multiplication, and minimize significant bits.
344
    let mut current = uint192 {
345
        w0: 0xe000000000000000,
346
        w1: 0x25e8e89c13bb0f7a,
347
        w2: 0xff77b1fcbebcdc4f,
348
    };
349
    let ten = 0xa000000000000000;
350
    let mut i = 0;
351
    while i < Pow10SignificandsTable::NUM_POW10 && !Pow10SignificandsTable::COMPRESS {
352
        if Pow10SignificandsTable::SPLIT_TABLES {
353
            data[Pow10SignificandsTable::NUM_POW10 - i - 1] = current.w2;
354
            data[Pow10SignificandsTable::NUM_POW10 * 2 - i - 1] = current.w1;
355
        } else {
356
            data[i * 2] = current.w2;
357
            data[i * 2 + 1] = current.w1;
358
        }
359
360
        let h0: u64 = umul128_hi64(current.w0, ten);
361
        let h1: u64 = umul128_hi64(current.w1, ten);
362
363
        let c0: u64 = h0.wrapping_add(current.w1.wrapping_mul(ten));
364
        let c1: u64 = ((c0 < h0) as u64 + h1).wrapping_add(current.w2.wrapping_mul(ten));
365
        let c2: u64 = (c1 < h1) as u64 + umul128_hi64(current.w2, ten); // dodgy carry
366
367
        // normalise
368
        if (c2 >> 63) != 0 {
369
            current = uint192 {
370
                w0: c0,
371
                w1: c1,
372
                w2: c2,
373
            };
374
        } else {
375
            current = uint192 {
376
                w0: c0 << 1,
377
                w1: c1 << 1 | c0 >> 63,
378
                w2: c2 << 1 | c1 >> 63,
379
            };
380
        }
381
382
        i += 1;
383
    }
384
385
    Pow10SignificandsTable { data }
386
};
387
388
// Computes the decimal exponent as floor(log10(2**bin_exp)) if regular or
389
// floor(log10(3/4 * 2**bin_exp)) otherwise, without branching.
390
7.29k
const fn compute_dec_exp(bin_exp: i32, regular: bool) -> i32 {
391
7.29k
    debug_assert!(bin_exp >= -1334 && bin_exp <= 2620);
392
    // log10_3_over_4_sig = -log10(3/4) * 2**log10_2_exp rounded to a power of 2
393
    const LOG10_3_OVER_4_SIG: i32 = 131_072;
394
    // log10_2_sig = round(log10(2) * 2**log10_2_exp)
395
    const LOG10_2_SIG: i32 = 315_653;
396
    const LOG10_2_EXP: i32 = 20;
397
7.29k
    (bin_exp * LOG10_2_SIG - !regular as i32 * LOG10_3_OVER_4_SIG) >> LOG10_2_EXP
398
7.29k
}
399
400
#[inline]
401
7.29k
const fn do_compute_exp_shift(bin_exp: i32, dec_exp: i32) -> u8 {
402
7.29k
    debug_assert!(dec_exp >= -350 && dec_exp <= 350);
403
    // log2_pow10_sig = round(log2(10) * 2**log2_pow10_exp) + 1
404
    const LOG2_POW10_SIG: i32 = 217_707;
405
    const LOG2_POW10_EXP: i32 = 16;
406
    // pow10_bin_exp = floor(log2(10**-dec_exp))
407
7.29k
    let pow10_bin_exp = (-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP;
408
    // pow10 = ((pow10_hi << 64) | pow10_lo) * 2**(pow10_bin_exp - 127)
409
7.29k
    (bin_exp + pow10_bin_exp + 1) as u8
410
7.29k
}
411
412
struct ExpShiftTable {
413
    data: [u8; if Self::ENABLE {
414
        f64::EXP_MASK as usize + 1
415
    } else {
416
        1
417
    }],
418
}
419
420
impl ExpShiftTable {
421
    const ENABLE: bool = true;
422
}
423
424
static EXP_SHIFTS: ExpShiftTable = {
425
    let mut data = [0u8; if ExpShiftTable::ENABLE {
426
        f64::EXP_MASK as usize + 1
427
    } else {
428
        1
429
    }];
430
431
    let mut raw_exp = 0;
432
    while raw_exp < data.len() && ExpShiftTable::ENABLE {
433
        let mut bin_exp = raw_exp as i32 - f64::EXP_OFFSET;
434
        if raw_exp == 0 {
435
            bin_exp += 1;
436
        }
437
        let dec_exp = compute_dec_exp(bin_exp, true);
438
        data[raw_exp] = do_compute_exp_shift(bin_exp, dec_exp) as u8;
439
        raw_exp += 1;
440
    }
441
442
    ExpShiftTable { data }
443
};
444
445
// Computes a shift so that, after scaling by a power of 10, the intermediate
446
// result always has a fixed 128-bit fractional part (for double).
447
//
448
// Different binary exponents can map to the same decimal exponent, but place
449
// the decimal point at different bit positions. The shift compensates for this.
450
//
451
// For example, both 3 * 2**59 and 3 * 2**60 have dec_exp = 2, but dividing by
452
// 10^dec_exp puts the decimal point in different bit positions:
453
//   3 * 2**59 / 100 = 1.72...e+16  (needs shift = 1 + 1)
454
//   3 * 2**60 / 100 = 3.45...e+16  (needs shift = 2 + 1)
455
#[inline]
456
7.29k
unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
457
7.29k
where
458
7.29k
    UInt: traits::UInt,
459
{
460
7.29k
    let num_bits = mem::size_of::<UInt>() * 8;
461
7.29k
    if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
462
        unsafe {
463
0
            *EXP_SHIFTS
464
0
                .data
465
0
                .as_ptr()
466
0
                .add((bin_exp + f64::EXP_OFFSET) as usize)
467
        }
468
    } else {
469
7.29k
        do_compute_exp_shift(bin_exp, dec_exp)
470
    }
471
7.29k
}
zmij::compute_exp_shift::<u32, false>
Line
Count
Source
456
6
unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
457
6
where
458
6
    UInt: traits::UInt,
459
{
460
6
    let num_bits = mem::size_of::<UInt>() * 8;
461
6
    if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
462
        unsafe {
463
0
            *EXP_SHIFTS
464
0
                .data
465
0
                .as_ptr()
466
0
                .add((bin_exp + f64::EXP_OFFSET) as usize)
467
        }
468
    } else {
469
6
        do_compute_exp_shift(bin_exp, dec_exp)
470
    }
471
6
}
zmij::compute_exp_shift::<u32, true>
Line
Count
Source
456
7.28k
unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
457
7.28k
where
458
7.28k
    UInt: traits::UInt,
459
{
460
7.28k
    let num_bits = mem::size_of::<UInt>() * 8;
461
7.28k
    if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
462
        unsafe {
463
0
            *EXP_SHIFTS
464
0
                .data
465
0
                .as_ptr()
466
0
                .add((bin_exp + f64::EXP_OFFSET) as usize)
467
        }
468
    } else {
469
7.28k
        do_compute_exp_shift(bin_exp, dec_exp)
470
    }
471
7.28k
}
Unexecuted instantiation: zmij::compute_exp_shift::<u64, false>
Unexecuted instantiation: zmij::compute_exp_shift::<u64, true>
472
473
#[cfg_attr(feature = "no-panic", no_panic)]
474
7.28k
fn count_trailing_nonzeros(x: u64) -> usize {
475
    // We count the number of bytes until there are only zeros left.
476
    // The code is equivalent to
477
    //    8 - x.leading_zeros() / 8
478
    // but if the BSR instruction is emitted (as gcc on x64 does with default
479
    // settings), subtracting the constant before dividing allows the compiler
480
    // to combine it with the subtraction which it inserts due to BSR counting
481
    // in the opposite direction.
482
    //
483
    // Additionally, the BSR instruction requires a zero check. Since the high
484
    // bit is unused we can avoid the zero check by shifting the datum left by
485
    // one and inserting a sentinel bit at the end. This can be faster than the
486
    // automatically inserted range check.
487
7.28k
    (70 - ((x.to_le() << 1) | 1).leading_zeros() as usize) / 8
488
7.28k
}
489
490
// Align data since unaligned access may be slower when crossing a
491
// hardware-specific boundary.
492
#[repr(C, align(2))]
493
struct Digits2([u8; 200]);
494
495
static DIGITS2: Digits2 = Digits2(
496
    *b"0001020304050607080910111213141516171819\
497
       2021222324252627282930313233343536373839\
498
       4041424344454647484950515253545556575859\
499
       6061626364656667686970717273747576777879\
500
       8081828384858687888990919293949596979899",
501
);
502
503
// Converts value in the range [0, 100) to a string. GCC generates a bit better
504
// code when value is pointer-size (https://www.godbolt.org/z/5fEPMT1cc).
505
#[cfg_attr(feature = "no-panic", no_panic)]
506
0
unsafe fn digits2(value: usize) -> &'static u16 {
507
0
    debug_assert!(value < 100);
508
509
    #[allow(clippy::cast_ptr_alignment)]
510
    unsafe {
511
0
        &*DIGITS2.0.as_ptr().cast::<u16>().add(value)
512
    }
513
0
}
514
515
const DIV10K_EXP: i32 = 40;
516
const DIV10K_SIG: u32 = ((1u64 << DIV10K_EXP) / 10000 + 1) as u32;
517
const NEG10K: u32 = ((1u64 << 32) - 10000) as u32;
518
519
const DIV100_EXP: i32 = 19;
520
const DIV100_SIG: u32 = (1 << DIV100_EXP) / 100 + 1;
521
const NEG100: u32 = (1 << 16) - 100;
522
523
const DIV10_EXP: i32 = 10;
524
const DIV10_SIG: u32 = (1 << DIV10_EXP) / 10 + 1;
525
const NEG10: u32 = (1 << 8) - 10;
526
527
const ZEROS: u64 = 0x0101010101010101 * b'0' as u64;
528
529
#[cfg_attr(feature = "no-panic", no_panic)]
530
7.28k
fn to_bcd8(abcdefgh: u64) -> u64 {
531
    // An optimization from Xiang JunBo.
532
    // Three steps BCD. Base 10000 -> base 100 -> base 10.
533
    // div and mod are evaluated simultaneously as, e.g.
534
    //   (abcdefgh / 10000) << 32 + (abcdefgh % 10000)
535
    //      == abcdefgh + (2**32 - 10000) * (abcdefgh / 10000)))
536
    // where the division on the RHS is implemented by the usual multiply + shift
537
    // trick and the fractional bits are masked away.
538
7.28k
    let abcd_efgh =
539
7.28k
        abcdefgh + u64::from(NEG10K) * ((abcdefgh * u64::from(DIV10K_SIG)) >> DIV10K_EXP);
540
7.28k
    let ab_cd_ef_gh = abcd_efgh
541
7.28k
        + u64::from(NEG100) * (((abcd_efgh * u64::from(DIV100_SIG)) >> DIV100_EXP) & 0x7f0000007f);
542
7.28k
    let a_b_c_d_e_f_g_h = ab_cd_ef_gh
543
7.28k
        + u64::from(NEG10)
544
7.28k
            * (((ab_cd_ef_gh * u64::from(DIV10_SIG)) >> DIV10_EXP) & 0xf000f000f000f);
545
7.28k
    a_b_c_d_e_f_g_h.to_be()
546
7.28k
}
547
548
7.28k
unsafe fn write_if(buffer: *mut u8, digit: u32, condition: bool) -> *mut u8 {
549
    unsafe {
550
7.28k
        *buffer = b'0' + digit as u8;
551
7.28k
        buffer.add(usize::from(condition))
552
    }
553
7.28k
}
554
555
7.28k
unsafe fn write8(buffer: *mut u8, value: u64) {
556
7.28k
    unsafe {
557
7.28k
        buffer.cast::<u64>().write_unaligned(value);
558
7.28k
    }
559
7.28k
}
560
561
// Writes a significand and removes trailing zeros. value has up to 17 decimal
562
// digits (16-17 for normals) for double (num_bits == 64) and up to 9 digits
563
// (8-9 for normals) for float. The significant digits start from buffer[1].
564
// buffer[0] may contain '0' after this function if the leading digit is zero.
565
#[cfg_attr(feature = "no-panic", no_panic)]
566
#[inline]
567
7.28k
unsafe fn write_significand<Float>(mut buffer: *mut u8, value: u64, extra_digit: bool) -> *mut u8
568
7.28k
where
569
7.28k
    Float: FloatTraits,
570
{
571
7.28k
    if Float::NUM_BITS == 32 {
572
7.28k
        buffer = unsafe { write_if(buffer, (value / 100_000_000) as u32, extra_digit) };
573
7.28k
        let bcd = to_bcd8(value % 100_000_000);
574
        unsafe {
575
7.28k
            write8(buffer, bcd + ZEROS);
576
7.28k
            return buffer.add(count_trailing_nonzeros(bcd));
577
        }
578
0
    }
579
580
    #[cfg(not(any(
581
        all(target_arch = "aarch64", target_feature = "neon", not(miri)),
582
        all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
583
    )))]
584
    {
585
        // Digits/pairs of digits are denoted by letters: value = abbccddeeffgghhii.
586
        let abbccddee = (value / 100_000_000) as u32;
587
        let ffgghhii = (value % 100_000_000) as u32;
588
        buffer = unsafe { write_if(buffer, abbccddee / 100_000_000, extra_digit) };
589
        let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
590
        unsafe {
591
            write8(buffer, bcd + ZEROS);
592
        }
593
        if ffgghhii == 0 {
594
            return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
595
        }
596
        let bcd = to_bcd8(u64::from(ffgghhii));
597
        unsafe {
598
            write8(buffer.add(8), bcd + ZEROS);
599
            buffer.add(8).add(count_trailing_nonzeros(bcd))
600
        }
601
    }
602
603
    #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
604
    {
605
        // An optimized version for NEON by Dougall Johnson.
606
607
        use core::arch::aarch64::*;
608
609
        const NEG10K: i32 = -10000 + 0x10000;
610
611
        #[repr(C, align(64))]
612
        struct Consts {
613
            mul_const: u64,
614
            hundred_million: u64,
615
            multipliers32: int32x4_t,
616
            multipliers16: int16x8_t,
617
        }
618
619
        static CONSTS: Consts = Consts {
620
            mul_const: 0xabcc77118461cefd,
621
            hundred_million: 100000000,
622
            multipliers32: unsafe {
623
                mem::transmute::<[i32; 4], int32x4_t>([
624
                    DIV10K_SIG as i32,
625
                    NEG10K,
626
                    (DIV100_SIG << 12) as i32,
627
                    NEG100 as i32,
628
                ])
629
            },
630
            multipliers16: unsafe {
631
                mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
632
            },
633
        };
634
635
        let mut c = ptr::addr_of!(CONSTS);
636
637
        // Compiler barrier, or clang doesn't load from memory and generates 15
638
        // more instructions.
639
        let c = unsafe {
640
            asm!("/*{0}*/", inout(reg) c);
641
            &*c
642
        };
643
644
        let mut hundred_million = c.hundred_million;
645
646
        // Compiler barrier, or clang narrows the load to 32-bit and unpairs it.
647
        unsafe {
648
            asm!("/*{0}*/", inout(reg) hundred_million);
649
        }
650
651
        // Equivalent to abbccddee = value / 100000000, ffgghhii = value % 100000000.
652
        let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
653
        let ffgghhii = value - abbccddee * hundred_million;
654
655
        // We could probably make this bit faster, but we're preferring to
656
        // reuse the constants for now.
657
        let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
658
        let bbccddee = abbccddee - a * hundred_million;
659
660
        buffer = unsafe { write_if(buffer, a as u32, extra_digit) };
661
662
        unsafe {
663
            let ffgghhii_bbccddee_64: uint64x1_t =
664
                mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
665
            let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);
666
667
            let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
668
                vreinterpret_u32_s32(vqdmulh_n_s32(
669
                    bbccddee_ffgghhii,
670
                    mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
671
                )),
672
                9,
673
            ));
674
            let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
675
                bbccddee_ffgghhii,
676
                bbcc_ffgg,
677
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
678
            );
679
680
            let mut ddee_bbcc_hhii_ffgg: int32x4_t =
681
                vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));
682
683
            // Compiler barrier, or clang breaks the subsequent MLA into UADDW +
684
            // MUL.
685
            asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);
686
687
            let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
688
                ddee_bbcc_hhii_ffgg,
689
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
690
            );
691
            let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
692
                ddee_bbcc_hhii_ffgg,
693
                dd_bb_hh_ff,
694
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
695
            ));
696
            let high_10s: int16x8_t = vqdmulhq_n_s16(
697
                ee_dd_cc_bb_ii_hh_gg_ff,
698
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
699
            );
700
            let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
701
                ee_dd_cc_bb_ii_hh_gg_ff,
702
                high_10s,
703
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
704
            )));
705
            let str: uint16x8_t = vaddq_u16(
706
                vreinterpretq_u16_u8(digits),
707
                vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
708
            );
709
710
            buffer.cast::<uint16x8_t>().write_unaligned(str);
711
712
            let is_not_zero: uint16x8_t =
713
                vreinterpretq_u16_u8(vcgtzq_s8(vreinterpretq_s8_u8(digits)));
714
            let zeros: u64 = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_not_zero, 4)), 0);
715
716
            buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
717
        }
718
    }
719
720
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
721
    {
722
        use crate::stdarch_x86::*;
723
724
0
        let abbccddee = (value / 100_000_000) as u32;
725
0
        let ffgghhii = (value % 100_000_000) as u32;
726
0
        let a = abbccddee / 100_000_000;
727
0
        let bbccddee = abbccddee % 100_000_000;
728
729
0
        buffer = unsafe { write_if(buffer, a, extra_digit) };
730
731
        #[repr(C, align(64))]
732
        struct Consts {
733
            div10k: u128,
734
            neg10k: u128,
735
            div100: u128,
736
            div10: u128,
737
            #[cfg(target_feature = "sse4.1")]
738
            neg100: u128,
739
            #[cfg(target_feature = "sse4.1")]
740
            neg10: u128,
741
            #[cfg(target_feature = "sse4.1")]
742
            bswap: u128,
743
            #[cfg(not(target_feature = "sse4.1"))]
744
            hundred: u128,
745
            #[cfg(not(target_feature = "sse4.1"))]
746
            moddiv10: u128,
747
            zeros: u128,
748
        }
749
750
        impl Consts {
751
0
            const fn splat64(x: u64) -> u128 {
752
0
                ((x as u128) << 64) | x as u128
753
0
            }
754
755
0
            const fn splat32(x: u32) -> u128 {
756
0
                Self::splat64(((x as u64) << 32) | x as u64)
757
0
            }
758
759
0
            const fn splat16(x: u16) -> u128 {
760
0
                Self::splat32(((x as u32) << 16) | x as u32)
761
0
            }
762
763
            #[cfg(target_feature = "sse4.1")]
764
            const fn pack8(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u64 {
765
                ((h as u64) << 56)
766
                    | ((g as u64) << 48)
767
                    | ((f as u64) << 40)
768
                    | ((e as u64) << 32)
769
                    | ((d as u64) << 24)
770
                    | ((c as u64) << 16)
771
                    | ((b as u64) << 8)
772
                    | a as u64
773
            }
774
        }
775
776
        static CONSTS: Consts = Consts {
777
            div10k: Consts::splat64(DIV10K_SIG as u64),
778
            neg10k: Consts::splat64(NEG10K as u64),
779
            div100: Consts::splat32(DIV100_SIG),
780
            div10: Consts::splat16(((1u32 << 16) / 10 + 1) as u16),
781
            #[cfg(target_feature = "sse4.1")]
782
            neg100: Consts::splat32(NEG100),
783
            #[cfg(target_feature = "sse4.1")]
784
            neg10: Consts::splat16((1 << 8) - 10),
785
            #[cfg(target_feature = "sse4.1")]
786
            bswap: Consts::pack8(15, 14, 13, 12, 11, 10, 9, 8) as u128
787
                | (Consts::pack8(7, 6, 5, 4, 3, 2, 1, 0) as u128) << 64,
788
            #[cfg(not(target_feature = "sse4.1"))]
789
            hundred: Consts::splat32(100),
790
            #[cfg(not(target_feature = "sse4.1"))]
791
            moddiv10: Consts::splat16(10 * (1 << 8) - 1),
792
            zeros: Consts::splat64(ZEROS),
793
        };
794
795
0
        let mut c = ptr::addr_of!(CONSTS);
796
        // Load constants from memory.
797
0
        unsafe {
798
0
            asm!("/*{0}*/", inout(reg) c);
799
0
        }
800
801
0
        let div10k = unsafe { _mm_load_si128(ptr::addr_of!((*c).div10k).cast::<__m128i>()) };
802
0
        let neg10k = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10k).cast::<__m128i>()) };
803
0
        let div100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).div100).cast::<__m128i>()) };
804
0
        let div10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).div10).cast::<__m128i>()) };
805
        #[cfg(target_feature = "sse4.1")]
806
        let neg100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg100).cast::<__m128i>()) };
807
        #[cfg(target_feature = "sse4.1")]
808
        let neg10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10).cast::<__m128i>()) };
809
        #[cfg(target_feature = "sse4.1")]
810
        let bswap = unsafe { _mm_load_si128(ptr::addr_of!((*c).bswap).cast::<__m128i>()) };
811
        #[cfg(not(target_feature = "sse4.1"))]
812
0
        let hundred = unsafe { _mm_load_si128(ptr::addr_of!((*c).hundred).cast::<__m128i>()) };
813
        #[cfg(not(target_feature = "sse4.1"))]
814
0
        let moddiv10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).moddiv10).cast::<__m128i>()) };
815
0
        let zeros = unsafe { _mm_load_si128(ptr::addr_of!((*c).zeros).cast::<__m128i>()) };
816
817
        // The BCD sequences are based on ones provided by Xiang JunBo.
818
        unsafe {
819
0
            let x: __m128i = _mm_set_epi64x(i64::from(bbccddee), i64::from(ffgghhii));
820
0
            let y: __m128i = _mm_add_epi64(
821
0
                x,
822
0
                _mm_mul_epu32(neg10k, _mm_srli_epi64(_mm_mul_epu32(x, div10k), DIV10K_EXP)),
823
            );
824
825
            #[cfg(target_feature = "sse4.1")]
826
            let bcd: __m128i = {
827
                // _mm_mullo_epi32 is SSE 4.1
828
                let z: __m128i = _mm_add_epi64(
829
                    y,
830
                    _mm_mullo_epi32(neg100, _mm_srli_epi32(_mm_mulhi_epu16(y, div100), 3)),
831
                );
832
                let big_endian_bcd: __m128i =
833
                    _mm_add_epi64(z, _mm_mullo_epi16(neg10, _mm_mulhi_epu16(z, div10)));
834
                // SSSE3
835
                _mm_shuffle_epi8(big_endian_bcd, bswap)
836
            };
837
838
            #[cfg(not(target_feature = "sse4.1"))]
839
0
            let bcd: __m128i = {
840
0
                let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, div100), 3);
841
0
                let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, hundred));
842
0
                let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
843
0
                let bcd_shuffled: __m128i = _mm_sub_epi16(
844
0
                    _mm_slli_epi16(z, 8),
845
0
                    _mm_mullo_epi16(moddiv10, _mm_mulhi_epu16(z, div10)),
846
                );
847
0
                _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
848
            };
849
850
0
            let digits = _mm_or_si128(bcd, zeros);
851
852
            // Count leading zeros.
853
0
            let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
854
0
            let mask = _mm_movemask_epi8(mask128) as u32;
855
0
            let len = 32 - mask.leading_zeros() as usize;
856
857
0
            _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
858
0
            buffer.add(len)
859
        }
860
    }
861
7.28k
}
Unexecuted instantiation: zmij::write_significand::<f64>
zmij::write_significand::<f32>
Line
Count
Source
567
7.28k
unsafe fn write_significand<Float>(mut buffer: *mut u8, value: u64, extra_digit: bool) -> *mut u8
568
7.28k
where
569
7.28k
    Float: FloatTraits,
570
{
571
7.28k
    if Float::NUM_BITS == 32 {
572
7.28k
        buffer = unsafe { write_if(buffer, (value / 100_000_000) as u32, extra_digit) };
573
7.28k
        let bcd = to_bcd8(value % 100_000_000);
574
        unsafe {
575
7.28k
            write8(buffer, bcd + ZEROS);
576
7.28k
            return buffer.add(count_trailing_nonzeros(bcd));
577
        }
578
0
    }
579
580
    #[cfg(not(any(
581
        all(target_arch = "aarch64", target_feature = "neon", not(miri)),
582
        all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
583
    )))]
584
    {
585
        // Digits/pairs of digits are denoted by letters: value = abbccddeeffgghhii.
586
        let abbccddee = (value / 100_000_000) as u32;
587
        let ffgghhii = (value % 100_000_000) as u32;
588
        buffer = unsafe { write_if(buffer, abbccddee / 100_000_000, extra_digit) };
589
        let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
590
        unsafe {
591
            write8(buffer, bcd + ZEROS);
592
        }
593
        if ffgghhii == 0 {
594
            return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
595
        }
596
        let bcd = to_bcd8(u64::from(ffgghhii));
597
        unsafe {
598
            write8(buffer.add(8), bcd + ZEROS);
599
            buffer.add(8).add(count_trailing_nonzeros(bcd))
600
        }
601
    }
602
603
    #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
604
    {
605
        // An optimized version for NEON by Dougall Johnson.
606
607
        use core::arch::aarch64::*;
608
609
        const NEG10K: i32 = -10000 + 0x10000;
610
611
        #[repr(C, align(64))]
612
        struct Consts {
613
            mul_const: u64,
614
            hundred_million: u64,
615
            multipliers32: int32x4_t,
616
            multipliers16: int16x8_t,
617
        }
618
619
        static CONSTS: Consts = Consts {
620
            mul_const: 0xabcc77118461cefd,
621
            hundred_million: 100000000,
622
            multipliers32: unsafe {
623
                mem::transmute::<[i32; 4], int32x4_t>([
624
                    DIV10K_SIG as i32,
625
                    NEG10K,
626
                    (DIV100_SIG << 12) as i32,
627
                    NEG100 as i32,
628
                ])
629
            },
630
            multipliers16: unsafe {
631
                mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
632
            },
633
        };
634
635
        let mut c = ptr::addr_of!(CONSTS);
636
637
        // Compiler barrier, or clang doesn't load from memory and generates 15
638
        // more instructions.
639
        let c = unsafe {
640
            asm!("/*{0}*/", inout(reg) c);
641
            &*c
642
        };
643
644
        let mut hundred_million = c.hundred_million;
645
646
        // Compiler barrier, or clang narrows the load to 32-bit and unpairs it.
647
        unsafe {
648
            asm!("/*{0}*/", inout(reg) hundred_million);
649
        }
650
651
        // Equivalent to abbccddee = value / 100000000, ffgghhii = value % 100000000.
652
        let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
653
        let ffgghhii = value - abbccddee * hundred_million;
654
655
        // We could probably make this bit faster, but we're preferring to
656
        // reuse the constants for now.
657
        let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
658
        let bbccddee = abbccddee - a * hundred_million;
659
660
        buffer = unsafe { write_if(buffer, a as u32, extra_digit) };
661
662
        unsafe {
663
            let ffgghhii_bbccddee_64: uint64x1_t =
664
                mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
665
            let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);
666
667
            let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
668
                vreinterpret_u32_s32(vqdmulh_n_s32(
669
                    bbccddee_ffgghhii,
670
                    mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
671
                )),
672
                9,
673
            ));
674
            let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
675
                bbccddee_ffgghhii,
676
                bbcc_ffgg,
677
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
678
            );
679
680
            let mut ddee_bbcc_hhii_ffgg: int32x4_t =
681
                vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));
682
683
            // Compiler barrier, or clang breaks the subsequent MLA into UADDW +
684
            // MUL.
685
            asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);
686
687
            let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
688
                ddee_bbcc_hhii_ffgg,
689
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
690
            );
691
            let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
692
                ddee_bbcc_hhii_ffgg,
693
                dd_bb_hh_ff,
694
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
695
            ));
696
            let high_10s: int16x8_t = vqdmulhq_n_s16(
697
                ee_dd_cc_bb_ii_hh_gg_ff,
698
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
699
            );
700
            let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
701
                ee_dd_cc_bb_ii_hh_gg_ff,
702
                high_10s,
703
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
704
            )));
705
            let str: uint16x8_t = vaddq_u16(
706
                vreinterpretq_u16_u8(digits),
707
                vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
708
            );
709
710
            buffer.cast::<uint16x8_t>().write_unaligned(str);
711
712
            let is_not_zero: uint16x8_t =
713
                vreinterpretq_u16_u8(vcgtzq_s8(vreinterpretq_s8_u8(digits)));
714
            let zeros: u64 = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_not_zero, 4)), 0);
715
716
            buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
717
        }
718
    }
719
720
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
721
    {
722
        use crate::stdarch_x86::*;
723
724
0
        let abbccddee = (value / 100_000_000) as u32;
725
0
        let ffgghhii = (value % 100_000_000) as u32;
726
0
        let a = abbccddee / 100_000_000;
727
0
        let bbccddee = abbccddee % 100_000_000;
728
729
0
        buffer = unsafe { write_if(buffer, a, extra_digit) };
730
731
        #[repr(C, align(64))]
732
        struct Consts {
733
            div10k: u128,
734
            neg10k: u128,
735
            div100: u128,
736
            div10: u128,
737
            #[cfg(target_feature = "sse4.1")]
738
            neg100: u128,
739
            #[cfg(target_feature = "sse4.1")]
740
            neg10: u128,
741
            #[cfg(target_feature = "sse4.1")]
742
            bswap: u128,
743
            #[cfg(not(target_feature = "sse4.1"))]
744
            hundred: u128,
745
            #[cfg(not(target_feature = "sse4.1"))]
746
            moddiv10: u128,
747
            zeros: u128,
748
        }
749
750
        impl Consts {
751
            const fn splat64(x: u64) -> u128 {
752
                ((x as u128) << 64) | x as u128
753
            }
754
755
            const fn splat32(x: u32) -> u128 {
756
                Self::splat64(((x as u64) << 32) | x as u64)
757
            }
758
759
            const fn splat16(x: u16) -> u128 {
760
                Self::splat32(((x as u32) << 16) | x as u32)
761
            }
762
763
            #[cfg(target_feature = "sse4.1")]
764
            const fn pack8(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u64 {
765
                ((h as u64) << 56)
766
                    | ((g as u64) << 48)
767
                    | ((f as u64) << 40)
768
                    | ((e as u64) << 32)
769
                    | ((d as u64) << 24)
770
                    | ((c as u64) << 16)
771
                    | ((b as u64) << 8)
772
                    | a as u64
773
            }
774
        }
775
776
        static CONSTS: Consts = Consts {
777
            div10k: Consts::splat64(DIV10K_SIG as u64),
778
            neg10k: Consts::splat64(NEG10K as u64),
779
            div100: Consts::splat32(DIV100_SIG),
780
            div10: Consts::splat16(((1u32 << 16) / 10 + 1) as u16),
781
            #[cfg(target_feature = "sse4.1")]
782
            neg100: Consts::splat32(NEG100),
783
            #[cfg(target_feature = "sse4.1")]
784
            neg10: Consts::splat16((1 << 8) - 10),
785
            #[cfg(target_feature = "sse4.1")]
786
            bswap: Consts::pack8(15, 14, 13, 12, 11, 10, 9, 8) as u128
787
                | (Consts::pack8(7, 6, 5, 4, 3, 2, 1, 0) as u128) << 64,
788
            #[cfg(not(target_feature = "sse4.1"))]
789
            hundred: Consts::splat32(100),
790
            #[cfg(not(target_feature = "sse4.1"))]
791
            moddiv10: Consts::splat16(10 * (1 << 8) - 1),
792
            zeros: Consts::splat64(ZEROS),
793
        };
794
795
0
        let mut c = ptr::addr_of!(CONSTS);
796
        // Load constants from memory.
797
0
        unsafe {
798
0
            asm!("/*{0}*/", inout(reg) c);
799
0
        }
800
801
0
        let div10k = unsafe { _mm_load_si128(ptr::addr_of!((*c).div10k).cast::<__m128i>()) };
802
0
        let neg10k = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10k).cast::<__m128i>()) };
803
0
        let div100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).div100).cast::<__m128i>()) };
804
0
        let div10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).div10).cast::<__m128i>()) };
805
        #[cfg(target_feature = "sse4.1")]
806
        let neg100 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg100).cast::<__m128i>()) };
807
        #[cfg(target_feature = "sse4.1")]
808
        let neg10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).neg10).cast::<__m128i>()) };
809
        #[cfg(target_feature = "sse4.1")]
810
        let bswap = unsafe { _mm_load_si128(ptr::addr_of!((*c).bswap).cast::<__m128i>()) };
811
        #[cfg(not(target_feature = "sse4.1"))]
812
0
        let hundred = unsafe { _mm_load_si128(ptr::addr_of!((*c).hundred).cast::<__m128i>()) };
813
        #[cfg(not(target_feature = "sse4.1"))]
814
0
        let moddiv10 = unsafe { _mm_load_si128(ptr::addr_of!((*c).moddiv10).cast::<__m128i>()) };
815
0
        let zeros = unsafe { _mm_load_si128(ptr::addr_of!((*c).zeros).cast::<__m128i>()) };
816
817
        // The BCD sequences are based on ones provided by Xiang JunBo.
818
        unsafe {
819
0
            let x: __m128i = _mm_set_epi64x(i64::from(bbccddee), i64::from(ffgghhii));
820
0
            let y: __m128i = _mm_add_epi64(
821
0
                x,
822
0
                _mm_mul_epu32(neg10k, _mm_srli_epi64(_mm_mul_epu32(x, div10k), DIV10K_EXP)),
823
            );
824
825
            #[cfg(target_feature = "sse4.1")]
826
            let bcd: __m128i = {
827
                // _mm_mullo_epi32 is SSE 4.1
828
                let z: __m128i = _mm_add_epi64(
829
                    y,
830
                    _mm_mullo_epi32(neg100, _mm_srli_epi32(_mm_mulhi_epu16(y, div100), 3)),
831
                );
832
                let big_endian_bcd: __m128i =
833
                    _mm_add_epi64(z, _mm_mullo_epi16(neg10, _mm_mulhi_epu16(z, div10)));
834
                // SSSE3
835
                _mm_shuffle_epi8(big_endian_bcd, bswap)
836
            };
837
838
            #[cfg(not(target_feature = "sse4.1"))]
839
0
            let bcd: __m128i = {
840
0
                let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, div100), 3);
841
0
                let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, hundred));
842
0
                let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
843
0
                let bcd_shuffled: __m128i = _mm_sub_epi16(
844
0
                    _mm_slli_epi16(z, 8),
845
0
                    _mm_mullo_epi16(moddiv10, _mm_mulhi_epu16(z, div10)),
846
                );
847
0
                _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
848
            };
849
850
0
            let digits = _mm_or_si128(bcd, zeros);
851
852
            // Count leading zeros.
853
0
            let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
854
0
            let mask = _mm_movemask_epi8(mask128) as u32;
855
0
            let len = 32 - mask.leading_zeros() as usize;
856
857
0
            _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
858
0
            buffer.add(len)
859
        }
860
    }
861
7.28k
}
862
863
struct ToDecimalResult {
864
    sig: i64,
865
    exp: i32,
866
}
867
868
#[cfg_attr(feature = "no-panic", no_panic)]
869
#[inline]
870
6
fn to_decimal_schubfach<UInt>(bin_sig: UInt, bin_exp: i64, regular: bool) -> ToDecimalResult
871
6
where
872
6
    UInt: traits::UInt,
873
{
874
6
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
875
6
    let dec_exp = compute_dec_exp(bin_exp as i32, regular);
876
6
    let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp as i32, dec_exp) };
877
6
    let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
878
879
    // Fallback to Schubfach to guarantee correctness in boundary cases. This
880
    // requires switching to strict overestimates of powers of 10.
881
6
    if num_bits == 64 {
882
0
        pow10.lo += 1;
883
6
    } else {
884
6
        pow10.hi += 1;
885
6
    }
886
887
    // Shift the significand so that boundaries are integer.
888
    const BOUND_SHIFT: u32 = 2;
889
6
    let bin_sig_shifted = bin_sig << BOUND_SHIFT;
890
891
    // Compute the estimates of lower and upper bounds of the rounding interval
892
    // by multiplying them by the power of 10 and applying modified rounding.
893
6
    let lsb = bin_sig & UInt::from(1);
894
6
    let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
895
6
    let lower = umulhi_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
896
6
    let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
897
6
    let upper = umulhi_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;
898
899
    // The idea of using a single shorter candidate is by Cassio Neri.
900
    // It is less or equal to the upper bound by construction.
901
6
    let shorter = (upper >> BOUND_SHIFT) / UInt::from(10) * UInt::from(10);
902
6
    if (shorter << BOUND_SHIFT) >= lower {
903
6
        return ToDecimalResult {
904
6
            sig: shorter.into() as i64,
905
6
            exp: dec_exp,
906
6
        };
907
0
    }
908
909
0
    let scaled_sig = umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
910
0
    let longer_below = scaled_sig >> BOUND_SHIFT;
911
0
    let longer_above = longer_below + UInt::from(1);
912
913
    // Pick the closest of longer_below and longer_above and check if it's in
914
    // the rounding interval.
915
0
    let cmp = scaled_sig
916
0
        .wrapping_sub((longer_below + longer_above) << 1)
917
0
        .to_signed();
918
0
    let below_closer = cmp < UInt::from(0).to_signed()
919
0
        || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
920
0
    let below_in = (longer_below << BOUND_SHIFT) >= lower;
921
0
    let dec_sig = if below_closer & below_in {
922
0
        longer_below
923
    } else {
924
0
        longer_above
925
    };
926
0
    ToDecimalResult {
927
0
        sig: dec_sig.into() as i64,
928
0
        exp: dec_exp,
929
0
    }
930
6
}
zmij::to_decimal_schubfach::<u32>
Line
Count
Source
870
6
fn to_decimal_schubfach<UInt>(bin_sig: UInt, bin_exp: i64, regular: bool) -> ToDecimalResult
871
6
where
872
6
    UInt: traits::UInt,
873
{
874
6
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
875
6
    let dec_exp = compute_dec_exp(bin_exp as i32, regular);
876
6
    let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp as i32, dec_exp) };
877
6
    let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
878
879
    // Fallback to Schubfach to guarantee correctness in boundary cases. This
880
    // requires switching to strict overestimates of powers of 10.
881
6
    if num_bits == 64 {
882
0
        pow10.lo += 1;
883
6
    } else {
884
6
        pow10.hi += 1;
885
6
    }
886
887
    // Shift the significand so that boundaries are integer.
888
    const BOUND_SHIFT: u32 = 2;
889
6
    let bin_sig_shifted = bin_sig << BOUND_SHIFT;
890
891
    // Compute the estimates of lower and upper bounds of the rounding interval
892
    // by multiplying them by the power of 10 and applying modified rounding.
893
6
    let lsb = bin_sig & UInt::from(1);
894
6
    let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
895
6
    let lower = umulhi_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
896
6
    let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
897
6
    let upper = umulhi_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;
898
899
    // The idea of using a single shorter candidate is by Cassio Neri.
900
    // It is less or equal to the upper bound by construction.
901
6
    let shorter = (upper >> BOUND_SHIFT) / UInt::from(10) * UInt::from(10);
902
6
    if (shorter << BOUND_SHIFT) >= lower {
903
6
        return ToDecimalResult {
904
6
            sig: shorter.into() as i64,
905
6
            exp: dec_exp,
906
6
        };
907
0
    }
908
909
0
    let scaled_sig = umulhi_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
910
0
    let longer_below = scaled_sig >> BOUND_SHIFT;
911
0
    let longer_above = longer_below + UInt::from(1);
912
913
    // Pick the closest of longer_below and longer_above and check if it's in
914
    // the rounding interval.
915
0
    let cmp = scaled_sig
916
0
        .wrapping_sub((longer_below + longer_above) << 1)
917
0
        .to_signed();
918
0
    let below_closer = cmp < UInt::from(0).to_signed()
919
0
        || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
920
0
    let below_in = (longer_below << BOUND_SHIFT) >= lower;
921
0
    let dec_sig = if below_closer & below_in {
922
0
        longer_below
923
    } else {
924
0
        longer_above
925
    };
926
0
    ToDecimalResult {
927
0
        sig: dec_sig.into() as i64,
928
0
        exp: dec_exp,
929
0
    }
930
6
}
Unexecuted instantiation: zmij::to_decimal_schubfach::<u64>
931
932
// Here be 🐉s.
933
// Converts a binary FP number bin_sig * 2**bin_exp to the shortest decimal
934
// representation, where bin_exp = raw_exp - exp_offset.
935
#[cfg_attr(feature = "no-panic", no_panic)]
936
#[inline]
937
7.28k
fn to_decimal_fast<Float, UInt>(bin_sig: UInt, raw_exp: i64, regular: bool) -> ToDecimalResult
938
7.28k
where
939
7.28k
    Float: FloatTraits,
940
7.28k
    UInt: traits::UInt,
941
{
942
7.28k
    let bin_exp = raw_exp - i64::from(Float::EXP_OFFSET);
943
7.28k
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
944
    // An optimization from yy by Yaoyuan Guo:
945
7.28k
    while regular {
946
7.28k
        let dec_exp = if USE_UMUL128_HI64 {
947
0
            umul128_hi64(bin_exp as u64, 0x4d10500000000000) as i32
948
        } else {
949
7.28k
            compute_dec_exp(bin_exp as i32, true)
950
        };
951
7.28k
        let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp as i32, dec_exp) };
952
7.28k
        let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
953
954
        let integral; // integral part of bin_sig * pow10
955
        let fractional; // fractional part of bin_sig * pow10
956
7.28k
        if num_bits == 64 {
957
0
            let p = umul192_hi128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
958
0
            integral = UInt::truncate(p.hi);
959
0
            fractional = p.lo;
960
7.28k
        } else {
961
7.28k
            let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
962
7.28k
            integral = UInt::truncate((p >> 64) as u64);
963
7.28k
            fractional = p as u64;
964
7.28k
        }
965
        const HALF_ULP: u64 = 1 << 63;
966
967
        // Exact half-ulp tie when rounding to nearest integer.
968
7.28k
        let cmp = fractional.wrapping_sub(HALF_ULP) as i64;
969
7.28k
        if cmp == 0 {
970
6
            break;
971
7.28k
        }
972
973
        // An optimization of integral % 10 by Dougall Johnson. Relies on range
974
        // calculation: (max_bin_sig << max_exp_shift) * max_u128.
975
        // (1 << 63) / 5 == (1 << 64) / 10 without an intermediate int128.
976
        const DIV10_SIG64: u64 = (1 << 63) / 5 + 1;
977
7.28k
        let div10 = umul128_hi64(integral.into(), DIV10_SIG64);
978
        #[allow(unused_mut)]
979
7.28k
        let mut digit = integral.into() - div10 * 10;
980
        // or it narrows to 32-bit and doesn't use madd/msub
981
        #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
982
7.28k
        unsafe {
983
7.28k
            asm!("/*{0}*/", inout(reg) digit);
984
7.28k
        }
985
986
        // Switch to a fixed-point representation with the least significant
987
        // integral digit in the upper bits and fractional digits in the lower
988
        // bits.
989
7.28k
        let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
990
7.28k
        let num_fractional_bits = 64 - num_integral_bits;
991
7.28k
        let ten = 10u64 << num_fractional_bits;
992
        // Fixed-point remainder of the scaled significand modulo 10.
993
7.28k
        let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);
994
995
        // scaled_half_ulp = 0.5 * pow10 in the fixed-point format.
996
        // dec_exp is chosen so that 10**dec_exp <= 2**bin_exp < 10**(dec_exp + 1).
997
        // Since 1ulp == 2**bin_exp it will be in the range [1, 10) after scaling
998
        // by 10**dec_exp. Add 1 to combine the shift with division by two.
999
7.28k
        let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
1000
7.28k
        let upper = scaled_sig_mod10 + scaled_half_ulp;
1001
1002
        // value = 5.0507837461e-27
1003
        // next  = 5.0507837461000010e-27
1004
        //
1005
        // c = integral.fractional' = 50507837461000003.153987... (value)
1006
        //                            50507837461000010.328635... (next)
1007
        //          scaled_half_ulp =                 3.587324...
1008
        //
1009
        // fractional' = fractional / 2**64, fractional = 2840565642863009226
1010
        //
1011
        //      50507837461000000       c               upper     50507837461000010
1012
        //              s              l|   L             |               S
1013
        // ───┬────┬────┼────┬────┬────┼*-──┼────┬────┬───*┬────┬────┬────┼-*--┬───
1014
        //    8    9    0    1    2    3    4    5    6    7    8    9    0 |  1
1015
        //            └─────────────────┼─────────────────┘                next
1016
        //                             1ulp
1017
        //
1018
        // s - shorter underestimate, S - shorter overestimate
1019
        // l - longer underestimate,  L - longer overestimate
1020
1021
        // Check for boundary case when rounding down to nearest 10 and
1022
        // near-boundary case when rounding up to nearest 10.
1023
        // Case where upper == ten is insufficient: 1.342178e+08f.
1024
7.28k
        if ten.wrapping_sub(upper) <= 1 // upper == ten || upper == ten - 1
1025
7.28k
            || scaled_sig_mod10 == scaled_half_ulp
1026
        {
1027
0
            break;
1028
7.28k
        }
1029
1030
7.28k
        let shorter = (integral.into() - digit) as i64;
1031
7.28k
        let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
1032
7.28k
        let dec_sig = select_if_less(scaled_sig_mod10, scaled_half_ulp, shorter, longer);
1033
7.28k
        return ToDecimalResult {
1034
7.28k
            sig: select_if_less(ten, upper, shorter + 10, dec_sig),
1035
7.28k
            exp: dec_exp,
1036
7.28k
        };
1037
    }
1038
6
    to_decimal_schubfach(bin_sig, bin_exp, regular)
1039
7.28k
}
Unexecuted instantiation: zmij::to_decimal_fast::<f64, u64>
zmij::to_decimal_fast::<f32, u32>
Line
Count
Source
937
7.28k
fn to_decimal_fast<Float, UInt>(bin_sig: UInt, raw_exp: i64, regular: bool) -> ToDecimalResult
938
7.28k
where
939
7.28k
    Float: FloatTraits,
940
7.28k
    UInt: traits::UInt,
941
{
942
7.28k
    let bin_exp = raw_exp - i64::from(Float::EXP_OFFSET);
943
7.28k
    let num_bits = mem::size_of::<UInt>() as i32 * 8;
944
    // An optimization from yy by Yaoyuan Guo:
945
7.28k
    while regular {
946
7.28k
        let dec_exp = if USE_UMUL128_HI64 {
947
0
            umul128_hi64(bin_exp as u64, 0x4d10500000000000) as i32
948
        } else {
949
7.28k
            compute_dec_exp(bin_exp as i32, true)
950
        };
951
7.28k
        let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp as i32, dec_exp) };
952
7.28k
        let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
953
954
        let integral; // integral part of bin_sig * pow10
955
        let fractional; // fractional part of bin_sig * pow10
956
7.28k
        if num_bits == 64 {
957
0
            let p = umul192_hi128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
958
0
            integral = UInt::truncate(p.hi);
959
0
            fractional = p.lo;
960
7.28k
        } else {
961
7.28k
            let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
962
7.28k
            integral = UInt::truncate((p >> 64) as u64);
963
7.28k
            fractional = p as u64;
964
7.28k
        }
965
        const HALF_ULP: u64 = 1 << 63;
966
967
        // Exact half-ulp tie when rounding to nearest integer.
968
7.28k
        let cmp = fractional.wrapping_sub(HALF_ULP) as i64;
969
7.28k
        if cmp == 0 {
970
6
            break;
971
7.28k
        }
972
973
        // An optimization of integral % 10 by Dougall Johnson. Relies on range
974
        // calculation: (max_bin_sig << max_exp_shift) * max_u128.
975
        // (1 << 63) / 5 == (1 << 64) / 10 without an intermediate int128.
976
        const DIV10_SIG64: u64 = (1 << 63) / 5 + 1;
977
7.28k
        let div10 = umul128_hi64(integral.into(), DIV10_SIG64);
978
        #[allow(unused_mut)]
979
7.28k
        let mut digit = integral.into() - div10 * 10;
980
        // or it narrows to 32-bit and doesn't use madd/msub
981
        #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
982
7.28k
        unsafe {
983
7.28k
            asm!("/*{0}*/", inout(reg) digit);
984
7.28k
        }
985
986
        // Switch to a fixed-point representation with the least significant
987
        // integral digit in the upper bits and fractional digits in the lower
988
        // bits.
989
7.28k
        let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
990
7.28k
        let num_fractional_bits = 64 - num_integral_bits;
991
7.28k
        let ten = 10u64 << num_fractional_bits;
992
        // Fixed-point remainder of the scaled significand modulo 10.
993
7.28k
        let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);
994
995
        // scaled_half_ulp = 0.5 * pow10 in the fixed-point format.
996
        // dec_exp is chosen so that 10**dec_exp <= 2**bin_exp < 10**(dec_exp + 1).
997
        // Since 1ulp == 2**bin_exp it will be in the range [1, 10) after scaling
998
        // by 10**dec_exp. Add 1 to combine the shift with division by two.
999
7.28k
        let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
1000
7.28k
        let upper = scaled_sig_mod10 + scaled_half_ulp;
1001
1002
        // value = 5.0507837461e-27
1003
        // next  = 5.0507837461000010e-27
1004
        //
1005
        // c = integral.fractional' = 50507837461000003.153987... (value)
1006
        //                            50507837461000010.328635... (next)
1007
        //          scaled_half_ulp =                 3.587324...
1008
        //
1009
        // fractional' = fractional / 2**64, fractional = 2840565642863009226
1010
        //
1011
        //      50507837461000000       c               upper     50507837461000010
1012
        //              s              l|   L             |               S
1013
        // ───┬────┬────┼────┬────┬────┼*-──┼────┬────┬───*┬────┬────┬────┼-*--┬───
1014
        //    8    9    0    1    2    3    4    5    6    7    8    9    0 |  1
1015
        //            └─────────────────┼─────────────────┘                next
1016
        //                             1ulp
1017
        //
1018
        // s - shorter underestimate, S - shorter overestimate
1019
        // l - longer underestimate,  L - longer overestimate
1020
1021
        // Check for boundary case when rounding down to nearest 10 and
1022
        // near-boundary case when rounding up to nearest 10.
1023
        // Case where upper == ten is insufficient: 1.342178e+08f.
1024
7.28k
        if ten.wrapping_sub(upper) <= 1 // upper == ten || upper == ten - 1
1025
7.28k
            || scaled_sig_mod10 == scaled_half_ulp
1026
        {
1027
0
            break;
1028
7.28k
        }
1029
1030
7.28k
        let shorter = (integral.into() - digit) as i64;
1031
7.28k
        let longer = (integral.into() + u64::from(cmp >= 0)) as i64;
1032
7.28k
        let dec_sig = select_if_less(scaled_sig_mod10, scaled_half_ulp, shorter, longer);
1033
7.28k
        return ToDecimalResult {
1034
7.28k
            sig: select_if_less(ten, upper, shorter + 10, dec_sig),
1035
7.28k
            exp: dec_exp,
1036
7.28k
        };
1037
    }
1038
6
    to_decimal_schubfach(bin_sig, bin_exp, regular)
1039
7.28k
}
1040
1041
/// Writes the shortest correctly rounded decimal representation of `value` to
1042
/// `buffer`. `buffer` should point to a buffer of size `buffer_size` or larger.
1043
#[cfg_attr(feature = "no-panic", no_panic)]
1044
22.0k
unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
1045
22.0k
where
1046
22.0k
    Float: FloatTraits,
1047
{
1048
22.0k
    let bits = value.to_bits();
1049
    // It is beneficial to extract exponent and significand early.
1050
22.0k
    let bin_exp = Float::get_exp(bits); // binary exponent
1051
22.0k
    let bin_sig = Float::get_sig(bits); // binary significand
1052
1053
22.0k
    unsafe {
1054
22.0k
        *buffer = b'-';
1055
22.0k
    }
1056
22.0k
    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };
1057
1058
    let mut dec;
1059
22.0k
    let threshold = if Float::NUM_BITS == 64 {
1060
5.08k
        10_000_000_000_000_000
1061
    } else {
1062
16.9k
        100_000_000
1063
    };
1064
22.0k
    if bin_exp == 0 {
1065
14.7k
        if bin_sig == Float::SigType::from(0) {
1066
            return unsafe {
1067
14.7k
                *buffer = b'0';
1068
14.7k
                *buffer.add(1) = b'.';
1069
14.7k
                *buffer.add(2) = b'0';
1070
14.7k
                buffer.add(3)
1071
            };
1072
0
        }
1073
0
        dec = to_decimal_schubfach(bin_sig, i64::from(1 - Float::EXP_OFFSET), true);
1074
0
        while dec.sig < threshold {
1075
0
            dec.sig *= 10;
1076
0
            dec.exp -= 1;
1077
0
        }
1078
7.28k
    } else {
1079
7.28k
        dec = to_decimal_fast::<Float, Float::SigType>(
1080
7.28k
            bin_sig | Float::IMPLICIT_BIT,
1081
7.28k
            bin_exp,
1082
7.28k
            bin_sig != Float::SigType::from(0),
1083
7.28k
        );
1084
7.28k
    }
1085
7.28k
    let mut dec_exp = dec.exp;
1086
7.28k
    let extra_digit = dec.sig >= threshold;
1087
7.28k
    dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(extra_digit);
1088
7.28k
    if Float::NUM_BITS == 32 && dec.sig < 10_000_000 {
1089
0
        dec.sig *= 10;
1090
0
        dec_exp -= 1;
1091
7.28k
    }
1092
1093
    // Write significand.
1094
7.28k
    let end = unsafe { write_significand::<Float>(buffer.add(1), dec.sig as u64, extra_digit) };
1095
1096
7.28k
    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;
1097
1098
7.28k
    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
1099
0
        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
1100
    {
1101
7.28k
        if length as i32 - 1 <= dec_exp {
1102
            // 1234e7 -> 12340000000.0
1103
            return unsafe {
1104
1.27k
                ptr::copy(buffer.add(1), buffer, length);
1105
1.27k
                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
1106
1.27k
                *buffer.add(dec_exp as usize + 1) = b'.';
1107
1.27k
                buffer.add(dec_exp as usize + 3)
1108
            };
1109
6.01k
        } else if 0 <= dec_exp {
1110
            // 1234e-2 -> 12.34
1111
            return unsafe {
1112
4.74k
                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
1113
4.74k
                *buffer.add(dec_exp as usize + 1) = b'.';
1114
4.74k
                buffer.add(length + 1)
1115
            };
1116
        } else {
1117
            // 1234e-6 -> 0.001234
1118
            return unsafe {
1119
1.27k
                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
1120
1.27k
                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
1121
1.27k
                *buffer.add(1) = b'.';
1122
1.27k
                buffer.add((1 - dec_exp) as usize + length)
1123
            };
1124
        }
1125
0
    }
1126
1127
0
    unsafe {
1128
0
        // 1234e30 -> 1.234e33
1129
0
        *buffer = *buffer.add(1);
1130
0
        *buffer.add(1) = b'.';
1131
0
    }
1132
0
    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };
1133
1134
    // Write exponent.
1135
0
    let sign_ptr = buffer;
1136
0
    let e_sign = if dec_exp >= 0 {
1137
0
        (u16::from(b'+') << 8) | u16::from(b'e')
1138
    } else {
1139
0
        (u16::from(b'-') << 8) | u16::from(b'e')
1140
    };
1141
0
    buffer = unsafe { buffer.add(1) };
1142
0
    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
1143
0
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
1144
0
    if Float::MIN_10_EXP > -100 && Float::MAX_10_EXP < 100 {
1145
        unsafe {
1146
0
            buffer
1147
0
                .cast::<u16>()
1148
0
                .write_unaligned(*digits2(dec_exp as usize));
1149
0
            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1150
0
            return buffer.add(2);
1151
        }
1152
0
    }
1153
    // digit = dec_exp / 100
1154
0
    let digit = if USE_UMUL128_HI64 {
1155
0
        umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
1156
    } else {
1157
0
        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
1158
    };
1159
0
    unsafe {
1160
0
        *buffer = b'0' + digit as u8;
1161
0
    }
1162
0
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
1163
    unsafe {
1164
0
        buffer
1165
0
            .cast::<u16>()
1166
0
            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
1167
0
        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1168
0
        buffer.add(2)
1169
    }
1170
22.0k
}
zmij::write::<f64>
Line
Count
Source
1044
5.08k
unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
1045
5.08k
where
1046
5.08k
    Float: FloatTraits,
1047
{
1048
5.08k
    let bits = value.to_bits();
1049
    // It is beneficial to extract exponent and significand early.
1050
5.08k
    let bin_exp = Float::get_exp(bits); // binary exponent
1051
5.08k
    let bin_sig = Float::get_sig(bits); // binary significand
1052
1053
5.08k
    unsafe {
1054
5.08k
        *buffer = b'-';
1055
5.08k
    }
1056
5.08k
    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };
1057
1058
    let mut dec;
1059
5.08k
    let threshold = if Float::NUM_BITS == 64 {
1060
5.08k
        10_000_000_000_000_000
1061
    } else {
1062
0
        100_000_000
1063
    };
1064
5.08k
    if bin_exp == 0 {
1065
5.08k
        if bin_sig == Float::SigType::from(0) {
1066
            return unsafe {
1067
5.08k
                *buffer = b'0';
1068
5.08k
                *buffer.add(1) = b'.';
1069
5.08k
                *buffer.add(2) = b'0';
1070
5.08k
                buffer.add(3)
1071
            };
1072
0
        }
1073
0
        dec = to_decimal_schubfach(bin_sig, i64::from(1 - Float::EXP_OFFSET), true);
1074
0
        while dec.sig < threshold {
1075
0
            dec.sig *= 10;
1076
0
            dec.exp -= 1;
1077
0
        }
1078
0
    } else {
1079
0
        dec = to_decimal_fast::<Float, Float::SigType>(
1080
0
            bin_sig | Float::IMPLICIT_BIT,
1081
0
            bin_exp,
1082
0
            bin_sig != Float::SigType::from(0),
1083
0
        );
1084
0
    }
1085
0
    let mut dec_exp = dec.exp;
1086
0
    let extra_digit = dec.sig >= threshold;
1087
0
    dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(extra_digit);
1088
0
    if Float::NUM_BITS == 32 && dec.sig < 10_000_000 {
1089
0
        dec.sig *= 10;
1090
0
        dec_exp -= 1;
1091
0
    }
1092
1093
    // Write significand.
1094
0
    let end = unsafe { write_significand::<Float>(buffer.add(1), dec.sig as u64, extra_digit) };
1095
1096
0
    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;
1097
1098
0
    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
1099
0
        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
1100
    {
1101
0
        if length as i32 - 1 <= dec_exp {
1102
            // 1234e7 -> 12340000000.0
1103
            return unsafe {
1104
0
                ptr::copy(buffer.add(1), buffer, length);
1105
0
                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
1106
0
                *buffer.add(dec_exp as usize + 1) = b'.';
1107
0
                buffer.add(dec_exp as usize + 3)
1108
            };
1109
0
        } else if 0 <= dec_exp {
1110
            // 1234e-2 -> 12.34
1111
            return unsafe {
1112
0
                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
1113
0
                *buffer.add(dec_exp as usize + 1) = b'.';
1114
0
                buffer.add(length + 1)
1115
            };
1116
        } else {
1117
            // 1234e-6 -> 0.001234
1118
            return unsafe {
1119
0
                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
1120
0
                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
1121
0
                *buffer.add(1) = b'.';
1122
0
                buffer.add((1 - dec_exp) as usize + length)
1123
            };
1124
        }
1125
0
    }
1126
1127
0
    unsafe {
1128
0
        // 1234e30 -> 1.234e33
1129
0
        *buffer = *buffer.add(1);
1130
0
        *buffer.add(1) = b'.';
1131
0
    }
1132
0
    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };
1133
1134
    // Write exponent.
1135
0
    let sign_ptr = buffer;
1136
0
    let e_sign = if dec_exp >= 0 {
1137
0
        (u16::from(b'+') << 8) | u16::from(b'e')
1138
    } else {
1139
0
        (u16::from(b'-') << 8) | u16::from(b'e')
1140
    };
1141
0
    buffer = unsafe { buffer.add(1) };
1142
0
    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
1143
0
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
1144
0
    if Float::MIN_10_EXP > -100 && Float::MAX_10_EXP < 100 {
1145
        unsafe {
1146
0
            buffer
1147
0
                .cast::<u16>()
1148
0
                .write_unaligned(*digits2(dec_exp as usize));
1149
0
            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1150
0
            return buffer.add(2);
1151
        }
1152
0
    }
1153
    // digit = dec_exp / 100
1154
0
    let digit = if USE_UMUL128_HI64 {
1155
0
        umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
1156
    } else {
1157
0
        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
1158
    };
1159
0
    unsafe {
1160
0
        *buffer = b'0' + digit as u8;
1161
0
    }
1162
0
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
1163
    unsafe {
1164
0
        buffer
1165
0
            .cast::<u16>()
1166
0
            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
1167
0
        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1168
0
        buffer.add(2)
1169
    }
1170
5.08k
}
zmij::write::<f32>
Line
Count
Source
1044
16.9k
unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
1045
16.9k
where
1046
16.9k
    Float: FloatTraits,
1047
{
1048
16.9k
    let bits = value.to_bits();
1049
    // It is beneficial to extract exponent and significand early.
1050
16.9k
    let bin_exp = Float::get_exp(bits); // binary exponent
1051
16.9k
    let bin_sig = Float::get_sig(bits); // binary significand
1052
1053
16.9k
    unsafe {
1054
16.9k
        *buffer = b'-';
1055
16.9k
    }
1056
16.9k
    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };
1057
1058
    let mut dec;
1059
16.9k
    let threshold = if Float::NUM_BITS == 64 {
1060
0
        10_000_000_000_000_000
1061
    } else {
1062
16.9k
        100_000_000
1063
    };
1064
16.9k
    if bin_exp == 0 {
1065
9.64k
        if bin_sig == Float::SigType::from(0) {
1066
            return unsafe {
1067
9.64k
                *buffer = b'0';
1068
9.64k
                *buffer.add(1) = b'.';
1069
9.64k
                *buffer.add(2) = b'0';
1070
9.64k
                buffer.add(3)
1071
            };
1072
0
        }
1073
0
        dec = to_decimal_schubfach(bin_sig, i64::from(1 - Float::EXP_OFFSET), true);
1074
0
        while dec.sig < threshold {
1075
0
            dec.sig *= 10;
1076
0
            dec.exp -= 1;
1077
0
        }
1078
7.28k
    } else {
1079
7.28k
        dec = to_decimal_fast::<Float, Float::SigType>(
1080
7.28k
            bin_sig | Float::IMPLICIT_BIT,
1081
7.28k
            bin_exp,
1082
7.28k
            bin_sig != Float::SigType::from(0),
1083
7.28k
        );
1084
7.28k
    }
1085
7.28k
    let mut dec_exp = dec.exp;
1086
7.28k
    let extra_digit = dec.sig >= threshold;
1087
7.28k
    dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(extra_digit);
1088
7.28k
    if Float::NUM_BITS == 32 && dec.sig < 10_000_000 {
1089
0
        dec.sig *= 10;
1090
0
        dec_exp -= 1;
1091
7.28k
    }
1092
1093
    // Write significand.
1094
7.28k
    let end = unsafe { write_significand::<Float>(buffer.add(1), dec.sig as u64, extra_digit) };
1095
1096
7.28k
    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;
1097
1098
7.28k
    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
1099
0
        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
1100
    {
1101
7.28k
        if length as i32 - 1 <= dec_exp {
1102
            // 1234e7 -> 12340000000.0
1103
            return unsafe {
1104
1.27k
                ptr::copy(buffer.add(1), buffer, length);
1105
1.27k
                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
1106
1.27k
                *buffer.add(dec_exp as usize + 1) = b'.';
1107
1.27k
                buffer.add(dec_exp as usize + 3)
1108
            };
1109
6.01k
        } else if 0 <= dec_exp {
1110
            // 1234e-2 -> 12.34
1111
            return unsafe {
1112
4.74k
                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
1113
4.74k
                *buffer.add(dec_exp as usize + 1) = b'.';
1114
4.74k
                buffer.add(length + 1)
1115
            };
1116
        } else {
1117
            // 1234e-6 -> 0.001234
1118
            return unsafe {
1119
1.27k
                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
1120
1.27k
                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
1121
1.27k
                *buffer.add(1) = b'.';
1122
1.27k
                buffer.add((1 - dec_exp) as usize + length)
1123
            };
1124
        }
1125
0
    }
1126
1127
0
    unsafe {
1128
0
        // 1234e30 -> 1.234e33
1129
0
        *buffer = *buffer.add(1);
1130
0
        *buffer.add(1) = b'.';
1131
0
    }
1132
0
    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };
1133
1134
    // Write exponent.
1135
0
    let sign_ptr = buffer;
1136
0
    let e_sign = if dec_exp >= 0 {
1137
0
        (u16::from(b'+') << 8) | u16::from(b'e')
1138
    } else {
1139
0
        (u16::from(b'-') << 8) | u16::from(b'e')
1140
    };
1141
0
    buffer = unsafe { buffer.add(1) };
1142
0
    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
1143
0
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
1144
0
    if Float::MIN_10_EXP > -100 && Float::MAX_10_EXP < 100 {
1145
        unsafe {
1146
0
            buffer
1147
0
                .cast::<u16>()
1148
0
                .write_unaligned(*digits2(dec_exp as usize));
1149
0
            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1150
0
            return buffer.add(2);
1151
        }
1152
0
    }
1153
    // digit = dec_exp / 100
1154
0
    let digit = if USE_UMUL128_HI64 {
1155
0
        umul128_hi64(dec_exp as u64, 0x290000000000000) as u32
1156
    } else {
1157
0
        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP
1158
    };
1159
0
    unsafe {
1160
0
        *buffer = b'0' + digit as u8;
1161
0
    }
1162
0
    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
1163
    unsafe {
1164
0
        buffer
1165
0
            .cast::<u16>()
1166
0
            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
1167
0
        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1168
0
        buffer.add(2)
1169
    }
1170
16.9k
}
1171
1172
/// Safe API for formatting floating point numbers to text.
1173
///
1174
/// ## Example
1175
///
1176
/// ```
1177
/// let mut buffer = zmij::Buffer::new();
1178
/// let printed = buffer.format_finite(1.234);
1179
/// assert_eq!(printed, "1.234");
1180
/// ```
1181
pub struct Buffer {
1182
    bytes: [MaybeUninit<u8>; BUFFER_SIZE],
1183
}
1184
1185
impl Buffer {
1186
    /// This is a cheap operation; you don't need to worry about reusing buffers
1187
    /// for efficiency.
1188
    #[inline]
1189
    #[cfg_attr(feature = "no-panic", no_panic)]
1190
22.0k
    pub fn new() -> Self {
1191
22.0k
        let bytes = [MaybeUninit::<u8>::uninit(); BUFFER_SIZE];
1192
22.0k
        Buffer { bytes }
1193
22.0k
    }
1194
1195
    /// Print a floating point number into this buffer and return a reference to
1196
    /// its string representation within the buffer.
1197
    ///
1198
    /// # Special cases
1199
    ///
1200
    /// This function formats NaN as the string "NaN", positive infinity as
1201
    /// "inf", and negative infinity as "-inf" to match std::fmt.
1202
    ///
1203
    /// If your input is known to be finite, you may get better performance by
1204
    /// calling the `format_finite` method instead of `format` to avoid the
1205
    /// checks for special cases.
1206
    #[cfg_attr(feature = "no-panic", no_panic)]
1207
0
    pub fn format<F: Float>(&mut self, f: F) -> &str {
1208
0
        if f.is_nonfinite() {
1209
0
            f.format_nonfinite()
1210
        } else {
1211
0
            self.format_finite(f)
1212
        }
1213
0
    }
Unexecuted instantiation: <zmij::Buffer>::format::<f64>
Unexecuted instantiation: <zmij::Buffer>::format::<_>
1214
1215
    /// Print a floating point number into this buffer and return a reference to
1216
    /// its string representation within the buffer.
1217
    ///
1218
    /// # Special cases
1219
    ///
1220
    /// This function **does not** check for NaN or infinity. If the input
1221
    /// number is not a finite float, the printed representation will be some
1222
    /// correctly formatted but unspecified numerical value.
1223
    ///
1224
    /// Please check [`is_finite`] yourself before calling this function, or
1225
    /// check [`is_nan`] and [`is_infinite`] and handle those cases yourself.
1226
    ///
1227
    /// [`is_finite`]: f64::is_finite
1228
    /// [`is_nan`]: f64::is_nan
1229
    /// [`is_infinite`]: f64::is_infinite
1230
    #[cfg_attr(feature = "no-panic", no_panic)]
1231
22.0k
    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1232
        unsafe {
1233
22.0k
            let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1234
22.0k
            let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1235
22.0k
            let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1236
22.0k
            str::from_utf8_unchecked(slice)
1237
        }
1238
22.0k
    }
<zmij::Buffer>::format_finite::<f64>
Line
Count
Source
1231
5.08k
    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1232
        unsafe {
1233
5.08k
            let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1234
5.08k
            let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1235
5.08k
            let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1236
5.08k
            str::from_utf8_unchecked(slice)
1237
        }
1238
5.08k
    }
<zmij::Buffer>::format_finite::<f32>
Line
Count
Source
1231
16.9k
    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1232
        unsafe {
1233
16.9k
            let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1234
16.9k
            let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1235
16.9k
            let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1236
16.9k
            str::from_utf8_unchecked(slice)
1237
        }
1238
16.9k
    }
Unexecuted instantiation: <zmij::Buffer>::format_finite::<f64>
Unexecuted instantiation: <zmij::Buffer>::format_finite::<f32>
Unexecuted instantiation: <zmij::Buffer>::format_finite::<_>
1239
}
1240
1241
/// A floating point number, f32 or f64, that can be written into a
1242
/// [`zmij::Buffer`][Buffer].
1243
///
1244
/// This trait is sealed and cannot be implemented for types outside of the
1245
/// `zmij` crate.
1246
#[allow(unknown_lints)] // rustc older than 1.74
1247
#[allow(private_bounds)]
1248
pub trait Float: private::Sealed {}
1249
impl Float for f32 {}
1250
impl Float for f64 {}
1251
1252
mod private {
1253
    pub trait Sealed: crate::traits::Float {
1254
        fn is_nonfinite(self) -> bool;
1255
        fn format_nonfinite(self) -> &'static str;
1256
        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8;
1257
    }
1258
1259
    impl Sealed for f32 {
1260
        #[inline]
1261
0
        fn is_nonfinite(self) -> bool {
1262
            const EXP_MASK: u32 = 0x7f800000;
1263
0
            let bits = self.to_bits();
1264
0
            bits & EXP_MASK == EXP_MASK
1265
0
        }
1266
1267
        #[cold]
1268
        #[cfg_attr(feature = "no-panic", inline)]
1269
0
        fn format_nonfinite(self) -> &'static str {
1270
            const MANTISSA_MASK: u32 = 0x007fffff;
1271
            const SIGN_MASK: u32 = 0x80000000;
1272
0
            let bits = self.to_bits();
1273
0
            if bits & MANTISSA_MASK != 0 {
1274
0
                crate::NAN
1275
0
            } else if bits & SIGN_MASK != 0 {
1276
0
                crate::NEG_INFINITY
1277
            } else {
1278
0
                crate::INFINITY
1279
            }
1280
0
        }
1281
1282
        #[cfg_attr(feature = "no-panic", inline)]
1283
16.9k
        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1284
16.9k
            unsafe { crate::write(self, buffer) }
1285
16.9k
        }
1286
    }
1287
1288
    impl Sealed for f64 {
1289
        #[inline]
1290
0
        fn is_nonfinite(self) -> bool {
1291
            const EXP_MASK: u64 = 0x7ff0000000000000;
1292
0
            let bits = self.to_bits();
1293
0
            bits & EXP_MASK == EXP_MASK
1294
0
        }
1295
1296
        #[cold]
1297
        #[cfg_attr(feature = "no-panic", inline)]
1298
0
        fn format_nonfinite(self) -> &'static str {
1299
            const MANTISSA_MASK: u64 = 0x000fffffffffffff;
1300
            const SIGN_MASK: u64 = 0x8000000000000000;
1301
0
            let bits = self.to_bits();
1302
0
            if bits & MANTISSA_MASK != 0 {
1303
0
                crate::NAN
1304
0
            } else if bits & SIGN_MASK != 0 {
1305
0
                crate::NEG_INFINITY
1306
            } else {
1307
0
                crate::INFINITY
1308
            }
1309
0
        }
1310
1311
        #[cfg_attr(feature = "no-panic", inline)]
1312
5.08k
        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1313
5.08k
            unsafe { crate::write(self, buffer) }
1314
5.08k
        }
1315
    }
1316
}
1317
1318
impl Default for Buffer {
1319
    #[inline]
1320
    #[cfg_attr(feature = "no-panic", no_panic)]
1321
0
    fn default() -> Self {
1322
0
        Buffer::new()
1323
0
    }
1324
}