Coverage Report

Created: 2025-07-18 06:22

/rust/registry/src/index.crates.io-6f17d22bba15001f/libm-0.2.11/src/math/fma.rs
Line | Count | Source
   1 |       | use core::{f32, f64};
   2 |       |
   3 |       | use super::scalbn;
   4 |       |
   5 |       | const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
   6 |       |
   7 |       | struct Num {
   8 |       |     m: u64,
   9 |       |     e: i32,
  10 |       |     sign: i32,
  11 |       | }
  12 |       |
  13 |     0 | fn normalize(x: f64) -> Num {
  14 |     0 |     let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
  15 |     0 |
  16 |     0 |     let mut ix: u64 = x.to_bits();
  17 |     0 |     let mut e: i32 = (ix >> 52) as i32;
  18 |     0 |     let sign: i32 = e & 0x800;
  19 |     0 |     e &= 0x7ff;
  20 |     0 |     if e == 0 {
  21 |     0 |         ix = (x * x1p63).to_bits();
  22 |     0 |         e = (ix >> 52) as i32 & 0x7ff;
  23 |     0 |         e = if e != 0 { e - 63 } else { 0x800 };
  24 |     0 |     }
  25 |     0 |     ix &= (1 << 52) - 1;
  26 |     0 |     ix |= 1 << 52;
  27 |     0 |     ix <<= 1;
  28 |     0 |     e -= 0x3ff + 52 + 1;
  29 |     0 |     Num { m: ix, e, sign }
  30 |     0 | }
  31 |       |
  32 |       | #[inline]
  33 |     0 | fn mul(x: u64, y: u64) -> (u64, u64) {
  34 |     0 |     let t = (x as u128).wrapping_mul(y as u128);
  35 |     0 |     ((t >> 64) as u64, t as u64)
  36 |     0 | }
  37 |       |
  38 |       | /// Floating multiply add (f64)
  39 |       | ///
  40 |       | /// Computes `(x*y)+z`, rounded as one ternary operation:
  41 |       | /// Computes the value (as if) to infinite precision and rounds once to the result format,
  42 |       | /// according to the rounding mode characterized by the value of FLT_ROUNDS.
  43 |       | #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
  44 |     0 | pub fn fma(x: f64, y: f64, z: f64) -> f64 {
  45 |     0 |     let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
  46 |     0 |     let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
  47 |     0 |
  48 |     0 |     /* normalize so top 10bits and last bit are 0 */
  49 |     0 |     let nx = normalize(x);
  50 |     0 |     let ny = normalize(y);
  51 |     0 |     let nz = normalize(z);
  52 |     0 |
  53 |     0 |     if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
  54 |     0 |         return x * y + z;
  55 |     0 |     }
  56 |     0 |     if nz.e >= ZEROINFNAN {
  57 |     0 |         if nz.e > ZEROINFNAN {
  58 |       |             /* z==0 */
  59 |     0 |             return x * y + z;
  60 |     0 |         }
  61 |     0 |         return z;
  62 |     0 |     }
  63 |     0 |
  64 |     0 |     /* mul: r = x*y */
  65 |     0 |     let zhi: u64;
  66 |     0 |     let zlo: u64;
  67 |     0 |     let (mut rhi, mut rlo) = mul(nx.m, ny.m);
  68 |     0 |     /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
  69 |     0 |
  70 |     0 |     /* align exponents */
  71 |     0 |     let mut e: i32 = nx.e + ny.e;
  72 |     0 |     let mut d: i32 = nz.e - e;
  73 |     0 |     /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
  74 |     0 |     if d > 0 {
  75 |     0 |         if d < 64 {
  76 |     0 |             zlo = nz.m << d;
  77 |     0 |             zhi = nz.m >> (64 - d);
  78 |     0 |         } else {
  79 |     0 |             zlo = 0;
  80 |     0 |             zhi = nz.m;
  81 |     0 |             e = nz.e - 64;
  82 |     0 |             d -= 64;
  83 |     0 |             if d == 0 {
  84 |     0 |             } else if d < 64 {
  85 |     0 |                 rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64;
  86 |     0 |                 rhi = rhi >> d;
  87 |     0 |             } else {
  88 |     0 |                 rlo = 1;
  89 |     0 |                 rhi = 0;
  90 |     0 |             }
  91 |       |         }
  92 |       |     } else {
  93 |     0 |         zhi = 0;
  94 |     0 |         d = -d;
  95 |     0 |         if d == 0 {
  96 |     0 |             zlo = nz.m;
  97 |     0 |         } else if d < 64 {
  98 |     0 |             zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64;
  99 |     0 |         } else {
 100 |     0 |             zlo = 1;
 101 |     0 |         }
 102 |       |     }
 103 |       |
 104 |       |     /* add */
 105 |     0 |     let mut sign: i32 = nx.sign ^ ny.sign;
 106 |     0 |     let samesign: bool = (sign ^ nz.sign) == 0;
 107 |     0 |     let mut nonzero: i32 = 1;
 108 |     0 |     if samesign {
 109 |     0 |         /* r += z */
 110 |     0 |         rlo = rlo.wrapping_add(zlo);
 111 |     0 |         rhi += zhi + (rlo < zlo) as u64;
 112 |     0 |     } else {
 113 |       |         /* r -= z */
 114 |     0 |         let (res, borrow) = rlo.overflowing_sub(zlo);
 115 |     0 |         rlo = res;
 116 |     0 |         rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64));
 117 |     0 |         if (rhi >> 63) != 0 {
 118 |     0 |             rlo = (rlo as i64).wrapping_neg() as u64;
 119 |     0 |             rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64;
 120 |     0 |             sign = (sign == 0) as i32;
 121 |     0 |         }
 122 |     0 |         nonzero = (rhi != 0) as i32;
 123 |       |     }
 124 |       |
 125 |       |     /* set rhi to top 63bit of the result (last bit is sticky) */
 126 |     0 |     if nonzero != 0 {
 127 |     0 |         e += 64;
 128 |     0 |         d = rhi.leading_zeros() as i32 - 1;
 129 |     0 |         /* note: d > 0 */
 130 |     0 |         rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64;
 131 |     0 |     } else if rlo != 0 {
 132 |     0 |         d = rlo.leading_zeros() as i32 - 1;
 133 |     0 |         if d < 0 {
 134 |     0 |             rhi = rlo >> 1 | (rlo & 1);
 135 |     0 |         } else {
 136 |     0 |             rhi = rlo << d;
 137 |     0 |         }
 138 |       |     } else {
 139 |       |         /* exact +-0 */
 140 |     0 |         return x * y + z;
 141 |       |     }
 142 |     0 |     e -= d;
 143 |     0 |
 144 |     0 |     /* convert to double */
 145 |     0 |     let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
 146 |     0 |     if sign != 0 {
 147 |     0 |         i = -i;
 148 |     0 |     }
 149 |     0 |     let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
 150 |     0 |
 151 |     0 |     if e < -1022 - 62 {
 152 |       |         /* result is subnormal before rounding */
 153 |     0 |         if e == -1022 - 63 {
 154 |     0 |             let mut c: f64 = x1p63;
 155 |     0 |             if sign != 0 {
 156 |     0 |                 c = -c;
 157 |     0 |             }
 158 |     0 |             if r == c {
 159 |       |                 /* min normal after rounding, underflow depends
 160 |       |                 on arch behaviour which can be imitated by
 161 |       |                 a double to float conversion */
 162 |     0 |                 let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
 163 |     0 |                 return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
 164 |     0 |             }
 165 |     0 |             /* one bit is lost when scaled, add another top bit to
 166 |     0 |             only round once at conversion if it is inexact */
 167 |     0 |             if (rhi << 53) != 0 {
 168 |     0 |                 i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
 169 |     0 |                 if sign != 0 {
 170 |     0 |                     i = -i;
 171 |     0 |                 }
 172 |     0 |                 r = i as f64;
 173 |     0 |                 r = 2. * r - c; /* remove top bit */
 174 |     0 |
 175 |     0 |                 /* raise underflow portably, such that it
 176 |     0 |                 cannot be optimized away */
 177 |     0 |                 {
 178 |     0 |                     let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
 179 |     0 |                     r += (tiny * tiny) * (r - r);
 180 |     0 |                 }
 181 |     0 |             }
 182 |       |         } else {
 183 |       |             /* only round once when scaled */
 184 |     0 |             d = 10;
 185 |     0 |             i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
 186 |     0 |             if sign != 0 {
 187 |     0 |                 i = -i;
 188 |     0 |             }
 189 |     0 |             r = i as f64;
 190 |       |         }
 191 |     0 |     }
 192 |     0 |     scalbn(r, e)
 193 |     0 | }
 194 |       |
 195 |       | #[cfg(test)]
 196 |       | mod tests {
 197 |       |     use super::*;
 198 |       |     #[test]
 199 |       |     fn fma_segfault() {
 200 |       |         // These two inputs cause fma to segfault on release due to overflow:
 201 |       |         assert_eq!(
 202 |       |             fma(
 203 |       |                 -0.0000000000000002220446049250313,
 204 |       |                 -0.0000000000000002220446049250313,
 205 |       |                 -0.0000000000000002220446049250313
 206 |       |             ),
 207 |       |             -0.00000000000000022204460492503126,
 208 |       |         );
 209 |       |
 210 |       |         let result = fma(-0.992, -0.992, -0.992);
 211 |       |         // force rounding to storage format on x87 to prevent spurious errors.
 212 |       |         #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
 213 |       |         let result = force_eval!(result);
 214 |       |         assert_eq!(result, -0.007936000000000007,);
 215 |       |     }
 216 |       |
 217 |       |     #[test]
 218 |       |     fn fma_sbb() {
 219 |       |         assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
 220 |       |     }
 221 |       |
 222 |       |     #[test]
 223 |       |     fn fma_underflow() {
 224 |       |         assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
 225 |       |     }
 226 |       | }
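
As an illustration of the single-rounding behaviour documented on fma above, the short sketch below contrasts the fused result with the separately rounded x * y + z. It is not part of the coverage data and assumes the published libm crate (the code listed in this report) is available as a dependency.

// Sketch: demonstrates the single rounding that fma performs, versus
// rounding the product before the addition.
// Assumes `libm` is listed as a dependency of this example.
fn main() {
    let x = 1.0 + f64::EPSILON; // 1 + 2^-52
    let y = 1.0 - f64::EPSILON; // 1 - 2^-52
    let z = -1.0;

    // Exactly, x * y = 1 - 2^-104. Rounding the product first gives 1.0,
    // so the two-step form returns 0.0, while the fused form keeps -2^-104.
    let two_step = x * y + z;
    let fused = libm::fma(x, y, z);

    println!("two-step: {:e}", two_step); // 0e0
    println!("fused:    {:e}", fused); // -2^-104, about -4.93e-32
}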