/rust/registry/src/index.crates.io-6f17d22bba15001f/libm-0.2.11/src/math/sqrtf.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ |
2 | | /* |
3 | | * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. |
4 | | */ |
5 | | /* |
6 | | * ==================================================== |
7 | | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
8 | | * |
9 | | * Developed at SunPro, a Sun Microsystems, Inc. business. |
10 | | * Permission to use, copy, modify, and distribute this |
11 | | * software is freely granted, provided that this notice |
12 | | * is preserved. |
13 | | * ==================================================== |
14 | | */ |
15 | | |
16 | | /// The square root of `x` (f32). |
17 | | #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] |
18 | 0 | pub fn sqrtf(x: f32) -> f32 { |
19 | | // On wasm32 we know that LLVM's intrinsic will compile to an optimized |
20 | | // `f32.sqrt` native instruction, so we can leverage this for both code size |
21 | | // and speed. |
22 | | llvm_intrinsically_optimized! { |
23 | | #[cfg(target_arch = "wasm32")] { |
24 | | return if x < 0.0 { |
25 | | ::core::f32::NAN |
26 | | } else { |
27 | | unsafe { ::core::intrinsics::sqrtf32(x) } |
28 | | } |
29 | | } |
30 | | } |
31 | | #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] |
32 | | { |
33 | | // Note: This path is unlikely since LLVM will usually have already |
34 | | // optimized sqrt calls into hardware instructions if sse is available, |
35 | | // but if someone does end up here they'll appreciate the speed increase. |
36 | | #[cfg(target_arch = "x86")] |
37 | | use core::arch::x86::*; |
38 | | #[cfg(target_arch = "x86_64")] |
39 | | use core::arch::x86_64::*; |
40 | | unsafe { |
41 | 0 | let m = _mm_set_ss(x); |
42 | 0 | let m_sqrt = _mm_sqrt_ss(m); |
43 | 0 | _mm_cvtss_f32(m_sqrt) |
44 | 0 | } |
45 | 0 | } |
46 | 0 | #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))] |
47 | 0 | { |
48 | 0 | const TINY: f32 = 1.0e-30; |
49 | 0 |
|
50 | 0 | let mut z: f32; |
51 | 0 | let sign: i32 = 0x80000000u32 as i32; |
52 | 0 | let mut ix: i32; |
53 | 0 | let mut s: i32; |
54 | 0 | let mut q: i32; |
55 | 0 | let mut m: i32; |
56 | 0 | let mut t: i32; |
57 | 0 | let mut i: i32; |
58 | 0 | let mut r: u32; |
59 | 0 |
|
60 | 0 | ix = x.to_bits() as i32; |
61 | 0 |
|
62 | 0 | /* take care of Inf and NaN */ |
63 | 0 | if (ix as u32 & 0x7f800000) == 0x7f800000 { |
64 | 0 | return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ |
65 | 0 | } |
66 | 0 |
|
67 | 0 | /* take care of zero */ |
68 | 0 | if ix <= 0 { |
69 | 0 | if (ix & !sign) == 0 { |
70 | 0 | return x; /* sqrt(+-0) = +-0 */ |
71 | 0 | } |
72 | 0 | if ix < 0 { |
73 | 0 | return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ |
74 | 0 | } |
75 | 0 | } |
76 | 0 |
|
77 | 0 | /* normalize x */ |
78 | 0 | m = ix >> 23; |
79 | 0 | if m == 0 { |
80 | 0 | /* subnormal x */ |
81 | 0 | i = 0; |
82 | 0 | while ix & 0x00800000 == 0 { |
83 | 0 | ix <<= 1; |
84 | 0 | i = i + 1; |
85 | 0 | } |
86 | 0 | m -= i - 1; |
87 | 0 | } |
88 | 0 | m -= 127; /* unbias exponent */ |
89 | 0 | ix = (ix & 0x007fffff) | 0x00800000; |
90 | 0 | if m & 1 == 1 { |
91 | 0 | /* odd m, double x to make it even */ |
92 | 0 | ix += ix; |
93 | 0 | } |
94 | 0 | m >>= 1; /* m = [m/2] */ |
95 | 0 |
|
96 | 0 | /* generate sqrt(x) bit by bit */ |
97 | 0 | ix += ix; |
98 | 0 | q = 0; |
99 | 0 | s = 0; |
100 | 0 | r = 0x01000000; /* r = moving bit from right to left */ |
101 | 0 |
|
102 | 0 | while r != 0 { |
103 | 0 | t = s + r as i32; |
104 | 0 | if t <= ix { |
105 | 0 | s = t + r as i32; |
106 | 0 | ix -= t; |
107 | 0 | q += r as i32; |
108 | 0 | } |
109 | 0 | ix += ix; |
110 | 0 | r >>= 1; |
111 | 0 | } |
112 | 0 |
|
113 | 0 | /* use floating add to find out rounding direction */ |
114 | 0 | if ix != 0 { |
115 | 0 | z = 1.0 - TINY; /* raise inexact flag */ |
116 | 0 | if z >= 1.0 { |
117 | 0 | z = 1.0 + TINY; |
118 | 0 | if z > 1.0 { |
119 | 0 | q += 2; |
120 | 0 | } else { |
121 | 0 | q += q & 1; |
122 | 0 | } |
123 | 0 | } |
124 | 0 | } |
125 | 0 |
|
126 | 0 | ix = (q >> 1) + 0x3f000000; |
127 | 0 | ix += m << 23; |
128 | 0 | f32::from_bits(ix as u32) |
129 | 0 | } |
130 | 0 | } |
131 | | |
// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
#[cfg(not(target_arch = "powerpc64"))]
#[cfg(test)]
mod tests {
    use core::f32::{INFINITY, NAN};

    use super::*;

    /// Perfect squares must produce exact results.
    #[test]
    fn sanity_check() {
        assert_eq!(sqrtf(100.0), 10.0);
        assert_eq!(sqrtf(4.0), 2.0);
    }

    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
    #[test]
    fn spec_tests() {
        // Not Asserted: FE_INVALID exception is raised if argument is negative.
        assert!(sqrtf(-1.0).is_nan());
        assert!(sqrtf(NAN).is_nan());
        // Compare bit patterns rather than float values: `-0.0 == 0.0` is
        // true, so a value comparison would not notice a lost sign on zero.
        for f in [0.0f32, -0.0, INFINITY].iter().copied() {
            assert_eq!(sqrtf(f).to_bits(), f.to_bits(), "sqrtf({:?})", f);
        }
    }

    /// Checks exact result bit patterns for a normal value, a perfect square,
    /// a subnormal input and infinity.
    #[test]
    fn conformance_tests() {
        let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY];
        let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32];

        for (value, expected) in values.iter().copied().zip(results.iter().copied()) {
            assert_eq!(expected, sqrtf(value).to_bits(), "sqrtf({:?})", value);
        }
    }
}