/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.25/src/triangle/hypot.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use crate::common::{dyad_fmla, f_fmla}; |
30 | | use crate::double_double::DoubleDouble; |
31 | | use std::hint::black_box; |
32 | | |
33 | | // case hypot(x,y) >= 2^1024 |
34 | | #[cold] |
35 | 0 | fn hypot_overflow() -> f64 { |
36 | | const Z: f64 = f64::from_bits(0x7fefffffffffffff); |
37 | 0 | black_box(Z) + black_box(Z) |
38 | 0 | } |
39 | | |
40 | | #[cold] |
41 | 0 | fn hypot_denorm(a: u64, b: u64) -> f64 { |
42 | 0 | let mut a = a; |
43 | 0 | let mut b = b; |
44 | 0 | let af = (a as i64) as f64; |
45 | 0 | let bf = (b as i64) as f64; |
46 | | let mut underflow; |
47 | | // af and bf are x and y multiplied by 2^1074, thus integers |
48 | 0 | a = a.wrapping_shl(1); |
49 | 0 | b = b.wrapping_shl(1); |
50 | 0 | let mut rm = f_fmla(af, af, bf * bf).sqrt() as u64; |
51 | 0 | let mut tm: i64 = rm.wrapping_shl(1) as i64; |
52 | 0 | let mut denom: i64 = a |
53 | 0 | .wrapping_mul(a) |
54 | 0 | .wrapping_add(b.wrapping_mul(b)) |
55 | 0 | .wrapping_sub((tm as u64).wrapping_mul(tm as u64)) as i64; |
56 | | // D = a^2+b^2 - tm^2 |
57 | 0 | while denom > 2 * tm { |
58 | 0 | // tm too small |
59 | 0 | denom -= 2 * tm + 1; // (tm+1)^2 = tm^2 + 2*tm + 1 |
60 | 0 | tm = tm.wrapping_add(1); |
61 | 0 | } |
62 | 0 | while denom < 0 { |
63 | 0 | // tm too large |
64 | 0 | denom += 2 * tm - 1; // (tm-1)^2 = tm^2 - 2*tm + 1 |
65 | 0 | tm = tm.wrapping_sub(1); |
66 | 0 | } |
67 | | // tm = floor(sqrt(a^2+b^2)) and 0 <= D = a^2+b^2 - tm^2 < 2*tm+1 |
68 | | // if D=0 and tm is even, the result is exact |
69 | | // if D=0 and tm is odd, the result is a midpoint |
70 | 0 | let rb: i32 = (tm & 1) as i32; // round bit for rm |
71 | 0 | let rb2: i32 = (denom >= tm) as i32; // round bit for tm |
72 | 0 | let sb: i32 = (denom != 0) as i32; // sticky bit for rm |
73 | 0 | rm = (tm >> 1) as u64; // truncate the low bit |
74 | 0 | underflow = rm < 0x10000000000000u64; |
75 | 0 | if rb != 0 || sb != 0 { |
76 | 0 | let op = 1.0 + f64::from_bits(0x3c90000000000000); |
77 | 0 | let om = 1.0 - f64::from_bits(0x3c90000000000000); |
78 | 0 | if op == om { |
79 | | // rounding to nearest |
80 | 0 | if sb != 0 { |
81 | 0 | rm = rm.wrapping_add(rb as u64); |
82 | | // we have no underflow when rm is now 2^52 and rb2 != 0 |
83 | | // Remark: we cannot have a^2+b^2 = (tm+1/2)^2 exactly |
84 | | // since this would mean a^2+b^2 = tm^2+tm+1/4, |
85 | | // thus a^2+b^2 would be an odd multiple of 2^-1077 |
86 | | // (since ulp(tm) = 2^-1075) |
87 | 0 | if rm >> 52 != 0 && rb2 != 0 { |
88 | 0 | underflow = false; |
89 | 0 | } |
90 | 0 | } else { |
91 | 0 | // sticky bit is 0, round bit is 1: underflow doos not change |
92 | 0 | rm += rm & 1; // even rounding |
93 | 0 | } |
94 | 0 | } else if op > 1.0 { |
95 | | // rounding upwards |
96 | 0 | rm = rm.wrapping_add(1); |
97 | | // we have no underflow when rm is now 2^52 and tm was odd |
98 | 0 | if (rm >> 52) != 0 && (tm & 1) != 0 { |
99 | 0 | underflow = false; |
100 | 0 | } |
101 | 0 | } |
102 | 0 | if underflow { |
103 | 0 | // trigger underflow exception _after_ rounding for inexact results |
104 | 0 | let trig_uf = black_box(f64::from_bits(0x0010000000000000)); |
105 | 0 | _ = black_box(black_box(trig_uf) * black_box(trig_uf)); // triggers underflow |
106 | 0 | } |
107 | 0 | } |
108 | | // else the result is exact, and we have no underflow |
109 | 0 | f64::from_bits(rm) |
110 | 0 | } |
111 | | |
112 | | /* Here the square root is refined by Newton iterations: x^2+y^2 is exact |
113 | | and fits in a 128-bit integer, so the approximation is squared (which |
114 | | also fits in a 128-bit integer), compared and adjusted if necessary using |
115 | | the exact value of x^2+y^2. */ |
116 | | #[cold] |
117 | 0 | fn hypot_hard(x: f64, y: f64) -> f64 { |
118 | 0 | let op = 1.0 + f64::from_bits(0x3c90000000000000); |
119 | 0 | let om = 1.0 - f64::from_bits(0x3c90000000000000); |
120 | 0 | let mut xi = x.to_bits(); |
121 | 0 | let yi = y.to_bits(); |
122 | 0 | let mut bm = (xi & 0x000fffffffffffff) | (1u64 << 52); |
123 | 0 | let mut lm = (yi & 0x000fffffffffffff) | (1u64 << 52); |
124 | 0 | let be: i32 = (xi >> 52) as i32; |
125 | 0 | let le: i32 = (yi >> 52) as i32; |
126 | 0 | let ri = f_fmla(x, x, y * y).sqrt().to_bits(); |
127 | | const BS: u32 = 2; |
128 | 0 | let mut rm: u64 = ri & 0x000fffffffffffff; |
129 | 0 | let mut re: i32 = ((ri >> 52) as i32).wrapping_sub(0x3ff); |
130 | 0 | rm |= 1u64 << 52; |
131 | 0 | for _ in 0..3 { |
132 | 0 | if rm == (1u64 << 52) { |
133 | 0 | rm = 0x001fffffffffffff; |
134 | 0 | re = re.wrapping_sub(1); |
135 | 0 | } else { |
136 | 0 | rm = rm.wrapping_sub(1); |
137 | 0 | } |
138 | | } |
139 | 0 | bm = bm.wrapping_shl(BS); |
140 | 0 | let mut m2: u64 = bm.wrapping_mul(bm); |
141 | 0 | let de: i32 = be.wrapping_sub(le); |
142 | 0 | let mut ls: i32 = (BS as i32).wrapping_sub(de); |
143 | 0 | if ls >= 0 { |
144 | 0 | lm <<= ls; |
145 | 0 | m2 = m2.wrapping_add(lm.wrapping_mul(lm)); |
146 | 0 | } else { |
147 | 0 | let lm2: u128 = (lm as u128).wrapping_mul(lm as u128); |
148 | 0 | ls = ls.wrapping_mul(2); |
149 | 0 | m2 = m2.wrapping_add((lm2 >> -ls) as u64); // since ls < 0, the shift by -ls is legitimate |
150 | 0 | m2 |= ((lm2.wrapping_shl((128 + ls) as u32)) != 0) as u64; |
151 | 0 | } |
152 | 0 | let k: i32 = (BS as i32).wrapping_add(re); |
153 | | let mut denom: i64; |
154 | | loop { |
155 | 0 | rm += 1 + (rm >= (1u64 << 53)) as u64; |
156 | 0 | let tm: u64 = rm.wrapping_shl(k as u32); |
157 | 0 | let rm2: u64 = tm.wrapping_mul(tm); |
158 | 0 | denom = (m2 as i64).wrapping_sub(rm2 as i64); |
159 | 0 | if denom <= 0 { |
160 | 0 | break; |
161 | 0 | } |
162 | | } |
163 | 0 | if denom != 0 { |
164 | 0 | if op == om { |
165 | 0 | let ssm = if rm <= (1u64 << 53) { 1 } else { 0 }; |
166 | 0 | let tm: u64 = (rm << k) - (1 << (k - ssm)); |
167 | 0 | denom = (m2 as i64).wrapping_sub(tm.wrapping_mul(tm) as i64); |
168 | 0 | if denom != 0 { |
169 | 0 | rm = rm.wrapping_add((denom >> 63) as u64); |
170 | 0 | } else { |
171 | 0 | rm = rm.wrapping_sub(rm & 1); |
172 | 0 | } |
173 | | } else { |
174 | 0 | let ssm = if rm > (1u64 << 53) { 1u32 } else { 0 }; |
175 | 0 | let pdm = if op == 1. { 1u64 } else { 0 }; |
176 | 0 | rm = rm.wrapping_sub(pdm.wrapping_shl(ssm)); |
177 | | } |
178 | 0 | } |
179 | 0 | if rm >= (1u64 << 53) { |
180 | 0 | rm >>= 1; |
181 | 0 | re = re.wrapping_add(1); |
182 | 0 | } |
183 | | |
184 | 0 | let e: i64 = be.wrapping_sub(1).wrapping_add(re) as i64; |
185 | 0 | xi = (e as u64).wrapping_shl(52).wrapping_add(rm); |
186 | 0 | f64::from_bits(xi) |
187 | 0 | } |
188 | | |
189 | | /// Computes hypot |
190 | | /// |
191 | | /// Max ULP 0.5 |
192 | 0 | pub fn f_hypot(x: f64, y: f64) -> f64 { |
193 | 0 | let xi = x.to_bits(); |
194 | 0 | let yi = y.to_bits(); |
195 | 0 | let emsk = 0x7ffu64 << 52; |
196 | 0 | let mut ex = xi & emsk; |
197 | 0 | let mut ey = yi & emsk; |
198 | 0 | let mut x = x.abs(); |
199 | 0 | let mut y = y.abs(); |
200 | 0 | if ex == emsk || ey == emsk { |
201 | | /* Either x or y is NaN or Inf */ |
202 | 0 | let wx = xi.wrapping_shl(1); |
203 | 0 | let wy = yi.wrapping_shl(1); |
204 | 0 | let wm = emsk.wrapping_shl(1); |
205 | 0 | let ninf: i32 = ((wx == wm) ^ (wy == wm)) as i32; |
206 | 0 | let nqnn: i32 = (((wx >> 52) == 0xfff) ^ ((wy >> 52) == 0xfff)) as i32; |
207 | | /* ninf is 1 when only one of x and y is +/-Inf |
208 | | nqnn is 1 when only one of x and y is qNaN |
209 | | IEEE 754 says that hypot(+/-Inf,qNaN)=hypot(qNaN,+/-Inf)=+Inf. */ |
210 | 0 | if ninf != 0 && nqnn != 0 { |
211 | 0 | return if wx == wm { x * x } else { y * y }; |
212 | 0 | } |
213 | 0 | return x + y; /* inf, nan */ |
214 | 0 | } |
215 | | |
216 | 0 | let u = f64::max(x, y); |
217 | 0 | let v = f64::min(x, y); |
218 | 0 | let mut xd = u.to_bits(); |
219 | 0 | let mut yd = v.to_bits(); |
220 | 0 | ey = yd; |
221 | 0 | if (ey >> 52) == 0 { |
222 | | // y is subnormal |
223 | 0 | if (yd) == 0 { |
224 | 0 | return f64::from_bits(xd); |
225 | 0 | } |
226 | 0 | ex = xd; |
227 | 0 | if (ex >> 52) == 0 { |
228 | | // x is subnormal too |
229 | 0 | if ex == 0 { |
230 | 0 | return 0.; |
231 | 0 | } |
232 | 0 | return hypot_denorm(ex, ey); |
233 | 0 | } |
234 | 0 | let nz = ey.leading_zeros(); |
235 | 0 | ey = ey.wrapping_shl(nz - 11); |
236 | 0 | ey &= u64::MAX >> 12; |
237 | 0 | ey = ey.wrapping_sub(((nz as u64).wrapping_sub(12u64)) << 52); |
238 | 0 | let t = ey; |
239 | 0 | yd = t; |
240 | 0 | } |
241 | | |
242 | 0 | let de = xd.wrapping_sub(yd); |
243 | 0 | if de > (27u64 << 52) { |
244 | 0 | return dyad_fmla(f64::from_bits(0x3e40000000000000), v, u); |
245 | 0 | } |
246 | 0 | let off: i64 = ((0x3ffu64 << 52) as i64).wrapping_sub((xd & emsk) as i64); |
247 | 0 | xd = xd.wrapping_add(off as u64); |
248 | 0 | yd = yd.wrapping_add(off as u64); |
249 | 0 | x = f64::from_bits(xd); |
250 | 0 | y = f64::from_bits(yd); |
251 | 0 | let x2 = DoubleDouble::from_exact_mult(x, x); |
252 | 0 | let y2 = DoubleDouble::from_exact_mult(y, y); |
253 | 0 | let r2 = x2.hi + y2.hi; |
254 | 0 | let ir2 = 0.5 / r2; |
255 | 0 | let dr2 = ((x2.hi - r2) + y2.hi) + (x2.lo + y2.lo); |
256 | 0 | let mut th = r2.sqrt(); |
257 | 0 | let rsqrt = DoubleDouble::from_exact_mult(th, ir2); |
258 | 0 | let dz = dr2 - rsqrt.lo; |
259 | 0 | let mut tl = rsqrt.hi * dz; |
260 | 0 | let p = DoubleDouble::from_exact_add(th, tl); |
261 | 0 | th = p.hi; |
262 | 0 | tl = p.lo; |
263 | 0 | let mut thd = th.to_bits(); |
264 | 0 | let tld = tl.abs().to_bits(); |
265 | 0 | ex = thd; |
266 | 0 | ey = tld; |
267 | 0 | ex &= 0x7ffu64 << 52; |
268 | 0 | let aidr = ey.wrapping_add(0x3feu64 << 52).wrapping_sub(ex); |
269 | 0 | let mid = (aidr.wrapping_sub(0x3c90000000000000).wrapping_add(16)) >> 5; |
270 | 0 | if mid == 0 || aidr < 0x39b0000000000000u64 || aidr > 0x3c9fffffffffff80u64 { |
271 | 0 | thd = hypot_hard(x, y).to_bits(); |
272 | 0 | } |
273 | 0 | thd = thd.wrapping_sub(off as u64); |
274 | 0 | if thd >= (0x7ffu64 << 52) { |
275 | 0 | return hypot_overflow(); |
276 | 0 | } |
277 | 0 | f64::from_bits(thd) |
278 | 0 | } |
279 | | |
280 | | #[cfg(test)] |
281 | | mod tests { |
282 | | use super::*; |
283 | | #[test] |
284 | | fn test_hypot() { |
285 | | assert_eq!( |
286 | | f_hypot(2.8480945070593211E-306, 2.1219950320804403E-314), |
287 | | 2.848094507059321e-306 |
288 | | ); |
289 | | assert_eq!( |
290 | | f_hypot(3.5601181736115222E-307, 1.0609978954826362E-314), |
291 | | 3.560118173611524e-307 |
292 | | ); |
293 | | assert_eq!(f_hypot(2., 4.), 4.47213595499958); |
294 | | } |
295 | | } |