/rust/registry/src/index.crates.io-1949cf8c6b5b557f/pxfm-0.1.27/src/common.rs
Line | Count | Source |
1 | | /* |
2 | | * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved. |
3 | | * // |
4 | | * // Redistribution and use in source and binary forms, with or without modification, |
5 | | * // are permitted provided that the following conditions are met: |
6 | | * // |
7 | | * // 1. Redistributions of source code must retain the above copyright notice, this |
8 | | * // list of conditions and the following disclaimer. |
9 | | * // |
10 | | * // 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | * // this list of conditions and the following disclaimer in the documentation |
12 | | * // and/or other materials provided with the distribution. |
13 | | * // |
14 | | * // 3. Neither the name of the copyright holder nor the names of its |
15 | | * // contributors may be used to endorse or promote products derived from |
16 | | * // this software without specific prior written permission. |
17 | | * // |
18 | | * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | | * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
22 | | * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
24 | | * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
25 | | * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | | * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | | * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | use num_traits::MulAdd; |
30 | | use std::ops::{Add, Mul}; |
31 | | |
#[inline(always)]
/// Returns `true` when `x` holds an exact integral value.
pub(crate) fn is_integerf(x: f32) -> bool {
    // Fast path: targets with a hardware round-to-nearest instruction
    // (SSE4.1 roundss / AArch64 frintn). Integers are fixed points of
    // round-ties-even; NaN compares unequal to itself, so NaN => false.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        x.round_ties_even() == x
    }
    // Portable fallback: x is an integer iff the position of its lowest
    // set bit is at or above the binary point, i.e.
    // exponent + trailing_zeros(mantissa) >= bias + 23.
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        let x_u = x.to_bits();
        // Biased exponent field.
        let x_e = (x_u & EXP_MASK_F32) >> 23;
        // OR-ing in the exponent mask caps trailing_zeros at 23, so an
        // all-zero mantissa doesn't count exponent/sign bits as zeros.
        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        // NOTE(review): inf/NaN (all-ones exponent) report `true` here
        // while NaN reports `false` on the hardware path — callers
        // presumably screen non-finite inputs first; confirm.
        x_e + lsb >= UNIT_EXPONENT
    }
}
60 | | |
#[inline(always)]
/// Returns `true` when `x` is an odd integer.
pub(crate) fn is_odd_integerf(x: f32) -> bool {
    // AArch64: float->int conversion is cheap; test the low bit of the
    // truncated value. NOTE(review): the `as i32` cast saturates for
    // |x| >= 2^31, landing on i32::MIN/MAX — callers presumably only
    // pass values already known to be in-range integers; confirm.
    #[cfg(target_arch = "aarch64")]
    {
        (x as i32 & 1) != 0
    }
    // Portable path: x is an *odd* integer exactly when its lowest set
    // bit sits precisely on the binary point, i.e.
    // exponent + trailing_zeros == bias + 23 (strictly greater would
    // mean an even integer).
    #[cfg(not(target_arch = "aarch64"))]
    {
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK_F32) >> 23; // biased exponent field
        // OR with the exponent mask caps trailing_zeros at 23.
        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;

        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        x_e + lsb == UNIT_EXPONENT
    }
}
78 | | |
#[inline(always)]
/// Returns `true` when `n` holds an exact integral value.
pub(crate) fn is_integer(n: f64) -> bool {
    // Fast path: targets with a hardware round-to-nearest instruction
    // (SSE4.1 roundsd / AArch64 frintn). Integers are fixed points of
    // round-ties-even; NaN compares unequal to itself, so NaN => false.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        n == n.round_ties_even()
    }
    // Portable fallback: n is an integer iff the position of its lowest
    // set bit is at or above the binary point, i.e.
    // exponent + trailing_zeros(mantissa) >= bias + 52.
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::bits::EXP_MASK;
        let x_u = n.to_bits();
        let x_e = (x_u & EXP_MASK) >> 52; // biased exponent field
        // OR with the exponent mask caps trailing_zeros at 52.
        let lsb = (x_u | EXP_MASK).trailing_zeros();
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

        const UNIT_EXPONENT: u64 = E_BIAS + 52;
        // NOTE(review): inf/NaN (all-ones exponent) report `true` here
        // while NaN reports `false` on the hardware path — confirm
        // callers screen non-finite inputs.
        x_e + lsb as u64 >= UNIT_EXPONENT
    }
}
109 | | |
#[inline(always)]
#[allow(unused)]
/// Odd-integer test via unchecked float->int conversion.
///
/// The `_fast` suffix signals the caller contract: `to_int_unchecked`
/// is undefined behavior when `x` is NaN, infinite, or its truncation
/// does not fit in `i64` — the caller must already know `x` is a
/// finite, in-range integer.
pub(crate) fn is_odd_integer_fast(x: f64) -> bool {
    // SAFETY: caller guarantees x is finite and representable as i64.
    unsafe { (x.to_int_unchecked::<i64>() & 1) != 0 }
}
115 | | |
#[inline(always)]
#[allow(unused)]
/// Odd-integer test via unchecked float->int conversion (`f32` variant).
///
/// The `_fast` suffix signals the caller contract: `to_int_unchecked`
/// is undefined behavior when `x` is NaN, infinite, or its truncation
/// does not fit in `i32` — the caller must already know `x` is a
/// finite, in-range integer.
pub(crate) fn is_odd_integerf_fast(x: f32) -> bool {
    // SAFETY: caller guarantees x is finite and representable as i32.
    unsafe { (x.to_int_unchecked::<i32>() & 1) != 0 }
}
121 | | |
#[inline(always)]
/// Returns `true` when `x` is an odd integer.
pub(crate) fn is_odd_integer(x: f64) -> bool {
    // Fast path: float->int conversion is cheap on these targets; test
    // the low bit of the truncated value. NOTE(review): the `as i64`
    // cast saturates for |x| >= 2^63 — callers presumably only pass
    // values already known to be in-range integers; confirm.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        (x as i64 & 1) != 0
    }
    // Portable path: x is an *odd* integer exactly when its lowest set
    // bit sits precisely on the binary point, i.e.
    // exponent + trailing_zeros == bias + 52 (strictly greater would
    // mean an even integer).
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::bits::EXP_MASK;
        let x_u = x.to_bits();
        let x_e = (x_u & EXP_MASK) >> 52; // biased exponent field
        // OR with the exponent mask caps trailing_zeros at 52.
        let lsb = (x_u | EXP_MASK).trailing_zeros();
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

        const UNIT_EXPONENT: u64 = E_BIAS + 52;
        x_e + lsb as u64 == UNIT_EXPONENT
    }
}
152 | | |
/// Multiply-accumulate `acc + a * b`.
///
/// On targets with hardware FMA (x86 `fma` feature / AArch64) this
/// lowers to a fused multiply-add: one rounding step, one instruction.
#[cfg(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    target_arch = "aarch64"
))]
#[inline(always)]
pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    acc: T,
    a: T,
    b: T,
) -> T {
    MulAdd::mul_add(a, b, acc)
}
168 | | |
/// Multiply-accumulate `acc + a * b` (no-FMA fallback).
///
/// Without hardware FMA this uses a separate multiply and add (two
/// rounding steps) rather than a slow software-emulated fused op, so
/// results may differ in the last bit from the FMA build.
#[inline(always)]
#[cfg(not(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    target_arch = "aarch64"
)))]
pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    acc: T,
    a: T,
    b: T,
) -> T {
    acc + a * b
}
184 | | |
/// Rounds `x` to the nearest whole number (ties away from zero) by
/// adding a half carrying the sign of `x`, then truncating through an
/// `i32` cast. Only meaningful while the result fits in `i32`.
#[inline]
pub(crate) const fn rintfk(x: f32) -> f32 {
    let half = if x < 0. { -0.5 } else { 0.5 };
    (x + half) as i32 as f32
}
189 | | |
/// Non-fused multiply-add for `f32`: computes `a * b`, rounds that
/// product, then adds `c` (two rounding steps — not an FMA).
#[inline(always)]
pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
    let product = a * b;
    product + c
}
194 | | |
#[inline(always)]
/// Optional FMA for `f32`: uses the hardware fused multiply-add when
/// the target has one, otherwise plain `a * b + c` (two roundings).
/// Results may therefore differ in the last bit between targets; use
/// `dd_fmlaf` when fused semantics are mandatory.
pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    // Hardware FMA: single rounding step.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        f32::mul_add(a, b, c)
    }
    // No hardware FMA: separate multiply and add (two roundings).
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        a * b + c
    }
}
218 | | |
/// Optional FMA, if it is available hardware FMA will use, if not then just scalar `c + a * b`.
///
/// Results may differ in the last bit between FMA and non-FMA targets;
/// use `dd_fmla`/`dyad_fmla` when fused semantics are mandatory.
#[inline(always)]
pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
    // Hardware FMA: single rounding step.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        f64::mul_add(a, b, c)
    }
    // No hardware FMA: separate multiply and add (two roundings).
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        a * b + c
    }
}
243 | | |
/// Non-fused multiply-add for `f64`: computes `a * b`, rounds that
/// product, then adds `c` (two rounding steps — not an FMA).
#[inline(always)]
pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
    let product = a * b;
    product + c
}
248 | | |
/// Executes a mandatory FMA: the result always has fused (single
/// rounding) semantics.
///
/// If hardware FMA is not available it is simulated through
/// Dekker/Veltkamp double-double multiplication.
#[inline(always)]
pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
    // Hardware FMA is present: delegate to the optional-FMA helper,
    // which compiles to a fused instruction under this cfg.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        f_fmla(a, b, c)
    }
    // No hardware FMA: emulate the single rounding with double-double
    // arithmetic (exact product split per Dekker/Veltkamp).
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::double_double::DoubleDouble;
        DoubleDouble::dd_f64_mul_add(a, b, c)
    }
}
275 | | |
/// Executes a mandatory FMA: the result always has fused (single
/// rounding) semantics.
///
/// If hardware FMA is not available it is simulated through 128-bit
/// dyadic-float arithmetic (wider than `dd_fmla`'s double-double path).
#[inline(always)]
pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
    // Hardware FMA is present: delegate to the optional-FMA helper,
    // which compiles to a fused instruction under this cfg.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        f_fmla(a, b, c)
    }
    // No hardware FMA: compute a*b + c exactly in 128-bit dyadic
    // floats, then round once when converting back to f64.
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::dyadic_float::DyadicFloat128;
        let z = DyadicFloat128::new_from_f64(a);
        let k = DyadicFloat128::new_from_f64(b);
        let p = z * k + DyadicFloat128::new_from_f64(c);
        p.fast_as_f64()
    }
}
305 | | |
/// Executes a mandatory FMA for `f32`: the result always has fused
/// (single rounding) semantics.
///
/// If hardware FMA is not available it is simulated by computing in
/// `f64`, where the `f32` product `a * b` is exact, then rounding once
/// back to `f32`.
#[inline(always)]
#[allow(unused)]
pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    // Hardware FMA is present: delegate to the optional-FMA helper,
    // which compiles to a fused instruction under this cfg.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        f_fmlaf(a, b, c)
    }
    // No hardware FMA: widen to f64 (product of two f32 is exact in
    // f64), add, and round once on the final narrowing cast.
    // NOTE(review): the comment above the function mentions
    // Dekker/Veltkamp, but this path actually uses f64 widening —
    // presumably the comment was copied from dd_fmla.
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        (a as f64 * b as f64 + c as f64) as f32
    }
}
332 | | |
#[allow(dead_code)]
#[inline(always)]
/// C-style argument order for `mlaf`: computes `c + a * b` with the
/// accumulator last, matching libc's `fma(a, b, c)` convention.
pub(crate) fn c_mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    a: T,
    b: T,
    c: T,
) -> T {
    mlaf(c, a, b)
}
342 | | |
/// Copies sign from `y` to `x`: returns a value with the magnitude of
/// `x` and the sign bit of `y`, operating directly on the bit patterns.
#[inline]
pub const fn copysignfk(x: f32, y: f32) -> f32 {
    const SIGN_BIT: u32 = 1u32 << 31;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f32::from_bits(magnitude | sign)
}
348 | | |
349 | | // #[inline] |
350 | | // // Founds n in ln(đĽ)=ln(đ)+đln(2) |
351 | | // pub(crate) const fn ilogb2kf(d: f32) -> i32 { |
352 | | // (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f |
353 | | // } |
354 | | // |
355 | | // #[inline] |
356 | | // // Founds a in x=a+đln(2) |
357 | | // pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 { |
358 | | // f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32) |
359 | | // } |
360 | | |
/// Computes `2^q` as `f32` by assembling the bit pattern directly:
/// the biased exponent `q + 127` is shifted into the exponent field
/// with a zero mantissa. Only valid for `q` in the normal-exponent
/// range [-126, 127]; out-of-range `q` wraps into an unrelated value.
#[inline]
pub(crate) const fn pow2if(q: i32) -> f32 {
    let biased_exponent = q.wrapping_add(0x7f) as u32;
    f32::from_bits(biased_exponent << 23)
}
365 | | |
/// Round towards whole integral number: adds a half carrying the sign
/// of `x`, then truncates through an `i64` cast (ties away from zero).
/// Only meaningful while the result fits in `i64`.
#[inline]
pub(crate) const fn rintk(x: f64) -> f64 {
    let half = if x < 0. { -0.5 } else { 0.5 };
    (x + half) as i64 as f64
}
371 | | |
/// Computes `2^q` as `f64` by assembling the bit pattern directly:
/// the biased exponent `q + 1023` is shifted into the exponent field
/// with a zero mantissa. Only valid for `q` in the normal-exponent
/// range [-1022, 1023]; out-of-range `q` wraps into an unrelated value.
#[inline(always)]
pub(crate) const fn pow2i(q: i32) -> f64 {
    let biased_exponent = q.wrapping_add(0x3ff) as u64;
    f64::from_bits(biased_exponent << 52)
}
377 | | |
378 | | // #[inline] |
379 | | // pub(crate) const fn ilogb2k(d: f64) -> i32 { |
380 | | // (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff |
381 | | // } |
382 | | // |
383 | | // #[inline] |
384 | | // pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 { |
385 | | // f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64) |
386 | | // } |
387 | | |
/// Copies sign from `y` to `x`: returns a value with the magnitude of
/// `x` and the sign bit of `y`, operating directly on the bit patterns.
#[inline]
pub const fn copysignk(x: f64, y: f64) -> f64 {
    const SIGN_BIT: u64 = 1u64 << 63;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f64::from_bits(magnitude | sign)
}
393 | | |
/// Smallest positive normal `f64`, i.e. 2^-1022 (`f64::MIN_POSITIVE`):
/// biased exponent field of 1 with a zero mantissa.
#[inline]
pub(crate) const fn min_normal_f64() -> f64 {
    f64::from_bits(1u64 << 52)
}
401 | | |
/// Returns a `u32` mask with the `len` least-significant bits set.
///
/// `len >= 32` saturates to all ones (`u32::MAX`); a plain `1 << len`
/// with `len >= 32` would be an overflowing shift on `u32`.
/// (The original comment said "64 or more" — that threshold belongs to
/// a `u64` variant; for `u32` it is 32.)
#[inline]
const fn mask_trailing_ones_u32(len: u32) -> u32 {
    if len >= 32 {
        u32::MAX // All ones if length is 32 or more
    } else {
        // 1 << len >= 1 here, so the subtraction cannot underflow;
        // wrapping_sub keeps the expression panic-free in const eval.
        (1u32 << len).wrapping_sub(1)
    }
}
410 | | |
411 | | pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23; |
412 | | |
413 | | #[inline] |
414 | 0 | pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 { |
415 | 0 | let encoded_mask = new_exp.wrapping_shl(23) & EXP_MASK_F32; |
416 | 0 | x ^ ((x ^ encoded_mask) & EXP_MASK_F32) |
417 | 0 | } |
418 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke-tests the integer / odd-integer classifiers for both
    /// `f64` and `f32` on a small set of representative inputs.
    #[test]
    fn test_is_integer() {
        // f64 variants
        assert!(is_integer(5.));
        assert!(is_integer(6.));
        assert!(!is_integer(6.01));
        assert!(is_odd_integer(5.));
        assert!(!is_odd_integer(6.));
        assert!(!is_odd_integer(6.01));
        // f32 variants
        assert!(is_integerf(5.));
        assert!(is_integerf(6.));
        assert!(!is_integerf(6.01));
        assert!(is_odd_integerf(5.));
        assert!(!is_odd_integerf(6.));
        assert!(!is_odd_integerf(6.01));
    }
}
437 | | } |