Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/BuiltinWrappers.h> |
10 | | #include <AK/Concepts.h> |
11 | | #include <AK/FloatingPoint.h> |
12 | | #include <AK/NumericLimits.h> |
13 | | #include <AK/StdLibExtraDetails.h> |
14 | | #include <AK/Types.h> |
15 | | #include <math.h> |
16 | | |
17 | | #ifdef KERNEL |
18 | | # error "Including AK/Math.h from the Kernel is never correct! Floating point is disabled." |
19 | | #endif |
20 | | |
21 | | namespace AK { |
22 | | |
23 | | template<FloatingPoint T> |
24 | | constexpr T NaN = __builtin_nan(""); |
25 | | template<FloatingPoint T> |
26 | | constexpr T Infinity = __builtin_huge_vall(); |
27 | | template<FloatingPoint T> |
28 | | constexpr T Pi = 3.141592653589793238462643383279502884L; |
29 | | template<FloatingPoint T> |
30 | | constexpr T E = 2.718281828459045235360287471352662498L; |
31 | | template<FloatingPoint T> |
32 | | constexpr T Sqrt2 = 1.414213562373095048801688724209698079L; |
33 | | template<FloatingPoint T> |
34 | | constexpr T Sqrt1_2 = 0.707106781186547524400844362104849039L; |
35 | | |
36 | | template<FloatingPoint T> |
37 | | constexpr T L2_10 = 3.321928094887362347870319429489390175864L; |
38 | | template<FloatingPoint T> |
39 | | constexpr T L2_E = 1.442695040888963407359924681001892137L; |
40 | | |
41 | | namespace Details { |
42 | | template<size_t> |
43 | | constexpr size_t product_even(); |
44 | | template<> |
45 | 0 | constexpr size_t product_even<2>() { return 2; } |
46 | | template<size_t value> |
47 | 0 | constexpr size_t product_even() { return value * product_even<value - 2>(); }Unexecuted instantiation: unsigned long AK::Details::product_even<4ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<6ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<8ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<10ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<12ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<14ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<16ul>() |
48 | | |
49 | | template<size_t> |
50 | | constexpr size_t product_odd(); |
51 | | template<> |
52 | 0 | constexpr size_t product_odd<1>() { return 1; } |
53 | | template<size_t value> |
54 | 0 | constexpr size_t product_odd() { return value * product_odd<value - 2>(); }Unexecuted instantiation: unsigned long AK::Details::product_odd<3ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<5ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<7ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<9ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<11ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<13ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<15ul>() |
55 | | } |
56 | | |
57 | | template<FloatingPoint T> |
58 | | constexpr T to_radians(T degrees) |
59 | 0 | { |
60 | 0 | return degrees * AK::Pi<T> / 180; |
61 | 0 | } Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEfEET_S2_ Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEdEET_S2_ |
62 | | |
63 | | template<FloatingPoint T> |
64 | | constexpr T to_degrees(T radians) |
65 | 0 | { |
66 | 0 | return radians * 180 / AK::Pi<T>; |
67 | 0 | } Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEfEET_S2_ Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEdEET_S2_ |
68 | | |
69 | | template<FloatingPoint FloatT> |
70 | | FloatT copysign(FloatT x, FloatT y) |
71 | | { |
72 | | using Extractor = FloatExtractor<FloatT>; |
73 | | auto ex = Extractor::from_float(x); |
74 | | auto ey = Extractor::from_float(y); |
75 | | ex.sign = ey.sign; |
76 | | return ex.to_float(); |
77 | | } |
78 | | |
79 | | #define CALL_BUILTIN(function, args...) \ |
80 | 136k | do { \ |
81 | 136k | if constexpr (IsSame<T, long double>) \ |
82 | 136k | return __builtin_##function##l(args); \ |
83 | 136k | if constexpr (IsSame<T, double>) \ |
84 | 136k | return __builtin_##function(args); \ |
85 | 136k | if constexpr (IsSame<T, float>) \ |
86 | 136k | return __builtin_##function##f(args); \ |
87 | 136k | } while (0) |
88 | | |
89 | | #define CONSTEXPR_STATE(function, args...) \ |
90 | 113M | if (is_constant_evaluated()) \ |
91 | 113M | CALL_BUILTIN(function, args); |
92 | | |
93 | | #define AARCH64_INSTRUCTION(instruction, arg) \ |
94 | | if constexpr (IsSame<T, long double>) \ |
95 | | TODO(); \ |
96 | | if constexpr (IsSame<T, double>) { \ |
97 | | double res; \ |
98 | | asm(#instruction " %d0, %d1" \ |
99 | | : "=w"(res) \ |
100 | | : "w"(arg)); \ |
101 | | return res; \ |
102 | | } \ |
103 | | if constexpr (IsSame<T, float>) { \ |
104 | | float res; \ |
105 | | asm(#instruction " %s0, %s1" \ |
106 | | : "=w"(res) \ |
107 | | : "w"(arg)); \ |
108 | | return res; \ |
109 | | } |
110 | | |
111 | | template<FloatingPoint T> |
112 | | constexpr T fabs(T x) |
113 | 132k | { |
114 | | // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper |
115 | 132k | CALL_BUILTIN(fabs, x); |
116 | 132k | } _ZN2AK4fabsITkNS_8Concepts13FloatingPointEfEET_S2_ Line | Count | Source | 113 | 132k | { | 114 | | // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper | 115 | 132k | CALL_BUILTIN(fabs, x); | 116 | 132k | } |
Unexecuted instantiation: _ZN2AK4fabsITkNS_8Concepts13FloatingPointEdEET_S2_ Unexecuted instantiation: _ZN2AK4fabsITkNS_8Concepts13FloatingPointEdEET_S2_ |
117 | | |
118 | | namespace Rounding { |
119 | | template<FloatingPoint T> |
120 | | constexpr T ceil(T num) |
121 | 3.39k | { |
122 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b110 |
123 | 3.39k | if (is_constant_evaluated()) { |
124 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
125 | 0 | return num; |
126 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) |
127 | 0 | ? static_cast<i64>(num) |
128 | 0 | : static_cast<i64>(num) + ((num > 0) ? 1 : 0); |
129 | 0 | } |
130 | | #if ARCH(AARCH64) |
131 | | AARCH64_INSTRUCTION(frintp, num); |
132 | | #else |
133 | 3.39k | CALL_BUILTIN(ceil, num); |
134 | 3.39k | #endif |
135 | 3.39k | } _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 121 | 3.39k | { | 122 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b110 | 123 | 3.39k | if (is_constant_evaluated()) { | 124 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) | 125 | 0 | return num; | 126 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) | 127 | 0 | ? static_cast<i64>(num) | 128 | 0 | : static_cast<i64>(num) + ((num > 0) ? 1 : 0); | 129 | 0 | } | 130 | | #if ARCH(AARCH64) | 131 | | AARCH64_INSTRUCTION(frintp, num); | 132 | | #else | 133 | 3.39k | CALL_BUILTIN(ceil, num); | 134 | 3.39k | #endif | 135 | 3.39k | } |
Unexecuted instantiation: _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEdEET_S3_ |
136 | | |
137 | | template<FloatingPoint T> |
138 | | constexpr T floor(T num) |
139 | 0 | { |
140 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b101 |
141 | 0 | if (is_constant_evaluated()) { |
142 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
143 | 0 | return num; |
144 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) |
145 | 0 | ? static_cast<i64>(num) |
146 | 0 | : static_cast<i64>(num) - ((num > 0) ? 0 : 1); |
147 | 0 | } |
148 | | #if ARCH(AARCH64) |
149 | | AARCH64_INSTRUCTION(frintm, num); |
150 | | #else |
151 | 0 | CALL_BUILTIN(floor, num); |
152 | 0 | #endif |
153 | 0 | } |
154 | | |
155 | | template<FloatingPoint T> |
156 | | constexpr T trunc(T num) |
157 | | { |
158 | | #if ARCH(AARCH64) |
159 | | if (is_constant_evaluated()) { |
160 | | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
161 | | return num; |
162 | | return static_cast<T>(static_cast<i64>(num)); |
163 | | } |
164 | | AARCH64_INSTRUCTION(frintz, num); |
165 | | #endif |
166 | | // FIXME: Use dedicated instruction in the non constexpr case |
167 | | // SSE4.1: rounds[sd] %num, %res, 0b111 |
168 | | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
169 | | return num; |
170 | | return static_cast<T>(static_cast<i64>(num)); |
171 | | } |
172 | | |
173 | | template<FloatingPoint T> |
174 | | constexpr T rint(T x) |
175 | | { |
176 | | CONSTEXPR_STATE(rint, x); |
177 | | // Note: This does break tie to even |
178 | | // But the behavior of frndint/rounds[ds]/frintx can be configured |
179 | | // through the floating point control registers. |
180 | | // FIXME: We should decide if we rename this to allow us to get away from |
181 | | // the configurability "burden" rint has |
182 | | // this would make us use `rounds[sd] %num, %res, 0b100` |
183 | | // and `frintn` respectively, |
184 | | // no such guaranteed round exists for x87 `frndint` |
185 | | #if ARCH(X86_64) |
186 | | # ifdef __SSE4_1__ |
187 | | if constexpr (IsSame<T, double>) { |
188 | | T r; |
189 | | asm( |
190 | | "roundsd %1, %0" |
191 | | : "=x"(r) |
192 | | : "x"(x)); |
193 | | return r; |
194 | | } |
195 | | if constexpr (IsSame<T, float>) { |
196 | | T r; |
197 | | asm( |
198 | | "roundss %1, %0" |
199 | | : "=x"(r) |
200 | | : "x"(x)); |
201 | | return r; |
202 | | } |
203 | | # else |
204 | | asm( |
205 | | "frndint" |
206 | | : "+t"(x)); |
207 | | return x; |
208 | | # endif |
209 | | #elif ARCH(AARCH64) |
210 | | AARCH64_INSTRUCTION(frintx, x); |
211 | | #elif ARCH(RISCV64) |
212 | | if (__builtin_isnan(x)) |
213 | | return x; |
214 | | |
215 | | // Floating point values have a gap size of >= 1 for values above 2^mantissa_bits - 1. |
216 | | if (fabs(x) > FloatExtractor<T>::mantissa_max) |
217 | | return x; |
218 | | |
219 | | if constexpr (IsSame<T, float>) { |
220 | | i64 r; |
221 | | asm("fcvt.l.s %0, %1, dyn" |
222 | | : "=r"(r) |
223 | | : "f"(x)); |
224 | | return copysign(static_cast<float>(r), x); |
225 | | } |
226 | | if constexpr (IsSame<T, double>) { |
227 | | i64 r; |
228 | | asm("fcvt.l.d %0, %1, dyn" |
229 | | : "=r"(r) |
230 | | : "f"(x)); |
231 | | return copysign(static_cast<double>(r), x); |
232 | | } |
233 | | if constexpr (IsSame<T, long double>) |
234 | | TODO_RISCV64(); |
235 | | #endif |
236 | | TODO(); |
237 | | } |
238 | | |
239 | | template<FloatingPoint T> |
240 | | constexpr T round(T x) |
241 | 0 | { |
242 | 0 | CONSTEXPR_STATE(round, x); |
243 | 0 | // Note: This is break-tie-away-from-zero, so not the hw's understanding of |
244 | 0 | // "nearest", which would be towards even. |
245 | 0 | if (x == 0.) |
246 | 0 | return x; |
247 | 0 | if (x > 0.) |
248 | 0 | return floor(x + .5); |
249 | 0 | return ceil(x - .5); |
250 | 0 | } |
251 | | |
252 | | template<Integral I, FloatingPoint P> |
253 | | ALWAYS_INLINE I round_to(P value); |
254 | | |
255 | | #if ARCH(X86_64) |
256 | | template<Integral I> |
257 | | ALWAYS_INLINE I round_to(long double value) |
258 | | { |
259 | | // Note: fistps outputs into a signed integer location (i16, i32, i64), |
260 | | // so lets be nice and tell the compiler that. |
261 | | Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret; |
262 | | if constexpr (sizeof(I) == sizeof(i64)) { |
263 | | asm("fistpll %0" |
264 | | : "=m"(ret) |
265 | | : "t"(value) |
266 | | : "st"); |
267 | | } else if constexpr (sizeof(I) == sizeof(i32)) { |
268 | | asm("fistpl %0" |
269 | | : "=m"(ret) |
270 | | : "t"(value) |
271 | | : "st"); |
272 | | } else { |
273 | | asm("fistps %0" |
274 | | : "=m"(ret) |
275 | | : "t"(value) |
276 | | : "st"); |
277 | | } |
278 | | return static_cast<I>(ret); |
279 | | } |
280 | | |
281 | | template<Integral I> |
282 | | ALWAYS_INLINE I round_to(float value) |
283 | 190M | { |
284 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, |
285 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 |
286 | | // To solve this we would either need to detect that value or do a range check and |
287 | | // then do a more specialized conversion, which might include a division (which is expensive) |
288 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { |
289 | | i64 ret; |
290 | | asm("cvtss2si %1, %0" |
291 | | : "=r"(ret) |
292 | | : "xm"(value)); |
293 | | return static_cast<I>(ret); |
294 | | } |
295 | 190M | i32 ret; |
296 | 190M | asm("cvtss2si %1, %0" |
297 | 190M | : "=r"(ret) |
298 | 190M | : "xm"(value)); |
299 | 190M | return static_cast<I>(ret); |
300 | 190M | } _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_f Line | Count | Source | 283 | 190M | { | 284 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, | 285 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 | 286 | | // To solve this we would either need to detect that value or do a range check and | 287 | | // then do a more specialized conversion, which might include a division (which is expensive) | 288 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { | 289 | | i64 ret; | 290 | | asm("cvtss2si %1, %0" | 291 | | : "=r"(ret) | 292 | | : "xm"(value)); | 293 | | return static_cast<I>(ret); | 294 | | } | 295 | 190M | i32 ret; | 296 | 190M | asm("cvtss2si %1, %0" | 297 | 190M | : "=r"(ret) | 298 | 190M | : "xm"(value)); | 299 | 190M | return static_cast<I>(ret); | 300 | 190M | } |
_ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_f Line | Count | Source | 283 | 15.3k | { | 284 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, | 285 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 | 286 | | // To solve this we would either need to detect that value or do a range check and | 287 | | // then do a more specialized conversion, which might include a division (which is expensive) | 288 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { | 289 | | i64 ret; | 290 | | asm("cvtss2si %1, %0" | 291 | | : "=r"(ret) | 292 | | : "xm"(value)); | 293 | | return static_cast<I>(ret); | 294 | | } | 295 | 15.3k | i32 ret; | 296 | 15.3k | asm("cvtss2si %1, %0" | 297 | 15.3k | : "=r"(ret) | 298 | 15.3k | : "xm"(value)); | 299 | 15.3k | return static_cast<I>(ret); | 300 | 15.3k | } |
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEsEET_f |
301 | | |
302 | | template<Integral I> |
303 | | ALWAYS_INLINE I round_to(double value) |
304 | 0 | { |
305 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, |
306 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 |
307 | | // To solve this we would either need to detect that value or do a range check and |
308 | | // then do a more specialized conversion, which might include a division (which is expensive) |
309 | 0 | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { |
310 | 0 | i64 ret; |
311 | 0 | asm("cvtsd2si %1, %0" |
312 | 0 | : "=r"(ret) |
313 | 0 | : "xm"(value)); |
314 | 0 | return static_cast<I>(ret); |
315 | 0 | } |
316 | 0 | i32 ret; |
317 | 0 | asm("cvtsd2si %1, %0" |
318 | 0 | : "=r"(ret) |
319 | 0 | : "xm"(value)); |
320 | 0 | return static_cast<I>(ret); |
321 | 0 | } Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralElEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEmEET_d |
322 | | |
323 | | #elif ARCH(AARCH64) |
324 | | template<SignedIntegral I> |
325 | | ALWAYS_INLINE I round_to(float value) |
326 | | { |
327 | | if constexpr (sizeof(I) <= sizeof(u32)) { |
328 | | i32 res; |
329 | | asm("fcvtns %w0, %s1" |
330 | | : "=r"(res) |
331 | | : "w"(value)); |
332 | | return static_cast<I>(res); |
333 | | } |
334 | | i64 res; |
335 | | asm("fcvtns %0, %s1" |
336 | | : "=r"(res) |
337 | | : "w"(value)); |
338 | | return static_cast<I>(res); |
339 | | } |
340 | | |
341 | | template<SignedIntegral I> |
342 | | ALWAYS_INLINE I round_to(double value) |
343 | | { |
344 | | if constexpr (sizeof(I) <= sizeof(u32)) { |
345 | | i32 res; |
346 | | asm("fcvtns %w0, %d1" |
347 | | : "=r"(res) |
348 | | : "w"(value)); |
349 | | return static_cast<I>(res); |
350 | | } |
351 | | i64 res; |
352 | | asm("fcvtns %0, %d1" |
353 | | : "=r"(res) |
354 | | : "w"(value)); |
355 | | return static_cast<I>(res); |
356 | | } |
357 | | |
358 | | template<UnsignedIntegral U> |
359 | | ALWAYS_INLINE U round_to(float value) |
360 | | { |
361 | | if constexpr (sizeof(U) <= sizeof(u32)) { |
362 | | u32 res; |
363 | | asm("fcvtnu %w0, %s1" |
364 | | : "=r"(res) |
365 | | : "w"(value)); |
366 | | return static_cast<U>(res); |
367 | | } |
368 | | i64 res; |
369 | | asm("fcvtnu %0, %s1" |
370 | | : "=r"(res) |
371 | | : "w"(value)); |
372 | | return static_cast<U>(res); |
373 | | } |
374 | | |
375 | | template<UnsignedIntegral U> |
376 | | ALWAYS_INLINE U round_to(double value) |
377 | | { |
378 | | if constexpr (sizeof(U) <= sizeof(u32)) { |
379 | | u32 res; |
380 | | asm("fcvtns %w0, %d1" |
381 | | : "=r"(res) |
382 | | : "w"(value)); |
383 | | return static_cast<U>(res); |
384 | | } |
385 | | i64 res; |
386 | | asm("fcvtns %0, %d1" |
387 | | : "=r"(res) |
388 | | : "w"(value)); |
389 | | return static_cast<U>(res); |
390 | | } |
391 | | |
392 | | #else |
393 | | template<Integral I, FloatingPoint P> |
394 | | ALWAYS_INLINE I round_to(P value) |
395 | | { |
396 | | if constexpr (IsSame<P, long double>) |
397 | | return static_cast<I>(__builtin_llrintl(value)); |
398 | | if constexpr (IsSame<P, double>) |
399 | | return static_cast<I>(__builtin_llrint(value)); |
400 | | if constexpr (IsSame<P, float>) |
401 | | return static_cast<I>(__builtin_llrintf(value)); |
402 | | } |
403 | | #endif |
404 | | |
405 | | } |
406 | | |
407 | | using Rounding::ceil; |
408 | | using Rounding::floor; |
409 | | using Rounding::rint; |
410 | | using Rounding::round; |
411 | | using Rounding::round_to; |
412 | | using Rounding::trunc; |
413 | | |
414 | | namespace Division { |
415 | | template<FloatingPoint T> |
416 | | constexpr T fmod(T x, T y) |
417 | | { |
418 | | CONSTEXPR_STATE(fmod, x, y); |
419 | | |
420 | | #if ARCH(X86_64) |
421 | | u16 fpu_status; |
422 | | do { |
423 | | asm( |
424 | | "fprem\n" |
425 | | "fnstsw %%ax\n" |
426 | | : "+t"(x), "=a"(fpu_status) |
427 | | : "u"(y)); |
428 | | } while (fpu_status & 0x400); |
429 | | return x; |
430 | | #else |
431 | | # if defined(AK_OS_SERENITY) |
432 | | // FIXME: This is a very naive implementation. |
433 | | |
434 | | if (__builtin_isnan(x)) |
435 | | return x; |
436 | | if (__builtin_isnan(y)) |
437 | | return y; |
438 | | |
439 | | // SPECIAL VALUES |
440 | | // fmod(±0, y) returns ±0 if y is neither 0 nor NaN. |
441 | | if (x == 0 && y != 0) |
442 | | return x; |
443 | | |
444 | | // fmod(x, y) returns a NaN and raises the "invalid" floating-point exception for x infinite or y zero. |
445 | | // FIXME: Exception. |
446 | | if (__builtin_isinf(x) || y == 0) |
447 | | return NaN<T>; |
448 | | |
449 | | // fmod(x, ±infinity) returns x for x not infinite. |
450 | | if (__builtin_isinf(y)) |
451 | | return x; |
452 | | |
453 | | // Range reduction: handle negative x and y. |
454 | | if (y < 0) |
455 | | return fmod(x, -y); |
456 | | if (x < 0) |
457 | | return -fmod(-x, y); |
458 | | |
459 | | // x is x_mantissa * 2**x_exponent, y is y_mantissa * 2**y_exponent. |
460 | | // (Both are positive at this point.) |
461 | | // If y_exponent > x_exponent, we are done and can return x. |
462 | | // If y_exponent == x_exponent, we can return (x_mantissa % y_mantissa) * 2**x_exponent. |
463 | | // If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from |
464 | | // the exponent into the mantissa. |
465 | | |
466 | | auto x_bits = FloatExtractor<T>::from_float(x); |
467 | | typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; |
468 | | |
469 | | auto y_bits = FloatExtractor<T>::from_float(y); |
470 | | typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; |
471 | | |
472 | | // FIXME: Handle denormals. For now, treat them as 0. |
473 | | if (x_exponent == 0 && y_exponent != 0) |
474 | | return 0; |
475 | | if (y_exponent == 0) |
476 | | return NaN<T>; |
477 | | |
478 | | if (y_exponent > x_exponent) |
479 | | return x; |
480 | | |
481 | | // FIXME: This is wrong for f80. |
482 | | typename FloatExtractor<T>::ComponentType implicit_mantissa_bit = 1; |
483 | | implicit_mantissa_bit <<= FloatExtractor<T>::mantissa_bits; |
484 | | |
485 | | typename FloatExtractor<T>::ComponentType x_mantissa = x_bits.mantissa | implicit_mantissa_bit; |
486 | | typename FloatExtractor<T>::ComponentType y_mantissa = y_bits.mantissa | implicit_mantissa_bit; |
487 | | |
488 | | while (y_exponent < x_exponent) { |
489 | | // This is ok because (x % (y * 2**n)) divides (x % y) for all n > 0. |
490 | | x_mantissa %= y_mantissa; |
491 | | |
492 | | x_mantissa <<= 1; |
493 | | --x_exponent; |
494 | | } |
495 | | |
496 | | x_mantissa %= y_mantissa; |
497 | | |
498 | | if (x_mantissa == 0) |
499 | | return 0; |
500 | | |
501 | | // We're done and want to return x_mantissa * 2 ** x_exponent. |
502 | | // But x_mantissa might not have a leading 1 bit, so we have to realign first. |
503 | | // Mantissa is mantissa_bits long, count_leading_zeroes() counts in ComponentType, adjust: |
504 | | auto const always_zero_bits = sizeof(typename FloatExtractor<T>::ComponentType) * 8 - (FloatExtractor<T>::mantissa_bits + 1); // +1 for implicit 1 bit |
505 | | auto shift = count_leading_zeroes(x_mantissa) - always_zero_bits; |
506 | | |
507 | | if (x_exponent < shift) { |
508 | | // FIXME: Make a real denormal. |
509 | | return 0; |
510 | | } |
511 | | |
512 | | x_mantissa <<= shift; |
513 | | x_exponent -= shift; |
514 | | |
515 | | x_bits.exponent = x_exponent; |
516 | | x_bits.mantissa = x_mantissa; |
517 | | return x_bits.to_float(); |
518 | | # else |
519 | | CALL_BUILTIN(fmod, x, y); |
520 | | # endif |
521 | | #endif |
522 | | } |
523 | | |
524 | | template<FloatingPoint T> |
525 | | constexpr T remainder(T x, T y) |
526 | | { |
527 | | CONSTEXPR_STATE(remainder, x, y); |
528 | | |
529 | | #if ARCH(X86_64) |
530 | | u16 fpu_status; |
531 | | do { |
532 | | asm( |
533 | | "fprem1\n" |
534 | | "fnstsw %%ax\n" |
535 | | : "+t"(x), "=a"(fpu_status) |
536 | | : "u"(y)); |
537 | | } while (fpu_status & 0x400); |
538 | | return x; |
539 | | #else |
540 | | # if defined(AK_OS_SERENITY) |
541 | | // TODO: Add implementation for this function. |
542 | | TODO(); |
543 | | # endif |
544 | | CALL_BUILTIN(remainder, x, y); |
545 | | #endif |
546 | | } |
547 | | } |
548 | | |
549 | | using Division::fmod; |
550 | | using Division::remainder; |
551 | | |
552 | | template<FloatingPoint T> |
553 | | constexpr T sqrt(T x) |
554 | 43.7M | { |
555 | 43.7M | CONSTEXPR_STATE(sqrt, x); |
556 | | |
557 | 43.7M | #if ARCH(X86_64) |
558 | 43.7M | if constexpr (IsSame<T, float>) { |
559 | 40.9M | float res; |
560 | 40.9M | asm("sqrtss %1, %0" |
561 | 40.9M | : "=x"(res) |
562 | 40.9M | : "x"(x)); |
563 | 40.9M | return res; |
564 | 40.9M | } |
565 | 2.84M | if constexpr (IsSame<T, double>) { |
566 | 2.84M | double res; |
567 | 2.84M | asm("sqrtsd %1, %0" |
568 | 2.84M | : "=x"(res) |
569 | 2.84M | : "x"(x)); |
570 | 2.84M | return res; |
571 | 2.84M | } |
572 | 0 | T res; |
573 | 43.7M | asm("fsqrt" |
574 | 43.7M | : "=t"(res) |
575 | 43.7M | : "0"(x)); |
576 | 43.7M | return res; |
577 | | #elif ARCH(AARCH64) |
578 | | AARCH64_INSTRUCTION(fsqrt, x); |
579 | | #elif ARCH(RISCV64) |
580 | | if constexpr (IsSame<T, float>) { |
581 | | float res; |
582 | | asm("fsqrt.s %0, %1" |
583 | | : "=f"(res) |
584 | | : "f"(x)); |
585 | | return res; |
586 | | } |
587 | | if constexpr (IsSame<T, double>) { |
588 | | double res; |
589 | | asm("fsqrt.d %0, %1" |
590 | | : "=f"(res) |
591 | | : "f"(x)); |
592 | | return res; |
593 | | } |
594 | | if constexpr (IsSame<T, long double>) |
595 | | TODO_RISCV64(); |
596 | | #else |
597 | | # if defined(AK_OS_SERENITY) |
598 | | // TODO: Add implementation for this function. |
599 | | TODO(); |
600 | | # endif |
601 | | CALL_BUILTIN(sqrt, x); |
602 | | #endif |
603 | 43.7M | } _ZN2AK4sqrtITkNS_8Concepts13FloatingPointEfEET_S2_ Line | Count | Source | 554 | 40.9M | { | 555 | 40.9M | CONSTEXPR_STATE(sqrt, x); | 556 | | | 557 | 40.9M | #if ARCH(X86_64) | 558 | 40.9M | if constexpr (IsSame<T, float>) { | 559 | 40.9M | float res; | 560 | 40.9M | asm("sqrtss %1, %0" | 561 | 40.9M | : "=x"(res) | 562 | 40.9M | : "x"(x)); | 563 | 40.9M | return res; | 564 | 40.9M | } | 565 | | if constexpr (IsSame<T, double>) { | 566 | | double res; | 567 | | asm("sqrtsd %1, %0" | 568 | | : "=x"(res) | 569 | | : "x"(x)); | 570 | | return res; | 571 | | } | 572 | 40.9M | T res; | 573 | 40.9M | asm("fsqrt" | 574 | 40.9M | : "=t"(res) | 575 | 40.9M | : "0"(x)); | 576 | 40.9M | return res; | 577 | | #elif ARCH(AARCH64) | 578 | | AARCH64_INSTRUCTION(fsqrt, x); | 579 | | #elif ARCH(RISCV64) | 580 | | if constexpr (IsSame<T, float>) { | 581 | | float res; | 582 | | asm("fsqrt.s %0, %1" | 583 | | : "=f"(res) | 584 | | : "f"(x)); | 585 | | return res; | 586 | | } | 587 | | if constexpr (IsSame<T, double>) { | 588 | | double res; | 589 | | asm("fsqrt.d %0, %1" | 590 | | : "=f"(res) | 591 | | : "f"(x)); | 592 | | return res; | 593 | | } | 594 | | if constexpr (IsSame<T, long double>) | 595 | | TODO_RISCV64(); | 596 | | #else | 597 | | # if defined(AK_OS_SERENITY) | 598 | | // TODO: Add implementation for this function. | 599 | | TODO(); | 600 | | # endif | 601 | | CALL_BUILTIN(sqrt, x); | 602 | | #endif | 603 | 40.9M | } |
_ZN2AK4sqrtITkNS_8Concepts13FloatingPointEdEET_S2_ Line | Count | Source | 554 | 2.84M | { | 555 | 2.84M | CONSTEXPR_STATE(sqrt, x); | 556 | | | 557 | 2.84M | #if ARCH(X86_64) | 558 | | if constexpr (IsSame<T, float>) { | 559 | | float res; | 560 | | asm("sqrtss %1, %0" | 561 | | : "=x"(res) | 562 | | : "x"(x)); | 563 | | return res; | 564 | | } | 565 | 2.84M | if constexpr (IsSame<T, double>) { | 566 | 2.84M | double res; | 567 | 2.84M | asm("sqrtsd %1, %0" | 568 | 2.84M | : "=x"(res) | 569 | 2.84M | : "x"(x)); | 570 | 2.84M | return res; | 571 | 2.84M | } | 572 | 0 | T res; | 573 | 2.84M | asm("fsqrt" | 574 | 2.84M | : "=t"(res) | 575 | 2.84M | : "0"(x)); | 576 | 2.84M | return res; | 577 | | #elif ARCH(AARCH64) | 578 | | AARCH64_INSTRUCTION(fsqrt, x); | 579 | | #elif ARCH(RISCV64) | 580 | | if constexpr (IsSame<T, float>) { | 581 | | float res; | 582 | | asm("fsqrt.s %0, %1" | 583 | | : "=f"(res) | 584 | | : "f"(x)); | 585 | | return res; | 586 | | } | 587 | | if constexpr (IsSame<T, double>) { | 588 | | double res; | 589 | | asm("fsqrt.d %0, %1" | 590 | | : "=f"(res) | 591 | | : "f"(x)); | 592 | | return res; | 593 | | } | 594 | | if constexpr (IsSame<T, long double>) | 595 | | TODO_RISCV64(); | 596 | | #else | 597 | | # if defined(AK_OS_SERENITY) | 598 | | // TODO: Add implementation for this function. | 599 | | TODO(); | 600 | | # endif | 601 | | CALL_BUILTIN(sqrt, x); | 602 | | #endif | 603 | 2.84M | } |
|
604 | | |
605 | | template<FloatingPoint T> |
606 | | constexpr T cbrt(T x) |
607 | | { |
608 | | CONSTEXPR_STATE(cbrt, x); |
609 | | if (__builtin_isinf(x) || x == 0) |
610 | | return x; |
611 | | if (x < 0) |
612 | | return -cbrt(-x); |
613 | | |
614 | | T r = x; |
615 | | T ex = 0; |
616 | | |
617 | | while (r < 0.125l) { |
618 | | r *= 8; |
619 | | ex--; |
620 | | } |
621 | | while (r > 1.0l) { |
622 | | r *= 0.125l; |
623 | | ex++; |
624 | | } |
625 | | |
626 | | r = (-0.46946116l * r + 1.072302l) * r + 0.3812513l; |
627 | | |
628 | | while (ex < 0) { |
629 | | r *= 0.5l; |
630 | | ex++; |
631 | | } |
632 | | while (ex > 0) { |
633 | | r *= 2.0l; |
634 | | ex--; |
635 | | } |
636 | | |
637 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
638 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
639 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
640 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
641 | | |
642 | | return r; |
643 | | } |
644 | | |
645 | | namespace Trigonometry { |
646 | | |
647 | | template<FloatingPoint T> |
648 | | constexpr T hypot(T x, T y) |
649 | 21.1M | { |
650 | 21.1M | return sqrt(x * x + y * y); |
651 | 21.1M | } |
652 | | |
653 | | template<FloatingPoint T> |
654 | | constexpr T sin(T angle) |
655 | 19.7M | { |
656 | 19.7M | CONSTEXPR_STATE(sin, angle); |
657 | | |
658 | 19.7M | #if ARCH(X86_64) |
659 | 19.7M | T ret; |
660 | 19.7M | asm( |
661 | 19.7M | "fsin" |
662 | 19.7M | : "=t"(ret) |
663 | 19.7M | : "0"(angle)); |
664 | 19.7M | return ret; |
665 | | #else |
666 | | # if defined(AK_OS_SERENITY) |
667 | | T sign = 1; |
668 | | if (angle < 0) { |
669 | | angle = -angle; |
670 | | sign = -1; |
671 | | } |
672 | | |
673 | | if (angle >= 2 * Pi<T>) |
674 | | angle = fmod(angle, 2 * Pi<T>); |
675 | | |
676 | | if (angle >= Pi<T>) { |
677 | | angle = angle - Pi<T>; |
678 | | sign = -sign; |
679 | | } |
680 | | |
681 | | if (angle > Pi<T> / 2) |
682 | | angle = Pi<T> - angle; |
683 | | |
684 | | // https://github.com/samhocevar/lolremez/wiki/Tutorial-4-of-5%3A-fixing-lower-order-parameters |
685 | | auto f = [](T x) { |
686 | | if constexpr (IsSame<T, f32>) { |
687 | | // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))" |
688 | | // "Estimated max error: 4.618689e-9" |
689 | | f32 u = 2.6000548e-06f; |
690 | | u = u * x + -0.00019806615f; |
691 | | u = u * x + 0.0083330171f; |
692 | | return u * x + -0.16666657f; |
693 | | } else { |
694 | | // FIXME: Could do something custom for long double. |
695 | | // lolremez --degree 6 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))" |
696 | | // "Estimated max error: 1.1015766629825144e-16" |
697 | | T u = -7.3646464502210486e-13; |
698 | | u = u * x + 1.6047301196685753e-10; |
699 | | u = u * x + -2.5051851497012596e-08; |
700 | | u = u * x + 2.7557316077007725e-06; |
701 | | u = u * x + -0.00019841269820094207; |
702 | | u = u * x + 0.0083333333332628792; |
703 | | return u * x + -0.16666666666665811; |
704 | | } |
705 | | }; |
706 | | T angle_squared = angle * angle; |
707 | | return sign * (angle + angle * angle_squared * f(angle_squared)); |
708 | | # else |
709 | | CALL_BUILTIN(sin, angle); |
710 | | # endif |
711 | | #endif |
712 | 19.7M | } _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 655 | 19.7M | { | 656 | 19.7M | CONSTEXPR_STATE(sin, angle); | 657 | | | 658 | 19.7M | #if ARCH(X86_64) | 659 | 19.7M | T ret; | 660 | 19.7M | asm( | 661 | 19.7M | "fsin" | 662 | 19.7M | : "=t"(ret) | 663 | 19.7M | : "0"(angle)); | 664 | 19.7M | return ret; | 665 | | #else | 666 | | # if defined(AK_OS_SERENITY) | 667 | | T sign = 1; | 668 | | if (angle < 0) { | 669 | | angle = -angle; | 670 | | sign = -1; | 671 | | } | 672 | | | 673 | | if (angle >= 2 * Pi<T>) | 674 | | angle = fmod(angle, 2 * Pi<T>); | 675 | | | 676 | | if (angle >= Pi<T>) { | 677 | | angle = angle - Pi<T>; | 678 | | sign = -sign; | 679 | | } | 680 | | | 681 | | if (angle > Pi<T> / 2) | 682 | | angle = Pi<T> - angle; | 683 | | | 684 | | // https://github.com/samhocevar/lolremez/wiki/Tutorial-4-of-5%3A-fixing-lower-order-parameters | 685 | | auto f = [](T x) { | 686 | | if constexpr (IsSame<T, f32>) { | 687 | | // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))" | 688 | | // "Estimated max error: 4.618689e-9" | 689 | | f32 u = 2.6000548e-06f; | 690 | | u = u * x + -0.00019806615f; | 691 | | u = u * x + 0.0083330171f; | 692 | | return u * x + -0.16666657f; | 693 | | } else { | 694 | | // FIXME: Could do something custom for long double. | 695 | | // lolremez --degree 6 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))" | 696 | | // "Estimated max error: 1.1015766629825144e-16" | 697 | | T u = -7.3646464502210486e-13; | 698 | | u = u * x + 1.6047301196685753e-10; | 699 | | u = u * x + -2.5051851497012596e-08; | 700 | | u = u * x + 2.7557316077007725e-06; | 701 | | u = u * x + -0.00019841269820094207; | 702 | | u = u * x + 0.0083333333332628792; | 703 | | return u * x + -0.16666666666665811; | 704 | | } | 705 | | }; | 706 | | T angle_squared = angle * angle; | 707 | | return sign * (angle + angle * angle_squared * f(angle_squared)); | 708 | | # else | 709 | | CALL_BUILTIN(sin, angle); | 710 | | # endif | 711 | | #endif | 712 | 19.7M | } |
Unexecuted instantiation: _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEdEET_S3_ |
713 | | |
714 | | template<FloatingPoint T> |
715 | | constexpr T cos(T angle) |
716 | 5.77k | { |
717 | 5.77k | CONSTEXPR_STATE(cos, angle); |
718 | | |
719 | 5.77k | #if ARCH(X86_64) |
720 | 5.77k | T ret; |
721 | 5.77k | asm( |
722 | 5.77k | "fcos" |
723 | 5.77k | : "=t"(ret) |
724 | 5.77k | : "0"(angle)); |
725 | 5.77k | return ret; |
726 | | #else |
727 | | # if defined(AK_OS_SERENITY) |
728 | | if (angle < 0) |
729 | | angle = -angle; |
730 | | |
731 | | if (angle >= 2 * Pi<T>) |
732 | | angle = fmod(angle, 2 * Pi<T>); |
733 | | |
734 | | T sign = 1; |
735 | | if (angle >= Pi<T>) { |
736 | | angle = angle - Pi<T>; |
737 | | sign = -1; |
738 | | } |
739 | | |
740 | | if (angle > Pi<T> / 2) { |
741 | | angle = Pi<T> - angle; |
742 | | sign = -sign; |
743 | | } |
744 | | |
745 | | // https://github.com/samhocevar/lolremez/wiki/Tutorial-3-of-5:-changing-variables-for-simpler-polynomials |
746 | | // for cos(x): cos(x) - 1 is a function of x^2 terms, so we do the substitution on that page like |
747 | | // max | cos(x) - 1 - x^2 Q(x^2) | and then set y = x^2. That yields: |
748 | | // max | (cos(sqrt(y)) - 1) - y Q(y) |. Dividing through with y (instead of sqrt(y) as in the sin() case) gives us: |
749 | | auto f = [](T x) { |
750 | | if constexpr (IsSame<T, f32>) { |
751 | | // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x" "1/x" |
752 | | // "Estimated max error: 5.2720126e-8" |
753 | | f32 u = 2.3194387e-05f; |
754 | | u = u * x + -0.0013855927f; |
755 | | u = u * x + 0.041663989f; |
756 | | return u * x + -0.49999931f; |
757 | | } else { |
758 | | // lolremez --degree 6 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x" "1/x" |
759 | | // "Estimated max error: 2.0880269759116624e-15" |
760 | | T u = -1.101249182846601e-11; |
761 | | u = u * x + 2.0858735176345955e-09; |
762 | | u = u * x + -2.7556950755056579e-07; |
763 | | u = u * x + 2.4801583156341611e-05; |
764 | | u = u * x + -0.001388888886393419; |
765 | | u = u * x + 0.041666666665954213; |
766 | | return u * x + -0.49999999999993117; |
767 | | } |
768 | | }; |
769 | | T angle_squared = angle * angle; |
770 | | return sign * (1 + angle_squared * f(angle_squared)); |
771 | | # else |
772 | | CALL_BUILTIN(cos, angle); |
773 | | # endif |
774 | | #endif |
775 | 5.77k | } _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 716 | 5.77k | { | 717 | 5.77k | CONSTEXPR_STATE(cos, angle); | 718 | | | 719 | 5.77k | #if ARCH(X86_64) | 720 | 5.77k | T ret; | 721 | 5.77k | asm( | 722 | 5.77k | "fcos" | 723 | 5.77k | : "=t"(ret) | 724 | 5.77k | : "0"(angle)); | 725 | 5.77k | return ret; | 726 | | #else | 727 | | # if defined(AK_OS_SERENITY) | 728 | | if (angle < 0) | 729 | | angle = -angle; | 730 | | | 731 | | if (angle >= 2 * Pi<T>) | 732 | | angle = fmod(angle, 2 * Pi<T>); | 733 | | | 734 | | T sign = 1; | 735 | | if (angle >= Pi<T>) { | 736 | | angle = angle - Pi<T>; | 737 | | sign = -1; | 738 | | } | 739 | | | 740 | | if (angle > Pi<T> / 2) { | 741 | | angle = Pi<T> - angle; | 742 | | sign = -sign; | 743 | | } | 744 | | | 745 | | // https://github.com/samhocevar/lolremez/wiki/Tutorial-3-of-5:-changing-variables-for-simpler-polynomials | 746 | | // for cos(x): cos(x) - 1 is a function of x^2 terms, so we do the substitution on that page like | 747 | | // max | cos(x) - 1 - x^2 Q(x^2) | and then set y = x^2. That yields: | 748 | | // max | (cos(sqrt(y)) - 1) - y Q(y) |. Dividing through with y (instead of sqrt(y) as in the sin() case) gives us: | 749 | | auto f = [](T x) { | 750 | | if constexpr (IsSame<T, f32>) { | 751 | | // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x" "1/x" | 752 | | // "Estimated max error: 5.2720126e-8" | 753 | | f32 u = 2.3194387e-05f; | 754 | | u = u * x + -0.0013855927f; | 755 | | u = u * x + 0.041663989f; | 756 | | return u * x + -0.49999931f; | 757 | | } else { | 758 | | // lolremez --degree 6 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x" "1/x" | 759 | | // "Estimated max error: 2.0880269759116624e-15" | 760 | | T u = -1.101249182846601e-11; | 761 | | u = u * x + 2.0858735176345955e-09; | 762 | | u = u * x + -2.7556950755056579e-07; | 763 | | u = u * x + 2.4801583156341611e-05; | 764 | | u = u * x + -0.001388888886393419; | 765 | | u = u * x + 0.041666666665954213; | 766 | | return u * x + -0.49999999999993117; | 767 | | } | 768 | | }; | 769 | | T angle_squared = angle * angle; | 770 | | return sign * (1 + angle_squared * f(angle_squared)); | 771 | | # else | 772 | | CALL_BUILTIN(cos, angle); | 773 | | # endif | 774 | | #endif | 775 | 5.77k | } |
Unexecuted instantiation: _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEdEET_S3_ |
776 | | |
777 | | template<FloatingPoint T> |
778 | | constexpr void sincos(T angle, T& sin_val, T& cos_val) |
779 | 45.4M | { |
780 | 45.4M | if (is_constant_evaluated()) { |
781 | 0 | sin_val = sin(angle); |
782 | 0 | cos_val = cos(angle); |
783 | 0 | return; |
784 | 0 | } |
785 | 45.4M | #if ARCH(X86_64) |
786 | 45.4M | asm( |
787 | 45.4M | "fsincos" |
788 | 45.4M | : "=t"(cos_val), "=u"(sin_val) |
789 | 45.4M | : "0"(angle)); |
790 | | #else |
791 | | sin_val = sin(angle); |
792 | | cos_val = cos(angle); |
793 | | #endif |
794 | 45.4M | } _ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEfEEvT_RS3_S4_ Line | Count | Source | 779 | 43.0M | { | 780 | 43.0M | if (is_constant_evaluated()) { | 781 | 0 | sin_val = sin(angle); | 782 | 0 | cos_val = cos(angle); | 783 | 0 | return; | 784 | 0 | } | 785 | 43.0M | #if ARCH(X86_64) | 786 | 43.0M | asm( | 787 | 43.0M | "fsincos" | 788 | 43.0M | : "=t"(cos_val), "=u"(sin_val) | 789 | 43.0M | : "0"(angle)); | 790 | | #else | 791 | | sin_val = sin(angle); | 792 | | cos_val = cos(angle); | 793 | | #endif | 794 | 43.0M | } |
_ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEdEEvT_RS3_S4_ Line | Count | Source | 779 | 2.49M | { | 780 | 2.49M | if (is_constant_evaluated()) { | 781 | 0 | sin_val = sin(angle); | 782 | 0 | cos_val = cos(angle); | 783 | 0 | return; | 784 | 0 | } | 785 | 2.49M | #if ARCH(X86_64) | 786 | 2.49M | asm( | 787 | 2.49M | "fsincos" | 788 | 2.49M | : "=t"(cos_val), "=u"(sin_val) | 789 | 2.49M | : "0"(angle)); | 790 | | #else | 791 | | sin_val = sin(angle); | 792 | | cos_val = cos(angle); | 793 | | #endif | 794 | 2.49M | } |
|
795 | | |
796 | | template<FloatingPoint T> |
797 | | constexpr T tan(T angle) |
798 | 19.9M | { |
799 | 19.9M | CONSTEXPR_STATE(tan, angle); |
800 | | |
801 | 19.9M | #if ARCH(X86_64) |
802 | 19.9M | T ret, one; |
803 | 19.9M | asm( |
804 | 19.9M | "fptan" |
805 | 19.9M | : "=t"(one), "=u"(ret) |
806 | 19.9M | : "0"(angle)); |
807 | | |
808 | 19.9M | return ret; |
809 | | #else |
810 | | # if defined(AK_OS_SERENITY) |
811 | | return sin(angle) / cos(angle); |
812 | | # else |
813 | | CALL_BUILTIN(tan, angle); |
814 | | # endif |
815 | | #endif |
816 | 19.9M | } |
817 | | |
818 | | template<FloatingPoint T> |
819 | | constexpr T asin(T x) |
820 | | { |
821 | | CONSTEXPR_STATE(asin, x); |
822 | | |
823 | | if (x < 0) |
824 | | return -asin(-x); |
825 | | |
826 | | if (x > 1) |
827 | | return NaN<T>; |
828 | | |
829 | | if (x > (T)0.5) { |
830 | | // asin(x) = pi/2 - 2 * asin(sqrt((1 - x) / 2)) |
831 | | // If 0.5 < x <= 1, then sqrt((1 - x) / 2 ) < 0.5, |
832 | | // and the recursion will go down the `<= 0.5` branch. |
833 | | return Pi<T> / 2 - 2 * asin(sqrt((1 - x) / 2)); |
834 | | } |
835 | | |
836 | | T squared = x * x; |
837 | | T value = x; |
838 | | T i = x * squared; |
839 | | value += i * Details::product_odd<1>() / Details::product_even<2>() / 3; |
840 | | i *= squared; |
841 | | value += i * Details::product_odd<3>() / Details::product_even<4>() / 5; |
842 | | i *= squared; |
843 | | value += i * Details::product_odd<5>() / Details::product_even<6>() / 7; |
844 | | i *= squared; |
845 | | value += i * Details::product_odd<7>() / Details::product_even<8>() / 9; |
846 | | i *= squared; |
847 | | value += i * Details::product_odd<9>() / Details::product_even<10>() / 11; |
848 | | i *= squared; |
849 | | value += i * Details::product_odd<11>() / Details::product_even<12>() / 13; |
850 | | i *= squared; |
851 | | value += i * Details::product_odd<13>() / Details::product_even<14>() / 15; |
852 | | i *= squared; |
853 | | value += i * Details::product_odd<15>() / Details::product_even<16>() / 17; |
854 | | return value; |
855 | | } |
856 | | |
857 | | template<FloatingPoint T> |
858 | | constexpr T acos(T value) |
859 | | { |
860 | | CONSTEXPR_STATE(acos, value); |
861 | | |
862 | | // FIXME: I am naive |
863 | | return static_cast<T>(0.5) * Pi<T> - asin<T>(value); |
864 | | } |
865 | | |
866 | | template<FloatingPoint T> |
867 | | constexpr T atan(T value) |
868 | | { |
869 | | CONSTEXPR_STATE(atan, value); |
870 | | |
871 | | #if ARCH(X86_64) |
872 | | T ret; |
873 | | asm( |
874 | | "fld1\n" |
875 | | "fpatan\n" |
876 | | : "=t"(ret) |
877 | | : "0"(value)); |
878 | | return ret; |
879 | | #else |
880 | | # if defined(AK_OS_SERENITY) |
881 | | if (value < 0) |
882 | | return -atan(-value); |
883 | | |
884 | | if (value > 1) |
885 | | return Pi<T> / 2 - atan(1 / value); |
886 | | |
887 | | // atan is an odd function a leading factor of 1, so this uses the same substitutions as sin(). |
888 | | // See there for a description. |
889 | | auto f = [](T x) { |
890 | | if constexpr (IsSame<T, f32>) { |
891 | | // lolremez --float --degree 7 --range "1e-50:1" "(atan(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))" |
892 | | // "Estimated max error: 7.351572e-9" |
893 | | float u = 0.0026222446f; |
894 | | u = u * x + -0.015132537f; |
895 | | u = u * x + 0.041121863f; |
896 | | u = u * x + -0.073667064f; |
897 | | u = u * x + 0.10573932f; |
898 | | u = u * x + -0.14185975f; |
899 | | u = u * x + 0.19990396f; |
900 | | return u * x + -0.33332986f; |
901 | | } else { |
902 | | // FIXME: Could do something custom for long double. |
903 | | // lolremez --degree 15 --range "1e-50:1" "(atan(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))" |
904 | | // "Estimated max error: 2.695747111292741e-15" |
905 | | T u = 6.4855700791782353e-05; |
906 | | u = u * x + -0.00062980993515420608; |
907 | | u = u * x + 0.0028877745234626882; |
908 | | u = u * x + -0.0083913659122280861; |
909 | | u = u * x + 0.01759373283992496; |
910 | | u = u * x + -0.028943865588822337; |
911 | | u = u * x + 0.04001711781175539; |
912 | | u = u * x + -0.049493567473208426; |
913 | | u = u * x + 0.05782092815821073; |
914 | | u = u * x + -0.066423996784058609; |
915 | | u = u * x + 0.076879768543915233; |
916 | | u = u * x + -0.090903598304650779; |
917 | | u = u * x + 0.11111064186237864; |
918 | | u = u * x + -0.14285711801574916; |
919 | | u = u * x + 0.19999999929660117; |
920 | | return u * x + -0.33333333332571796; |
921 | | } |
922 | | }; |
923 | | T squared = value * value; |
924 | | return value + value * squared * f(squared); |
925 | | # endif |
926 | | CALL_BUILTIN(atan, value); |
927 | | #endif |
928 | | } |
929 | | |
930 | | template<FloatingPoint T> |
931 | | constexpr T atan2(T y, T x) |
932 | 4.97M | { |
933 | 4.97M | CONSTEXPR_STATE(atan2, y, x); |
934 | | |
935 | 4.97M | #if ARCH(X86_64) |
936 | 4.97M | T ret; |
937 | 4.97M | asm("fpatan" |
938 | 4.97M | : "=t"(ret) |
939 | 4.97M | : "0"(x), "u"(y) |
940 | 4.97M | : "st(1)"); |
941 | 4.97M | return ret; |
942 | | #else |
943 | | # if defined(AK_OS_SERENITY) |
944 | | if (__builtin_isnan(y)) |
945 | | return y; |
946 | | if (__builtin_isnan(x)) |
947 | | return x; |
948 | | |
949 | | // SPECIAL VALUES |
950 | | // atan2(±0, -0) returns ±pi. |
951 | | if (y == 0 && x == 0 && signbit(x)) |
952 | | return copysign(Pi<T>, y); |
953 | | |
954 | | // atan2(±0, +0) returns ±0. |
955 | | if (y == 0 && x == 0 && !signbit(x)) |
956 | | return y; |
957 | | |
958 | | // atan2(±0, x) returns ±pi for x < 0. |
959 | | if (y == 0 && x < 0) |
960 | | return copysign(Pi<T>, y); |
961 | | |
962 | | // atan2(±0, x) returns ±0 for x > 0. |
963 | | if (y == 0 && x > 0) |
964 | | return y; |
965 | | |
966 | | // atan2(y, ±0) returns +pi/2 for y > 0. |
967 | | if (y > 0 && x == 0) |
968 | | return Pi<T> / 2; |
969 | | |
970 | | // atan2(y, ±0) returns -pi/2 for y < 0. |
971 | | if (y < 0 && x == 0) |
972 | | return -Pi<T> / 2; |
973 | | |
974 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. |
975 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) |
976 | | return copysign(Pi<T>, y); |
977 | | |
978 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. |
979 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) |
980 | | return copysign(static_cast<T>(0), y); |
981 | | |
982 | | // atan2(±infinity, x) returns ±pi/2 for finite x. |
983 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) |
984 | | return copysign(Pi<T> / 2, y); |
985 | | |
986 | | // atan2(±infinity, -infinity) returns ±3*pi/4. |
987 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) |
988 | | return copysign(3 * Pi<T> / 4, y); |
989 | | |
990 | | // atan2(±infinity, +infinity) returns ±pi/4. |
991 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) |
992 | | return copysign(Pi<T> / 4, y); |
993 | | |
994 | | // Check quadrant, going counterclockwise. |
995 | | if (y > 0 && x > 0) |
996 | | return atan(y / x); |
997 | | if (y > 0 && x < 0) |
998 | | return atan(y / x) + Pi<T>; |
999 | | if (y < 0 && x < 0) |
1000 | | return atan(y / x) - Pi<T>; |
1001 | | // y < 0 && x > 0 |
1002 | | return atan(y / x); |
1003 | | # else |
1004 | | CALL_BUILTIN(atan2, y, x); |
1005 | | # endif |
1006 | | #endif |
1007 | 4.97M | } _ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEfEET_S3_S3_ Line | Count | Source | 932 | 4.29k | { | 933 | 4.29k | CONSTEXPR_STATE(atan2, y, x); | 934 | | | 935 | 4.29k | #if ARCH(X86_64) | 936 | 4.29k | T ret; | 937 | 4.29k | asm("fpatan" | 938 | 4.29k | : "=t"(ret) | 939 | 4.29k | : "0"(x), "u"(y) | 940 | 4.29k | : "st(1)"); | 941 | 4.29k | return ret; | 942 | | #else | 943 | | # if defined(AK_OS_SERENITY) | 944 | | if (__builtin_isnan(y)) | 945 | | return y; | 946 | | if (__builtin_isnan(x)) | 947 | | return x; | 948 | | | 949 | | // SPECIAL VALUES | 950 | | // atan2(±0, -0) returns ±pi. | 951 | | if (y == 0 && x == 0 && signbit(x)) | 952 | | return copysign(Pi<T>, y); | 953 | | | 954 | | // atan2(±0, +0) returns ±0. | 955 | | if (y == 0 && x == 0 && !signbit(x)) | 956 | | return y; | 957 | | | 958 | | // atan2(±0, x) returns ±pi for x < 0. | 959 | | if (y == 0 && x < 0) | 960 | | return copysign(Pi<T>, y); | 961 | | | 962 | | // atan2(±0, x) returns ±0 for x > 0. | 963 | | if (y == 0 && x > 0) | 964 | | return y; | 965 | | | 966 | | // atan2(y, ±0) returns +pi/2 for y > 0. | 967 | | if (y > 0 && x == 0) | 968 | | return Pi<T> / 2; | 969 | | | 970 | | // atan2(y, ±0) returns -pi/2 for y < 0. | 971 | | if (y < 0 && x == 0) | 972 | | return -Pi<T> / 2; | 973 | | | 974 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. | 975 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) | 976 | | return copysign(Pi<T>, y); | 977 | | | 978 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. | 979 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) | 980 | | return copysign(static_cast<T>(0), y); | 981 | | | 982 | | // atan2(±infinity, x) returns ±pi/2 for finite x. | 983 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) | 984 | | return copysign(Pi<T> / 2, y); | 985 | | | 986 | | // atan2(±infinity, -infinity) returns ±3*pi/4. | 987 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) | 988 | | return copysign(3 * Pi<T> / 4, y); | 989 | | | 990 | | // atan2(±infinity, +infinity) returns ±pi/4. | 991 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) | 992 | | return copysign(Pi<T> / 4, y); | 993 | | | 994 | | // Check quadrant, going counterclockwise. | 995 | | if (y > 0 && x > 0) | 996 | | return atan(y / x); | 997 | | if (y > 0 && x < 0) | 998 | | return atan(y / x) + Pi<T>; | 999 | | if (y < 0 && x < 0) | 1000 | | return atan(y / x) - Pi<T>; | 1001 | | // y < 0 && x > 0 | 1002 | | return atan(y / x); | 1003 | | # else | 1004 | | CALL_BUILTIN(atan2, y, x); | 1005 | | # endif | 1006 | | #endif | 1007 | 4.29k | } |
_ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEdEET_S3_S3_ Line | Count | Source | 932 | 4.97M | { | 933 | 4.97M | CONSTEXPR_STATE(atan2, y, x); | 934 | | | 935 | 4.97M | #if ARCH(X86_64) | 936 | 4.97M | T ret; | 937 | 4.97M | asm("fpatan" | 938 | 4.97M | : "=t"(ret) | 939 | 4.97M | : "0"(x), "u"(y) | 940 | 4.97M | : "st(1)"); | 941 | 4.97M | return ret; | 942 | | #else | 943 | | # if defined(AK_OS_SERENITY) | 944 | | if (__builtin_isnan(y)) | 945 | | return y; | 946 | | if (__builtin_isnan(x)) | 947 | | return x; | 948 | | | 949 | | // SPECIAL VALUES | 950 | | // atan2(±0, -0) returns ±pi. | 951 | | if (y == 0 && x == 0 && signbit(x)) | 952 | | return copysign(Pi<T>, y); | 953 | | | 954 | | // atan2(±0, +0) returns ±0. | 955 | | if (y == 0 && x == 0 && !signbit(x)) | 956 | | return y; | 957 | | | 958 | | // atan2(±0, x) returns ±pi for x < 0. | 959 | | if (y == 0 && x < 0) | 960 | | return copysign(Pi<T>, y); | 961 | | | 962 | | // atan2(±0, x) returns ±0 for x > 0. | 963 | | if (y == 0 && x > 0) | 964 | | return y; | 965 | | | 966 | | // atan2(y, ±0) returns +pi/2 for y > 0. | 967 | | if (y > 0 && x == 0) | 968 | | return Pi<T> / 2; | 969 | | | 970 | | // atan2(y, ±0) returns -pi/2 for y < 0. | 971 | | if (y < 0 && x == 0) | 972 | | return -Pi<T> / 2; | 973 | | | 974 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. | 975 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) | 976 | | return copysign(Pi<T>, y); | 977 | | | 978 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. | 979 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) | 980 | | return copysign(static_cast<T>(0), y); | 981 | | | 982 | | // atan2(±infinity, x) returns ±pi/2 for finite x. | 983 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) | 984 | | return copysign(Pi<T> / 2, y); | 985 | | | 986 | | // atan2(±infinity, -infinity) returns ±3*pi/4. | 987 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) | 988 | | return copysign(3 * Pi<T> / 4, y); | 989 | | | 990 | | // atan2(±infinity, +infinity) returns ±pi/4. | 991 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) | 992 | | return copysign(Pi<T> / 4, y); | 993 | | | 994 | | // Check quadrant, going counterclockwise. | 995 | | if (y > 0 && x > 0) | 996 | | return atan(y / x); | 997 | | if (y > 0 && x < 0) | 998 | | return atan(y / x) + Pi<T>; | 999 | | if (y < 0 && x < 0) | 1000 | | return atan(y / x) - Pi<T>; | 1001 | | // y < 0 && x > 0 | 1002 | | return atan(y / x); | 1003 | | # else | 1004 | | CALL_BUILTIN(atan2, y, x); | 1005 | | # endif | 1006 | | #endif | 1007 | 4.97M | } |
|
1008 | | |
1009 | | } |
1010 | | |
1011 | | using Trigonometry::acos; |
1012 | | using Trigonometry::asin; |
1013 | | using Trigonometry::atan; |
1014 | | using Trigonometry::atan2; |
1015 | | using Trigonometry::cos; |
1016 | | using Trigonometry::hypot; |
1017 | | using Trigonometry::sin; |
1018 | | using Trigonometry::sincos; |
1019 | | using Trigonometry::tan; |
1020 | | |
1021 | | namespace Exponentials { |
1022 | | |
1023 | | template<FloatingPoint T> |
1024 | | constexpr T log2(T x) |
1025 | 4.13M | { |
1026 | 4.13M | CONSTEXPR_STATE(log2, x); |
1027 | | |
1028 | 4.13M | #if ARCH(X86_64) |
1029 | | if constexpr (IsSame<T, long double>) { |
1030 | | T ret; |
1031 | | asm( |
1032 | | "fld1\n" |
1033 | | "fxch %%st(1)\n" |
1034 | | "fyl2x\n" |
1035 | | : "=t"(ret) |
1036 | | : "0"(x)); |
1037 | | return ret; |
1038 | | } |
1039 | 4.13M | #endif |
1040 | | // References: |
1041 | | // Gist comparing some implementations |
1042 | | // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 |
1043 | | |
1044 | 4.13M | if (x == 0) |
1045 | 0 | return -Infinity<T>; |
1046 | 4.13M | if (x <= 0 || __builtin_isnan(x)) |
1047 | 0 | return NaN<T>; |
1048 | | |
1049 | 4.13M | auto ext = FloatExtractor<T>::from_float(x); |
1050 | 4.13M | T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; |
1051 | | |
1052 | | // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 |
1053 | 4.13M | if (ext.mantissa == 0) |
1054 | 3.66M | return exponent; |
1055 | | |
1056 | | // FIXME: Handle denormalized numbers separately |
1057 | | |
1058 | 475k | FloatExtractor<T> mantissa_ext { |
1059 | 475k | .mantissa = ext.mantissa, |
1060 | 475k | .exponent = FloatExtractor<T>::exponent_bias, |
1061 | 475k | .sign = ext.sign |
1062 | 475k | }; |
1063 | | |
1064 | | // (1 <= mantissa < 2) |
1065 | 475k | T m = mantissa_ext.to_float(); |
1066 | | |
1067 | | // This is a reconstruction of one of Sun's algorithms |
1068 | | // They use a transformation to lower the problem space, |
1069 | | // while keeping the precision, and a 14th degree polynomial, |
1070 | | // which is mirrored at sqrt(2) |
1071 | | // TODO: Sun has some more algorithms for this and other math functions, |
1072 | | // leveraging LUTs, investigate those, if they are better in performance and/or precision. |
1073 | | // These seem to be related to crLibM's implementations, for which papers and references |
1074 | | // are available. |
1075 | | // FIXME: Dynamically adjust the amount of precision between floats and doubles |
1076 | | // AKA floats might need less accuracy here, in comparison to doubles |
1077 | | |
1078 | 475k | bool inverted = false; |
1079 | | // m > sqrt(2) |
1080 | 475k | if (m > Sqrt2<T>) { |
1081 | 161k | inverted = true; |
1082 | 161k | m = 2 / m; |
1083 | 161k | } |
1084 | 475k | T s = (m - 1) / (m + 1); |
1085 | | // ln((1 + s) / (1 - s)) == ln(m) |
1086 | 475k | T s2 = s * s; |
1087 | | // clang-format off |
1088 | 475k | T high_approx = s2 * (static_cast<T>(0.6666666666666735130) |
1089 | 475k | + s2 * (static_cast<T>(0.3999999999940941908) |
1090 | 475k | + s2 * (static_cast<T>(0.2857142874366239149) |
1091 | 475k | + s2 * (static_cast<T>(0.2222219843214978396) |
1092 | 475k | + s2 * (static_cast<T>(0.1818357216161805012) |
1093 | 475k | + s2 * (static_cast<T>(0.1531383769920937332) |
1094 | 475k | + s2 * static_cast<T>(0.1479819860511658591))))))); |
1095 | | // clang-format on |
1096 | | |
1097 | | // ln(m) == 2 * s + s * high_approx |
1098 | | // log2(m) == log2(e) * ln(m) |
1099 | 475k | T log2_mantissa = L2_E<T> * (2 * s + s * high_approx); |
1100 | 475k | if (inverted) |
1101 | 161k | log2_mantissa = 1 - log2_mantissa; |
1102 | 475k | return exponent + log2_mantissa; |
1103 | 4.13M | } _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 1025 | 4.13M | { | 1026 | 4.13M | CONSTEXPR_STATE(log2, x); | 1027 | | | 1028 | 4.13M | #if ARCH(X86_64) | 1029 | | if constexpr (IsSame<T, long double>) { | 1030 | | T ret; | 1031 | | asm( | 1032 | | "fld1\n" | 1033 | | "fxch %%st(1)\n" | 1034 | | "fyl2x\n" | 1035 | | : "=t"(ret) | 1036 | | : "0"(x)); | 1037 | | return ret; | 1038 | | } | 1039 | 4.13M | #endif | 1040 | | // References: | 1041 | | // Gist comparing some implementations | 1042 | | // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 | 1043 | | | 1044 | 4.13M | if (x == 0) | 1045 | 0 | return -Infinity<T>; | 1046 | 4.13M | if (x <= 0 || __builtin_isnan(x)) | 1047 | 0 | return NaN<T>; | 1048 | | | 1049 | 4.13M | auto ext = FloatExtractor<T>::from_float(x); | 1050 | 4.13M | T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; | 1051 | | | 1052 | | // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 | 1053 | 4.13M | if (ext.mantissa == 0) | 1054 | 3.66M | return exponent; | 1055 | | | 1056 | | // FIXME: Handle denormalized numbers separately | 1057 | | | 1058 | 475k | FloatExtractor<T> mantissa_ext { | 1059 | 475k | .mantissa = ext.mantissa, | 1060 | 475k | .exponent = FloatExtractor<T>::exponent_bias, | 1061 | 475k | .sign = ext.sign | 1062 | 475k | }; | 1063 | | | 1064 | | // (1 <= mantissa < 2) | 1065 | 475k | T m = mantissa_ext.to_float(); | 1066 | | | 1067 | | // This is a reconstruction of one of Sun's algorithms | 1068 | | // They use a transformation to lower the problem space, | 1069 | | // while keeping the precision, and a 14th degree polynomial, | 1070 | | // which is mirrored at sqrt(2) | 1071 | | // TODO: Sun has some more algorithms for this and other math functions, | 1072 | | // leveraging LUTs, investigate those, if they are better in performance and/or precision. | 1073 | | // These seem to be related to crLibM's implementations, for which papers and references | 1074 | | // are available. | 1075 | | // FIXME: Dynamically adjust the amount of precision between floats and doubles | 1076 | | // AKA floats might need less accuracy here, in comparison to doubles | 1077 | | | 1078 | 475k | bool inverted = false; | 1079 | | // m > sqrt(2) | 1080 | 475k | if (m > Sqrt2<T>) { | 1081 | 161k | inverted = true; | 1082 | 161k | m = 2 / m; | 1083 | 161k | } | 1084 | 475k | T s = (m - 1) / (m + 1); | 1085 | | // ln((1 + s) / (1 - s)) == ln(m) | 1086 | 475k | T s2 = s * s; | 1087 | | // clang-format off | 1088 | 475k | T high_approx = s2 * (static_cast<T>(0.6666666666666735130) | 1089 | 475k | + s2 * (static_cast<T>(0.3999999999940941908) | 1090 | 475k | + s2 * (static_cast<T>(0.2857142874366239149) | 1091 | 475k | + s2 * (static_cast<T>(0.2222219843214978396) | 1092 | 475k | + s2 * (static_cast<T>(0.1818357216161805012) | 1093 | 475k | + s2 * (static_cast<T>(0.1531383769920937332) | 1094 | 475k | + s2 * static_cast<T>(0.1479819860511658591))))))); | 1095 | | // clang-format on | 1096 | | | 1097 | | // ln(m) == 2 * s + s * high_approx | 1098 | | // log2(m) == log2(e) * ln(m) | 1099 | 475k | T log2_mantissa = L2_E<T> * (2 * s + s * high_approx); | 1100 | 475k | if (inverted) | 1101 | 161k | log2_mantissa = 1 - log2_mantissa; | 1102 | 475k | return exponent + log2_mantissa; | 1103 | 4.13M | } |
Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_ |
1104 | | |
1105 | | template<FloatingPoint T> |
1106 | | constexpr T log(T x) |
1107 | 48 | { |
1108 | 48 | CONSTEXPR_STATE(log, x); |
1109 | | |
1110 | 48 | #if ARCH(X86_64) |
1111 | 48 | T ret; |
1112 | 48 | asm( |
1113 | 48 | "fldln2\n" |
1114 | 48 | "fxch %%st(1)\n" |
1115 | 48 | "fyl2x\n" |
1116 | 48 | : "=t"(ret) |
1117 | 48 | : "0"(x)); |
1118 | 48 | return ret; |
1119 | | #elif defined(AK_OS_SERENITY) |
1120 | | // FIXME: Adjust the polynomial and formula in log2 to fit this |
1121 | | return log2<T>(x) / L2_E<T>; |
1122 | | #else |
1123 | | CALL_BUILTIN(log, x); |
1124 | | #endif |
1125 | 48 | } |
1126 | | |
1127 | | template<FloatingPoint T> |
1128 | | constexpr T log10(T x) |
1129 | | { |
1130 | | CONSTEXPR_STATE(log10, x); |
1131 | | |
1132 | | #if ARCH(X86_64) |
1133 | | T ret; |
1134 | | asm( |
1135 | | "fldlg2\n" |
1136 | | "fxch %%st(1)\n" |
1137 | | "fyl2x\n" |
1138 | | : "=t"(ret) |
1139 | | : "0"(x)); |
1140 | | return ret; |
1141 | | #elif defined(AK_OS_SERENITY) |
1142 | | // FIXME: Adjust the polynomial and formula in log2 to fit this |
1143 | | return log2<T>(x) / L2_10<T>; |
1144 | | #else |
1145 | | CALL_BUILTIN(log10, x); |
1146 | | #endif |
1147 | | } |
1148 | | |
1149 | | template<FloatingPoint T> |
1150 | | constexpr T exp2(T exponent) |
1151 | 4.13M | { |
1152 | 4.13M | CONSTEXPR_STATE(exp2, exponent); |
1153 | | |
1154 | 4.13M | #if ARCH(X86_64) |
1155 | 4.13M | T res; |
1156 | 4.13M | asm("fld1\n" |
1157 | 4.13M | "fld %%st(1)\n" |
1158 | 4.13M | "fprem\n" |
1159 | 4.13M | "f2xm1\n" |
1160 | 4.13M | "faddp\n" |
1161 | 4.13M | "fscale\n" |
1162 | 4.13M | "fstp %%st(1)" |
1163 | 4.13M | : "=t"(res) |
1164 | 4.13M | : "0"(exponent)); |
1165 | 4.13M | return res; |
1166 | | #else |
1167 | | // TODO: Add better implementation of this function. |
1168 | | // This is just fast exponentiation for the integer part and |
1169 | | // the first couple terms of the taylor series for the fractional part. |
1170 | | |
1171 | | if (exponent < 0) |
1172 | | return 1 / exp2(-exponent); |
1173 | | |
1174 | | if (exponent >= log2(NumericLimits<T>::max())) |
1175 | | return Infinity<T>; |
1176 | | |
1177 | | // Integer exponentiation part. |
1178 | | int int_exponent = static_cast<int>(exponent); |
1179 | | T exponent_fraction = exponent - int_exponent; |
1180 | | |
1181 | | T int_result = 1; |
1182 | | T base = 2; |
1183 | | for (;;) { |
1184 | | if (int_exponent & 1) |
1185 | | int_result *= base; |
1186 | | int_exponent >>= 1; |
1187 | | if (!int_exponent) |
1188 | | break; |
1189 | | base *= base; |
1190 | | } |
1191 | | |
1192 | | // Fractional part. |
1193 | | // Uses: |
1194 | | // exp(x) = sum(n, 0, \infty, x ** n / n!) |
1195 | | // 2**x = exp(log2(e) * x) |
1196 | | // FIXME: Pick better step size (and make it dependent on T). |
1197 | | T result = 0; |
1198 | | T power = 1; |
1199 | | T factorial = 1; |
1200 | | for (int i = 1; i < 16; ++i) { |
1201 | | result += power / factorial; |
1202 | | power *= exponent_fraction / L2_E<T>; |
1203 | | factorial *= i; |
1204 | | } |
1205 | | |
1206 | | return int_result * result; |
1207 | | #endif |
1208 | 4.13M | } _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 1151 | 4.13M | { | 1152 | 4.13M | CONSTEXPR_STATE(exp2, exponent); | 1153 | | | 1154 | 4.13M | #if ARCH(X86_64) | 1155 | 4.13M | T res; | 1156 | 4.13M | asm("fld1\n" | 1157 | 4.13M | "fld %%st(1)\n" | 1158 | 4.13M | "fprem\n" | 1159 | 4.13M | "f2xm1\n" | 1160 | 4.13M | "faddp\n" | 1161 | 4.13M | "fscale\n" | 1162 | 4.13M | "fstp %%st(1)" | 1163 | 4.13M | : "=t"(res) | 1164 | 4.13M | : "0"(exponent)); | 1165 | 4.13M | return res; | 1166 | | #else | 1167 | | // TODO: Add better implementation of this function. | 1168 | | // This is just fast exponentiation for the integer part and | 1169 | | // the first couple terms of the taylor series for the fractional part. | 1170 | | | 1171 | | if (exponent < 0) | 1172 | | return 1 / exp2(-exponent); | 1173 | | | 1174 | | if (exponent >= log2(NumericLimits<T>::max())) | 1175 | | return Infinity<T>; | 1176 | | | 1177 | | // Integer exponentiation part. | 1178 | | int int_exponent = static_cast<int>(exponent); | 1179 | | T exponent_fraction = exponent - int_exponent; | 1180 | | | 1181 | | T int_result = 1; | 1182 | | T base = 2; | 1183 | | for (;;) { | 1184 | | if (int_exponent & 1) | 1185 | | int_result *= base; | 1186 | | int_exponent >>= 1; | 1187 | | if (!int_exponent) | 1188 | | break; | 1189 | | base *= base; | 1190 | | } | 1191 | | | 1192 | | // Fractional part. | 1193 | | // Uses: | 1194 | | // exp(x) = sum(n, 0, \infty, x ** n / n!) | 1195 | | // 2**x = exp(log2(e) * x) | 1196 | | // FIXME: Pick better step size (and make it dependent on T). | 1197 | | T result = 0; | 1198 | | T power = 1; | 1199 | | T factorial = 1; | 1200 | | for (int i = 1; i < 16; ++i) { | 1201 | | result += power / factorial; | 1202 | | power *= exponent_fraction / L2_E<T>; | 1203 | | factorial *= i; | 1204 | | } | 1205 | | | 1206 | | return int_result * result; | 1207 | | #endif | 1208 | 4.13M | } |
Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_ |
1209 | | |
1210 | | template<FloatingPoint T> |
1211 | | constexpr T exp(T exponent) |
1212 | 0 | { |
1213 | 0 | CONSTEXPR_STATE(exp, exponent); |
1214 | |
|
1215 | 0 | #if ARCH(X86_64) |
1216 | 0 | T res; |
1217 | 0 | asm("fldl2e\n" |
1218 | 0 | "fmulp\n" |
1219 | 0 | "fld1\n" |
1220 | 0 | "fld %%st(1)\n" |
1221 | 0 | "fprem\n" |
1222 | 0 | "f2xm1\n" |
1223 | 0 | "faddp\n" |
1224 | 0 | "fscale\n" |
1225 | 0 | "fstp %%st(1)" |
1226 | 0 | : "=t"(res) |
1227 | 0 | : "0"(exponent)); |
1228 | 0 | return res; |
1229 | | #else |
1230 | | // TODO: Add better implementation of this function. |
1231 | | return exp2(exponent * L2_E<T>); |
1232 | | #endif |
1233 | 0 | } Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_ |
1234 | | |
1235 | | } |
1236 | | |
1237 | | using Exponentials::exp; |
1238 | | using Exponentials::exp2; |
1239 | | using Exponentials::log; |
1240 | | using Exponentials::log10; |
1241 | | using Exponentials::log2; |
1242 | | |
1243 | | namespace Hyperbolic { |
1244 | | |
1245 | | template<FloatingPoint T> |
1246 | | constexpr T sinh(T x) |
1247 | | { |
1248 | | T exponentiated = exp<T>(x); |
1249 | | if (x > 0) |
1250 | | return (exponentiated * exponentiated - 1) / 2 / exponentiated; |
1251 | | return (exponentiated - 1 / exponentiated) / 2; |
1252 | | } |
1253 | | |
1254 | | template<FloatingPoint T> |
1255 | | constexpr T cosh(T x) |
1256 | | { |
1257 | | CONSTEXPR_STATE(cosh, x); |
1258 | | |
1259 | | T exponentiated = exp(-x); |
1260 | | if (x < 0) |
1261 | | return (1 + exponentiated * exponentiated) / 2 / exponentiated; |
1262 | | return (1 / exponentiated + exponentiated) / 2; |
1263 | | } |
1264 | | |
1265 | | template<FloatingPoint T> |
1266 | | constexpr T tanh(T x) |
1267 | | { |
1268 | | if (x > 0) { |
1269 | | T exponentiated = exp<T>(2 * x); |
1270 | | return (exponentiated - 1) / (exponentiated + 1); |
1271 | | } |
1272 | | T plusX = exp<T>(x); |
1273 | | T minusX = 1 / plusX; |
1274 | | return (plusX - minusX) / (plusX + minusX); |
1275 | | } |
1276 | | |
1277 | | template<FloatingPoint T> |
1278 | | constexpr T asinh(T x) |
1279 | | { |
1280 | | return log<T>(x + sqrt<T>(x * x + 1)); |
1281 | | } |
1282 | | |
1283 | | template<FloatingPoint T> |
1284 | | constexpr T acosh(T x) |
1285 | | { |
1286 | | return log<T>(x + sqrt<T>(x * x - 1)); |
1287 | | } |
1288 | | |
1289 | | template<FloatingPoint T> |
1290 | | constexpr T atanh(T x) |
1291 | | { |
1292 | | return log<T>((1 + x) / (1 - x)) / (T)2.0l; |
1293 | | } |
1294 | | |
1295 | | } |
1296 | | |
1297 | | using Hyperbolic::acosh; |
1298 | | using Hyperbolic::asinh; |
1299 | | using Hyperbolic::atanh; |
1300 | | using Hyperbolic::cosh; |
1301 | | using Hyperbolic::sinh; |
1302 | | using Hyperbolic::tanh; |
1303 | | |
1304 | | // Calculate x^y with fast exponentiation when the power is a natural number. |
1305 | | template<FloatingPoint F, UnsignedIntegral U> |
1306 | | constexpr F pow_int(F x, U y) |
1307 | 132k | { |
1308 | 132k | auto result = static_cast<F>(1); |
1309 | 788k | while (y > 0) { |
1310 | 656k | if (y % 2 == 1) { |
1311 | 418k | result *= x; |
1312 | 418k | } |
1313 | 656k | x = x * x; |
1314 | 656k | y >>= 1; |
1315 | 656k | } |
1316 | 132k | return result; |
1317 | 132k | } _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEfTkNS1_16UnsignedIntegralEmEET_S2_T0_ Line | Count | Source | 1307 | 132k | { | 1308 | 132k | auto result = static_cast<F>(1); | 1309 | 788k | while (y > 0) { | 1310 | 656k | if (y % 2 == 1) { | 1311 | 418k | result *= x; | 1312 | 418k | } | 1313 | 656k | x = x * x; | 1314 | 656k | y >>= 1; | 1315 | 656k | } | 1316 | 132k | return result; | 1317 | 132k | } |
Unexecuted instantiation: _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEdTkNS1_16UnsignedIntegralEmEET_S2_T0_ |
1318 | | |
1319 | | template<FloatingPoint T> |
1320 | | constexpr T pow(T x, T y) |
1321 | 17.0M | { |
1322 | 17.0M | CONSTEXPR_STATE(pow, x, y); |
1323 | 17.0M | if (__builtin_isnan(y)) |
1324 | 0 | return y; |
1325 | 17.0M | if (y == 0) |
1326 | 12.6k | return 1; |
1327 | 17.0M | if (x == 0) |
1328 | 12.7M | return 0; |
1329 | 4.27M | if (y == 1) |
1330 | 3.29k | return x; |
1331 | | |
1332 | | // Take an integer fast path as long as the value fits within a 64-bit integer. |
1333 | 4.27M | if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) { |
1334 | 4.27M | i64 y_as_int = static_cast<i64>(y); |
1335 | 4.27M | if (y == static_cast<T>(y_as_int)) { |
1336 | 132k | T result = pow_int(x, static_cast<u64>(fabs<T>(y))); |
1337 | 132k | if (y < 0) |
1338 | 103k | result = static_cast<T>(1.0l) / result; |
1339 | 132k | return result; |
1340 | 132k | } |
1341 | 4.27M | } |
1342 | | |
1343 | | // FIXME: This formula suffers from error magnification. |
1344 | 4.13M | return exp2<T>(y * log2<T>(x)); |
1345 | 4.27M | } _ZN2AK3powITkNS_8Concepts13FloatingPointEfEET_S2_S2_ Line | Count | Source | 1321 | 17.0M | { | 1322 | 17.0M | CONSTEXPR_STATE(pow, x, y); | 1323 | 17.0M | if (__builtin_isnan(y)) | 1324 | 0 | return y; | 1325 | 17.0M | if (y == 0) | 1326 | 12.6k | return 1; | 1327 | 17.0M | if (x == 0) | 1328 | 12.7M | return 0; | 1329 | 4.27M | if (y == 1) | 1330 | 3.29k | return x; | 1331 | | | 1332 | | // Take an integer fast path as long as the value fits within a 64-bit integer. | 1333 | 4.27M | if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) { | 1334 | 4.27M | i64 y_as_int = static_cast<i64>(y); | 1335 | 4.27M | if (y == static_cast<T>(y_as_int)) { | 1336 | 132k | T result = pow_int(x, static_cast<u64>(fabs<T>(y))); | 1337 | 132k | if (y < 0) | 1338 | 103k | result = static_cast<T>(1.0l) / result; | 1339 | 132k | return result; | 1340 | 132k | } | 1341 | 4.27M | } | 1342 | | | 1343 | | // FIXME: This formula suffers from error magnification. | 1344 | 4.13M | return exp2<T>(y * log2<T>(x)); | 1345 | 4.27M | } |
Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_ Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_ |
1346 | | |
1347 | | template<Integral I, typename T> |
1348 | | constexpr I clamp_to(T value) |
1349 | 0 | { |
1350 | 0 | constexpr auto max = static_cast<T>(NumericLimits<I>::max()); |
1351 | 0 | if constexpr (max > 0) { |
1352 | 0 | if (value >= static_cast<T>(NumericLimits<I>::max())) |
1353 | 0 | return NumericLimits<I>::max(); |
1354 | 0 | } |
1355 | | |
1356 | 0 | constexpr auto min = static_cast<T>(NumericLimits<I>::min()); |
1357 | 0 | if constexpr (min <= 0) { |
1358 | 0 | if (value <= static_cast<T>(NumericLimits<I>::min())) |
1359 | 0 | return NumericLimits<I>::min(); |
1360 | 0 | } |
1361 | | |
1362 | | if constexpr (IsFloatingPoint<T>) |
1363 | 0 | return round_to<I>(value); |
1364 | | |
1365 | 0 | return value; |
1366 | 0 | } Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEldEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralElmEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEhdEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEilEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEifEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEidEET_T0_ |
1367 | | |
1368 | | // Wrap a to keep it in the range [-b, b]. |
1369 | | template<typename T> |
1370 | | constexpr T wrap_to_range(T a, IdentityType<T> b) |
1371 | | { |
1372 | | return fmod(fmod(a + b, 2 * b) + 2 * b, 2 * b) - b; |
1373 | | } |
1374 | | |
1375 | | #undef CALL_BUILTIN |
1376 | | #undef CONSTEXPR_STATE |
1377 | | #undef AARCH64_INSTRUCTION |
1378 | | } |
1379 | | |
1380 | | #if USING_AK_GLOBALLY |
1381 | | using AK::round_to; |
1382 | | #endif |