Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/BuiltinWrappers.h> |
10 | | #include <AK/Concepts.h> |
11 | | #include <AK/FloatingPoint.h> |
12 | | #include <AK/NumericLimits.h> |
13 | | #include <AK/StdLibExtraDetails.h> |
14 | | #include <AK/Types.h> |
15 | | #include <math.h> |
16 | | |
17 | | #ifdef KERNEL |
18 | | # error "Including AK/Math.h from the Kernel is never correct! Floating point is disabled." |
19 | | #endif |
20 | | |
21 | | namespace AK { |
22 | | |
23 | | template<FloatingPoint T> |
24 | | constexpr T NaN = __builtin_nan(""); |
25 | | template<FloatingPoint T> |
26 | | constexpr T Infinity = __builtin_huge_vall(); |
27 | | template<FloatingPoint T> |
28 | | constexpr T Pi = 3.141592653589793238462643383279502884L; |
29 | | template<FloatingPoint T> |
30 | | constexpr T E = 2.718281828459045235360287471352662498L; |
31 | | template<FloatingPoint T> |
32 | | constexpr T Sqrt2 = 1.414213562373095048801688724209698079L; |
33 | | template<FloatingPoint T> |
34 | | constexpr T Sqrt1_2 = 0.707106781186547524400844362104849039L; |
35 | | |
36 | | template<FloatingPoint T> |
37 | | constexpr T L2_10 = 3.321928094887362347870319429489390175864L; |
38 | | template<FloatingPoint T> |
39 | | constexpr T L2_E = 1.442695040888963407359924681001892137L; |
40 | | |
41 | | namespace Details { |
42 | | template<size_t> |
43 | | constexpr size_t product_even(); |
44 | | template<> |
45 | 0 | constexpr size_t product_even<2>() { return 2; } |
46 | | template<size_t value> |
47 | 0 | constexpr size_t product_even() { return value * product_even<value - 2>(); }Unexecuted instantiation: unsigned long AK::Details::product_even<4ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<6ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<8ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<10ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<12ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<14ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<16ul>() |
48 | | |
49 | | template<size_t> |
50 | | constexpr size_t product_odd(); |
51 | | template<> |
52 | 0 | constexpr size_t product_odd<1>() { return 1; } |
53 | | template<size_t value> |
54 | 0 | constexpr size_t product_odd() { return value * product_odd<value - 2>(); }Unexecuted instantiation: unsigned long AK::Details::product_odd<3ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<5ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<7ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<9ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<11ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<13ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<15ul>() |
55 | | } |
56 | | |
57 | | template<FloatingPoint T> |
58 | | constexpr T to_radians(T degrees) |
59 | 0 | { |
60 | 0 | return degrees * AK::Pi<T> / 180; |
61 | 0 | } Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEfEET_S2_ Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEdEET_S2_ |
62 | | |
63 | | template<FloatingPoint T> |
64 | | constexpr T to_degrees(T radians) |
65 | 0 | { |
66 | 0 | return radians * 180 / AK::Pi<T>; |
67 | 0 | } Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEfEET_S2_ Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEdEET_S2_ |
68 | | |
69 | | template<FloatingPoint FloatT> |
70 | | FloatT copysign(FloatT x, FloatT y) |
71 | | { |
72 | | using Extractor = FloatExtractor<FloatT>; |
73 | | auto ex = Extractor::from_float(x); |
74 | | auto ey = Extractor::from_float(y); |
75 | | ex.sign = ey.sign; |
76 | | return ex.to_float(); |
77 | | } |
78 | | |
79 | | #define CONSTEXPR_STATE(function, args...) \ |
80 | 131M | if (is_constant_evaluated()) { \ |
81 | 0 | if (IsSame<T, long double>) \ |
82 | 0 | return __builtin_##function##l(args); \ |
83 | 0 | if (IsSame<T, double>) \ |
84 | 0 | return __builtin_##function(args); \ |
85 | 0 | if (IsSame<T, float>) \ |
86 | 0 | return __builtin_##function##f(args); \ |
87 | 0 | } |
88 | | |
89 | | #define AARCH64_INSTRUCTION(instruction, arg) \ |
90 | | if constexpr (IsSame<T, long double>) \ |
91 | | TODO(); \ |
92 | | if constexpr (IsSame<T, double>) { \ |
93 | | double res; \ |
94 | | asm(#instruction " %d0, %d1" \ |
95 | | : "=w"(res) \ |
96 | | : "w"(arg)); \ |
97 | | return res; \ |
98 | | } \ |
99 | | if constexpr (IsSame<T, float>) { \ |
100 | | float res; \ |
101 | | asm(#instruction " %s0, %s1" \ |
102 | | : "=w"(res) \ |
103 | | : "w"(arg)); \ |
104 | | return res; \ |
105 | | } |
106 | | |
107 | | template<FloatingPoint T> |
108 | | constexpr T fabs(T x) |
109 | 147k | { |
110 | | // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper |
111 | | if constexpr (IsSame<T, long double>) |
112 | | return __builtin_fabsl(x); |
113 | | if constexpr (IsSame<T, double>) |
114 | | return __builtin_fabs(x); |
115 | | if constexpr (IsSame<T, float>) |
116 | 147k | return __builtin_fabsf(x); |
117 | 147k | } _ZN2AK4fabsITkNS_8Concepts13FloatingPointEfEET_S2_ Line | Count | Source | 109 | 147k | { | 110 | | // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper | 111 | | if constexpr (IsSame<T, long double>) | 112 | | return __builtin_fabsl(x); | 113 | | if constexpr (IsSame<T, double>) | 114 | | return __builtin_fabs(x); | 115 | | if constexpr (IsSame<T, float>) | 116 | 147k | return __builtin_fabsf(x); | 117 | 147k | } |
Unexecuted instantiation: _ZN2AK4fabsITkNS_8Concepts13FloatingPointEdEET_S2_ |
118 | | |
119 | | namespace Rounding { |
120 | | template<FloatingPoint T> |
121 | | constexpr T ceil(T num) |
122 | 3.84k | { |
123 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b110 |
124 | 3.84k | if (is_constant_evaluated()) { |
125 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
126 | 0 | return num; |
127 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) |
128 | 0 | ? static_cast<i64>(num) |
129 | 0 | : static_cast<i64>(num) + ((num > 0) ? 1 : 0); |
130 | 0 | } |
131 | | #if ARCH(AARCH64) |
132 | | AARCH64_INSTRUCTION(frintp, num); |
133 | | #else |
134 | | if constexpr (IsSame<T, long double>) |
135 | | return __builtin_ceill(num); |
136 | | if constexpr (IsSame<T, double>) |
137 | | return __builtin_ceil(num); |
138 | | if constexpr (IsSame<T, float>) |
139 | 3.84k | return __builtin_ceilf(num); |
140 | 3.84k | #endif |
141 | 3.84k | } _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 122 | 3.84k | { | 123 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b110 | 124 | 3.84k | if (is_constant_evaluated()) { | 125 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) | 126 | 0 | return num; | 127 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) | 128 | 0 | ? static_cast<i64>(num) | 129 | 0 | : static_cast<i64>(num) + ((num > 0) ? 1 : 0); | 130 | 0 | } | 131 | | #if ARCH(AARCH64) | 132 | | AARCH64_INSTRUCTION(frintp, num); | 133 | | #else | 134 | | if constexpr (IsSame<T, long double>) | 135 | | return __builtin_ceill(num); | 136 | | if constexpr (IsSame<T, double>) | 137 | | return __builtin_ceil(num); | 138 | | if constexpr (IsSame<T, float>) | 139 | 3.84k | return __builtin_ceilf(num); | 140 | 3.84k | #endif | 141 | 3.84k | } |
Unexecuted instantiation: _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEdEET_S3_ |
142 | | |
143 | | template<FloatingPoint T> |
144 | | constexpr T floor(T num) |
145 | 0 | { |
146 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b101 |
147 | 0 | if (is_constant_evaluated()) { |
148 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
149 | 0 | return num; |
150 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) |
151 | 0 | ? static_cast<i64>(num) |
152 | 0 | : static_cast<i64>(num) - ((num > 0) ? 0 : 1); |
153 | 0 | } |
154 | | #if ARCH(AARCH64) |
155 | | AARCH64_INSTRUCTION(frintm, num); |
156 | | #else |
157 | | if constexpr (IsSame<T, long double>) |
158 | | return __builtin_floorl(num); |
159 | | if constexpr (IsSame<T, double>) |
160 | 0 | return __builtin_floor(num); |
161 | | if constexpr (IsSame<T, float>) |
162 | | return __builtin_floorf(num); |
163 | 0 | #endif |
164 | 0 | } |
165 | | |
166 | | template<FloatingPoint T> |
167 | | constexpr T trunc(T num) |
168 | | { |
169 | | #if ARCH(AARCH64) |
170 | | if (is_constant_evaluated()) { |
171 | | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
172 | | return num; |
173 | | return static_cast<T>(static_cast<i64>(num)); |
174 | | } |
175 | | AARCH64_INSTRUCTION(frintz, num); |
176 | | #endif |
177 | | // FIXME: Use dedicated instruction in the non constexpr case |
178 | | // SSE4.1: rounds[sd] %num, %res, 0b111 |
179 | | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
180 | | return num; |
181 | | return static_cast<T>(static_cast<i64>(num)); |
182 | | } |
183 | | |
184 | | template<FloatingPoint T> |
185 | | constexpr T rint(T x) |
186 | | { |
187 | | CONSTEXPR_STATE(rint, x); |
188 | | // Note: This does break tie to even |
189 | | // But the behavior of frndint/rounds[ds]/frintx can be configured |
190 | | // through the floating point control registers. |
191 | | // FIXME: We should decide if we rename this to allow us to get away from |
192 | | // the configurability "burden" rint has |
193 | | // this would make us use `rounds[sd] %num, %res, 0b100` |
194 | | // and `frintn` respectively, |
195 | | // no such guaranteed round exists for x87 `frndint` |
196 | | #if ARCH(X86_64) |
197 | | # ifdef __SSE4_1__ |
198 | | if constexpr (IsSame<T, double>) { |
199 | | T r; |
200 | | asm( |
201 | | "roundsd %1, %0" |
202 | | : "=x"(r) |
203 | | : "x"(x)); |
204 | | return r; |
205 | | } |
206 | | if constexpr (IsSame<T, float>) { |
207 | | T r; |
208 | | asm( |
209 | | "roundss %1, %0" |
210 | | : "=x"(r) |
211 | | : "x"(x)); |
212 | | return r; |
213 | | } |
214 | | # else |
215 | | asm( |
216 | | "frndint" |
217 | | : "+t"(x)); |
218 | | return x; |
219 | | # endif |
220 | | #elif ARCH(AARCH64) |
221 | | AARCH64_INSTRUCTION(frintx, x); |
222 | | #elif ARCH(RISCV64) |
223 | | if (__builtin_isnan(x)) |
224 | | return x; |
225 | | |
226 | | // Floating point values have a gap size of >= 1 for values above 2^mantissa_bits - 1. |
227 | | if (fabs(x) > FloatExtractor<T>::mantissa_max) |
228 | | return x; |
229 | | |
230 | | if constexpr (IsSame<T, float>) { |
231 | | i64 r; |
232 | | asm("fcvt.l.s %0, %1, dyn" |
233 | | : "=r"(r) |
234 | | : "f"(x)); |
235 | | return copysign(static_cast<float>(r), x); |
236 | | } |
237 | | if constexpr (IsSame<T, double>) { |
238 | | i64 r; |
239 | | asm("fcvt.l.d %0, %1, dyn" |
240 | | : "=r"(r) |
241 | | : "f"(x)); |
242 | | return copysign(static_cast<double>(r), x); |
243 | | } |
244 | | if constexpr (IsSame<T, long double>) |
245 | | TODO_RISCV64(); |
246 | | #endif |
247 | | TODO(); |
248 | | } |
249 | | |
250 | | template<FloatingPoint T> |
251 | | constexpr T round(T x) |
252 | 0 | { |
253 | 0 | CONSTEXPR_STATE(round, x); |
254 | 0 | // Note: This is break-tie-away-from-zero, so not the hw's understanding of |
255 | 0 | // "nearest", which would be towards even. |
256 | 0 | if (x == 0.) |
257 | 0 | return x; |
258 | 0 | if (x > 0.) |
259 | 0 | return floor(x + .5); |
260 | 0 | return ceil(x - .5); |
261 | 0 | } |
262 | | |
263 | | template<Integral I, FloatingPoint P> |
264 | | ALWAYS_INLINE I round_to(P value); |
265 | | |
266 | | #if ARCH(X86_64) |
267 | | template<Integral I> |
268 | | ALWAYS_INLINE I round_to(long double value) |
269 | | { |
270 | | // Note: fistps outputs into a signed integer location (i16, i32, i64), |
271 | | // so lets be nice and tell the compiler that. |
272 | | Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret; |
273 | | if constexpr (sizeof(I) == sizeof(i64)) { |
274 | | asm("fistpll %0" |
275 | | : "=m"(ret) |
276 | | : "t"(value) |
277 | | : "st"); |
278 | | } else if constexpr (sizeof(I) == sizeof(i32)) { |
279 | | asm("fistpl %0" |
280 | | : "=m"(ret) |
281 | | : "t"(value) |
282 | | : "st"); |
283 | | } else { |
284 | | asm("fistps %0" |
285 | | : "=m"(ret) |
286 | | : "t"(value) |
287 | | : "st"); |
288 | | } |
289 | | return static_cast<I>(ret); |
290 | | } |
291 | | |
292 | | template<Integral I> |
293 | | ALWAYS_INLINE I round_to(float value) |
294 | 286M | { |
295 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, |
296 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 |
297 | | // To solve this we would either need to detect that value or do a range check and |
298 | | // then do a more specialized conversion, which might include a division (which is expensive) |
299 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { |
300 | | i64 ret; |
301 | | asm("cvtss2si %1, %0" |
302 | | : "=r"(ret) |
303 | | : "xm"(value)); |
304 | | return static_cast<I>(ret); |
305 | | } |
306 | 286M | i32 ret; |
307 | 286M | asm("cvtss2si %1, %0" |
308 | 286M | : "=r"(ret) |
309 | 286M | : "xm"(value)); |
310 | 286M | return static_cast<I>(ret); |
311 | 286M | } _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_f Line | Count | Source | 294 | 286M | { | 295 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, | 296 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 | 297 | | // To solve this we would either need to detect that value or do a range check and | 298 | | // then do a more specialized conversion, which might include a division (which is expensive) | 299 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { | 300 | | i64 ret; | 301 | | asm("cvtss2si %1, %0" | 302 | | : "=r"(ret) | 303 | | : "xm"(value)); | 304 | | return static_cast<I>(ret); | 305 | | } | 306 | 286M | i32 ret; | 307 | 286M | asm("cvtss2si %1, %0" | 308 | 286M | : "=r"(ret) | 309 | 286M | : "xm"(value)); | 310 | 286M | return static_cast<I>(ret); | 311 | 286M | } |
_ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_f Line | Count | Source | 294 | 19.4k | { | 295 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, | 296 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 | 297 | | // To solve this we would either need to detect that value or do a range check and | 298 | | // then do a more specialized conversion, which might include a division (which is expensive) | 299 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { | 300 | | i64 ret; | 301 | | asm("cvtss2si %1, %0" | 302 | | : "=r"(ret) | 303 | | : "xm"(value)); | 304 | | return static_cast<I>(ret); | 305 | | } | 306 | 19.4k | i32 ret; | 307 | 19.4k | asm("cvtss2si %1, %0" | 308 | 19.4k | : "=r"(ret) | 309 | 19.4k | : "xm"(value)); | 310 | 19.4k | return static_cast<I>(ret); | 311 | 19.4k | } |
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEsEET_f |
312 | | |
313 | | template<Integral I> |
314 | | ALWAYS_INLINE I round_to(double value) |
315 | 0 | { |
316 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, |
317 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 |
318 | | // To solve this we would either need to detect that value or do a range check and |
319 | | // then do a more specialized conversion, which might include a division (which is expensive) |
320 | 0 | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { |
321 | 0 | i64 ret; |
322 | 0 | asm("cvtsd2si %1, %0" |
323 | 0 | : "=r"(ret) |
324 | 0 | : "xm"(value)); |
325 | 0 | return static_cast<I>(ret); |
326 | 0 | } |
327 | 0 | i32 ret; |
328 | 0 | asm("cvtsd2si %1, %0" |
329 | 0 | : "=r"(ret) |
330 | 0 | : "xm"(value)); |
331 | 0 | return static_cast<I>(ret); |
332 | 0 | } Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralElEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEmEET_d |
333 | | |
334 | | #elif ARCH(AARCH64) |
335 | | template<SignedIntegral I> |
336 | | ALWAYS_INLINE I round_to(float value) |
337 | | { |
338 | | if constexpr (sizeof(I) <= sizeof(u32)) { |
339 | | i32 res; |
340 | | asm("fcvtns %w0, %s1" |
341 | | : "=r"(res) |
342 | | : "w"(value)); |
343 | | return static_cast<I>(res); |
344 | | } |
345 | | i64 res; |
346 | | asm("fcvtns %0, %s1" |
347 | | : "=r"(res) |
348 | | : "w"(value)); |
349 | | return static_cast<I>(res); |
350 | | } |
351 | | |
352 | | template<SignedIntegral I> |
353 | | ALWAYS_INLINE I round_to(double value) |
354 | | { |
355 | | if constexpr (sizeof(I) <= sizeof(u32)) { |
356 | | i32 res; |
357 | | asm("fcvtns %w0, %d1" |
358 | | : "=r"(res) |
359 | | : "w"(value)); |
360 | | return static_cast<I>(res); |
361 | | } |
362 | | i64 res; |
363 | | asm("fcvtns %0, %d1" |
364 | | : "=r"(res) |
365 | | : "w"(value)); |
366 | | return static_cast<I>(res); |
367 | | } |
368 | | |
369 | | template<UnsignedIntegral U> |
370 | | ALWAYS_INLINE U round_to(float value) |
371 | | { |
372 | | if constexpr (sizeof(U) <= sizeof(u32)) { |
373 | | u32 res; |
374 | | asm("fcvtnu %w0, %s1" |
375 | | : "=r"(res) |
376 | | : "w"(value)); |
377 | | return static_cast<U>(res); |
378 | | } |
379 | | i64 res; |
380 | | asm("fcvtnu %0, %s1" |
381 | | : "=r"(res) |
382 | | : "w"(value)); |
383 | | return static_cast<U>(res); |
384 | | } |
385 | | |
386 | | template<UnsignedIntegral U> |
387 | | ALWAYS_INLINE U round_to(double value) |
388 | | { |
389 | | if constexpr (sizeof(U) <= sizeof(u32)) { |
390 | | u32 res; |
391 | | asm("fcvtns %w0, %d1" |
392 | | : "=r"(res) |
393 | | : "w"(value)); |
394 | | return static_cast<U>(res); |
395 | | } |
396 | | i64 res; |
397 | | asm("fcvtns %0, %d1" |
398 | | : "=r"(res) |
399 | | : "w"(value)); |
400 | | return static_cast<U>(res); |
401 | | } |
402 | | |
403 | | #else |
404 | | template<Integral I, FloatingPoint P> |
405 | | ALWAYS_INLINE I round_to(P value) |
406 | | { |
407 | | if constexpr (IsSame<P, long double>) |
408 | | return static_cast<I>(__builtin_llrintl(value)); |
409 | | if constexpr (IsSame<P, double>) |
410 | | return static_cast<I>(__builtin_llrint(value)); |
411 | | if constexpr (IsSame<P, float>) |
412 | | return static_cast<I>(__builtin_llrintf(value)); |
413 | | } |
414 | | #endif |
415 | | |
416 | | } |
417 | | |
418 | | using Rounding::ceil; |
419 | | using Rounding::floor; |
420 | | using Rounding::rint; |
421 | | using Rounding::round; |
422 | | using Rounding::round_to; |
423 | | using Rounding::trunc; |
424 | | |
425 | | namespace Division { |
426 | | template<FloatingPoint T> |
427 | | constexpr T fmod(T x, T y) |
428 | | { |
429 | | CONSTEXPR_STATE(fmod, x, y); |
430 | | |
431 | | #if ARCH(X86_64) |
432 | | u16 fpu_status; |
433 | | do { |
434 | | asm( |
435 | | "fprem\n" |
436 | | "fnstsw %%ax\n" |
437 | | : "+t"(x), "=a"(fpu_status) |
438 | | : "u"(y)); |
439 | | } while (fpu_status & 0x400); |
440 | | return x; |
441 | | #else |
442 | | # if defined(AK_OS_SERENITY) |
443 | | // FIXME: This is a very naive implementation. |
444 | | |
445 | | if (__builtin_isnan(x)) |
446 | | return x; |
447 | | if (__builtin_isnan(y)) |
448 | | return y; |
449 | | |
450 | | // SPECIAL VALUES |
451 | | // fmod(±0, y) returns ±0 if y is neither 0 nor NaN. |
452 | | if (x == 0 && y != 0) |
453 | | return x; |
454 | | |
455 | | // fmod(x, y) returns a NaN and raises the "invalid" floating-point exception for x infinite or y zero. |
456 | | // FIXME: Exception. |
457 | | if (__builtin_isinf(x) || y == 0) |
458 | | return NaN<T>; |
459 | | |
460 | | // fmod(x, ±infinity) returns x for x not infinite. |
461 | | if (__builtin_isinf(y)) |
462 | | return x; |
463 | | |
464 | | // Range reduction: handle negative x and y. |
465 | | if (y < 0) |
466 | | return fmod(x, -y); |
467 | | if (x < 0) |
468 | | return -fmod(-x, y); |
469 | | |
470 | | // x is x_mantissa * 2**x_exponent, y is y_mantissa * 2**y_exponent. |
471 | | // (Both are positive at this point.) |
472 | | // If y_exponent > x_exponent, we are done and can return x. |
473 | | // If y_exponent == x_exponent, we can return (x_mantissa % y_mantissa) * 2**x_exponent. |
474 | | // If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from |
475 | | // the exponent into the mantissa. |
476 | | |
477 | | auto x_bits = FloatExtractor<T>::from_float(x); |
478 | | typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; |
479 | | |
480 | | auto y_bits = FloatExtractor<T>::from_float(y); |
481 | | typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; |
482 | | |
483 | | // FIXME: Handle denormals. For now, treat them as 0. |
484 | | if (x_exponent == 0 && y_exponent != 0) |
485 | | return 0; |
486 | | if (y_exponent == 0) |
487 | | return NaN<T>; |
488 | | |
489 | | if (y_exponent > x_exponent) |
490 | | return x; |
491 | | |
492 | | // FIXME: This is wrong for f80. |
493 | | typename FloatExtractor<T>::ComponentType implicit_mantissa_bit = 1; |
494 | | implicit_mantissa_bit <<= FloatExtractor<T>::mantissa_bits; |
495 | | |
496 | | typename FloatExtractor<T>::ComponentType x_mantissa = x_bits.mantissa | implicit_mantissa_bit; |
497 | | typename FloatExtractor<T>::ComponentType y_mantissa = y_bits.mantissa | implicit_mantissa_bit; |
498 | | |
499 | | while (y_exponent < x_exponent) { |
500 | | // This is ok because (x % (y * 2**n)) divides (x % y) for all n > 0. |
501 | | x_mantissa %= y_mantissa; |
502 | | |
503 | | x_mantissa <<= 1; |
504 | | --x_exponent; |
505 | | } |
506 | | |
507 | | x_mantissa %= y_mantissa; |
508 | | |
509 | | if (x_mantissa == 0) |
510 | | return 0; |
511 | | |
512 | | // We're done and want to return x_mantissa * 2 ** x_exponent. |
513 | | // But x_mantissa might not have a leading 1 bit, so we have to realign first. |
514 | | // Mantissa is mantissa_bits long, count_leading_zeroes() counts in ComponentType, adjust: |
515 | | auto const always_zero_bits = sizeof(typename FloatExtractor<T>::ComponentType) * 8 - (FloatExtractor<T>::mantissa_bits + 1); // +1 for implicit 1 bit |
516 | | auto shift = count_leading_zeroes(x_mantissa) - always_zero_bits; |
517 | | |
518 | | if (x_exponent < shift) { |
519 | | // FIXME: Make a real denormal. |
520 | | return 0; |
521 | | } |
522 | | |
523 | | x_mantissa <<= shift; |
524 | | x_exponent -= shift; |
525 | | |
526 | | x_bits.exponent = x_exponent; |
527 | | x_bits.mantissa = x_mantissa; |
528 | | return x_bits.to_float(); |
529 | | # else |
530 | | if constexpr (IsSame<T, long double>) |
531 | | return __builtin_fmodl(x, y); |
532 | | if constexpr (IsSame<T, double>) |
533 | | return __builtin_fmod(x, y); |
534 | | if constexpr (IsSame<T, float>) |
535 | | return __builtin_fmodf(x, y); |
536 | | # endif |
537 | | #endif |
538 | | } |
539 | | |
540 | | template<FloatingPoint T> |
541 | | constexpr T remainder(T x, T y) |
542 | | { |
543 | | CONSTEXPR_STATE(remainder, x, y); |
544 | | |
545 | | #if ARCH(X86_64) |
546 | | u16 fpu_status; |
547 | | do { |
548 | | asm( |
549 | | "fprem1\n" |
550 | | "fnstsw %%ax\n" |
551 | | : "+t"(x), "=a"(fpu_status) |
552 | | : "u"(y)); |
553 | | } while (fpu_status & 0x400); |
554 | | return x; |
555 | | #else |
556 | | # if defined(AK_OS_SERENITY) |
557 | | // TODO: Add implementation for this function. |
558 | | TODO(); |
559 | | # endif |
560 | | if constexpr (IsSame<T, long double>) |
561 | | return __builtin_remainderl(x, y); |
562 | | if constexpr (IsSame<T, double>) |
563 | | return __builtin_remainder(x, y); |
564 | | if constexpr (IsSame<T, float>) |
565 | | return __builtin_remainderf(x, y); |
566 | | #endif |
567 | | } |
568 | | } |
569 | | |
570 | | using Division::fmod; |
571 | | using Division::remainder; |
572 | | |
573 | | template<FloatingPoint T> |
574 | | constexpr T sqrt(T x) |
575 | 65.7M | { |
576 | 65.7M | CONSTEXPR_STATE(sqrt, x); |
577 | | |
578 | 65.7M | #if ARCH(X86_64) |
579 | 65.7M | if constexpr (IsSame<T, float>) { |
580 | 63.2M | float res; |
581 | 63.2M | asm("sqrtss %1, %0" |
582 | 63.2M | : "=x"(res) |
583 | 63.2M | : "x"(x)); |
584 | 63.2M | return res; |
585 | 63.2M | } |
586 | 2.59M | if constexpr (IsSame<T, double>) { |
587 | 2.59M | double res; |
588 | 2.59M | asm("sqrtsd %1, %0" |
589 | 2.59M | : "=x"(res) |
590 | 2.59M | : "x"(x)); |
591 | 2.59M | return res; |
592 | 2.59M | } |
593 | 0 | T res; |
594 | 65.7M | asm("fsqrt" |
595 | 65.7M | : "=t"(res) |
596 | 65.7M | : "0"(x)); |
597 | 65.7M | return res; |
598 | | #elif ARCH(AARCH64) |
599 | | AARCH64_INSTRUCTION(fsqrt, x); |
600 | | #elif ARCH(RISCV64) |
601 | | if constexpr (IsSame<T, float>) { |
602 | | float res; |
603 | | asm("fsqrt.s %0, %1" |
604 | | : "=f"(res) |
605 | | : "f"(x)); |
606 | | return res; |
607 | | } |
608 | | if constexpr (IsSame<T, double>) { |
609 | | double res; |
610 | | asm("fsqrt.d %0, %1" |
611 | | : "=f"(res) |
612 | | : "f"(x)); |
613 | | return res; |
614 | | } |
615 | | if constexpr (IsSame<T, long double>) |
616 | | TODO_RISCV64(); |
617 | | #else |
618 | | # if defined(AK_OS_SERENITY) |
619 | | // TODO: Add implementation for this function. |
620 | | TODO(); |
621 | | # endif |
622 | | return __builtin_sqrt(x); |
623 | | #endif |
624 | 65.7M | } _ZN2AK4sqrtITkNS_8Concepts13FloatingPointEfEET_S2_ Line | Count | Source | 575 | 63.2M | { | 576 | 63.2M | CONSTEXPR_STATE(sqrt, x); | 577 | | | 578 | 63.2M | #if ARCH(X86_64) | 579 | 63.2M | if constexpr (IsSame<T, float>) { | 580 | 63.2M | float res; | 581 | 63.2M | asm("sqrtss %1, %0" | 582 | 63.2M | : "=x"(res) | 583 | 63.2M | : "x"(x)); | 584 | 63.2M | return res; | 585 | 63.2M | } | 586 | | if constexpr (IsSame<T, double>) { | 587 | | double res; | 588 | | asm("sqrtsd %1, %0" | 589 | | : "=x"(res) | 590 | | : "x"(x)); | 591 | | return res; | 592 | | } | 593 | 63.2M | T res; | 594 | 63.2M | asm("fsqrt" | 595 | 63.2M | : "=t"(res) | 596 | 63.2M | : "0"(x)); | 597 | 63.2M | return res; | 598 | | #elif ARCH(AARCH64) | 599 | | AARCH64_INSTRUCTION(fsqrt, x); | 600 | | #elif ARCH(RISCV64) | 601 | | if constexpr (IsSame<T, float>) { | 602 | | float res; | 603 | | asm("fsqrt.s %0, %1" | 604 | | : "=f"(res) | 605 | | : "f"(x)); | 606 | | return res; | 607 | | } | 608 | | if constexpr (IsSame<T, double>) { | 609 | | double res; | 610 | | asm("fsqrt.d %0, %1" | 611 | | : "=f"(res) | 612 | | : "f"(x)); | 613 | | return res; | 614 | | } | 615 | | if constexpr (IsSame<T, long double>) | 616 | | TODO_RISCV64(); | 617 | | #else | 618 | | # if defined(AK_OS_SERENITY) | 619 | | // TODO: Add implementation for this function. | 620 | | TODO(); | 621 | | # endif | 622 | | return __builtin_sqrt(x); | 623 | | #endif | 624 | 63.2M | } |
_ZN2AK4sqrtITkNS_8Concepts13FloatingPointEdEET_S2_ Line | Count | Source | 575 | 2.59M | { | 576 | 2.59M | CONSTEXPR_STATE(sqrt, x); | 577 | | | 578 | 2.59M | #if ARCH(X86_64) | 579 | | if constexpr (IsSame<T, float>) { | 580 | | float res; | 581 | | asm("sqrtss %1, %0" | 582 | | : "=x"(res) | 583 | | : "x"(x)); | 584 | | return res; | 585 | | } | 586 | 2.59M | if constexpr (IsSame<T, double>) { | 587 | 2.59M | double res; | 588 | 2.59M | asm("sqrtsd %1, %0" | 589 | 2.59M | : "=x"(res) | 590 | 2.59M | : "x"(x)); | 591 | 2.59M | return res; | 592 | 2.59M | } | 593 | 0 | T res; | 594 | 2.59M | asm("fsqrt" | 595 | 2.59M | : "=t"(res) | 596 | 2.59M | : "0"(x)); | 597 | 2.59M | return res; | 598 | | #elif ARCH(AARCH64) | 599 | | AARCH64_INSTRUCTION(fsqrt, x); | 600 | | #elif ARCH(RISCV64) | 601 | | if constexpr (IsSame<T, float>) { | 602 | | float res; | 603 | | asm("fsqrt.s %0, %1" | 604 | | : "=f"(res) | 605 | | : "f"(x)); | 606 | | return res; | 607 | | } | 608 | | if constexpr (IsSame<T, double>) { | 609 | | double res; | 610 | | asm("fsqrt.d %0, %1" | 611 | | : "=f"(res) | 612 | | : "f"(x)); | 613 | | return res; | 614 | | } | 615 | | if constexpr (IsSame<T, long double>) | 616 | | TODO_RISCV64(); | 617 | | #else | 618 | | # if defined(AK_OS_SERENITY) | 619 | | // TODO: Add implementation for this function. | 620 | | TODO(); | 621 | | # endif | 622 | | return __builtin_sqrt(x); | 623 | | #endif | 624 | 2.59M | } |
|
625 | | |
626 | | template<FloatingPoint T> |
627 | | constexpr T cbrt(T x) |
628 | | { |
629 | | CONSTEXPR_STATE(cbrt, x); |
630 | | if (__builtin_isinf(x) || x == 0) |
631 | | return x; |
632 | | if (x < 0) |
633 | | return -cbrt(-x); |
634 | | |
635 | | T r = x; |
636 | | T ex = 0; |
637 | | |
638 | | while (r < 0.125l) { |
639 | | r *= 8; |
640 | | ex--; |
641 | | } |
642 | | while (r > 1.0l) { |
643 | | r *= 0.125l; |
644 | | ex++; |
645 | | } |
646 | | |
647 | | r = (-0.46946116l * r + 1.072302l) * r + 0.3812513l; |
648 | | |
649 | | while (ex < 0) { |
650 | | r *= 0.5l; |
651 | | ex++; |
652 | | } |
653 | | while (ex > 0) { |
654 | | r *= 2.0l; |
655 | | ex--; |
656 | | } |
657 | | |
658 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
659 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
660 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
661 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
662 | | |
663 | | return r; |
664 | | } |
665 | | |
666 | | namespace Trigonometry { |
667 | | |
668 | | template<FloatingPoint T> |
669 | | constexpr T hypot(T x, T y) |
670 | 45.4M | { |
671 | 45.4M | return sqrt(x * x + y * y); |
672 | 45.4M | } |
673 | | |
674 | | template<FloatingPoint T> |
675 | | constexpr T sin(T angle) |
676 | 17.7M | { |
677 | 17.7M | CONSTEXPR_STATE(sin, angle); |
678 | | |
679 | 17.7M | #if ARCH(X86_64) |
680 | 17.7M | T ret; |
681 | 17.7M | asm( |
682 | 17.7M | "fsin" |
683 | 17.7M | : "=t"(ret) |
684 | 17.7M | : "0"(angle)); |
685 | 17.7M | return ret; |
686 | | #else |
687 | | # if defined(AK_OS_SERENITY) |
688 | | if (angle < 0) |
689 | | return -sin(-angle); |
690 | | |
691 | | angle = fmod(angle, 2 * Pi<T>); |
692 | | |
693 | | if (angle >= Pi<T>) |
694 | | return -sin(angle - Pi<T>); |
695 | | |
696 | | if (angle > Pi<T> / 2) |
697 | | return sin(Pi<T> - angle); |
698 | | |
699 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula |
700 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. |
701 | | return 16 * angle * (Pi<T> - angle) / (5 * Pi<T> * Pi<T> - 4 * angle * (Pi<T> - angle)); |
702 | | # else |
703 | | return __builtin_sin(angle); |
704 | | # endif |
705 | | #endif |
706 | 17.7M | } _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 676 | 17.7M | { | 677 | 17.7M | CONSTEXPR_STATE(sin, angle); | 678 | | | 679 | 17.7M | #if ARCH(X86_64) | 680 | 17.7M | T ret; | 681 | 17.7M | asm( | 682 | 17.7M | "fsin" | 683 | 17.7M | : "=t"(ret) | 684 | 17.7M | : "0"(angle)); | 685 | 17.7M | return ret; | 686 | | #else | 687 | | # if defined(AK_OS_SERENITY) | 688 | | if (angle < 0) | 689 | | return -sin(-angle); | 690 | | | 691 | | angle = fmod(angle, 2 * Pi<T>); | 692 | | | 693 | | if (angle >= Pi<T>) | 694 | | return -sin(angle - Pi<T>); | 695 | | | 696 | | if (angle > Pi<T> / 2) | 697 | | return sin(Pi<T> - angle); | 698 | | | 699 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula | 700 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. | 701 | | return 16 * angle * (Pi<T> - angle) / (5 * Pi<T> * Pi<T> - 4 * angle * (Pi<T> - angle)); | 702 | | # else | 703 | | return __builtin_sin(angle); | 704 | | # endif | 705 | | #endif | 706 | 17.7M | } |
Unexecuted instantiation: _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEdEET_S3_ |
707 | | |
708 | | template<FloatingPoint T> |
709 | | constexpr T cos(T angle) |
710 | 5.77k | { |
711 | 5.77k | CONSTEXPR_STATE(cos, angle); |
712 | | |
713 | 5.77k | #if ARCH(X86_64) |
714 | 5.77k | T ret; |
715 | 5.77k | asm( |
716 | 5.77k | "fcos" |
717 | 5.77k | : "=t"(ret) |
718 | 5.77k | : "0"(angle)); |
719 | 5.77k | return ret; |
720 | | #else |
721 | | # if defined(AK_OS_SERENITY) |
722 | | if (angle < 0) |
723 | | return cos(-angle); |
724 | | |
725 | | angle = fmod(angle, 2 * Pi<T>); |
726 | | |
727 | | if (angle >= Pi<T>) |
728 | | return -cos(angle - Pi<T>); |
729 | | |
730 | | if (angle > Pi<T> / 2) |
731 | | return -cos(Pi<T> - angle); |
732 | | |
733 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula |
734 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. |
735 | | return (Pi<T> * Pi<T> - 4 * angle * angle) / (Pi<T> * Pi<T> + angle * angle); |
736 | | # else |
737 | | return __builtin_cos(angle); |
738 | | # endif |
739 | | #endif |
740 | 5.77k | } _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 710 | 5.77k | { | 711 | 5.77k | CONSTEXPR_STATE(cos, angle); | 712 | | | 713 | 5.77k | #if ARCH(X86_64) | 714 | 5.77k | T ret; | 715 | 5.77k | asm( | 716 | 5.77k | "fcos" | 717 | 5.77k | : "=t"(ret) | 718 | 5.77k | : "0"(angle)); | 719 | 5.77k | return ret; | 720 | | #else | 721 | | # if defined(AK_OS_SERENITY) | 722 | | if (angle < 0) | 723 | | return cos(-angle); | 724 | | | 725 | | angle = fmod(angle, 2 * Pi<T>); | 726 | | | 727 | | if (angle >= Pi<T>) | 728 | | return -cos(angle - Pi<T>); | 729 | | | 730 | | if (angle > Pi<T> / 2) | 731 | | return -cos(Pi<T> - angle); | 732 | | | 733 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula | 734 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. | 735 | | return (Pi<T> * Pi<T> - 4 * angle * angle) / (Pi<T> * Pi<T> + angle * angle); | 736 | | # else | 737 | | return __builtin_cos(angle); | 738 | | # endif | 739 | | #endif | 740 | 5.77k | } |
Unexecuted instantiation: _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEdEET_S3_ |
741 | | |
742 | | template<FloatingPoint T> |
743 | | constexpr void sincos(T angle, T& sin_val, T& cos_val) |
744 | 40.8M | { |
745 | 40.8M | if (is_constant_evaluated()) { |
746 | 0 | sin_val = sin(angle); |
747 | 0 | cos_val = cos(angle); |
748 | 0 | return; |
749 | 0 | } |
750 | 40.8M | #if ARCH(X86_64) |
751 | 40.8M | asm( |
752 | 40.8M | "fsincos" |
753 | 40.8M | : "=t"(cos_val), "=u"(sin_val) |
754 | 40.8M | : "0"(angle)); |
755 | | #else |
756 | | sin_val = sin(angle); |
757 | | cos_val = cos(angle); |
758 | | #endif |
759 | 40.8M | } _ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEfEEvT_RS3_S4_ Line | Count | Source | 744 | 38.6M | { | 745 | 38.6M | if (is_constant_evaluated()) { | 746 | 0 | sin_val = sin(angle); | 747 | 0 | cos_val = cos(angle); | 748 | 0 | return; | 749 | 0 | } | 750 | 38.6M | #if ARCH(X86_64) | 751 | 38.6M | asm( | 752 | 38.6M | "fsincos" | 753 | 38.6M | : "=t"(cos_val), "=u"(sin_val) | 754 | 38.6M | : "0"(angle)); | 755 | | #else | 756 | | sin_val = sin(angle); | 757 | | cos_val = cos(angle); | 758 | | #endif | 759 | 38.6M | } |
_ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEdEEvT_RS3_S4_ Line | Count | Source | 744 | 2.23M | { | 745 | 2.23M | if (is_constant_evaluated()) { | 746 | 0 | sin_val = sin(angle); | 747 | 0 | cos_val = cos(angle); | 748 | 0 | return; | 749 | 0 | } | 750 | 2.23M | #if ARCH(X86_64) | 751 | 2.23M | asm( | 752 | 2.23M | "fsincos" | 753 | 2.23M | : "=t"(cos_val), "=u"(sin_val) | 754 | 2.23M | : "0"(angle)); | 755 | | #else | 756 | | sin_val = sin(angle); | 757 | | cos_val = cos(angle); | 758 | | #endif | 759 | 2.23M | } |
|
760 | | |
761 | | template<FloatingPoint T> |
762 | | constexpr T tan(T angle) |
763 | 17.9M | { |
764 | 17.9M | CONSTEXPR_STATE(tan, angle); |
765 | | |
766 | 17.9M | #if ARCH(X86_64) |
767 | 17.9M | T ret, one; |
768 | 17.9M | asm( |
769 | 17.9M | "fptan" |
770 | 17.9M | : "=t"(one), "=u"(ret) |
771 | 17.9M | : "0"(angle)); |
772 | | |
773 | 17.9M | return ret; |
774 | | #else |
775 | | # if defined(AK_OS_SERENITY) |
776 | | return sin(angle) / cos(angle); |
777 | | # else |
778 | | return __builtin_tan(angle); |
779 | | # endif |
780 | | #endif |
781 | 17.9M | } |
782 | | |
783 | | template<FloatingPoint T> |
784 | | constexpr T asin(T x) |
785 | | { |
786 | | CONSTEXPR_STATE(asin, x); |
787 | | |
788 | | if (x < 0) |
789 | | return -asin(-x); |
790 | | |
791 | | if (x > 1) |
792 | | return NaN<T>; |
793 | | |
794 | | if (x > (T)0.5) { |
795 | | // asin(x) = pi/2 - 2 * asin(sqrt((1 - x) / 2)) |
796 | | // If 0.5 < x <= 1, then sqrt((1 - x) / 2 ) < 0.5, |
797 | | // and the recursion will go down the `<= 0.5` branch. |
798 | | return Pi<T> / 2 - 2 * asin(sqrt((1 - x) / 2)); |
799 | | } |
800 | | |
801 | | T squared = x * x; |
802 | | T value = x; |
803 | | T i = x * squared; |
804 | | value += i * Details::product_odd<1>() / Details::product_even<2>() / 3; |
805 | | i *= squared; |
806 | | value += i * Details::product_odd<3>() / Details::product_even<4>() / 5; |
807 | | i *= squared; |
808 | | value += i * Details::product_odd<5>() / Details::product_even<6>() / 7; |
809 | | i *= squared; |
810 | | value += i * Details::product_odd<7>() / Details::product_even<8>() / 9; |
811 | | i *= squared; |
812 | | value += i * Details::product_odd<9>() / Details::product_even<10>() / 11; |
813 | | i *= squared; |
814 | | value += i * Details::product_odd<11>() / Details::product_even<12>() / 13; |
815 | | i *= squared; |
816 | | value += i * Details::product_odd<13>() / Details::product_even<14>() / 15; |
817 | | i *= squared; |
818 | | value += i * Details::product_odd<15>() / Details::product_even<16>() / 17; |
819 | | return value; |
820 | | } |
821 | | |
822 | | template<FloatingPoint T> |
823 | | constexpr T acos(T value) |
824 | | { |
825 | | CONSTEXPR_STATE(acos, value); |
826 | | |
827 | | // FIXME: I am naive |
828 | | return static_cast<T>(0.5) * Pi<T> - asin<T>(value); |
829 | | } |
830 | | |
831 | | template<FloatingPoint T> |
832 | | constexpr T atan(T value) |
833 | | { |
834 | | CONSTEXPR_STATE(atan, value); |
835 | | |
836 | | #if ARCH(X86_64) |
837 | | T ret; |
838 | | asm( |
839 | | "fld1\n" |
840 | | "fpatan\n" |
841 | | : "=t"(ret) |
842 | | : "0"(value)); |
843 | | return ret; |
844 | | #else |
845 | | # if defined(AK_OS_SERENITY) |
846 | | return asin(value / sqrt(1 + value * value)); |
847 | | # endif |
848 | | return __builtin_atan(value); |
849 | | #endif |
850 | | } |
851 | | |
852 | | template<FloatingPoint T> |
853 | | constexpr T atan2(T y, T x) |
854 | 4.46M | { |
855 | 4.46M | CONSTEXPR_STATE(atan2, y, x); |
856 | | |
857 | 4.46M | #if ARCH(X86_64) |
858 | 4.46M | T ret; |
859 | 4.46M | asm("fpatan" |
860 | 4.46M | : "=t"(ret) |
861 | 4.46M | : "0"(x), "u"(y) |
862 | 4.46M | : "st(1)"); |
863 | 4.46M | return ret; |
864 | | #else |
865 | | # if defined(AK_OS_SERENITY) |
866 | | if (__builtin_isnan(y)) |
867 | | return y; |
868 | | if (__builtin_isnan(x)) |
869 | | return x; |
870 | | |
871 | | // SPECIAL VALUES |
872 | | // atan2(±0, -0) returns ±pi. |
873 | | if (y == 0 && x == 0 && signbit(x)) |
874 | | return copysign(Pi<T>, y); |
875 | | |
876 | | // atan2(±0, +0) returns ±0. |
877 | | if (y == 0 && x == 0 && !signbit(x)) |
878 | | return y; |
879 | | |
880 | | // atan2(±0, x) returns ±pi for x < 0. |
881 | | if (y == 0 && x < 0) |
882 | | return copysign(Pi<T>, y); |
883 | | |
884 | | // atan2(±0, x) returns ±0 for x > 0. |
885 | | if (y == 0 && x > 0) |
886 | | return y; |
887 | | |
888 | | // atan2(y, ±0) returns +pi/2 for y > 0. |
889 | | if (y > 0 && x == 0) |
890 | | return Pi<T> / 2; |
891 | | |
892 | | // atan2(y, ±0) returns -pi/2 for y < 0. |
893 | | if (y < 0 && x == 0) |
894 | | return -Pi<T> / 2; |
895 | | |
896 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. |
897 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) |
898 | | return copysign(Pi<T>, y); |
899 | | |
900 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. |
901 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) |
902 | | return copysign(static_cast<T>(0), y); |
903 | | |
904 | | // atan2(±infinity, x) returns ±pi/2 for finite x. |
905 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) |
906 | | return copysign(Pi<T> / 2, y); |
907 | | |
908 | | // atan2(±infinity, -infinity) returns ±3*pi/4. |
909 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) |
910 | | return copysign(3 * Pi<T> / 4, y); |
911 | | |
912 | | // atan2(±infinity, +infinity) returns ±pi/4. |
913 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) |
914 | | return copysign(Pi<T> / 4, y); |
915 | | |
916 | | // Check quadrant, going counterclockwise. |
917 | | if (y > 0 && x > 0) |
918 | | return atan(y / x); |
919 | | if (y > 0 && x < 0) |
920 | | return atan(y / x) + Pi<T>; |
921 | | if (y < 0 && x < 0) |
922 | | return atan(y / x) - Pi<T>; |
923 | | // y < 0 && x > 0 |
924 | | return atan(y / x); |
925 | | # else |
926 | | return __builtin_atan2(y, x); |
927 | | # endif |
928 | | #endif |
929 | 4.46M | } _ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEfEET_S3_S3_ Line | Count | Source | 854 | 5.89k | { | 855 | 5.89k | CONSTEXPR_STATE(atan2, y, x); | 856 | | | 857 | 5.89k | #if ARCH(X86_64) | 858 | 5.89k | T ret; | 859 | 5.89k | asm("fpatan" | 860 | 5.89k | : "=t"(ret) | 861 | 5.89k | : "0"(x), "u"(y) | 862 | 5.89k | : "st(1)"); | 863 | 5.89k | return ret; | 864 | | #else | 865 | | # if defined(AK_OS_SERENITY) | 866 | | if (__builtin_isnan(y)) | 867 | | return y; | 868 | | if (__builtin_isnan(x)) | 869 | | return x; | 870 | | | 871 | | // SPECIAL VALUES | 872 | | // atan2(±0, -0) returns ±pi. | 873 | | if (y == 0 && x == 0 && signbit(x)) | 874 | | return copysign(Pi<T>, y); | 875 | | | 876 | | // atan2(±0, +0) returns ±0. | 877 | | if (y == 0 && x == 0 && !signbit(x)) | 878 | | return y; | 879 | | | 880 | | // atan2(±0, x) returns ±pi for x < 0. | 881 | | if (y == 0 && x < 0) | 882 | | return copysign(Pi<T>, y); | 883 | | | 884 | | // atan2(±0, x) returns ±0 for x > 0. | 885 | | if (y == 0 && x > 0) | 886 | | return y; | 887 | | | 888 | | // atan2(y, ±0) returns +pi/2 for y > 0. | 889 | | if (y > 0 && x == 0) | 890 | | return Pi<T> / 2; | 891 | | | 892 | | // atan2(y, ±0) returns -pi/2 for y < 0. | 893 | | if (y < 0 && x == 0) | 894 | | return -Pi<T> / 2; | 895 | | | 896 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. | 897 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) | 898 | | return copysign(Pi<T>, y); | 899 | | | 900 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. | 901 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) | 902 | | return copysign(static_cast<T>(0), y); | 903 | | | 904 | | // atan2(±infinity, x) returns ±pi/2 for finite x. | 905 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) | 906 | | return copysign(Pi<T> / 2, y); | 907 | | | 908 | | // atan2(±infinity, -infinity) returns ±3*pi/4. | 909 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) | 910 | | return copysign(3 * Pi<T> / 4, y); | 911 | | | 912 | | // atan2(±infinity, +infinity) returns ±pi/4. | 913 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) | 914 | | return copysign(Pi<T> / 4, y); | 915 | | | 916 | | // Check quadrant, going counterclockwise. | 917 | | if (y > 0 && x > 0) | 918 | | return atan(y / x); | 919 | | if (y > 0 && x < 0) | 920 | | return atan(y / x) + Pi<T>; | 921 | | if (y < 0 && x < 0) | 922 | | return atan(y / x) - Pi<T>; | 923 | | // y < 0 && x > 0 | 924 | | return atan(y / x); | 925 | | # else | 926 | | return __builtin_atan2(y, x); | 927 | | # endif | 928 | | #endif | 929 | 5.89k | } |
_ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEdEET_S3_S3_ Line | Count | Source | 854 | 4.46M | { | 855 | 4.46M | CONSTEXPR_STATE(atan2, y, x); | 856 | | | 857 | 4.46M | #if ARCH(X86_64) | 858 | 4.46M | T ret; | 859 | 4.46M | asm("fpatan" | 860 | 4.46M | : "=t"(ret) | 861 | 4.46M | : "0"(x), "u"(y) | 862 | 4.46M | : "st(1)"); | 863 | 4.46M | return ret; | 864 | | #else | 865 | | # if defined(AK_OS_SERENITY) | 866 | | if (__builtin_isnan(y)) | 867 | | return y; | 868 | | if (__builtin_isnan(x)) | 869 | | return x; | 870 | | | 871 | | // SPECIAL VALUES | 872 | | // atan2(±0, -0) returns ±pi. | 873 | | if (y == 0 && x == 0 && signbit(x)) | 874 | | return copysign(Pi<T>, y); | 875 | | | 876 | | // atan2(±0, +0) returns ±0. | 877 | | if (y == 0 && x == 0 && !signbit(x)) | 878 | | return y; | 879 | | | 880 | | // atan2(±0, x) returns ±pi for x < 0. | 881 | | if (y == 0 && x < 0) | 882 | | return copysign(Pi<T>, y); | 883 | | | 884 | | // atan2(±0, x) returns ±0 for x > 0. | 885 | | if (y == 0 && x > 0) | 886 | | return y; | 887 | | | 888 | | // atan2(y, ±0) returns +pi/2 for y > 0. | 889 | | if (y > 0 && x == 0) | 890 | | return Pi<T> / 2; | 891 | | | 892 | | // atan2(y, ±0) returns -pi/2 for y < 0. | 893 | | if (y < 0 && x == 0) | 894 | | return -Pi<T> / 2; | 895 | | | 896 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. | 897 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) | 898 | | return copysign(Pi<T>, y); | 899 | | | 900 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. | 901 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) | 902 | | return copysign(static_cast<T>(0), y); | 903 | | | 904 | | // atan2(±infinity, x) returns ±pi/2 for finite x. | 905 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) | 906 | | return copysign(Pi<T> / 2, y); | 907 | | | 908 | | // atan2(±infinity, -infinity) returns ±3*pi/4. | 909 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) | 910 | | return copysign(3 * Pi<T> / 4, y); | 911 | | | 912 | | // atan2(±infinity, +infinity) returns ±pi/4. | 913 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) | 914 | | return copysign(Pi<T> / 4, y); | 915 | | | 916 | | // Check quadrant, going counterclockwise. | 917 | | if (y > 0 && x > 0) | 918 | | return atan(y / x); | 919 | | if (y > 0 && x < 0) | 920 | | return atan(y / x) + Pi<T>; | 921 | | if (y < 0 && x < 0) | 922 | | return atan(y / x) - Pi<T>; | 923 | | // y < 0 && x > 0 | 924 | | return atan(y / x); | 925 | | # else | 926 | | return __builtin_atan2(y, x); | 927 | | # endif | 928 | | #endif | 929 | 4.46M | } |
|
930 | | |
931 | | } |
932 | | |
933 | | using Trigonometry::acos; |
934 | | using Trigonometry::asin; |
935 | | using Trigonometry::atan; |
936 | | using Trigonometry::atan2; |
937 | | using Trigonometry::cos; |
938 | | using Trigonometry::hypot; |
939 | | using Trigonometry::sin; |
940 | | using Trigonometry::sincos; |
941 | | using Trigonometry::tan; |
942 | | |
943 | | namespace Exponentials { |
944 | | |
945 | | template<FloatingPoint T> |
946 | | constexpr T log2(T x) |
947 | 4.22M | { |
948 | 4.22M | CONSTEXPR_STATE(log2, x); |
949 | | |
950 | 4.22M | #if ARCH(X86_64) |
951 | | if constexpr (IsSame<T, long double>) { |
952 | | T ret; |
953 | | asm( |
954 | | "fld1\n" |
955 | | "fxch %%st(1)\n" |
956 | | "fyl2x\n" |
957 | | : "=t"(ret) |
958 | | : "0"(x)); |
959 | | return ret; |
960 | | } |
961 | 4.22M | #endif |
962 | | // References: |
963 | | // Gist comparing some implementations |
964 | | // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 |
965 | | |
966 | 4.22M | if (x == 0) |
967 | 0 | return -Infinity<T>; |
968 | 4.22M | if (x <= 0 || __builtin_isnan(x)) |
969 | 0 | return NaN<T>; |
970 | | |
971 | 4.22M | auto ext = FloatExtractor<T>::from_float(x); |
972 | 4.22M | T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; |
973 | | |
974 | | // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 |
975 | 4.22M | if (ext.mantissa == 0) |
976 | 3.67M | return exponent; |
977 | | |
978 | | // FIXME: Handle denormalized numbers separately |
979 | | |
980 | 551k | FloatExtractor<T> mantissa_ext { |
981 | 551k | .mantissa = ext.mantissa, |
982 | 551k | .exponent = FloatExtractor<T>::exponent_bias, |
983 | 551k | .sign = ext.sign |
984 | 551k | }; |
985 | | |
986 | | // (1 <= mantissa < 2) |
987 | 551k | T m = mantissa_ext.to_float(); |
988 | | |
989 | | // This is a reconstruction of one of Sun's algorithms |
990 | | // They use a transformation to lower the problem space, |
991 | | // while keeping the precision, and a 14th degree polynomial, |
992 | | // which is mirrored at sqrt(2) |
993 | | // TODO: Sun has some more algorithms for this and other math functions, |
994 | | // leveraging LUTs, investigate those, if they are better in performance and/or precision. |
995 | | // These seem to be related to crLibM's implementations, for which papers and references |
996 | | // are available. |
997 | | // FIXME: Dynamically adjust the amount of precision between floats and doubles |
998 | | // AKA floats might need less accuracy here, in comparison to doubles |
999 | | |
1000 | 551k | bool inverted = false; |
1001 | | // m > sqrt(2) |
1002 | 551k | if (m > Sqrt2<T>) { |
1003 | 185k | inverted = true; |
1004 | 185k | m = 2 / m; |
1005 | 185k | } |
1006 | 551k | T s = (m - 1) / (m + 1); |
1007 | | // ln((1 + s) / (1 - s)) == ln(m) |
1008 | 551k | T s2 = s * s; |
1009 | | // clang-format off |
1010 | 551k | T high_approx = s2 * (static_cast<T>(0.6666666666666735130) |
1011 | 551k | + s2 * (static_cast<T>(0.3999999999940941908) |
1012 | 551k | + s2 * (static_cast<T>(0.2857142874366239149) |
1013 | 551k | + s2 * (static_cast<T>(0.2222219843214978396) |
1014 | 551k | + s2 * (static_cast<T>(0.1818357216161805012) |
1015 | 551k | + s2 * (static_cast<T>(0.1531383769920937332) |
1016 | 551k | + s2 * static_cast<T>(0.1479819860511658591))))))); |
1017 | | // clang-format on |
1018 | | |
1019 | | // ln(m) == 2 * s + s * high_approx |
1020 | | // log2(m) == log2(e) * ln(m) |
1021 | 551k | T log2_mantissa = L2_E<T> * (2 * s + s * high_approx); |
1022 | 551k | if (inverted) |
1023 | 185k | log2_mantissa = 1 - log2_mantissa; |
1024 | 551k | return exponent + log2_mantissa; |
1025 | 4.22M | } _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 947 | 4.22M | { | 948 | 4.22M | CONSTEXPR_STATE(log2, x); | 949 | | | 950 | 4.22M | #if ARCH(X86_64) | 951 | | if constexpr (IsSame<T, long double>) { | 952 | | T ret; | 953 | | asm( | 954 | | "fld1\n" | 955 | | "fxch %%st(1)\n" | 956 | | "fyl2x\n" | 957 | | : "=t"(ret) | 958 | | : "0"(x)); | 959 | | return ret; | 960 | | } | 961 | 4.22M | #endif | 962 | | // References: | 963 | | // Gist comparing some implementations | 964 | | // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 | 965 | | | 966 | 4.22M | if (x == 0) | 967 | 0 | return -Infinity<T>; | 968 | 4.22M | if (x <= 0 || __builtin_isnan(x)) | 969 | 0 | return NaN<T>; | 970 | | | 971 | 4.22M | auto ext = FloatExtractor<T>::from_float(x); | 972 | 4.22M | T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; | 973 | | | 974 | | // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 | 975 | 4.22M | if (ext.mantissa == 0) | 976 | 3.67M | return exponent; | 977 | | | 978 | | // FIXME: Handle denormalized numbers separately | 979 | | | 980 | 551k | FloatExtractor<T> mantissa_ext { | 981 | 551k | .mantissa = ext.mantissa, | 982 | 551k | .exponent = FloatExtractor<T>::exponent_bias, | 983 | 551k | .sign = ext.sign | 984 | 551k | }; | 985 | | | 986 | | // (1 <= mantissa < 2) | 987 | 551k | T m = mantissa_ext.to_float(); | 988 | | | 989 | | // This is a reconstruction of one of Sun's algorithms | 990 | | // They use a transformation to lower the problem space, | 991 | | // while keeping the precision, and a 14th degree polynomial, | 992 | | // which is mirrored at sqrt(2) | 993 | | // TODO: Sun has some more algorithms for this and other math functions, | 994 | | // leveraging LUTs, investigate those, if they are better in performance and/or precision. | 995 | | // These seem to be related to crLibM's implementations, for which papers and references | 996 | | // are available. | 997 | | // FIXME: Dynamically adjust the amount of precision between floats and doubles | 998 | | // AKA floats might need less accuracy here, in comparison to doubles | 999 | | | 1000 | 551k | bool inverted = false; | 1001 | | // m > sqrt(2) | 1002 | 551k | if (m > Sqrt2<T>) { | 1003 | 185k | inverted = true; | 1004 | 185k | m = 2 / m; | 1005 | 185k | } | 1006 | 551k | T s = (m - 1) / (m + 1); | 1007 | | // ln((1 + s) / (1 - s)) == ln(m) | 1008 | 551k | T s2 = s * s; | 1009 | | // clang-format off | 1010 | 551k | T high_approx = s2 * (static_cast<T>(0.6666666666666735130) | 1011 | 551k | + s2 * (static_cast<T>(0.3999999999940941908) | 1012 | 551k | + s2 * (static_cast<T>(0.2857142874366239149) | 1013 | 551k | + s2 * (static_cast<T>(0.2222219843214978396) | 1014 | 551k | + s2 * (static_cast<T>(0.1818357216161805012) | 1015 | 551k | + s2 * (static_cast<T>(0.1531383769920937332) | 1016 | 551k | + s2 * static_cast<T>(0.1479819860511658591))))))); | 1017 | | // clang-format on | 1018 | | | 1019 | | // ln(m) == 2 * s + s * high_approx | 1020 | | // log2(m) == log2(e) * ln(m) | 1021 | 551k | T log2_mantissa = L2_E<T> * (2 * s + s * high_approx); | 1022 | 551k | if (inverted) | 1023 | 185k | log2_mantissa = 1 - log2_mantissa; | 1024 | 551k | return exponent + log2_mantissa; | 1025 | 4.22M | } |
Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_ |
1026 | | |
1027 | | template<FloatingPoint T> |
1028 | | constexpr T log(T x) |
1029 | 48 | { |
1030 | 48 | CONSTEXPR_STATE(log, x); |
1031 | | |
1032 | 48 | #if ARCH(X86_64) |
1033 | 48 | T ret; |
1034 | 48 | asm( |
1035 | 48 | "fldln2\n" |
1036 | 48 | "fxch %%st(1)\n" |
1037 | 48 | "fyl2x\n" |
1038 | 48 | : "=t"(ret) |
1039 | 48 | : "0"(x)); |
1040 | 48 | return ret; |
1041 | | #elif defined(AK_OS_SERENITY) |
1042 | | // FIXME: Adjust the polynomial and formula in log2 to fit this |
1043 | | return log2<T>(x) / L2_E<T>; |
1044 | | #else |
1045 | | return __builtin_log(x); |
1046 | | #endif |
1047 | 48 | } |
1048 | | |
1049 | | template<FloatingPoint T> |
1050 | | constexpr T log10(T x) |
1051 | | { |
1052 | | CONSTEXPR_STATE(log10, x); |
1053 | | |
1054 | | #if ARCH(X86_64) |
1055 | | T ret; |
1056 | | asm( |
1057 | | "fldlg2\n" |
1058 | | "fxch %%st(1)\n" |
1059 | | "fyl2x\n" |
1060 | | : "=t"(ret) |
1061 | | : "0"(x)); |
1062 | | return ret; |
1063 | | #elif defined(AK_OS_SERENITY) |
1064 | | // FIXME: Adjust the polynomial and formula in log2 to fit this |
1065 | | return log2<T>(x) / L2_10<T>; |
1066 | | #else |
1067 | | return __builtin_log10(x); |
1068 | | #endif |
1069 | | } |
1070 | | |
1071 | | template<FloatingPoint T> |
1072 | | constexpr T exp2(T exponent) |
1073 | 4.22M | { |
1074 | 4.22M | CONSTEXPR_STATE(exp2, exponent); |
1075 | | |
1076 | 4.22M | #if ARCH(X86_64) |
1077 | 4.22M | T res; |
1078 | 4.22M | asm("fld1\n" |
1079 | 4.22M | "fld %%st(1)\n" |
1080 | 4.22M | "fprem\n" |
1081 | 4.22M | "f2xm1\n" |
1082 | 4.22M | "faddp\n" |
1083 | 4.22M | "fscale\n" |
1084 | 4.22M | "fstp %%st(1)" |
1085 | 4.22M | : "=t"(res) |
1086 | 4.22M | : "0"(exponent)); |
1087 | 4.22M | return res; |
1088 | | #else |
1089 | | // TODO: Add better implementation of this function. |
1090 | | // This is just fast exponentiation for the integer part and |
1091 | | // the first couple terms of the taylor series for the fractional part. |
1092 | | |
1093 | | if (exponent < 0) |
1094 | | return 1 / exp2(-exponent); |
1095 | | |
1096 | | if (exponent >= log2(NumericLimits<T>::max())) |
1097 | | return Infinity<T>; |
1098 | | |
1099 | | // Integer exponentiation part. |
1100 | | int int_exponent = static_cast<int>(exponent); |
1101 | | T exponent_fraction = exponent - int_exponent; |
1102 | | |
1103 | | T int_result = 1; |
1104 | | T base = 2; |
1105 | | for (;;) { |
1106 | | if (int_exponent & 1) |
1107 | | int_result *= base; |
1108 | | int_exponent >>= 1; |
1109 | | if (!int_exponent) |
1110 | | break; |
1111 | | base *= base; |
1112 | | } |
1113 | | |
1114 | | // Fractional part. |
1115 | | // Uses: |
1116 | | // exp(x) = sum(n, 0, \infty, x ** n / n!) |
1117 | | // 2**x = exp(log2(e) * x) |
1118 | | // FIXME: Pick better step size (and make it dependent on T). |
1119 | | T result = 0; |
1120 | | T power = 1; |
1121 | | T factorial = 1; |
1122 | | for (int i = 1; i < 16; ++i) { |
1123 | | result += power / factorial; |
1124 | | power *= exponent_fraction / L2_E<T>; |
1125 | | factorial *= i; |
1126 | | } |
1127 | | |
1128 | | return int_result * result; |
1129 | | #endif |
1130 | 4.22M | } _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 1073 | 4.22M | { | 1074 | 4.22M | CONSTEXPR_STATE(exp2, exponent); | 1075 | | | 1076 | 4.22M | #if ARCH(X86_64) | 1077 | 4.22M | T res; | 1078 | 4.22M | asm("fld1\n" | 1079 | 4.22M | "fld %%st(1)\n" | 1080 | 4.22M | "fprem\n" | 1081 | 4.22M | "f2xm1\n" | 1082 | 4.22M | "faddp\n" | 1083 | 4.22M | "fscale\n" | 1084 | 4.22M | "fstp %%st(1)" | 1085 | 4.22M | : "=t"(res) | 1086 | 4.22M | : "0"(exponent)); | 1087 | 4.22M | return res; | 1088 | | #else | 1089 | | // TODO: Add better implementation of this function. | 1090 | | // This is just fast exponentiation for the integer part and | 1091 | | // the first couple terms of the taylor series for the fractional part. | 1092 | | | 1093 | | if (exponent < 0) | 1094 | | return 1 / exp2(-exponent); | 1095 | | | 1096 | | if (exponent >= log2(NumericLimits<T>::max())) | 1097 | | return Infinity<T>; | 1098 | | | 1099 | | // Integer exponentiation part. | 1100 | | int int_exponent = static_cast<int>(exponent); | 1101 | | T exponent_fraction = exponent - int_exponent; | 1102 | | | 1103 | | T int_result = 1; | 1104 | | T base = 2; | 1105 | | for (;;) { | 1106 | | if (int_exponent & 1) | 1107 | | int_result *= base; | 1108 | | int_exponent >>= 1; | 1109 | | if (!int_exponent) | 1110 | | break; | 1111 | | base *= base; | 1112 | | } | 1113 | | | 1114 | | // Fractional part. | 1115 | | // Uses: | 1116 | | // exp(x) = sum(n, 0, \infty, x ** n / n!) | 1117 | | // 2**x = exp(log2(e) * x) | 1118 | | // FIXME: Pick better step size (and make it dependent on T). | 1119 | | T result = 0; | 1120 | | T power = 1; | 1121 | | T factorial = 1; | 1122 | | for (int i = 1; i < 16; ++i) { | 1123 | | result += power / factorial; | 1124 | | power *= exponent_fraction / L2_E<T>; | 1125 | | factorial *= i; | 1126 | | } | 1127 | | | 1128 | | return int_result * result; | 1129 | | #endif | 1130 | 4.22M | } |
Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_ |
1131 | | |
1132 | | template<FloatingPoint T> |
1133 | | constexpr T exp(T exponent) |
1134 | 0 | { |
1135 | 0 | CONSTEXPR_STATE(exp, exponent); |
1136 | |
|
1137 | 0 | #if ARCH(X86_64) |
1138 | 0 | T res; |
1139 | 0 | asm("fldl2e\n" |
1140 | 0 | "fmulp\n" |
1141 | 0 | "fld1\n" |
1142 | 0 | "fld %%st(1)\n" |
1143 | 0 | "fprem\n" |
1144 | 0 | "f2xm1\n" |
1145 | 0 | "faddp\n" |
1146 | 0 | "fscale\n" |
1147 | 0 | "fstp %%st(1)" |
1148 | 0 | : "=t"(res) |
1149 | 0 | : "0"(exponent)); |
1150 | 0 | return res; |
1151 | | #else |
1152 | | // TODO: Add better implementation of this function. |
1153 | | return exp2(exponent * L2_E<T>); |
1154 | | #endif |
1155 | 0 | } Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_ |
1156 | | |
1157 | | } |
1158 | | |
1159 | | using Exponentials::exp; |
1160 | | using Exponentials::exp2; |
1161 | | using Exponentials::log; |
1162 | | using Exponentials::log10; |
1163 | | using Exponentials::log2; |
1164 | | |
1165 | | namespace Hyperbolic { |
1166 | | |
1167 | | template<FloatingPoint T> |
1168 | | constexpr T sinh(T x) |
1169 | | { |
1170 | | T exponentiated = exp<T>(x); |
1171 | | if (x > 0) |
1172 | | return (exponentiated * exponentiated - 1) / 2 / exponentiated; |
1173 | | return (exponentiated - 1 / exponentiated) / 2; |
1174 | | } |
1175 | | |
1176 | | template<FloatingPoint T> |
1177 | | constexpr T cosh(T x) |
1178 | | { |
1179 | | CONSTEXPR_STATE(cosh, x); |
1180 | | |
1181 | | T exponentiated = exp(-x); |
1182 | | if (x < 0) |
1183 | | return (1 + exponentiated * exponentiated) / 2 / exponentiated; |
1184 | | return (1 / exponentiated + exponentiated) / 2; |
1185 | | } |
1186 | | |
1187 | | template<FloatingPoint T> |
1188 | | constexpr T tanh(T x) |
1189 | | { |
1190 | | if (x > 0) { |
1191 | | T exponentiated = exp<T>(2 * x); |
1192 | | return (exponentiated - 1) / (exponentiated + 1); |
1193 | | } |
1194 | | T plusX = exp<T>(x); |
1195 | | T minusX = 1 / plusX; |
1196 | | return (plusX - minusX) / (plusX + minusX); |
1197 | | } |
1198 | | |
1199 | | template<FloatingPoint T> |
1200 | | constexpr T asinh(T x) |
1201 | | { |
1202 | | return log<T>(x + sqrt<T>(x * x + 1)); |
1203 | | } |
1204 | | |
1205 | | template<FloatingPoint T> |
1206 | | constexpr T acosh(T x) |
1207 | | { |
1208 | | return log<T>(x + sqrt<T>(x * x - 1)); |
1209 | | } |
1210 | | |
1211 | | template<FloatingPoint T> |
1212 | | constexpr T atanh(T x) |
1213 | | { |
1214 | | return log<T>((1 + x) / (1 - x)) / (T)2.0l; |
1215 | | } |
1216 | | |
1217 | | } |
1218 | | |
1219 | | using Hyperbolic::acosh; |
1220 | | using Hyperbolic::asinh; |
1221 | | using Hyperbolic::atanh; |
1222 | | using Hyperbolic::cosh; |
1223 | | using Hyperbolic::sinh; |
1224 | | using Hyperbolic::tanh; |
1225 | | |
1226 | | // Calculate x^y with fast exponentiation when the power is a natural number. |
1227 | | template<FloatingPoint F, UnsignedIntegral U> |
1228 | | constexpr F pow_int(F x, U y) |
1229 | 147k | { |
1230 | 147k | auto result = static_cast<F>(1); |
1231 | 862k | while (y > 0) { |
1232 | 714k | if (y % 2 == 1) { |
1233 | 454k | result *= x; |
1234 | 454k | } |
1235 | 714k | x = x * x; |
1236 | 714k | y >>= 1; |
1237 | 714k | } |
1238 | 147k | return result; |
1239 | 147k | } _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEfTkNS1_16UnsignedIntegralEmEET_S2_T0_ Line | Count | Source | 1229 | 147k | { | 1230 | 147k | auto result = static_cast<F>(1); | 1231 | 862k | while (y > 0) { | 1232 | 714k | if (y % 2 == 1) { | 1233 | 454k | result *= x; | 1234 | 454k | } | 1235 | 714k | x = x * x; | 1236 | 714k | y >>= 1; | 1237 | 714k | } | 1238 | 147k | return result; | 1239 | 147k | } |
Unexecuted instantiation: _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEdTkNS1_16UnsignedIntegralEmEET_S2_T0_ |
1240 | | |
1241 | | template<FloatingPoint T> |
1242 | | constexpr T pow(T x, T y) |
1243 | 17.0M | { |
1244 | 17.0M | CONSTEXPR_STATE(pow, x, y); |
1245 | 17.0M | if (__builtin_isnan(y)) |
1246 | 0 | return y; |
1247 | 17.0M | if (y == 0) |
1248 | 5.98k | return 1; |
1249 | 17.0M | if (x == 0) |
1250 | 12.6M | return 0; |
1251 | 4.37M | if (y == 1) |
1252 | 3.65k | return x; |
1253 | | |
1254 | | // Take an integer fast path as long as the value fits within a 64-bit integer. |
1255 | 4.37M | if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) { |
1256 | 4.37M | i64 y_as_int = static_cast<i64>(y); |
1257 | 4.37M | if (y == static_cast<T>(y_as_int)) { |
1258 | 147k | T result = pow_int(x, static_cast<u64>(fabs<T>(y))); |
1259 | 147k | if (y < 0) |
1260 | 117k | result = static_cast<T>(1.0l) / result; |
1261 | 147k | return result; |
1262 | 147k | } |
1263 | 4.37M | } |
1264 | | |
1265 | | // FIXME: This formula suffers from error magnification. |
1266 | 4.22M | return exp2<T>(y * log2<T>(x)); |
1267 | 4.37M | } _ZN2AK3powITkNS_8Concepts13FloatingPointEfEET_S2_S2_ Line | Count | Source | 1243 | 17.0M | { | 1244 | 17.0M | CONSTEXPR_STATE(pow, x, y); | 1245 | 17.0M | if (__builtin_isnan(y)) | 1246 | 0 | return y; | 1247 | 17.0M | if (y == 0) | 1248 | 5.98k | return 1; | 1249 | 17.0M | if (x == 0) | 1250 | 12.6M | return 0; | 1251 | 4.37M | if (y == 1) | 1252 | 3.65k | return x; | 1253 | | | 1254 | | // Take an integer fast path as long as the value fits within a 64-bit integer. | 1255 | 4.37M | if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) { | 1256 | 4.37M | i64 y_as_int = static_cast<i64>(y); | 1257 | 4.37M | if (y == static_cast<T>(y_as_int)) { | 1258 | 147k | T result = pow_int(x, static_cast<u64>(fabs<T>(y))); | 1259 | 147k | if (y < 0) | 1260 | 117k | result = static_cast<T>(1.0l) / result; | 1261 | 147k | return result; | 1262 | 147k | } | 1263 | 4.37M | } | 1264 | | | 1265 | | // FIXME: This formula suffers from error magnification. | 1266 | 4.22M | return exp2<T>(y * log2<T>(x)); | 1267 | 4.37M | } |
Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_ Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_ |
1268 | | |
1269 | | template<Integral I, typename T> |
1270 | | constexpr I clamp_to(T value) |
1271 | 0 | { |
1272 | 0 | constexpr auto max = static_cast<T>(NumericLimits<I>::max()); |
1273 | 0 | if constexpr (max > 0) { |
1274 | 0 | if (value >= static_cast<T>(NumericLimits<I>::max())) |
1275 | 0 | return NumericLimits<I>::max(); |
1276 | 0 | } |
1277 | | |
1278 | 0 | constexpr auto min = static_cast<T>(NumericLimits<I>::min()); |
1279 | 0 | if constexpr (min <= 0) { |
1280 | 0 | if (value <= static_cast<T>(NumericLimits<I>::min())) |
1281 | 0 | return NumericLimits<I>::min(); |
1282 | 0 | } |
1283 | | |
1284 | | if constexpr (IsFloatingPoint<T>) |
1285 | 0 | return round_to<I>(value); |
1286 | | |
1287 | 0 | return value; |
1288 | 0 | } Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEldEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralElmEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEhdEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEilEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEifEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEidEET_T0_ |
1289 | | |
1290 | | // Wrap a to keep it in the range [-b, b]. |
1291 | | template<typename T> |
1292 | | constexpr T wrap_to_range(T a, IdentityType<T> b) |
1293 | | { |
1294 | | return fmod(fmod(a + b, 2 * b) + 2 * b, 2 * b) - b; |
1295 | | } |
1296 | | |
1297 | | #undef CONSTEXPR_STATE |
1298 | | #undef AARCH64_INSTRUCTION |
1299 | | } |
1300 | | |
1301 | | #if USING_AK_GLOBALLY |
1302 | | using AK::round_to; |
1303 | | #endif |