Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/BuiltinWrappers.h> |
10 | | #include <AK/Concepts.h> |
11 | | #include <AK/FloatingPoint.h> |
12 | | #include <AK/NumericLimits.h> |
13 | | #include <AK/StdLibExtraDetails.h> |
14 | | #include <AK/Types.h> |
15 | | #include <math.h> |
16 | | |
17 | | #ifdef KERNEL |
18 | | # error "Including AK/Math.h from the Kernel is never correct! Floating point is disabled." |
19 | | #endif |
20 | | |
21 | | namespace AK { |
22 | | |
23 | | template<FloatingPoint T> |
24 | | constexpr T NaN = __builtin_nan(""); |
25 | | template<FloatingPoint T> |
26 | | constexpr T Infinity = __builtin_huge_vall(); |
27 | | template<FloatingPoint T> |
28 | | constexpr T Pi = 3.141592653589793238462643383279502884L; |
29 | | template<FloatingPoint T> |
30 | | constexpr T E = 2.718281828459045235360287471352662498L; |
31 | | template<FloatingPoint T> |
32 | | constexpr T Sqrt2 = 1.414213562373095048801688724209698079L; |
33 | | template<FloatingPoint T> |
34 | | constexpr T Sqrt1_2 = 0.707106781186547524400844362104849039L; |
35 | | |
36 | | template<FloatingPoint T> |
37 | | constexpr T L2_10 = 3.321928094887362347870319429489390175864L; |
38 | | template<FloatingPoint T> |
39 | | constexpr T L2_E = 1.442695040888963407359924681001892137L; |
40 | | |
41 | | namespace Details { |
42 | | template<size_t> |
43 | | constexpr size_t product_even(); |
44 | | template<> |
45 | 0 | constexpr size_t product_even<2>() { return 2; } |
46 | | template<size_t value> |
47 | 0 | constexpr size_t product_even() { return value * product_even<value - 2>(); }Unexecuted instantiation: unsigned long AK::Details::product_even<4ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<6ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<8ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<10ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<12ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<14ul>() Unexecuted instantiation: unsigned long AK::Details::product_even<16ul>() |
48 | | |
49 | | template<size_t> |
50 | | constexpr size_t product_odd(); |
51 | | template<> |
52 | 0 | constexpr size_t product_odd<1>() { return 1; } |
53 | | template<size_t value> |
54 | 0 | constexpr size_t product_odd() { return value * product_odd<value - 2>(); }Unexecuted instantiation: unsigned long AK::Details::product_odd<3ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<5ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<7ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<9ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<11ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<13ul>() Unexecuted instantiation: unsigned long AK::Details::product_odd<15ul>() |
55 | | } |
56 | | |
57 | | template<FloatingPoint T> |
58 | | constexpr T to_radians(T degrees) |
59 | 0 | { |
60 | 0 | return degrees * AK::Pi<T> / 180; |
61 | 0 | } Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEfEET_S2_ Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEdEET_S2_ |
62 | | |
63 | | template<FloatingPoint T> |
64 | | constexpr T to_degrees(T radians) |
65 | 0 | { |
66 | 0 | return radians * 180 / AK::Pi<T>; |
67 | 0 | } Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEfEET_S2_ Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEdEET_S2_ |
68 | | |
69 | | template<FloatingPoint FloatT> |
70 | | FloatT copysign(FloatT x, FloatT y) |
71 | | { |
72 | | using Extractor = FloatExtractor<FloatT>; |
73 | | auto ex = Extractor::from_float(x); |
74 | | auto ey = Extractor::from_float(y); |
75 | | ex.sign = ey.sign; |
76 | | return ex.to_float(); |
77 | | } |
78 | | |
79 | | #define CONSTEXPR_STATE(function, args...) \ |
80 | 157M | if (is_constant_evaluated()) { \ |
81 | 0 | if (IsSame<T, long double>) \ |
82 | 0 | return __builtin_##function##l(args); \ |
83 | 0 | if (IsSame<T, double>) \ |
84 | 0 | return __builtin_##function(args); \ |
85 | 0 | if (IsSame<T, float>) \ |
86 | 0 | return __builtin_##function##f(args); \ |
87 | 0 | } |
88 | | |
89 | | #define AARCH64_INSTRUCTION(instruction, arg) \ |
90 | | if constexpr (IsSame<T, long double>) \ |
91 | | TODO(); \ |
92 | | if constexpr (IsSame<T, double>) { \ |
93 | | double res; \ |
94 | | asm(#instruction " %d0, %d1" \ |
95 | | : "=w"(res) \ |
96 | | : "w"(arg)); \ |
97 | | return res; \ |
98 | | } \ |
99 | | if constexpr (IsSame<T, float>) { \ |
100 | | float res; \ |
101 | | asm(#instruction " %s0, %s1" \ |
102 | | : "=w"(res) \ |
103 | | : "w"(arg)); \ |
104 | | return res; \ |
105 | | } |
106 | | |
107 | | template<FloatingPoint T> |
108 | | constexpr T fabs(T x) |
109 | 148k | { |
110 | | // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper |
111 | | if constexpr (IsSame<T, long double>) |
112 | | return __builtin_fabsl(x); |
113 | | if constexpr (IsSame<T, double>) |
114 | | return __builtin_fabs(x); |
115 | | if constexpr (IsSame<T, float>) |
116 | 148k | return __builtin_fabsf(x); |
117 | 148k | } _ZN2AK4fabsITkNS_8Concepts13FloatingPointEfEET_S2_ Line | Count | Source | 109 | 148k | { | 110 | | // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper | 111 | | if constexpr (IsSame<T, long double>) | 112 | | return __builtin_fabsl(x); | 113 | | if constexpr (IsSame<T, double>) | 114 | | return __builtin_fabs(x); | 115 | | if constexpr (IsSame<T, float>) | 116 | 148k | return __builtin_fabsf(x); | 117 | 148k | } |
Unexecuted instantiation: _ZN2AK4fabsITkNS_8Concepts13FloatingPointEdEET_S2_ |
118 | | |
119 | | namespace Rounding { |
120 | | template<FloatingPoint T> |
121 | | constexpr T ceil(T num) |
122 | 3.95k | { |
123 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b110 |
124 | 3.95k | if (is_constant_evaluated()) { |
125 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
126 | 0 | return num; |
127 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) |
128 | 0 | ? static_cast<i64>(num) |
129 | 0 | : static_cast<i64>(num) + ((num > 0) ? 1 : 0); |
130 | 0 | } |
131 | | #if ARCH(AARCH64) |
132 | | AARCH64_INSTRUCTION(frintp, num); |
133 | | #else |
134 | | if constexpr (IsSame<T, long double>) |
135 | | return __builtin_ceill(num); |
136 | | if constexpr (IsSame<T, double>) |
137 | | return __builtin_ceil(num); |
138 | | if constexpr (IsSame<T, float>) |
139 | 3.95k | return __builtin_ceilf(num); |
140 | 3.95k | #endif |
141 | 3.95k | } _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 122 | 3.95k | { | 123 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b110 | 124 | 3.95k | if (is_constant_evaluated()) { | 125 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) | 126 | 0 | return num; | 127 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) | 128 | 0 | ? static_cast<i64>(num) | 129 | 0 | : static_cast<i64>(num) + ((num > 0) ? 1 : 0); | 130 | 0 | } | 131 | | #if ARCH(AARCH64) | 132 | | AARCH64_INSTRUCTION(frintp, num); | 133 | | #else | 134 | | if constexpr (IsSame<T, long double>) | 135 | | return __builtin_ceill(num); | 136 | | if constexpr (IsSame<T, double>) | 137 | | return __builtin_ceil(num); | 138 | | if constexpr (IsSame<T, float>) | 139 | 3.95k | return __builtin_ceilf(num); | 140 | 3.95k | #endif | 141 | 3.95k | } |
Unexecuted instantiation: _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEdEET_S3_ |
142 | | |
143 | | template<FloatingPoint T> |
144 | | constexpr T floor(T num) |
145 | 0 | { |
146 | | // FIXME: SSE4.1 rounds[sd] num, res, 0b101 |
147 | 0 | if (is_constant_evaluated()) { |
148 | 0 | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
149 | 0 | return num; |
150 | 0 | return (static_cast<T>(static_cast<i64>(num)) == num) |
151 | 0 | ? static_cast<i64>(num) |
152 | 0 | : static_cast<i64>(num) - ((num > 0) ? 0 : 1); |
153 | 0 | } |
154 | | #if ARCH(AARCH64) |
155 | | AARCH64_INSTRUCTION(frintm, num); |
156 | | #else |
157 | | if constexpr (IsSame<T, long double>) |
158 | | return __builtin_floorl(num); |
159 | | if constexpr (IsSame<T, double>) |
160 | 0 | return __builtin_floor(num); |
161 | | if constexpr (IsSame<T, float>) |
162 | | return __builtin_floorf(num); |
163 | 0 | #endif |
164 | 0 | } |
165 | | |
166 | | template<FloatingPoint T> |
167 | | constexpr T trunc(T num) |
168 | | { |
169 | | #if ARCH(AARCH64) |
170 | | if (is_constant_evaluated()) { |
171 | | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
172 | | return num; |
173 | | return static_cast<T>(static_cast<i64>(num)); |
174 | | } |
175 | | AARCH64_INSTRUCTION(frintz, num); |
176 | | #endif |
177 | | // FIXME: Use dedicated instruction in the non constexpr case |
178 | | // SSE4.1: rounds[sd] %num, %res, 0b111 |
179 | | if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max()) |
180 | | return num; |
181 | | return static_cast<T>(static_cast<i64>(num)); |
182 | | } |
183 | | |
184 | | template<FloatingPoint T> |
185 | | constexpr T rint(T x) |
186 | | { |
187 | | CONSTEXPR_STATE(rint, x); |
188 | | // Note: This does break tie to even |
189 | | // But the behavior of frndint/rounds[ds]/frintx can be configured |
190 | | // through the floating point control registers. |
191 | | // FIXME: We should decide if we rename this to allow us to get away from |
192 | | // the configurability "burden" rint has |
193 | | // this would make us use `rounds[sd] %num, %res, 0b100` |
194 | | // and `frintn` respectively, |
195 | | // no such guaranteed round exists for x87 `frndint` |
196 | | #if ARCH(X86_64) |
197 | | # ifdef __SSE4_1__ |
198 | | if constexpr (IsSame<T, double>) { |
199 | | T r; |
200 | | asm( |
201 | | "roundsd %1, %0" |
202 | | : "=x"(r) |
203 | | : "x"(x)); |
204 | | return r; |
205 | | } |
206 | | if constexpr (IsSame<T, float>) { |
207 | | T r; |
208 | | asm( |
209 | | "roundss %1, %0" |
210 | | : "=x"(r) |
211 | | : "x"(x)); |
212 | | return r; |
213 | | } |
214 | | # else |
215 | | asm( |
216 | | "frndint" |
217 | | : "+t"(x)); |
218 | | return x; |
219 | | # endif |
220 | | #elif ARCH(AARCH64) |
221 | | AARCH64_INSTRUCTION(frintx, x); |
222 | | #elif ARCH(RISCV64) |
223 | | if (__builtin_isnan(x)) |
224 | | return x; |
225 | | |
226 | | // Floating point values have a gap size of >= 1 for values above 2^mantissa_bits - 1. |
227 | | if (fabs(x) > FloatExtractor<T>::mantissa_max) |
228 | | return x; |
229 | | |
230 | | if constexpr (IsSame<T, float>) { |
231 | | i64 r; |
232 | | asm("fcvt.l.s %0, %1, dyn" |
233 | | : "=r"(r) |
234 | | : "f"(x)); |
235 | | return copysign(static_cast<float>(r), x); |
236 | | } |
237 | | if constexpr (IsSame<T, double>) { |
238 | | i64 r; |
239 | | asm("fcvt.l.d %0, %1, dyn" |
240 | | : "=r"(r) |
241 | | : "f"(x)); |
242 | | return copysign(static_cast<double>(r), x); |
243 | | } |
244 | | if constexpr (IsSame<T, long double>) |
245 | | TODO_RISCV64(); |
246 | | #endif |
247 | | TODO(); |
248 | | } |
249 | | |
250 | | template<FloatingPoint T> |
251 | | constexpr T round(T x) |
252 | 0 | { |
253 | 0 | CONSTEXPR_STATE(round, x); |
254 | 0 | // Note: This is break-tie-away-from-zero, so not the hw's understanding of |
255 | 0 | // "nearest", which would be towards even. |
256 | 0 | if (x == 0.) |
257 | 0 | return x; |
258 | 0 | if (x > 0.) |
259 | 0 | return floor(x + .5); |
260 | 0 | return ceil(x - .5); |
261 | 0 | } |
262 | | |
263 | | template<Integral I, FloatingPoint P> |
264 | | ALWAYS_INLINE I round_to(P value); |
265 | | |
266 | | #if ARCH(X86_64) |
267 | | template<Integral I> |
268 | | ALWAYS_INLINE I round_to(long double value) |
269 | | { |
270 | | // Note: fistps outputs into a signed integer location (i16, i32, i64), |
271 | | // so lets be nice and tell the compiler that. |
272 | | Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret; |
273 | | if constexpr (sizeof(I) == sizeof(i64)) { |
274 | | asm("fistpll %0" |
275 | | : "=m"(ret) |
276 | | : "t"(value) |
277 | | : "st"); |
278 | | } else if constexpr (sizeof(I) == sizeof(i32)) { |
279 | | asm("fistpl %0" |
280 | | : "=m"(ret) |
281 | | : "t"(value) |
282 | | : "st"); |
283 | | } else { |
284 | | asm("fistps %0" |
285 | | : "=m"(ret) |
286 | | : "t"(value) |
287 | | : "st"); |
288 | | } |
289 | | return static_cast<I>(ret); |
290 | | } |
291 | | |
292 | | template<Integral I> |
293 | | ALWAYS_INLINE I round_to(float value) |
294 | 546M | { |
295 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, |
296 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 |
297 | | // To solve this we would either need to detect that value or do a range check and |
298 | | // then do a more specialized conversion, which might include a division (which is expensive) |
299 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { |
300 | | i64 ret; |
301 | | asm("cvtss2si %1, %0" |
302 | | : "=r"(ret) |
303 | | : "xm"(value)); |
304 | | return static_cast<I>(ret); |
305 | | } |
306 | 546M | i32 ret; |
307 | 546M | asm("cvtss2si %1, %0" |
308 | 546M | : "=r"(ret) |
309 | 546M | : "xm"(value)); |
310 | 546M | return static_cast<I>(ret); |
311 | 546M | } _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_f Line | Count | Source | 294 | 545M | { | 295 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, | 296 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 | 297 | | // To solve this we would either need to detect that value or do a range check and | 298 | | // then do a more specialized conversion, which might include a division (which is expensive) | 299 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { | 300 | | i64 ret; | 301 | | asm("cvtss2si %1, %0" | 302 | | : "=r"(ret) | 303 | | : "xm"(value)); | 304 | | return static_cast<I>(ret); | 305 | | } | 306 | 545M | i32 ret; | 307 | 545M | asm("cvtss2si %1, %0" | 308 | 545M | : "=r"(ret) | 309 | 545M | : "xm"(value)); | 310 | 545M | return static_cast<I>(ret); | 311 | 545M | } |
_ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_f Line | Count | Source | 294 | 41.4k | { | 295 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, | 296 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 | 297 | | // To solve this we would either need to detect that value or do a range check and | 298 | | // then do a more specialized conversion, which might include a division (which is expensive) | 299 | | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { | 300 | | i64 ret; | 301 | | asm("cvtss2si %1, %0" | 302 | | : "=r"(ret) | 303 | | : "xm"(value)); | 304 | | return static_cast<I>(ret); | 305 | | } | 306 | 41.4k | i32 ret; | 307 | 41.4k | asm("cvtss2si %1, %0" | 308 | 41.4k | : "=r"(ret) | 309 | 41.4k | : "xm"(value)); | 310 | 41.4k | return static_cast<I>(ret); | 311 | 41.4k | } |
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEsEET_f |
312 | | |
313 | | template<Integral I> |
314 | | ALWAYS_INLINE I round_to(double value) |
315 | 0 | { |
316 | | // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set, |
317 | | // if the value surpasses the i64 limit, even if the result could fit into an u64 |
318 | | // To solve this we would either need to detect that value or do a range check and |
319 | | // then do a more specialized conversion, which might include a division (which is expensive) |
320 | 0 | if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) { |
321 | 0 | i64 ret; |
322 | 0 | asm("cvtsd2si %1, %0" |
323 | 0 | : "=r"(ret) |
324 | 0 | : "xm"(value)); |
325 | 0 | return static_cast<I>(ret); |
326 | 0 | } |
327 | 0 | i32 ret; |
328 | 0 | asm("cvtsd2si %1, %0" |
329 | 0 | : "=r"(ret) |
330 | 0 | : "xm"(value)); |
331 | 0 | return static_cast<I>(ret); |
332 | 0 | } Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralElEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_d Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEmEET_d |
333 | | |
334 | | #elif ARCH(AARCH64) |
335 | | template<Signed I> |
336 | | ALWAYS_INLINE I round_to(float value) |
337 | | { |
338 | | if constexpr (sizeof(I) <= sizeof(u32)) { |
339 | | i32 res; |
340 | | asm("fcvtns %w0, %s1" |
341 | | : "=r"(res) |
342 | | : "w"(value)); |
343 | | return static_cast<I>(res); |
344 | | } |
345 | | i64 res; |
346 | | asm("fcvtns %0, %s1" |
347 | | : "=r"(res) |
348 | | : "w"(value)); |
349 | | return static_cast<I>(res); |
350 | | } |
351 | | |
352 | | template<Signed I> |
353 | | ALWAYS_INLINE I round_to(double value) |
354 | | { |
355 | | if constexpr (sizeof(I) <= sizeof(u32)) { |
356 | | i32 res; |
357 | | asm("fcvtns %w0, %d1" |
358 | | : "=r"(res) |
359 | | : "w"(value)); |
360 | | return static_cast<I>(res); |
361 | | } |
362 | | i64 res; |
363 | | asm("fcvtns %0, %d1" |
364 | | : "=r"(res) |
365 | | : "w"(value)); |
366 | | return static_cast<I>(res); |
367 | | } |
368 | | |
369 | | template<Unsigned U> |
370 | | ALWAYS_INLINE U round_to(float value) |
371 | | { |
372 | | if constexpr (sizeof(U) <= sizeof(u32)) { |
373 | | u32 res; |
374 | | asm("fcvtnu %w0, %s1" |
375 | | : "=r"(res) |
376 | | : "w"(value)); |
377 | | return static_cast<U>(res); |
378 | | } |
379 | | i64 res; |
380 | | asm("fcvtnu %0, %s1" |
381 | | : "=r"(res) |
382 | | : "w"(value)); |
383 | | return static_cast<U>(res); |
384 | | } |
385 | | |
386 | | template<Unsigned U> |
387 | | ALWAYS_INLINE U round_to(double value) |
388 | | { |
389 | | if constexpr (sizeof(U) <= sizeof(u32)) { |
390 | | u32 res; |
391 | | asm("fcvtns %w0, %d1" |
392 | | : "=r"(res) |
393 | | : "w"(value)); |
394 | | return static_cast<U>(res); |
395 | | } |
396 | | i64 res; |
397 | | asm("fcvtns %0, %d1" |
398 | | : "=r"(res) |
399 | | : "w"(value)); |
400 | | return static_cast<U>(res); |
401 | | } |
402 | | |
403 | | #else |
404 | | template<Integral I, FloatingPoint P> |
405 | | ALWAYS_INLINE I round_to(P value) |
406 | | { |
407 | | if constexpr (IsSame<P, long double>) |
408 | | return static_cast<I>(__builtin_llrintl(value)); |
409 | | if constexpr (IsSame<P, double>) |
410 | | return static_cast<I>(__builtin_llrint(value)); |
411 | | if constexpr (IsSame<P, float>) |
412 | | return static_cast<I>(__builtin_llrintf(value)); |
413 | | } |
414 | | #endif |
415 | | |
416 | | } |
417 | | |
418 | | using Rounding::ceil; |
419 | | using Rounding::floor; |
420 | | using Rounding::rint; |
421 | | using Rounding::round; |
422 | | using Rounding::round_to; |
423 | | using Rounding::trunc; |
424 | | |
425 | | namespace Division { |
426 | | template<FloatingPoint T> |
427 | | constexpr T fmod(T x, T y) |
428 | | { |
429 | | CONSTEXPR_STATE(fmod, x, y); |
430 | | |
431 | | #if ARCH(X86_64) |
432 | | u16 fpu_status; |
433 | | do { |
434 | | asm( |
435 | | "fprem\n" |
436 | | "fnstsw %%ax\n" |
437 | | : "+t"(x), "=a"(fpu_status) |
438 | | : "u"(y)); |
439 | | } while (fpu_status & 0x400); |
440 | | return x; |
441 | | #else |
442 | | # if defined(AK_OS_SERENITY) |
443 | | // FIXME: This is a very naive implementation. |
444 | | |
445 | | if (__builtin_isnan(x)) |
446 | | return x; |
447 | | if (__builtin_isnan(y)) |
448 | | return y; |
449 | | |
450 | | // SPECIAL VALUES |
451 | | // fmod(±0, y) returns ±0 if y is neither 0 nor NaN. |
452 | | if (x == 0 && y != 0) |
453 | | return x; |
454 | | |
455 | | // fmod(x, y) returns a NaN and raises the "invalid" floating-point exception for x infinite or y zero. |
456 | | // FIXME: Exception. |
457 | | if (__builtin_isinf(x) || y == 0) |
458 | | return NaN<T>; |
459 | | |
460 | | // fmod(x, ±infinity) returns x for x not infinite. |
461 | | if (__builtin_isinf(y)) |
462 | | return x; |
463 | | |
464 | | // Range reduction: handle negative x and y. |
465 | | if (y < 0) |
466 | | return fmod(x, -y); |
467 | | if (x < 0) |
468 | | return -fmod(-x, y); |
469 | | |
470 | | // x is x_mantissa * 2**x_exponent, y is y_mantissa * 2**y_exponent. |
471 | | // (Both are positive at this point.) |
472 | | // If y_exponent > x_exponent, we are done and can return x. |
473 | | // If y_exponent == x_exponent, we can return (x_mantissa % y_mantissa) * 2**x_exponent. |
474 | | // If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from |
475 | | // the exponent into the mantissa. |
476 | | |
477 | | auto x_bits = FloatExtractor<T>::from_float(x); |
478 | | typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent; |
479 | | |
480 | | auto y_bits = FloatExtractor<T>::from_float(y); |
481 | | typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent; |
482 | | |
483 | | // FIXME: Handle denormals. For now, treat them as 0. |
484 | | if (x_exponent == 0 && y_exponent != 0) |
485 | | return 0; |
486 | | if (y_exponent == 0) |
487 | | return NaN<T>; |
488 | | |
489 | | if (y_exponent > x_exponent) |
490 | | return x; |
491 | | |
492 | | // FIXME: This is wrong for f80. |
493 | | typename FloatExtractor<T>::ComponentType implicit_mantissa_bit = 1; |
494 | | implicit_mantissa_bit <<= FloatExtractor<T>::mantissa_bits; |
495 | | |
496 | | typename FloatExtractor<T>::ComponentType x_mantissa = x_bits.mantissa | implicit_mantissa_bit; |
497 | | typename FloatExtractor<T>::ComponentType y_mantissa = y_bits.mantissa | implicit_mantissa_bit; |
498 | | |
499 | | while (y_exponent < x_exponent) { |
500 | | // This is ok because (x % (y * 2**n)) divides (x % y) for all n > 0. |
501 | | x_mantissa %= y_mantissa; |
502 | | |
503 | | x_mantissa <<= 1; |
504 | | --x_exponent; |
505 | | } |
506 | | |
507 | | x_mantissa %= y_mantissa; |
508 | | |
509 | | // We're done and want to return x_mantissa * 2 ** x_exponent. |
510 | | // But x_mantissa might not have a leading 1 bit, so we have to realign first. |
511 | | // Mantissa is mantissa_bits long, count_leading_zeroes() counts in ComponentType, adjust: |
512 | | auto const always_zero_bits = sizeof(typename FloatExtractor<T>::ComponentType) * 8 - (FloatExtractor<T>::mantissa_bits + 1); // +1 for implicit 1 bit |
513 | | auto shift = count_leading_zeroes(x_mantissa) - always_zero_bits; |
514 | | |
515 | | if (x_exponent < shift) { |
516 | | // FIXME: Make a real denormal. |
517 | | return 0; |
518 | | } |
519 | | |
520 | | x_mantissa <<= shift; |
521 | | x_exponent -= shift; |
522 | | |
523 | | x_bits.exponent = x_exponent; |
524 | | x_bits.mantissa = x_mantissa; |
525 | | return x_bits.to_float(); |
526 | | # else |
527 | | if constexpr (IsSame<T, long double>) |
528 | | return __builtin_fmodl(x, y); |
529 | | if constexpr (IsSame<T, double>) |
530 | | return __builtin_fmod(x, y); |
531 | | if constexpr (IsSame<T, float>) |
532 | | return __builtin_fmodf(x, y); |
533 | | # endif |
534 | | #endif |
535 | | } |
536 | | |
537 | | template<FloatingPoint T> |
538 | | constexpr T remainder(T x, T y) |
539 | | { |
540 | | CONSTEXPR_STATE(remainder, x, y); |
541 | | |
542 | | #if ARCH(X86_64) |
543 | | u16 fpu_status; |
544 | | do { |
545 | | asm( |
546 | | "fprem1\n" |
547 | | "fnstsw %%ax\n" |
548 | | : "+t"(x), "=a"(fpu_status) |
549 | | : "u"(y)); |
550 | | } while (fpu_status & 0x400); |
551 | | return x; |
552 | | #else |
553 | | # if defined(AK_OS_SERENITY) |
554 | | // TODO: Add implementation for this function. |
555 | | TODO(); |
556 | | # endif |
557 | | if constexpr (IsSame<T, long double>) |
558 | | return __builtin_remainderl(x, y); |
559 | | if constexpr (IsSame<T, double>) |
560 | | return __builtin_remainder(x, y); |
561 | | if constexpr (IsSame<T, float>) |
562 | | return __builtin_remainderf(x, y); |
563 | | #endif |
564 | | } |
565 | | } |
566 | | |
567 | | using Division::fmod; |
568 | | using Division::remainder; |
569 | | |
570 | | template<FloatingPoint T> |
571 | | constexpr T sqrt(T x) |
572 | 94.7M | { |
573 | 94.7M | CONSTEXPR_STATE(sqrt, x); |
574 | | |
575 | 94.7M | #if ARCH(X86_64) |
576 | 94.7M | if constexpr (IsSame<T, float>) { |
577 | 92.2M | float res; |
578 | 92.2M | asm("sqrtss %1, %0" |
579 | 92.2M | : "=x"(res) |
580 | 92.2M | : "x"(x)); |
581 | 92.2M | return res; |
582 | 92.2M | } |
583 | 2.50M | if constexpr (IsSame<T, double>) { |
584 | 2.50M | double res; |
585 | 2.50M | asm("sqrtsd %1, %0" |
586 | 2.50M | : "=x"(res) |
587 | 2.50M | : "x"(x)); |
588 | 2.50M | return res; |
589 | 2.50M | } |
590 | 0 | T res; |
591 | 94.7M | asm("fsqrt" |
592 | 94.7M | : "=t"(res) |
593 | 94.7M | : "0"(x)); |
594 | 94.7M | return res; |
595 | | #elif ARCH(AARCH64) |
596 | | AARCH64_INSTRUCTION(fsqrt, x); |
597 | | #elif ARCH(RISCV64) |
598 | | if constexpr (IsSame<T, float>) { |
599 | | float res; |
600 | | asm("fsqrt.s %0, %1" |
601 | | : "=f"(res) |
602 | | : "f"(x)); |
603 | | return res; |
604 | | } |
605 | | if constexpr (IsSame<T, double>) { |
606 | | double res; |
607 | | asm("fsqrt.d %0, %1" |
608 | | : "=f"(res) |
609 | | : "f"(x)); |
610 | | return res; |
611 | | } |
612 | | if constexpr (IsSame<T, long double>) |
613 | | TODO_RISCV64(); |
614 | | #else |
615 | | # if defined(AK_OS_SERENITY) |
616 | | // TODO: Add implementation for this function. |
617 | | TODO(); |
618 | | # endif |
619 | | return __builtin_sqrt(x); |
620 | | #endif |
621 | 94.7M | } _ZN2AK4sqrtITkNS_8Concepts13FloatingPointEfEET_S2_ Line | Count | Source | 572 | 92.2M | { | 573 | 92.2M | CONSTEXPR_STATE(sqrt, x); | 574 | | | 575 | 92.2M | #if ARCH(X86_64) | 576 | 92.2M | if constexpr (IsSame<T, float>) { | 577 | 92.2M | float res; | 578 | 92.2M | asm("sqrtss %1, %0" | 579 | 92.2M | : "=x"(res) | 580 | 92.2M | : "x"(x)); | 581 | 92.2M | return res; | 582 | 92.2M | } | 583 | | if constexpr (IsSame<T, double>) { | 584 | | double res; | 585 | | asm("sqrtsd %1, %0" | 586 | | : "=x"(res) | 587 | | : "x"(x)); | 588 | | return res; | 589 | | } | 590 | 92.2M | T res; | 591 | 92.2M | asm("fsqrt" | 592 | 92.2M | : "=t"(res) | 593 | 92.2M | : "0"(x)); | 594 | 92.2M | return res; | 595 | | #elif ARCH(AARCH64) | 596 | | AARCH64_INSTRUCTION(fsqrt, x); | 597 | | #elif ARCH(RISCV64) | 598 | | if constexpr (IsSame<T, float>) { | 599 | | float res; | 600 | | asm("fsqrt.s %0, %1" | 601 | | : "=f"(res) | 602 | | : "f"(x)); | 603 | | return res; | 604 | | } | 605 | | if constexpr (IsSame<T, double>) { | 606 | | double res; | 607 | | asm("fsqrt.d %0, %1" | 608 | | : "=f"(res) | 609 | | : "f"(x)); | 610 | | return res; | 611 | | } | 612 | | if constexpr (IsSame<T, long double>) | 613 | | TODO_RISCV64(); | 614 | | #else | 615 | | # if defined(AK_OS_SERENITY) | 616 | | // TODO: Add implementation for this function. | 617 | | TODO(); | 618 | | # endif | 619 | | return __builtin_sqrt(x); | 620 | | #endif | 621 | 92.2M | } |
_ZN2AK4sqrtITkNS_8Concepts13FloatingPointEdEET_S2_ Line | Count | Source | 572 | 2.50M | { | 573 | 2.50M | CONSTEXPR_STATE(sqrt, x); | 574 | | | 575 | 2.50M | #if ARCH(X86_64) | 576 | | if constexpr (IsSame<T, float>) { | 577 | | float res; | 578 | | asm("sqrtss %1, %0" | 579 | | : "=x"(res) | 580 | | : "x"(x)); | 581 | | return res; | 582 | | } | 583 | 2.50M | if constexpr (IsSame<T, double>) { | 584 | 2.50M | double res; | 585 | 2.50M | asm("sqrtsd %1, %0" | 586 | 2.50M | : "=x"(res) | 587 | 2.50M | : "x"(x)); | 588 | 2.50M | return res; | 589 | 2.50M | } | 590 | 0 | T res; | 591 | 2.50M | asm("fsqrt" | 592 | 2.50M | : "=t"(res) | 593 | 2.50M | : "0"(x)); | 594 | 2.50M | return res; | 595 | | #elif ARCH(AARCH64) | 596 | | AARCH64_INSTRUCTION(fsqrt, x); | 597 | | #elif ARCH(RISCV64) | 598 | | if constexpr (IsSame<T, float>) { | 599 | | float res; | 600 | | asm("fsqrt.s %0, %1" | 601 | | : "=f"(res) | 602 | | : "f"(x)); | 603 | | return res; | 604 | | } | 605 | | if constexpr (IsSame<T, double>) { | 606 | | double res; | 607 | | asm("fsqrt.d %0, %1" | 608 | | : "=f"(res) | 609 | | : "f"(x)); | 610 | | return res; | 611 | | } | 612 | | if constexpr (IsSame<T, long double>) | 613 | | TODO_RISCV64(); | 614 | | #else | 615 | | # if defined(AK_OS_SERENITY) | 616 | | // TODO: Add implementation for this function. | 617 | | TODO(); | 618 | | # endif | 619 | | return __builtin_sqrt(x); | 620 | | #endif | 621 | 2.50M | } |
|
622 | | |
623 | | template<FloatingPoint T> |
624 | | constexpr T cbrt(T x) |
625 | | { |
626 | | CONSTEXPR_STATE(cbrt, x); |
627 | | if (__builtin_isinf(x) || x == 0) |
628 | | return x; |
629 | | if (x < 0) |
630 | | return -cbrt(-x); |
631 | | |
632 | | T r = x; |
633 | | T ex = 0; |
634 | | |
635 | | while (r < 0.125l) { |
636 | | r *= 8; |
637 | | ex--; |
638 | | } |
639 | | while (r > 1.0l) { |
640 | | r *= 0.125l; |
641 | | ex++; |
642 | | } |
643 | | |
644 | | r = (-0.46946116l * r + 1.072302l) * r + 0.3812513l; |
645 | | |
646 | | while (ex < 0) { |
647 | | r *= 0.5l; |
648 | | ex++; |
649 | | } |
650 | | while (ex > 0) { |
651 | | r *= 2.0l; |
652 | | ex--; |
653 | | } |
654 | | |
655 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
656 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
657 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
658 | | r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r); |
659 | | |
660 | | return r; |
661 | | } |
662 | | |
663 | | namespace Trigonometry { |
664 | | |
665 | | template<FloatingPoint T> |
666 | | constexpr T hypot(T x, T y) |
667 | 75.3M | { |
668 | 75.3M | return sqrt(x * x + y * y); |
669 | 75.3M | } |
670 | | |
671 | | template<FloatingPoint T> |
672 | | constexpr T sin(T angle) |
673 | 16.9M | { |
674 | 16.9M | CONSTEXPR_STATE(sin, angle); |
675 | | |
676 | 16.9M | #if ARCH(X86_64) |
677 | 16.9M | T ret; |
678 | 16.9M | asm( |
679 | 16.9M | "fsin" |
680 | 16.9M | : "=t"(ret) |
681 | 16.9M | : "0"(angle)); |
682 | 16.9M | return ret; |
683 | | #else |
684 | | # if defined(AK_OS_SERENITY) |
685 | | if (angle < 0) |
686 | | return -sin(-angle); |
687 | | |
688 | | angle = fmod(angle, 2 * Pi<T>); |
689 | | |
690 | | if (angle >= Pi<T>) |
691 | | return -sin(angle - Pi<T>); |
692 | | |
693 | | if (angle > Pi<T> / 2) |
694 | | return sin(Pi<T> - angle); |
695 | | |
696 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula |
697 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. |
698 | | return 16 * angle * (Pi<T> - angle) / (5 * Pi<T> * Pi<T> - 4 * angle * (Pi<T> - angle)); |
699 | | # else |
700 | | return __builtin_sin(angle); |
701 | | # endif |
702 | | #endif |
703 | 16.9M | } _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 673 | 16.9M | { | 674 | 16.9M | CONSTEXPR_STATE(sin, angle); | 675 | | | 676 | 16.9M | #if ARCH(X86_64) | 677 | 16.9M | T ret; | 678 | 16.9M | asm( | 679 | 16.9M | "fsin" | 680 | 16.9M | : "=t"(ret) | 681 | 16.9M | : "0"(angle)); | 682 | 16.9M | return ret; | 683 | | #else | 684 | | # if defined(AK_OS_SERENITY) | 685 | | if (angle < 0) | 686 | | return -sin(-angle); | 687 | | | 688 | | angle = fmod(angle, 2 * Pi<T>); | 689 | | | 690 | | if (angle >= Pi<T>) | 691 | | return -sin(angle - Pi<T>); | 692 | | | 693 | | if (angle > Pi<T> / 2) | 694 | | return sin(Pi<T> - angle); | 695 | | | 696 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula | 697 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. | 698 | | return 16 * angle * (Pi<T> - angle) / (5 * Pi<T> * Pi<T> - 4 * angle * (Pi<T> - angle)); | 699 | | # else | 700 | | return __builtin_sin(angle); | 701 | | # endif | 702 | | #endif | 703 | 16.9M | } |
Unexecuted instantiation: _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEdEET_S3_ |
704 | | |
705 | | template<FloatingPoint T> |
706 | | constexpr T cos(T angle) |
707 | 5.77k | { |
708 | 5.77k | CONSTEXPR_STATE(cos, angle); |
709 | | |
710 | 5.77k | #if ARCH(X86_64) |
711 | 5.77k | T ret; |
712 | 5.77k | asm( |
713 | 5.77k | "fcos" |
714 | 5.77k | : "=t"(ret) |
715 | 5.77k | : "0"(angle)); |
716 | 5.77k | return ret; |
717 | | #else |
718 | | # if defined(AK_OS_SERENITY) |
719 | | if (angle < 0) |
720 | | return cos(-angle); |
721 | | |
722 | | angle = fmod(angle, 2 * Pi<T>); |
723 | | |
724 | | if (angle >= Pi<T>) |
725 | | return -cos(angle - Pi<T>); |
726 | | |
727 | | if (angle > Pi<T> / 2) |
728 | | return -cos(Pi<T> - angle); |
729 | | |
730 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula |
731 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. |
732 | | return (Pi<T> * Pi<T> - 4 * angle * angle) / (Pi<T> * Pi<T> + angle * angle); |
733 | | # else |
734 | | return __builtin_cos(angle); |
735 | | # endif |
736 | | #endif |
737 | 5.77k | } _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 707 | 5.77k | { | 708 | 5.77k | CONSTEXPR_STATE(cos, angle); | 709 | | | 710 | 5.77k | #if ARCH(X86_64) | 711 | 5.77k | T ret; | 712 | 5.77k | asm( | 713 | 5.77k | "fcos" | 714 | 5.77k | : "=t"(ret) | 715 | 5.77k | : "0"(angle)); | 716 | 5.77k | return ret; | 717 | | #else | 718 | | # if defined(AK_OS_SERENITY) | 719 | | if (angle < 0) | 720 | | return cos(-angle); | 721 | | | 722 | | angle = fmod(angle, 2 * Pi<T>); | 723 | | | 724 | | if (angle >= Pi<T>) | 725 | | return -cos(angle - Pi<T>); | 726 | | | 727 | | if (angle > Pi<T> / 2) | 728 | | return -cos(Pi<T> - angle); | 729 | | | 730 | | // https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula | 731 | | // FIXME: This is not a good formula! It requires divisions, so it's slow, and it's not very accurate either. | 732 | | return (Pi<T> * Pi<T> - 4 * angle * angle) / (Pi<T> * Pi<T> + angle * angle); | 733 | | # else | 734 | | return __builtin_cos(angle); | 735 | | # endif | 736 | | #endif | 737 | 5.77k | } |
Unexecuted instantiation: _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEdEET_S3_ |
738 | | |
739 | | template<FloatingPoint T> |
740 | | constexpr void sincos(T angle, T& sin_val, T& cos_val) |
741 | 39.2M | { |
742 | 39.2M | if (is_constant_evaluated()) { |
743 | 0 | sin_val = sin(angle); |
744 | 0 | cos_val = cos(angle); |
745 | 0 | return; |
746 | 0 | } |
747 | 39.2M | #if ARCH(X86_64) |
748 | 39.2M | asm( |
749 | 39.2M | "fsincos" |
750 | 39.2M | : "=t"(cos_val), "=u"(sin_val) |
751 | 39.2M | : "0"(angle)); |
752 | | #else |
753 | | sin_val = sin(angle); |
754 | | cos_val = cos(angle); |
755 | | #endif |
756 | 39.2M | } _ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEfEEvT_RS3_S4_ Line | Count | Source | 741 | 37.0M | { | 742 | 37.0M | if (is_constant_evaluated()) { | 743 | 0 | sin_val = sin(angle); | 744 | 0 | cos_val = cos(angle); | 745 | 0 | return; | 746 | 0 | } | 747 | 37.0M | #if ARCH(X86_64) | 748 | 37.0M | asm( | 749 | 37.0M | "fsincos" | 750 | 37.0M | : "=t"(cos_val), "=u"(sin_val) | 751 | 37.0M | : "0"(angle)); | 752 | | #else | 753 | | sin_val = sin(angle); | 754 | | cos_val = cos(angle); | 755 | | #endif | 756 | 37.0M | } |
_ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEdEEvT_RS3_S4_ Line | Count | Source | 741 | 2.12M | { | 742 | 2.12M | if (is_constant_evaluated()) { | 743 | 0 | sin_val = sin(angle); | 744 | 0 | cos_val = cos(angle); | 745 | 0 | return; | 746 | 0 | } | 747 | 2.12M | #if ARCH(X86_64) | 748 | 2.12M | asm( | 749 | 2.12M | "fsincos" | 750 | 2.12M | : "=t"(cos_val), "=u"(sin_val) | 751 | 2.12M | : "0"(angle)); | 752 | | #else | 753 | | sin_val = sin(angle); | 754 | | cos_val = cos(angle); | 755 | | #endif | 756 | 2.12M | } |
|
757 | | |
758 | | template<FloatingPoint T> |
759 | | constexpr T tan(T angle) |
760 | 17.0M | { |
761 | 17.0M | CONSTEXPR_STATE(tan, angle); |
762 | | |
763 | 17.0M | #if ARCH(X86_64) |
764 | 17.0M | T ret, one; |
765 | 17.0M | asm( |
766 | 17.0M | "fptan" |
767 | 17.0M | : "=t"(one), "=u"(ret) |
768 | 17.0M | : "0"(angle)); |
769 | | |
770 | 17.0M | return ret; |
771 | | #else |
772 | | # if defined(AK_OS_SERENITY) |
773 | | return sin(angle) / cos(angle); |
774 | | # else |
775 | | return __builtin_tan(angle); |
776 | | # endif |
777 | | #endif |
778 | 17.0M | } |
779 | | |
780 | | template<FloatingPoint T> |
781 | | constexpr T asin(T x) |
782 | | { |
783 | | CONSTEXPR_STATE(asin, x); |
784 | | |
785 | | if (x < 0) |
786 | | return -asin(-x); |
787 | | |
788 | | if (x > 1) |
789 | | return NaN<T>; |
790 | | |
791 | | if (x > (T)0.5) { |
792 | | // asin(x) = pi/2 - 2 * asin(sqrt((1 - x) / 2)) |
793 | | // If 0.5 < x <= 1, then sqrt((1 - x) / 2 ) < 0.5, |
794 | | // and the recursion will go down the `<= 0.5` branch. |
795 | | return Pi<T> / 2 - 2 * asin(sqrt((1 - x) / 2)); |
796 | | } |
797 | | |
798 | | T squared = x * x; |
799 | | T value = x; |
800 | | T i = x * squared; |
801 | | value += i * Details::product_odd<1>() / Details::product_even<2>() / 3; |
802 | | i *= squared; |
803 | | value += i * Details::product_odd<3>() / Details::product_even<4>() / 5; |
804 | | i *= squared; |
805 | | value += i * Details::product_odd<5>() / Details::product_even<6>() / 7; |
806 | | i *= squared; |
807 | | value += i * Details::product_odd<7>() / Details::product_even<8>() / 9; |
808 | | i *= squared; |
809 | | value += i * Details::product_odd<9>() / Details::product_even<10>() / 11; |
810 | | i *= squared; |
811 | | value += i * Details::product_odd<11>() / Details::product_even<12>() / 13; |
812 | | i *= squared; |
813 | | value += i * Details::product_odd<13>() / Details::product_even<14>() / 15; |
814 | | i *= squared; |
815 | | value += i * Details::product_odd<15>() / Details::product_even<16>() / 17; |
816 | | return value; |
817 | | } |
818 | | |
819 | | template<FloatingPoint T> |
820 | | constexpr T acos(T value) |
821 | | { |
822 | | CONSTEXPR_STATE(acos, value); |
823 | | |
824 | | // FIXME: I am naive |
825 | | return static_cast<T>(0.5) * Pi<T> - asin<T>(value); |
826 | | } |
827 | | |
828 | | template<FloatingPoint T> |
829 | | constexpr T atan(T value) |
830 | | { |
831 | | CONSTEXPR_STATE(atan, value); |
832 | | |
833 | | #if ARCH(X86_64) |
834 | | T ret; |
835 | | asm( |
836 | | "fld1\n" |
837 | | "fpatan\n" |
838 | | : "=t"(ret) |
839 | | : "0"(value)); |
840 | | return ret; |
841 | | #else |
842 | | # if defined(AK_OS_SERENITY) |
843 | | return asin(value / sqrt(1 + value * value)); |
844 | | # endif |
845 | | return __builtin_atan(value); |
846 | | #endif |
847 | | } |
848 | | |
849 | | template<FloatingPoint T> |
850 | | constexpr T atan2(T y, T x) |
851 | 4.26M | { |
852 | 4.26M | CONSTEXPR_STATE(atan2, y, x); |
853 | | |
854 | 4.26M | #if ARCH(X86_64) |
855 | 4.26M | T ret; |
856 | 4.26M | asm("fpatan" |
857 | 4.26M | : "=t"(ret) |
858 | 4.26M | : "0"(x), "u"(y) |
859 | 4.26M | : "st(1)"); |
860 | 4.26M | return ret; |
861 | | #else |
862 | | # if defined(AK_OS_SERENITY) |
863 | | if (__builtin_isnan(y)) |
864 | | return y; |
865 | | if (__builtin_isnan(x)) |
866 | | return x; |
867 | | |
868 | | // SPECIAL VALUES |
869 | | // atan2(±0, -0) returns ±pi. |
870 | | if (y == 0 && x == 0 && signbit(x)) |
871 | | return copysign(Pi<T>, y); |
872 | | |
873 | | // atan2(±0, +0) returns ±0. |
874 | | if (y == 0 && x == 0 && !signbit(x)) |
875 | | return y; |
876 | | |
877 | | // atan2(±0, x) returns ±pi for x < 0. |
878 | | if (y == 0 && x < 0) |
879 | | return copysign(Pi<T>, y); |
880 | | |
881 | | // atan2(±0, x) returns ±0 for x > 0. |
882 | | if (y == 0 && x > 0) |
883 | | return y; |
884 | | |
885 | | // atan2(y, ±0) returns +pi/2 for y > 0. |
886 | | if (y > 0 && x == 0) |
887 | | return Pi<T> / 2; |
888 | | |
889 | | // atan2(y, ±0) returns -pi/2 for y < 0. |
890 | | if (y < 0 && x == 0) |
891 | | return -Pi<T> / 2; |
892 | | |
893 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. |
894 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) |
895 | | return copysign(Pi<T>, y); |
896 | | |
897 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. |
898 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) |
899 | | return copysign(static_cast<T>(0), y); |
900 | | |
901 | | // atan2(±infinity, x) returns ±pi/2 for finite x. |
902 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) |
903 | | return copysign(Pi<T> / 2, y); |
904 | | |
905 | | // atan2(±infinity, -infinity) returns ±3*pi/4. |
906 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) |
907 | | return copysign(3 * Pi<T> / 4, y); |
908 | | |
909 | | // atan2(±infinity, +infinity) returns ±pi/4. |
910 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) |
911 | | return copysign(Pi<T> / 4, y); |
912 | | |
913 | | // Check quadrant, going counterclockwise. |
914 | | if (y > 0 && x > 0) |
915 | | return atan(y / x); |
916 | | if (y > 0 && x < 0) |
917 | | return atan(y / x) + Pi<T>; |
918 | | if (y < 0 && x < 0) |
919 | | return atan(y / x) - Pi<T>; |
920 | | // y < 0 && x > 0 |
921 | | return atan(y / x); |
922 | | # else |
923 | | return __builtin_atan2(y, x); |
924 | | # endif |
925 | | #endif |
926 | 4.26M | } _ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEfEET_S3_S3_ Line | Count | Source | 851 | 16.7k | { | 852 | 16.7k | CONSTEXPR_STATE(atan2, y, x); | 853 | | | 854 | 16.7k | #if ARCH(X86_64) | 855 | 16.7k | T ret; | 856 | 16.7k | asm("fpatan" | 857 | 16.7k | : "=t"(ret) | 858 | 16.7k | : "0"(x), "u"(y) | 859 | 16.7k | : "st(1)"); | 860 | 16.7k | return ret; | 861 | | #else | 862 | | # if defined(AK_OS_SERENITY) | 863 | | if (__builtin_isnan(y)) | 864 | | return y; | 865 | | if (__builtin_isnan(x)) | 866 | | return x; | 867 | | | 868 | | // SPECIAL VALUES | 869 | | // atan2(±0, -0) returns ±pi. | 870 | | if (y == 0 && x == 0 && signbit(x)) | 871 | | return copysign(Pi<T>, y); | 872 | | | 873 | | // atan2(±0, +0) returns ±0. | 874 | | if (y == 0 && x == 0 && !signbit(x)) | 875 | | return y; | 876 | | | 877 | | // atan2(±0, x) returns ±pi for x < 0. | 878 | | if (y == 0 && x < 0) | 879 | | return copysign(Pi<T>, y); | 880 | | | 881 | | // atan2(±0, x) returns ±0 for x > 0. | 882 | | if (y == 0 && x > 0) | 883 | | return y; | 884 | | | 885 | | // atan2(y, ±0) returns +pi/2 for y > 0. | 886 | | if (y > 0 && x == 0) | 887 | | return Pi<T> / 2; | 888 | | | 889 | | // atan2(y, ±0) returns -pi/2 for y < 0. | 890 | | if (y < 0 && x == 0) | 891 | | return -Pi<T> / 2; | 892 | | | 893 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. | 894 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) | 895 | | return copysign(Pi<T>, y); | 896 | | | 897 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. | 898 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) | 899 | | return copysign(static_cast<T>(0), y); | 900 | | | 901 | | // atan2(±infinity, x) returns ±pi/2 for finite x. | 902 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) | 903 | | return copysign(Pi<T> / 2, y); | 904 | | | 905 | | // atan2(±infinity, -infinity) returns ±3*pi/4. | 906 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) | 907 | | return copysign(3 * Pi<T> / 4, y); | 908 | | | 909 | | // atan2(±infinity, +infinity) returns ±pi/4. | 910 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) | 911 | | return copysign(Pi<T> / 4, y); | 912 | | | 913 | | // Check quadrant, going counterclockwise. | 914 | | if (y > 0 && x > 0) | 915 | | return atan(y / x); | 916 | | if (y > 0 && x < 0) | 917 | | return atan(y / x) + Pi<T>; | 918 | | if (y < 0 && x < 0) | 919 | | return atan(y / x) - Pi<T>; | 920 | | // y < 0 && x > 0 | 921 | | return atan(y / x); | 922 | | # else | 923 | | return __builtin_atan2(y, x); | 924 | | # endif | 925 | | #endif | 926 | 16.7k | } |
_ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEdEET_S3_S3_ Line | Count | Source | 851 | 4.24M | { | 852 | 4.24M | CONSTEXPR_STATE(atan2, y, x); | 853 | | | 854 | 4.24M | #if ARCH(X86_64) | 855 | 4.24M | T ret; | 856 | 4.24M | asm("fpatan" | 857 | 4.24M | : "=t"(ret) | 858 | 4.24M | : "0"(x), "u"(y) | 859 | 4.24M | : "st(1)"); | 860 | 4.24M | return ret; | 861 | | #else | 862 | | # if defined(AK_OS_SERENITY) | 863 | | if (__builtin_isnan(y)) | 864 | | return y; | 865 | | if (__builtin_isnan(x)) | 866 | | return x; | 867 | | | 868 | | // SPECIAL VALUES | 869 | | // atan2(±0, -0) returns ±pi. | 870 | | if (y == 0 && x == 0 && signbit(x)) | 871 | | return copysign(Pi<T>, y); | 872 | | | 873 | | // atan2(±0, +0) returns ±0. | 874 | | if (y == 0 && x == 0 && !signbit(x)) | 875 | | return y; | 876 | | | 877 | | // atan2(±0, x) returns ±pi for x < 0. | 878 | | if (y == 0 && x < 0) | 879 | | return copysign(Pi<T>, y); | 880 | | | 881 | | // atan2(±0, x) returns ±0 for x > 0. | 882 | | if (y == 0 && x > 0) | 883 | | return y; | 884 | | | 885 | | // atan2(y, ±0) returns +pi/2 for y > 0. | 886 | | if (y > 0 && x == 0) | 887 | | return Pi<T> / 2; | 888 | | | 889 | | // atan2(y, ±0) returns -pi/2 for y < 0. | 890 | | if (y < 0 && x == 0) | 891 | | return -Pi<T> / 2; | 892 | | | 893 | | // atan2(±y, -infinity) returns ±pi for finite y > 0. | 894 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x)) | 895 | | return copysign(Pi<T>, y); | 896 | | | 897 | | // atan2(±y, +infinity) returns ±0 for finite y > 0. | 898 | | if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x)) | 899 | | return copysign(static_cast<T>(0), y); | 900 | | | 901 | | // atan2(±infinity, x) returns ±pi/2 for finite x. | 902 | | if (__builtin_isinf(y) && !__builtin_isinf(x)) | 903 | | return copysign(Pi<T> / 2, y); | 904 | | | 905 | | // atan2(±infinity, -infinity) returns ±3*pi/4. | 906 | | if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x)) | 907 | | return copysign(3 * Pi<T> / 4, y); | 908 | | | 909 | | // atan2(±infinity, +infinity) returns ±pi/4. | 910 | | if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x)) | 911 | | return copysign(Pi<T> / 4, y); | 912 | | | 913 | | // Check quadrant, going counterclockwise. | 914 | | if (y > 0 && x > 0) | 915 | | return atan(y / x); | 916 | | if (y > 0 && x < 0) | 917 | | return atan(y / x) + Pi<T>; | 918 | | if (y < 0 && x < 0) | 919 | | return atan(y / x) - Pi<T>; | 920 | | // y < 0 && x > 0 | 921 | | return atan(y / x); | 922 | | # else | 923 | | return __builtin_atan2(y, x); | 924 | | # endif | 925 | | #endif | 926 | 4.24M | } |
|
927 | | |
928 | | } |
929 | | |
930 | | using Trigonometry::acos; |
931 | | using Trigonometry::asin; |
932 | | using Trigonometry::atan; |
933 | | using Trigonometry::atan2; |
934 | | using Trigonometry::cos; |
935 | | using Trigonometry::hypot; |
936 | | using Trigonometry::sin; |
937 | | using Trigonometry::sincos; |
938 | | using Trigonometry::tan; |
939 | | |
940 | | namespace Exponentials { |
941 | | |
942 | | template<FloatingPoint T> |
943 | | constexpr T log2(T x) |
944 | 4.17M | { |
945 | 4.17M | CONSTEXPR_STATE(log2, x); |
946 | | |
947 | 4.17M | #if ARCH(X86_64) |
948 | | if constexpr (IsSame<T, long double>) { |
949 | | T ret; |
950 | | asm( |
951 | | "fld1\n" |
952 | | "fxch %%st(1)\n" |
953 | | "fyl2x\n" |
954 | | : "=t"(ret) |
955 | | : "0"(x)); |
956 | | return ret; |
957 | | } |
958 | 4.17M | #endif |
959 | | // References: |
960 | | // Gist comparing some implementations |
961 | | // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 |
962 | | |
963 | 4.17M | if (x == 0) |
964 | 0 | return -Infinity<T>; |
965 | 4.17M | if (x <= 0 || __builtin_isnan(x)) |
966 | 0 | return NaN<T>; |
967 | | |
968 | 4.17M | auto ext = FloatExtractor<T>::from_float(x); |
969 | 4.17M | T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; |
970 | | |
971 | | // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 |
972 | 4.17M | if (ext.mantissa == 0) |
973 | 3.60M | return exponent; |
974 | | |
975 | | // FIXME: Handle denormalized numbers separately |
976 | | |
977 | 570k | FloatExtractor<T> mantissa_ext { |
978 | 570k | .mantissa = ext.mantissa, |
979 | 570k | .exponent = FloatExtractor<T>::exponent_bias, |
980 | 570k | .sign = ext.sign |
981 | 570k | }; |
982 | | |
983 | | // (1 <= mantissa < 2) |
984 | 570k | T m = mantissa_ext.to_float(); |
985 | | |
986 | | // This is a reconstruction of one of Sun's algorithms |
987 | | // They use a transformation to lower the problem space, |
988 | | // while keeping the precision, and a 14th degree polynomial, |
989 | | // which is mirrored at sqrt(2) |
990 | | // TODO: Sun has some more algorithms for this and other math functions, |
991 | | // leveraging LUTs, investigate those, if they are better in performance and/or precision. |
992 | | // These seem to be related to crLibM's implementations, for which papers and references |
993 | | // are available. |
994 | | // FIXME: Dynamically adjust the amount of precision between floats and doubles |
995 | | // AKA floats might need less accuracy here, in comparison to doubles |
996 | | |
997 | 570k | bool inverted = false; |
998 | | // m > sqrt(2) |
999 | 570k | if (m > Sqrt2<T>) { |
1000 | 191k | inverted = true; |
1001 | 191k | m = 2 / m; |
1002 | 191k | } |
1003 | 570k | T s = (m - 1) / (m + 1); |
1004 | | // ln((1 + s) / (1 - s)) == ln(m) |
1005 | 570k | T s2 = s * s; |
1006 | | // clang-format off |
1007 | 570k | T high_approx = s2 * (static_cast<T>(0.6666666666666735130) |
1008 | 570k | + s2 * (static_cast<T>(0.3999999999940941908) |
1009 | 570k | + s2 * (static_cast<T>(0.2857142874366239149) |
1010 | 570k | + s2 * (static_cast<T>(0.2222219843214978396) |
1011 | 570k | + s2 * (static_cast<T>(0.1818357216161805012) |
1012 | 570k | + s2 * (static_cast<T>(0.1531383769920937332) |
1013 | 570k | + s2 * static_cast<T>(0.1479819860511658591))))))); |
1014 | | // clang-format on |
1015 | | |
1016 | | // ln(m) == 2 * s + s * high_approx |
1017 | | // log2(m) == log2(e) * ln(m) |
1018 | 570k | T log2_mantissa = L2_E<T> * (2 * s + s * high_approx); |
1019 | 570k | if (inverted) |
1020 | 191k | log2_mantissa = 1 - log2_mantissa; |
1021 | 570k | return exponent + log2_mantissa; |
1022 | 4.17M | } _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 944 | 4.17M | { | 945 | 4.17M | CONSTEXPR_STATE(log2, x); | 946 | | | 947 | 4.17M | #if ARCH(X86_64) | 948 | | if constexpr (IsSame<T, long double>) { | 949 | | T ret; | 950 | | asm( | 951 | | "fld1\n" | 952 | | "fxch %%st(1)\n" | 953 | | "fyl2x\n" | 954 | | : "=t"(ret) | 955 | | : "0"(x)); | 956 | | return ret; | 957 | | } | 958 | 4.17M | #endif | 959 | | // References: | 960 | | // Gist comparing some implementations | 961 | | // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 | 962 | | | 963 | 4.17M | if (x == 0) | 964 | 0 | return -Infinity<T>; | 965 | 4.17M | if (x <= 0 || __builtin_isnan(x)) | 966 | 0 | return NaN<T>; | 967 | | | 968 | 4.17M | auto ext = FloatExtractor<T>::from_float(x); | 969 | 4.17M | T exponent = ext.exponent - FloatExtractor<T>::exponent_bias; | 970 | | | 971 | | // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2 | 972 | 4.17M | if (ext.mantissa == 0) | 973 | 3.60M | return exponent; | 974 | | | 975 | | // FIXME: Handle denormalized numbers separately | 976 | | | 977 | 570k | FloatExtractor<T> mantissa_ext { | 978 | 570k | .mantissa = ext.mantissa, | 979 | 570k | .exponent = FloatExtractor<T>::exponent_bias, | 980 | 570k | .sign = ext.sign | 981 | 570k | }; | 982 | | | 983 | | // (1 <= mantissa < 2) | 984 | 570k | T m = mantissa_ext.to_float(); | 985 | | | 986 | | // This is a reconstruction of one of Sun's algorithms | 987 | | // They use a transformation to lower the problem space, | 988 | | // while keeping the precision, and a 14th degree polynomial, | 989 | | // which is mirrored at sqrt(2) | 990 | | // TODO: Sun has some more algorithms for this and other math functions, | 991 | | // leveraging LUTs, investigate those, if they are better in performance and/or precision. | 992 | | // These seem to be related to crLibM's implementations, for which papers and references | 993 | | // are available. | 994 | | // FIXME: Dynamically adjust the amount of precision between floats and doubles | 995 | | // AKA floats might need less accuracy here, in comparison to doubles | 996 | | | 997 | 570k | bool inverted = false; | 998 | | // m > sqrt(2) | 999 | 570k | if (m > Sqrt2<T>) { | 1000 | 191k | inverted = true; | 1001 | 191k | m = 2 / m; | 1002 | 191k | } | 1003 | 570k | T s = (m - 1) / (m + 1); | 1004 | | // ln((1 + s) / (1 - s)) == ln(m) | 1005 | 570k | T s2 = s * s; | 1006 | | // clang-format off | 1007 | 570k | T high_approx = s2 * (static_cast<T>(0.6666666666666735130) | 1008 | 570k | + s2 * (static_cast<T>(0.3999999999940941908) | 1009 | 570k | + s2 * (static_cast<T>(0.2857142874366239149) | 1010 | 570k | + s2 * (static_cast<T>(0.2222219843214978396) | 1011 | 570k | + s2 * (static_cast<T>(0.1818357216161805012) | 1012 | 570k | + s2 * (static_cast<T>(0.1531383769920937332) | 1013 | 570k | + s2 * static_cast<T>(0.1479819860511658591))))))); | 1014 | | // clang-format on | 1015 | | | 1016 | | // ln(m) == 2 * s + s * high_approx | 1017 | | // log2(m) == log2(e) * ln(m) | 1018 | 570k | T log2_mantissa = L2_E<T> * (2 * s + s * high_approx); | 1019 | 570k | if (inverted) | 1020 | 191k | log2_mantissa = 1 - log2_mantissa; | 1021 | 570k | return exponent + log2_mantissa; | 1022 | 4.17M | } |
Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_ |
1023 | | |
1024 | | template<FloatingPoint T> |
1025 | | constexpr T log(T x) |
1026 | 48 | { |
1027 | 48 | CONSTEXPR_STATE(log, x); |
1028 | | |
1029 | 48 | #if ARCH(X86_64) |
1030 | 48 | T ret; |
1031 | 48 | asm( |
1032 | 48 | "fldln2\n" |
1033 | 48 | "fxch %%st(1)\n" |
1034 | 48 | "fyl2x\n" |
1035 | 48 | : "=t"(ret) |
1036 | 48 | : "0"(x)); |
1037 | 48 | return ret; |
1038 | | #elif defined(AK_OS_SERENITY) |
1039 | | // FIXME: Adjust the polynomial and formula in log2 to fit this |
1040 | | return log2<T>(x) / L2_E<T>; |
1041 | | #else |
1042 | | return __builtin_log(x); |
1043 | | #endif |
1044 | 48 | } |
1045 | | |
1046 | | template<FloatingPoint T> |
1047 | | constexpr T log10(T x) |
1048 | | { |
1049 | | CONSTEXPR_STATE(log10, x); |
1050 | | |
1051 | | #if ARCH(X86_64) |
1052 | | T ret; |
1053 | | asm( |
1054 | | "fldlg2\n" |
1055 | | "fxch %%st(1)\n" |
1056 | | "fyl2x\n" |
1057 | | : "=t"(ret) |
1058 | | : "0"(x)); |
1059 | | return ret; |
1060 | | #elif defined(AK_OS_SERENITY) |
1061 | | // FIXME: Adjust the polynomial and formula in log2 to fit this |
1062 | | return log2<T>(x) / L2_10<T>; |
1063 | | #else |
1064 | | return __builtin_log10(x); |
1065 | | #endif |
1066 | | } |
1067 | | |
1068 | | template<FloatingPoint T> |
1069 | | constexpr T exp2(T exponent) |
1070 | 4.17M | { |
1071 | 4.17M | CONSTEXPR_STATE(exp2, exponent); |
1072 | | |
1073 | 4.17M | #if ARCH(X86_64) |
1074 | 4.17M | T res; |
1075 | 4.17M | asm("fld1\n" |
1076 | 4.17M | "fld %%st(1)\n" |
1077 | 4.17M | "fprem\n" |
1078 | 4.17M | "f2xm1\n" |
1079 | 4.17M | "faddp\n" |
1080 | 4.17M | "fscale\n" |
1081 | 4.17M | "fstp %%st(1)" |
1082 | 4.17M | : "=t"(res) |
1083 | 4.17M | : "0"(exponent)); |
1084 | 4.17M | return res; |
1085 | | #else |
1086 | | // TODO: Add better implementation of this function. |
1087 | | // This is just fast exponentiation for the integer part and |
1088 | | // the first couple terms of the taylor series for the fractional part. |
1089 | | |
1090 | | if (exponent < 0) |
1091 | | return 1 / exp2(-exponent); |
1092 | | |
1093 | | if (exponent >= log2(NumericLimits<T>::max())) |
1094 | | return Infinity<T>; |
1095 | | |
1096 | | // Integer exponentiation part. |
1097 | | int int_exponent = static_cast<int>(exponent); |
1098 | | T exponent_fraction = exponent - int_exponent; |
1099 | | |
1100 | | T int_result = 1; |
1101 | | T base = 2; |
1102 | | for (;;) { |
1103 | | if (int_exponent & 1) |
1104 | | int_result *= base; |
1105 | | int_exponent >>= 1; |
1106 | | if (!int_exponent) |
1107 | | break; |
1108 | | base *= base; |
1109 | | } |
1110 | | |
1111 | | // Fractional part. |
1112 | | // Uses: |
1113 | | // exp(x) = sum(n, 0, \infty, x ** n / n!) |
1114 | | // 2**x = exp(log2(e) * x) |
1115 | | // FIXME: Pick better step size (and make it dependent on T). |
1116 | | T result = 0; |
1117 | | T power = 1; |
1118 | | T factorial = 1; |
1119 | | for (int i = 1; i < 16; ++i) { |
1120 | | result += power / factorial; |
1121 | | power *= exponent_fraction / L2_E<T>; |
1122 | | factorial *= i; |
1123 | | } |
1124 | | |
1125 | | return int_result * result; |
1126 | | #endif |
1127 | 4.17M | } _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEfEET_S3_ Line | Count | Source | 1070 | 4.17M | { | 1071 | 4.17M | CONSTEXPR_STATE(exp2, exponent); | 1072 | | | 1073 | 4.17M | #if ARCH(X86_64) | 1074 | 4.17M | T res; | 1075 | 4.17M | asm("fld1\n" | 1076 | 4.17M | "fld %%st(1)\n" | 1077 | 4.17M | "fprem\n" | 1078 | 4.17M | "f2xm1\n" | 1079 | 4.17M | "faddp\n" | 1080 | 4.17M | "fscale\n" | 1081 | 4.17M | "fstp %%st(1)" | 1082 | 4.17M | : "=t"(res) | 1083 | 4.17M | : "0"(exponent)); | 1084 | 4.17M | return res; | 1085 | | #else | 1086 | | // TODO: Add better implementation of this function. | 1087 | | // This is just fast exponentiation for the integer part and | 1088 | | // the first couple terms of the taylor series for the fractional part. | 1089 | | | 1090 | | if (exponent < 0) | 1091 | | return 1 / exp2(-exponent); | 1092 | | | 1093 | | if (exponent >= log2(NumericLimits<T>::max())) | 1094 | | return Infinity<T>; | 1095 | | | 1096 | | // Integer exponentiation part. | 1097 | | int int_exponent = static_cast<int>(exponent); | 1098 | | T exponent_fraction = exponent - int_exponent; | 1099 | | | 1100 | | T int_result = 1; | 1101 | | T base = 2; | 1102 | | for (;;) { | 1103 | | if (int_exponent & 1) | 1104 | | int_result *= base; | 1105 | | int_exponent >>= 1; | 1106 | | if (!int_exponent) | 1107 | | break; | 1108 | | base *= base; | 1109 | | } | 1110 | | | 1111 | | // Fractional part. | 1112 | | // Uses: | 1113 | | // exp(x) = sum(n, 0, \infty, x ** n / n!) | 1114 | | // 2**x = exp(log2(e) * x) | 1115 | | // FIXME: Pick better step size (and make it dependent on T). | 1116 | | T result = 0; | 1117 | | T power = 1; | 1118 | | T factorial = 1; | 1119 | | for (int i = 1; i < 16; ++i) { | 1120 | | result += power / factorial; | 1121 | | power *= exponent_fraction / L2_E<T>; | 1122 | | factorial *= i; | 1123 | | } | 1124 | | | 1125 | | return int_result * result; | 1126 | | #endif | 1127 | 4.17M | } |
Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_ |
1128 | | |
1129 | | template<FloatingPoint T> |
1130 | | constexpr T exp(T exponent) |
1131 | 0 | { |
1132 | 0 | CONSTEXPR_STATE(exp, exponent); |
1133 | |
|
1134 | 0 | #if ARCH(X86_64) |
1135 | 0 | T res; |
1136 | 0 | asm("fldl2e\n" |
1137 | 0 | "fmulp\n" |
1138 | 0 | "fld1\n" |
1139 | 0 | "fld %%st(1)\n" |
1140 | 0 | "fprem\n" |
1141 | 0 | "f2xm1\n" |
1142 | 0 | "faddp\n" |
1143 | 0 | "fscale\n" |
1144 | 0 | "fstp %%st(1)" |
1145 | 0 | : "=t"(res) |
1146 | 0 | : "0"(exponent)); |
1147 | 0 | return res; |
1148 | | #else |
1149 | | // TODO: Add better implementation of this function. |
1150 | | return exp2(exponent * L2_E<T>); |
1151 | | #endif |
1152 | 0 | } Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_ Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_ |
1153 | | |
1154 | | } |
1155 | | |
1156 | | using Exponentials::exp; |
1157 | | using Exponentials::exp2; |
1158 | | using Exponentials::log; |
1159 | | using Exponentials::log10; |
1160 | | using Exponentials::log2; |
1161 | | |
1162 | | namespace Hyperbolic { |
1163 | | |
1164 | | template<FloatingPoint T> |
1165 | | constexpr T sinh(T x) |
1166 | | { |
1167 | | T exponentiated = exp<T>(x); |
1168 | | if (x > 0) |
1169 | | return (exponentiated * exponentiated - 1) / 2 / exponentiated; |
1170 | | return (exponentiated - 1 / exponentiated) / 2; |
1171 | | } |
1172 | | |
1173 | | template<FloatingPoint T> |
1174 | | constexpr T cosh(T x) |
1175 | | { |
1176 | | CONSTEXPR_STATE(cosh, x); |
1177 | | |
1178 | | T exponentiated = exp(-x); |
1179 | | if (x < 0) |
1180 | | return (1 + exponentiated * exponentiated) / 2 / exponentiated; |
1181 | | return (1 / exponentiated + exponentiated) / 2; |
1182 | | } |
1183 | | |
1184 | | template<FloatingPoint T> |
1185 | | constexpr T tanh(T x) |
1186 | | { |
1187 | | if (x > 0) { |
1188 | | T exponentiated = exp<T>(2 * x); |
1189 | | return (exponentiated - 1) / (exponentiated + 1); |
1190 | | } |
1191 | | T plusX = exp<T>(x); |
1192 | | T minusX = 1 / plusX; |
1193 | | return (plusX - minusX) / (plusX + minusX); |
1194 | | } |
1195 | | |
1196 | | template<FloatingPoint T> |
1197 | | constexpr T asinh(T x) |
1198 | | { |
1199 | | return log<T>(x + sqrt<T>(x * x + 1)); |
1200 | | } |
1201 | | |
1202 | | template<FloatingPoint T> |
1203 | | constexpr T acosh(T x) |
1204 | | { |
1205 | | return log<T>(x + sqrt<T>(x * x - 1)); |
1206 | | } |
1207 | | |
1208 | | template<FloatingPoint T> |
1209 | | constexpr T atanh(T x) |
1210 | | { |
1211 | | return log<T>((1 + x) / (1 - x)) / (T)2.0l; |
1212 | | } |
1213 | | |
1214 | | } |
1215 | | |
1216 | | using Hyperbolic::acosh; |
1217 | | using Hyperbolic::asinh; |
1218 | | using Hyperbolic::atanh; |
1219 | | using Hyperbolic::cosh; |
1220 | | using Hyperbolic::sinh; |
1221 | | using Hyperbolic::tanh; |
1222 | | |
1223 | | // Calculate x^y with fast exponentiation when the power is a natural number. |
1224 | | template<FloatingPoint F, Unsigned U> |
1225 | | constexpr F pow_int(F x, U y) |
1226 | 148k | { |
1227 | 148k | auto result = static_cast<F>(1); |
1228 | 870k | while (y > 0) { |
1229 | 721k | if (y % 2 == 1) { |
1230 | 459k | result *= x; |
1231 | 459k | } |
1232 | 721k | x = x * x; |
1233 | 721k | y >>= 1; |
1234 | 721k | } |
1235 | 148k | return result; |
1236 | 148k | } _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEfTkNS1_8UnsignedEmEET_S2_T0_ Line | Count | Source | 1226 | 148k | { | 1227 | 148k | auto result = static_cast<F>(1); | 1228 | 870k | while (y > 0) { | 1229 | 721k | if (y % 2 == 1) { | 1230 | 459k | result *= x; | 1231 | 459k | } | 1232 | 721k | x = x * x; | 1233 | 721k | y >>= 1; | 1234 | 721k | } | 1235 | 148k | return result; | 1236 | 148k | } |
Unexecuted instantiation: _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEdTkNS1_8UnsignedEmEET_S2_T0_ |
1237 | | |
1238 | | template<FloatingPoint T> |
1239 | | constexpr T pow(T x, T y) |
1240 | 16.5M | { |
1241 | 16.5M | CONSTEXPR_STATE(pow, x, y); |
1242 | 16.5M | if (__builtin_isnan(y)) |
1243 | 0 | return y; |
1244 | 16.5M | if (y == 0) |
1245 | 6.34k | return 1; |
1246 | 16.5M | if (x == 0) |
1247 | 12.2M | return 0; |
1248 | 4.32M | if (y == 1) |
1249 | 3.75k | return x; |
1250 | | |
1251 | | // Take an integer fast path as long as the value fits within a 64-bit integer. |
1252 | 4.32M | if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) { |
1253 | 4.32M | i64 y_as_int = static_cast<i64>(y); |
1254 | 4.32M | if (y == static_cast<T>(y_as_int)) { |
1255 | 148k | T result = pow_int(x, static_cast<u64>(fabs<T>(y))); |
1256 | 148k | if (y < 0) |
1257 | 118k | result = static_cast<T>(1.0l) / result; |
1258 | 148k | return result; |
1259 | 148k | } |
1260 | 4.32M | } |
1261 | | |
1262 | | // FIXME: This formula suffers from error magnification. |
1263 | 4.17M | return exp2<T>(y * log2<T>(x)); |
1264 | 4.32M | } _ZN2AK3powITkNS_8Concepts13FloatingPointEfEET_S2_S2_ Line | Count | Source | 1240 | 16.5M | { | 1241 | 16.5M | CONSTEXPR_STATE(pow, x, y); | 1242 | 16.5M | if (__builtin_isnan(y)) | 1243 | 0 | return y; | 1244 | 16.5M | if (y == 0) | 1245 | 6.34k | return 1; | 1246 | 16.5M | if (x == 0) | 1247 | 12.2M | return 0; | 1248 | 4.32M | if (y == 1) | 1249 | 3.75k | return x; | 1250 | | | 1251 | | // Take an integer fast path as long as the value fits within a 64-bit integer. | 1252 | 4.32M | if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) { | 1253 | 4.32M | i64 y_as_int = static_cast<i64>(y); | 1254 | 4.32M | if (y == static_cast<T>(y_as_int)) { | 1255 | 148k | T result = pow_int(x, static_cast<u64>(fabs<T>(y))); | 1256 | 148k | if (y < 0) | 1257 | 118k | result = static_cast<T>(1.0l) / result; | 1258 | 148k | return result; | 1259 | 148k | } | 1260 | 4.32M | } | 1261 | | | 1262 | | // FIXME: This formula suffers from error magnification. | 1263 | 4.17M | return exp2<T>(y * log2<T>(x)); | 1264 | 4.32M | } |
Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_ Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_ |
1265 | | |
1266 | | template<Integral I, typename T> |
1267 | | constexpr I clamp_to(T value) |
1268 | 0 | { |
1269 | 0 | constexpr auto max = static_cast<T>(NumericLimits<I>::max()); |
1270 | 0 | if constexpr (max > 0) { |
1271 | 0 | if (value >= static_cast<T>(NumericLimits<I>::max())) |
1272 | 0 | return NumericLimits<I>::max(); |
1273 | 0 | } |
1274 | | |
1275 | 0 | constexpr auto min = static_cast<T>(NumericLimits<I>::min()); |
1276 | 0 | if constexpr (min <= 0) { |
1277 | 0 | if (value <= static_cast<T>(NumericLimits<I>::min())) |
1278 | 0 | return NumericLimits<I>::min(); |
1279 | 0 | } |
1280 | | |
1281 | | if constexpr (IsFloatingPoint<T>) |
1282 | 0 | return round_to<I>(value); |
1283 | | |
1284 | 0 | return value; |
1285 | 0 | } Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEldEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralElmEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEhdEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEilEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEifEET_T0_ Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEidEET_T0_ |
1286 | | |
1287 | | // Wrap a to keep it in the range [-b, b]. |
1288 | | template<typename T> |
1289 | | constexpr T wrap_to_range(T a, IdentityType<T> b) |
1290 | | { |
1291 | | return fmod(fmod(a + b, 2 * b) + 2 * b, 2 * b) - b; |
1292 | | } |
1293 | | |
1294 | | #undef CONSTEXPR_STATE |
1295 | | #undef AARCH64_INSTRUCTION |
1296 | | } |
1297 | | |
1298 | | #if USING_AK_GLOBALLY |
1299 | | using AK::round_to; |
1300 | | #endif |