Coverage Report

Created: 2026-06-07 07:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/AK/Math.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>.
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include <AK/BuiltinWrappers.h>
10
#include <AK/Concepts.h>
11
#include <AK/FloatingPoint.h>
12
#include <AK/NumericLimits.h>
13
#include <AK/StdLibExtraDetails.h>
14
#include <AK/Types.h>
15
#include <math.h>
16
17
#ifdef KERNEL
18
#    error "Including AK/Math.h from the Kernel is never correct! Floating point is disabled."
19
#endif
20
21
namespace AK {
22
23
template<FloatingPoint T>
24
constexpr T NaN = __builtin_nan("");
25
template<FloatingPoint T>
26
constexpr T Infinity = __builtin_huge_vall();
27
template<FloatingPoint T>
28
constexpr T Pi = 3.141592653589793238462643383279502884L;
29
template<FloatingPoint T>
30
constexpr T E = 2.718281828459045235360287471352662498L;
31
template<FloatingPoint T>
32
constexpr T Sqrt2 = 1.414213562373095048801688724209698079L;
33
template<FloatingPoint T>
34
constexpr T Sqrt1_2 = 0.707106781186547524400844362104849039L;
35
36
template<FloatingPoint T>
37
constexpr T L2_10 = 3.321928094887362347870319429489390175864L;
38
template<FloatingPoint T>
39
constexpr T L2_E = 1.442695040888963407359924681001892137L;
40
41
namespace Details {
42
template<size_t>
43
constexpr size_t product_even();
44
template<>
45
0
constexpr size_t product_even<2>() { return 2; }
46
template<size_t value>
47
0
constexpr size_t product_even() { return value * product_even<value - 2>(); }
Unexecuted instantiation: unsigned long AK::Details::product_even<4ul>()
Unexecuted instantiation: unsigned long AK::Details::product_even<6ul>()
Unexecuted instantiation: unsigned long AK::Details::product_even<8ul>()
Unexecuted instantiation: unsigned long AK::Details::product_even<10ul>()
Unexecuted instantiation: unsigned long AK::Details::product_even<12ul>()
Unexecuted instantiation: unsigned long AK::Details::product_even<14ul>()
Unexecuted instantiation: unsigned long AK::Details::product_even<16ul>()
48
49
template<size_t>
50
constexpr size_t product_odd();
51
template<>
52
0
constexpr size_t product_odd<1>() { return 1; }
53
template<size_t value>
54
0
constexpr size_t product_odd() { return value * product_odd<value - 2>(); }
Unexecuted instantiation: unsigned long AK::Details::product_odd<3ul>()
Unexecuted instantiation: unsigned long AK::Details::product_odd<5ul>()
Unexecuted instantiation: unsigned long AK::Details::product_odd<7ul>()
Unexecuted instantiation: unsigned long AK::Details::product_odd<9ul>()
Unexecuted instantiation: unsigned long AK::Details::product_odd<11ul>()
Unexecuted instantiation: unsigned long AK::Details::product_odd<13ul>()
Unexecuted instantiation: unsigned long AK::Details::product_odd<15ul>()
55
}
56
57
template<FloatingPoint T>
58
constexpr T to_radians(T degrees)
59
0
{
60
0
    return degrees * AK::Pi<T> / 180;
61
0
}
Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEfEET_S2_
Unexecuted instantiation: _ZN2AK10to_radiansITkNS_8Concepts13FloatingPointEdEET_S2_
62
63
template<FloatingPoint T>
64
constexpr T to_degrees(T radians)
65
0
{
66
0
    return radians * 180 / AK::Pi<T>;
67
0
}
Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEfEET_S2_
Unexecuted instantiation: _ZN2AK10to_degreesITkNS_8Concepts13FloatingPointEdEET_S2_
68
69
template<FloatingPoint FloatT>
70
FloatT copysign(FloatT x, FloatT y)
71
{
72
    using Extractor = FloatExtractor<FloatT>;
73
    auto ex = Extractor::from_float(x);
74
    auto ey = Extractor::from_float(y);
75
    ex.sign = ey.sign;
76
    return ex.to_float();
77
}
78
79
#define CALL_BUILTIN(function, args...)           \
80
136k
    do {                                          \
81
136k
        if constexpr (IsSame<T, long double>)     \
82
136k
            return __builtin_##function##l(args); \
83
136k
        if constexpr (IsSame<T, double>)          \
84
136k
            return __builtin_##function(args);    \
85
136k
        if constexpr (IsSame<T, float>)           \
86
136k
            return __builtin_##function##f(args); \
87
136k
    } while (0)
88
89
#define CONSTEXPR_STATE(function, args...) \
90
113M
    if (is_constant_evaluated())           \
91
113M
        CALL_BUILTIN(function, args);
92
93
#define AARCH64_INSTRUCTION(instruction, arg) \
94
    if constexpr (IsSame<T, long double>)     \
95
        TODO();                               \
96
    if constexpr (IsSame<T, double>) {        \
97
        double res;                           \
98
        asm(#instruction " %d0, %d1"          \
99
            : "=w"(res)                       \
100
            : "w"(arg));                      \
101
        return res;                           \
102
    }                                         \
103
    if constexpr (IsSame<T, float>) {         \
104
        float res;                            \
105
        asm(#instruction " %s0, %s1"          \
106
            : "=w"(res)                       \
107
            : "w"(arg));                      \
108
        return res;                           \
109
    }
110
111
template<FloatingPoint T>
112
constexpr T fabs(T x)
113
132k
{
114
    // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper
115
132k
    CALL_BUILTIN(fabs, x);
116
132k
}
_ZN2AK4fabsITkNS_8Concepts13FloatingPointEfEET_S2_
Line
Count
Source
113
132k
{
114
    // Both GCC and Clang inline fabs by default, so this is just a cmath like wrapper
115
132k
    CALL_BUILTIN(fabs, x);
116
132k
}
Unexecuted instantiation: _ZN2AK4fabsITkNS_8Concepts13FloatingPointEdEET_S2_
Unexecuted instantiation: _ZN2AK4fabsITkNS_8Concepts13FloatingPointEdEET_S2_
117
118
namespace Rounding {
119
template<FloatingPoint T>
120
constexpr T ceil(T num)
121
3.39k
{
122
    // FIXME: SSE4.1 rounds[sd] num, res, 0b110
123
3.39k
    if (is_constant_evaluated()) {
124
0
        if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
125
0
            return num;
126
0
        return (static_cast<T>(static_cast<i64>(num)) == num)
127
0
            ? static_cast<i64>(num)
128
0
            : static_cast<i64>(num) + ((num > 0) ? 1 : 0);
129
0
    }
130
#if ARCH(AARCH64)
131
    AARCH64_INSTRUCTION(frintp, num);
132
#else
133
3.39k
    CALL_BUILTIN(ceil, num);
134
3.39k
#endif
135
3.39k
}
_ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEfEET_S3_
Line
Count
Source
121
3.39k
{
122
    // FIXME: SSE4.1 rounds[sd] num, res, 0b110
123
3.39k
    if (is_constant_evaluated()) {
124
0
        if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
125
0
            return num;
126
0
        return (static_cast<T>(static_cast<i64>(num)) == num)
127
0
            ? static_cast<i64>(num)
128
0
            : static_cast<i64>(num) + ((num > 0) ? 1 : 0);
129
0
    }
130
#if ARCH(AARCH64)
131
    AARCH64_INSTRUCTION(frintp, num);
132
#else
133
3.39k
    CALL_BUILTIN(ceil, num);
134
3.39k
#endif
135
3.39k
}
Unexecuted instantiation: _ZN2AK8Rounding4ceilITkNS_8Concepts13FloatingPointEdEET_S3_
136
137
template<FloatingPoint T>
138
constexpr T floor(T num)
139
0
{
140
    // FIXME: SSE4.1 rounds[sd] num, res, 0b101
141
0
    if (is_constant_evaluated()) {
142
0
        if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
143
0
            return num;
144
0
        return (static_cast<T>(static_cast<i64>(num)) == num)
145
0
            ? static_cast<i64>(num)
146
0
            : static_cast<i64>(num) - ((num > 0) ? 0 : 1);
147
0
    }
148
#if ARCH(AARCH64)
149
    AARCH64_INSTRUCTION(frintm, num);
150
#else
151
0
    CALL_BUILTIN(floor, num);
152
0
#endif
153
0
}
154
155
template<FloatingPoint T>
156
constexpr T trunc(T num)
157
{
158
#if ARCH(AARCH64)
159
    if (is_constant_evaluated()) {
160
        if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
161
            return num;
162
        return static_cast<T>(static_cast<i64>(num));
163
    }
164
    AARCH64_INSTRUCTION(frintz, num);
165
#endif
166
    // FIXME: Use dedicated instruction in the non constexpr case
167
    //        SSE4.1: rounds[sd] %num, %res, 0b111
168
    if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
169
        return num;
170
    return static_cast<T>(static_cast<i64>(num));
171
}
172
173
template<FloatingPoint T>
174
constexpr T rint(T x)
175
{
176
    CONSTEXPR_STATE(rint, x);
177
    // Note: This does break tie to even
178
    //       But the behavior of frndint/rounds[ds]/frintx can be configured
179
    //       through the floating point control registers.
180
    // FIXME: We should decide if we rename this to allow us to get away from
181
    //        the configurability "burden" rint has
182
    //        this would make us use `rounds[sd] %num, %res, 0b100`
183
    //        and `frintn` respectively,
184
    //        no such guaranteed round exists for x87 `frndint`
185
#if ARCH(X86_64)
186
#    ifdef __SSE4_1__
187
    if constexpr (IsSame<T, double>) {
188
        T r;
189
        asm(
190
            "roundsd %1, %0"
191
            : "=x"(r)
192
            : "x"(x));
193
        return r;
194
    }
195
    if constexpr (IsSame<T, float>) {
196
        T r;
197
        asm(
198
            "roundss %1, %0"
199
            : "=x"(r)
200
            : "x"(x));
201
        return r;
202
    }
203
#    else
204
    asm(
205
        "frndint"
206
        : "+t"(x));
207
    return x;
208
#    endif
209
#elif ARCH(AARCH64)
210
    AARCH64_INSTRUCTION(frintx, x);
211
#elif ARCH(RISCV64)
212
    if (__builtin_isnan(x))
213
        return x;
214
215
    // Floating point values have a gap size of >= 1 for values above 2^mantissa_bits - 1.
216
    if (fabs(x) > FloatExtractor<T>::mantissa_max)
217
        return x;
218
219
    if constexpr (IsSame<T, float>) {
220
        i64 r;
221
        asm("fcvt.l.s %0, %1, dyn"
222
            : "=r"(r)
223
            : "f"(x));
224
        return copysign(static_cast<float>(r), x);
225
    }
226
    if constexpr (IsSame<T, double>) {
227
        i64 r;
228
        asm("fcvt.l.d %0, %1, dyn"
229
            : "=r"(r)
230
            : "f"(x));
231
        return copysign(static_cast<double>(r), x);
232
    }
233
    if constexpr (IsSame<T, long double>)
234
        TODO_RISCV64();
235
#endif
236
    TODO();
237
}
238
239
template<FloatingPoint T>
240
constexpr T round(T x)
241
0
{
242
0
    CONSTEXPR_STATE(round, x);
243
0
    // Note: This is break-tie-away-from-zero, so not the hw's understanding of
244
0
    //       "nearest", which would be towards even.
245
0
    if (x == 0.)
246
0
        return x;
247
0
    if (x > 0.)
248
0
        return floor(x + .5);
249
0
    return ceil(x - .5);
250
0
}
251
252
template<Integral I, FloatingPoint P>
253
ALWAYS_INLINE I round_to(P value);
254
255
#if ARCH(X86_64)
256
template<Integral I>
257
ALWAYS_INLINE I round_to(long double value)
258
{
259
    // Note: fistps outputs into a signed integer location (i16, i32, i64),
260
    //       so lets be nice and tell the compiler that.
261
    Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
262
    if constexpr (sizeof(I) == sizeof(i64)) {
263
        asm("fistpll %0"
264
            : "=m"(ret)
265
            : "t"(value)
266
            : "st");
267
    } else if constexpr (sizeof(I) == sizeof(i32)) {
268
        asm("fistpl %0"
269
            : "=m"(ret)
270
            : "t"(value)
271
            : "st");
272
    } else {
273
        asm("fistps %0"
274
            : "=m"(ret)
275
            : "t"(value)
276
            : "st");
277
    }
278
    return static_cast<I>(ret);
279
}
280
281
template<Integral I>
282
ALWAYS_INLINE I round_to(float value)
283
190M
{
284
    // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set,
285
    //        if the value surpasses the i64 limit, even if the result could fit into an u64
286
    //        To solve this we would either need to detect that value or do a range check and
287
    //        then do a more specialized conversion, which might include a division (which is expensive)
288
    if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) {
289
        i64 ret;
290
        asm("cvtss2si %1, %0"
291
            : "=r"(ret)
292
            : "xm"(value));
293
        return static_cast<I>(ret);
294
    }
295
190M
    i32 ret;
296
190M
    asm("cvtss2si %1, %0"
297
190M
        : "=r"(ret)
298
190M
        : "xm"(value));
299
190M
    return static_cast<I>(ret);
300
190M
}
_ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_f
Line
Count
Source
283
190M
{
284
    // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set,
285
    //        if the value surpasses the i64 limit, even if the result could fit into an u64
286
    //        To solve this we would either need to detect that value or do a range check and
287
    //        then do a more specialized conversion, which might include a division (which is expensive)
288
    if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) {
289
        i64 ret;
290
        asm("cvtss2si %1, %0"
291
            : "=r"(ret)
292
            : "xm"(value));
293
        return static_cast<I>(ret);
294
    }
295
190M
    i32 ret;
296
190M
    asm("cvtss2si %1, %0"
297
190M
        : "=r"(ret)
298
190M
        : "xm"(value));
299
190M
    return static_cast<I>(ret);
300
190M
}
_ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_f
Line
Count
Source
283
15.3k
{
284
    // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set,
285
    //        if the value surpasses the i64 limit, even if the result could fit into an u64
286
    //        To solve this we would either need to detect that value or do a range check and
287
    //        then do a more specialized conversion, which might include a division (which is expensive)
288
    if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) {
289
        i64 ret;
290
        asm("cvtss2si %1, %0"
291
            : "=r"(ret)
292
            : "xm"(value));
293
        return static_cast<I>(ret);
294
    }
295
15.3k
    i32 ret;
296
15.3k
    asm("cvtss2si %1, %0"
297
15.3k
        : "=r"(ret)
298
15.3k
        : "xm"(value));
299
15.3k
    return static_cast<I>(ret);
300
15.3k
}
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEsEET_f
301
302
template<Integral I>
303
ALWAYS_INLINE I round_to(double value)
304
0
{
305
    // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set,
306
    //        if the value surpasses the i64 limit, even if the result could fit into an u64
307
    //        To solve this we would either need to detect that value or do a range check and
308
    //        then do a more specialized conversion, which might include a division (which is expensive)
309
0
    if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) {
310
0
        i64 ret;
311
0
        asm("cvtsd2si %1, %0"
312
0
            : "=r"(ret)
313
0
            : "xm"(value));
314
0
        return static_cast<I>(ret);
315
0
    }
316
0
    i32 ret;
317
0
    asm("cvtsd2si %1, %0"
318
0
        : "=r"(ret)
319
0
        : "xm"(value));
320
0
    return static_cast<I>(ret);
321
0
}
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEiEET_d
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralElEET_d
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEhEET_d
Unexecuted instantiation: _ZN2AK8Rounding8round_toITkNS_8Concepts8IntegralEmEET_d
322
323
#elif ARCH(AARCH64)
324
template<SignedIntegral I>
325
ALWAYS_INLINE I round_to(float value)
326
{
327
    if constexpr (sizeof(I) <= sizeof(u32)) {
328
        i32 res;
329
        asm("fcvtns %w0, %s1"
330
            : "=r"(res)
331
            : "w"(value));
332
        return static_cast<I>(res);
333
    }
334
    i64 res;
335
    asm("fcvtns %0, %s1"
336
        : "=r"(res)
337
        : "w"(value));
338
    return static_cast<I>(res);
339
}
340
341
template<SignedIntegral I>
342
ALWAYS_INLINE I round_to(double value)
343
{
344
    if constexpr (sizeof(I) <= sizeof(u32)) {
345
        i32 res;
346
        asm("fcvtns %w0, %d1"
347
            : "=r"(res)
348
            : "w"(value));
349
        return static_cast<I>(res);
350
    }
351
    i64 res;
352
    asm("fcvtns %0, %d1"
353
        : "=r"(res)
354
        : "w"(value));
355
    return static_cast<I>(res);
356
}
357
358
template<UnsignedIntegral U>
359
ALWAYS_INLINE U round_to(float value)
360
{
361
    if constexpr (sizeof(U) <= sizeof(u32)) {
362
        u32 res;
363
        asm("fcvtnu %w0, %s1"
364
            : "=r"(res)
365
            : "w"(value));
366
        return static_cast<U>(res);
367
    }
368
    i64 res;
369
    asm("fcvtnu %0, %s1"
370
        : "=r"(res)
371
        : "w"(value));
372
    return static_cast<U>(res);
373
}
374
375
template<UnsignedIntegral U>
376
ALWAYS_INLINE U round_to(double value)
377
{
378
    if constexpr (sizeof(U) <= sizeof(u32)) {
379
        u32 res;
380
        asm("fcvtns %w0, %d1"
381
            : "=r"(res)
382
            : "w"(value));
383
        return static_cast<U>(res);
384
    }
385
    i64 res;
386
    asm("fcvtns %0, %d1"
387
        : "=r"(res)
388
        : "w"(value));
389
    return static_cast<U>(res);
390
}
391
392
#else
393
template<Integral I, FloatingPoint P>
394
ALWAYS_INLINE I round_to(P value)
395
{
396
    if constexpr (IsSame<P, long double>)
397
        return static_cast<I>(__builtin_llrintl(value));
398
    if constexpr (IsSame<P, double>)
399
        return static_cast<I>(__builtin_llrint(value));
400
    if constexpr (IsSame<P, float>)
401
        return static_cast<I>(__builtin_llrintf(value));
402
}
403
#endif
404
405
}
406
407
using Rounding::ceil;
408
using Rounding::floor;
409
using Rounding::rint;
410
using Rounding::round;
411
using Rounding::round_to;
412
using Rounding::trunc;
413
414
namespace Division {
415
template<FloatingPoint T>
416
constexpr T fmod(T x, T y)
417
{
418
    CONSTEXPR_STATE(fmod, x, y);
419
420
#if ARCH(X86_64)
421
    u16 fpu_status;
422
    do {
423
        asm(
424
            "fprem\n"
425
            "fnstsw %%ax\n"
426
            : "+t"(x), "=a"(fpu_status)
427
            : "u"(y));
428
    } while (fpu_status & 0x400);
429
    return x;
430
#else
431
#    if defined(AK_OS_SERENITY)
432
    // FIXME: This is a very naive implementation.
433
434
    if (__builtin_isnan(x))
435
        return x;
436
    if (__builtin_isnan(y))
437
        return y;
438
439
    // SPECIAL VALUES
440
    //      fmod(±0, y) returns ±0 if y is neither 0 nor NaN.
441
    if (x == 0 && y != 0)
442
        return x;
443
444
    //      fmod(x, y) returns a NaN and raises the "invalid" floating-point exception for x infinite or y zero.
445
    // FIXME: Exception.
446
    if (__builtin_isinf(x) || y == 0)
447
        return NaN<T>;
448
449
    //      fmod(x, ±infinity) returns x for x not infinite.
450
    if (__builtin_isinf(y))
451
        return x;
452
453
    // Range reduction: handle negative x and y.
454
    if (y < 0)
455
        return fmod(x, -y);
456
    if (x < 0)
457
        return -fmod(-x, y);
458
459
    // x is x_mantissa * 2**x_exponent, y is y_mantissa * 2**y_exponent.
460
    // (Both are positive at this point.)
461
    // If y_exponent > x_exponent, we are done and can return x.
462
    // If y_exponent == x_exponent, we can return (x_mantissa % y_mantissa) * 2**x_exponent.
463
    // If y_exponent < x_exponent, we'll iteratively reduce x_exponent by shifting from
464
    // the exponent into the mantissa.
465
466
    auto x_bits = FloatExtractor<T>::from_float(x);
467
    typename FloatExtractor<T>::ComponentType x_exponent = x_bits.exponent;
468
469
    auto y_bits = FloatExtractor<T>::from_float(y);
470
    typename FloatExtractor<T>::ComponentType y_exponent = y_bits.exponent;
471
472
    // FIXME: Handle denormals. For now, treat them as 0.
473
    if (x_exponent == 0 && y_exponent != 0)
474
        return 0;
475
    if (y_exponent == 0)
476
        return NaN<T>;
477
478
    if (y_exponent > x_exponent)
479
        return x;
480
481
    // FIXME: This is wrong for f80.
482
    typename FloatExtractor<T>::ComponentType implicit_mantissa_bit = 1;
483
    implicit_mantissa_bit <<= FloatExtractor<T>::mantissa_bits;
484
485
    typename FloatExtractor<T>::ComponentType x_mantissa = x_bits.mantissa | implicit_mantissa_bit;
486
    typename FloatExtractor<T>::ComponentType y_mantissa = y_bits.mantissa | implicit_mantissa_bit;
487
488
    while (y_exponent < x_exponent) {
489
        // This is ok because (x % (y * 2**n)) divides (x % y) for all n > 0.
490
        x_mantissa %= y_mantissa;
491
492
        x_mantissa <<= 1;
493
        --x_exponent;
494
    }
495
496
    x_mantissa %= y_mantissa;
497
498
    if (x_mantissa == 0)
499
        return 0;
500
501
    // We're done and want to return x_mantissa * 2 ** x_exponent.
502
    // But x_mantissa might not have a leading 1 bit, so we have to realign first.
503
    // Mantissa is mantissa_bits long, count_leading_zeroes() counts in ComponentType, adjust:
504
    auto const always_zero_bits = sizeof(typename FloatExtractor<T>::ComponentType) * 8 - (FloatExtractor<T>::mantissa_bits + 1); // +1 for implicit 1 bit
505
    auto shift = count_leading_zeroes(x_mantissa) - always_zero_bits;
506
507
    if (x_exponent < shift) {
508
        // FIXME: Make a real denormal.
509
        return 0;
510
    }
511
512
    x_mantissa <<= shift;
513
    x_exponent -= shift;
514
515
    x_bits.exponent = x_exponent;
516
    x_bits.mantissa = x_mantissa;
517
    return x_bits.to_float();
518
#    else
519
    CALL_BUILTIN(fmod, x, y);
520
#    endif
521
#endif
522
}
523
524
template<FloatingPoint T>
525
constexpr T remainder(T x, T y)
526
{
527
    CONSTEXPR_STATE(remainder, x, y);
528
529
#if ARCH(X86_64)
530
    u16 fpu_status;
531
    do {
532
        asm(
533
            "fprem1\n"
534
            "fnstsw %%ax\n"
535
            : "+t"(x), "=a"(fpu_status)
536
            : "u"(y));
537
    } while (fpu_status & 0x400);
538
    return x;
539
#else
540
#    if defined(AK_OS_SERENITY)
541
    // TODO: Add implementation for this function.
542
    TODO();
543
#    endif
544
    CALL_BUILTIN(remainder, x, y);
545
#endif
546
}
547
}
548
549
using Division::fmod;
550
using Division::remainder;
551
552
template<FloatingPoint T>
553
constexpr T sqrt(T x)
554
43.7M
{
555
43.7M
    CONSTEXPR_STATE(sqrt, x);
556
557
43.7M
#if ARCH(X86_64)
558
43.7M
    if constexpr (IsSame<T, float>) {
559
40.9M
        float res;
560
40.9M
        asm("sqrtss %1, %0"
561
40.9M
            : "=x"(res)
562
40.9M
            : "x"(x));
563
40.9M
        return res;
564
40.9M
    }
565
2.84M
    if constexpr (IsSame<T, double>) {
566
2.84M
        double res;
567
2.84M
        asm("sqrtsd %1, %0"
568
2.84M
            : "=x"(res)
569
2.84M
            : "x"(x));
570
2.84M
        return res;
571
2.84M
    }
572
0
    T res;
573
43.7M
    asm("fsqrt"
574
43.7M
        : "=t"(res)
575
43.7M
        : "0"(x));
576
43.7M
    return res;
577
#elif ARCH(AARCH64)
578
    AARCH64_INSTRUCTION(fsqrt, x);
579
#elif ARCH(RISCV64)
580
    if constexpr (IsSame<T, float>) {
581
        float res;
582
        asm("fsqrt.s %0, %1"
583
            : "=f"(res)
584
            : "f"(x));
585
        return res;
586
    }
587
    if constexpr (IsSame<T, double>) {
588
        double res;
589
        asm("fsqrt.d %0, %1"
590
            : "=f"(res)
591
            : "f"(x));
592
        return res;
593
    }
594
    if constexpr (IsSame<T, long double>)
595
        TODO_RISCV64();
596
#else
597
#    if defined(AK_OS_SERENITY)
598
    // TODO: Add implementation for this function.
599
    TODO();
600
#    endif
601
    CALL_BUILTIN(sqrt, x);
602
#endif
603
43.7M
}
_ZN2AK4sqrtITkNS_8Concepts13FloatingPointEfEET_S2_
Line
Count
Source
554
40.9M
{
555
40.9M
    CONSTEXPR_STATE(sqrt, x);
556
557
40.9M
#if ARCH(X86_64)
558
40.9M
    if constexpr (IsSame<T, float>) {
559
40.9M
        float res;
560
40.9M
        asm("sqrtss %1, %0"
561
40.9M
            : "=x"(res)
562
40.9M
            : "x"(x));
563
40.9M
        return res;
564
40.9M
    }
565
    if constexpr (IsSame<T, double>) {
566
        double res;
567
        asm("sqrtsd %1, %0"
568
            : "=x"(res)
569
            : "x"(x));
570
        return res;
571
    }
572
40.9M
    T res;
573
40.9M
    asm("fsqrt"
574
40.9M
        : "=t"(res)
575
40.9M
        : "0"(x));
576
40.9M
    return res;
577
#elif ARCH(AARCH64)
578
    AARCH64_INSTRUCTION(fsqrt, x);
579
#elif ARCH(RISCV64)
580
    if constexpr (IsSame<T, float>) {
581
        float res;
582
        asm("fsqrt.s %0, %1"
583
            : "=f"(res)
584
            : "f"(x));
585
        return res;
586
    }
587
    if constexpr (IsSame<T, double>) {
588
        double res;
589
        asm("fsqrt.d %0, %1"
590
            : "=f"(res)
591
            : "f"(x));
592
        return res;
593
    }
594
    if constexpr (IsSame<T, long double>)
595
        TODO_RISCV64();
596
#else
597
#    if defined(AK_OS_SERENITY)
598
    // TODO: Add implementation for this function.
599
    TODO();
600
#    endif
601
    CALL_BUILTIN(sqrt, x);
602
#endif
603
40.9M
}
_ZN2AK4sqrtITkNS_8Concepts13FloatingPointEdEET_S2_
Line
Count
Source
554
2.84M
{
555
2.84M
    CONSTEXPR_STATE(sqrt, x);
556
557
2.84M
#if ARCH(X86_64)
558
    if constexpr (IsSame<T, float>) {
559
        float res;
560
        asm("sqrtss %1, %0"
561
            : "=x"(res)
562
            : "x"(x));
563
        return res;
564
    }
565
2.84M
    if constexpr (IsSame<T, double>) {
566
2.84M
        double res;
567
2.84M
        asm("sqrtsd %1, %0"
568
2.84M
            : "=x"(res)
569
2.84M
            : "x"(x));
570
2.84M
        return res;
571
2.84M
    }
572
0
    T res;
573
2.84M
    asm("fsqrt"
574
2.84M
        : "=t"(res)
575
2.84M
        : "0"(x));
576
2.84M
    return res;
577
#elif ARCH(AARCH64)
578
    AARCH64_INSTRUCTION(fsqrt, x);
579
#elif ARCH(RISCV64)
580
    if constexpr (IsSame<T, float>) {
581
        float res;
582
        asm("fsqrt.s %0, %1"
583
            : "=f"(res)
584
            : "f"(x));
585
        return res;
586
    }
587
    if constexpr (IsSame<T, double>) {
588
        double res;
589
        asm("fsqrt.d %0, %1"
590
            : "=f"(res)
591
            : "f"(x));
592
        return res;
593
    }
594
    if constexpr (IsSame<T, long double>)
595
        TODO_RISCV64();
596
#else
597
#    if defined(AK_OS_SERENITY)
598
    // TODO: Add implementation for this function.
599
    TODO();
600
#    endif
601
    CALL_BUILTIN(sqrt, x);
602
#endif
603
2.84M
}
604
605
template<FloatingPoint T>
606
constexpr T cbrt(T x)
607
{
608
    CONSTEXPR_STATE(cbrt, x);
609
    if (__builtin_isinf(x) || x == 0)
610
        return x;
611
    if (x < 0)
612
        return -cbrt(-x);
613
614
    T r = x;
615
    T ex = 0;
616
617
    while (r < 0.125l) {
618
        r *= 8;
619
        ex--;
620
    }
621
    while (r > 1.0l) {
622
        r *= 0.125l;
623
        ex++;
624
    }
625
626
    r = (-0.46946116l * r + 1.072302l) * r + 0.3812513l;
627
628
    while (ex < 0) {
629
        r *= 0.5l;
630
        ex++;
631
    }
632
    while (ex > 0) {
633
        r *= 2.0l;
634
        ex--;
635
    }
636
637
    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
638
    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
639
    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
640
    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
641
642
    return r;
643
}
644
645
namespace Trigonometry {
646
647
template<FloatingPoint T>
648
constexpr T hypot(T x, T y)
649
21.1M
{
650
21.1M
    return sqrt(x * x + y * y);
651
21.1M
}
652
653
template<FloatingPoint T>
654
constexpr T sin(T angle)
655
19.7M
{
656
19.7M
    CONSTEXPR_STATE(sin, angle);
657
658
19.7M
#if ARCH(X86_64)
659
19.7M
    T ret;
660
19.7M
    asm(
661
19.7M
        "fsin"
662
19.7M
        : "=t"(ret)
663
19.7M
        : "0"(angle));
664
19.7M
    return ret;
665
#else
666
#    if defined(AK_OS_SERENITY)
667
    T sign = 1;
668
    if (angle < 0) {
669
        angle = -angle;
670
        sign = -1;
671
    }
672
673
    if (angle >= 2 * Pi<T>)
674
        angle = fmod(angle, 2 * Pi<T>);
675
676
    if (angle >= Pi<T>) {
677
        angle = angle - Pi<T>;
678
        sign = -sign;
679
    }
680
681
    if (angle > Pi<T> / 2)
682
        angle = Pi<T> - angle;
683
684
    // https://github.com/samhocevar/lolremez/wiki/Tutorial-4-of-5%3A-fixing-lower-order-parameters
685
    auto f = [](T x) {
686
        if constexpr (IsSame<T, f32>) {
687
            // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))"
688
            // "Estimated max error: 4.618689e-9"
689
            f32 u = 2.6000548e-06f;
690
            u = u * x + -0.00019806615f;
691
            u = u * x + 0.0083330171f;
692
            return u * x + -0.16666657f;
693
        } else {
694
            // FIXME: Could do something custom for long double.
695
            // lolremez --degree 6 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))"
696
            // "Estimated max error: 1.1015766629825144e-16"
697
            T u = -7.3646464502210486e-13;
698
            u = u * x + 1.6047301196685753e-10;
699
            u = u * x + -2.5051851497012596e-08;
700
            u = u * x + 2.7557316077007725e-06;
701
            u = u * x + -0.00019841269820094207;
702
            u = u * x + 0.0083333333332628792;
703
            return u * x + -0.16666666666665811;
704
        }
705
    };
706
    T angle_squared = angle * angle;
707
    return sign * (angle + angle * angle_squared * f(angle_squared));
708
#    else
709
    CALL_BUILTIN(sin, angle);
710
#    endif
711
#endif
712
19.7M
}
_ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEfEET_S3_
Line
Count
Source
655
19.7M
{
656
19.7M
    CONSTEXPR_STATE(sin, angle);
657
658
19.7M
#if ARCH(X86_64)
659
19.7M
    T ret;
660
19.7M
    asm(
661
19.7M
        "fsin"
662
19.7M
        : "=t"(ret)
663
19.7M
        : "0"(angle));
664
19.7M
    return ret;
665
#else
666
#    if defined(AK_OS_SERENITY)
667
    T sign = 1;
668
    if (angle < 0) {
669
        angle = -angle;
670
        sign = -1;
671
    }
672
673
    if (angle >= 2 * Pi<T>)
674
        angle = fmod(angle, 2 * Pi<T>);
675
676
    if (angle >= Pi<T>) {
677
        angle = angle - Pi<T>;
678
        sign = -sign;
679
    }
680
681
    if (angle > Pi<T> / 2)
682
        angle = Pi<T> - angle;
683
684
    // https://github.com/samhocevar/lolremez/wiki/Tutorial-4-of-5%3A-fixing-lower-order-parameters
685
    auto f = [](T x) {
686
        if constexpr (IsSame<T, f32>) {
687
            // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))"
688
            // "Estimated max error: 4.618689e-9"
689
            f32 u = 2.6000548e-06f;
690
            u = u * x + -0.00019806615f;
691
            u = u * x + 0.0083330171f;
692
            return u * x + -0.16666657f;
693
        } else {
694
            // FIXME: Could do something custom for long double.
695
            // lolremez --degree 6 --range "1e-50:pi*pi/4" "(sin(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))"
696
            // "Estimated max error: 1.1015766629825144e-16"
697
            T u = -7.3646464502210486e-13;
698
            u = u * x + 1.6047301196685753e-10;
699
            u = u * x + -2.5051851497012596e-08;
700
            u = u * x + 2.7557316077007725e-06;
701
            u = u * x + -0.00019841269820094207;
702
            u = u * x + 0.0083333333332628792;
703
            return u * x + -0.16666666666665811;
704
        }
705
    };
706
    T angle_squared = angle * angle;
707
    return sign * (angle + angle * angle_squared * f(angle_squared));
708
#    else
709
    CALL_BUILTIN(sin, angle);
710
#    endif
711
#endif
712
19.7M
}
Unexecuted instantiation: _ZN2AK12Trigonometry3sinITkNS_8Concepts13FloatingPointEdEET_S3_
713
714
template<FloatingPoint T>
715
constexpr T cos(T angle)
716
5.77k
{
717
5.77k
    CONSTEXPR_STATE(cos, angle);
718
719
5.77k
#if ARCH(X86_64)
720
5.77k
    T ret;
721
5.77k
    asm(
722
5.77k
        "fcos"
723
5.77k
        : "=t"(ret)
724
5.77k
        : "0"(angle));
725
5.77k
    return ret;
726
#else
727
#    if defined(AK_OS_SERENITY)
728
    if (angle < 0)
729
        angle = -angle;
730
731
    if (angle >= 2 * Pi<T>)
732
        angle = fmod(angle, 2 * Pi<T>);
733
734
    T sign = 1;
735
    if (angle >= Pi<T>) {
736
        angle = angle - Pi<T>;
737
        sign = -1;
738
    }
739
740
    if (angle > Pi<T> / 2) {
741
        angle = Pi<T> - angle;
742
        sign = -sign;
743
    }
744
745
    // https://github.com/samhocevar/lolremez/wiki/Tutorial-3-of-5:-changing-variables-for-simpler-polynomials
746
    // for cos(x): cos(x) - 1 is a function of x^2 terms, so we do the substitution on that page like
747
    // max | cos(x) - 1 - x^2 Q(x^2) | and then set y = x^2. That yields:
748
    // max | (cos(sqrt(y)) - 1) - y Q(y) |. Dividing through with y (instead of sqrt(y) as in the sin() case) gives us:
749
    auto f = [](T x) {
750
        if constexpr (IsSame<T, f32>) {
751
            // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x"  "1/x"
752
            // "Estimated max error: 5.2720126e-8"
753
            f32 u = 2.3194387e-05f;
754
            u = u * x + -0.0013855927f;
755
            u = u * x + 0.041663989f;
756
            return u * x + -0.49999931f;
757
        } else {
758
            // lolremez --degree 6 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x"  "1/x"
759
            // "Estimated max error: 2.0880269759116624e-15"
760
            T u = -1.101249182846601e-11;
761
            u = u * x + 2.0858735176345955e-09;
762
            u = u * x + -2.7556950755056579e-07;
763
            u = u * x + 2.4801583156341611e-05;
764
            u = u * x + -0.001388888886393419;
765
            u = u * x + 0.041666666665954213;
766
            return u * x + -0.49999999999993117;
767
        }
768
    };
769
    T angle_squared = angle * angle;
770
    return sign * (1 + angle_squared * f(angle_squared));
771
#    else
772
    CALL_BUILTIN(cos, angle);
773
#    endif
774
#endif
775
5.77k
}
_ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEfEET_S3_
Line
Count
Source
716
5.77k
{
717
5.77k
    CONSTEXPR_STATE(cos, angle);
718
719
5.77k
#if ARCH(X86_64)
720
5.77k
    T ret;
721
5.77k
    asm(
722
5.77k
        "fcos"
723
5.77k
        : "=t"(ret)
724
5.77k
        : "0"(angle));
725
5.77k
    return ret;
726
#else
727
#    if defined(AK_OS_SERENITY)
728
    if (angle < 0)
729
        angle = -angle;
730
731
    if (angle >= 2 * Pi<T>)
732
        angle = fmod(angle, 2 * Pi<T>);
733
734
    T sign = 1;
735
    if (angle >= Pi<T>) {
736
        angle = angle - Pi<T>;
737
        sign = -1;
738
    }
739
740
    if (angle > Pi<T> / 2) {
741
        angle = Pi<T> - angle;
742
        sign = -sign;
743
    }
744
745
    // https://github.com/samhocevar/lolremez/wiki/Tutorial-3-of-5:-changing-variables-for-simpler-polynomials
746
    // for cos(x): cos(x) - 1 is a function of x^2 terms, so we do the substitution on that page like
747
    // max | cos(x) - 1 - x^2 Q(x^2) | and then set y = x^2. That yields:
748
    // max | (cos(sqrt(y)) - 1) - y Q(y) |. Dividing through with y (instead of sqrt(y) as in the sin() case) gives us:
749
    auto f = [](T x) {
750
        if constexpr (IsSame<T, f32>) {
751
            // lolremez --float --degree 3 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x"  "1/x"
752
            // "Estimated max error: 5.2720126e-8"
753
            f32 u = 2.3194387e-05f;
754
            u = u * x + -0.0013855927f;
755
            u = u * x + 0.041663989f;
756
            return u * x + -0.49999931f;
757
        } else {
758
            // lolremez --degree 6 --range "1e-50:pi*pi/4" "(cos(sqrt(x)) - 1)/x"  "1/x"
759
            // "Estimated max error: 2.0880269759116624e-15"
760
            T u = -1.101249182846601e-11;
761
            u = u * x + 2.0858735176345955e-09;
762
            u = u * x + -2.7556950755056579e-07;
763
            u = u * x + 2.4801583156341611e-05;
764
            u = u * x + -0.001388888886393419;
765
            u = u * x + 0.041666666665954213;
766
            return u * x + -0.49999999999993117;
767
        }
768
    };
769
    T angle_squared = angle * angle;
770
    return sign * (1 + angle_squared * f(angle_squared));
771
#    else
772
    CALL_BUILTIN(cos, angle);
773
#    endif
774
#endif
775
5.77k
}
Unexecuted instantiation: _ZN2AK12Trigonometry3cosITkNS_8Concepts13FloatingPointEdEET_S3_
776
777
template<FloatingPoint T>
778
constexpr void sincos(T angle, T& sin_val, T& cos_val)
779
45.4M
{
780
45.4M
    if (is_constant_evaluated()) {
781
0
        sin_val = sin(angle);
782
0
        cos_val = cos(angle);
783
0
        return;
784
0
    }
785
45.4M
#if ARCH(X86_64)
786
45.4M
    asm(
787
45.4M
        "fsincos"
788
45.4M
        : "=t"(cos_val), "=u"(sin_val)
789
45.4M
        : "0"(angle));
790
#else
791
    sin_val = sin(angle);
792
    cos_val = cos(angle);
793
#endif
794
45.4M
}
_ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEfEEvT_RS3_S4_
Line
Count
Source
779
43.0M
{
780
43.0M
    if (is_constant_evaluated()) {
781
0
        sin_val = sin(angle);
782
0
        cos_val = cos(angle);
783
0
        return;
784
0
    }
785
43.0M
#if ARCH(X86_64)
786
43.0M
    asm(
787
43.0M
        "fsincos"
788
43.0M
        : "=t"(cos_val), "=u"(sin_val)
789
43.0M
        : "0"(angle));
790
#else
791
    sin_val = sin(angle);
792
    cos_val = cos(angle);
793
#endif
794
43.0M
}
_ZN2AK12Trigonometry6sincosITkNS_8Concepts13FloatingPointEdEEvT_RS3_S4_
Line
Count
Source
779
2.49M
{
780
2.49M
    if (is_constant_evaluated()) {
781
0
        sin_val = sin(angle);
782
0
        cos_val = cos(angle);
783
0
        return;
784
0
    }
785
2.49M
#if ARCH(X86_64)
786
2.49M
    asm(
787
2.49M
        "fsincos"
788
2.49M
        : "=t"(cos_val), "=u"(sin_val)
789
2.49M
        : "0"(angle));
790
#else
791
    sin_val = sin(angle);
792
    cos_val = cos(angle);
793
#endif
794
2.49M
}
795
796
template<FloatingPoint T>
797
constexpr T tan(T angle)
798
19.9M
{
799
19.9M
    CONSTEXPR_STATE(tan, angle);
800
801
19.9M
#if ARCH(X86_64)
802
19.9M
    T ret, one;
803
19.9M
    asm(
804
19.9M
        "fptan"
805
19.9M
        : "=t"(one), "=u"(ret)
806
19.9M
        : "0"(angle));
807
808
19.9M
    return ret;
809
#else
810
#    if defined(AK_OS_SERENITY)
811
    return sin(angle) / cos(angle);
812
#    else
813
    CALL_BUILTIN(tan, angle);
814
#    endif
815
#endif
816
19.9M
}
817
818
template<FloatingPoint T>
819
constexpr T asin(T x)
820
{
821
    CONSTEXPR_STATE(asin, x);
822
823
    if (x < 0)
824
        return -asin(-x);
825
826
    if (x > 1)
827
        return NaN<T>;
828
829
    if (x > (T)0.5) {
830
        // asin(x) = pi/2 - 2 * asin(sqrt((1 - x) / 2))
831
        // If 0.5 < x <= 1, then sqrt((1 - x) / 2 ) < 0.5,
832
        // and the recursion will go down the `<= 0.5` branch.
833
        return Pi<T> / 2 - 2 * asin(sqrt((1 - x) / 2));
834
    }
835
836
    T squared = x * x;
837
    T value = x;
838
    T i = x * squared;
839
    value += i * Details::product_odd<1>() / Details::product_even<2>() / 3;
840
    i *= squared;
841
    value += i * Details::product_odd<3>() / Details::product_even<4>() / 5;
842
    i *= squared;
843
    value += i * Details::product_odd<5>() / Details::product_even<6>() / 7;
844
    i *= squared;
845
    value += i * Details::product_odd<7>() / Details::product_even<8>() / 9;
846
    i *= squared;
847
    value += i * Details::product_odd<9>() / Details::product_even<10>() / 11;
848
    i *= squared;
849
    value += i * Details::product_odd<11>() / Details::product_even<12>() / 13;
850
    i *= squared;
851
    value += i * Details::product_odd<13>() / Details::product_even<14>() / 15;
852
    i *= squared;
853
    value += i * Details::product_odd<15>() / Details::product_even<16>() / 17;
854
    return value;
855
}
856
857
template<FloatingPoint T>
858
constexpr T acos(T value)
859
{
860
    CONSTEXPR_STATE(acos, value);
861
862
    // FIXME: I am naive
863
    return static_cast<T>(0.5) * Pi<T> - asin<T>(value);
864
}
865
866
template<FloatingPoint T>
867
constexpr T atan(T value)
868
{
869
    CONSTEXPR_STATE(atan, value);
870
871
#if ARCH(X86_64)
872
    T ret;
873
    asm(
874
        "fld1\n"
875
        "fpatan\n"
876
        : "=t"(ret)
877
        : "0"(value));
878
    return ret;
879
#else
880
#    if defined(AK_OS_SERENITY)
881
    if (value < 0)
882
        return -atan(-value);
883
884
    if (value > 1)
885
        return Pi<T> / 2 - atan(1 / value);
886
887
    // atan is an odd function a leading factor of 1, so this uses the same substitutions as sin().
888
    // See there for a description.
889
    auto f = [](T x) {
890
        if constexpr (IsSame<T, f32>) {
891
            // lolremez --float --degree 7 --range "1e-50:1" "(atan(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))"
892
            // "Estimated max error: 7.351572e-9"
893
            float u = 0.0026222446f;
894
            u = u * x + -0.015132537f;
895
            u = u * x + 0.041121863f;
896
            u = u * x + -0.073667064f;
897
            u = u * x + 0.10573932f;
898
            u = u * x + -0.14185975f;
899
            u = u * x + 0.19990396f;
900
            return u * x + -0.33332986f;
901
        } else {
902
            // FIXME: Could do something custom for long double.
903
            // lolremez --degree 15 --range "1e-50:1" "(atan(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(x*sqrt(x))"
904
            // "Estimated max error: 2.695747111292741e-15"
905
            T u = 6.4855700791782353e-05;
906
            u = u * x + -0.00062980993515420608;
907
            u = u * x + 0.0028877745234626882;
908
            u = u * x + -0.0083913659122280861;
909
            u = u * x + 0.01759373283992496;
910
            u = u * x + -0.028943865588822337;
911
            u = u * x + 0.04001711781175539;
912
            u = u * x + -0.049493567473208426;
913
            u = u * x + 0.05782092815821073;
914
            u = u * x + -0.066423996784058609;
915
            u = u * x + 0.076879768543915233;
916
            u = u * x + -0.090903598304650779;
917
            u = u * x + 0.11111064186237864;
918
            u = u * x + -0.14285711801574916;
919
            u = u * x + 0.19999999929660117;
920
            return u * x + -0.33333333332571796;
921
        }
922
    };
923
    T squared = value * value;
924
    return value + value * squared * f(squared);
925
#    endif
926
    CALL_BUILTIN(atan, value);
927
#endif
928
}
929
930
template<FloatingPoint T>
931
constexpr T atan2(T y, T x)
932
4.97M
{
933
4.97M
    CONSTEXPR_STATE(atan2, y, x);
934
935
4.97M
#if ARCH(X86_64)
936
4.97M
    T ret;
937
4.97M
    asm("fpatan"
938
4.97M
        : "=t"(ret)
939
4.97M
        : "0"(x), "u"(y)
940
4.97M
        : "st(1)");
941
4.97M
    return ret;
942
#else
943
#    if defined(AK_OS_SERENITY)
944
    if (__builtin_isnan(y))
945
        return y;
946
    if (__builtin_isnan(x))
947
        return x;
948
949
    // SPECIAL VALUES
950
    //      atan2(±0, -0) returns ±pi.
951
    if (y == 0 && x == 0 && signbit(x))
952
        return copysign(Pi<T>, y);
953
954
    //      atan2(±0, +0) returns ±0.
955
    if (y == 0 && x == 0 && !signbit(x))
956
        return y;
957
958
    //      atan2(±0, x) returns ±pi for x < 0.
959
    if (y == 0 && x < 0)
960
        return copysign(Pi<T>, y);
961
962
    //      atan2(±0, x) returns ±0 for x > 0.
963
    if (y == 0 && x > 0)
964
        return y;
965
966
    //      atan2(y, ±0) returns +pi/2 for y > 0.
967
    if (y > 0 && x == 0)
968
        return Pi<T> / 2;
969
970
    //      atan2(y, ±0) returns -pi/2 for y < 0.
971
    if (y < 0 && x == 0)
972
        return -Pi<T> / 2;
973
974
    //      atan2(±y, -infinity) returns ±pi for finite y > 0.
975
    if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x))
976
        return copysign(Pi<T>, y);
977
978
    //      atan2(±y, +infinity) returns ±0 for finite y > 0.
979
    if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x))
980
        return copysign(static_cast<T>(0), y);
981
982
    //      atan2(±infinity, x) returns ±pi/2 for finite x.
983
    if (__builtin_isinf(y) && !__builtin_isinf(x))
984
        return copysign(Pi<T> / 2, y);
985
986
    //      atan2(±infinity, -infinity) returns ±3*pi/4.
987
    if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x))
988
        return copysign(3 * Pi<T> / 4, y);
989
990
    //      atan2(±infinity, +infinity) returns ±pi/4.
991
    if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x))
992
        return copysign(Pi<T> / 4, y);
993
994
    // Check quadrant, going counterclockwise.
995
    if (y > 0 && x > 0)
996
        return atan(y / x);
997
    if (y > 0 && x < 0)
998
        return atan(y / x) + Pi<T>;
999
    if (y < 0 && x < 0)
1000
        return atan(y / x) - Pi<T>;
1001
    // y < 0 && x > 0
1002
    return atan(y / x);
1003
#    else
1004
    CALL_BUILTIN(atan2, y, x);
1005
#    endif
1006
#endif
1007
4.97M
}
_ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEfEET_S3_S3_
Line
Count
Source
932
4.29k
{
933
4.29k
    CONSTEXPR_STATE(atan2, y, x);
934
935
4.29k
#if ARCH(X86_64)
936
4.29k
    T ret;
937
4.29k
    asm("fpatan"
938
4.29k
        : "=t"(ret)
939
4.29k
        : "0"(x), "u"(y)
940
4.29k
        : "st(1)");
941
4.29k
    return ret;
942
#else
943
#    if defined(AK_OS_SERENITY)
944
    if (__builtin_isnan(y))
945
        return y;
946
    if (__builtin_isnan(x))
947
        return x;
948
949
    // SPECIAL VALUES
950
    //      atan2(±0, -0) returns ±pi.
951
    if (y == 0 && x == 0 && signbit(x))
952
        return copysign(Pi<T>, y);
953
954
    //      atan2(±0, +0) returns ±0.
955
    if (y == 0 && x == 0 && !signbit(x))
956
        return y;
957
958
    //      atan2(±0, x) returns ±pi for x < 0.
959
    if (y == 0 && x < 0)
960
        return copysign(Pi<T>, y);
961
962
    //      atan2(±0, x) returns ±0 for x > 0.
963
    if (y == 0 && x > 0)
964
        return y;
965
966
    //      atan2(y, ±0) returns +pi/2 for y > 0.
967
    if (y > 0 && x == 0)
968
        return Pi<T> / 2;
969
970
    //      atan2(y, ±0) returns -pi/2 for y < 0.
971
    if (y < 0 && x == 0)
972
        return -Pi<T> / 2;
973
974
    //      atan2(±y, -infinity) returns ±pi for finite y > 0.
975
    if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x))
976
        return copysign(Pi<T>, y);
977
978
    //      atan2(±y, +infinity) returns ±0 for finite y > 0.
979
    if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x))
980
        return copysign(static_cast<T>(0), y);
981
982
    //      atan2(±infinity, x) returns ±pi/2 for finite x.
983
    if (__builtin_isinf(y) && !__builtin_isinf(x))
984
        return copysign(Pi<T> / 2, y);
985
986
    //      atan2(±infinity, -infinity) returns ±3*pi/4.
987
    if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x))
988
        return copysign(3 * Pi<T> / 4, y);
989
990
    //      atan2(±infinity, +infinity) returns ±pi/4.
991
    if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x))
992
        return copysign(Pi<T> / 4, y);
993
994
    // Check quadrant, going counterclockwise.
995
    if (y > 0 && x > 0)
996
        return atan(y / x);
997
    if (y > 0 && x < 0)
998
        return atan(y / x) + Pi<T>;
999
    if (y < 0 && x < 0)
1000
        return atan(y / x) - Pi<T>;
1001
    // y < 0 && x > 0
1002
    return atan(y / x);
1003
#    else
1004
    CALL_BUILTIN(atan2, y, x);
1005
#    endif
1006
#endif
1007
4.29k
}
_ZN2AK12Trigonometry5atan2ITkNS_8Concepts13FloatingPointEdEET_S3_S3_
Line
Count
Source
932
4.97M
{
933
4.97M
    CONSTEXPR_STATE(atan2, y, x);
934
935
4.97M
#if ARCH(X86_64)
936
4.97M
    T ret;
937
4.97M
    asm("fpatan"
938
4.97M
        : "=t"(ret)
939
4.97M
        : "0"(x), "u"(y)
940
4.97M
        : "st(1)");
941
4.97M
    return ret;
942
#else
943
#    if defined(AK_OS_SERENITY)
944
    if (__builtin_isnan(y))
945
        return y;
946
    if (__builtin_isnan(x))
947
        return x;
948
949
    // SPECIAL VALUES
950
    //      atan2(±0, -0) returns ±pi.
951
    if (y == 0 && x == 0 && signbit(x))
952
        return copysign(Pi<T>, y);
953
954
    //      atan2(±0, +0) returns ±0.
955
    if (y == 0 && x == 0 && !signbit(x))
956
        return y;
957
958
    //      atan2(±0, x) returns ±pi for x < 0.
959
    if (y == 0 && x < 0)
960
        return copysign(Pi<T>, y);
961
962
    //      atan2(±0, x) returns ±0 for x > 0.
963
    if (y == 0 && x > 0)
964
        return y;
965
966
    //      atan2(y, ±0) returns +pi/2 for y > 0.
967
    if (y > 0 && x == 0)
968
        return Pi<T> / 2;
969
970
    //      atan2(y, ±0) returns -pi/2 for y < 0.
971
    if (y < 0 && x == 0)
972
        return -Pi<T> / 2;
973
974
    //      atan2(±y, -infinity) returns ±pi for finite y > 0.
975
    if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && signbit(x))
976
        return copysign(Pi<T>, y);
977
978
    //      atan2(±y, +infinity) returns ±0 for finite y > 0.
979
    if (!__builtin_isinf(y) && y > 0 && __builtin_isinf(x) && !signbit(x))
980
        return copysign(static_cast<T>(0), y);
981
982
    //      atan2(±infinity, x) returns ±pi/2 for finite x.
983
    if (__builtin_isinf(y) && !__builtin_isinf(x))
984
        return copysign(Pi<T> / 2, y);
985
986
    //      atan2(±infinity, -infinity) returns ±3*pi/4.
987
    if (__builtin_isinf(y) && __builtin_isinf(x) && signbit(x))
988
        return copysign(3 * Pi<T> / 4, y);
989
990
    //      atan2(±infinity, +infinity) returns ±pi/4.
991
    if (__builtin_isinf(y) && __builtin_isinf(x) && !signbit(x))
992
        return copysign(Pi<T> / 4, y);
993
994
    // Check quadrant, going counterclockwise.
995
    if (y > 0 && x > 0)
996
        return atan(y / x);
997
    if (y > 0 && x < 0)
998
        return atan(y / x) + Pi<T>;
999
    if (y < 0 && x < 0)
1000
        return atan(y / x) - Pi<T>;
1001
    // y < 0 && x > 0
1002
    return atan(y / x);
1003
#    else
1004
    CALL_BUILTIN(atan2, y, x);
1005
#    endif
1006
#endif
1007
4.97M
}
1008
1009
}
1010
1011
using Trigonometry::acos;
1012
using Trigonometry::asin;
1013
using Trigonometry::atan;
1014
using Trigonometry::atan2;
1015
using Trigonometry::cos;
1016
using Trigonometry::hypot;
1017
using Trigonometry::sin;
1018
using Trigonometry::sincos;
1019
using Trigonometry::tan;
1020
1021
namespace Exponentials {
1022
1023
template<FloatingPoint T>
1024
constexpr T log2(T x)
1025
4.13M
{
1026
4.13M
    CONSTEXPR_STATE(log2, x);
1027
1028
4.13M
#if ARCH(X86_64)
1029
    if constexpr (IsSame<T, long double>) {
1030
        T ret;
1031
        asm(
1032
            "fld1\n"
1033
            "fxch %%st(1)\n"
1034
            "fyl2x\n"
1035
            : "=t"(ret)
1036
            : "0"(x));
1037
        return ret;
1038
    }
1039
4.13M
#endif
1040
    // References:
1041
    // Gist comparing some implementations
1042
    // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9
1043
1044
4.13M
    if (x == 0)
1045
0
        return -Infinity<T>;
1046
4.13M
    if (x <= 0 || __builtin_isnan(x))
1047
0
        return NaN<T>;
1048
1049
4.13M
    auto ext = FloatExtractor<T>::from_float(x);
1050
4.13M
    T exponent = ext.exponent - FloatExtractor<T>::exponent_bias;
1051
1052
    // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2
1053
4.13M
    if (ext.mantissa == 0)
1054
3.66M
        return exponent;
1055
1056
    // FIXME: Handle denormalized numbers separately
1057
1058
475k
    FloatExtractor<T> mantissa_ext {
1059
475k
        .mantissa = ext.mantissa,
1060
475k
        .exponent = FloatExtractor<T>::exponent_bias,
1061
475k
        .sign = ext.sign
1062
475k
    };
1063
1064
    // (1 <= mantissa < 2)
1065
475k
    T m = mantissa_ext.to_float();
1066
1067
    // This is a reconstruction of one of Sun's algorithms
1068
    // They use a transformation to lower the problem space,
1069
    // while keeping the precision, and a 14th degree polynomial,
1070
    // which is mirrored at sqrt(2)
1071
    // TODO: Sun has some more algorithms for this and other math functions,
1072
    //       leveraging LUTs, investigate those, if they are better in performance and/or precision.
1073
    //       These seem to be related to crLibM's implementations, for which papers and references
1074
    //       are available.
1075
    // FIXME: Dynamically adjust the amount of precision between floats and doubles
1076
    //        AKA floats might need less accuracy here, in comparison to doubles
1077
1078
475k
    bool inverted = false;
1079
    // m > sqrt(2)
1080
475k
    if (m > Sqrt2<T>) {
1081
161k
        inverted = true;
1082
161k
        m = 2 / m;
1083
161k
    }
1084
475k
    T s = (m - 1) / (m + 1);
1085
    // ln((1 + s) / (1 - s)) == ln(m)
1086
475k
    T s2 = s * s;
1087
    // clang-format off
1088
475k
    T high_approx = s2 * (static_cast<T>(0.6666666666666735130)
1089
475k
                  + s2 * (static_cast<T>(0.3999999999940941908)
1090
475k
                  + s2 * (static_cast<T>(0.2857142874366239149)
1091
475k
                  + s2 * (static_cast<T>(0.2222219843214978396)
1092
475k
                  + s2 * (static_cast<T>(0.1818357216161805012)
1093
475k
                  + s2 * (static_cast<T>(0.1531383769920937332)
1094
475k
                  + s2 *  static_cast<T>(0.1479819860511658591)))))));
1095
    // clang-format on
1096
1097
    // ln(m) == 2 * s + s * high_approx
1098
    // log2(m) == log2(e) * ln(m)
1099
475k
    T log2_mantissa = L2_E<T> * (2 * s + s * high_approx);
1100
475k
    if (inverted)
1101
161k
        log2_mantissa = 1 - log2_mantissa;
1102
475k
    return exponent + log2_mantissa;
1103
4.13M
}
_ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEfEET_S3_
Line
Count
Source
1025
4.13M
{
1026
4.13M
    CONSTEXPR_STATE(log2, x);
1027
1028
4.13M
#if ARCH(X86_64)
1029
    if constexpr (IsSame<T, long double>) {
1030
        T ret;
1031
        asm(
1032
            "fld1\n"
1033
            "fxch %%st(1)\n"
1034
            "fyl2x\n"
1035
            : "=t"(ret)
1036
            : "0"(x));
1037
        return ret;
1038
    }
1039
4.13M
#endif
1040
    // References:
1041
    // Gist comparing some implementations
1042
    // * https://gist.github.com/Hendiadyoin1/f58346d66637deb9156ef360aa158bf9
1043
1044
4.13M
    if (x == 0)
1045
0
        return -Infinity<T>;
1046
4.13M
    if (x <= 0 || __builtin_isnan(x))
1047
0
        return NaN<T>;
1048
1049
4.13M
    auto ext = FloatExtractor<T>::from_float(x);
1050
4.13M
    T exponent = ext.exponent - FloatExtractor<T>::exponent_bias;
1051
1052
    // When the mantissa shows 0b00 (implicitly 1.0) we are on a power of 2
1053
4.13M
    if (ext.mantissa == 0)
1054
3.66M
        return exponent;
1055
1056
    // FIXME: Handle denormalized numbers separately
1057
1058
475k
    FloatExtractor<T> mantissa_ext {
1059
475k
        .mantissa = ext.mantissa,
1060
475k
        .exponent = FloatExtractor<T>::exponent_bias,
1061
475k
        .sign = ext.sign
1062
475k
    };
1063
1064
    // (1 <= mantissa < 2)
1065
475k
    T m = mantissa_ext.to_float();
1066
1067
    // This is a reconstruction of one of Sun's algorithms
1068
    // They use a transformation to lower the problem space,
1069
    // while keeping the precision, and a 14th degree polynomial,
1070
    // which is mirrored at sqrt(2)
1071
    // TODO: Sun has some more algorithms for this and other math functions,
1072
    //       leveraging LUTs, investigate those, if they are better in performance and/or precision.
1073
    //       These seem to be related to crLibM's implementations, for which papers and references
1074
    //       are available.
1075
    // FIXME: Dynamically adjust the amount of precision between floats and doubles
1076
    //        AKA floats might need less accuracy here, in comparison to doubles
1077
1078
475k
    bool inverted = false;
1079
    // m > sqrt(2)
1080
475k
    if (m > Sqrt2<T>) {
1081
161k
        inverted = true;
1082
161k
        m = 2 / m;
1083
161k
    }
1084
475k
    T s = (m - 1) / (m + 1);
1085
    // ln((1 + s) / (1 - s)) == ln(m)
1086
475k
    T s2 = s * s;
1087
    // clang-format off
1088
475k
    T high_approx = s2 * (static_cast<T>(0.6666666666666735130)
1089
475k
                  + s2 * (static_cast<T>(0.3999999999940941908)
1090
475k
                  + s2 * (static_cast<T>(0.2857142874366239149)
1091
475k
                  + s2 * (static_cast<T>(0.2222219843214978396)
1092
475k
                  + s2 * (static_cast<T>(0.1818357216161805012)
1093
475k
                  + s2 * (static_cast<T>(0.1531383769920937332)
1094
475k
                  + s2 *  static_cast<T>(0.1479819860511658591)))))));
1095
    // clang-format on
1096
1097
    // ln(m) == 2 * s + s * high_approx
1098
    // log2(m) == log2(e) * ln(m)
1099
475k
    T log2_mantissa = L2_E<T> * (2 * s + s * high_approx);
1100
475k
    if (inverted)
1101
161k
        log2_mantissa = 1 - log2_mantissa;
1102
475k
    return exponent + log2_mantissa;
1103
4.13M
}
Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_
Unexecuted instantiation: _ZN2AK12Exponentials4log2ITkNS_8Concepts13FloatingPointEdEET_S3_
1104
1105
template<FloatingPoint T>
1106
constexpr T log(T x)
1107
48
{
1108
48
    CONSTEXPR_STATE(log, x);
1109
1110
48
#if ARCH(X86_64)
1111
48
    T ret;
1112
48
    asm(
1113
48
        "fldln2\n"
1114
48
        "fxch %%st(1)\n"
1115
48
        "fyl2x\n"
1116
48
        : "=t"(ret)
1117
48
        : "0"(x));
1118
48
    return ret;
1119
#elif defined(AK_OS_SERENITY)
1120
    // FIXME: Adjust the polynomial and formula in log2 to fit this
1121
    return log2<T>(x) / L2_E<T>;
1122
#else
1123
    CALL_BUILTIN(log, x);
1124
#endif
1125
48
}
1126
1127
template<FloatingPoint T>
1128
constexpr T log10(T x)
1129
{
1130
    CONSTEXPR_STATE(log10, x);
1131
1132
#if ARCH(X86_64)
1133
    T ret;
1134
    asm(
1135
        "fldlg2\n"
1136
        "fxch %%st(1)\n"
1137
        "fyl2x\n"
1138
        : "=t"(ret)
1139
        : "0"(x));
1140
    return ret;
1141
#elif defined(AK_OS_SERENITY)
1142
    // FIXME: Adjust the polynomial and formula in log2 to fit this
1143
    return log2<T>(x) / L2_10<T>;
1144
#else
1145
    CALL_BUILTIN(log10, x);
1146
#endif
1147
}
1148
1149
template<FloatingPoint T>
1150
constexpr T exp2(T exponent)
1151
4.13M
{
1152
4.13M
    CONSTEXPR_STATE(exp2, exponent);
1153
1154
4.13M
#if ARCH(X86_64)
1155
4.13M
    T res;
1156
4.13M
    asm("fld1\n"
1157
4.13M
        "fld %%st(1)\n"
1158
4.13M
        "fprem\n"
1159
4.13M
        "f2xm1\n"
1160
4.13M
        "faddp\n"
1161
4.13M
        "fscale\n"
1162
4.13M
        "fstp %%st(1)"
1163
4.13M
        : "=t"(res)
1164
4.13M
        : "0"(exponent));
1165
4.13M
    return res;
1166
#else
1167
    // TODO: Add better implementation of this function.
1168
    // This is just fast exponentiation for the integer part and
1169
    // the first couple terms of the taylor series for the fractional part.
1170
1171
    if (exponent < 0)
1172
        return 1 / exp2(-exponent);
1173
1174
    if (exponent >= log2(NumericLimits<T>::max()))
1175
        return Infinity<T>;
1176
1177
    // Integer exponentiation part.
1178
    int int_exponent = static_cast<int>(exponent);
1179
    T exponent_fraction = exponent - int_exponent;
1180
1181
    T int_result = 1;
1182
    T base = 2;
1183
    for (;;) {
1184
        if (int_exponent & 1)
1185
            int_result *= base;
1186
        int_exponent >>= 1;
1187
        if (!int_exponent)
1188
            break;
1189
        base *= base;
1190
    }
1191
1192
    // Fractional part.
1193
    // Uses:
1194
    // exp(x) = sum(n, 0, \infty, x ** n / n!)
1195
    // 2**x = exp(log2(e) * x)
1196
    // FIXME: Pick better step size (and make it dependent on T).
1197
    T result = 0;
1198
    T power = 1;
1199
    T factorial = 1;
1200
    for (int i = 1; i < 16; ++i) {
1201
        result += power / factorial;
1202
        power *= exponent_fraction / L2_E<T>;
1203
        factorial *= i;
1204
    }
1205
1206
    return int_result * result;
1207
#endif
1208
4.13M
}
_ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEfEET_S3_
Line
Count
Source
1151
4.13M
{
1152
4.13M
    CONSTEXPR_STATE(exp2, exponent);
1153
1154
4.13M
#if ARCH(X86_64)
1155
4.13M
    T res;
1156
4.13M
    asm("fld1\n"
1157
4.13M
        "fld %%st(1)\n"
1158
4.13M
        "fprem\n"
1159
4.13M
        "f2xm1\n"
1160
4.13M
        "faddp\n"
1161
4.13M
        "fscale\n"
1162
4.13M
        "fstp %%st(1)"
1163
4.13M
        : "=t"(res)
1164
4.13M
        : "0"(exponent));
1165
4.13M
    return res;
1166
#else
1167
    // TODO: Add better implementation of this function.
1168
    // This is just fast exponentiation for the integer part and
1169
    // the first couple terms of the taylor series for the fractional part.
1170
1171
    if (exponent < 0)
1172
        return 1 / exp2(-exponent);
1173
1174
    if (exponent >= log2(NumericLimits<T>::max()))
1175
        return Infinity<T>;
1176
1177
    // Integer exponentiation part.
1178
    int int_exponent = static_cast<int>(exponent);
1179
    T exponent_fraction = exponent - int_exponent;
1180
1181
    T int_result = 1;
1182
    T base = 2;
1183
    for (;;) {
1184
        if (int_exponent & 1)
1185
            int_result *= base;
1186
        int_exponent >>= 1;
1187
        if (!int_exponent)
1188
            break;
1189
        base *= base;
1190
    }
1191
1192
    // Fractional part.
1193
    // Uses:
1194
    // exp(x) = sum(n, 0, \infty, x ** n / n!)
1195
    // 2**x = exp(log2(e) * x)
1196
    // FIXME: Pick better step size (and make it dependent on T).
1197
    T result = 0;
1198
    T power = 1;
1199
    T factorial = 1;
1200
    for (int i = 1; i < 16; ++i) {
1201
        result += power / factorial;
1202
        power *= exponent_fraction / L2_E<T>;
1203
        factorial *= i;
1204
    }
1205
1206
    return int_result * result;
1207
#endif
1208
4.13M
}
Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_
Unexecuted instantiation: _ZN2AK12Exponentials4exp2ITkNS_8Concepts13FloatingPointEdEET_S3_
1209
1210
template<FloatingPoint T>
1211
constexpr T exp(T exponent)
1212
0
{
1213
0
    CONSTEXPR_STATE(exp, exponent);
1214
1215
0
#if ARCH(X86_64)
1216
0
    T res;
1217
0
    asm("fldl2e\n"
1218
0
        "fmulp\n"
1219
0
        "fld1\n"
1220
0
        "fld %%st(1)\n"
1221
0
        "fprem\n"
1222
0
        "f2xm1\n"
1223
0
        "faddp\n"
1224
0
        "fscale\n"
1225
0
        "fstp %%st(1)"
1226
0
        : "=t"(res)
1227
0
        : "0"(exponent));
1228
0
    return res;
1229
#else
1230
    // TODO: Add better implementation of this function.
1231
    return exp2(exponent * L2_E<T>);
1232
#endif
1233
0
}
Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_
Unexecuted instantiation: _ZN2AK12Exponentials3expITkNS_8Concepts13FloatingPointEfEET_S3_
1234
1235
}
1236
1237
using Exponentials::exp;
1238
using Exponentials::exp2;
1239
using Exponentials::log;
1240
using Exponentials::log10;
1241
using Exponentials::log2;
1242
1243
namespace Hyperbolic {
1244
1245
template<FloatingPoint T>
1246
constexpr T sinh(T x)
1247
{
1248
    T exponentiated = exp<T>(x);
1249
    if (x > 0)
1250
        return (exponentiated * exponentiated - 1) / 2 / exponentiated;
1251
    return (exponentiated - 1 / exponentiated) / 2;
1252
}
1253
1254
template<FloatingPoint T>
1255
constexpr T cosh(T x)
1256
{
1257
    CONSTEXPR_STATE(cosh, x);
1258
1259
    T exponentiated = exp(-x);
1260
    if (x < 0)
1261
        return (1 + exponentiated * exponentiated) / 2 / exponentiated;
1262
    return (1 / exponentiated + exponentiated) / 2;
1263
}
1264
1265
template<FloatingPoint T>
1266
constexpr T tanh(T x)
1267
{
1268
    if (x > 0) {
1269
        T exponentiated = exp<T>(2 * x);
1270
        return (exponentiated - 1) / (exponentiated + 1);
1271
    }
1272
    T plusX = exp<T>(x);
1273
    T minusX = 1 / plusX;
1274
    return (plusX - minusX) / (plusX + minusX);
1275
}
1276
1277
template<FloatingPoint T>
1278
constexpr T asinh(T x)
1279
{
1280
    return log<T>(x + sqrt<T>(x * x + 1));
1281
}
1282
1283
template<FloatingPoint T>
1284
constexpr T acosh(T x)
1285
{
1286
    return log<T>(x + sqrt<T>(x * x - 1));
1287
}
1288
1289
template<FloatingPoint T>
1290
constexpr T atanh(T x)
1291
{
1292
    return log<T>((1 + x) / (1 - x)) / (T)2.0l;
1293
}
1294
1295
}
1296
1297
using Hyperbolic::acosh;
1298
using Hyperbolic::asinh;
1299
using Hyperbolic::atanh;
1300
using Hyperbolic::cosh;
1301
using Hyperbolic::sinh;
1302
using Hyperbolic::tanh;
1303
1304
// Calculate x^y with fast exponentiation when the power is a natural number.
1305
template<FloatingPoint F, UnsignedIntegral U>
1306
constexpr F pow_int(F x, U y)
1307
132k
{
1308
132k
    auto result = static_cast<F>(1);
1309
788k
    while (y > 0) {
1310
656k
        if (y % 2 == 1) {
1311
418k
            result *= x;
1312
418k
        }
1313
656k
        x = x * x;
1314
656k
        y >>= 1;
1315
656k
    }
1316
132k
    return result;
1317
132k
}
_ZN2AK7pow_intITkNS_8Concepts13FloatingPointEfTkNS1_16UnsignedIntegralEmEET_S2_T0_
Line
Count
Source
1307
132k
{
1308
132k
    auto result = static_cast<F>(1);
1309
788k
    while (y > 0) {
1310
656k
        if (y % 2 == 1) {
1311
418k
            result *= x;
1312
418k
        }
1313
656k
        x = x * x;
1314
656k
        y >>= 1;
1315
656k
    }
1316
132k
    return result;
1317
132k
}
Unexecuted instantiation: _ZN2AK7pow_intITkNS_8Concepts13FloatingPointEdTkNS1_16UnsignedIntegralEmEET_S2_T0_
1318
1319
template<FloatingPoint T>
1320
constexpr T pow(T x, T y)
1321
17.0M
{
1322
17.0M
    CONSTEXPR_STATE(pow, x, y);
1323
17.0M
    if (__builtin_isnan(y))
1324
0
        return y;
1325
17.0M
    if (y == 0)
1326
12.6k
        return 1;
1327
17.0M
    if (x == 0)
1328
12.7M
        return 0;
1329
4.27M
    if (y == 1)
1330
3.29k
        return x;
1331
1332
    // Take an integer fast path as long as the value fits within a 64-bit integer.
1333
4.27M
    if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) {
1334
4.27M
        i64 y_as_int = static_cast<i64>(y);
1335
4.27M
        if (y == static_cast<T>(y_as_int)) {
1336
132k
            T result = pow_int(x, static_cast<u64>(fabs<T>(y)));
1337
132k
            if (y < 0)
1338
103k
                result = static_cast<T>(1.0l) / result;
1339
132k
            return result;
1340
132k
        }
1341
4.27M
    }
1342
1343
    // FIXME: This formula suffers from error magnification.
1344
4.13M
    return exp2<T>(y * log2<T>(x));
1345
4.27M
}
_ZN2AK3powITkNS_8Concepts13FloatingPointEfEET_S2_S2_
Line
Count
Source
1321
17.0M
{
1322
17.0M
    CONSTEXPR_STATE(pow, x, y);
1323
17.0M
    if (__builtin_isnan(y))
1324
0
        return y;
1325
17.0M
    if (y == 0)
1326
12.6k
        return 1;
1327
17.0M
    if (x == 0)
1328
12.7M
        return 0;
1329
4.27M
    if (y == 1)
1330
3.29k
        return x;
1331
1332
    // Take an integer fast path as long as the value fits within a 64-bit integer.
1333
4.27M
    if (y >= static_cast<T>(NumericLimits<i64>::min()) && y < static_cast<T>(NumericLimits<i64>::max())) {
1334
4.27M
        i64 y_as_int = static_cast<i64>(y);
1335
4.27M
        if (y == static_cast<T>(y_as_int)) {
1336
132k
            T result = pow_int(x, static_cast<u64>(fabs<T>(y)));
1337
132k
            if (y < 0)
1338
103k
                result = static_cast<T>(1.0l) / result;
1339
132k
            return result;
1340
132k
        }
1341
4.27M
    }
1342
1343
    // FIXME: This formula suffers from error magnification.
1344
4.13M
    return exp2<T>(y * log2<T>(x));
1345
4.27M
}
Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_
Unexecuted instantiation: _ZN2AK3powITkNS_8Concepts13FloatingPointEdEET_S2_S2_
1346
1347
template<Integral I, typename T>
1348
constexpr I clamp_to(T value)
1349
0
{
1350
0
    constexpr auto max = static_cast<T>(NumericLimits<I>::max());
1351
0
    if constexpr (max > 0) {
1352
0
        if (value >= static_cast<T>(NumericLimits<I>::max()))
1353
0
            return NumericLimits<I>::max();
1354
0
    }
1355
1356
0
    constexpr auto min = static_cast<T>(NumericLimits<I>::min());
1357
0
    if constexpr (min <= 0) {
1358
0
        if (value <= static_cast<T>(NumericLimits<I>::min()))
1359
0
            return NumericLimits<I>::min();
1360
0
    }
1361
1362
    if constexpr (IsFloatingPoint<T>)
1363
0
        return round_to<I>(value);
1364
1365
0
    return value;
1366
0
}
Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEldEET_T0_
Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralElmEET_T0_
Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEhdEET_T0_
Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEilEET_T0_
Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEifEET_T0_
Unexecuted instantiation: _ZN2AK8clamp_toITkNS_8Concepts8IntegralEidEET_T0_
1367
1368
// Wrap a to keep it in the range [-b, b].
1369
template<typename T>
1370
constexpr T wrap_to_range(T a, IdentityType<T> b)
1371
{
1372
    return fmod(fmod(a + b, 2 * b) + 2 * b, 2 * b) - b;
1373
}
1374
1375
#undef CALL_BUILTIN
1376
#undef CONSTEXPR_STATE
1377
#undef AARCH64_INSTRUCTION
1378
}
1379
1380
#if USING_AK_GLOBALLY
1381
using AK::round_to;
1382
#endif