/src/serenity/AK/SIMDMath.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/Math.h> |
10 | | #include <AK/SIMD.h> |
11 | | #include <AK/SIMDExtras.h> |
12 | | #include <math.h> |
13 | | |
14 | | namespace AK::SIMD { |
15 | | |
16 | | // Functions ending in "_int_range" only accept arguments within range [INT_MIN, INT_MAX]. |
17 | | // Other inputs will generate unexpected results. |
18 | | |
19 | | ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v) |
20 | 0 | { |
21 | 0 | return simd_cast<f32x4>(simd_cast<i32x4>(v)); |
22 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::truncate_int_range(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::truncate_int_range(float __vector(4)) |
23 | | |
24 | | ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v) |
25 | 0 | { |
26 | 0 | auto t = truncate_int_range(v); |
27 | 0 | return t > v ? t - 1.0f : t; |
28 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::floor_int_range(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::floor_int_range(float __vector(4)) |
29 | | |
30 | | ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v) |
31 | 0 | { |
32 | 0 | auto t = truncate_int_range(v); |
33 | 0 | return t < v ? t + 1.0f : t; |
34 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::ceil_int_range(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::ceil_int_range(float __vector(4)) |
35 | | |
36 | | ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v) |
37 | 0 | { |
38 | 0 | return v - floor_int_range(v); |
39 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::frac_int_range(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::frac_int_range(float __vector(4)) |
40 | | |
41 | | template<SIMDVector T> |
42 | | ALWAYS_INLINE T bitselect(T v1, T v2, T control_mask) |
43 | 0 | { |
44 | 0 | return (v1 & control_mask) | (v2 & ~control_mask); |
45 | 0 | } Unexecuted instantiation: _ZN2AK4SIMD9bitselectITkNS0_10SIMDVectorEDv4_sEET_S3_S3_S3_ Unexecuted instantiation: _ZN2AK4SIMD9bitselectITkNS0_10SIMDVectorEDv4_iEET_S3_S3_S3_ |
46 | | |
47 | | template<SIMDVector T> |
48 | | requires(IsIntegral<ElementOf<T>>) |
49 | | ALWAYS_INLINE T abs(T x) |
50 | 0 | { |
51 | 0 | return bitselect(x, -x, x > 0); |
52 | 0 | } Unexecuted instantiation: _ZN2AK4SIMD3absITkNS0_10SIMDVectorEDv4_sQ10IsIntegralINS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES5_S5_ Unexecuted instantiation: _ZN2AK4SIMD3absITkNS0_10SIMDVectorEDv4_iQ10IsIntegralINS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES5_S5_ |
53 | | |
54 | | ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max) |
55 | 0 | { |
56 | 0 | return v < min ? min : (v > max ? max : v); |
57 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::clamp(float __vector(4), float __vector(4), float __vector(4)) |
58 | | |
59 | | template<SIMDVector V, typename U = ElementOf<V>> |
60 | | ALWAYS_INLINE static V clamp(V v, U min, U max) |
61 | | { |
62 | | return v < min ? min : (v > max ? max : v); |
63 | | } |
64 | | |
65 | | ALWAYS_INLINE static f32x4 exp(f32x4 v) |
66 | 0 | { |
67 | 0 | // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times |
68 | 0 | return f32x4 { |
69 | 0 | expf(v[0]), |
70 | 0 | expf(v[1]), |
71 | 0 | expf(v[2]), |
72 | 0 | expf(v[3]), |
73 | 0 | }; |
74 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::exp(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::exp(float __vector(4)) |
75 | | |
76 | | ALWAYS_INLINE static f32x4 exp_approximate(f32x4 v) |
77 | 0 | { |
78 | 0 | static constexpr int number_of_iterations = 10; |
79 | 0 | auto result = 1.f + v / (1 << number_of_iterations); |
80 | 0 | for (int i = 0; i < number_of_iterations; ++i) |
81 | 0 | result *= result; |
82 | 0 | return result; |
83 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::exp_approximate(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::exp_approximate(float __vector(4)) |
84 | | |
85 | | ALWAYS_INLINE static f32x4 sqrt(f32x4 v) |
86 | 0 | { |
87 | 0 | #if ARCH(X86_64) |
88 | 0 | return __builtin_ia32_sqrtps(v); |
89 | 0 | #else |
90 | 0 | return f32x4 { |
91 | 0 | AK::sqrt(v[0]), |
92 | 0 | AK::sqrt(v[1]), |
93 | 0 | AK::sqrt(v[2]), |
94 | 0 | AK::sqrt(v[3]), |
95 | 0 | }; |
96 | 0 | #endif |
97 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::sqrt(float __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::sqrt(float __vector(4)) |
98 | | |
99 | | } |