/src/Simd/src/Simd/SimdAvx2SynetUnaryOperation.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynet.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | #include "Simd/SimdExtract.h" |
27 | | #include "Simd/SimdBase.h" |
28 | | #include "Simd/SimdSse41.h" |
29 | | #include "Simd/SimdAvx2.h" |
30 | | #include "Simd/SimdArray.h" |
31 | | #include "Simd/SimdPow.h" |
32 | | #include "Simd/SimdExp.h" |
33 | | #include "Simd/SimdErf.h" |
34 | | #include "Simd/SimdPerformance.h" |
35 | | #include "Simd/SimdGather.h" |
36 | | #include "Simd/SimdTrigonometric.h" |
37 | | |
38 | | namespace Simd |
39 | | { |
40 | | #if defined(SIMD_AVX2_ENABLE) && defined(SIMD_SYNET_ENABLE) |
41 | | namespace Avx2 |
42 | | { |
43 | | template<SimdSynetUnaryOperation32fType type> __m256 SynetUnaryOperation32f(__m256 value); |
44 | | |
45 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs>(__m256 value) |
46 | 0 | { |
47 | 0 | return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), value); |
48 | 0 | } |
49 | | |
50 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil>(__m256 value) |
51 | 0 | { |
52 | 0 | return _mm256_ceil_ps(value); |
53 | 0 | } |
54 | | |
55 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos>(__m256 value) |
56 | 0 | { |
57 | 0 | return Cos(value); |
58 | 0 | } |
59 | | |
60 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf>(__m256 value) |
61 | 0 | { |
62 | 0 | return Erf(value); |
63 | 0 | } |
64 | | |
65 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp>(__m256 value) |
66 | 0 | { |
67 | 0 | return Exponent(value); |
68 | 0 | } |
69 | | |
70 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor>(__m256 value) |
71 | 0 | { |
72 | 0 | return _mm256_floor_ps(value); |
73 | 0 | } |
74 | | |
75 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog>(__m256 value) |
76 | 0 | { |
77 | 0 | return Logarithm(value); |
78 | 0 | } |
79 | | |
80 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg>(__m256 value) |
81 | 0 | { |
82 | 0 | return _mm256_sub_ps(_mm256_setzero_ps(), value); |
83 | 0 | } |
84 | | |
85 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot>(__m256 value) |
86 | 0 | { |
87 | 0 | return Not(value); |
88 | 0 | } |
89 | | |
90 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp>(__m256 value) |
91 | 0 | { |
92 | 0 | return Rcp<false>(value); |
93 | 0 | } |
94 | | |
95 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt>(__m256 value) |
96 | 0 | { |
97 | 0 | return _mm256_rsqrt_ps(value); |
98 | 0 | } |
99 | | |
100 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin>(__m256 value) |
101 | 0 | { |
102 | 0 | return Sin(value); |
103 | 0 | } |
104 | | |
105 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt>(__m256 value) |
106 | 0 | { |
107 | 0 | return _mm256_sqrt_ps(value); |
108 | 0 | } |
109 | | |
110 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh>(__m256 value) |
111 | 0 | { |
112 | 0 | return Tanh(value); |
113 | 0 | } |
114 | | |
115 | | template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero>(__m256 value) |
116 | 0 | { |
117 | 0 | return _mm256_setzero_ps(); |
118 | 0 | } |
119 | | |
120 | | template<SimdSynetUnaryOperation32fType type, bool align> void SynetUnaryOperation32f(const float* src, size_t size, float* dst) |
121 | 0 | { |
122 | 0 | size_t sizeF = AlignLo(size, F); |
123 | 0 | size_t sizeQF = AlignLo(size, QF); |
124 | 0 | size_t i = 0; |
125 | 0 | for (; i < sizeQF; i += QF) |
126 | 0 | { |
127 | 0 | Store<align>(dst + i + 0 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 0 * F))); |
128 | 0 | Store<align>(dst + i + 1 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 1 * F))); |
129 | 0 | Store<align>(dst + i + 2 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 2 * F))); |
130 | 0 | Store<align>(dst + i + 3 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 3 * F))); |
131 | 0 | } |
132 | 0 | for (; i < sizeF; i += F) |
133 | 0 | Store<align>(dst + i, SynetUnaryOperation32f<type>(Load<align>(src + i))); |
134 | 0 | for (; i < size; ++i) |
135 | 0 | dst[i] = Base::SynetUnaryOperation32f<type>(src[i]); |
136 | 0 | } Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, false>(float const*, unsigned long, float*) |
137 | | |
138 | | template<bool align> void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst) |
139 | 0 | { |
140 | 0 | switch (type) |
141 | 0 | { |
142 | 0 | case SimdSynetUnaryOperation32fAbs: SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs, align>(src, size, dst); break; |
143 | 0 | case SimdSynetUnaryOperation32fCeil: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil, align>(src, size, dst); break; |
144 | 0 | case SimdSynetUnaryOperation32fCos: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos, align>(src, size, dst); break; |
145 | 0 | case SimdSynetUnaryOperation32fExp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp, align>(src, size, dst); break; |
146 | 0 | case SimdSynetUnaryOperation32fErf: SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf, align>(src, size, dst); break; |
147 | 0 | case SimdSynetUnaryOperation32fFloor: SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor, align>(src, size, dst); break; |
148 | 0 | case SimdSynetUnaryOperation32fLog: SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog, align>(src, size, dst); break; |
149 | 0 | case SimdSynetUnaryOperation32fNeg: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg, align>(src, size, dst); break; |
150 | 0 | case SimdSynetUnaryOperation32fNot: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot, align>(src, size, dst); break; |
151 | 0 | case SimdSynetUnaryOperation32fRcp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp, align>(src, size, dst); break; |
152 | 0 | case SimdSynetUnaryOperation32fRsqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt, align>(src, size, dst); break; |
153 | 0 | case SimdSynetUnaryOperation32fSin: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin, align>(src, size, dst); break; |
154 | 0 | case SimdSynetUnaryOperation32fSqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt, align>(src, size, dst); break; |
155 | 0 | case SimdSynetUnaryOperation32fTanh: SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh, align>(src, size, dst); break; |
156 | 0 | case SimdSynetUnaryOperation32fZero: SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero, align>(src, size, dst); break; |
157 | 0 | default: |
158 | 0 | Sse41::SynetUnaryOperation32f(src, size, type, dst); |
159 | 0 | } |
160 | 0 | } Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<true>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*) Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<false>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*) |
161 | | |
162 | | void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst) |
163 | 0 | { |
164 | 0 | if (Aligned(src) && Aligned(dst)) |
165 | 0 | SynetUnaryOperation32f<true>(src, size, type, dst); |
166 | 0 | else |
167 | 0 | SynetUnaryOperation32f<false>(src, size, type, dst); |
168 | 0 | } |
169 | | } |
170 | | #endif// SIMD_AVX2_ENABLE |
171 | | } |