/src/Simd/src/Simd/SimdSse41SynetUnaryOperation.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdArray.h" |
26 | | #include "Simd/SimdStore.h" |
27 | | #include "Simd/SimdExtract.h" |
28 | | #include "Simd/SimdSynet.h" |
29 | | #include "Simd/SimdBase.h" |
30 | | #include "Simd/SimdSse41.h" |
31 | | #include "Simd/SimdExp.h" |
32 | | #include "Simd/SimdGather.h" |
33 | | #include "Simd/SimdPow.h" |
34 | | #include "Simd/SimdErf.h" |
35 | | #include "Simd/SimdTrigonometric.h" |
36 | | |
37 | | namespace Simd |
38 | | { |
39 | | #if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE) |
40 | | namespace Sse41 |
41 | | { |
42 | | template<SimdSynetUnaryOperation32fType type> __m128 SynetUnaryOperation32f(__m128 value); |
43 | | |
44 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs>(__m128 value) |
45 | 0 | { |
46 | 0 | return Abs(value); |
47 | 0 | } |
48 | | |
49 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil>(__m128 value) |
50 | 0 | { |
51 | 0 | return _mm_ceil_ps(value); |
52 | 0 | } |
53 | | |
54 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos>(__m128 value) |
55 | 0 | { |
56 | 0 | return Cos(value); |
57 | 0 | } |
58 | | |
59 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf>(__m128 value) |
60 | 0 | { |
61 | 0 | return Erf(value); |
62 | 0 | } |
63 | | |
64 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp>(__m128 value) |
65 | 0 | { |
66 | 0 | return Exponent(value); |
67 | 0 | } |
68 | | |
69 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor>(__m128 value) |
70 | 0 | { |
71 | 0 | return _mm_floor_ps(value); |
72 | 0 | } |
73 | | |
74 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog>(__m128 value) |
75 | 0 | { |
76 | 0 | return Logarithm(value); |
77 | 0 | } |
78 | | |
79 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg>(__m128 value) |
80 | 0 | { |
81 | 0 | return Neg(value); |
82 | 0 | } |
83 | | |
84 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot>(__m128 value) |
85 | 0 | { |
86 | 0 | return Not(value); |
87 | 0 | } |
88 | | |
89 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp>(__m128 value) |
90 | 0 | { |
91 | 0 | return Rcp<false>(value); |
92 | 0 | } |
93 | | |
94 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt>(__m128 value) |
95 | 0 | { |
96 | 0 | return _mm_rsqrt_ps(value); |
97 | 0 | } |
98 | | |
99 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin>(__m128 value) |
100 | 0 | { |
101 | 0 | return Sin(value); |
102 | 0 | } |
103 | | |
104 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt>(__m128 value) |
105 | 0 | { |
106 | 0 | return _mm_sqrt_ps(value); |
107 | 0 | } |
108 | | |
109 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh>(__m128 value) |
110 | 0 | { |
111 | 0 | return Tanh(value); |
112 | 0 | } |
113 | | |
114 | | template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero>(__m128 value) |
115 | 0 | { |
116 | 0 | return _mm_setzero_ps(); |
117 | 0 | } |
118 | | |
119 | | template<SimdSynetUnaryOperation32fType type, bool align> void SynetUnaryOperation32f(const float* src, size_t size, float* dst) |
120 | 0 | { |
121 | 0 | size_t sizeF = AlignLo(size, F); |
122 | 0 | size_t sizeQF = AlignLo(size, QF); |
123 | 0 | size_t i = 0; |
124 | 0 | for (; i < sizeQF; i += QF) |
125 | 0 | { |
126 | 0 | Store<align>(dst + i + 0 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 0 * F))); |
127 | 0 | Store<align>(dst + i + 1 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 1 * F))); |
128 | 0 | Store<align>(dst + i + 2 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 2 * F))); |
129 | 0 | Store<align>(dst + i + 3 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 3 * F))); |
130 | 0 | } |
131 | 0 | for (; i < sizeF; i += F) |
132 | 0 | Store<align>(dst + i, SynetUnaryOperation32f<type>(Load<align>(src + i))); |
133 | 0 | for (; i < size; ++i) |
134 | 0 | dst[i] = Base::SynetUnaryOperation32f<type>(src[i]); |
135 | 0 | } Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, false>(float const*, unsigned long, float*) |
136 | | |
137 | | template<bool align> void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst) |
138 | 0 | { |
139 | 0 | switch (type) |
140 | 0 | { |
141 | 0 | case SimdSynetUnaryOperation32fAbs: SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs, align>(src, size, dst); break; |
142 | 0 | case SimdSynetUnaryOperation32fCeil: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil, align>(src, size, dst); break; |
143 | 0 | case SimdSynetUnaryOperation32fCos: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos, align>(src, size, dst); break; |
144 | 0 | case SimdSynetUnaryOperation32fExp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp, align>(src, size, dst); break; |
145 | 0 | case SimdSynetUnaryOperation32fErf: SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf, align>(src, size, dst); break; |
146 | 0 | case SimdSynetUnaryOperation32fFloor: SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor, align>(src, size, dst); break; |
147 | 0 | case SimdSynetUnaryOperation32fLog: SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog, align>(src, size, dst); break; |
148 | 0 | case SimdSynetUnaryOperation32fNeg: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg, align>(src, size, dst); break; |
149 | 0 | case SimdSynetUnaryOperation32fNot: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot, align>(src, size, dst); break; |
150 | 0 | case SimdSynetUnaryOperation32fRcp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp, align>(src, size, dst); break; |
151 | 0 | case SimdSynetUnaryOperation32fRsqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt, align>(src, size, dst); break; |
152 | 0 | case SimdSynetUnaryOperation32fSin: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin, align>(src, size, dst); break; |
153 | 0 | case SimdSynetUnaryOperation32fSqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt, align>(src, size, dst); break; |
154 | 0 | case SimdSynetUnaryOperation32fTanh: SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh, align>(src, size, dst); break; |
155 | 0 | case SimdSynetUnaryOperation32fZero: SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero, align>(src, size, dst); break; |
156 | 0 | default: |
157 | 0 | Base::SynetUnaryOperation32f(src, size, type, dst); |
158 | 0 | } |
159 | 0 | } Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<true>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<false>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*) |
160 | | |
161 | | void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst) |
162 | 0 | { |
163 | 0 | if (Aligned(src) && Aligned(dst)) |
164 | 0 | SynetUnaryOperation32f<true>(src, size, type, dst); |
165 | 0 | else |
166 | 0 | SynetUnaryOperation32f<false>(src, size, type, dst); |
167 | 0 | } |
168 | | } |
169 | | #endif |
170 | | } |