/src/Simd/src/Simd/SimdSse41SynetUnaryOperation.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Simd Library (http://ermig1979.github.io/Simd).  | 
3  |  | *  | 
4  |  | * Copyright (c) 2011-2024 Yermalayeu Ihar.  | 
5  |  | *  | 
6  |  | * Permission is hereby granted, free of charge, to any person obtaining a copy  | 
7  |  | * of this software and associated documentation files (the "Software"), to deal  | 
8  |  | * in the Software without restriction, including without limitation the rights  | 
9  |  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  | 
10  |  | * copies of the Software, and to permit persons to whom the Software is  | 
11  |  | * furnished to do so, subject to the following conditions:  | 
12  |  | *  | 
13  |  | * The above copyright notice and this permission notice shall be included in  | 
14  |  | * all copies or substantial portions of the Software.  | 
15  |  | *  | 
16  |  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  | 
17  |  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  | 
18  |  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  | 
19  |  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  | 
20  |  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  | 
21  |  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  | 
22  |  | * SOFTWARE.  | 
23  |  | */  | 
24  |  | #include "Simd/SimdMemory.h"  | 
25  |  | #include "Simd/SimdArray.h"  | 
26  |  | #include "Simd/SimdStore.h"  | 
27  |  | #include "Simd/SimdExtract.h"  | 
28  |  | #include "Simd/SimdSynet.h"  | 
29  |  | #include "Simd/SimdBase.h"  | 
30  |  | #include "Simd/SimdSse41.h"  | 
31  |  | #include "Simd/SimdExp.h"  | 
32  |  | #include "Simd/SimdGather.h"  | 
33  |  | #include "Simd/SimdPow.h"  | 
34  |  | #include "Simd/SimdErf.h"  | 
35  |  | #include "Simd/SimdTrigonometric.h"  | 
36  |  |  | 
37  |  | namespace Simd  | 
38  |  | { | 
39  |  | #if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE)     | 
40  |  |     namespace Sse41  | 
41  |  |     { | 
42  |  |          template<SimdSynetUnaryOperation32fType type> __m128 SynetUnaryOperation32f(__m128 value);  | 
43  |  |  | 
44  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs>(__m128 value)  | 
45  | 0  |         { | 
46  | 0  |             return Abs(value);  | 
47  | 0  |         }  | 
48  |  |  | 
49  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil>(__m128 value)  | 
50  | 0  |         { | 
51  | 0  |             return _mm_ceil_ps(value);  | 
52  | 0  |         }  | 
53  |  |  | 
54  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos>(__m128 value)  | 
55  | 0  |         { | 
56  | 0  |             return Cos(value);  | 
57  | 0  |         }  | 
58  |  |  | 
59  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf>(__m128 value)  | 
60  | 0  |         { | 
61  | 0  |             return Erf(value);  | 
62  | 0  |         }  | 
63  |  |  | 
64  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp>(__m128 value)  | 
65  | 0  |         { | 
66  | 0  |             return Exponent(value);  | 
67  | 0  |         }  | 
68  |  |  | 
69  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor>(__m128 value)  | 
70  | 0  |         { | 
71  | 0  |             return _mm_floor_ps(value);  | 
72  | 0  |         }  | 
73  |  |  | 
74  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog>(__m128 value)  | 
75  | 0  |         { | 
76  | 0  |             return Logarithm(value);  | 
77  | 0  |         }  | 
78  |  |  | 
79  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg>(__m128 value)  | 
80  | 0  |         { | 
81  | 0  |             return Neg(value);  | 
82  | 0  |         }  | 
83  |  |  | 
84  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot>(__m128 value)  | 
85  | 0  |         { | 
86  | 0  |             return Not(value);  | 
87  | 0  |         }  | 
88  |  |  | 
89  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp>(__m128 value)  | 
90  | 0  |         { | 
91  | 0  |             return Rcp<false>(value);  | 
92  | 0  |         }  | 
93  |  |  | 
94  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt>(__m128 value)  | 
95  | 0  |         { | 
96  | 0  |             return _mm_rsqrt_ps(value);  | 
97  | 0  |         }  | 
98  |  |  | 
99  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin>(__m128 value)  | 
100  | 0  |         { | 
101  | 0  |             return Sin(value);  | 
102  | 0  |         }  | 
103  |  |  | 
104  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt>(__m128 value)  | 
105  | 0  |         { | 
106  | 0  |             return _mm_sqrt_ps(value);  | 
107  | 0  |         }  | 
108  |  |  | 
109  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh>(__m128 value)  | 
110  | 0  |         { | 
111  | 0  |             return Tanh(value);  | 
112  | 0  |         }  | 
113  |  |  | 
114  |  |         template<> SIMD_INLINE __m128 SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero>(__m128 value)  | 
115  | 0  |         { | 
116  | 0  |             return _mm_setzero_ps();  | 
117  | 0  |         }  | 
118  |  |  | 
119  |  |         template<SimdSynetUnaryOperation32fType type, bool align> void SynetUnaryOperation32f(const float* src, size_t size, float* dst)  | 
120  | 0  |         { | 
121  | 0  |             size_t sizeF = AlignLo(size, F);  | 
122  | 0  |             size_t sizeQF = AlignLo(size, QF);  | 
123  | 0  |             size_t i = 0;  | 
124  | 0  |             for (; i < sizeQF; i += QF)  | 
125  | 0  |             { | 
126  | 0  |                 Store<align>(dst + i + 0 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 0 * F)));  | 
127  | 0  |                 Store<align>(dst + i + 1 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 1 * F)));  | 
128  | 0  |                 Store<align>(dst + i + 2 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 2 * F)));  | 
129  | 0  |                 Store<align>(dst + i + 3 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 3 * F)));  | 
130  | 0  |             }  | 
131  | 0  |             for (; i < sizeF; i += F)  | 
132  | 0  |                 Store<align>(dst + i, SynetUnaryOperation32f<type>(Load<align>(src + i)));  | 
133  | 0  |             for (; i < size; ++i)  | 
134  | 0  |                 dst[i] = Base::SynetUnaryOperation32f<type>(src[i]);  | 
135  | 0  |         } Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, true>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, false>(float const*, unsigned long, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, false>(float const*, unsigned long, float*)  | 
136  |  |  | 
137  |  |         template<bool align> void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst)  | 
138  | 0  |         { | 
139  | 0  |             switch (type)  | 
140  | 0  |             { | 
141  | 0  |             case SimdSynetUnaryOperation32fAbs: SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs, align>(src, size, dst); break;  | 
142  | 0  |             case SimdSynetUnaryOperation32fCeil: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil, align>(src, size, dst); break;  | 
143  | 0  |             case SimdSynetUnaryOperation32fCos: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos, align>(src, size, dst); break;  | 
144  | 0  |             case SimdSynetUnaryOperation32fExp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp, align>(src, size, dst); break;  | 
145  | 0  |             case SimdSynetUnaryOperation32fErf: SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf, align>(src, size, dst); break;  | 
146  | 0  |             case SimdSynetUnaryOperation32fFloor: SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor, align>(src, size, dst); break;  | 
147  | 0  |             case SimdSynetUnaryOperation32fLog: SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog, align>(src, size, dst); break;  | 
148  | 0  |             case SimdSynetUnaryOperation32fNeg: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg, align>(src, size, dst); break;  | 
149  | 0  |             case SimdSynetUnaryOperation32fNot: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot, align>(src, size, dst); break;  | 
150  | 0  |             case SimdSynetUnaryOperation32fRcp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp, align>(src, size, dst); break;  | 
151  | 0  |             case SimdSynetUnaryOperation32fRsqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt, align>(src, size, dst); break;  | 
152  | 0  |             case SimdSynetUnaryOperation32fSin: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin, align>(src, size, dst); break;  | 
153  | 0  |             case SimdSynetUnaryOperation32fSqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt, align>(src, size, dst); break;  | 
154  | 0  |             case SimdSynetUnaryOperation32fTanh: SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh, align>(src, size, dst); break;  | 
155  | 0  |             case SimdSynetUnaryOperation32fZero: SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero, align>(src, size, dst); break;  | 
156  | 0  |             default:  | 
157  | 0  |                 Base::SynetUnaryOperation32f(src, size, type, dst);  | 
158  | 0  |             }  | 
159  | 0  |         } Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<true>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*) Unexecuted instantiation: void Simd::Sse41::SynetUnaryOperation32f<false>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*)  | 
160  |  |  | 
161  |  |         void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst)  | 
162  | 0  |         { | 
163  | 0  |             if (Aligned(src) && Aligned(dst))  | 
164  | 0  |                 SynetUnaryOperation32f<true>(src, size, type, dst);  | 
165  | 0  |             else  | 
166  | 0  |                 SynetUnaryOperation32f<false>(src, size, type, dst);  | 
167  | 0  |         }  | 
168  |  |    }  | 
169  |  | #endif  | 
170  |  | }  |