Coverage Report

Created: 2025-08-11 07:29

/src/Simd/src/Simd/SimdAvx2SynetUnaryOperation.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2024 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdSynet.h"
25
#include "Simd/SimdStore.h"
26
#include "Simd/SimdExtract.h"
27
#include "Simd/SimdBase.h"
28
#include "Simd/SimdSse41.h"
29
#include "Simd/SimdAvx2.h"
30
#include "Simd/SimdArray.h"
31
#include "Simd/SimdPow.h"
32
#include "Simd/SimdExp.h"
33
#include "Simd/SimdErf.h"
34
#include "Simd/SimdPerformance.h"
35
#include "Simd/SimdGather.h"
36
#include "Simd/SimdTrigonometric.h"
37
38
namespace Simd
39
{
40
#if defined(SIMD_AVX2_ENABLE) && defined(SIMD_SYNET_ENABLE)    
41
    namespace Avx2
42
    {
43
        // Primary template for the per-vector unary operation on one __m256 register.
        // Only declared here; each supported operation type supplies an explicit specialization.
        template<SimdSynetUnaryOperation32fType type> __m256 SynetUnaryOperation32f(__m256 value);
44
45
        // Abs: returns |value| for every lane by clearing the sign bit.
        // -0.0f has only the sign bit set, so (~mask) & value keeps every other bit intact.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs>(__m256 value)
        {
            const __m256 signMask = _mm256_set1_ps(-0.0f);
            return _mm256_andnot_ps(signMask, value);
        }
49
50
        // Ceil: rounds every lane up to the nearest integral value.
        // _mm256_round_ps with _MM_FROUND_CEIL is the documented expansion of _mm256_ceil_ps.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil>(__m256 value)
        {
            return _mm256_round_ps(value, _MM_FROUND_CEIL);
        }
54
55
        // Cos: forwards the register to the Cos helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos>(__m256 value)
        {
            const __m256 cosine = Cos(value);
            return cosine;
        }
59
60
        // Erf: forwards the register to the Erf helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf>(__m256 value)
        {
            const __m256 errorFunction = Erf(value);
            return errorFunction;
        }
64
65
        // Exp: forwards the register to the Exponent helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp>(__m256 value)
        {
            const __m256 exponent = Exponent(value);
            return exponent;
        }
69
70
        // Floor: rounds every lane down to the nearest integral value.
        // _mm256_round_ps with _MM_FROUND_FLOOR is the documented expansion of _mm256_floor_ps.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor>(__m256 value)
        {
            return _mm256_round_ps(value, _MM_FROUND_FLOOR);
        }
74
75
        // Log: forwards the register to the Logarithm helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog>(__m256 value)
        {
            const __m256 logarithm = Logarithm(value);
            return logarithm;
        }
79
80
        // Neg: returns -value for every lane by flipping the sign bit.
        // XOR with the sign mask (-0.0f has only the sign bit set) is an exact negation for all
        // inputs, including zeros: it maps +0.0 to -0.0, whereas the subtraction 0 - (+0.0) yields
        // +0.0 under the default rounding mode. This keeps vector lanes consistent with scalar
        // unary minus (NOTE(review): presumably what the Base:: tail path uses - confirm).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg>(__m256 value)
        {
            const __m256 signMask = _mm256_set1_ps(-0.0f);
            return _mm256_xor_ps(signMask, value);
        }
84
85
        // Not: forwards the register to the Not helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot>(__m256 value)
        {
            const __m256 negated = Not(value);
            return negated;
        }
89
90
        // Rcp: forwards the register to the Rcp<false> helper (defined outside this file section).
        // NOTE(review): the meaning of the 'false' template argument is not visible here - confirm.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp>(__m256 value)
        {
            const __m256 reciprocal = Rcp<false>(value);
            return reciprocal;
        }
94
95
        // Rsqrt: reciprocal square root of every lane via the hardware rsqrt instruction.
        // NOTE(review): Intel documents _mm256_rsqrt_ps as an approximation (maximum relative
        // error below 1.5*2^-12); confirm that precision is acceptable alongside the scalar tail path.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt>(__m256 value)
        {
            const __m256 reciprocalRoot = _mm256_rsqrt_ps(value);
            return reciprocalRoot;
        }
99
100
        // Sin: forwards the register to the Sin helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin>(__m256 value)
        {
            const __m256 sine = Sin(value);
            return sine;
        }
104
105
        // Sqrt: square root of every lane using the AVX sqrt instruction.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt>(__m256 value)
        {
            const __m256 root = _mm256_sqrt_ps(value);
            return root;
        }
109
110
        // Tanh: forwards the register to the Tanh helper (defined outside this file section).
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh>(__m256 value)
        {
            const __m256 hyperbolicTangent = Tanh(value);
            return hyperbolicTangent;
        }
114
115
        // Zero: ignores the input and returns a register with all lanes set to 0.0f.
        template<> SIMD_INLINE __m256 SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero>(__m256 value)
        {
            const __m256 zeros = _mm256_setzero_ps();
            return zeros;
        }
119
120
        template<SimdSynetUnaryOperation32fType type, bool align> void SynetUnaryOperation32f(const float* src, size_t size, float* dst)
121
0
        {
122
0
            size_t sizeF = AlignLo(size, F);
123
0
            size_t sizeQF = AlignLo(size, QF);
124
0
            size_t i = 0;
125
0
            for (; i < sizeQF; i += QF)
126
0
            {
127
0
                Store<align>(dst + i + 0 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 0 * F)));
128
0
                Store<align>(dst + i + 1 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 1 * F)));
129
0
                Store<align>(dst + i + 2 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 2 * F)));
130
0
                Store<align>(dst + i + 3 * F, SynetUnaryOperation32f<type>(Load<align>(src + i + 3 * F)));
131
0
            }
132
0
            for (; i < sizeF; i += F)
133
0
                Store<align>(dst + i, SynetUnaryOperation32f<type>(Load<align>(src + i)));
134
0
            for (; i < size; ++i)
135
0
                dst[i] = Base::SynetUnaryOperation32f<type>(src[i]);
136
0
        }
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, true>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)0, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)1, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)2, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)4, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)3, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)5, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)6, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)7, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)8, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)9, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)10, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)11, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)12, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)13, false>(float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<(SimdSynetUnaryOperation32fType)14, false>(float const*, unsigned long, float*)
137
138
        template<bool align> void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst)
139
0
        {
140
0
            switch (type)
141
0
            {
142
0
            case SimdSynetUnaryOperation32fAbs: SynetUnaryOperation32f<SimdSynetUnaryOperation32fAbs, align>(src, size, dst); break;
143
0
            case SimdSynetUnaryOperation32fCeil: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCeil, align>(src, size, dst); break;
144
0
            case SimdSynetUnaryOperation32fCos: SynetUnaryOperation32f<SimdSynetUnaryOperation32fCos, align>(src, size, dst); break;
145
0
            case SimdSynetUnaryOperation32fExp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fExp, align>(src, size, dst); break;
146
0
            case SimdSynetUnaryOperation32fErf: SynetUnaryOperation32f<SimdSynetUnaryOperation32fErf, align>(src, size, dst); break;
147
0
            case SimdSynetUnaryOperation32fFloor: SynetUnaryOperation32f<SimdSynetUnaryOperation32fFloor, align>(src, size, dst); break;
148
0
            case SimdSynetUnaryOperation32fLog: SynetUnaryOperation32f<SimdSynetUnaryOperation32fLog, align>(src, size, dst); break;
149
0
            case SimdSynetUnaryOperation32fNeg: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNeg, align>(src, size, dst); break;
150
0
            case SimdSynetUnaryOperation32fNot: SynetUnaryOperation32f<SimdSynetUnaryOperation32fNot, align>(src, size, dst); break;
151
0
            case SimdSynetUnaryOperation32fRcp: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRcp, align>(src, size, dst); break;
152
0
            case SimdSynetUnaryOperation32fRsqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fRsqrt, align>(src, size, dst); break;
153
0
            case SimdSynetUnaryOperation32fSin: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSin, align>(src, size, dst); break;
154
0
            case SimdSynetUnaryOperation32fSqrt: SynetUnaryOperation32f<SimdSynetUnaryOperation32fSqrt, align>(src, size, dst); break;
155
0
            case SimdSynetUnaryOperation32fTanh: SynetUnaryOperation32f<SimdSynetUnaryOperation32fTanh, align>(src, size, dst); break;
156
0
            case SimdSynetUnaryOperation32fZero: SynetUnaryOperation32f<SimdSynetUnaryOperation32fZero, align>(src, size, dst); break;
157
0
            default:
158
0
                Sse41::SynetUnaryOperation32f(src, size, type, dst);
159
0
            }
160
0
        }
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<true>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*)
Unexecuted instantiation: void Simd::Avx2::SynetUnaryOperation32f<false>(float const*, unsigned long, SimdSynetUnaryOperation32fType, float*)
161
162
        void SynetUnaryOperation32f(const float* src, size_t size, SimdSynetUnaryOperation32fType type, float* dst)
163
0
        {
164
0
            if (Aligned(src) && Aligned(dst))
165
0
                SynetUnaryOperation32f<true>(src, size, type, dst);
166
0
            else
167
0
                SynetUnaryOperation32f<false>(src, size, type, dst);
168
0
        }
169
    }
170
#endif// SIMD_AVX2_ENABLE && SIMD_SYNET_ENABLE
171
}