Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdBaseSynetActivation.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2024 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdArray.h"
25
#include "Simd/SimdExp.h"
26
#include "Simd/SimdErf.h"
27
#include "Simd/SimdSynet.h"
28
#include "Simd/SimdBFloat16.h"
29
30
namespace Simd
31
{
32
#if defined(SIMD_SYNET_ENABLE)
33
    namespace Base
34
    {
35
        void SynetElu32f(const float * src, size_t size, const float * alpha, float * dst)
36
0
        {
37
0
            float _alpha = alpha[0];
38
0
            size_t size4 = Simd::AlignLo(size, 4);
39
0
            size_t i = 0;
40
0
            for (; i < size4; i += 4)
41
0
            {
42
0
                dst[i + 0] = SynetElu32f(src[i + 0], _alpha);
43
0
                dst[i + 1] = SynetElu32f(src[i + 1], _alpha);
44
0
                dst[i + 2] = SynetElu32f(src[i + 2], _alpha);
45
0
                dst[i + 3] = SynetElu32f(src[i + 3], _alpha);
46
0
            }
47
0
            for (; i < size; ++i)
48
0
                dst[i] = SynetElu32f(src[i], _alpha);
49
0
        }
50
51
        //-------------------------------------------------------------------------------------------------
52
53
        void SynetGelu32f(const float* src, size_t size, float* dst)
54
0
        {
55
0
            size_t size4 = Simd::AlignLo(size, 4);
56
0
            size_t i = 0;
57
0
            for (; i < size4; i += 4)
58
0
            {
59
0
                dst[i + 0] = Gelu(src[i + 0]);
60
0
                dst[i + 1] = Gelu(src[i + 1]);
61
0
                dst[i + 2] = Gelu(src[i + 2]);
62
0
                dst[i + 3] = Gelu(src[i + 3]);
63
0
            }
64
0
            for (; i < size; ++i)
65
0
                dst[i] = Gelu(src[i]);
66
0
        }
67
68
        //-------------------------------------------------------------------------------------------------
69
70
        void SynetHardSigmoid32f(const float* src, size_t size, const float* scale, const float* shift, float* dst)
71
0
        {
72
0
            float _scale = scale[0];
73
0
            float _shift = shift[0];
74
0
            size_t size4 = Simd::AlignLo(size, 4);
75
0
            size_t i = 0;
76
0
            for (; i < size4; i += 4)
77
0
            {
78
0
                dst[i + 0] = SynetHardSigmoid32f(src[i + 0], _scale, _shift);
79
0
                dst[i + 1] = SynetHardSigmoid32f(src[i + 1], _scale, _shift);
80
0
                dst[i + 2] = SynetHardSigmoid32f(src[i + 2], _scale, _shift);
81
0
                dst[i + 3] = SynetHardSigmoid32f(src[i + 3], _scale, _shift);
82
0
            }
83
0
            for (; i < size; ++i)
84
0
                dst[i] = SynetHardSigmoid32f(src[i], _scale, _shift);
85
0
        }
86
87
        //-------------------------------------------------------------------------------------------------
88
89
        void SynetHswish32f(const float * src, size_t size, const float * shift, const float * scale, float * dst)
90
0
        {
91
0
            float _shift = shift[0];
92
0
            float _scale = scale[0];
93
0
            size_t size4 = Simd::AlignLo(size, 4);
94
0
            size_t i = 0;
95
0
            for (; i < size4; i += 4)
96
0
            {
97
0
                dst[i + 0] = SynetHswish32f(src[i + 0], _shift, _scale);
98
0
                dst[i + 1] = SynetHswish32f(src[i + 1], _shift, _scale);
99
0
                dst[i + 2] = SynetHswish32f(src[i + 2], _shift, _scale);
100
0
                dst[i + 3] = SynetHswish32f(src[i + 3], _shift, _scale);
101
0
            }
102
0
            for (; i < size; ++i)
103
0
                dst[i] = SynetHswish32f(src[i], _shift, _scale);
104
0
        }
105
106
        //-------------------------------------------------------------------------------------------------
107
108
        void SynetMish32f(const float* src, size_t size, const float* threshold, float* dst)
109
0
        {
110
0
            float _threshold = threshold[0];
111
0
            size_t size4 = Simd::AlignLo(size, 4);
112
0
            size_t i = 0;
113
0
            for (; i < size4; i += 4)
114
0
            {
115
0
                dst[i + 0] = SynetMish32f(src[i + 0], _threshold);
116
0
                dst[i + 1] = SynetMish32f(src[i + 1], _threshold);
117
0
                dst[i + 2] = SynetMish32f(src[i + 2], _threshold);
118
0
                dst[i + 3] = SynetMish32f(src[i + 3], _threshold);
119
0
            }
120
0
            for (; i < size; ++i)
121
0
                dst[i] = SynetMish32f(src[i], _threshold);
122
0
        }
123
124
        //-------------------------------------------------------------------------------------------------
125
126
        void SynetPreluLayerForwardNchw(const float* src, const float* slope, size_t channels, size_t spatial, float* dst)
127
0
        {
128
0
            size_t aligned = Simd::AlignLo(spatial, 4);
129
0
            for (size_t c = 0; c < channels; ++c)
130
0
            {
131
0
                float _slope = slope[c];
132
0
                size_t s = 0;
133
0
                for (; s < aligned; s += 4)
134
0
                {
135
0
                    dst[s + 0] = SynetRelu32f(src[s + 0], _slope);
136
0
                    dst[s + 1] = SynetRelu32f(src[s + 1], _slope);
137
0
                    dst[s + 2] = SynetRelu32f(src[s + 2], _slope);
138
0
                    dst[s + 3] = SynetRelu32f(src[s + 3], _slope);
139
0
                }
140
0
                for (; s < spatial; ++s)
141
0
                    dst[s] = SynetRelu32f(src[s], _slope);
142
0
                src += spatial;
143
0
                dst += spatial;
144
0
            }
145
0
        }
146
147
        void SynetPreluLayerForwardNhwc(const float* src, const float* slope, size_t channels, size_t spatial, float* dst)
148
0
        {
149
0
            size_t aligned = Simd::AlignLo(channels, 4);
150
0
            for (size_t s = 0; s < spatial; ++s)
151
0
            {
152
0
                size_t c = 0;
153
0
                for (; c < aligned; c += 4)
154
0
                {
155
0
                    dst[c + 0] = SynetRelu32f(src[c + 0], slope[c + 0]);
156
0
                    dst[c + 1] = SynetRelu32f(src[c + 1], slope[c + 1]);
157
0
                    dst[c + 2] = SynetRelu32f(src[c + 2], slope[c + 2]);
158
0
                    dst[c + 3] = SynetRelu32f(src[c + 3], slope[c + 3]);
159
0
                }
160
0
                for (; c < channels; ++c)
161
0
                    dst[c] = SynetRelu32f(src[c], slope[c]);
162
0
                src += channels;
163
0
                dst += channels;
164
165
0
            }
166
0
        }
167
168
        void SynetPreluLayerForward(const float* src, const float* slope, size_t channels, size_t spatial, float* dst, SimdTensorFormatType format)
169
0
        {
170
0
            if (Base::NchwCompatible(channels, spatial, format))
171
0
                SynetPreluLayerForwardNchw(src, slope, channels, spatial, dst);
172
0
            else if (Base::NhwcCompatible(channels, spatial, format))
173
0
                SynetPreluLayerForwardNhwc(src, slope, channels, spatial, dst);
174
0
            else
175
0
                assert(0);
176
0
        }
177
178
        //-------------------------------------------------------------------------------------------------
179
180
        void SynetRelu32f(const float* src, size_t size, const float* slope, float* dst)
181
0
        {
182
0
            float _slope = slope[0];
183
0
            size_t size4 = Simd::AlignLo(size, 4);
184
0
            size_t i = 0;
185
0
            for (; i < size4; i += 4)
186
0
            {
187
0
                dst[i + 0] = SynetRelu32f(src[i + 0], _slope);
188
0
                dst[i + 1] = SynetRelu32f(src[i + 1], _slope);
189
0
                dst[i + 2] = SynetRelu32f(src[i + 2], _slope);
190
0
                dst[i + 3] = SynetRelu32f(src[i + 3], _slope);
191
0
            }
192
0
            for (; i < size; ++i)
193
0
                dst[i] = SynetRelu32f(src[i], _slope);
194
0
        }
195
196
        //-------------------------------------------------------------------------------------------------
197
198
        void SynetRelu16b(const uint16_t* src, size_t size, const float* slope, uint16_t* dst)
199
0
        {
200
0
            float _slope = slope[0];
201
0
            size_t size4 = Simd::AlignLo(size, 4);
202
0
            size_t i = 0;
203
0
            for (; i < size4; i += 4)
204
0
            {
205
0
                dst[i + 0] = SynetRelu16b(src[i + 0], _slope);
206
0
                dst[i + 1] = SynetRelu16b(src[i + 1], _slope);
207
0
                dst[i + 2] = SynetRelu16b(src[i + 2], _slope);
208
0
                dst[i + 3] = SynetRelu16b(src[i + 3], _slope);
209
0
            }
210
0
            for (; i < size; ++i)
211
0
                dst[i] = SynetRelu16b(src[i], _slope);
212
0
        }
213
214
        //-------------------------------------------------------------------------------------------------
215
216
        void SynetRestrictRange32f(const float * src, size_t size, const float * lower, const float * upper, float * dst)
217
0
        {
218
0
            float min = *lower;
219
0
            float max = *upper;
220
0
            size_t size4 = Simd::AlignLo(size, 4);
221
0
            size_t i = 0;
222
0
            for (; i < size4; i += 4)
223
0
            {
224
0
                dst[i + 0] = Simd::RestrictRange(src[i + 0], min, max);
225
0
                dst[i + 1] = Simd::RestrictRange(src[i + 1], min, max);
226
0
                dst[i + 2] = Simd::RestrictRange(src[i + 2], min, max);
227
0
                dst[i + 3] = Simd::RestrictRange(src[i + 3], min, max);
228
0
            }
229
0
            for (; i < size; ++i)
230
0
                dst[i] = Simd::RestrictRange(src[i], min, max);
231
0
        }
232
233
        //-------------------------------------------------------------------------------------------------
234
235
        void SynetSigmoid32f(const float* src, size_t size, const float* slope, float* dst)
236
0
        {
237
0
            float _slope = slope[0];
238
0
            size_t size4 = Simd::AlignLo(size, 4);
239
0
            size_t i = 0;
240
0
            for (; i < size4; i += 4)
241
0
            {
242
0
                dst[i + 0] = SynetSigmoid32f(src[i + 0], _slope);
243
0
                dst[i + 1] = SynetSigmoid32f(src[i + 1], _slope);
244
0
                dst[i + 2] = SynetSigmoid32f(src[i + 2], _slope);
245
0
                dst[i + 3] = SynetSigmoid32f(src[i + 3], _slope);
246
0
            }
247
0
            for (; i < size; ++i)
248
0
                dst[i] = SynetSigmoid32f(src[i], _slope);
249
0
        }
250
251
        //-------------------------------------------------------------------------------------------------
252
253
        void SynetSwish32f(const float* src, size_t size, const float* slope, float* dst)
254
0
        {
255
0
            float _slope = slope[0];
256
0
            size_t size4 = Simd::AlignLo(size, 4);
257
0
            size_t i = 0;
258
0
            for (; i < size4; i += 4)
259
0
            {
260
0
                dst[i + 0] = SynetSwish32f(src[i + 0], _slope);
261
0
                dst[i + 1] = SynetSwish32f(src[i + 1], _slope);
262
0
                dst[i + 2] = SynetSwish32f(src[i + 2], _slope);
263
0
                dst[i + 3] = SynetSwish32f(src[i + 3], _slope);
264
0
            }
265
0
            for (; i < size; ++i)
266
0
                dst[i] = SynetSwish32f(src[i], _slope);
267
0
        }
268
269
        //-------------------------------------------------------------------------------------------------
270
271
        void SynetSoftplus32f(const float* src, size_t size, const float * beta, const float * threshold, float* dst)
272
0
        {
273
0
            float _beta = beta[0];
274
0
            float _threshold = threshold[0];
275
0
            size_t size4 = Simd::AlignLo(size, 4);
276
0
            size_t i = 0;
277
0
            for (; i < size4; i += 4)
278
0
            {
279
0
                dst[i + 0] = SynetSoftplus32f(src[i + 0], _beta, _threshold);
280
0
                dst[i + 1] = SynetSoftplus32f(src[i + 1], _beta, _threshold);
281
0
                dst[i + 2] = SynetSoftplus32f(src[i + 2], _beta, _threshold);
282
0
                dst[i + 3] = SynetSoftplus32f(src[i + 3], _beta, _threshold);
283
0
            }
284
0
            for (; i < size; ++i)
285
0
                dst[i] = SynetSoftplus32f(src[i], _beta, _threshold);
286
0
        }
287
288
        //-------------------------------------------------------------------------------------------------
289
290
        void SynetTanh32f(const float* src, size_t size, const float* slope, float* dst)
291
0
        {
292
0
            float _slope = slope[0];
293
0
            size_t size4 = Simd::AlignLo(size, 4);
294
0
            size_t i = 0;
295
0
            for (; i < size4; i += 4)
296
0
            {
297
0
                dst[i + 0] = SynetTanh32f(src[i + 0], _slope);
298
0
                dst[i + 1] = SynetTanh32f(src[i + 1], _slope);
299
0
                dst[i + 2] = SynetTanh32f(src[i + 2], _slope);
300
0
                dst[i + 3] = SynetTanh32f(src[i + 3], _slope);
301
0
            }
302
0
            for (; i < size; ++i)
303
0
                dst[i] = SynetTanh32f(src[i], _slope);
304
0
        }
305
    }
306
#endif
307
}