/src/Simd/src/Simd/SimdBaseSynetActivation.cpp

Source
/*
* Simd Library (http://ermig1979.github.io/Simd).
*
* Copyright (c) 2011-2024 Yermalayeu Ihar.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "Simd/SimdArray.h"
#include "Simd/SimdExp.h"
#include "Simd/SimdErf.h"
#include "Simd/SimdSynet.h"
#include "Simd/SimdBFloat16.h"

namespace Simd
{
#if defined(SIMD_SYNET_ENABLE)
    namespace Base
    {
        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetElu32f(value, alpha) overload and stores the result to dst[k].
        // Only alpha[0] is used.
        void SynetElu32f(const float * src, size_t size, const float * alpha, float * dst)
        {
            const float a = *alpha; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetElu32f(in[0], a);
                out[1] = SynetElu32f(in[1], a);
                out[2] = SynetElu32f(in[2], a);
                out[3] = SynetElu32f(in[3], a);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetElu32f(src[pos], a);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: stores Gelu(src[k]) to dst[k] for every k in [0, size).
        void SynetGelu32f(const float* src, size_t size, float* dst)
        {
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = Gelu(in[0]);
                out[1] = Gelu(in[1]);
                out[2] = Gelu(in[2]);
                out[3] = Gelu(in[3]);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = Gelu(src[pos]);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetHardSigmoid32f(value, scale, shift) overload and stores the result to dst[k].
        // Only scale[0] and shift[0] are used.
        void SynetHardSigmoid32f(const float* src, size_t size, const float* scale, const float* shift, float* dst)
        {
            // Both parameters are fetched once, scale first, before any element of dst is written.
            const float k = *scale;
            const float b = *shift;
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetHardSigmoid32f(in[0], k, b);
                out[1] = SynetHardSigmoid32f(in[1], k, b);
                out[2] = SynetHardSigmoid32f(in[2], k, b);
                out[3] = SynetHardSigmoid32f(in[3], k, b);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetHardSigmoid32f(src[pos], k, b);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetHswish32f(value, shift, scale) overload and stores the result to dst[k].
        // Only shift[0] and scale[0] are used; note the shift-before-scale argument order.
        void SynetHswish32f(const float * src, size_t size, const float * shift, const float * scale, float * dst)
        {
            // Both parameters are fetched once, shift first, before any element of dst is written.
            const float b = *shift;
            const float k = *scale;
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetHswish32f(in[0], b, k);
                out[1] = SynetHswish32f(in[1], b, k);
                out[2] = SynetHswish32f(in[2], b, k);
                out[3] = SynetHswish32f(in[3], b, k);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetHswish32f(src[pos], b, k);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetMish32f(value, threshold) overload and stores the result to dst[k].
        // Only threshold[0] is used.
        void SynetMish32f(const float* src, size_t size, const float* threshold, float* dst)
        {
            const float limit = *threshold; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetMish32f(in[0], limit);
                out[1] = SynetMish32f(in[1], limit);
                out[2] = SynetMish32f(in[2], limit);
                out[3] = SynetMish32f(in[3], limit);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetMish32f(src[pos], limit);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // PReLU forward pass for channel-major data: the input is walked as `channels`
        // consecutive runs of `spatial` floats, and every element of run c is passed
        // through the scalar SynetRelu32f(value, slope) overload with slope[c].
        void SynetPreluLayerForwardNchw(const float* src, const float* slope, size_t channels, size_t spatial, float* dst)
        {
            const size_t body = Simd::AlignLo(spatial, 4); // end of the part of each run processed four elements per pass
            const float* in = src;
            float* out = dst;
            for (size_t channel = 0; channel < channels; ++channel)
            {
                // The slope of the channel is fetched before its run is written.
                const float k = slope[channel];
                size_t pos = 0;
                while (pos < body)
                {
                    out[pos + 0] = SynetRelu32f(in[pos + 0], k);
                    out[pos + 1] = SynetRelu32f(in[pos + 1], k);
                    out[pos + 2] = SynetRelu32f(in[pos + 2], k);
                    out[pos + 3] = SynetRelu32f(in[pos + 3], k);
                    pos += 4;
                }
                // Remaining elements of the run are handled one by one.
                while (pos < spatial)
                {
                    out[pos] = SynetRelu32f(in[pos], k);
                    ++pos;
                }
                // Step to the run of the next channel.
                in += spatial;
                out += spatial;
            }
        }

        // PReLU forward pass for channel-minor data: the input is walked as `spatial`
        // consecutive groups of `channels` floats, and element c of every group is passed
        // through the scalar SynetRelu32f(value, slope) overload with slope[c].
        void SynetPreluLayerForwardNhwc(const float* src, const float* slope, size_t channels, size_t spatial, float* dst)
        {
            const size_t body = Simd::AlignLo(channels, 4); // end of the part of each group processed four elements per pass
            const float* in = src;
            float* out = dst;
            for (size_t point = 0; point < spatial; ++point)
            {
                size_t ch = 0;
                while (ch < body)
                {
                    // Slopes are re-read from the array for every element.
                    out[ch + 0] = SynetRelu32f(in[ch + 0], slope[ch + 0]);
                    out[ch + 1] = SynetRelu32f(in[ch + 1], slope[ch + 1]);
                    out[ch + 2] = SynetRelu32f(in[ch + 2], slope[ch + 2]);
                    out[ch + 3] = SynetRelu32f(in[ch + 3], slope[ch + 3]);
                    ch += 4;
                }
                // Remaining channels of the group are handled one by one.
                while (ch < channels)
                {
                    out[ch] = SynetRelu32f(in[ch], slope[ch]);
                    ++ch;
                }
                // Step to the group of the next spatial position.
                in += channels;
                out += channels;
            }
        }

        // Layout dispatcher for the PReLU forward pass: the NCHW check is tried first, then
        // the NHWC one; the first that accepts (channels, spatial, format) selects the
        // implementation. If neither accepts, assert(0) fires and nothing is written to dst.
        void SynetPreluLayerForward(const float* src, const float* slope, size_t channels, size_t spatial, float* dst, SimdTensorFormatType format)
        {
            if (Base::NchwCompatible(channels, spatial, format))
            {
                SynetPreluLayerForwardNchw(src, slope, channels, spatial, dst);
                return;
            }
            if (Base::NhwcCompatible(channels, spatial, format))
            {
                SynetPreluLayerForwardNhwc(src, slope, channels, spatial, dst);
                return;
            }
            // Unsupported tensor format.
            assert(0);
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetRelu32f(value, slope) overload and stores the result to dst[k].
        // Only slope[0] is used.
        void SynetRelu32f(const float* src, size_t size, const float* slope, float* dst)
        {
            const float k = *slope; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetRelu32f(in[0], k);
                out[1] = SynetRelu32f(in[1], k);
                out[2] = SynetRelu32f(in[2], k);
                out[3] = SynetRelu32f(in[3], k);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetRelu32f(src[pos], k);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form for 16-bit elements: passes every src[k], k in [0, size), through the
        // scalar SynetRelu16b(value, slope) overload and stores the 16-bit result to dst[k].
        // Only slope[0] is used; the slope itself is a 32-bit float.
        void SynetRelu16b(const uint16_t* src, size_t size, const float* slope, uint16_t* dst)
        {
            const float k = *slope; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const uint16_t* in = src + pos;
                uint16_t* out = dst + pos;
                out[0] = SynetRelu16b(in[0], k);
                out[1] = SynetRelu16b(in[1], k);
                out[2] = SynetRelu16b(in[2], k);
                out[3] = SynetRelu16b(in[3], k);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetRelu16b(src[pos], k);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: stores Simd::RestrictRange(src[k], lower[0], upper[0]) to dst[k]
        // for every k in [0, size).
        void SynetRestrictRange32f(const float * src, size_t size, const float * lower, const float * upper, float * dst)
        {
            // Both bounds are fetched once, lower first, before any element of dst is written.
            const float lo = lower[0];
            const float hi = upper[0];
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = Simd::RestrictRange(in[0], lo, hi);
                out[1] = Simd::RestrictRange(in[1], lo, hi);
                out[2] = Simd::RestrictRange(in[2], lo, hi);
                out[3] = Simd::RestrictRange(in[3], lo, hi);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = Simd::RestrictRange(src[pos], lo, hi);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetSigmoid32f(value, slope) overload and stores the result to dst[k].
        // Only slope[0] is used.
        void SynetSigmoid32f(const float* src, size_t size, const float* slope, float* dst)
        {
            const float k = *slope; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetSigmoid32f(in[0], k);
                out[1] = SynetSigmoid32f(in[1], k);
                out[2] = SynetSigmoid32f(in[2], k);
                out[3] = SynetSigmoid32f(in[3], k);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetSigmoid32f(src[pos], k);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetSwish32f(value, slope) overload and stores the result to dst[k].
        // Only slope[0] is used.
        void SynetSwish32f(const float* src, size_t size, const float* slope, float* dst)
        {
            const float k = *slope; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetSwish32f(in[0], k);
                out[1] = SynetSwish32f(in[1], k);
                out[2] = SynetSwish32f(in[2], k);
                out[3] = SynetSwish32f(in[3], k);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetSwish32f(src[pos], k);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetSoftplus32f(value, beta, threshold) overload and stores the result to dst[k].
        // Only beta[0] and threshold[0] are used.
        void SynetSoftplus32f(const float* src, size_t size, const float * beta, const float * threshold, float* dst)
        {
            // Both parameters are fetched once, beta first, before any element of dst is written.
            const float b = *beta;
            const float limit = *threshold;
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetSoftplus32f(in[0], b, limit);
                out[1] = SynetSoftplus32f(in[1], b, limit);
                out[2] = SynetSoftplus32f(in[2], b, limit);
                out[3] = SynetSoftplus32f(in[3], b, limit);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetSoftplus32f(src[pos], b, limit);
                ++pos;
            }
        }

        //-------------------------------------------------------------------------------------------------

        // Array form: passes every src[k], k in [0, size), through the scalar
        // SynetTanh32f(value, slope) overload and stores the result to dst[k].
        // Only slope[0] is used.
        void SynetTanh32f(const float* src, size_t size, const float* slope, float* dst)
        {
            const float k = *slope; // fetched once, before any element of dst is written
            const size_t body = Simd::AlignLo(size, 4); // end of the part processed four elements per pass
            size_t pos = 0;
            while (pos < body)
            {
                const float* in = src + pos;
                float* out = dst + pos;
                out[0] = SynetTanh32f(in[0], k);
                out[1] = SynetTanh32f(in[1], k);
                out[2] = SynetTanh32f(in[2], k);
                out[3] = SynetTanh32f(in[3], k);
                pos += 4;
            }
            // Remaining elements are handled one by one.
            while (pos < size)
            {
                dst[pos] = SynetTanh32f(src[pos], k);
                ++pos;
            }
        }
    }
#endif
}

Coverage Report

Created: 2025-12-10 07:04

Line	Count	Source
1		/*
2		* Simd Library (http://ermig1979.github.io/Simd).
3		*
4		* Copyright (c) 2011-2024 Yermalayeu Ihar.
5		*
6		* Permission is hereby granted, free of charge, to any person obtaining a copy
7		* of this software and associated documentation files (the "Software"), to deal
8		* in the Software without restriction, including without limitation the rights
9		* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10		* copies of the Software, and to permit persons to whom the Software is
11		* furnished to do so, subject to the following conditions:
12		*
13		* The above copyright notice and this permission notice shall be included in
14		* all copies or substantial portions of the Software.
15		*
16		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19		* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21		* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22		* SOFTWARE.
23		*/
24		#include "Simd/SimdArray.h"
25		#include "Simd/SimdExp.h"
26		#include "Simd/SimdErf.h"
27		#include "Simd/SimdSynet.h"
28		#include "Simd/SimdBFloat16.h"
29
30		namespace Simd
31		{
32		#if defined(SIMD_SYNET_ENABLE)
33		namespace Base
34		{
35		void SynetElu32f(const float * src, size_t size, const float * alpha, float * dst)
36	0	{
37	0	float _alpha = alpha[0];
38	0	size_t size4 = Simd::AlignLo(size, 4);
39	0	size_t i = 0;
40	0	for (; i < size4; i += 4)
41	0	{
42	0	dst[i + 0] = SynetElu32f(src[i + 0], _alpha);
43	0	dst[i + 1] = SynetElu32f(src[i + 1], _alpha);
44	0	dst[i + 2] = SynetElu32f(src[i + 2], _alpha);
45	0	dst[i + 3] = SynetElu32f(src[i + 3], _alpha);
46	0	}
47	0	for (; i < size; ++i)
48	0	dst[i] = SynetElu32f(src[i], _alpha);
49	0	}
50
51		//-------------------------------------------------------------------------------------------------
52
53		void SynetGelu32f(const float* src, size_t size, float* dst)
54	0	{
55	0	size_t size4 = Simd::AlignLo(size, 4);
56	0	size_t i = 0;
57	0	for (; i < size4; i += 4)
58	0	{
59	0	dst[i + 0] = Gelu(src[i + 0]);
60	0	dst[i + 1] = Gelu(src[i + 1]);
61	0	dst[i + 2] = Gelu(src[i + 2]);
62	0	dst[i + 3] = Gelu(src[i + 3]);
63	0	}
64	0	for (; i < size; ++i)
65	0	dst[i] = Gelu(src[i]);
66	0	}
67
68		//-------------------------------------------------------------------------------------------------
69
70		void SynetHardSigmoid32f(const float* src, size_t size, const float* scale, const float* shift, float* dst)
71	0	{
72	0	float _scale = scale[0];
73	0	float _shift = shift[0];
74	0	size_t size4 = Simd::AlignLo(size, 4);
75	0	size_t i = 0;
76	0	for (; i < size4; i += 4)
77	0	{
78	0	dst[i + 0] = SynetHardSigmoid32f(src[i + 0], _scale, _shift);
79	0	dst[i + 1] = SynetHardSigmoid32f(src[i + 1], _scale, _shift);
80	0	dst[i + 2] = SynetHardSigmoid32f(src[i + 2], _scale, _shift);
81	0	dst[i + 3] = SynetHardSigmoid32f(src[i + 3], _scale, _shift);
82	0	}
83	0	for (; i < size; ++i)
84	0	dst[i] = SynetHardSigmoid32f(src[i], _scale, _shift);
85	0	}
86
87		//-------------------------------------------------------------------------------------------------
88
89		void SynetHswish32f(const float * src, size_t size, const float * shift, const float * scale, float * dst)
90	0	{
91	0	float _shift = shift[0];
92	0	float _scale = scale[0];
93	0	size_t size4 = Simd::AlignLo(size, 4);
94	0	size_t i = 0;
95	0	for (; i < size4; i += 4)
96	0	{
97	0	dst[i + 0] = SynetHswish32f(src[i + 0], _shift, _scale);
98	0	dst[i + 1] = SynetHswish32f(src[i + 1], _shift, _scale);
99	0	dst[i + 2] = SynetHswish32f(src[i + 2], _shift, _scale);
100	0	dst[i + 3] = SynetHswish32f(src[i + 3], _shift, _scale);
101	0	}
102	0	for (; i < size; ++i)
103	0	dst[i] = SynetHswish32f(src[i], _shift, _scale);
104	0	}
105
106		//-------------------------------------------------------------------------------------------------
107
108		void SynetMish32f(const float* src, size_t size, const float* threshold, float* dst)
109	0	{
110	0	float _threshold = threshold[0];
111	0	size_t size4 = Simd::AlignLo(size, 4);
112	0	size_t i = 0;
113	0	for (; i < size4; i += 4)
114	0	{
115	0	dst[i + 0] = SynetMish32f(src[i + 0], _threshold);
116	0	dst[i + 1] = SynetMish32f(src[i + 1], _threshold);
117	0	dst[i + 2] = SynetMish32f(src[i + 2], _threshold);
118	0	dst[i + 3] = SynetMish32f(src[i + 3], _threshold);
119	0	}
120	0	for (; i < size; ++i)
121	0	dst[i] = SynetMish32f(src[i], _threshold);
122	0	}
123
124		//-------------------------------------------------------------------------------------------------
125
126		void SynetPreluLayerForwardNchw(const float* src, const float* slope, size_t channels, size_t spatial, float* dst)
127	0	{
128	0	size_t aligned = Simd::AlignLo(spatial, 4);
129	0	for (size_t c = 0; c < channels; ++c)
130	0	{
131	0	float _slope = slope[c];
132	0	size_t s = 0;
133	0	for (; s < aligned; s += 4)
134	0	{
135	0	dst[s + 0] = SynetRelu32f(src[s + 0], _slope);
136	0	dst[s + 1] = SynetRelu32f(src[s + 1], _slope);
137	0	dst[s + 2] = SynetRelu32f(src[s + 2], _slope);
138	0	dst[s + 3] = SynetRelu32f(src[s + 3], _slope);
139	0	}
140	0	for (; s < spatial; ++s)
141	0	dst[s] = SynetRelu32f(src[s], _slope);
142	0	src += spatial;
143	0	dst += spatial;
144	0	}
145	0	}
146
147		void SynetPreluLayerForwardNhwc(const float* src, const float* slope, size_t channels, size_t spatial, float* dst)
148	0	{
149	0	size_t aligned = Simd::AlignLo(channels, 4);
150	0	for (size_t s = 0; s < spatial; ++s)
151	0	{
152	0	size_t c = 0;
153	0	for (; c < aligned; c += 4)
154	0	{
155	0	dst[c + 0] = SynetRelu32f(src[c + 0], slope[c + 0]);
156	0	dst[c + 1] = SynetRelu32f(src[c + 1], slope[c + 1]);
157	0	dst[c + 2] = SynetRelu32f(src[c + 2], slope[c + 2]);
158	0	dst[c + 3] = SynetRelu32f(src[c + 3], slope[c + 3]);
159	0	}
160	0	for (; c < channels; ++c)
161	0	dst[c] = SynetRelu32f(src[c], slope[c]);
162	0	src += channels;
163	0	dst += channels;
164
165	0	}
166	0	}
167
168		void SynetPreluLayerForward(const float* src, const float* slope, size_t channels, size_t spatial, float* dst, SimdTensorFormatType format)
169	0	{
170	0	if (Base::NchwCompatible(channels, spatial, format))
171	0	SynetPreluLayerForwardNchw(src, slope, channels, spatial, dst);
172	0	else if (Base::NhwcCompatible(channels, spatial, format))
173	0	SynetPreluLayerForwardNhwc(src, slope, channels, spatial, dst);
174	0	else
175	0	assert(0);
176	0	}
177
178		//-------------------------------------------------------------------------------------------------
179
180		void SynetRelu32f(const float* src, size_t size, const float* slope, float* dst)
181	0	{
182	0	float _slope = slope[0];
183	0	size_t size4 = Simd::AlignLo(size, 4);
184	0	size_t i = 0;
185	0	for (; i < size4; i += 4)
186	0	{
187	0	dst[i + 0] = SynetRelu32f(src[i + 0], _slope);
188	0	dst[i + 1] = SynetRelu32f(src[i + 1], _slope);
189	0	dst[i + 2] = SynetRelu32f(src[i + 2], _slope);
190	0	dst[i + 3] = SynetRelu32f(src[i + 3], _slope);
191	0	}
192	0	for (; i < size; ++i)
193	0	dst[i] = SynetRelu32f(src[i], _slope);
194	0	}
195
196		//-------------------------------------------------------------------------------------------------
197
198		void SynetRelu16b(const uint16_t* src, size_t size, const float* slope, uint16_t* dst)
199	0	{
200	0	float _slope = slope[0];
201	0	size_t size4 = Simd::AlignLo(size, 4);
202	0	size_t i = 0;
203	0	for (; i < size4; i += 4)
204	0	{
205	0	dst[i + 0] = SynetRelu16b(src[i + 0], _slope);
206	0	dst[i + 1] = SynetRelu16b(src[i + 1], _slope);
207	0	dst[i + 2] = SynetRelu16b(src[i + 2], _slope);
208	0	dst[i + 3] = SynetRelu16b(src[i + 3], _slope);
209	0	}
210	0	for (; i < size; ++i)
211	0	dst[i] = SynetRelu16b(src[i], _slope);
212	0	}
213
214		//-------------------------------------------------------------------------------------------------
215
216		void SynetRestrictRange32f(const float * src, size_t size, const float * lower, const float * upper, float * dst)
217	0	{
218	0	float min = *lower;
219	0	float max = *upper;
220	0	size_t size4 = Simd::AlignLo(size, 4);
221	0	size_t i = 0;
222	0	for (; i < size4; i += 4)
223	0	{
224	0	dst[i + 0] = Simd::RestrictRange(src[i + 0], min, max);
225	0	dst[i + 1] = Simd::RestrictRange(src[i + 1], min, max);
226	0	dst[i + 2] = Simd::RestrictRange(src[i + 2], min, max);
227	0	dst[i + 3] = Simd::RestrictRange(src[i + 3], min, max);
228	0	}
229	0	for (; i < size; ++i)
230	0	dst[i] = Simd::RestrictRange(src[i], min, max);
231	0	}
232
233		//-------------------------------------------------------------------------------------------------
234
235		void SynetSigmoid32f(const float* src, size_t size, const float* slope, float* dst)
236	0	{
237	0	float _slope = slope[0];
238	0	size_t size4 = Simd::AlignLo(size, 4);
239	0	size_t i = 0;
240	0	for (; i < size4; i += 4)
241	0	{
242	0	dst[i + 0] = SynetSigmoid32f(src[i + 0], _slope);
243	0	dst[i + 1] = SynetSigmoid32f(src[i + 1], _slope);
244	0	dst[i + 2] = SynetSigmoid32f(src[i + 2], _slope);
245	0	dst[i + 3] = SynetSigmoid32f(src[i + 3], _slope);
246	0	}
247	0	for (; i < size; ++i)
248	0	dst[i] = SynetSigmoid32f(src[i], _slope);
249	0	}
250
251		//-------------------------------------------------------------------------------------------------
252
253		void SynetSwish32f(const float* src, size_t size, const float* slope, float* dst)
254	0	{
255	0	float _slope = slope[0];
256	0	size_t size4 = Simd::AlignLo(size, 4);
257	0	size_t i = 0;
258	0	for (; i < size4; i += 4)
259	0	{
260	0	dst[i + 0] = SynetSwish32f(src[i + 0], _slope);
261	0	dst[i + 1] = SynetSwish32f(src[i + 1], _slope);
262	0	dst[i + 2] = SynetSwish32f(src[i + 2], _slope);
263	0	dst[i + 3] = SynetSwish32f(src[i + 3], _slope);
264	0	}
265	0	for (; i < size; ++i)
266	0	dst[i] = SynetSwish32f(src[i], _slope);
267	0	}
268
269		//-------------------------------------------------------------------------------------------------
270
271		void SynetSoftplus32f(const float* src, size_t size, const float * beta, const float * threshold, float* dst)
272	0	{
273	0	float _beta = beta[0];
274	0	float _threshold = threshold[0];
275	0	size_t size4 = Simd::AlignLo(size, 4);
276	0	size_t i = 0;
277	0	for (; i < size4; i += 4)
278	0	{
279	0	dst[i + 0] = SynetSoftplus32f(src[i + 0], _beta, _threshold);
280	0	dst[i + 1] = SynetSoftplus32f(src[i + 1], _beta, _threshold);
281	0	dst[i + 2] = SynetSoftplus32f(src[i + 2], _beta, _threshold);
282	0	dst[i + 3] = SynetSoftplus32f(src[i + 3], _beta, _threshold);
283	0	}
284	0	for (; i < size; ++i)
285	0	dst[i] = SynetSoftplus32f(src[i], _beta, _threshold);
286	0	}
287
288		//-------------------------------------------------------------------------------------------------
289
290		void SynetTanh32f(const float* src, size_t size, const float* slope, float* dst)
291	0	{
292	0	float _slope = slope[0];
293	0	size_t size4 = Simd::AlignLo(size, 4);
294	0	size_t i = 0;
295	0	for (; i < size4; i += 4)
296	0	{
297	0	dst[i + 0] = SynetTanh32f(src[i + 0], _slope);
298	0	dst[i + 1] = SynetTanh32f(src[i + 1], _slope);
299	0	dst[i + 2] = SynetTanh32f(src[i + 2], _slope);
300	0	dst[i + 3] = SynetTanh32f(src[i + 3], _slope);
301	0	}
302	0	for (; i < size; ++i)
303	0	dst[i] = SynetTanh32f(src[i], _slope);
304	0	}
305		}
306		#endif
307		}