Coverage Report

Created: 2024-10-01 06:54

/src/Simd/src/Simd/SimdBaseSynetInnerProduct32f.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2024 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdSynetInnerProduct32f.h"
25
#include "Simd/SimdSynetConvolution32f.h"
26
#include "Simd/SimdCpu.h"
27
#include "Simd/SimdBase.h"
28
29
namespace Simd
30
{
31
#if defined(SIMD_SYNET_ENABLE)
32
33
#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))
34
    // Returns the performance measurer associated with this object.
    // The measurer is requested from the shared storage on the first call only
    // (keyed by 'func' and a description built from Param().Info() and Desc());
    // later calls return the cached pointer and ignore 'func'.
    Base::PerformanceMeasurer * SynetInnerProduct32f::Perf(const char* func)
    {
        if (_perf != NULL)
            return _perf;
        _perf = Simd::Base::PerformanceMeasurerStorage::s_storage.Get(func, Param().Info() + " " + Desc(), Param().Flop());
        return _perf;
    }
40
#endif
41
42
    namespace Base
43
    {
44
        // Computes 'count' dot products of the vector 'src' (length 'size') with
        // consecutive rows of 'weight' (each row holds 'size' floats):
        //     dst[i] = dot(src, weight + i * size) + (bias ? bias[i] : 0).
        // 'bias' may be NULL. Four partial sums are kept for the unrolled part;
        // the tail (size % 4 elements) is accumulated into the first of them.
        void SynetInnerProductLayerForward(const float* src, const float* weight, const float* bias, size_t count, size_t size, float* dst)
        {
            // Largest multiple of 4 that does not exceed 'size'.
            const size_t size4 = size & ~size_t(3);
            const float* row = weight;
            for (size_t o = 0; o < count; ++o, row += size)
            {
                float s0 = 0, s1 = 0, s2 = 0, s3 = 0;
                size_t k = 0;
                while (k < size4)
                {
                    s0 += src[k + 0] * row[k + 0];
                    s1 += src[k + 1] * row[k + 1];
                    s2 += src[k + 2] * row[k + 2];
                    s3 += src[k + 3] * row[k + 3];
                    k += 4;
                }
                while (k < size)
                {
                    s0 += src[k] * row[k];
                    ++k;
                }
                // The partial sums are combined left to right, then the bias is added.
                const float shift = bias ? bias[o] : 0;
                dst[o] = s0 + s1 + s2 + s3 + shift;
            }
        }
64
65
        //---------------------------------------------------------------------
66
67
        // Sets up the GEMM-based inner product: M = batch, N = output, K = input.
        // The weight leading dimension and the GEMM routine depend on whether the
        // weight is stored transposed. The direct product routine (_prod) is
        // selected only for transposed weight, batch of 1 and identity activation.
        SynetInnerProduct32fGemm::SynetInnerProduct32fGemm(const InnerProductParam32f & p)
            : SynetInnerProduct32f(p)
            , _0(0.0f)
            , _1(1.0f)
        {
            _M = _param.batch;
            _N = _param.output;
            _K = _param.input;
            _ldS = _K;
            _ldD = _N;
            _prod = NULL;
            _biasAndActivation = Base::ConvolutionBiasAndActivation;
            if (!_param.transpose)
            {
                // Non-transposed weight: leading dimension is N.
                _ldW = _N;
                _gemm = Base::Gemm32fNN;
                return;
            }
            // Transposed weight: leading dimension is K.
            _ldW = _K;
            _gemm = Base::Gemm32fNT;
            if (_param.activation == SimdConvolutionActivationIdentity && _M == 1)
                _prod = Base::SynetInnerProductLayerForward;
        }
92
93
        // Builds a short description: Ext() + "::Gemm" followed by "Prod" when
        // the direct product routine is used, otherwise by "N" plus "Ncb" (packed
        // weight present), "T" (transposed weight) or "N".
        String SynetInnerProduct32fGemm::Desc() const
        {
            String desc = Ext() + "::Gemm";
            if (_prod)
            {
                desc += "Prod";
                return desc;
            }
            desc += String("N");
            if (_cbWeight.size)
                desc += "Ncb";
            else if (_param.transpose == SimdTrue)
                desc += "T";
            else
                desc += "N";
            return desc;
        }
98
99
        void SynetInnerProduct32fGemm::SetParams(const float* weight, SimdBool* internal, const float* bias, const float* params)
100
0
        {
101
0
            Simd::SynetInnerProduct32f::SetParams(weight, internal, bias, params);
102
0
            if (_cbWeight.data)
103
0
            {
104
0
                Array32f buffer;
105
0
                if (_param.transpose)
106
0
                {
107
0
                    buffer.Resize(_N * _K);
108
0
                    for (size_t k = 0; k < _K; ++k)
109
0
                        for (size_t j = 0; j < _N; ++j)
110
0
                            buffer[k*_N + j] = weight[j * _K + k];
111
0
                    weight = buffer.data;
112
0
                }
113
0
                _cbPack(_M, _N, _K, weight, _cbWeight.data, GemmKernelAny, NHWC_GEMM_COMPATIBLE);
114
0
                if (internal)
115
0
                    *internal = SimdTrue;
116
0
            }
117
0
        }
118
119
        void SynetInnerProduct32fGemm::Forward(const float * src, float * dst)
120
0
        {
121
0
            if (_prod)
122
0
                _prod(src, _weight, _bias, _N, _K, dst);
123
0
            else
124
0
            {
125
0
                if (_cbWeight.data)
126
0
                    _cbRun(_M, _N, _K, src, _cbWeight.data, dst, GemmKernelAny, NHWC_GEMM_COMPATIBLE);
127
0
                else
128
0
                    _gemm(_M, _N, _K, &_1, src, _ldS, _weight, _ldW, &_0, dst, _ldD);
129
0
                _biasAndActivation(_bias, _N, _M, _param.activation, _params, SimdTrue, dst);
130
0
            }
131
0
        }
132
133
        //---------------------------------------------------------------------
134
135
        // Caches the layer dimensions: K = number of inputs, N = number of outputs.
        SynetInnerProduct32fProd::SynetInnerProduct32fProd(const InnerProductParam32f& p)
            : SynetInnerProduct32f(p)
        {
            _K = _param.input;
            _N = _param.output;
        }
141
142
        void SynetInnerProduct32fProd::SetParams(const float* weight, SimdBool* internal, const float* bias, const float* params)
143
0
        {
144
0
            SynetInnerProduct32f::SetParams(weight, internal, bias, params);
145
0
            ReorderWeight(_weight, _rWeight.data);
146
0
            if (internal)
147
0
                *internal = SimdTrue;
148
0
            if (bias)
149
0
                memcpy(_rBias.data, bias, _param.output * sizeof(float));
150
0
        }
151
152
        void SynetInnerProduct32fProd::Forward(const float* src, float* dst)
153
0
        {
154
0
            _prod(src, _rWeight.data, _rBias.data, _K, _N, dst);
155
0
        }
156
157
        bool SynetInnerProduct32fProd::Preferable(const InnerProductParam32f& p)
158
0
        {
159
0
            return
160
0
                p.activation == SimdConvolutionActivationIdentity &&
161
0
                p.batch == 1 &&
162
0
                p.output >= 4 &&
163
0
                Base::AlgCacheL3() > p.input * p.output * sizeof(float);
164
0
        }
165
166
        void SynetInnerProduct32fProd::SetSize(size_t F)
167
0
        {
168
0
            _F = F;
169
0
            _rWeight.Resize(AlignHi(_N, _F) * _K);
170
0
            _rBias.Resize(AlignHi(_N, _F), true);
171
0
        }
172
173
        void SynetInnerProduct32fProd::ReorderWeight(const float* src, float* dst)
174
0
        {
175
0
            if (_param.transpose)
176
0
            {
177
0
                for (size_t n = 0; n < _N; n += _F)
178
0
                {
179
0
                    size_t F = Simd::Min(_N, n + _F) - n;
180
0
                    const float* psrc = src + n * _K;
181
0
                    for (size_t k = 0; k < _K; ++k)
182
0
                    {
183
0
                        size_t f = 0;
184
0
                        for (; f < F; ++f)
185
0
                            *(dst++) = psrc[f * _K];
186
0
                        for (; f < _F; ++f)
187
0
                            *(dst++) = 0.0f;
188
0
                        psrc++;
189
0
                    }
190
0
                }            
191
0
            }
192
0
            else
193
0
            {
194
0
                for (size_t n = 0; n < _N; n += _F)
195
0
                {
196
0
                    size_t F = Simd::Min(_N, n + _F) - n;
197
0
                    const float* psrc = src + n;
198
0
                    for (size_t k = 0; k < _K; ++k)
199
0
                    {
200
0
                        size_t f = 0;
201
0
                        for (; f < F; ++f)
202
0
                            *(dst++) = psrc[f];
203
0
                        for (; f < _F; ++f)
204
0
                            *(dst++) = 0.0f;
205
0
                        psrc += _N;
206
0
                    }
207
0
                }
208
0
            }
209
0
        }
210
211
        //---------------------------------------------------------------------
212
213
        void * SynetInnerProduct32fInit(size_t batch, size_t input, size_t output, SimdBool transpose, SimdConvolutionActivationType activation)
214
0
        {
215
0
            InnerProductParam32f param(batch, input, output, transpose, activation);
216
0
            if (!param.Valid())
217
0
                return NULL;
218
0
            return new SynetInnerProduct32fGemm(param);
219
0
        }
220
    }
221
#endif
222
}