/src/Simd/src/Simd/SimdBaseSynetInnerProduct32f.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynetInnerProduct32f.h" |
25 | | #include "Simd/SimdSynetConvolution32f.h" |
26 | | #include "Simd/SimdCpu.h" |
27 | | #include "Simd/SimdBase.h" |
28 | | |
29 | | namespace Simd |
30 | | { |
31 | | #if defined(SIMD_SYNET_ENABLE) |
32 | | |
33 | | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) |
34 | | Base::PerformanceMeasurer * SynetInnerProduct32f::Perf(const char* func) |
35 | | { |
36 | | if (_perf == NULL) |
37 | | _perf = Simd::Base::PerformanceMeasurerStorage::s_storage.Get(func, Param().Info() + " " + Desc(), Param().Flop()); |
38 | | return _perf; |
39 | | } |
40 | | #endif |
41 | | |
42 | | namespace Base |
43 | | { |
// Computes `count` dot products of `src` (length `size`) with consecutive
// rows of `weight` (each `size` floats long) and stores them in `dst`.
// If `bias` is not null, bias[i] is added to the i-th result.
void SynetInnerProductLayerForward(const float* src, const float* weight, const float* bias, size_t count, size_t size, float* dst)
{
    // Largest multiple of 4 that does not exceed size.
    const size_t size4 = size & ~size_t(3);
    const float* row = weight;
    for (size_t i = 0; i < count; ++i, row += size)
    {
        // Four independent partial sums, one per lane of the unrolled loop.
        float acc[4] = { 0, 0, 0, 0 };
        size_t j = 0;
        while (j < size4)
        {
            acc[0] += src[j + 0] * row[j + 0];
            acc[1] += src[j + 1] * row[j + 1];
            acc[2] += src[j + 2] * row[j + 2];
            acc[3] += src[j + 3] * row[j + 3];
            j += 4;
        }
        // Remaining (size % 4) elements are accumulated into the first lane.
        while (j < size)
        {
            acc[0] += src[j] * row[j];
            ++j;
        }
        dst[i] = acc[0] + acc[1] + acc[2] + acc[3] + (bias ? bias[i] : 0);
    }
}
64 | | |
65 | | //--------------------------------------------------------------------- |
66 | | |
67 | | SynetInnerProduct32fGemm::SynetInnerProduct32fGemm(const InnerProductParam32f & p) |
68 | | : SynetInnerProduct32f(p) |
69 | | , _0(0.0f) |
70 | | , _1(1.0f) |
71 | 0 | { |
72 | 0 | _M = _param.batch; |
73 | 0 | _N = _param.output; |
74 | 0 | _K = _param.input; |
75 | 0 | _ldS = _K; |
76 | 0 | _ldD = _N; |
77 | 0 | _biasAndActivation = Base::ConvolutionBiasAndActivation; |
78 | 0 | _prod = NULL; |
79 | 0 | if (_param.transpose) |
80 | 0 | { |
81 | 0 | _gemm = Base::Gemm32fNT; |
82 | 0 | _ldW = _K; |
83 | 0 | if (_M == 1 && _param.activation == SimdConvolutionActivationIdentity) |
84 | 0 | _prod = Base::SynetInnerProductLayerForward; |
85 | 0 | } |
86 | 0 | else |
87 | 0 | { |
88 | 0 | _gemm = Base::Gemm32fNN; |
89 | 0 | _ldW = _N; |
90 | 0 | } |
91 | 0 | } |
92 | | |
93 | | String SynetInnerProduct32fGemm::Desc() const |
94 | 0 | { |
95 | 0 | return Ext() + "::Gemm" + (_prod ? "Prod" : |
96 | 0 | String("N") + (_cbWeight.size ? "Ncb" : (_param.transpose == SimdTrue ? "T" : "N"))); |
97 | 0 | } |
98 | | |
99 | | void SynetInnerProduct32fGemm::SetParams(const float* weight, SimdBool* internal, const float* bias, const float* params) |
100 | 0 | { |
101 | 0 | Simd::SynetInnerProduct32f::SetParams(weight, internal, bias, params); |
102 | 0 | if (_cbWeight.data) |
103 | 0 | { |
104 | 0 | Array32f buffer; |
105 | 0 | if (_param.transpose) |
106 | 0 | { |
107 | 0 | buffer.Resize(_N * _K); |
108 | 0 | for (size_t k = 0; k < _K; ++k) |
109 | 0 | for (size_t j = 0; j < _N; ++j) |
110 | 0 | buffer[k*_N + j] = weight[j * _K + k]; |
111 | 0 | weight = buffer.data; |
112 | 0 | } |
113 | 0 | _cbPack(_M, _N, _K, weight, _cbWeight.data, GemmKernelAny, NHWC_GEMM_COMPATIBLE); |
114 | 0 | if (internal) |
115 | 0 | *internal = SimdTrue; |
116 | 0 | } |
117 | 0 | } |
118 | | |
119 | | void SynetInnerProduct32fGemm::Forward(const float * src, float * dst) |
120 | 0 | { |
121 | 0 | if (_prod) |
122 | 0 | _prod(src, _weight, _bias, _N, _K, dst); |
123 | 0 | else |
124 | 0 | { |
125 | 0 | if (_cbWeight.data) |
126 | 0 | _cbRun(_M, _N, _K, src, _cbWeight.data, dst, GemmKernelAny, NHWC_GEMM_COMPATIBLE); |
127 | 0 | else |
128 | 0 | _gemm(_M, _N, _K, &_1, src, _ldS, _weight, _ldW, &_0, dst, _ldD); |
129 | 0 | _biasAndActivation(_bias, _N, _M, _param.activation, _params, SimdTrue, dst); |
130 | 0 | } |
131 | 0 | } |
132 | | |
133 | | //--------------------------------------------------------------------- |
134 | | |
135 | | SynetInnerProduct32fProd::SynetInnerProduct32fProd(const InnerProductParam32f& p) |
136 | | : SynetInnerProduct32f(p) |
137 | 0 | { |
138 | 0 | _N = _param.output; |
139 | 0 | _K = _param.input; |
140 | 0 | } |
141 | | |
142 | | void SynetInnerProduct32fProd::SetParams(const float* weight, SimdBool* internal, const float* bias, const float* params) |
143 | 0 | { |
144 | 0 | SynetInnerProduct32f::SetParams(weight, internal, bias, params); |
145 | 0 | ReorderWeight(_weight, _rWeight.data); |
146 | 0 | if (internal) |
147 | 0 | *internal = SimdTrue; |
148 | 0 | if (bias) |
149 | 0 | memcpy(_rBias.data, bias, _param.output * sizeof(float)); |
150 | 0 | } |
151 | | |
152 | | void SynetInnerProduct32fProd::Forward(const float* src, float* dst) |
153 | 0 | { |
154 | 0 | _prod(src, _rWeight.data, _rBias.data, _K, _N, dst); |
155 | 0 | } |
156 | | |
157 | | bool SynetInnerProduct32fProd::Preferable(const InnerProductParam32f& p) |
158 | 0 | { |
159 | 0 | return |
160 | 0 | p.activation == SimdConvolutionActivationIdentity && |
161 | 0 | p.batch == 1 && |
162 | 0 | p.output >= 4 && |
163 | 0 | Base::AlgCacheL3() > p.input * p.output * sizeof(float); |
164 | 0 | } |
165 | | |
166 | | void SynetInnerProduct32fProd::SetSize(size_t F) |
167 | 0 | { |
168 | 0 | _F = F; |
169 | 0 | _rWeight.Resize(AlignHi(_N, _F) * _K); |
170 | 0 | _rBias.Resize(AlignHi(_N, _F), true); |
171 | 0 | } |
172 | | |
173 | | void SynetInnerProduct32fProd::ReorderWeight(const float* src, float* dst) |
174 | 0 | { |
175 | 0 | if (_param.transpose) |
176 | 0 | { |
177 | 0 | for (size_t n = 0; n < _N; n += _F) |
178 | 0 | { |
179 | 0 | size_t F = Simd::Min(_N, n + _F) - n; |
180 | 0 | const float* psrc = src + n * _K; |
181 | 0 | for (size_t k = 0; k < _K; ++k) |
182 | 0 | { |
183 | 0 | size_t f = 0; |
184 | 0 | for (; f < F; ++f) |
185 | 0 | *(dst++) = psrc[f * _K]; |
186 | 0 | for (; f < _F; ++f) |
187 | 0 | *(dst++) = 0.0f; |
188 | 0 | psrc++; |
189 | 0 | } |
190 | 0 | } |
191 | 0 | } |
192 | 0 | else |
193 | 0 | { |
194 | 0 | for (size_t n = 0; n < _N; n += _F) |
195 | 0 | { |
196 | 0 | size_t F = Simd::Min(_N, n + _F) - n; |
197 | 0 | const float* psrc = src + n; |
198 | 0 | for (size_t k = 0; k < _K; ++k) |
199 | 0 | { |
200 | 0 | size_t f = 0; |
201 | 0 | for (; f < F; ++f) |
202 | 0 | *(dst++) = psrc[f]; |
203 | 0 | for (; f < _F; ++f) |
204 | 0 | *(dst++) = 0.0f; |
205 | 0 | psrc += _N; |
206 | 0 | } |
207 | 0 | } |
208 | 0 | } |
209 | 0 | } |
210 | | |
211 | | //--------------------------------------------------------------------- |
212 | | |
213 | | void * SynetInnerProduct32fInit(size_t batch, size_t input, size_t output, SimdBool transpose, SimdConvolutionActivationType activation) |
214 | 0 | { |
215 | 0 | InnerProductParam32f param(batch, input, output, transpose, activation); |
216 | 0 | if (!param.Valid()) |
217 | 0 | return NULL; |
218 | 0 | return new SynetInnerProduct32fGemm(param); |
219 | 0 | } |
220 | | } |
221 | | #endif |
222 | | } |