/src/Simd/src/Simd/SimdSynetInnerProduct16b.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #ifndef __SimdSynetInnerProduct16b_h__ |
25 | | #define __SimdSynetInnerProduct16b_h__ |
26 | | |
27 | | #include "Simd/SimdArray.h" |
28 | | #include "Simd/SimdPerformance.h" |
29 | | #include "Simd/SimdSynetConvParam.h" |
30 | | |
31 | | namespace Simd |
32 | | { |
33 | | struct InnerProductParam16b |
34 | | { |
35 | | size_t M, N, K; |
36 | | SimdTensorDataType typeA, typeB, typeC; |
37 | | SimdBool transB, constB, bias; |
38 | | |
39 | | InnerProductParam16b(size_t m, size_t n, size_t k, |
40 | | SimdTensorDataType ta, SimdTensorDataType tb, SimdTensorDataType tc, |
41 | | SimdBool t, SimdBool c, SimdBool b) |
42 | | : M(m), N(n), K(k) |
43 | | , typeA(ta), typeB(tb), typeC(tc) |
44 | | , transB(t), constB(c), bias(b) |
45 | 0 | { |
46 | 0 | } |
47 | | |
48 | | bool Valid() |
49 | 0 | { |
50 | 0 | return |
51 | 0 | (typeA == SimdTensorData32f || typeA == SimdTensorData16b) && |
52 | 0 | (typeB == SimdTensorData32f || typeB == SimdTensorData16b) && |
53 | 0 | (typeC == SimdTensorData32f || typeC == SimdTensorData16b); |
54 | 0 | } |
55 | | |
56 | | String Info() const |
57 | 0 | { |
58 | 0 | std::stringstream ss; |
59 | 0 | ss << M << "x" << N << "x" << K << "-"; |
60 | 0 | ss << ToChar(typeA) << ToChar(typeB) << ToChar(typeC) << "-"; |
61 | 0 | ss << (transB ? "t" : "n") << (constB ? "1" : "2") << (bias ? "b" : "o"); |
62 | 0 | return ss.str(); |
63 | 0 | } |
64 | | |
65 | | int64_t Flop() const |
66 | 0 | { |
67 | 0 | return int64_t(M) * N * K * 2; |
68 | 0 | } |
69 | | }; |
70 | | |
71 | | //------------------------------------------------------------------------------------------------- |
72 | | |
73 | | class SynetInnerProduct16b : public Deletable |
74 | | { |
75 | | public: |
76 | | SynetInnerProduct16b(const InnerProductParam16b& p) |
77 | | : _param(p) |
78 | | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) |
79 | | , _perf(NULL) |
80 | | #endif |
81 | | , _sizeA(0) |
82 | | , _sizeB(0) |
83 | | , _sizeC(0) |
84 | 0 | { |
85 | 0 | } |
86 | | |
87 | | const InnerProductParam16b& Param() const |
88 | 0 | { |
89 | 0 | return _param; |
90 | 0 | } |
91 | | |
92 | | virtual size_t InternalBufferSize() const |
93 | 0 | { |
94 | 0 | return _buffer.RawSize() + _weight.RawSize() + _bias.RawSize(); |
95 | 0 | } |
96 | | |
97 | | virtual size_t ExternalBufferSize() const |
98 | 0 | { |
99 | 0 | return _sizeA * 2 + _sizeB * 2 + _sizeC * 4; |
100 | 0 | } |
101 | | |
102 | | virtual String Ext() const = 0; |
103 | | virtual String Desc() const = 0; |
104 | | |
105 | | virtual void SetParams(const float* weight, const float* bias) = 0; |
106 | | virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C) = 0; |
107 | | |
108 | | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) |
109 | | Base::PerformanceMeasurer* Perf(const char* func) |
110 | | { |
111 | | if (_perf == NULL) |
112 | | _perf = Simd::Base::PerformanceMeasurerStorage::s_storage.Get(func, Param().Info() + " " + Desc(), Param().Flop()); |
113 | | return _perf; |
114 | | } |
115 | | #endif |
116 | | |
117 | | const char* Info() const |
118 | 0 | { |
119 | 0 | _info = Desc(); |
120 | 0 | return _info.c_str(); |
121 | 0 | } |
122 | | |
123 | | protected: |
124 | | InnerProductParam16b _param; |
125 | | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) |
126 | | Base::PerformanceMeasurer* _perf; |
127 | | #endif |
128 | | Array8u _buffer; |
129 | | Array16u _weight; |
130 | | Array32f _bias; |
131 | | mutable String _info; |
132 | | size_t _sizeA, _sizeB, _sizeC; |
133 | | |
134 | | uint8_t* Buffer(uint8_t* buffer) |
135 | 0 | { |
136 | 0 | if (buffer) |
137 | 0 | return buffer; |
138 | 0 | else |
139 | 0 | { |
140 | 0 | _buffer.Resize(ExternalBufferSize()); |
141 | 0 | return _buffer.data; |
142 | 0 | } |
143 | 0 | } |
144 | | }; |
145 | | |
146 | | //------------------------------------------------------------------------------------------------- |
147 | | |
148 | | namespace Base |
149 | | { |
150 | | class SynetInnerProduct16bRef : public SynetInnerProduct16b |
151 | | { |
152 | | public: |
153 | | SynetInnerProduct16bRef(const InnerProductParam16b& p); |
154 | 0 | virtual String Ext() const { return "Base"; } |
155 | | virtual String Desc() const; |
156 | | virtual void SetParams(const float* weight, const float* bias); |
157 | | virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C); |
158 | | |
159 | | protected: |
160 | | void GemmAndBias(const uint16_t* A, const uint16_t* B, float* C); |
161 | | }; |
162 | | |
163 | | class SynetInnerProduct16bGemmNN : public SynetInnerProduct16b |
164 | | { |
165 | | public: |
166 | | SynetInnerProduct16bGemmNN(const InnerProductParam16b& p); |
167 | 0 | virtual String Ext() const { return "Base"; } |
168 | | virtual String Desc() const; |
169 | | virtual void SetParams(const float* weight, const float* bias); |
170 | | virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C); |
171 | | |
172 | | static bool Preferable(const InnerProductParam16b& p); |
173 | | |
174 | | struct AlgParam |
175 | | { |
176 | | size_t F, microM, microN, microK; |
177 | | size_t macroM, macroN, macroK; |
178 | | size_t aM, aN, aK, eA, eB, eC, bK, cN; |
179 | | }; |
180 | | |
181 | | typedef void(*PrepPtr)(const uint8_t* src, const InnerProductParam16b& p, const AlgParam& a, size_t size, size_t K, uint16_t* dst); |
182 | | typedef void(*GemmPtr)(const uint16_t* A, const InnerProductParam16b& p, const AlgParam& a, size_t M, size_t N, size_t K, int update, const uint16_t* B, float* C, int post, const float* bias, uint8_t* dst); |
183 | | |
184 | | protected: |
185 | | void SetAlgParam(size_t F, size_t microM, size_t microN, size_t microK, size_t L1, size_t L2, size_t L3); |
186 | | |
187 | | AlgParam _alg; |
188 | | PrepPtr _prepA, _prepB; |
189 | | GemmPtr _gemm; |
190 | | }; |
191 | | |
192 | | //------------------------------------------------------------------------------------------------- |
193 | | |
194 | | void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias); |
195 | | } |
196 | | |
#ifdef SIMD_SSE41_ENABLE
    // Declarations compiled only when SIMD_SSE41_ENABLE is defined.
    namespace Sse41
    {
        // Same interface as Base::SynetInnerProduct16bGemmNN; only the constructor and Ext() are redeclared here.
        class SynetInnerProduct16bGemmNN : public Base::SynetInnerProduct16bGemmNN
        {
        public:
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);

            // Tag of the instruction-set extension this class belongs to.
            virtual String Ext() const
            {
                return "Sse41";
            }
        };

        //-------------------------------------------------------------------------------------------------

        // Factory entry point for this level; parameters mirror the InnerProductParam16b constructor arguments.
        void* SynetInnerProduct16bInit(
            size_t M, size_t N, size_t K,
            SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC,
            SimdBool transB, SimdBool constB, SimdBool bias);
    }
#endif
213 | | |
#ifdef SIMD_AVX2_ENABLE
    // Declarations compiled only when SIMD_AVX2_ENABLE is defined.
    namespace Avx2
    {
        // Derives from the Sse41 class of the same name; only the constructor and Ext() are redeclared here.
        class SynetInnerProduct16bGemmNN : public Sse41::SynetInnerProduct16bGemmNN
        {
        public:
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);

            // Tag of the instruction-set extension this class belongs to.
            virtual String Ext() const
            {
                return "Avx2";
            }
        };

        //-------------------------------------------------------------------------------------------------

        // Factory entry point for this level; parameters mirror the InnerProductParam16b constructor arguments.
        void* SynetInnerProduct16bInit(
            size_t M, size_t N, size_t K,
            SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC,
            SimdBool transB, SimdBool constB, SimdBool bias);
    }
#endif
230 | | |
#ifdef SIMD_AVX512BW_ENABLE
    // Declarations compiled only when SIMD_AVX512BW_ENABLE is defined.
    namespace Avx512bw
    {
        // Derives from the Avx2 class of the same name; only the constructor and Ext() are redeclared here.
        class SynetInnerProduct16bGemmNN : public Avx2::SynetInnerProduct16bGemmNN
        {
        public:
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);

            // Tag of the instruction-set extension this class belongs to.
            virtual String Ext() const
            {
                return "Avx512bw";
            }
        };

        //-------------------------------------------------------------------------------------------------

        // Factory entry point for this level; parameters mirror the InnerProductParam16b constructor arguments.
        void* SynetInnerProduct16bInit(
            size_t M, size_t N, size_t K,
            SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC,
            SimdBool transB, SimdBool constB, SimdBool bias);
    }
#endif
247 | | |
#if (defined(SIMD_AMXBF16_ENABLE) || (defined(SIMD_AVX512BW_ENABLE) && defined(SIMD_AMX_EMULATE)))
    // Declarations compiled when SIMD_AMXBF16_ENABLE is defined, or when both
    // SIMD_AVX512BW_ENABLE and SIMD_AMX_EMULATE are defined.
    namespace AmxBf16
    {
        // Derives from the Avx512bw class of the same name; only the constructor and Ext() are redeclared here.
        class SynetInnerProduct16bGemmNN : public Avx512bw::SynetInnerProduct16bGemmNN
        {
        public:
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);

            // Tag of the instruction-set extension this class belongs to.
            virtual String Ext() const
            {
                return "AmxBf16";
            }
        };

        //-------------------------------------------------------------------------------------------------

        // Factory entry point for this level; parameters mirror the InnerProductParam16b constructor arguments.
        void* SynetInnerProduct16bInit(
            size_t M, size_t N, size_t K,
            SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC,
            SimdBool transB, SimdBool constB, SimdBool bias);
    }
#endif
264 | | } |
265 | | |
266 | | #endif |