/src/Simd/src/Simd/SimdSynetInnerProduct16b.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Simd Library (http://ermig1979.github.io/Simd).  | 
3  |  | *  | 
4  |  | * Copyright (c) 2011-2024 Yermalayeu Ihar.  | 
5  |  | *  | 
6  |  | * Permission is hereby granted, free of charge, to any person obtaining a copy  | 
7  |  | * of this software and associated documentation files (the "Software"), to deal  | 
8  |  | * in the Software without restriction, including without limitation the rights  | 
9  |  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  | 
10  |  | * copies of the Software, and to permit persons to whom the Software is  | 
11  |  | * furnished to do so, subject to the following conditions:  | 
12  |  | *  | 
13  |  | * The above copyright notice and this permission notice shall be included in  | 
14  |  | * all copies or substantial portions of the Software.  | 
15  |  | *  | 
16  |  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  | 
17  |  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  | 
18  |  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  | 
19  |  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  | 
20  |  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  | 
21  |  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  | 
22  |  | * SOFTWARE.  | 
23  |  | */  | 
24  |  | #ifndef __SimdSynetInnerProduct16b_h__  | 
25  |  | #define __SimdSynetInnerProduct16b_h__  | 
26  |  |  | 
27  |  | #include "Simd/SimdArray.h"  | 
28  |  | #include "Simd/SimdPerformance.h"  | 
29  |  | #include "Simd/SimdSynetConvParam.h"  | 
30  |  |  | 
31  |  | namespace Simd  | 
32  |  | { | 
33  |  |     struct InnerProductParam16b  | 
34  |  |     { | 
35  |  |         size_t M, N, K;  | 
36  |  |         SimdTensorDataType typeA, typeB, typeC;  | 
37  |  |         SimdBool transB, constB, bias;  | 
38  |  |  | 
39  |  |         InnerProductParam16b(size_t m, size_t n, size_t k,  | 
40  |  |             SimdTensorDataType ta, SimdTensorDataType tb, SimdTensorDataType tc,  | 
41  |  |             SimdBool t, SimdBool c, SimdBool b)  | 
42  |  |             : M(m), N(n), K(k)  | 
43  |  |             , typeA(ta), typeB(tb), typeC(tc)  | 
44  |  |             , transB(t), constB(c), bias(b)  | 
45  | 0  |         { | 
46  | 0  |         }  | 
47  |  |  | 
48  |  |         bool Valid()  | 
49  | 0  |         { | 
50  | 0  |             return  | 
51  | 0  |                 (typeA == SimdTensorData32f || typeA == SimdTensorData16b) &&  | 
52  | 0  |                 (typeB == SimdTensorData32f || typeB == SimdTensorData16b) &&  | 
53  | 0  |                 (typeC == SimdTensorData32f || typeC == SimdTensorData16b);  | 
54  | 0  |         }  | 
55  |  |  | 
56  |  |         String Info() const  | 
57  | 0  |         { | 
58  | 0  |             std::stringstream ss;  | 
59  | 0  |             ss << M << "x" << N << "x" << K << "-";  | 
60  | 0  |             ss << ToChar(typeA) << ToChar(typeB) << ToChar(typeC) << "-";  | 
61  | 0  |             ss << (transB ? "t" : "n") << (constB ? "1" : "2") << (bias ? "b" : "o");  | 
62  | 0  |             return ss.str();  | 
63  | 0  |         }  | 
64  |  |  | 
65  |  |         int64_t Flop() const  | 
66  | 0  |         { | 
67  | 0  |             return int64_t(M) * N * K * 2;  | 
68  | 0  |         }  | 
69  |  |     };  | 
70  |  |  | 
71  |  |     //-------------------------------------------------------------------------------------------------  | 
72  |  |  | 
73  |  |     class SynetInnerProduct16b : public Deletable  | 
74  |  |     { | 
75  |  |     public:  | 
76  |  |         SynetInnerProduct16b(const InnerProductParam16b& p)  | 
77  |  |             : _param(p)  | 
78  |  | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))  | 
79  |  |             , _perf(NULL)  | 
80  |  | #endif  | 
81  |  |             , _sizeA(0)  | 
82  |  |             , _sizeB(0)  | 
83  |  |             , _sizeC(0)  | 
84  | 0  |         { | 
85  | 0  |         }  | 
86  |  |  | 
87  |  |         const InnerProductParam16b& Param() const  | 
88  | 0  |         { | 
89  | 0  |             return _param;  | 
90  | 0  |         }  | 
91  |  |  | 
92  |  |         virtual size_t InternalBufferSize() const  | 
93  | 0  |         { | 
94  | 0  |             return _buffer.RawSize() + _weight.RawSize() + _bias.RawSize();  | 
95  | 0  |         }  | 
96  |  |  | 
97  |  |         virtual size_t ExternalBufferSize() const  | 
98  | 0  |         { | 
99  | 0  |             return _sizeA * 2 + _sizeB * 2 + _sizeC * 4;  | 
100  | 0  |         }  | 
101  |  |  | 
102  |  |         virtual String Ext() const = 0;  | 
103  |  |         virtual String Desc() const = 0;  | 
104  |  |  | 
105  |  |         virtual void SetParams(const float* weight, const float* bias) = 0;  | 
106  |  |         virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C) = 0;  | 
107  |  |  | 
108  |  | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))  | 
109  |  |         Base::PerformanceMeasurer* Perf(const char* func)  | 
110  |  |         { | 
111  |  |             if (_perf == NULL)  | 
112  |  |                 _perf = Simd::Base::PerformanceMeasurerStorage::s_storage.Get(func, Param().Info() + " " + Desc(), Param().Flop());  | 
113  |  |             return _perf;  | 
114  |  |         }  | 
115  |  | #endif  | 
116  |  |  | 
117  |  |         const char* Info() const  | 
118  | 0  |         { | 
119  | 0  |             _info = Desc();  | 
120  | 0  |             return _info.c_str();  | 
121  | 0  |         }  | 
122  |  |  | 
123  |  |     protected:  | 
124  |  |         InnerProductParam16b _param;  | 
125  |  | #if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))  | 
126  |  |         Base::PerformanceMeasurer* _perf;  | 
127  |  | #endif  | 
128  |  |         Array8u _buffer;  | 
129  |  |         Array16u _weight;  | 
130  |  |         Array32f _bias;  | 
131  |  |         mutable String _info;  | 
132  |  |         size_t _sizeA, _sizeB, _sizeC;  | 
133  |  |  | 
134  |  |         uint8_t* Buffer(uint8_t* buffer)  | 
135  | 0  |         { | 
136  | 0  |             if (buffer)  | 
137  | 0  |                 return buffer;  | 
138  | 0  |             else  | 
139  | 0  |             { | 
140  | 0  |                 _buffer.Resize(ExternalBufferSize());  | 
141  | 0  |                 return _buffer.data;  | 
142  | 0  |             }  | 
143  | 0  |         }  | 
144  |  |     };  | 
145  |  |  | 
146  |  |     //-------------------------------------------------------------------------------------------------  | 
147  |  |  | 
148  |  |     namespace Base  | 
149  |  |     { | 
150  |  |         class SynetInnerProduct16bRef : public SynetInnerProduct16b  | 
151  |  |         { | 
152  |  |         public:  | 
153  |  |             SynetInnerProduct16bRef(const InnerProductParam16b& p);  | 
154  | 0  |             virtual String Ext() const { return "Base"; } | 
155  |  |             virtual String Desc() const;  | 
156  |  |             virtual void SetParams(const float* weight, const float* bias);  | 
157  |  |             virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C);  | 
158  |  |  | 
159  |  |         protected:  | 
160  |  |             void GemmAndBias(const uint16_t* A, const uint16_t* B, float* C);  | 
161  |  |         };  | 
162  |  |  | 
163  |  |         class SynetInnerProduct16bGemmNN : public SynetInnerProduct16b  | 
164  |  |         { | 
165  |  |         public:  | 
166  |  |             SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);  | 
167  | 0  |             virtual String Ext() const { return "Base"; } | 
168  |  |             virtual String Desc() const;  | 
169  |  |             virtual void SetParams(const float* weight, const float* bias);  | 
170  |  |             virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C);  | 
171  |  |  | 
172  |  |             static bool Preferable(const InnerProductParam16b& p);  | 
173  |  |  | 
174  |  |             struct AlgParam  | 
175  |  |             { | 
176  |  |                 size_t F, microM, microN, microK;  | 
177  |  |                 size_t macroM, macroN, macroK;  | 
178  |  |                 size_t aM, aN, aK, eA, eB, eC, bK, cN;  | 
179  |  |             };  | 
180  |  |  | 
181  |  |             typedef void(*PrepPtr)(const uint8_t* src, const InnerProductParam16b& p, const AlgParam& a, size_t size, size_t K, uint16_t* dst);  | 
182  |  |             typedef void(*GemmPtr)(const uint16_t* A, const InnerProductParam16b& p, const AlgParam& a, size_t M, size_t N, size_t K, int update, const uint16_t* B, float* C, int post, const float* bias, uint8_t* dst);  | 
183  |  |  | 
184  |  |         protected:  | 
185  |  |             void SetAlgParam(size_t F, size_t microM, size_t microN, size_t microK, size_t L1, size_t L2, size_t L3);  | 
186  |  |  | 
187  |  |             AlgParam _alg;  | 
188  |  |             PrepPtr _prepA, _prepB;  | 
189  |  |             GemmPtr _gemm;  | 
190  |  |         };  | 
191  |  |  | 
192  |  |         //-------------------------------------------------------------------------------------------------  | 
193  |  |  | 
194  |  |         void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);  | 
195  |  |     }  | 
196  |  |  | 
197  |  | #ifdef SIMD_SSE41_ENABLE      | 
198  |  |     namespace Sse41  | 
199  |  |     { | 
200  |  |         class SynetInnerProduct16bGemmNN : public Base::SynetInnerProduct16bGemmNN  | 
201  |  |         { | 
202  |  |         public:  | 
203  |  |             SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);  | 
204  |  |  | 
205  | 0  |             virtual String Ext() const { return "Sse41"; } | 
206  |  |         };  | 
207  |  |  | 
208  |  |         //-------------------------------------------------------------------------------------------------  | 
209  |  |  | 
210  |  |         void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);  | 
211  |  |     }  | 
212  |  | #endif  | 
213  |  |  | 
214  |  | #ifdef SIMD_AVX2_ENABLE      | 
215  |  |     namespace Avx2  | 
216  |  |     { | 
217  |  |         class SynetInnerProduct16bGemmNN : public Sse41::SynetInnerProduct16bGemmNN  | 
218  |  |         { | 
219  |  |         public:  | 
220  |  |             SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);  | 
221  |  |  | 
222  | 0  |             virtual String Ext() const { return "Avx2"; } | 
223  |  |         };  | 
224  |  |  | 
225  |  |         //-------------------------------------------------------------------------------------------------  | 
226  |  |  | 
227  |  |         void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);  | 
228  |  |     }  | 
229  |  | #endif  | 
230  |  |  | 
231  |  | #ifdef SIMD_AVX512BW_ENABLE      | 
232  |  |     namespace Avx512bw  | 
233  |  |     { | 
234  |  |         class SynetInnerProduct16bGemmNN : public Avx2::SynetInnerProduct16bGemmNN  | 
235  |  |         { | 
236  |  |         public:  | 
237  |  |             SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);  | 
238  |  |  | 
239  | 0  |             virtual String Ext() const { return "Avx512bw"; } | 
240  |  |         };  | 
241  |  |  | 
242  |  |         //-------------------------------------------------------------------------------------------------  | 
243  |  |  | 
244  |  |         void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);  | 
245  |  |     }  | 
246  |  | #endif  | 
247  |  |  | 
248  |  | #if (defined(SIMD_AMXBF16_ENABLE) || (defined(SIMD_AVX512BW_ENABLE) && defined(SIMD_AMX_EMULATE)))     | 
249  |  |     namespace AmxBf16  | 
250  |  |     { | 
251  |  |         class SynetInnerProduct16bGemmNN : public Avx512bw::SynetInnerProduct16bGemmNN  | 
252  |  |         { | 
253  |  |         public:  | 
254  |  |             SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);  | 
255  |  |  | 
256  |  |             virtual String Ext() const { return "AmxBf16"; } | 
257  |  |         };  | 
258  |  |  | 
259  |  |         //-------------------------------------------------------------------------------------------------  | 
260  |  |  | 
261  |  |         void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);  | 
262  |  |     }  | 
263  |  | #endif  | 
264  |  | }  | 
265  |  |  | 
266  |  | #endif  |