Coverage Report

Created: 2024-10-01 06:54

/src/Simd/src/Simd/SimdSynetInnerProduct16b.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2024 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#ifndef __SimdSynetInnerProduct16b_h__
25
#define __SimdSynetInnerProduct16b_h__
26
27
#include "Simd/SimdArray.h"
28
#include "Simd/SimdPerformance.h"
29
#include "Simd/SimdSynetConvParam.h"
30
31
namespace Simd
32
{
33
    struct InnerProductParam16b
34
    {
35
        size_t M, N, K;
36
        SimdTensorDataType typeA, typeB, typeC;
37
        SimdBool transB, constB, bias;
38
39
        InnerProductParam16b(size_t m, size_t n, size_t k,
40
            SimdTensorDataType ta, SimdTensorDataType tb, SimdTensorDataType tc,
41
            SimdBool t, SimdBool c, SimdBool b)
42
            : M(m), N(n), K(k)
43
            , typeA(ta), typeB(tb), typeC(tc)
44
            , transB(t), constB(c), bias(b)
45
0
        {
46
0
        }
47
48
        bool Valid()
49
0
        {
50
0
            return
51
0
                (typeA == SimdTensorData32f || typeA == SimdTensorData16b) &&
52
0
                (typeB == SimdTensorData32f || typeB == SimdTensorData16b) &&
53
0
                (typeC == SimdTensorData32f || typeC == SimdTensorData16b);
54
0
        }
55
56
        String Info() const
57
0
        {
58
0
            std::stringstream ss;
59
0
            ss << M << "x" << N << "x" << K << "-";
60
0
            ss << ToChar(typeA) << ToChar(typeB) << ToChar(typeC) << "-";
61
0
            ss << (transB ? "t" : "n") << (constB ? "1" : "2") << (bias ? "b" : "o");
62
0
            return ss.str();
63
0
        }
64
65
        int64_t Flop() const
66
0
        {
67
0
            return int64_t(M) * N * K * 2;
68
0
        }
69
    };
70
71
    //-------------------------------------------------------------------------------------------------
72
73
    class SynetInnerProduct16b : public Deletable
74
    {
75
    public:
76
        SynetInnerProduct16b(const InnerProductParam16b& p)
77
            : _param(p)
78
#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))
79
            , _perf(NULL)
80
#endif
81
            , _sizeA(0)
82
            , _sizeB(0)
83
            , _sizeC(0)
84
0
        {
85
0
        }
86
87
        const InnerProductParam16b& Param() const
88
0
        {
89
0
            return _param;
90
0
        }
91
92
        virtual size_t InternalBufferSize() const
93
0
        {
94
0
            return _buffer.RawSize() + _weight.RawSize() + _bias.RawSize();
95
0
        }
96
97
        virtual size_t ExternalBufferSize() const
98
0
        {
99
0
            return _sizeA * 2 + _sizeB * 2 + _sizeC * 4;
100
0
        }
101
102
        virtual String Ext() const = 0;
103
        virtual String Desc() const = 0;
104
105
        virtual void SetParams(const float* weight, const float* bias) = 0;
106
        virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C) = 0;
107
108
#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))
109
        Base::PerformanceMeasurer* Perf(const char* func)
110
        {
111
            if (_perf == NULL)
112
                _perf = Simd::Base::PerformanceMeasurerStorage::s_storage.Get(func, Param().Info() + " " + Desc(), Param().Flop());
113
            return _perf;
114
        }
115
#endif
116
117
        const char* Info() const
118
0
        {
119
0
            _info = Desc();
120
0
            return _info.c_str();
121
0
        }
122
123
    protected:
124
        InnerProductParam16b _param;
125
#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG))
126
        Base::PerformanceMeasurer* _perf;
127
#endif
128
        Array8u _buffer;
129
        Array16u _weight;
130
        Array32f _bias;
131
        mutable String _info;
132
        size_t _sizeA, _sizeB, _sizeC;
133
134
        uint8_t* Buffer(uint8_t* buffer)
135
0
        {
136
0
            if (buffer)
137
0
                return buffer;
138
0
            else
139
0
            {
140
0
                _buffer.Resize(ExternalBufferSize());
141
0
                return _buffer.data;
142
0
            }
143
0
        }
144
    };
145
146
    //-------------------------------------------------------------------------------------------------
147
148
    namespace Base
149
    {
150
        class SynetInnerProduct16bRef : public SynetInnerProduct16b
151
        {
152
        public:
153
            SynetInnerProduct16bRef(const InnerProductParam16b& p);
154
0
            virtual String Ext() const { return "Base"; }
155
            virtual String Desc() const;
156
            virtual void SetParams(const float* weight, const float* bias);
157
            virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C);
158
159
        protected:
160
            void GemmAndBias(const uint16_t* A, const uint16_t* B, float* C);
161
        };
162
163
        class SynetInnerProduct16bGemmNN : public SynetInnerProduct16b
164
        {
165
        public:
166
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);
167
0
            virtual String Ext() const { return "Base"; }
168
            virtual String Desc() const;
169
            virtual void SetParams(const float* weight, const float* bias);
170
            virtual void Forward(const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C);
171
172
            static bool Preferable(const InnerProductParam16b& p);
173
174
            struct AlgParam
175
            {
176
                size_t F, microM, microN, microK;
177
                size_t macroM, macroN, macroK;
178
                size_t aM, aN, aK, eA, eB, eC, bK, cN;
179
            };
180
181
            typedef void(*PrepPtr)(const uint8_t* src, const InnerProductParam16b& p, const AlgParam& a, size_t size, size_t K, uint16_t* dst);
182
            typedef void(*GemmPtr)(const uint16_t* A, const InnerProductParam16b& p, const AlgParam& a, size_t M, size_t N, size_t K, int update, const uint16_t* B, float* C, int post, const float* bias, uint8_t* dst);
183
184
        protected:
185
            void SetAlgParam(size_t F, size_t microM, size_t microN, size_t microK, size_t L1, size_t L2, size_t L3);
186
187
            AlgParam _alg;
188
            PrepPtr _prepA, _prepB;
189
            GemmPtr _gemm;
190
        };
191
192
        //-------------------------------------------------------------------------------------------------
193
194
        void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);
195
    }
196
197
#ifdef SIMD_SSE41_ENABLE    
198
    namespace Sse41
199
    {
200
        class SynetInnerProduct16bGemmNN : public Base::SynetInnerProduct16bGemmNN
201
        {
202
        public:
203
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);
204
205
0
            virtual String Ext() const { return "Sse41"; }
206
        };
207
208
        //-------------------------------------------------------------------------------------------------
209
210
        void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);
211
    }
212
#endif
213
214
#ifdef SIMD_AVX2_ENABLE    
215
    namespace Avx2
216
    {
217
        class SynetInnerProduct16bGemmNN : public Sse41::SynetInnerProduct16bGemmNN
218
        {
219
        public:
220
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);
221
222
0
            virtual String Ext() const { return "Avx2"; }
223
        };
224
225
        //-------------------------------------------------------------------------------------------------
226
227
        void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);
228
    }
229
#endif
230
231
#ifdef SIMD_AVX512BW_ENABLE    
232
    namespace Avx512bw
233
    {
234
        class SynetInnerProduct16bGemmNN : public Avx2::SynetInnerProduct16bGemmNN
235
        {
236
        public:
237
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);
238
239
0
            virtual String Ext() const { return "Avx512bw"; }
240
        };
241
242
        //-------------------------------------------------------------------------------------------------
243
244
        void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);
245
    }
246
#endif
247
248
#if (defined(SIMD_AMXBF16_ENABLE) || (defined(SIMD_AVX512BW_ENABLE) && defined(SIMD_AMX_EMULATE)))   
249
    namespace AmxBf16
250
    {
251
        class SynetInnerProduct16bGemmNN : public Avx512bw::SynetInnerProduct16bGemmNN
252
        {
253
        public:
254
            SynetInnerProduct16bGemmNN(const InnerProductParam16b& p);
255
256
            virtual String Ext() const { return "AmxBf16"; }
257
        };
258
259
        //-------------------------------------------------------------------------------------------------
260
261
        void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias);
262
    }
263
#endif
264
}
265
266
#endif