Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdRuntime.h
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2022 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#ifndef __SimdRuntime_h__
25
#define __SimdRuntime_h__
26
27
#include "Simd/SimdTime.h"
28
#include "Simd/SimdGemm.h"
29
30
#include <vector>
31
#include <limits>
32
#include <algorithm>
33
#include <string>
34
#ifdef SIMD_RUNTIME_STATISTIC
35
#include <sstream>
36
#include <iostream>
37
#include <iomanip>
38
#endif
39
40
namespace Simd
41
{
42
    typedef ::std::string String;
43
44
    template <class Func, class Args> struct Runtime
45
    {
46
        SIMD_INLINE Runtime()
47
0
            : _best(NULL)
48
0
        {
49
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Runtime()
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Runtime()
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Runtime()
50
51
        SIMD_INLINE ~Runtime()
52
0
        {
53
#ifdef SIMD_RUNTIME_STATISTIC
54
            if (!_info.empty())
55
            {
56
                std::sort(_candidates.begin(), _candidates.end(), [](const Candidate & a, const Candidate & b) { return a.Mean() < b.Mean(); });
57
                std::cout << "Simd::Runtime " << _info << " : ";
58
                int64_t f = TimeFrequency();
59
                for (size_t i = 0; i < _candidates.size(); ++i)
60
                {
61
                    int64_t t = _candidates[i].Mean();
62
                    std::cout << _candidates[i].func.Name() << ": " << t * 1000 / f << "." << (t * 1000000 / f) % 1000 << "  ";
63
                }
64
                std::cout << std::endl;
65
            }
66
#endif
67
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::~Runtime()
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::~Runtime()
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::~Runtime()
68
69
        SIMD_INLINE void Init(const Func & func)
70
        {
71
            _candidates.clear();
72
            _candidates.push_back(Candidate(func));
73
            _best = &_candidates[0].func;
74
        }
75
76
        SIMD_INLINE void Init(const std::vector<Func> & funcs)
77
0
        {
78
0
            assert(funcs.size() >= 1);
79
0
            _candidates.clear();
80
0
            for (size_t i = 0; i < funcs.size(); ++i)
81
0
                _candidates.push_back(Candidate(funcs[i]));
82
0
            _best = funcs.size() == 1 ? &_candidates[0].func : NULL;
83
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Init(std::__1::vector<Simd::GemmFunc, std::__1::allocator<Simd::GemmFunc> > const&)
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Init(std::__1::vector<Simd::GemmCbFunc, std::__1::allocator<Simd::GemmCbFunc> > const&)
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Init(std::__1::vector<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, std::__1::allocator<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc> > const&)
84
85
        SIMD_INLINE void Run(const Args & args)
86
0
        {
87
0
            if (_best)
88
0
                _best->Run(args);
89
0
            else
90
0
                Test(args);
91
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Run(Simd::GemmCbArgs const&)
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Run(Simd::GemmArgs const&)
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Run(Simd::Base::SynetConvolution32fNhwcDirect::RunArgs const&)
92
93
        SIMD_INLINE size_t Size() const
94
0
        {
95
0
            return _candidates.size();
96
0
        }
97
98
        SIMD_INLINE const Func & At(size_t index) const
99
0
        {
100
0
            return _candidates[index].func;
101
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::At(unsigned long) const
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::At(unsigned long) const
102
103
    private:
104
        static const size_t TEST_COUNT = 3 + 2;
105
106
        struct Candidate
107
        {
108
            Func func;
109
            size_t count;
110
            int64_t sum, min, max;
111
112
            SIMD_INLINE Candidate(const Func & f)
113
0
                : func(f)
114
0
                , count(0)
115
0
                , sum(0)
116
0
                , min(std::numeric_limits<int64_t>::max())
117
0
                , max(0)
118
0
            {
119
0
            }
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Candidate::Candidate(Simd::GemmFunc const&)
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Candidate::Candidate(Simd::GemmCbFunc const&)
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Candidate::Candidate(Simd::Base::SynetConvolution32fNhwcDirect::RunFunc const&)
120
121
            SIMD_INLINE void Update(int64_t value)
122
0
            {
123
0
                count += 1;
124
0
                sum += value;
125
0
                min = std::min(min, value);
126
0
                max = std::max(max, value);
127
0
            }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Candidate::Update(long)
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Candidate::Update(long)
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Candidate::Update(long)
128
129
            SIMD_INLINE int64_t Mean() const
130
0
            {
131
0
                if( count > 2)
132
0
                    return (sum - min - max) / (count - 2);
133
0
                else if (count > 0)
134
0
                    return sum / count;
135
0
                else
136
0
                    return sum;
137
0
            }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Candidate::Mean() const
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Candidate::Mean() const
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Candidate::Mean() const
138
        };
139
        typedef std::vector<Candidate> Candidates;
140
141
        Func * _best;
142
        Candidates _candidates;
143
        String _info;
144
145
        SIMD_INLINE void Test(const Args & args)
146
0
        {
147
0
            assert(_candidates.size());
148
0
            Candidate * current = Current();
149
0
            if (current)
150
0
            {
151
#ifdef SIMD_RUNTIME_STATISTIC
152
                if (_info.empty())
153
                    _info = current->func.Info(args);
154
#endif
155
0
                int64_t start = Simd::TimeCounter();
156
0
                current->func.Run(args);
157
0
                current->Update(Simd::TimeCounter() - start);
158
0
            }
159
0
            else
160
0
            {
161
0
                _best = &Best()->func;
162
0
                _best->Run(args);
163
0
            }
164
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Test(Simd::GemmCbArgs const&)
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Test(Simd::GemmArgs const&)
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Test(Simd::Base::SynetConvolution32fNhwcDirect::RunArgs const&)
165
166
        SIMD_INLINE Candidate * Current()
167
0
        {
168
0
            size_t min = TEST_COUNT;
169
0
            Candidate * current = NULL;
170
0
            for (size_t i = 0; i < _candidates.size(); ++i)
171
0
            {
172
0
                if (_candidates[i].count < min)
173
0
                {
174
0
                    min = _candidates[i].count;
175
0
                    current = &_candidates[i];
176
0
                }
177
0
            }
178
0
            return current;
179
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Current()
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Current()
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Current()
180
181
        SIMD_INLINE Candidate * Best()
182
0
        {
183
0
            Candidate * best = &_candidates[0];
184
0
            int64_t min = best->Mean();
185
0
            for (size_t i = 1; i < _candidates.size(); ++i)
186
0
            {
187
0
                int64_t mean = _candidates[i].Mean();
188
0
                if (mean < min)
189
0
                {
190
0
                    min = mean;
191
0
                    best = &_candidates[i];
192
0
                }
193
0
            }
194
0
            return best;
195
0
        }
Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Best()
Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Best()
Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Best()
196
    };
197
198
    //-------------------------------------------------------------------------
199
200
    struct GemmArgs
201
    {
202
        size_t M; size_t N; size_t K; const float * alpha; const float * A; size_t lda; const float * B; size_t ldb; const float * beta; float * C; size_t ldc;
203
        SIMD_INLINE GemmArgs(size_t M_, size_t N_, size_t K_, const float * alpha_, const float * A_, size_t lda_, const float * B_, size_t ldb_, const float * beta_, float * C_, size_t ldc_)
204
0
            :M(M_), N(N_), K(K_), alpha(alpha_), A(A_), lda(lda_), B(B_), ldb(ldb_), beta(beta_), ldc(ldc_), C(C_) 
205
0
        {}
206
    };
207
208
    struct GemmFunc
209
    {
210
        typedef void(*Func)(size_t M, size_t N, size_t K, const float* alpha, const float* A, size_t lda, const float* B, size_t ldb, const float* beta, float* C, size_t ldc);
211
212
        SIMD_INLINE GemmFunc(const Func & func, const String & name)
213
0
            : _func(func)
214
0
            , _name(name)
215
0
        {
216
0
        }
217
218
0
        SIMD_INLINE String Name() const { return _name; }
219
220
        SIMD_INLINE void Run(const GemmArgs & args)
221
0
        {
222
0
            _func(args.M, args.N, args.K, args.alpha, args.A, args.lda, args.B, args.ldb, args.beta, args.C, args.ldc);
223
0
        }
224
225
#ifdef SIMD_RUNTIME_STATISTIC
226
        SIMD_INLINE String Info(const GemmArgs & args) const
227
        {
228
            std::stringstream ss;
229
            ss << "Gemm [" << args.M << ", " << args.N << ", " << args.K << "]";
230
            return ss.str();
231
        }
232
#endif
233
234
    private:
235
        Func _func;
236
        String _name;
237
    };
238
    typedef std::vector<GemmFunc> GemmFuncs;
239
240
    SIMD_INLINE GemmFuncs InitGemmFuncs(const GemmFunc::Func & func1, const String & name1, const GemmFunc::Func & func2 = NULL, const String & name2 = String())
241
0
    {
242
0
        GemmFuncs funcs;
243
0
        funcs.push_back(GemmFunc(func1, name1));
244
0
        if (func2)
245
0
            funcs.push_back(GemmFunc(func2, name2));
246
0
        return funcs;
247
0
    }
248
249
    typedef Runtime<GemmFunc, GemmArgs> RuntimeGemm;
250
251
    //-------------------------------------------------------------------------
252
253
    struct GemmCbArgs
254
    {
255
        size_t M; size_t N; size_t K; const float * A; const float * pB; float * C;
256
        SIMD_INLINE GemmCbArgs(size_t M_, size_t N_, size_t K_, const float * A_, const float * pB_, float * C_)
257
0
            :M(M_), N(N_), K(K_), A(A_), pB(pB_), C(C_)
258
0
        {}
259
    };
260
261
    struct GemmCbFunc
262
    {
263
        typedef size_t (*BufferSizePtr)(size_t M, size_t N, size_t K, GemmKernelType type, bool compatibility);
264
        typedef void (*ReorderBPtr)(size_t M, size_t N, size_t K, const float * B, float * pB, GemmKernelType type, bool compatibility);
265
        typedef void (*RunPtr)(size_t M, size_t N, size_t K, const float * A, const float * pB, float * C, GemmKernelType type, bool compatibility);
266
267
        SIMD_INLINE GemmCbFunc(BufferSizePtr bufferSize, ReorderBPtr reorderB, RunPtr run, GemmKernelType type, const String & name)
268
0
            : _bufferSize(bufferSize)
269
0
            , _reorderB(reorderB)
270
0
            , _run(run)
271
0
            , _type(type)
272
0
            , _name(name)
273
0
        {
274
0
        }
275
276
0
        SIMD_INLINE String Name() const { return _name; }
277
278
        SIMD_INLINE void Run(const GemmCbArgs & args)
279
0
        {
280
0
            _run(args.M, args.N, args.K, args.A, args.pB, args.C, _type, _type != GemmKernelAny);
281
0
        }
282
283
#ifdef SIMD_RUNTIME_STATISTIC
284
        SIMD_INLINE String Info(const GemmCbArgs & args) const
285
        {
286
            std::stringstream ss;
287
            ss << "GemmCb [" << args.M << ", " << args.N << ", " << args.K << "]";
288
            return ss.str();
289
        }
290
#endif 
291
        
292
0
        SIMD_INLINE GemmKernelType Type() const { return _type; }
293
294
        SIMD_INLINE size_t BufferSize(size_t M, size_t N, size_t K) const
295
0
        {
296
0
            return _bufferSize(M, N, K, _type, _type != GemmKernelAny);
297
0
        }
298
299
        SIMD_INLINE void ReorderB(size_t M, size_t N, size_t K, const float * B, float * pB) const
300
0
        {
301
0
            _reorderB(M, N, K, B, pB, _type, _type != GemmKernelAny);
302
0
        }
303
304
    private:
305
        BufferSizePtr _bufferSize;
306
        ReorderBPtr _reorderB;
307
        RunPtr _run;
308
        GemmKernelType _type;
309
        String _name;
310
    };
311
    typedef std::vector<GemmCbFunc> GemmCbFuncs;
312
313
    SIMD_INLINE GemmCbFuncs InitGemmCbFuncs(GemmCbFunc::BufferSizePtr bufferSize, GemmCbFunc::ReorderBPtr reorderB, GemmCbFunc::RunPtr run, 
314
        const String & name, GemmKernelType begin, GemmKernelType end)
315
0
    {
316
0
        GemmCbFuncs funcs;
317
0
        for (int i = (int)begin, n = (int)end; i <= n; ++i)
318
0
            funcs.push_back(GemmCbFunc(bufferSize, reorderB, run, GemmKernelType(i), name + "-" + ToStr(i)));
319
0
        return funcs;
320
0
    }
321
322
    typedef Runtime<GemmCbFunc, GemmCbArgs> RuntimeGemmCb;
323
}
324
325
#endif//__SimdRuntime_h__