/src/Simd/src/Simd/SimdRuntime.h
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2022 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #ifndef __SimdRuntime_h__ |
25 | | #define __SimdRuntime_h__ |
26 | | |
27 | | #include "Simd/SimdTime.h" |
28 | | #include "Simd/SimdGemm.h" |
29 | | |
30 | | #include <vector> |
31 | | #include <limits> |
32 | | #include <algorithm> |
33 | | #include <string> |
34 | | #ifdef SIMD_RUNTIME_STATISTIC |
35 | | #include <sstream> |
36 | | #include <iostream> |
37 | | #include <iomanip> |
38 | | #endif |
39 | | |
40 | | namespace Simd |
41 | | { |
42 | | typedef ::std::string String; |
43 | | |
44 | | template <class Func, class Args> struct Runtime |
45 | | { |
46 | | SIMD_INLINE Runtime() |
47 | 0 | : _best(NULL) |
48 | 0 | { |
49 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Runtime() Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Runtime() Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Runtime() |
50 | | |
51 | | SIMD_INLINE ~Runtime() |
52 | 0 | { |
53 | | #ifdef SIMD_RUNTIME_STATISTIC |
54 | | if (!_info.empty()) |
55 | | { |
56 | | std::sort(_candidates.begin(), _candidates.end(), [](const Candidate & a, const Candidate & b) { return a.Mean() < b.Mean(); }); |
57 | | std::cout << "Simd::Runtime " << _info << " : "; |
58 | | int64_t f = TimeFrequency(); |
59 | | for (size_t i = 0; i < _candidates.size(); ++i) |
60 | | { |
61 | | int64_t t = _candidates[i].Mean(); |
62 | | std::cout << _candidates[i].func.Name() << ": " << t * 1000 / f << "." << (t * 1000000 / f) % 1000 << " "; |
63 | | } |
64 | | std::cout << std::endl; |
65 | | } |
66 | | #endif |
67 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::~Runtime() Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::~Runtime() Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::~Runtime() |
68 | | |
69 | | SIMD_INLINE void Init(const Func & func) |
70 | | { |
71 | | _candidates.clear(); |
72 | | _candidates.push_back(Candidate(func)); |
73 | | _best = &_candidates[0].func; |
74 | | } |
75 | | |
76 | | SIMD_INLINE void Init(const std::vector<Func> & funcs) |
77 | 0 | { |
78 | 0 | assert(funcs.size() >= 1); |
79 | 0 | _candidates.clear(); |
80 | 0 | for (size_t i = 0; i < funcs.size(); ++i) |
81 | 0 | _candidates.push_back(Candidate(funcs[i])); |
82 | 0 | _best = funcs.size() == 1 ? &_candidates[0].func : NULL; |
83 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Init(std::__1::vector<Simd::GemmFunc, std::__1::allocator<Simd::GemmFunc> > const&) Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Init(std::__1::vector<Simd::GemmCbFunc, std::__1::allocator<Simd::GemmCbFunc> > const&) Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Init(std::__1::vector<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, std::__1::allocator<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc> > const&) |
84 | | |
85 | | SIMD_INLINE void Run(const Args & args) |
86 | 0 | { |
87 | 0 | if (_best) |
88 | 0 | _best->Run(args); |
89 | 0 | else |
90 | 0 | Test(args); |
91 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Run(Simd::GemmCbArgs const&) Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Run(Simd::GemmArgs const&) Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Run(Simd::Base::SynetConvolution32fNhwcDirect::RunArgs const&) |
92 | | |
93 | | SIMD_INLINE size_t Size() const |
94 | 0 | { |
95 | 0 | return _candidates.size(); |
96 | 0 | } |
97 | | |
98 | | SIMD_INLINE const Func & At(size_t index) const |
99 | 0 | { |
100 | 0 | return _candidates[index].func; |
101 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::At(unsigned long) const Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::At(unsigned long) const |
102 | | |
103 | | private: |
104 | | static const size_t TEST_COUNT = 3 + 2; |
105 | | |
106 | | struct Candidate |
107 | | { |
108 | | Func func; |
109 | | size_t count; |
110 | | int64_t sum, min, max; |
111 | | |
112 | | SIMD_INLINE Candidate(const Func & f) |
113 | 0 | : func(f) |
114 | 0 | , count(0) |
115 | 0 | , sum(0) |
116 | 0 | , min(std::numeric_limits<int64_t>::max()) |
117 | 0 | , max(0) |
118 | 0 | { |
119 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Candidate::Candidate(Simd::GemmFunc const&) Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Candidate::Candidate(Simd::GemmCbFunc const&) Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Candidate::Candidate(Simd::Base::SynetConvolution32fNhwcDirect::RunFunc const&) |
120 | | |
121 | | SIMD_INLINE void Update(int64_t value) |
122 | 0 | { |
123 | 0 | count += 1; |
124 | 0 | sum += value; |
125 | 0 | min = std::min(min, value); |
126 | 0 | max = std::max(max, value); |
127 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Candidate::Update(long) Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Candidate::Update(long) Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Candidate::Update(long) |
128 | | |
129 | | SIMD_INLINE int64_t Mean() const |
130 | 0 | { |
131 | 0 | if( count > 2) |
132 | 0 | return (sum - min - max) / (count - 2); |
133 | 0 | else if (count > 0) |
134 | 0 | return sum / count; |
135 | 0 | else |
136 | 0 | return sum; |
137 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Candidate::Mean() const Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Candidate::Mean() const Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Candidate::Mean() const |
138 | | }; |
139 | | typedef std::vector<Candidate> Candidates; |
140 | | |
141 | | Func * _best; |
142 | | Candidates _candidates; |
143 | | String _info; |
144 | | |
145 | | SIMD_INLINE void Test(const Args & args) |
146 | 0 | { |
147 | 0 | assert(_candidates.size()); |
148 | 0 | Candidate * current = Current(); |
149 | 0 | if (current) |
150 | 0 | { |
151 | | #ifdef SIMD_RUNTIME_STATISTIC |
152 | | if (_info.empty()) |
153 | | _info = current->func.Info(args); |
154 | | #endif |
155 | 0 | int64_t start = Simd::TimeCounter(); |
156 | 0 | current->func.Run(args); |
157 | 0 | current->Update(Simd::TimeCounter() - start); |
158 | 0 | } |
159 | 0 | else |
160 | 0 | { |
161 | 0 | _best = &Best()->func; |
162 | 0 | _best->Run(args); |
163 | 0 | } |
164 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Test(Simd::GemmCbArgs const&) Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Test(Simd::GemmArgs const&) Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Test(Simd::Base::SynetConvolution32fNhwcDirect::RunArgs const&) |
165 | | |
166 | | SIMD_INLINE Candidate * Current() |
167 | 0 | { |
168 | 0 | size_t min = TEST_COUNT; |
169 | 0 | Candidate * current = NULL; |
170 | 0 | for (size_t i = 0; i < _candidates.size(); ++i) |
171 | 0 | { |
172 | 0 | if (_candidates[i].count < min) |
173 | 0 | { |
174 | 0 | min = _candidates[i].count; |
175 | 0 | current = &_candidates[i]; |
176 | 0 | } |
177 | 0 | } |
178 | 0 | return current; |
179 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Current() Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Current() Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Current() |
180 | | |
181 | | SIMD_INLINE Candidate * Best() |
182 | 0 | { |
183 | 0 | Candidate * best = &_candidates[0]; |
184 | 0 | int64_t min = best->Mean(); |
185 | 0 | for (size_t i = 1; i < _candidates.size(); ++i) |
186 | 0 | { |
187 | 0 | int64_t mean = _candidates[i].Mean(); |
188 | 0 | if (mean < min) |
189 | 0 | { |
190 | 0 | min = mean; |
191 | 0 | best = &_candidates[i]; |
192 | 0 | } |
193 | 0 | } |
194 | 0 | return best; |
195 | 0 | } Unexecuted instantiation: Simd::Runtime<Simd::GemmCbFunc, Simd::GemmCbArgs>::Best() Unexecuted instantiation: Simd::Runtime<Simd::GemmFunc, Simd::GemmArgs>::Best() Unexecuted instantiation: Simd::Runtime<Simd::Base::SynetConvolution32fNhwcDirect::RunFunc, Simd::Base::SynetConvolution32fNhwcDirect::RunArgs>::Best() |
196 | | }; |
197 | | |
198 | | //------------------------------------------------------------------------- |
199 | | |
200 | | struct GemmArgs |
201 | | { |
202 | | size_t M; size_t N; size_t K; const float * alpha; const float * A; size_t lda; const float * B; size_t ldb; const float * beta; float * C; size_t ldc; |
203 | | SIMD_INLINE GemmArgs(size_t M_, size_t N_, size_t K_, const float * alpha_, const float * A_, size_t lda_, const float * B_, size_t ldb_, const float * beta_, float * C_, size_t ldc_) |
204 | 0 | :M(M_), N(N_), K(K_), alpha(alpha_), A(A_), lda(lda_), B(B_), ldb(ldb_), beta(beta_), ldc(ldc_), C(C_) |
205 | 0 | {} |
206 | | }; |
207 | | |
208 | | struct GemmFunc |
209 | | { |
210 | | typedef void(*Func)(size_t M, size_t N, size_t K, const float* alpha, const float* A, size_t lda, const float* B, size_t ldb, const float* beta, float* C, size_t ldc); |
211 | | |
212 | | SIMD_INLINE GemmFunc(const Func & func, const String & name) |
213 | 0 | : _func(func) |
214 | 0 | , _name(name) |
215 | 0 | { |
216 | 0 | } |
217 | | |
218 | 0 | SIMD_INLINE String Name() const { return _name; } |
219 | | |
220 | | SIMD_INLINE void Run(const GemmArgs & args) |
221 | 0 | { |
222 | 0 | _func(args.M, args.N, args.K, args.alpha, args.A, args.lda, args.B, args.ldb, args.beta, args.C, args.ldc); |
223 | 0 | } |
224 | | |
225 | | #ifdef SIMD_RUNTIME_STATISTIC |
226 | | SIMD_INLINE String Info(const GemmArgs & args) const |
227 | | { |
228 | | std::stringstream ss; |
229 | | ss << "Gemm [" << args.M << ", " << args.N << ", " << args.K << "]"; |
230 | | return ss.str(); |
231 | | } |
232 | | #endif |
233 | | |
234 | | private: |
235 | | Func _func; |
236 | | String _name; |
237 | | }; |
238 | | typedef std::vector<GemmFunc> GemmFuncs; |
239 | | |
240 | | SIMD_INLINE GemmFuncs InitGemmFuncs(const GemmFunc::Func & func1, const String & name1, const GemmFunc::Func & func2 = NULL, const String & name2 = String()) |
241 | 0 | { |
242 | 0 | GemmFuncs funcs; |
243 | 0 | funcs.push_back(GemmFunc(func1, name1)); |
244 | 0 | if (func2) |
245 | 0 | funcs.push_back(GemmFunc(func2, name2)); |
246 | 0 | return funcs; |
247 | 0 | } |
248 | | |
249 | | typedef Runtime<GemmFunc, GemmArgs> RuntimeGemm; |
250 | | |
251 | | //------------------------------------------------------------------------- |
252 | | |
253 | | struct GemmCbArgs |
254 | | { |
255 | | size_t M; size_t N; size_t K; const float * A; const float * pB; float * C; |
256 | | SIMD_INLINE GemmCbArgs(size_t M_, size_t N_, size_t K_, const float * A_, const float * pB_, float * C_) |
257 | 0 | :M(M_), N(N_), K(K_), A(A_), pB(pB_), C(C_) |
258 | 0 | {} |
259 | | }; |
260 | | |
261 | | struct GemmCbFunc |
262 | | { |
263 | | typedef size_t (*BufferSizePtr)(size_t M, size_t N, size_t K, GemmKernelType type, bool compatibility); |
264 | | typedef void (*ReorderBPtr)(size_t M, size_t N, size_t K, const float * B, float * pB, GemmKernelType type, bool compatibility); |
265 | | typedef void (*RunPtr)(size_t M, size_t N, size_t K, const float * A, const float * pB, float * C, GemmKernelType type, bool compatibility); |
266 | | |
267 | | SIMD_INLINE GemmCbFunc(BufferSizePtr bufferSize, ReorderBPtr reorderB, RunPtr run, GemmKernelType type, const String & name) |
268 | 0 | : _bufferSize(bufferSize) |
269 | 0 | , _reorderB(reorderB) |
270 | 0 | , _run(run) |
271 | 0 | , _type(type) |
272 | 0 | , _name(name) |
273 | 0 | { |
274 | 0 | } |
275 | | |
276 | 0 | SIMD_INLINE String Name() const { return _name; } |
277 | | |
278 | | SIMD_INLINE void Run(const GemmCbArgs & args) |
279 | 0 | { |
280 | 0 | _run(args.M, args.N, args.K, args.A, args.pB, args.C, _type, _type != GemmKernelAny); |
281 | 0 | } |
282 | | |
283 | | #ifdef SIMD_RUNTIME_STATISTIC |
284 | | SIMD_INLINE String Info(const GemmCbArgs & args) const |
285 | | { |
286 | | std::stringstream ss; |
287 | | ss << "GemmCb [" << args.M << ", " << args.N << ", " << args.K << "]"; |
288 | | return ss.str(); |
289 | | } |
290 | | #endif |
291 | | |
292 | 0 | SIMD_INLINE GemmKernelType Type() const { return _type; } |
293 | | |
294 | | SIMD_INLINE size_t BufferSize(size_t M, size_t N, size_t K) const |
295 | 0 | { |
296 | 0 | return _bufferSize(M, N, K, _type, _type != GemmKernelAny); |
297 | 0 | } |
298 | | |
299 | | SIMD_INLINE void ReorderB(size_t M, size_t N, size_t K, const float * B, float * pB) const |
300 | 0 | { |
301 | 0 | _reorderB(M, N, K, B, pB, _type, _type != GemmKernelAny); |
302 | 0 | } |
303 | | |
304 | | private: |
305 | | BufferSizePtr _bufferSize; |
306 | | ReorderBPtr _reorderB; |
307 | | RunPtr _run; |
308 | | GemmKernelType _type; |
309 | | String _name; |
310 | | }; |
311 | | typedef std::vector<GemmCbFunc> GemmCbFuncs; |
312 | | |
313 | | SIMD_INLINE GemmCbFuncs InitGemmCbFuncs(GemmCbFunc::BufferSizePtr bufferSize, GemmCbFunc::ReorderBPtr reorderB, GemmCbFunc::RunPtr run, |
314 | | const String & name, GemmKernelType begin, GemmKernelType end) |
315 | 0 | { |
316 | 0 | GemmCbFuncs funcs; |
317 | 0 | for (int i = (int)begin, n = (int)end; i <= n; ++i) |
318 | 0 | funcs.push_back(GemmCbFunc(bufferSize, reorderB, run, GemmKernelType(i), name + "-" + ToStr(i))); |
319 | 0 | return funcs; |
320 | 0 | } |
321 | | |
322 | | typedef Runtime<GemmCbFunc, GemmCbArgs> RuntimeGemmCb; |
323 | | } |
324 | | |
325 | | #endif//__SimdRuntime_h__ |