Coverage Report

Created: 2025-12-31 07:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdAvx512bwSynetQuantizedConvolutionNhwcGemm.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2025 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdSynetQuantizedConvolution.h"
25
#include "Simd/SimdSynetQuantizedActivation.h"
26
#include "Simd/SimdSynetQuantizeLinear.h"
27
#include "Simd/SimdSynetConvolution8iCommon.h"
28
#include "Simd/SimdSynet.h"
29
#include "Simd/SimdMath.h"
30
#include "Simd/SimdBase.h"
31
#include "Simd/SimdCpu.h"
32
#include "Simd/SimdLog.h"
33
#include "Simd/SimdSet.h"
34
#include "Simd/SimdCopy.h"
35
36
namespace Simd
37
{
38
#if defined(SIMD_AVX512BW_ENABLE) && defined(SIMD_SYNET_ENABLE)   
39
    namespace Avx512bw
40
    {
41
        typedef Base::SynetQuantizedConvolutionNhwcGemm::AlgParam AlgParam;
42
        typedef Base::SynetQuantizedConvolutionNhwcGemm::ConvolutionPtr Convolution;
43
44
        //-----------------------------------------------------------------------------------------
45
46
        // Generic reorder of the source image into the GEMM input buffer for output rows [yBeg, yEnd).
        // For every output pixel (dy, dx) one record of a.bufK bytes is written to dst. The record holds
        // the source bytes under the kernel window in (ky, kx, channel) order, p.srcC bytes per tap.
        // Taps that fall outside the source image are filled with the byte value 'zero'.
        // After the window bytes, SetZero is called with an all-zero vector and a mask built from (a.bufK - a.K).
        // The source is addressed as (sy * p.srcW + sx) * p.srcC, i.e. channels are the innermost dimension.
        // NOTE(review): Copy / SetZeros / SetZero / TailMask64 are defined elsewhere; presumably the
        // (size64, mask) argument pair means "full 64-byte blocks plus one masked tail" - confirm.
        static void QuantizedConvolutionNhwcGemmReorder(const uint8_t* src, uint8_t zero, const ConvParam& p, const AlgParam& a, size_t yBeg, size_t yEnd, uint8_t* dst)
47
0
        {
48
0
            size_t C = p.srcC, C64 = AlignLo(C, 64), K = a.bufK, kcX = p.kernelX * C;
49
0
            __mmask64 gM = TailMask64(K - a.K), cM= TailMask64(C - C64);
50
0
            __m512i _zero = _mm512_set1_epi8(zero);
51
0
            for (size_t dy = yBeg; dy < yEnd; ++dy)
52
0
            {
53
0
                for (size_t dx = 0; dx < p.dstW; ++dx, dst += K)
54
0
                {
55
0
                    uint8_t* pd = dst;
56
0
                    // NOTE: 'k' is declared in this loop header but is never used in the loop body.
                    for (size_t ky = 0, k = 0; ky < p.kernelY; ky++)
57
0
                    {
58
0
                        // NOTE(review): presumably relies on unsigned wrap-around (ConvParam fields assumed unsigned)
                        // so that a row above the image yields a huge value and fails the 'sy < p.srcH' test - confirm.
                        size_t sy = dy * p.strideY + ky * p.dilationY - p.padY;
59
0
                        if (sy < p.srcH)
60
0
                        {
61
0
                            for (size_t kx = 0; kx < p.kernelX; kx++)
62
0
                            {
63
0
                                size_t sx = dx * p.strideX + kx * p.dilationX - p.padX;
64
0
                                if (sx < p.srcW)
65
0
                                    Copy(src + (sy * p.srcW + sx) * C, C64, cM, pd);
66
0
                                else
67
0
                                    SetZeros(pd, _zero, C64, cM);
68
0
                                pd += C;
69
0
                            }
70
0
                        }
71
0
                        else
72
0
                        {
73
0
                            // Whole kernel row is outside the image: fill kernelX * C bytes with 'zero'.
                            SetZeros(pd, _zero, kcX);
74
0
                            pd += kcX;
75
0
                        }
76
0
                    }
77
0
                    // Record tail: written with an all-zero vector (not with 'zero').
                    SetZero(pd, _mm512_setzero_si512(), gM);
78
0
                }
79
0
            }
80
0
        }
81
82
        // Reorder of the source image into the GEMM input buffer for output rows [yBeg, yEnd),
        // restricted to dilation 1 (asserted). One record of a.bufK bytes is written per output pixel.
        // Because dilation is 1, the kernelX taps of one kernel row are kernelX * srcC contiguous source bytes (kC).
        // Each output row is processed as three column ranges:
        //   [0, dxB)      - left border: every tap is bounds-checked individually;
        //   [dxB, dxE)    - interior columns: a whole kernel row (kC bytes) is copied with one Copy call;
        //   [dxE, p.dstW) - right border: every tap is bounds-checked individually.
        // For interior columns, rows with dyB <= dy < dyE use no bounds checks at all; other rows
        // check 'sy < p.srcH' per kernel row and fill out-of-image rows with the byte value 'zero'.
        // NOTE(review): Copy / SetZeros / SetZero / TailMask64 / DivHi are defined elsewhere; presumably the
        // (size64, mask) argument pair means "full 64-byte blocks plus one masked tail" - confirm.
        static void QuantizedConvolutionNhwcGemmReorder1d(const uint8_t* src, uint8_t zero, const ConvParam& p, const AlgParam& a, size_t yBeg, size_t yEnd, uint8_t* dst)
83
0
        {
84
            //SIMD_PERF_BEG(ToStr(p.srcC));
85
0
            assert(p.IsDilation(1));
86
0
            size_t C = p.srcC, C64 = AlignLo(C, 64), K = a.bufK, kC = p.kernelX * C, kC64 = AlignLo(kC, 64), sX = p.strideX, cW = p.srcW * C, kY = p.kernelY, scX = sX * C;
87
0
            // Bounds of the output rows / columns treated as interior (see the header comment).
            size_t dyB = DivHi(p.padY, p.strideY), dyE = p.dstH - DivHi(p.padH, p.strideY), dxB = DivHi(p.padX, p.strideX), dxE = p.dstW - DivHi(p.padW, p.strideX);
88
0
            __mmask64 gM = TailMask64(K - a.K), cM = TailMask64(C - C64), kcM = TailMask64(kC - kC64);
89
0
            __m512i _zero = _mm512_set1_epi8(zero);
90
0
            for (size_t dy = yBeg; dy < yEnd; ++dy)
91
0
            {
92
0
                size_t dx = 0;
93
0
                // Left border columns.
                for (; dx < dxB; ++dx, dst += K)
94
0
                {
95
0
                    uint8_t* pd = dst;
96
0
                    // Byte offsets (within one source row) of the first tap and one past the last tap.
                    ptrdiff_t sxcB = (dx * sX - p.padX) * C, sxcE = sxcB + kC;
97
0
                    // NOTE: 'k' is declared in this loop header but is never used in the loop body.
                    for (size_t ky = 0, k = 0; ky < kY; ky++)
98
0
                    {
99
0
                        size_t sy = dy * p.strideY + ky - p.padY;
100
0
                        if (sy < p.srcH)
101
0
                        {
102
0
                            for (ptrdiff_t sxc = sxcB; sxc < sxcE; sxc += C, pd += C)
103
0
                            {
104
0
                                // The cast makes a negative offset a huge unsigned value, so it fails the test.
                                if ((size_t)sxc < cW)
105
0
                                    Copy(src + sy * cW + sxc, C64, cM, pd);
106
0
                                else
107
0
                                    SetZeros(pd, _zero, C64, cM);
108
0
                            }
109
0
                        }
110
0
                        else
111
0
                        {
112
0
                            SetZeros(pd, _zero, kC64, kcM);
113
0
                            pd += kC;
114
0
                        }
115
0
                    }
116
0
                    SetZero(pd, _mm512_setzero_si512(), gM);
117
0
                }
118
0
                if (dy >= dyB && dy < dyE)
119
0
                {
120
0
                    // Interior row and interior columns: no bounds checks, 'ps' advances by strideX * C per output pixel.
                    const uint8_t* ps = src + (dy * p.strideY - p.padY) * cW + (dx * sX - p.padX) * C;
121
0
                    for (; dx < dxE; ++dx, dst += K, ps += scX)
122
0
                    {
123
0
                        uint8_t* pd = dst;
124
0
                        for (size_t ky = 0; ky < kY; ky++, pd += kC)
125
0
                            Copy(ps + ky * cW, kC64, kcM, pd);
126
0
                        SetZero(pd, _mm512_setzero_si512(), gM);
127
0
                    }
128
0
                }
129
0
                else
130
0
                {
131
0
                    // Border row, interior columns: only the vertical coordinate is checked.
                    for (; dx < dxE; ++dx, dst += K)
132
0
                    {
133
0
                        uint8_t* pd = dst;
134
0
                        ptrdiff_t sxcB = (dx * sX - p.padX) * C;
135
0
                        for (size_t ky = 0; ky < kY; ky++)
136
0
                        {
137
0
                            size_t sy = dy * p.strideY + ky - p.padY;
138
0
                            if (sy < p.srcH)
139
0
                                Copy(src + sy * cW + sxcB, kC64, kcM, pd);
140
0
                            else
141
0
                                SetZeros(pd, _zero, kC64, kcM);
142
0
                            pd += kC;
143
0
                        }
144
0
                        SetZero(pd, _mm512_setzero_si512(), gM);
145
0
                    }
146
0
                }
147
0
                // Right border columns.
                for (; dx < p.dstW; ++dx, dst += K)
148
0
                {
149
0
                    uint8_t* pd = dst;
150
0
                    ptrdiff_t sxcB = (dx * sX - p.padX) * C, sxcE = sxcB + kC;
151
0
                    // NOTE: 'k' is declared in this loop header but is never used in the loop body.
                    for (size_t ky = 0, k = 0; ky < kY; ky++)
152
0
                    {
153
0
                        size_t sy = dy * p.strideY + ky - p.padY;
154
0
                        if (sy < p.srcH)
155
0
                        {
156
0
                            for (ptrdiff_t sxc = sxcB; sxc < sxcE; sxc += C, pd += C)
157
0
                            {
158
0
                                if ((size_t)sxc < cW)
159
0
                                    Copy(src + sy * cW + sxc, C64, cM, pd);
160
0
                                else
161
0
                                    SetZeros(pd, _zero, C64, cM);
162
0
                            }
163
0
                        }
164
0
                        else
165
0
                        {
166
0
                            SetZeros(pd, _zero, kC64, kcM);
167
0
                            pd += kC;
168
0
                        }
169
0
                    }
170
0
                    SetZero(pd, _mm512_setzero_si512(), gM);
171
0
                }
172
0
            }
173
0
        }
174
175
        // Reorder of the source image into the GEMM input buffer for output rows [yBeg, yEnd),
        // restricted (asserted) to dilation 1, p.srcC <= 16 and p.srcC * p.kernelX <= 64.
        // One record of a.bufK bytes is written per output pixel. Under the asserted limits the channels
        // of one tap are moved with a single masked 128-bit load/store (mask cM) and a whole kernel row
        // (kcX = kernelX * srcC bytes) with a single masked 512-bit load/store (mask kcM).
        // Each output row is processed as three column ranges:
        //   [0, dxB)      - left border: every tap is bounds-checked individually;
        //   [dxB, dxE)    - interior columns: one masked 512-bit move per kernel row;
        //   [dxE, p.dstW) - right border: every tap is bounds-checked individually.
        // Out-of-image taps / rows are filled with the byte value 'zero'; the record tail is written
        // with an all-zero vector under mask gM, which is built from (a.bufK - a.K).
        // NOTE(review): TailMask16 / TailMask64 / DivHi are defined elsewhere; presumably TailMaskNN(n)
        // yields a mask selecting the first n byte lanes - confirm.
        static void QuantizedConvolutionNhwcGemmReorder1d16c(const uint8_t* src, uint8_t zero, const ConvParam& p, const AlgParam& a, size_t yBeg, size_t yEnd, uint8_t* dst)
176
0
        {
177
0
            assert(p.IsDilation(1) && p.srcC <= 16 && p.srcC * p.kernelX <= 64);
178
0
            // NOTE: 'cwH' is computed here but is not referenced anywhere else in this function.
            size_t K = a.bufK, C = p.srcC, kcX = p.kernelX * C, sX = p.strideX, cW = p.srcW * C, cwH = cW * p.srcH, kY = p.kernelY, scX = sX * C;
179
0
            size_t dyB = DivHi(p.padY, p.strideY), dyE = p.dstH - DivHi(p.padH, p.strideY), dxB = DivHi(p.padX, p.strideX), dxE = p.dstW - DivHi(p.padW, p.strideX);
180
0
            __mmask64 gM = TailMask64(K - a.K), kcM = TailMask64(kcX);
181
0
            __mmask16 cM = TailMask16(C);
182
0
            __m512i _zero = _mm512_set1_epi8(zero);
183
0
            for (size_t dy = yBeg; dy < yEnd; ++dy)
184
0
            {
185
0
                size_t dx = 0;
186
0
                // Left border columns.
                for (; dx < dxB; ++dx, dst += K)
187
0
                {
188
0
                    uint8_t* pd = dst;
189
0
                    // Byte offsets (within one source row) of the first tap and one past the last tap.
                    ptrdiff_t sxcB = (dx * sX - p.padX) * C, sxcE = sxcB + kcX;
190
0
                    for (size_t ky = 0; ky < kY; ky++)
191
0
                    {
192
0
                        size_t sy = dy * p.strideY + ky - p.padY;
193
0
                        if (sy < p.srcH)
194
0
                        {
195
0
                            for (ptrdiff_t sxc = sxcB; sxc < sxcE; sxc += C, pd += C)
196
0
                            {
197
0
                                // The cast makes a negative offset a huge unsigned value, so it fails the test.
                                if ((size_t)sxc < cW)
198
0
                                    _mm_mask_storeu_epi8(pd, cM, _mm_maskz_loadu_epi8(cM, src + sy * cW + sxc));
199
0
                                else
200
0
                                    _mm_mask_storeu_epi8(pd, cM, _mm512_castsi512_si128(_zero));
201
0
                            }
202
0
                        }
203
0
                        else
204
0
                        {
205
0
                            _mm512_mask_storeu_epi8(pd, kcM, _zero);
206
0
                            pd += kcX;
207
0
                        }
208
0
                    }
209
0
                    _mm512_mask_storeu_epi8(pd, gM, _mm512_setzero_si512());
210
0
                }
211
0
                if (dy >= dyB && dy < dyE)
212
0
                {
213
0
                    // Interior row and interior columns: no bounds checks, 'ps' advances by strideX * C per output pixel.
                    const uint8_t* ps = src + (dy * p.strideY - p.padY) * cW + (dx * sX - p.padX) * C;
214
0
                    for (; dx < dxE; ++dx, dst += K, ps += scX)
215
0
                    {
216
0
                        uint8_t* pd = dst;
217
0
                        for (size_t ky = 0; ky < kY; ky++, pd += kcX)
218
0
                            _mm512_mask_storeu_epi8(pd, kcM, _mm512_maskz_loadu_epi8(kcM, ps + ky * cW));
219
0
                        _mm512_mask_storeu_epi8(pd, gM, _mm512_setzero_si512());
220
0
                    }
221
0
                }
222
0
                else
223
0
                {
224
0
                    // Border row, interior columns: only the vertical coordinate is checked.
                    for (; dx < dxE; ++dx, dst += K)
225
0
                    {
226
0
                        uint8_t* pd = dst;
227
0
                        ptrdiff_t sxcB = (dx * sX - p.padX) * C;
228
0
                        for (size_t ky = 0; ky < kY; ky++)
229
0
                        {
230
0
                            size_t sy = dy * p.strideY + ky - p.padY;
231
0
                            if (sy < p.srcH)
232
0
                                _mm512_mask_storeu_epi8(pd, kcM, _mm512_maskz_loadu_epi8(kcM, src + sy * cW + sxcB));
233
0
                            else
234
0
                                _mm512_mask_storeu_epi8(pd, kcM, _zero);
235
0
                            pd += kcX;
236
0
                        }
237
0
                        _mm512_mask_storeu_epi8(pd, gM, _mm512_setzero_si512());
238
0
                    }
239
0
                }
240
0
                // Right border columns.
                for (; dx < p.dstW; ++dx, dst += K)
241
0
                {
242
0
                    uint8_t* pd = dst;
243
0
                    ptrdiff_t sxcB = (dx * sX - p.padX) * C, sxcE = sxcB + kcX;
244
0
                    for (size_t ky = 0; ky < kY; ky++)
245
0
                    {
246
0
                        size_t sy = dy * p.strideY + ky - p.padY;
247
0
                        if (sy < p.srcH)
248
0
                        {
249
0
                            for (ptrdiff_t sxc = sxcB; sxc < sxcE; sxc += C, pd += C)
250
0
                            {
251
0
                                if ((size_t)sxc < cW)
252
0
                                    _mm_mask_storeu_epi8(pd, cM, _mm_maskz_loadu_epi8(cM, src + sy * cW + sxc));
253
0
                                else
254
0
                                    _mm_mask_storeu_epi8(pd, cM, _mm512_castsi512_si128(_zero));
255
0
                            }
256
0
                        }
257
0
                        else
258
0
                        {
259
0
                            _mm512_mask_storeu_epi8(pd, kcM, _zero);
260
0
                            pd += kcX;
261
0
                        }
262
0
                    }
263
0
                    _mm512_mask_storeu_epi8(pd, gM, _mm512_setzero_si512());
264
0
                }
265
0
            }
266
0
        }
267
268
        //-----------------------------------------------------------------------------------------
269
270
        // GEMM micro-kernel processing M rows of the reordered source (M is a template constant; the
        // 'M > 0x0' .. 'M > 0xB' guards support 1..12 rows) against one or two weight blocks.
        //   src0    - first source row; consecutive rows are a.bufK bytes apart.
        //   srcC    - number of source bytes consumed per row; the main loop advances 4 bytes per step.
        //   dstC    - number of output channels handled by this call; 'dstC > F' selects the two-vector
        //             path (accumulators dX0 and dX1, weights weight0 and weight1 = weight0 + a.bufK * F),
        //             otherwise only dX0 / weight0 are used.
        //   update  - non-zero: accumulators are initialised from 'buf'; zero: accumulators start at zero.
        //   buf/dst - passed to Save1 / Save2 and advanced per row by a.dB and p.dstC * a.elem respectively.
        // All remaining arguments (sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero) are only forwarded
        // to Save1 / Save2 together with a tail mask.
        // NOTE(review): F, A, Set4, Madd4, Save1, Save2 and TailMask16 are defined elsewhere. Presumably Set4
        // broadcasts 4 source bytes and Madd4 accumulates 4-byte dot products into 32-bit lanes - confirm.
        template<Term8iType term, SimdConvolutionActivationType type, int M> void QuantizedConvolutionNhwcGemm_i2xM(const uint8_t* src0, const ConvParam& p, const AlgParam& a, 
271
            size_t srcC, size_t dstC, int update, const int8_t* weight0, const __m512i* sBias, const __m512* sNorm, const __m512i& iLo, const __m512i& iHi, const __m512& iScale, 
272
            const __m512* params, const __m512& dNorm, const __m512i& dZero, int32_t* buf, uint8_t* dst)
273
0
        {
274
0
            __m512i d00, d01, d10, d11, d20, d21, d30, d31, d40, d41, d50, d51, d60, d61, d70, d71, d80, d81, d90, d91, dA0, dA1, dB0, dB1, s0, w0, w1;
275
0
            size_t dB = a.dB, dD = p.dstC * a.elem, dS = a.bufK;
276
0
            const int8_t* weight1 = weight0 + a.bufK * F;
277
0
            // Only six row pointers are kept; rows 6..11 are reached through the same pointers plus 'offs6'.
            const uint8_t* src1 = src0 + 1 * dS;
278
0
            const uint8_t* src2 = src0 + 2 * dS;
279
0
            const uint8_t* src3 = src0 + 3 * dS;
280
0
            const uint8_t* src4 = src0 + 4 * dS;
281
0
            const uint8_t* src5 = src0 + 5 * dS;
282
0
            // Two-vector path: two accumulators per row, two weight vectors per step.
            if (dstC > F)
283
0
            {
284
0
                if (update)
285
0
                {
286
0
                    if (M > 0x0) d00 = _mm512_loadu_si512(buf + 0x0 * dB + 0), d01 = _mm512_loadu_si512(buf + 0x0 * dB + F);
287
0
                    if (M > 0x1) d10 = _mm512_loadu_si512(buf + 0x1 * dB + 0), d11 = _mm512_loadu_si512(buf + 0x1 * dB + F);
288
0
                    if (M > 0x2) d20 = _mm512_loadu_si512(buf + 0x2 * dB + 0), d21 = _mm512_loadu_si512(buf + 0x2 * dB + F);
289
0
                    if (M > 0x3) d30 = _mm512_loadu_si512(buf + 0x3 * dB + 0), d31 = _mm512_loadu_si512(buf + 0x3 * dB + F);
290
0
                    if (M > 0x4) d40 = _mm512_loadu_si512(buf + 0x4 * dB + 0), d41 = _mm512_loadu_si512(buf + 0x4 * dB + F);
291
0
                    if (M > 0x5) d50 = _mm512_loadu_si512(buf + 0x5 * dB + 0), d51 = _mm512_loadu_si512(buf + 0x5 * dB + F);
292
0
                    if (M > 0x6) d60 = _mm512_loadu_si512(buf + 0x6 * dB + 0), d61 = _mm512_loadu_si512(buf + 0x6 * dB + F);
293
0
                    if (M > 0x7) d70 = _mm512_loadu_si512(buf + 0x7 * dB + 0), d71 = _mm512_loadu_si512(buf + 0x7 * dB + F);
294
0
                    if (M > 0x8) d80 = _mm512_loadu_si512(buf + 0x8 * dB + 0), d81 = _mm512_loadu_si512(buf + 0x8 * dB + F);
295
0
                    if (M > 0x9) d90 = _mm512_loadu_si512(buf + 0x9 * dB + 0), d91 = _mm512_loadu_si512(buf + 0x9 * dB + F);
296
0
                    if (M > 0xA) dA0 = _mm512_loadu_si512(buf + 0xA * dB + 0), dA1 = _mm512_loadu_si512(buf + 0xA * dB + F);
297
0
                    if (M > 0xB) dB0 = _mm512_loadu_si512(buf + 0xB * dB + 0), dB1 = _mm512_loadu_si512(buf + 0xB * dB + F);
298
0
                }
299
0
                else
300
0
                {
301
0
                    if (M > 0x0) d00 = _mm512_setzero_si512(), d01 = _mm512_setzero_si512();
302
0
                    if (M > 0x1) d10 = _mm512_setzero_si512(), d11 = _mm512_setzero_si512();
303
0
                    if (M > 0x2) d20 = _mm512_setzero_si512(), d21 = _mm512_setzero_si512();
304
0
                    if (M > 0x3) d30 = _mm512_setzero_si512(), d31 = _mm512_setzero_si512();
305
0
                    if (M > 0x4) d40 = _mm512_setzero_si512(), d41 = _mm512_setzero_si512();
306
0
                    if (M > 0x5) d50 = _mm512_setzero_si512(), d51 = _mm512_setzero_si512();
307
0
                    if (M > 0x6) d60 = _mm512_setzero_si512(), d61 = _mm512_setzero_si512();
308
0
                    if (M > 0x7) d70 = _mm512_setzero_si512(), d71 = _mm512_setzero_si512();
309
0
                    if (M > 0x8) d80 = _mm512_setzero_si512(), d81 = _mm512_setzero_si512();
310
0
                    if (M > 0x9) d90 = _mm512_setzero_si512(), d91 = _mm512_setzero_si512();
311
0
                    if (M > 0xA) dA0 = _mm512_setzero_si512(), dA1 = _mm512_setzero_si512();
312
0
                    if (M > 0xB) dB0 = _mm512_setzero_si512(), dB1 = _mm512_setzero_si512();
313
0
                }
314
0
                // offs0 addresses rows 0..5 through src0..src5; offs6 = offs0 + 6 * dS addresses rows 6..11.
                // Each step consumes 4 source bytes per row and advances both weight pointers by A.
                for (size_t offs0 = 0, offs6 = offs0 + 6 * dS; offs0 < srcC; offs0 += 4, offs6 += 4)
315
0
                {
316
0
                    w0 = _mm512_loadu_si512((__m512i*)weight0);
317
0
                    w1 = _mm512_loadu_si512((__m512i*)weight1);
318
0
                    if (M > 0x0) s0 = Set4(src0 + offs0), Madd4<true>(d00, s0, w0), Madd4<true>(d01, s0, w1);
319
0
                    if (M > 0x1) s0 = Set4(src1 + offs0), Madd4<true>(d10, s0, w0), Madd4<true>(d11, s0, w1);
320
0
                    if (M > 0x2) s0 = Set4(src2 + offs0), Madd4<true>(d20, s0, w0), Madd4<true>(d21, s0, w1);
321
0
                    if (M > 0x3) s0 = Set4(src3 + offs0), Madd4<true>(d30, s0, w0), Madd4<true>(d31, s0, w1);
322
0
                    if (M > 0x4) s0 = Set4(src4 + offs0), Madd4<true>(d40, s0, w0), Madd4<true>(d41, s0, w1);
323
0
                    if (M > 0x5) s0 = Set4(src5 + offs0), Madd4<true>(d50, s0, w0), Madd4<true>(d51, s0, w1);
324
0
                    if (M > 0x6) s0 = Set4(src0 + offs6), Madd4<true>(d60, s0, w0), Madd4<true>(d61, s0, w1);
325
0
                    if (M > 0x7) s0 = Set4(src1 + offs6), Madd4<true>(d70, s0, w0), Madd4<true>(d71, s0, w1);
326
0
                    if (M > 0x8) s0 = Set4(src2 + offs6), Madd4<true>(d80, s0, w0), Madd4<true>(d81, s0, w1);
327
0
                    if (M > 0x9) s0 = Set4(src3 + offs6), Madd4<true>(d90, s0, w0), Madd4<true>(d91, s0, w1);
328
0
                    if (M > 0xA) s0 = Set4(src4 + offs6), Madd4<true>(dA0, s0, w0), Madd4<true>(dA1, s0, w1);
329
0
                    if (M > 0xB) s0 = Set4(src5 + offs6), Madd4<true>(dB0, s0, w0), Madd4<true>(dB1, s0, w1);
330
0
                    weight0 += A, weight1 += A;
331
0
                }
332
0
                // The tail mask is built for the second vector only (dstC - F channels).
                __mmask16 tail = TailMask16(dstC - F);
333
0
                if (M > 0x0) Save2<term, type>(dst, buf, d00, d01, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
334
0
                if (M > 0x1) Save2<term, type>(dst, buf, d10, d11, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
335
0
                if (M > 0x2) Save2<term, type>(dst, buf, d20, d21, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
336
0
                if (M > 0x3) Save2<term, type>(dst, buf, d30, d31, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
337
0
                if (M > 0x4) Save2<term, type>(dst, buf, d40, d41, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
338
0
                if (M > 0x5) Save2<term, type>(dst, buf, d50, d51, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
339
0
                if (M > 0x6) Save2<term, type>(dst, buf, d60, d61, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
340
0
                if (M > 0x7) Save2<term, type>(dst, buf, d70, d71, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
341
0
                if (M > 0x8) Save2<term, type>(dst, buf, d80, d81, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
342
0
                if (M > 0x9) Save2<term, type>(dst, buf, d90, d91, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
343
0
                if (M > 0xA) Save2<term, type>(dst, buf, dA0, dA1, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
344
0
                if (M > 0xB) Save2<term, type>(dst, buf, dB0, dB1, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
345
0
            }
346
0
            else
347
0
            {
348
0
                // Single-vector path: one accumulator per row, only weight0 is read.
                if (update)
349
0
                {
350
0
                    if (M > 0x0) d00 = _mm512_loadu_si512(buf + 0x0 * dB + 0);
351
0
                    if (M > 0x1) d10 = _mm512_loadu_si512(buf + 0x1 * dB + 0);
352
0
                    if (M > 0x2) d20 = _mm512_loadu_si512(buf + 0x2 * dB + 0);
353
0
                    if (M > 0x3) d30 = _mm512_loadu_si512(buf + 0x3 * dB + 0);
354
0
                    if (M > 0x4) d40 = _mm512_loadu_si512(buf + 0x4 * dB + 0);
355
0
                    if (M > 0x5) d50 = _mm512_loadu_si512(buf + 0x5 * dB + 0);
356
0
                    if (M > 0x6) d60 = _mm512_loadu_si512(buf + 0x6 * dB + 0);
357
0
                    if (M > 0x7) d70 = _mm512_loadu_si512(buf + 0x7 * dB + 0);
358
0
                    if (M > 0x8) d80 = _mm512_loadu_si512(buf + 0x8 * dB + 0);
359
0
                    if (M > 0x9) d90 = _mm512_loadu_si512(buf + 0x9 * dB + 0);
360
0
                    if (M > 0xA) dA0 = _mm512_loadu_si512(buf + 0xA * dB + 0);
361
0
                    if (M > 0xB) dB0 = _mm512_loadu_si512(buf + 0xB * dB + 0);
362
0
                }
363
0
                else
364
0
                {
365
0
                    if (M > 0x0) d00 = _mm512_setzero_si512();
366
0
                    if (M > 0x1) d10 = _mm512_setzero_si512();
367
0
                    if (M > 0x2) d20 = _mm512_setzero_si512();
368
0
                    if (M > 0x3) d30 = _mm512_setzero_si512();
369
0
                    if (M > 0x4) d40 = _mm512_setzero_si512();
370
0
                    if (M > 0x5) d50 = _mm512_setzero_si512();
371
0
                    if (M > 0x6) d60 = _mm512_setzero_si512();
372
0
                    if (M > 0x7) d70 = _mm512_setzero_si512();
373
0
                    if (M > 0x8) d80 = _mm512_setzero_si512();
374
0
                    if (M > 0x9) d90 = _mm512_setzero_si512();
375
0
                    if (M > 0xA) dA0 = _mm512_setzero_si512();
376
0
                    if (M > 0xB) dB0 = _mm512_setzero_si512();
377
0
                }
378
0
                // Same row addressing as in the two-vector path: offs6 reaches rows 6..11 via src0..src5.
                for (size_t offs0 = 0, offs6 = offs0 + 6 * dS; offs0 < srcC; offs0 += 4, offs6 += 4)
379
0
                {
380
0
                    w0 = _mm512_loadu_si512((__m512i*)weight0);
381
0
                    if (M > 0x0) s0 = Set4(src0 + offs0), Madd4<true>(d00, s0, w0);
382
0
                    if (M > 0x1) s0 = Set4(src1 + offs0), Madd4<true>(d10, s0, w0);
383
0
                    if (M > 0x2) s0 = Set4(src2 + offs0), Madd4<true>(d20, s0, w0);
384
0
                    if (M > 0x3) s0 = Set4(src3 + offs0), Madd4<true>(d30, s0, w0);
385
0
                    if (M > 0x4) s0 = Set4(src4 + offs0), Madd4<true>(d40, s0, w0);
386
0
                    if (M > 0x5) s0 = Set4(src5 + offs0), Madd4<true>(d50, s0, w0);
387
0
                    if (M > 0x6) s0 = Set4(src0 + offs6), Madd4<true>(d60, s0, w0);
388
0
                    if (M > 0x7) s0 = Set4(src1 + offs6), Madd4<true>(d70, s0, w0);
389
0
                    if (M > 0x8) s0 = Set4(src2 + offs6), Madd4<true>(d80, s0, w0);
390
0
                    if (M > 0x9) s0 = Set4(src3 + offs6), Madd4<true>(d90, s0, w0);
391
0
                    if (M > 0xA) s0 = Set4(src4 + offs6), Madd4<true>(dA0, s0, w0);
392
0
                    if (M > 0xB) s0 = Set4(src5 + offs6), Madd4<true>(dB0, s0, w0);
393
0
                    weight0 += A;
394
0
                }
395
0
                __mmask16 tail = TailMask16(dstC);
396
0
                if (M > 0x0) Save1<term, type>(dst, buf, d00, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
397
0
                if (M > 0x1) Save1<term, type>(dst, buf, d10, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
398
0
                if (M > 0x2) Save1<term, type>(dst, buf, d20, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
399
0
                if (M > 0x3) Save1<term, type>(dst, buf, d30, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
400
0
                if (M > 0x4) Save1<term, type>(dst, buf, d40, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
401
0
                if (M > 0x5) Save1<term, type>(dst, buf, d50, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
402
0
                if (M > 0x6) Save1<term, type>(dst, buf, d60, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
403
0
                if (M > 0x7) Save1<term, type>(dst, buf, d70, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
404
0
                if (M > 0x8) Save1<term, type>(dst, buf, d80, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
405
0
                if (M > 0x9) Save1<term, type>(dst, buf, d90, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
406
0
                if (M > 0xA) Save1<term, type>(dst, buf, dA0, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
407
0
                if (M > 0xB) Save1<term, type>(dst, buf, dB0, sBias, sNorm, iLo, iHi, iScale, params, dNorm, dZero, tail), dst += dD, buf += dB;
408
0
            }
409
0
        }
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 11>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 12>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
410
411
        // Pointer to a QuantizedConvolutionNhwcGemm_i2xM<term, type, M> kernel.
        // All instantiations share this signature, so the one matching a run-time
        // row count can be looked up once and then called through the pointer.
        typedef void(*QuantizedConvolutionNhwcGemm_i2xM_Ptr)(
            const uint8_t* src0, const ConvParam& p, const AlgParam& a,
            size_t srcC, size_t dstC, int update, const int8_t* weight,
            const __m512i* sBias, const __m512* sNorm,
            const __m512i& iLo, const __m512i& iHi, const __m512& iScale,
            const __m512* params, const __m512& dNorm, const __m512i& dZero,
            int32_t* buf, uint8_t* dst);
413
414
        // Translates a run-time row count M into the corresponding compile-time
        // QuantizedConvolutionNhwcGemm_i2xM<term, type, M> instantiation.
        //  - M == 0      : returns NULL (there is no kernel for zero rows).
        //  - M in 1..12  : returns the kernel specialised for exactly M rows.
        //  - M > 12      : trips assert(0) and returns NULL.
        template<Term8iType term, SimdConvolutionActivationType type> QuantizedConvolutionNhwcGemm_i2xM_Ptr GetQuantizedConvolutionNhwcGemm_i2xM(size_t M)
        {
            // Entry i holds the kernel for i rows; entry 0 is the "no kernel" slot.
            static const QuantizedConvolutionNhwcGemm_i2xM_Ptr kernels[] =
            {
                NULL,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 1>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 2>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 3>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 4>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 5>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 6>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 7>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 8>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 9>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 10>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 11>,
                QuantizedConvolutionNhwcGemm_i2xM<term, type, 12>
            };
            const size_t count = sizeof(kernels) / sizeof(kernels[0]);
            if (M < count)
                return kernels[M];
            // Unsupported row count: flag it in debug builds, fail soft otherwise.
            assert(0);
            return NULL;
        }
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)0>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)1>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)2>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
Unexecuted instantiation: void (*Simd::Avx512bw::GetQuantizedConvolutionNhwcGemm_i2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, int, signed char const*, long long __vector(8) const*, float __vector(16) const*, long long __vector(8) const&, long long __vector(8) const&, float __vector(16) const&, float __vector(16) const*, float __vector(16) const&, long long __vector(8) const&, int*, unsigned char*)
435
436
        template<Term8iType term, SimdConvolutionActivationType type> void QuantizedConvolutionNhwcGemm_i2(const uint8_t* src, const ConvParam& p, const AlgParam& a, size_t dstC, size_t dstH, size_t srcC, 
437
            int update, const int8_t* weight, const int32_t* sBias, const float* sNorm, int32_t iZero, float iScale, const float* params, float dNorm, int32_t dZero, int32_t* buf, uint8_t* dst)
438
0
        {
439
0
            size_t n1 = dstH * p.dstW, n = 12;
440
0
            size_t nn = AlignLoAny(n1, n), m = n1 - nn, dW = a.bufK * DF;
441
0
            size_t dB = a.dB, dD = p.dstC * a.elem, dS = a.bufK;
442
0
            QuantizedConvolutionNhwcGemm_i2xM_Ptr convolution_i2xN = GetQuantizedConvolutionNhwcGemm_i2xM<term, type>(n);
443
0
            QuantizedConvolutionNhwcGemm_i2xM_Ptr convolution_i2xM = GetQuantizedConvolutionNhwcGemm_i2xM<term, type>(m);
444
445
0
            __m512 _sNorm[2], _iScale, _params[2], _dNorm;
446
0
            __m512i _sBias[2], _dZero = _mm512_set1_epi32(dZero), _iLo, _iHi;
447
0
            if (type != SimdConvolutionActivationIdentity)
448
0
            {
449
0
                _iLo = _mm512_set1_epi32(-iZero);
450
0
                _iHi = _mm512_set1_epi32(255 - iZero);
451
0
                _iScale = _mm512_set1_ps(iScale);
452
0
                _dNorm = _mm512_set1_ps(dNorm);
453
0
                _params[0] = _mm512_set1_ps(params[0]);
454
0
                _params[1] = _mm512_set1_ps(params[1]);
455
0
            }
456
0
            for (size_t dc = 0; dc < dstC; dc += DF)
457
0
            {
458
0
                size_t dC = Simd::Min(DF, dstC - dc);
459
0
                _sBias[0] = _mm512_loadu_si512((__m512i*)(sBias + dc) + 0);
460
0
                _sBias[1] = _mm512_loadu_si512((__m512i*)(sBias + dc) + 1);
461
0
                _sNorm[0] = _mm512_loadu_ps(sNorm + dc + 0);
462
0
                _sNorm[1] = _mm512_loadu_ps(sNorm + dc + F);
463
0
                if (type == SimdConvolutionActivationPrelu)
464
0
                {
465
0
                    _params[0] = _mm512_loadu_ps(params + dc + 0);
466
0
                    _params[1] = _mm512_loadu_ps(params + dc + F);
467
0
                }
468
0
                const uint8_t* s = src;
469
0
                int32_t* b = buf + dc;
470
0
                uint8_t* d = dst + dc * a.elem;
471
0
                size_t i = 0;
472
0
                for (; i < nn; i += n, s += n * dS, b += n * dB, d += n * dD)
473
0
                    convolution_i2xN(s, p, a, srcC, dC, update, weight, _sBias, _sNorm, _iLo, _iHi, _iScale, _params, _dNorm, _dZero, b, d);
474
0
                for (; i < n1; i += m, s += m * dS, b += m * dB, d += m * dD)
475
0
                    convolution_i2xM(s, p, a, srcC, dC, update, weight, _sBias, _sNorm, _iLo, _iHi, _iScale, _params, _dNorm, _dZero, b, d);
476
0
                weight += dW;
477
0
            }
478
0
        }
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)0>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::QuantizedConvolutionNhwcGemm_i2<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetQuantizedConvolutionNhwcGemm::AlgParam const&, unsigned long, unsigned long, unsigned long, int, signed char const*, int const*, float const*, int, float, float const*, float, int, int*, unsigned char*)
479
480
        //-----------------------------------------------------------------------------------------
481
482
        // Fills the two-entry table of convolution routines:
        //   convolutions[0] - Term8iInterim variant, always with Identity activation;
        //   convolutions[1] - Term8iLast8u variant specialised for p.activation,
        //                     or NULL when the activation is not one of the listed types.
        SIMD_INLINE void Set(const ConvParam& p, const AlgParam& a, Convolution* convolutions)
        {
            convolutions[0] = QuantizedConvolutionNhwcGemm_i2<Term8iInterim, SimdConvolutionActivationIdentity>;

            Convolution& last = convolutions[1];
            switch (p.activation)
            {
            case SimdConvolutionActivationIdentity:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationIdentity>;
                break;
            case SimdConvolutionActivationRelu:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationRelu>;
                break;
            case SimdConvolutionActivationLeakyRelu:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationLeakyRelu>;
                break;
            case SimdConvolutionActivationRestrictRange:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationRestrictRange>;
                break;
            case SimdConvolutionActivationPrelu:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationPrelu>;
                break;
            case SimdConvolutionActivationElu:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationElu>;
                break;
            case SimdConvolutionActivationHswish:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationHswish>;
                break;
            case SimdConvolutionActivationMish:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationMish>;
                break;
            case SimdConvolutionActivationHardSigmoid:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationHardSigmoid>;
                break;
            case SimdConvolutionActivationSwish:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationSwish>;
                break;
            case SimdConvolutionActivationGelu:
                last = QuantizedConvolutionNhwcGemm_i2<Term8iLast8u, SimdConvolutionActivationGelu>;
                break;
            default:
                // No specialised routine for this activation.
                last = NULL;
            }
        }
502
503
        // Builds on the Avx2 implementation, then replaces the algorithm parameters,
        // the input-conversion routine and the convolution routines with the
        // Avx512bw ones.
        SynetQuantizedConvolutionNhwcGemm::SynetQuantizedConvolutionNhwcGemm(const ConvParam& p)
            : Avx2::SynetQuantizedConvolutionNhwcGemm(p)
        {
            SetAlgParam(F, F * 2, 12, 4, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3());

            if (!_src8u)
            {
                // Only the 8-bit unsigned source path is handled here.
                assert(0);
            }
            else if (_is1x1 && _alg.K == _alg.bufK)
            {
                // 1x1 kernel whose K already equals bufK: no conversion routine is installed.
                _convert = NULL;
            }
            else if (!p.IsDilation(1))
            {
                _convert = QuantizedConvolutionNhwcGemmReorder;
            }
            else if (p.srcC <= 16 && p.srcC * p.kernelX <= 64)
            {
                // Unit dilation with few input channels and a short kernel row.
                _convert = QuantizedConvolutionNhwcGemmReorder1d16c;
            }
            else
            {
                _convert = QuantizedConvolutionNhwcGemmReorder1d;
            }

            Set(p, _alg, _convolutions);
        }
526
    }
527
#endif
528
}