/src/Simd/src/Simd/SimdSse41SynetConvolution8iNhwcDirect1x1.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Simd Library (http://ermig1979.github.io/Simd).  | 
3  |  | *  | 
4  |  | * Copyright (c) 2011-2024 Yermalayeu Ihar.  | 
5  |  | *  | 
6  |  | * Permission is hereby granted, free of charge, to any person obtaining a copy  | 
7  |  | * of this software and associated documentation files (the "Software"), to deal  | 
8  |  | * in the Software without restriction, including without limitation the rights  | 
9  |  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  | 
10  |  | * copies of the Software, and to permit persons to whom the Software is  | 
11  |  | * furnished to do so, subject to the following conditions:  | 
12  |  | *  | 
13  |  | * The above copyright notice and this permission notice shall be included in  | 
14  |  | * all copies or substantial portions of the Software.  | 
15  |  | *  | 
16  |  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  | 
17  |  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  | 
18  |  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  | 
19  |  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  | 
20  |  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  | 
21  |  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  | 
22  |  | * SOFTWARE.  | 
23  |  | */  | 
24  |  | #include "Simd/SimdSynetConvolution8i.h"  | 
25  |  | #include "Simd/SimdSynetConvolution8iCommon.h"  | 
26  |  | #include "Simd/SimdSynet.h"  | 
27  |  | #include "Simd/SimdMath.h"  | 
28  |  | #include "Simd/SimdBase.h"  | 
29  |  | #include "Simd/SimdCpu.h"  | 
30  |  |  | 
31  |  | namespace Simd  | 
32  |  | { | 
33  |  | #if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE)     | 
34  |  |     namespace Sse41  | 
35  |  |     { | 
36  |  |         using AlgParam = SynetConvolution8iNhwcDirect::AlgParam;  | 
37  |  |         using ConvolutionPtr = SynetConvolution8iNhwcDirect::ConvolutionPtr;  | 
38  |  |  | 
39  |  |         //---------------------------------------------------------------------  | 
40  |  |  | 
// ConvolutionNhwcDirect1x1_2xM: for up to M source positions (src0 + k * dS, k < M) keeps one or
// two 128-bit integer accumulators per position, updates them through Madd4 with 16-byte weight
// loads and Set4(source) values, and finally hands each accumulator to Save1/Save2 together with
// norm/bias/params/scale/shift and the broadcast a.upper value.
//   term, type - forwarded unchanged as template arguments to Save1/Save2.
//   M          - number of source positions processed; the body has slots for M = 1..5.
//   src0       - first source position; the following ones are p.srcC * p.strideX bytes apart.
//   srcC       - the accumulation loops step by 4 while offs < srcC.
//   dstC       - selects the two-accumulator path (dstC > F) or the single-accumulator path.
//   weight0    - weights for the first accumulator; advanced by A on every loop step.
//   buf        - int32 partial sums: reloaded into the accumulators when first == 0, and also
//                passed on to Save1/Save2; advanced by p.dstC elements per position.
//   dst        - output pointer passed to Save1/Save2; advanced by p.dstC * a.size per position.
//   first      - non-zero: accumulators start from zero; zero: they are reloaded from buf.
// NOTE(review): F, DF, A, DivHi, Set4, Madd4, Save1 and Save2 are declared outside this listing.
// Presumably Madd4 is a multiply-accumulate of four uint8 source bytes with int8 weights and
// Save1/Save2 either store partial sums to buf or write converted results to dst depending on
// `term` - confirm against their definitions.
41  |  |         template<Term8iType term, SimdConvolutionActivationType type, int M> void ConvolutionNhwcDirect1x1_2xM(  | 
42  |  |             const uint8_t* src0, const ConvParam& p, const AlgParam& a, size_t srcC, size_t dstC, const int8_t* weight0,  | 
43  |  |             const __m128* norm, const __m128* bias, const __m128* params, const __m128* scale, const __m128* shift, int32_t* buf, uint8_t* dst, int first)  | 
44  | 0  |         { | 
45  | 0  |             __m128i d00, d01, d10, d11, d20, d21, d30, d31, d40, d41, s0, w0, w1;  | 
                       // dS: byte step between source positions; dD: dst step per position; dB: buf step (int32 elements) per position.
46  | 0  |             size_t dS = p.srcC * p.strideX, dD = p.dstC * a.size, dB = p.dstC;  | 
                       // weight1 feeds the second accumulator of each pair; it starts DivHi(p.srcC, 4) * A bytes after weight0
                       // (presumably the weights of the next group of output channels - confirm with the weight packing code).
47  | 0  |             const int8_t* weight1 = weight0 + DivHi(p.srcC, 4) * A;  | 
48  | 0  |             const uint8_t* src1 = src0 + 1 * dS;  | 
49  | 0  |             const uint8_t* src2 = src0 + 2 * dS;  | 
50  | 0  |             const uint8_t* src3 = src0 + 3 * dS;  | 
51  | 0  |             const uint8_t* src4 = src0 + 4 * dS;  | 
52  | 0  |             __m128i upper = _mm_set1_epi32(a.upper);  | 
53  | 0  |             if (dstC > F)  | 
54  | 0  |             { | 
                           // Two accumulators per position (dK0 from weight0, dK1 from weight1).
                           // They start at zero on the first pass, otherwise from buf at offsets k * dB + 0 and k * dB + F.
55  | 0  |                 if (first)  | 
56  | 0  |                 { | 
57  | 0  |                     if (M > 0) d00 = _mm_setzero_si128(), d01 = _mm_setzero_si128();  | 
58  | 0  |                     if (M > 1) d10 = _mm_setzero_si128(), d11 = _mm_setzero_si128();  | 
59  | 0  |                     if (M > 2) d20 = _mm_setzero_si128(), d21 = _mm_setzero_si128();  | 
60  | 0  |                     if (M > 3) d30 = _mm_setzero_si128(), d31 = _mm_setzero_si128();  | 
61  | 0  |                     if (M > 4) d40 = _mm_setzero_si128(), d41 = _mm_setzero_si128();  | 
62  | 0  |                 }  | 
63  | 0  |                 else  | 
64  | 0  |                 { | 
65  | 0  |                     if (M > 0) d00 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + 0)), d01 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + F));  | 
66  | 0  |                     if (M > 1) d10 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + 0)), d11 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + F));  | 
67  | 0  |                     if (M > 2) d20 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + 0)), d21 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + F));  | 
68  | 0  |                     if (M > 3) d30 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + 0)), d31 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + F));  | 
69  | 0  |                     if (M > 4) d40 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + 0)), d41 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + F));  | 
70  | 0  |                 }  | 
                           // The compatibility flags choose between Madd4<true> and Madd4<false>; the two loops are otherwise identical.
                           // Each step consumes 4 source bytes per position and one 16-byte load from each weight pointer.
71  | 0  |                 if (Base::Overflow(p.compatibility) || Base::Narrowed(p.compatibility))  | 
72  | 0  |                 { | 
73  | 0  |                     for (size_t offs = 0; offs < srcC; offs += 4)  | 
74  | 0  |                     { | 
75  | 0  |                         w0 = _mm_loadu_si128((__m128i*)weight0);  | 
76  | 0  |                         w1 = _mm_loadu_si128((__m128i*)weight1);  | 
77  | 0  |                         if (M > 0) s0 = Set4(src0 + offs), Madd4<true>(d00, s0, w0), Madd4<true>(d01, s0, w1);  | 
78  | 0  |                         if (M > 1) s0 = Set4(src1 + offs), Madd4<true>(d10, s0, w0), Madd4<true>(d11, s0, w1);  | 
79  | 0  |                         if (M > 2) s0 = Set4(src2 + offs), Madd4<true>(d20, s0, w0), Madd4<true>(d21, s0, w1);  | 
80  | 0  |                         if (M > 3) s0 = Set4(src3 + offs), Madd4<true>(d30, s0, w0), Madd4<true>(d31, s0, w1);  | 
81  | 0  |                         if (M > 4) s0 = Set4(src4 + offs), Madd4<true>(d40, s0, w0), Madd4<true>(d41, s0, w1);  | 
82  | 0  |                         weight0 += A, weight1 += A;  | 
83  | 0  |                     }  | 
84  | 0  |                 }  | 
85  | 0  |                 else  | 
86  | 0  |                 { | 
87  | 0  |                     for (size_t offs = 0; offs < srcC; offs += 4)  | 
88  | 0  |                     { | 
89  | 0  |                         w0 = _mm_loadu_si128((__m128i*)weight0);  | 
90  | 0  |                         w1 = _mm_loadu_si128((__m128i*)weight1);  | 
91  | 0  |                         if (M > 0) s0 = Set4(src0 + offs), Madd4<false>(d00, s0, w0), Madd4<false>(d01, s0, w1);  | 
92  | 0  |                         if (M > 1) s0 = Set4(src1 + offs), Madd4<false>(d10, s0, w0), Madd4<false>(d11, s0, w1);  | 
93  | 0  |                         if (M > 2) s0 = Set4(src2 + offs), Madd4<false>(d20, s0, w0), Madd4<false>(d21, s0, w1);  | 
94  | 0  |                         if (M > 3) s0 = Set4(src3 + offs), Madd4<false>(d30, s0, w0), Madd4<false>(d31, s0, w1);  | 
95  | 0  |                         if (M > 4) s0 = Set4(src4 + offs), Madd4<false>(d40, s0, w0), Madd4<false>(d41, s0, w1);  | 
96  | 0  |                         weight0 += A, weight1 += A;  | 
97  | 0  |                     }  | 
98  | 0  |                 }  | 
                           // dstC == DF: Save2 is called without the trailing argument; otherwise dstC - F is passed as well
                           // (presumably the count of valid lanes in the second accumulator - confirm in Save2).
                           // dst and buf are local copies, so advancing them here does not affect the caller.
99  | 0  |                 if (dstC == DF)  | 
100  | 0  |                 { | 
101  | 0  |                     if (M > 0) Save2<term, type>(dst, buf, d00, d01, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
102  | 0  |                     if (M > 1) Save2<term, type>(dst, buf, d10, d11, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
103  | 0  |                     if (M > 2) Save2<term, type>(dst, buf, d20, d21, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
104  | 0  |                     if (M > 3) Save2<term, type>(dst, buf, d30, d31, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
105  | 0  |                     if (M > 4) Save2<term, type>(dst, buf, d40, d41, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
106  | 0  |                 }  | 
107  | 0  |                 else  | 
108  | 0  |                 { | 
109  | 0  |                     if (M > 0) Save2<term, type>(dst, buf, d00, d01, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;  | 
110  | 0  |                     if (M > 1) Save2<term, type>(dst, buf, d10, d11, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;  | 
111  | 0  |                     if (M > 2) Save2<term, type>(dst, buf, d20, d21, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;  | 
112  | 0  |                     if (M > 3) Save2<term, type>(dst, buf, d30, d31, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;  | 
113  | 0  |                     if (M > 4) Save2<term, type>(dst, buf, d40, d41, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;  | 
114  | 0  |                 }  | 
115  | 0  |             }  | 
116  | 0  |             else  | 
117  | 0  |             { | 
                           // Single accumulator per position (dstC <= F): the same three phases as above,
                           // using only weight0 / w0 and Save1; weight1 and the dK1 accumulators are not touched.
118  | 0  |                 if (first)  | 
119  | 0  |                 { | 
120  | 0  |                     if (M > 0) d00 = _mm_setzero_si128();  | 
121  | 0  |                     if (M > 1) d10 = _mm_setzero_si128();  | 
122  | 0  |                     if (M > 2) d20 = _mm_setzero_si128();  | 
123  | 0  |                     if (M > 3) d30 = _mm_setzero_si128();  | 
124  | 0  |                     if (M > 4) d40 = _mm_setzero_si128();  | 
125  | 0  |                 }  | 
126  | 0  |                 else  | 
127  | 0  |                 { | 
128  | 0  |                     if (M > 0) d00 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + 0));  | 
129  | 0  |                     if (M > 1) d10 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + 0));  | 
130  | 0  |                     if (M > 2) d20 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + 0));  | 
131  | 0  |                     if (M > 3) d30 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + 0));  | 
132  | 0  |                     if (M > 4) d40 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + 0));  | 
133  | 0  |                 }  | 
134  | 0  |                 if (Base::Overflow(p.compatibility) || Base::Narrowed(p.compatibility))  | 
135  | 0  |                 { | 
136  | 0  |                     for (size_t offs = 0; offs < srcC; offs += 4)  | 
137  | 0  |                     { | 
138  | 0  |                         w0 = _mm_loadu_si128((__m128i*)weight0);  | 
139  | 0  |                         if (M > 0) s0 = Set4(src0 + offs), Madd4<true>(d00, s0, w0);  | 
140  | 0  |                         if (M > 1) s0 = Set4(src1 + offs), Madd4<true>(d10, s0, w0);  | 
141  | 0  |                         if (M > 2) s0 = Set4(src2 + offs), Madd4<true>(d20, s0, w0);  | 
142  | 0  |                         if (M > 3) s0 = Set4(src3 + offs), Madd4<true>(d30, s0, w0);  | 
143  | 0  |                         if (M > 4) s0 = Set4(src4 + offs), Madd4<true>(d40, s0, w0);  | 
144  | 0  |                         weight0 += A;  | 
145  | 0  |                     }                  | 
146  | 0  |                 }  | 
147  | 0  |                 else  | 
148  | 0  |                 { | 
149  | 0  |                     for (size_t offs = 0; offs < srcC; offs += 4)  | 
150  | 0  |                     { | 
151  | 0  |                         w0 = _mm_loadu_si128((__m128i*)weight0);  | 
152  | 0  |                         if (M > 0) s0 = Set4(src0 + offs), Madd4<false>(d00, s0, w0);  | 
153  | 0  |                         if (M > 1) s0 = Set4(src1 + offs), Madd4<false>(d10, s0, w0);  | 
154  | 0  |                         if (M > 2) s0 = Set4(src2 + offs), Madd4<false>(d20, s0, w0);  | 
155  | 0  |                         if (M > 3) s0 = Set4(src3 + offs), Madd4<false>(d30, s0, w0);  | 
156  | 0  |                         if (M > 4) s0 = Set4(src4 + offs), Madd4<false>(d40, s0, w0);  | 
157  | 0  |                         weight0 += A;  | 
158  | 0  |                     }  | 
159  | 0  |                 }  | 
                           // dstC == F: Save1 is called without the trailing argument; otherwise dstC itself is passed as well.
160  | 0  |                 if (dstC == F)  | 
161  | 0  |                 { | 
162  | 0  |                     if (M > 0) Save1<term, type>(dst, buf, d00, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
163  | 0  |                     if (M > 1) Save1<term, type>(dst, buf, d10, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
164  | 0  |                     if (M > 2) Save1<term, type>(dst, buf, d20, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
165  | 0  |                     if (M > 3) Save1<term, type>(dst, buf, d30, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
166  | 0  |                     if (M > 4) Save1<term, type>(dst, buf, d40, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;  | 
167  | 0  |                 }  | 
168  | 0  |                 else  | 
169  | 0  |                 { | 
170  | 0  |                     if (M > 0) Save1<term, type>(dst, buf, d00, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;  | 
171  | 0  |                     if (M > 1) Save1<term, type>(dst, buf, d10, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;  | 
172  | 0  |                     if (M > 2) Save1<term, type>(dst, buf, d20, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;  | 
173  | 0  |                     if (M > 3) Save1<term, type>(dst, buf, d30, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;  | 
174  | 0  |                     if (M > 4) Save1<term, type>(dst, buf, d40, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;  | 
175  | 0  |                 }  | 
176  | 0  |             }  | 
177  | 0  |         } Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 4>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)  | 
178  |  |  | 
179  |  |         typedef void(*ConvolutionNhwcDirect1x1_2xM_Ptr)(const uint8_t* src0, const ConvParam& p, const AlgParam& a, size_t srcC, size_t dstC,  | 
180  |  |             const int8_t* weight0, const __m128* norm, const __m128* bias, const __m128* params, const __m128* scale, const __m128* shift, int32_t* buf, uint8_t* dst, int first);  | 
181  |  |  | 
182  |  |         template<Term8iType term, SimdConvolutionActivationType type> ConvolutionNhwcDirect1x1_2xM_Ptr GetConvolutionNhwcDirect1x1_2xM(size_t M)  | 
183  | 0  |         { | 
184  | 0  |             switch (M)  | 
185  | 0  |             { | 
186  | 0  |             case 0: return NULL;  | 
187  | 0  |             case 1: return ConvolutionNhwcDirect1x1_2xM<term, type, 1>;  | 
188  | 0  |             case 2: return ConvolutionNhwcDirect1x1_2xM<term, type, 2>;  | 
189  | 0  |             case 3: return ConvolutionNhwcDirect1x1_2xM<term, type, 3>;  | 
190  | 0  |             case 4: return ConvolutionNhwcDirect1x1_2xM<term, type, 4>;  | 
191  | 0  |             case 5: return ConvolutionNhwcDirect1x1_2xM<term, type, 5>;  | 
192  | 0  |             }  | 
193  | 0  |             assert(0);  | 
194  | 0  |             return NULL;  | 
195  | 0  |         } Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, 
(SimdConvolutionActivationType)4>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, 
unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, 
float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)  | 
196  |  |  | 
197  |  |         template<Term8iType term, SimdConvolutionActivationType type> void ConvolutionNhwcDirect1x1_2(const uint8_t* src,  | 
198  |  |             const ConvParam& p, const AlgParam& a, size_t dstC, size_t yBeg, size_t yEnd, size_t srcC, const int8_t* weight,  | 
199  |  |             const float* norm, const float* bias, const float* params, const float* scale, const float* shift, int32_t* buf, uint8_t* dst, int first)  | 
200  | 0  |         { | 
201  | 0  |             size_t n = 5, n1 = (yEnd - yBeg) * p.dstW, nn = AlignLoAny(n1, n), m = n1 - nn;  | 
202  | 0  |             ConvolutionNhwcDirect1x1_2xM_Ptr convolutionNhwcDirect1x1_2xN = GetConvolutionNhwcDirect1x1_2xM<term, type>(n);  | 
203  | 0  |             ConvolutionNhwcDirect1x1_2xM_Ptr convolutionNhwcDirect1x1_2xM = GetConvolutionNhwcDirect1x1_2xM<term, type>(m);  | 
204  | 0  |             __m128 _norm[2], _bias[2], _params[2], _scale[2], _shift[2];  | 
205  | 0  |             _params[0] = _mm_set1_ps(params[0]);  | 
206  | 0  |             _params[1] = _mm_set1_ps(params[1]);  | 
207  | 0  |             for (size_t dc = 0; dc < dstC; dc += DF)  | 
208  | 0  |             { | 
209  | 0  |                 size_t dC = Simd::Min(DF, dstC - dc);  | 
210  | 0  |                 _norm[0] = _mm_loadu_ps(norm + dc + 0);  | 
211  | 0  |                 _norm[1] = _mm_loadu_ps(norm + dc + F);  | 
212  | 0  |                 _bias[0] = _mm_loadu_ps(bias + dc + 0);  | 
213  | 0  |                 _bias[1] = _mm_loadu_ps(bias + dc + F);  | 
214  | 0  |                 if (type == ::SimdConvolutionActivationPrelu)  | 
215  | 0  |                 { | 
216  | 0  |                     _params[0] = _mm_loadu_ps(params + dc + 0);  | 
217  | 0  |                     _params[1] = _mm_loadu_ps(params + dc + F);  | 
218  | 0  |                 }  | 
219  | 0  |                 _scale[0] = _mm_loadu_ps(scale + dc + 0);  | 
220  | 0  |                 _scale[1] = _mm_loadu_ps(scale + dc + F);  | 
221  | 0  |                 _shift[0] = _mm_loadu_ps(shift + dc + 0);  | 
222  | 0  |                 _shift[1] = _mm_loadu_ps(shift + dc + F);  | 
223  | 0  |                 const uint8_t* s = src + yBeg * p.srcW * p.srcC;  | 
224  | 0  |                 uint8_t* d = dst + (dc + yBeg * p.dstW * p.dstC) * a.size;  | 
225  | 0  |                 int32_t* b = buf + dc + yBeg * p.dstW * p.dstC;  | 
226  | 0  |                 size_t i = 0;  | 
227  | 0  |                 for (; i < nn; i += n, s += p.srcC * n, b += p.dstC * n, d += p.dstC * a.size * n)  | 
228  | 0  |                     convolutionNhwcDirect1x1_2xN(s, p, a, srcC, dC, weight, _norm, _bias, _params, _scale, _shift, b, d, first);  | 
229  | 0  |                 for (; i < n1; i += m, s += p.srcC * m, b += p.dstC * m, d += p.dstC * a.size * m)  | 
230  | 0  |                     convolutionNhwcDirect1x1_2xM(s, p, a, srcC, dC, weight, _norm, _bias, _params, _scale, _shift, b, d, first);  | 
231  | 0  |                 weight += DivHi(p.srcC, 4) * DA;  | 
232  | 0  |             }  | 
233  | 0  |         } Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float 
const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char 
const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, 
unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)  | 
234  |  |  | 
235  |  |         //---------------------------------------------------------------------  | 
236  |  |  | 
// Registers the 1x1 direct convolution kernel for one (term, activation) pair:
// stores ConvolutionNhwcDirect1x1_2<term, activation> into d[term].
//   p - convolution parameters; the assert requires p.Is1x1() to be true.
//   a - algorithm parameters; the assert requires a.microD == 2 * F.
//   d - array of kernel pointers indexed by Term8iType; only element [term] is written.
// The precondition is expressed with assert only, so nothing here checks it when asserts are compiled out.
237  |  |         template <Term8iType term, SimdConvolutionActivationType activation> void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d)  | 
238  | 0  |         { | 
239  | 0  |             assert(a.microD == 2 * F && p.Is1x1() == true);  | 
240  | 0  |             d[term] = ConvolutionNhwcDirect1x1_2<term, activation>;  | 
241  | 0  |         } Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: 
void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void 
Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, 
(SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, 
(SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))  | 
242  |  |  | 
// Fills the kernel slots for all three Term8iType values for one activation by
// delegating to the (term, activation) overload:
//   Term8iLast8u  -> requested activation
//   Term8iLast32f -> requested activation
//   Term8iInterim -> always SimdConvolutionActivationIdentity, regardless of the template argument.
// NOTE(review): presumably the interim term only accumulates partial sums, so no activation
// is applied there - confirm against ConvolutionNhwcDirect1x1_2.
243  |  |         template<SimdConvolutionActivationType activation> void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d)  | 
244  | 0  |         { | 
245  | 0  |             SetDirect1x1<Term8iLast8u, activation>(p, a, d);  | 
246  | 0  |             SetDirect1x1<Term8iLast32f, activation>(p, a, d);  | 
247  | 0  |             SetDirect1x1<Term8iInterim, SimdConvolutionActivationIdentity>(p, a, d);  | 
248  | 0  |         } Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)7>(Simd::ConvParam 
const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))  | 
249  |  |  | 
// Runtime entry point: picks the activation-specific template instantiation from
// p.activation and lets it fill the kernel pointer array d.
//   p - convolution parameters; p.activation selects the branch.
//   a - algorithm parameters, forwarded unchanged.
//   d - array of kernel pointers populated by the selected instantiation.
// Several activations share an instantiation rather than having their own:
//   Identity, Relu and RestrictRange all use the RestrictRange instantiation;
//   LeakyRelu and Prelu both use the Prelu instantiation.
// NOTE(review): presumably the caller supplies activation parameters that make the shared
// kernels equivalent to the requested activation - confirm at the call site.
// An unrecognised activation reaches assert(0); when asserts are compiled out the function
// returns without writing to d.
250  |  |         void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d)  | 
251  | 0  |         { | 
252  | 0  |             switch (p.activation)  | 
253  | 0  |             { | 
254  | 0  |             case SimdConvolutionActivationIdentity: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break;  | 
255  | 0  |             case SimdConvolutionActivationRelu: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break;  | 
256  | 0  |             case SimdConvolutionActivationLeakyRelu: SetDirect1x1<SimdConvolutionActivationPrelu>(p, a, d); break;  | 
257  | 0  |             case SimdConvolutionActivationRestrictRange: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break;  | 
258  | 0  |             case SimdConvolutionActivationPrelu: SetDirect1x1<SimdConvolutionActivationPrelu>(p, a, d); break;  | 
259  | 0  |             case SimdConvolutionActivationElu: SetDirect1x1<SimdConvolutionActivationElu>(p, a, d); break;  | 
260  | 0  |             case SimdConvolutionActivationHswish: SetDirect1x1<SimdConvolutionActivationHswish>(p, a, d); break;  | 
261  | 0  |             case SimdConvolutionActivationMish: SetDirect1x1<SimdConvolutionActivationMish>(p, a, d); break;  | 
262  | 0  |             case SimdConvolutionActivationHardSigmoid: SetDirect1x1<SimdConvolutionActivationHardSigmoid>(p, a, d); break;  | 
263  | 0  |             case SimdConvolutionActivationSwish: SetDirect1x1<SimdConvolutionActivationSwish>(p, a, d); break;  | 
264  | 0  |             case SimdConvolutionActivationGelu: SetDirect1x1<SimdConvolutionActivationGelu>(p, a, d); break;  | 
265  | 0  |             default: assert(0);  | 
266  | 0  |             }  | 
267  | 0  |         }  | 
268  |  |     }  | 
269  |  | #endif  | 
270  |  | }  |