/src/Simd/src/Simd/SimdSse41SynetConvolution8iNhwcDirect1x1.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynetConvolution8i.h" |
25 | | #include "Simd/SimdSynetConvolution8iCommon.h" |
26 | | #include "Simd/SimdSynet.h" |
27 | | #include "Simd/SimdMath.h" |
28 | | #include "Simd/SimdBase.h" |
29 | | #include "Simd/SimdCpu.h" |
30 | | |
31 | | namespace Simd |
32 | | { |
33 | | #if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE) |
34 | | namespace Sse41 |
35 | | { |
36 | | using AlgParam = SynetConvolution8iNhwcDirect::AlgParam; |
37 | | using ConvolutionPtr = SynetConvolution8iNhwcDirect::ConvolutionPtr; |
38 | | |
39 | | //--------------------------------------------------------------------- |
40 | | |
// ConvolutionNhwcDirect1x1_2xM
//
// Inner kernel of the int8 NHWC direct 1x1 convolution (per the function/file name). One call
// accumulates up to M (1..5) source rows against one or two vector-wide groups of output channels
// and then hands the 32-bit accumulators to Save1/Save2.
//
// Template parameters:
//   term, type - forwarded as template arguments to Save1/Save2 (their meaning is defined elsewhere).
//   M          - number of source rows handled; every per-row statement is guarded by `if (M > k)`.
//
// Parameters:
//   src0    - pointer to row 0 of the source; rows 1..4 are read at src0 + k * (p.srcC * p.strideX).
//   p, a    - convolution / algorithm parameters; this function reads p.srcC, p.strideX, p.dstC,
//             p.compatibility, a.size and a.upper.
//   srcC    - number of source channels to accumulate; the loops advance in steps of 4.
//   dstC    - number of output channels for this call; `dstC > F` selects the two-accumulator path.
//   weight0 - weights for the first output group; the second group is read from
//             weight0 + DivHi(p.srcC, 4) * A.
//   norm, bias, params, scale, shift - passed through unchanged to Save1/Save2.
//   buf     - int32 buffer: source of the initial accumulator values when `first` is zero, and also
//             passed to Save1/Save2; row stride is p.dstC elements.
//   dst     - output pointer passed to Save1/Save2; advanced by p.dstC * a.size per row.
//   first   - non-zero: accumulators start from zero; zero: accumulators are loaded from buf.
//
// NOTE(review): F, DF and A are constants defined outside this view (presumably F = 4 int32 lanes,
// DF = 2 * F, A = 16 bytes) - confirm against the Simd headers.
41 | | template<Term8iType term, SimdConvolutionActivationType type, int M> void ConvolutionNhwcDirect1x1_2xM( |
42 | | const uint8_t* src0, const ConvParam& p, const AlgParam& a, size_t srcC, size_t dstC, const int8_t* weight0, |
43 | | const __m128* norm, const __m128* bias, const __m128* params, const __m128* scale, const __m128* shift, int32_t* buf, uint8_t* dst, int first) |
44 | 0 | { |
45 | 0 | __m128i d00, d01, d10, d11, d20, d21, d30, d31, d40, d41, s0, w0, w1; |
// dS: distance between consecutive source rows; dD: distance between output rows in dst;
// dB: distance between rows in buf (in int32 elements).
46 | 0 | size_t dS = p.srcC * p.strideX, dD = p.dstC * a.size, dB = p.dstC; |
47 | 0 | const int8_t* weight1 = weight0 + DivHi(p.srcC, 4) * A; |
48 | 0 | const uint8_t* src1 = src0 + 1 * dS; |
49 | 0 | const uint8_t* src2 = src0 + 2 * dS; |
50 | 0 | const uint8_t* src3 = src0 + 3 * dS; |
51 | 0 | const uint8_t* src4 = src0 + 4 * dS; |
// a.upper broadcast to all four 32-bit lanes; only consumed by Save1/Save2.
52 | 0 | __m128i upper = _mm_set1_epi32(a.upper); |
// More than F output channels: keep two accumulators per row (dX0 fed by weight0, dX1 by weight1).
53 | 0 | if (dstC > F) |
54 | 0 | { |
55 | 0 | if (first) |
56 | 0 | { |
57 | 0 | if (M > 0) d00 = _mm_setzero_si128(), d01 = _mm_setzero_si128(); |
58 | 0 | if (M > 1) d10 = _mm_setzero_si128(), d11 = _mm_setzero_si128(); |
59 | 0 | if (M > 2) d20 = _mm_setzero_si128(), d21 = _mm_setzero_si128(); |
60 | 0 | if (M > 3) d30 = _mm_setzero_si128(), d31 = _mm_setzero_si128(); |
61 | 0 | if (M > 4) d40 = _mm_setzero_si128(), d41 = _mm_setzero_si128(); |
62 | 0 | } |
63 | 0 | else |
64 | 0 | { |
// Resume from the sums stored in buf: row k lives at buf + k * dB, second vector at offset F.
65 | 0 | if (M > 0) d00 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + 0)), d01 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + F)); |
66 | 0 | if (M > 1) d10 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + 0)), d11 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + F)); |
67 | 0 | if (M > 2) d20 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + 0)), d21 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + F)); |
68 | 0 | if (M > 3) d30 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + 0)), d31 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + F)); |
69 | 0 | if (M > 4) d40 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + 0)), d41 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + F)); |
70 | 0 | } |
// The compatibility flags choose between Madd4<true> and Madd4<false>; both loops are otherwise identical.
// NOTE(review): the exact arithmetic difference of the Madd4 flag is defined elsewhere - confirm.
71 | 0 | if (Base::Overflow(p.compatibility) || Base::Narrowed(p.compatibility)) |
72 | 0 | { |
// Each iteration consumes 4 source channels and one 128-bit weight vector per output group.
// Set4 presumably replicates the 4 source bytes across the register - TODO confirm.
73 | 0 | for (size_t offs = 0; offs < srcC; offs += 4) |
74 | 0 | { |
75 | 0 | w0 = _mm_loadu_si128((__m128i*)weight0); |
76 | 0 | w1 = _mm_loadu_si128((__m128i*)weight1); |
77 | 0 | if (M > 0) s0 = Set4(src0 + offs), Madd4<true>(d00, s0, w0), Madd4<true>(d01, s0, w1); |
78 | 0 | if (M > 1) s0 = Set4(src1 + offs), Madd4<true>(d10, s0, w0), Madd4<true>(d11, s0, w1); |
79 | 0 | if (M > 2) s0 = Set4(src2 + offs), Madd4<true>(d20, s0, w0), Madd4<true>(d21, s0, w1); |
80 | 0 | if (M > 3) s0 = Set4(src3 + offs), Madd4<true>(d30, s0, w0), Madd4<true>(d31, s0, w1); |
81 | 0 | if (M > 4) s0 = Set4(src4 + offs), Madd4<true>(d40, s0, w0), Madd4<true>(d41, s0, w1); |
82 | 0 | weight0 += A, weight1 += A; |
83 | 0 | } |
84 | 0 | } |
85 | 0 | else |
86 | 0 | { |
87 | 0 | for (size_t offs = 0; offs < srcC; offs += 4) |
88 | 0 | { |
89 | 0 | w0 = _mm_loadu_si128((__m128i*)weight0); |
90 | 0 | w1 = _mm_loadu_si128((__m128i*)weight1); |
91 | 0 | if (M > 0) s0 = Set4(src0 + offs), Madd4<false>(d00, s0, w0), Madd4<false>(d01, s0, w1); |
92 | 0 | if (M > 1) s0 = Set4(src1 + offs), Madd4<false>(d10, s0, w0), Madd4<false>(d11, s0, w1); |
93 | 0 | if (M > 2) s0 = Set4(src2 + offs), Madd4<false>(d20, s0, w0), Madd4<false>(d21, s0, w1); |
94 | 0 | if (M > 3) s0 = Set4(src3 + offs), Madd4<false>(d30, s0, w0), Madd4<false>(d31, s0, w1); |
95 | 0 | if (M > 4) s0 = Set4(src4 + offs), Madd4<false>(d40, s0, w0), Madd4<false>(d41, s0, w1); |
96 | 0 | weight0 += A, weight1 += A; |
97 | 0 | } |
98 | 0 | } |
// dstC == DF: Save2 is called without a count argument; otherwise dstC - F is passed as an extra
// argument (presumably the number of valid channels in the second vector - confirm in Save2).
// dst and buf advance by one row after every Save2 call.
99 | 0 | if (dstC == DF) |
100 | 0 | { |
101 | 0 | if (M > 0) Save2<term, type>(dst, buf, d00, d01, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
102 | 0 | if (M > 1) Save2<term, type>(dst, buf, d10, d11, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
103 | 0 | if (M > 2) Save2<term, type>(dst, buf, d20, d21, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
104 | 0 | if (M > 3) Save2<term, type>(dst, buf, d30, d31, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
105 | 0 | if (M > 4) Save2<term, type>(dst, buf, d40, d41, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
106 | 0 | } |
107 | 0 | else |
108 | 0 | { |
109 | 0 | if (M > 0) Save2<term, type>(dst, buf, d00, d01, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB; |
110 | 0 | if (M > 1) Save2<term, type>(dst, buf, d10, d11, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB; |
111 | 0 | if (M > 2) Save2<term, type>(dst, buf, d20, d21, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB; |
112 | 0 | if (M > 3) Save2<term, type>(dst, buf, d30, d31, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB; |
113 | 0 | if (M > 4) Save2<term, type>(dst, buf, d40, d41, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB; |
114 | 0 | } |
115 | 0 | } |
// At most F output channels: a single accumulator per row, only weight0 is read.
116 | 0 | else |
117 | 0 | { |
118 | 0 | if (first) |
119 | 0 | { |
120 | 0 | if (M > 0) d00 = _mm_setzero_si128(); |
121 | 0 | if (M > 1) d10 = _mm_setzero_si128(); |
122 | 0 | if (M > 2) d20 = _mm_setzero_si128(); |
123 | 0 | if (M > 3) d30 = _mm_setzero_si128(); |
124 | 0 | if (M > 4) d40 = _mm_setzero_si128(); |
125 | 0 | } |
126 | 0 | else |
127 | 0 | { |
128 | 0 | if (M > 0) d00 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + 0)); |
129 | 0 | if (M > 1) d10 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + 0)); |
130 | 0 | if (M > 2) d20 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + 0)); |
131 | 0 | if (M > 3) d30 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + 0)); |
132 | 0 | if (M > 4) d40 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + 0)); |
133 | 0 | } |
// Same Madd4<true>/Madd4<false> selection as in the two-accumulator path above.
134 | 0 | if (Base::Overflow(p.compatibility) || Base::Narrowed(p.compatibility)) |
135 | 0 | { |
136 | 0 | for (size_t offs = 0; offs < srcC; offs += 4) |
137 | 0 | { |
138 | 0 | w0 = _mm_loadu_si128((__m128i*)weight0); |
139 | 0 | if (M > 0) s0 = Set4(src0 + offs), Madd4<true>(d00, s0, w0); |
140 | 0 | if (M > 1) s0 = Set4(src1 + offs), Madd4<true>(d10, s0, w0); |
141 | 0 | if (M > 2) s0 = Set4(src2 + offs), Madd4<true>(d20, s0, w0); |
142 | 0 | if (M > 3) s0 = Set4(src3 + offs), Madd4<true>(d30, s0, w0); |
143 | 0 | if (M > 4) s0 = Set4(src4 + offs), Madd4<true>(d40, s0, w0); |
144 | 0 | weight0 += A; |
145 | 0 | } |
146 | 0 | } |
147 | 0 | else |
148 | 0 | { |
149 | 0 | for (size_t offs = 0; offs < srcC; offs += 4) |
150 | 0 | { |
151 | 0 | w0 = _mm_loadu_si128((__m128i*)weight0); |
152 | 0 | if (M > 0) s0 = Set4(src0 + offs), Madd4<false>(d00, s0, w0); |
153 | 0 | if (M > 1) s0 = Set4(src1 + offs), Madd4<false>(d10, s0, w0); |
154 | 0 | if (M > 2) s0 = Set4(src2 + offs), Madd4<false>(d20, s0, w0); |
155 | 0 | if (M > 3) s0 = Set4(src3 + offs), Madd4<false>(d30, s0, w0); |
156 | 0 | if (M > 4) s0 = Set4(src4 + offs), Madd4<false>(d40, s0, w0); |
157 | 0 | weight0 += A; |
158 | 0 | } |
159 | 0 | } |
// dstC == F: Save1 is called without a count argument; otherwise dstC is passed as an extra
// argument (presumably the number of valid channels to store - confirm in Save1).
160 | 0 | if (dstC == F) |
161 | 0 | { |
162 | 0 | if (M > 0) Save1<term, type>(dst, buf, d00, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
163 | 0 | if (M > 1) Save1<term, type>(dst, buf, d10, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
164 | 0 | if (M > 2) Save1<term, type>(dst, buf, d20, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
165 | 0 | if (M > 3) Save1<term, type>(dst, buf, d30, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
166 | 0 | if (M > 4) Save1<term, type>(dst, buf, d40, norm, bias, params, scale, shift, upper), dst += dD, buf += dB; |
167 | 0 | } |
168 | 0 | else |
169 | 0 | { |
170 | 0 | if (M > 0) Save1<term, type>(dst, buf, d00, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB; |
171 | 0 | if (M > 1) Save1<term, type>(dst, buf, d10, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB; |
172 | 0 | if (M > 2) Save1<term, type>(dst, buf, d20, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB; |
173 | 0 | if (M > 3) Save1<term, type>(dst, buf, d30, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB; |
174 | 0 | if (M > 4) Save1<term, type>(dst, buf, d40, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB; |
175 | 0 | } |
176 | 0 | } |
177 | 0 | } Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 4>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float 
__vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) |
178 | | |
179 | | typedef void(*ConvolutionNhwcDirect1x1_2xM_Ptr)(const uint8_t* src0, const ConvParam& p, const AlgParam& a, size_t srcC, size_t dstC, |
180 | | const int8_t* weight0, const __m128* norm, const __m128* bias, const __m128* params, const __m128* scale, const __m128* shift, int32_t* buf, uint8_t* dst, int first); |
181 | | |
182 | | template<Term8iType term, SimdConvolutionActivationType type> ConvolutionNhwcDirect1x1_2xM_Ptr GetConvolutionNhwcDirect1x1_2xM(size_t M) |
183 | 0 | { |
184 | 0 | switch (M) |
185 | 0 | { |
186 | 0 | case 0: return NULL; |
187 | 0 | case 1: return ConvolutionNhwcDirect1x1_2xM<term, type, 1>; |
188 | 0 | case 2: return ConvolutionNhwcDirect1x1_2xM<term, type, 2>; |
189 | 0 | case 3: return ConvolutionNhwcDirect1x1_2xM<term, type, 3>; |
190 | 0 | case 4: return ConvolutionNhwcDirect1x1_2xM<term, type, 4>; |
191 | 0 | case 5: return ConvolutionNhwcDirect1x1_2xM<term, type, 5>; |
192 | 0 | } |
193 | 0 | assert(0); |
194 | 0 | return NULL; |
195 | 0 | } Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(unsigned 
long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char 
const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned 
char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int) |
196 | | |
197 | | template<Term8iType term, SimdConvolutionActivationType type> void ConvolutionNhwcDirect1x1_2(const uint8_t* src, |
198 | | const ConvParam& p, const AlgParam& a, size_t dstC, size_t yBeg, size_t yEnd, size_t srcC, const int8_t* weight, |
199 | | const float* norm, const float* bias, const float* params, const float* scale, const float* shift, int32_t* buf, uint8_t* dst, int first) |
200 | 0 | { |
201 | 0 | size_t n = 5, n1 = (yEnd - yBeg) * p.dstW, nn = AlignLoAny(n1, n), m = n1 - nn; |
202 | 0 | ConvolutionNhwcDirect1x1_2xM_Ptr convolutionNhwcDirect1x1_2xN = GetConvolutionNhwcDirect1x1_2xM<term, type>(n); |
203 | 0 | ConvolutionNhwcDirect1x1_2xM_Ptr convolutionNhwcDirect1x1_2xM = GetConvolutionNhwcDirect1x1_2xM<term, type>(m); |
204 | 0 | __m128 _norm[2], _bias[2], _params[2], _scale[2], _shift[2]; |
205 | 0 | _params[0] = _mm_set1_ps(params[0]); |
206 | 0 | _params[1] = _mm_set1_ps(params[1]); |
207 | 0 | for (size_t dc = 0; dc < dstC; dc += DF) |
208 | 0 | { |
209 | 0 | size_t dC = Simd::Min(DF, dstC - dc); |
210 | 0 | _norm[0] = _mm_loadu_ps(norm + dc + 0); |
211 | 0 | _norm[1] = _mm_loadu_ps(norm + dc + F); |
212 | 0 | _bias[0] = _mm_loadu_ps(bias + dc + 0); |
213 | 0 | _bias[1] = _mm_loadu_ps(bias + dc + F); |
214 | 0 | if (type == ::SimdConvolutionActivationPrelu) |
215 | 0 | { |
216 | 0 | _params[0] = _mm_loadu_ps(params + dc + 0); |
217 | 0 | _params[1] = _mm_loadu_ps(params + dc + F); |
218 | 0 | } |
219 | 0 | _scale[0] = _mm_loadu_ps(scale + dc + 0); |
220 | 0 | _scale[1] = _mm_loadu_ps(scale + dc + F); |
221 | 0 | _shift[0] = _mm_loadu_ps(shift + dc + 0); |
222 | 0 | _shift[1] = _mm_loadu_ps(shift + dc + F); |
223 | 0 | const uint8_t* s = src + yBeg * p.srcW * p.srcC; |
224 | 0 | uint8_t* d = dst + (dc + yBeg * p.dstW * p.dstC) * a.size; |
225 | 0 | int32_t* b = buf + dc + yBeg * p.dstW * p.dstC; |
226 | 0 | size_t i = 0; |
227 | 0 | for (; i < nn; i += n, s += p.srcC * n, b += p.dstC * n, d += p.dstC * a.size * n) |
228 | 0 | convolutionNhwcDirect1x1_2xN(s, p, a, srcC, dC, weight, _norm, _bias, _params, _scale, _shift, b, d, first); |
229 | 0 | for (; i < n1; i += m, s += p.srcC * m, b += p.dstC * m, d += p.dstC * a.size * m) |
230 | 0 | convolutionNhwcDirect1x1_2xM(s, p, a, srcC, dC, weight, _norm, _bias, _params, _scale, _shift, b, d, first); |
231 | 0 | weight += DivHi(p.srcC, 4) * DA; |
232 | 0 | } |
233 | 0 | } Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, 
float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, 
float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned 
long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int) |
234 | | |
235 | | //--------------------------------------------------------------------- |
236 | | |
// Registers one 1x1 kernel: stores ConvolutionNhwcDirect1x1_2<term, activation> in slot d[term]
// of the kernel-pointer array d.
//   term       - compile-time Term8iType; selects both the kernel variant and the slot index in d.
//   activation - compile-time activation type baked into the selected kernel.
//   p, a       - used here only by the assert below.
//   d          - array of ConvolutionPtr indexed by Term8iType; only d[term] is written.
// Precondition (checked only through assert): a.microD == 2 * F and p.Is1x1() is true.
237 | | template <Term8iType term, SimdConvolutionActivationType activation> void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d) |
238 | 0 | { |
239 | 0 | assert(a.microD == 2 * F && p.Is1x1() == true); |
240 | 0 | d[term] = ConvolutionNhwcDirect1x1_2<term, activation>; |
241 | 0 | } Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void 
Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, 
(SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, 
(SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, 
(SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) |
242 | | |
// Registers the kernels for all three term kinds for a given compile-time activation by calling
// the two-parameter SetDirect1x1 overload three times:
//   Term8iLast8u  and Term8iLast32f - registered with the requested activation;
//   Term8iInterim                   - always registered with SimdConvolutionActivationIdentity,
//                                     regardless of the activation template argument.
// NOTE(review): presumably the interim term only accumulates partial sums and the activation is
// applied by the last term - confirm against ConvolutionNhwcDirect1x1_2.
// p, a and d are forwarded unchanged to each call.
243 | | template<SimdConvolutionActivationType activation> void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d) |
244 | 0 | { |
245 | 0 | SetDirect1x1<Term8iLast8u, activation>(p, a, d); |
246 | 0 | SetDirect1x1<Term8iLast32f, activation>(p, a, d); |
247 | 0 | SetDirect1x1<Term8iInterim, SimdConvolutionActivationIdentity>(p, a, d); |
248 | 0 | } Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)7>(Simd::ConvParam const&, 
Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)) |
249 | | |
// Runtime entry point: switches on p.activation and forwards p, a and d to the matching
// SetDirect1x1<activation> template instantiation.
//   p - convolution parameters; p.activation picks the branch.
//   a - algorithm parameters, forwarded unchanged.
//   d - array of kernel pointers, forwarded unchanged.
// The mapping is not one-to-one:
//   Identity, Relu, RestrictRange -> RestrictRange kernels;
//   LeakyRelu, Prelu              -> Prelu kernels;
//   Elu, Hswish, Mish, HardSigmoid, Swish, Gelu -> the kernel of the same name.
// NOTE(review): presumably the activation parameters passed to the kernels at run time make the
// RestrictRange / Prelu kernels reproduce Identity / Relu / LeakyRelu - confirm where those
// parameters are prepared.
// Any other activation value hits assert(0).
250 | | void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d) |
251 | 0 | { |
252 | 0 | switch (p.activation) |
253 | 0 | { |
// Identity and Relu share the RestrictRange instantiation; LeakyRelu shares the Prelu one.
254 | 0 | case SimdConvolutionActivationIdentity: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break; |
255 | 0 | case SimdConvolutionActivationRelu: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break; |
256 | 0 | case SimdConvolutionActivationLeakyRelu: SetDirect1x1<SimdConvolutionActivationPrelu>(p, a, d); break; |
257 | 0 | case SimdConvolutionActivationRestrictRange: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break; |
258 | 0 | case SimdConvolutionActivationPrelu: SetDirect1x1<SimdConvolutionActivationPrelu>(p, a, d); break; |
259 | 0 | case SimdConvolutionActivationElu: SetDirect1x1<SimdConvolutionActivationElu>(p, a, d); break; |
260 | 0 | case SimdConvolutionActivationHswish: SetDirect1x1<SimdConvolutionActivationHswish>(p, a, d); break; |
261 | 0 | case SimdConvolutionActivationMish: SetDirect1x1<SimdConvolutionActivationMish>(p, a, d); break; |
262 | 0 | case SimdConvolutionActivationHardSigmoid: SetDirect1x1<SimdConvolutionActivationHardSigmoid>(p, a, d); break; |
263 | 0 | case SimdConvolutionActivationSwish: SetDirect1x1<SimdConvolutionActivationSwish>(p, a, d); break; |
264 | 0 | case SimdConvolutionActivationGelu: SetDirect1x1<SimdConvolutionActivationGelu>(p, a, d); break; |
// Unknown activation value: with assert compiled out (NDEBUG) d is left unmodified.
265 | 0 | default: assert(0); |
266 | 0 | } |
267 | 0 | } |
268 | | } |
269 | | #endif |
270 | | } |