Coverage Report

Created: 2024-10-01 06:54

/src/Simd/src/Simd/SimdSse41SynetConvolution8iNhwcDirect1x1.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2024 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdSynetConvolution8i.h"
25
#include "Simd/SimdSynetConvolution8iCommon.h"
26
#include "Simd/SimdSynet.h"
27
#include "Simd/SimdMath.h"
28
#include "Simd/SimdBase.h"
29
#include "Simd/SimdCpu.h"
30
31
namespace Simd
32
{
33
#if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE)   
34
    namespace Sse41
35
    {
36
        // Local shorthand for SynetConvolution8iNhwcDirect::AlgParam; this is the type of the
        // `a` argument taken by the convolution kernels in this namespace.
        using AlgParam = SynetConvolution8iNhwcDirect::AlgParam;
37
        // Local shorthand for the ConvolutionPtr type declared inside SynetConvolution8iNhwcDirect.
        // NOTE(review): presumably the function-pointer type for the kernels in this file - confirm.
        using ConvolutionPtr = SynetConvolution8iNhwcDirect::ConvolutionPtr;
38
39
        //---------------------------------------------------------------------
40
41
        template<Term8iType term, SimdConvolutionActivationType type, int M> void ConvolutionNhwcDirect1x1_2xM(
42
            const uint8_t* src0, const ConvParam& p, const AlgParam& a, size_t srcC, size_t dstC, const int8_t* weight0,
43
            const __m128* norm, const __m128* bias, const __m128* params, const __m128* scale, const __m128* shift, int32_t* buf, uint8_t* dst, int first)
44
0
        {
45
0
            __m128i d00, d01, d10, d11, d20, d21, d30, d31, d40, d41, s0, w0, w1;
46
0
            size_t dS = p.srcC * p.strideX, dD = p.dstC * a.size, dB = p.dstC;
47
0
            const int8_t* weight1 = weight0 + DivHi(p.srcC, 4) * A;
48
0
            const uint8_t* src1 = src0 + 1 * dS;
49
0
            const uint8_t* src2 = src0 + 2 * dS;
50
0
            const uint8_t* src3 = src0 + 3 * dS;
51
0
            const uint8_t* src4 = src0 + 4 * dS;
52
0
            __m128i upper = _mm_set1_epi32(a.upper);
53
0
            if (dstC > F)
54
0
            {
55
0
                if (first)
56
0
                {
57
0
                    if (M > 0) d00 = _mm_setzero_si128(), d01 = _mm_setzero_si128();
58
0
                    if (M > 1) d10 = _mm_setzero_si128(), d11 = _mm_setzero_si128();
59
0
                    if (M > 2) d20 = _mm_setzero_si128(), d21 = _mm_setzero_si128();
60
0
                    if (M > 3) d30 = _mm_setzero_si128(), d31 = _mm_setzero_si128();
61
0
                    if (M > 4) d40 = _mm_setzero_si128(), d41 = _mm_setzero_si128();
62
0
                }
63
0
                else
64
0
                {
65
0
                    if (M > 0) d00 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + 0)), d01 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + F));
66
0
                    if (M > 1) d10 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + 0)), d11 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + F));
67
0
                    if (M > 2) d20 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + 0)), d21 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + F));
68
0
                    if (M > 3) d30 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + 0)), d31 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + F));
69
0
                    if (M > 4) d40 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + 0)), d41 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + F));
70
0
                }
71
0
                if (Base::Overflow(p.compatibility) || Base::Narrowed(p.compatibility))
72
0
                {
73
0
                    for (size_t offs = 0; offs < srcC; offs += 4)
74
0
                    {
75
0
                        w0 = _mm_loadu_si128((__m128i*)weight0);
76
0
                        w1 = _mm_loadu_si128((__m128i*)weight1);
77
0
                        if (M > 0) s0 = Set4(src0 + offs), Madd4<true>(d00, s0, w0), Madd4<true>(d01, s0, w1);
78
0
                        if (M > 1) s0 = Set4(src1 + offs), Madd4<true>(d10, s0, w0), Madd4<true>(d11, s0, w1);
79
0
                        if (M > 2) s0 = Set4(src2 + offs), Madd4<true>(d20, s0, w0), Madd4<true>(d21, s0, w1);
80
0
                        if (M > 3) s0 = Set4(src3 + offs), Madd4<true>(d30, s0, w0), Madd4<true>(d31, s0, w1);
81
0
                        if (M > 4) s0 = Set4(src4 + offs), Madd4<true>(d40, s0, w0), Madd4<true>(d41, s0, w1);
82
0
                        weight0 += A, weight1 += A;
83
0
                    }
84
0
                }
85
0
                else
86
0
                {
87
0
                    for (size_t offs = 0; offs < srcC; offs += 4)
88
0
                    {
89
0
                        w0 = _mm_loadu_si128((__m128i*)weight0);
90
0
                        w1 = _mm_loadu_si128((__m128i*)weight1);
91
0
                        if (M > 0) s0 = Set4(src0 + offs), Madd4<false>(d00, s0, w0), Madd4<false>(d01, s0, w1);
92
0
                        if (M > 1) s0 = Set4(src1 + offs), Madd4<false>(d10, s0, w0), Madd4<false>(d11, s0, w1);
93
0
                        if (M > 2) s0 = Set4(src2 + offs), Madd4<false>(d20, s0, w0), Madd4<false>(d21, s0, w1);
94
0
                        if (M > 3) s0 = Set4(src3 + offs), Madd4<false>(d30, s0, w0), Madd4<false>(d31, s0, w1);
95
0
                        if (M > 4) s0 = Set4(src4 + offs), Madd4<false>(d40, s0, w0), Madd4<false>(d41, s0, w1);
96
0
                        weight0 += A, weight1 += A;
97
0
                    }
98
0
                }
99
0
                if (dstC == DF)
100
0
                {
101
0
                    if (M > 0) Save2<term, type>(dst, buf, d00, d01, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
102
0
                    if (M > 1) Save2<term, type>(dst, buf, d10, d11, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
103
0
                    if (M > 2) Save2<term, type>(dst, buf, d20, d21, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
104
0
                    if (M > 3) Save2<term, type>(dst, buf, d30, d31, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
105
0
                    if (M > 4) Save2<term, type>(dst, buf, d40, d41, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
106
0
                }
107
0
                else
108
0
                {
109
0
                    if (M > 0) Save2<term, type>(dst, buf, d00, d01, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;
110
0
                    if (M > 1) Save2<term, type>(dst, buf, d10, d11, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;
111
0
                    if (M > 2) Save2<term, type>(dst, buf, d20, d21, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;
112
0
                    if (M > 3) Save2<term, type>(dst, buf, d30, d31, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;
113
0
                    if (M > 4) Save2<term, type>(dst, buf, d40, d41, norm, bias, params, scale, shift, upper, dstC - F), dst += dD, buf += dB;
114
0
                }
115
0
            }
116
0
            else
117
0
            {
118
0
                if (first)
119
0
                {
120
0
                    if (M > 0) d00 = _mm_setzero_si128();
121
0
                    if (M > 1) d10 = _mm_setzero_si128();
122
0
                    if (M > 2) d20 = _mm_setzero_si128();
123
0
                    if (M > 3) d30 = _mm_setzero_si128();
124
0
                    if (M > 4) d40 = _mm_setzero_si128();
125
0
                }
126
0
                else
127
0
                {
128
0
                    if (M > 0) d00 = _mm_loadu_si128((__m128i*)(buf + 0 * dB + 0));
129
0
                    if (M > 1) d10 = _mm_loadu_si128((__m128i*)(buf + 1 * dB + 0));
130
0
                    if (M > 2) d20 = _mm_loadu_si128((__m128i*)(buf + 2 * dB + 0));
131
0
                    if (M > 3) d30 = _mm_loadu_si128((__m128i*)(buf + 3 * dB + 0));
132
0
                    if (M > 4) d40 = _mm_loadu_si128((__m128i*)(buf + 4 * dB + 0));
133
0
                }
134
0
                if (Base::Overflow(p.compatibility) || Base::Narrowed(p.compatibility))
135
0
                {
136
0
                    for (size_t offs = 0; offs < srcC; offs += 4)
137
0
                    {
138
0
                        w0 = _mm_loadu_si128((__m128i*)weight0);
139
0
                        if (M > 0) s0 = Set4(src0 + offs), Madd4<true>(d00, s0, w0);
140
0
                        if (M > 1) s0 = Set4(src1 + offs), Madd4<true>(d10, s0, w0);
141
0
                        if (M > 2) s0 = Set4(src2 + offs), Madd4<true>(d20, s0, w0);
142
0
                        if (M > 3) s0 = Set4(src3 + offs), Madd4<true>(d30, s0, w0);
143
0
                        if (M > 4) s0 = Set4(src4 + offs), Madd4<true>(d40, s0, w0);
144
0
                        weight0 += A;
145
0
                    }                
146
0
                }
147
0
                else
148
0
                {
149
0
                    for (size_t offs = 0; offs < srcC; offs += 4)
150
0
                    {
151
0
                        w0 = _mm_loadu_si128((__m128i*)weight0);
152
0
                        if (M > 0) s0 = Set4(src0 + offs), Madd4<false>(d00, s0, w0);
153
0
                        if (M > 1) s0 = Set4(src1 + offs), Madd4<false>(d10, s0, w0);
154
0
                        if (M > 2) s0 = Set4(src2 + offs), Madd4<false>(d20, s0, w0);
155
0
                        if (M > 3) s0 = Set4(src3 + offs), Madd4<false>(d30, s0, w0);
156
0
                        if (M > 4) s0 = Set4(src4 + offs), Madd4<false>(d40, s0, w0);
157
0
                        weight0 += A;
158
0
                    }
159
0
                }
160
0
                if (dstC == F)
161
0
                {
162
0
                    if (M > 0) Save1<term, type>(dst, buf, d00, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
163
0
                    if (M > 1) Save1<term, type>(dst, buf, d10, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
164
0
                    if (M > 2) Save1<term, type>(dst, buf, d20, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
165
0
                    if (M > 3) Save1<term, type>(dst, buf, d30, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
166
0
                    if (M > 4) Save1<term, type>(dst, buf, d40, norm, bias, params, scale, shift, upper), dst += dD, buf += dB;
167
0
                }
168
0
                else
169
0
                {
170
0
                    if (M > 0) Save1<term, type>(dst, buf, d00, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;
171
0
                    if (M > 1) Save1<term, type>(dst, buf, d10, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;
172
0
                    if (M > 2) Save1<term, type>(dst, buf, d20, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;
173
0
                    if (M > 3) Save1<term, type>(dst, buf, d30, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;
174
0
                    if (M > 4) Save1<term, type>(dst, buf, d40, norm, bias, params, scale, shift, upper, dstC), dst += dD, buf += dB;
175
0
                }
176
0
            }
177
0
        }
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 1>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 2>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10, 5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
178
179
        // Function-pointer type matching the signature of the ConvolutionNhwcDirect1x1_2xM<term, type, M>
        // kernels, so that a concrete instantiation can be chosen at run time
        // (GetConvolutionNhwcDirect1x1_2xM returns values of this type).
        typedef void(*ConvolutionNhwcDirect1x1_2xM_Ptr)(const uint8_t* src0, const ConvParam& p, const AlgParam& a, size_t srcC, size_t dstC,
180
            const int8_t* weight0, const __m128* norm, const __m128* bias, const __m128* params, const __m128* scale, const __m128* shift, int32_t* buf, uint8_t* dst, int first);
181
182
        // Maps the run-time value M to the ConvolutionNhwcDirect1x1_2xM<term, type, M> instantiation
        // with the same compile-time M.
        //   M == 0  -> NULL (no kernel).
        //   M 1..5  -> pointer to the corresponding instantiation.
        //   other   -> assert(0); the trailing 'return NULL' is reached only if the assert does not abort.
        template<Term8iType term, SimdConvolutionActivationType type> ConvolutionNhwcDirect1x1_2xM_Ptr GetConvolutionNhwcDirect1x1_2xM(size_t M)
183
0
        {
184
0
            switch (M)
185
0
            {
186
0
            case 0: return NULL;
187
0
            case 1: return ConvolutionNhwcDirect1x1_2xM<term, type, 1>;
188
0
            case 2: return ConvolutionNhwcDirect1x1_2xM<term, type, 2>;
189
0
            case 3: return ConvolutionNhwcDirect1x1_2xM<term, type, 3>;
190
0
            case 4: return ConvolutionNhwcDirect1x1_2xM<term, type, 4>;
191
0
            case 5: return ConvolutionNhwcDirect1x1_2xM<term, type, 5>;
192
0
            }
193
0
            // Unsupported M: programming error.
            assert(0);
194
0
            return NULL;
195
0
        }
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
Unexecuted instantiation: void (*Simd::Sse41::GetConvolutionNhwcDirect1x1_2xM<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(unsigned long))(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, signed char const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, float __vector(4) const*, int*, unsigned char*, int)
196
197
        // Driver for the 1x1 direct kernels: walks the (yEnd - yBeg) * p.dstW positions of rows
        // [yBeg, yEnd) and the dstC output channels in steps of DF, calling a 2xM kernel for each
        // group of positions.
        //
        // src, dst, buf are advanced to row yBeg inside the function; weight is advanced once per
        // DF-channel step. norm, bias, scale, shift (and params for Prelu) are read at offset dc.
        // 'first' is passed through to the kernels unchanged.
        template<Term8iType term, SimdConvolutionActivationType type> void ConvolutionNhwcDirect1x1_2(const uint8_t* src,
198
            const ConvParam& p, const AlgParam& a, size_t dstC, size_t yBeg, size_t yEnd, size_t srcC, const int8_t* weight,
199
            const float* norm, const float* bias, const float* params, const float* scale, const float* shift, int32_t* buf, uint8_t* dst, int first)
200
0
        {
201
0
            // n  - positions handled per call of the main kernel (fixed at 5);
            // n1 - total number of positions in the requested row range;
            // nn - AlignLoAny(n1, n), presumably n1 rounded down to a multiple of n (helper not visible here);
            // m  - remaining positions (n1 - nn) handled by a single tail-kernel call.
            size_t n = 5, n1 = (yEnd - yBeg) * p.dstW, nn = AlignLoAny(n1, n), m = n1 - nn;
202
0
            ConvolutionNhwcDirect1x1_2xM_Ptr convolutionNhwcDirect1x1_2xN = GetConvolutionNhwcDirect1x1_2xM<term, type>(n);
203
0
            // For m == 0 this is NULL, but then nn == n1 and the tail loop below is never entered.
            ConvolutionNhwcDirect1x1_2xM_Ptr convolutionNhwcDirect1x1_2xM = GetConvolutionNhwcDirect1x1_2xM<term, type>(m);
204
0
            __m128 _norm[2], _bias[2], _params[2], _scale[2], _shift[2];
205
0
            // Default: broadcast the two scalar activation parameters params[0] and params[1].
            _params[0] = _mm_set1_ps(params[0]);
206
0
            _params[1] = _mm_set1_ps(params[1]);
207
0
            for (size_t dc = 0; dc < dstC; dc += DF)
208
0
            {
209
0
                // Number of output channels in this step (less than DF only on the last step).
                size_t dC = Simd::Min(DF, dstC - dc);
210
0
                // NOTE(review): both vectors (offsets 0 and F) are loaded regardless of dC, so the
                // arrays are presumably padded to cover dc + F onwards - confirm against the caller.
                _norm[0] = _mm_loadu_ps(norm + dc + 0);
211
0
                _norm[1] = _mm_loadu_ps(norm + dc + F);
212
0
                _bias[0] = _mm_loadu_ps(bias + dc + 0);
213
0
                _bias[1] = _mm_loadu_ps(bias + dc + F);
214
0
                // Prelu replaces the broadcast defaults with values read from params at offset dc.
                if (type == ::SimdConvolutionActivationPrelu)
215
0
                {
216
0
                    _params[0] = _mm_loadu_ps(params + dc + 0);
217
0
                    _params[1] = _mm_loadu_ps(params + dc + F);
218
0
                }
219
0
                _scale[0] = _mm_loadu_ps(scale + dc + 0);
220
0
                _scale[1] = _mm_loadu_ps(scale + dc + F);
221
0
                _shift[0] = _mm_loadu_ps(shift + dc + 0);
222
0
                _shift[1] = _mm_loadu_ps(shift + dc + F);
223
0
                // Per-step cursors positioned at row yBeg; d and b are additionally offset by dc,
                // and the dst offset is scaled by a.size.
                const uint8_t* s = src + yBeg * p.srcW * p.srcC;
224
0
                uint8_t* d = dst + (dc + yBeg * p.dstW * p.dstC) * a.size;
225
0
                int32_t* b = buf + dc + yBeg * p.dstW * p.dstC;
226
0
                size_t i = 0;
227
0
                // Main part: n positions per kernel call.
                for (; i < nn; i += n, s += p.srcC * n, b += p.dstC * n, d += p.dstC * a.size * n)
228
0
                    convolutionNhwcDirect1x1_2xN(s, p, a, srcC, dC, weight, _norm, _bias, _params, _scale, _shift, b, d, first);
229
0
                // Tail: runs at most once, covering the remaining m positions.
                for (; i < n1; i += m, s += p.srcC * m, b += p.dstC * m, d += p.dstC * a.size * m)
230
0
                    convolutionNhwcDirect1x1_2xM(s, p, a, srcC, dC, weight, _norm, _bias, _params, _scale, _shift, b, d, first);
231
0
                // Advance to the weights of the next DF-channel step.
                weight += DivHi(p.srcC, 4) * DA;
232
0
            }
233
0
        }
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
Unexecuted instantiation: void Simd::Sse41::ConvolutionNhwcDirect1x1_2<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int)
234
235
        //---------------------------------------------------------------------
236
237
        // Stores the 1x1 direct convolution kernel specialised for the given
        // output term and activation into slot d[term] of the dispatch table.
        //   p - convolution parameters; only p.Is1x1() is checked here.
        //   a - algorithm parameters; only a.microD is checked here.
        //   d - table of kernel pointers indexed by Term8iType value.
        template <Term8iType term, SimdConvolutionActivationType activation> void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d)
238
0
        {
239
0
            // Debug-only precondition: the micro-kernel depth must equal 2 * F and
            // the convolution must be 1x1 (the kernel name suggests it is written
            // for exactly this configuration - NOTE(review): confirm against kernel).
            assert(a.microD == 2 * F && p.Is1x1() == true);
240
0
            d[term] = ConvolutionNhwcDirect1x1_2<term, activation>;
241
0
        }
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)2, (SimdConvolutionActivationType)0>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)0, (SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(Simd::Term8iType)1, (SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
242
243
        // Fills the dispatch table d with all three term variants for one
        // activation type by delegating to the (term, activation) overload.
        template<SimdConvolutionActivationType activation> void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d)
244
0
        {
245
0
            // The two "last" terms use the requested activation.
            SetDirect1x1<Term8iLast8u, activation>(p, a, d);
246
0
            SetDirect1x1<Term8iLast32f, activation>(p, a, d);
247
0
            // The interim term is always registered with the identity activation,
            // regardless of the requested one (presumably because activation is
            // applied only when the final result is produced - TODO confirm).
            SetDirect1x1<Term8iInterim, SimdConvolutionActivationIdentity>(p, a, d);
248
0
        }
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)3>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)4>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)5>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)6>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)7>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)8>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)9>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
Unexecuted instantiation: void Simd::Sse41::SetDirect1x1<(SimdConvolutionActivationType)10>(Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, void (**)(unsigned char const*, Simd::ConvParam const&, Simd::Base::SynetConvolution8iNhwcDirect::AlgParam const&, unsigned long, unsigned long, unsigned long, unsigned long, signed char const*, float const*, float const*, float const*, float const*, float const*, int*, unsigned char*, int))
249
250
        // Runtime entry point: selects the kernel set matching p.activation and
        // writes it into the dispatch table d via the templated overloads.
        // Several activations share a kernel instantiation:
        //   Identity, Relu, RestrictRange -> RestrictRange kernels
        //   LeakyRelu, Prelu              -> Prelu kernels
        // All other listed activations map to their own instantiation.
        // NOTE(review): the shared mappings presumably rely on the caller
        // supplying suitable activation parameters - confirm against the caller.
        void SetDirect1x1(const ConvParam& p, const AlgParam& a, ConvolutionPtr* d)
251
0
        {
252
0
            switch (p.activation)
253
0
            {
254
0
            case SimdConvolutionActivationIdentity: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break;
255
0
            case SimdConvolutionActivationRelu: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break;
256
0
            case SimdConvolutionActivationLeakyRelu: SetDirect1x1<SimdConvolutionActivationPrelu>(p, a, d); break;
257
0
            case SimdConvolutionActivationRestrictRange: SetDirect1x1<SimdConvolutionActivationRestrictRange>(p, a, d); break;
258
0
            case SimdConvolutionActivationPrelu: SetDirect1x1<SimdConvolutionActivationPrelu>(p, a, d); break;
259
0
            case SimdConvolutionActivationElu: SetDirect1x1<SimdConvolutionActivationElu>(p, a, d); break;
260
0
            case SimdConvolutionActivationHswish: SetDirect1x1<SimdConvolutionActivationHswish>(p, a, d); break;
261
0
            case SimdConvolutionActivationMish: SetDirect1x1<SimdConvolutionActivationMish>(p, a, d); break;
262
0
            case SimdConvolutionActivationHardSigmoid: SetDirect1x1<SimdConvolutionActivationHardSigmoid>(p, a, d); break;
263
0
            case SimdConvolutionActivationSwish: SetDirect1x1<SimdConvolutionActivationSwish>(p, a, d); break;
264
0
            case SimdConvolutionActivationGelu: SetDirect1x1<SimdConvolutionActivationGelu>(p, a, d); break;
265
0
            // Unknown activation: trips the assert in debug builds; when asserts
            // are compiled out, d is left unmodified by this function.
            default: assert(0);
266
0
            }
267
0
        }
268
    }
269
#endif
270
}