/src/Simd/src/Simd/SimdSse41SynetDeconvolution32f.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynetDeconvolution32f.h" |
25 | | #include "Simd/SimdSynetConvolution32f.h" |
26 | | #include "Simd/SimdSynetConvolution32fCommon.h" |
27 | | #include "Simd/SimdExtract.h" |
28 | | #include "Simd/SimdSynet.h" |
29 | | #include "Simd/SimdSse41.h" |
30 | | #include "Simd/SimdGemm.h" |
31 | | #include "Simd/SimdExp.h" |
32 | | #include "Simd/SimdCpu.h" |
33 | | |
34 | | namespace Simd |
35 | | { |
36 | | #if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE) |
37 | | namespace Sse41 |
38 | | { |
39 | | SynetDeconvolution32fGemmNN::SynetDeconvolution32fGemmNN(const DeconvParam & p) |
40 | 0 | : Base::SynetDeconvolution32fGemmNN(p) |
41 | 0 | { |
42 | 0 | _gemm.Init(InitGemmFuncs(Sse41::Gemm32fNN, "Sse41")); |
43 | 0 | if (_param.trans && _param.group == 1) |
44 | 0 | { |
45 | 0 | if (NHWC_GEMM_RUNTIME) |
46 | 0 | { |
47 | 0 | _gemmCb.Init(InitGemmCbFuncs(Sse41::Gemm32fNNcbBufferSize, Sse41::Gemm32fNNcbReorderB, Sse41::Gemm32fNNcbRun, "Sse41", GemmKernelF2, GemmKernelF3)); |
48 | 0 | _nhwcWeight.Resize(_gemmCb.At(0).BufferSize(_M*_merge, _N, _K)); |
49 | 0 | } |
50 | 0 | else |
51 | 0 | _nhwcWeight.Resize(Sse41::Gemm32fNNcbBufferSize(_M*_merge, _N, _K, GemmKernelAny, NHWC_GEMM_COMPATIBLE)); |
52 | 0 | _nhwcRun = Sse41::Gemm32fNNcbRun; |
53 | 0 | _nhwcReorderB = Sse41::Gemm32fNNcbReorderB; |
54 | 0 | } |
55 | 0 | _biasAndActivation = Sse41::ConvolutionBiasAndActivation; |
56 | 0 | } |
57 | | |
58 | | //------------------------------------------------------------------------------------------------- |
59 | | |
60 | | void SynetDeconvolution32fGemmNN::RowToImg(const float* src, float* dst) |
61 | 0 | { |
62 | 0 | const DeconvParam& p = _param; |
63 | 0 | assert(p.trans && p.group == 1); |
64 | 0 | if ((p.IsPad(0) && p.IsDilation(1) && p.kernelY == p.strideX && p.kernelX == p.strideX) || p.dstC < F) |
65 | 0 | { |
66 | 0 | Base::SynetDeconvolution32fGemmNN::RowToImg(src, dst); |
67 | 0 | return; |
68 | 0 | } |
69 | 0 | else |
70 | 0 | { |
71 | 0 | size_t dstCF = AlignLo(p.dstC, F); |
72 | 0 | for (size_t dy = 0; dy < p.dstH; ++dy) |
73 | 0 | for (size_t dx = 0; dx < p.dstW; ++dx) |
74 | 0 | memset(dst + (dy * p.dstW + dx) * p.dstC, 0, p.dstC * sizeof(float)); |
75 | 0 | for (size_t sy = 0; sy < p.srcH; ++sy) |
76 | 0 | { |
77 | 0 | for (size_t sx = 0; sx < p.srcW; ++sx) |
78 | 0 | { |
79 | 0 | size_t dy = sy * p.strideY - p.padY; |
80 | 0 | for (size_t ky = 0; ky < p.kernelY; ky++, dy += p.dilationY) |
81 | 0 | { |
82 | 0 | if (dy < p.dstH) |
83 | 0 | { |
84 | 0 | size_t dx = sx * p.strideX - p.padX; |
85 | 0 | for (size_t kx = 0; kx < p.kernelX; kx++, dx += p.dilationX) |
86 | 0 | { |
87 | 0 | if (dx < p.dstW) |
88 | 0 | { |
89 | 0 | float* d = dst + (dy * p.dstW + dx) * p.dstC; |
90 | 0 | size_t dc = 0; |
91 | 0 | for (; dc < dstCF; dc += F) |
92 | 0 | _mm_storeu_ps(d + dc, _mm_add_ps(_mm_loadu_ps(d + dc), _mm_loadu_ps(src + dc))); |
93 | 0 | for (; dc < p.dstC; ++dc) |
94 | 0 | d[dc] += src[dc]; |
95 | 0 | } |
96 | 0 | src += p.dstC; |
97 | 0 | } |
98 | 0 | } |
99 | 0 | else |
100 | 0 | src += p.kernelX * p.dstC; |
101 | 0 | } |
102 | 0 | } |
103 | 0 | } |
104 | 0 | } |
105 | 0 | } |
106 | | |
107 | | //------------------------------------------------------------------------------------------------- |
108 | | |
109 | | typedef void (*DeconvolutionNhwcDirect2x2_Ptr) (const float * src0, const DeconvParam & p, size_t srcC, size_t dstC, const float * weight, const __m128 * bias, const __m128 * params, float * ds, int first); |
110 | | |
111 | | template<TermType term, SimdConvolutionActivationType type, size_t tail> void DeconvolutionNhwcDirect2x2_M(const float * src0, |
112 | | const DeconvParam & p, size_t srcC, size_t dstC, const float * weight0, const __m128 * bias, const __m128 * params, float * dst, int first) |
113 | 0 | { |
114 | 0 | size_t dS = p.srcC, dD = p.dstC; |
115 | 0 | const float * weight1 = weight0 + srcC * F, * src1, * src2, * src3, * src4, * src5; |
116 | 0 | if (tail > 1) src1 = src0 + 1 * dS; |
117 | 0 | if (tail > 2) src2 = src0 + 2 * dS; |
118 | 0 | if (tail > 3) src3 = src0 + 3 * dS; |
119 | 0 | if (tail > 4) src4 = src0 + 4 * dS; |
120 | 0 | if (tail > 5) src5 = src0 + 5 * dS; |
121 | 0 | __m128 d00, d01, d10, d11, d20, d21, d30, d31, d40, d41, d50, d51, s0, w0, w1; |
122 | 0 | if (first) |
123 | 0 | { |
124 | 0 | if (tail > 0) d00 = _mm_setzero_ps(), d01 = _mm_setzero_ps(); |
125 | 0 | if (tail > 1) d10 = _mm_setzero_ps(), d11 = _mm_setzero_ps(); |
126 | 0 | if (tail > 2) d20 = _mm_setzero_ps(), d21 = _mm_setzero_ps(); |
127 | 0 | if (tail > 3) d30 = _mm_setzero_ps(), d31 = _mm_setzero_ps(); |
128 | 0 | if (tail > 4) d40 = _mm_setzero_ps(), d41 = _mm_setzero_ps(); |
129 | 0 | if (tail > 5) d50 = _mm_setzero_ps(), d51 = _mm_setzero_ps(); |
130 | 0 | } |
131 | 0 | else |
132 | 0 | { |
133 | 0 | if (tail > 0) d00 = _mm_loadu_ps(dst + 0x0 * dD), d01 = _mm_loadu_ps(dst + 0x1 * dD); |
134 | 0 | if (tail > 1) d10 = _mm_loadu_ps(dst + 0x2 * dD), d11 = _mm_loadu_ps(dst + 0x3 * dD); |
135 | 0 | if (tail > 2) d20 = _mm_loadu_ps(dst + 0x4 * dD), d21 = _mm_loadu_ps(dst + 0x5 * dD); |
136 | 0 | if (tail > 3) d30 = _mm_loadu_ps(dst + 0x6 * dD), d31 = _mm_loadu_ps(dst + 0x7 * dD); |
137 | 0 | if (tail > 4) d40 = _mm_loadu_ps(dst + 0x8 * dD), d41 = _mm_loadu_ps(dst + 0x9 * dD); |
138 | 0 | if (tail > 5) d50 = _mm_loadu_ps(dst + 0xa * dD), d51 = _mm_loadu_ps(dst + 0xb * dD); |
139 | 0 | } |
140 | 0 | for (size_t sc = 0; sc < srcC; ++sc) |
141 | 0 | { |
142 | 0 | w0 = _mm_loadu_ps(weight0); |
143 | 0 | w1 = _mm_loadu_ps(weight1); |
144 | 0 | if (tail > 0) s0 = _mm_set1_ps(src0[sc]), d00 = _mm_add_ps(_mm_mul_ps(s0, w0), d00), d01 = _mm_add_ps(_mm_mul_ps(s0, w1), d01); |
145 | 0 | if (tail > 1) s0 = _mm_set1_ps(src1[sc]), d10 = _mm_add_ps(_mm_mul_ps(s0, w0), d10), d11 = _mm_add_ps(_mm_mul_ps(s0, w1), d11); |
146 | 0 | if (tail > 2) s0 = _mm_set1_ps(src2[sc]), d20 = _mm_add_ps(_mm_mul_ps(s0, w0), d20), d21 = _mm_add_ps(_mm_mul_ps(s0, w1), d21); |
147 | 0 | if (tail > 3) s0 = _mm_set1_ps(src3[sc]), d30 = _mm_add_ps(_mm_mul_ps(s0, w0), d30), d31 = _mm_add_ps(_mm_mul_ps(s0, w1), d31); |
148 | 0 | if (tail > 4) s0 = _mm_set1_ps(src4[sc]), d40 = _mm_add_ps(_mm_mul_ps(s0, w0), d40), d41 = _mm_add_ps(_mm_mul_ps(s0, w1), d41); |
149 | 0 | if (tail > 5) s0 = _mm_set1_ps(src5[sc]), d50 = _mm_add_ps(_mm_mul_ps(s0, w0), d50), d51 = _mm_add_ps(_mm_mul_ps(s0, w1), d51); |
150 | 0 | weight0 += F; |
151 | 0 | weight1 += F; |
152 | 0 | } |
153 | 0 | if (dstC == F) |
154 | 0 | { |
155 | 0 | if (tail > 0) Term<term>::template Save<type, 0>(dst + 0x0 * dD, d00, bias, params), Term<term>::template Save<type, 0>(dst + 0x1 * dD, d01, bias, params); |
156 | 0 | if (tail > 1) Term<term>::template Save<type, 0>(dst + 0x2 * dD, d10, bias, params), Term<term>::template Save<type, 0>(dst + 0x3 * dD, d11, bias, params); |
157 | 0 | if (tail > 2) Term<term>::template Save<type, 0>(dst + 0x4 * dD, d20, bias, params), Term<term>::template Save<type, 0>(dst + 0x5 * dD, d21, bias, params); |
158 | 0 | if (tail > 3) Term<term>::template Save<type, 0>(dst + 0x6 * dD, d30, bias, params), Term<term>::template Save<type, 0>(dst + 0x7 * dD, d31, bias, params); |
159 | 0 | if (tail > 4) Term<term>::template Save<type, 0>(dst + 0x8 * dD, d40, bias, params), Term<term>::template Save<type, 0>(dst + 0x9 * dD, d41, bias, params); |
160 | 0 | if (tail > 5) Term<term>::template Save<type, 0>(dst + 0xA * dD, d50, bias, params), Term<term>::template Save<type, 0>(dst + 0xB * dD, d51, bias, params); |
161 | 0 | } |
162 | 0 | else |
163 | 0 | { |
164 | 0 | if (tail > 0) Term<term>::template Save<type, 0>(dst + 0x0 * dD, d00, bias, params, dstC), Term<term>::template Save<type, 0>(dst + 0x1 * dD, d01, bias, params, dstC); |
165 | 0 | if (tail > 1) Term<term>::template Save<type, 0>(dst + 0x2 * dD, d10, bias, params, dstC), Term<term>::template Save<type, 0>(dst + 0x3 * dD, d11, bias, params, dstC); |
166 | 0 | if (tail > 2) Term<term>::template Save<type, 0>(dst + 0x4 * dD, d20, bias, params, dstC), Term<term>::template Save<type, 0>(dst + 0x5 * dD, d21, bias, params, dstC); |
167 | 0 | if (tail > 3) Term<term>::template Save<type, 0>(dst + 0x6 * dD, d30, bias, params, dstC), Term<term>::template Save<type, 0>(dst + 0x7 * dD, d31, bias, params, dstC); |
168 | 0 | if (tail > 4) Term<term>::template Save<type, 0>(dst + 0x8 * dD, d40, bias, params, dstC), Term<term>::template Save<type, 0>(dst + 0x9 * dD, d41, bias, params, dstC); |
169 | 0 | if (tail > 5) Term<term>::template Save<type, 0>(dst + 0xA * dD, d50, bias, params, dstC), Term<term>::template Save<type, 0>(dst + 0xB * dD, d51, bias, params, dstC); |
170 | 0 | } |
171 | 0 | } Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)3, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)3, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)3, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)3, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)3, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)3, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)1, (SimdConvolutionActivationType)0, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)1, (SimdConvolutionActivationType)0, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)1, (SimdConvolutionActivationType)0, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)1, (SimdConvolutionActivationType)0, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)1, (SimdConvolutionActivationType)0, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)1, (SimdConvolutionActivationType)0, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)4, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)4, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)4, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)4, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)4, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)4, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)5, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)5, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)5, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)5, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)5, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)5, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)6, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)6, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)6, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)6, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)6, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)6, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)7, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)7, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)7, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)7, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)7, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)7, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)8, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)8, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)8, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)8, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)8, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)8, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)9, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)9, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)9, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)9, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)9, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)9, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)10, 1ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)10, 2ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)10, 3ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)10, 4ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)10, 5ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2_M<(Simd::TermType)0, (SimdConvolutionActivationType)10, 6ul>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) |
172 | | |
173 | | template <TermType term, SimdConvolutionActivationType type> SIMD_INLINE DeconvolutionNhwcDirect2x2_Ptr GetDeconvolutionNhwcDirect2x2(size_t tail) |
174 | 0 | { |
175 | 0 | switch (tail) |
176 | 0 | { |
177 | 0 | case 0: return NULL; |
178 | 0 | case 1: return DeconvolutionNhwcDirect2x2_M<term, type, 1>; |
179 | 0 | case 2: return DeconvolutionNhwcDirect2x2_M<term, type, 2>; |
180 | 0 | case 3: return DeconvolutionNhwcDirect2x2_M<term, type, 3>; |
181 | 0 | case 4: return DeconvolutionNhwcDirect2x2_M<term, type, 4>; |
182 | 0 | case 5: return DeconvolutionNhwcDirect2x2_M<term, type, 5>; |
183 | 0 | case 6: return DeconvolutionNhwcDirect2x2_M<term, type, 6>; |
184 | 0 | default: |
185 | 0 | assert(0); |
186 | 0 | return NULL; |
187 | 0 | } |
188 | 0 | } Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)3>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)1, (SimdConvolutionActivationType)0>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)4>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)5>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)6>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)7>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)8>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)9>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) Unexecuted instantiation: void (*Simd::Sse41::GetDeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)10>(unsigned long))(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, float const*, float __vector(4) const*, float __vector(4) const*, float*, int) |
189 | | |
190 | | template<TermType term, SimdConvolutionActivationType type> void DeconvolutionNhwcDirect2x2(const float * src, const DeconvParam & p, |
191 | | size_t dstC, size_t yBeg, size_t yEnd, size_t srcC, const float * weight, const float * bias, const float * params, float * dst, int first) |
192 | 0 | { |
193 | 0 | size_t body = 6, srcWb = AlignLoAny(p.srcW, body), tail = p.srcW - srcWb; |
194 | 0 | DeconvolutionNhwcDirect2x2_Ptr bodyKernel = GetDeconvolutionNhwcDirect2x2<term, type>(body); |
195 | 0 | DeconvolutionNhwcDirect2x2_Ptr tailKernel = GetDeconvolutionNhwcDirect2x2<term, type>(tail); |
196 | |
|
197 | 0 | __m128 _params[2], _bias[1]; |
198 | 0 | _params[0] = _mm_set1_ps(params[0]); |
199 | 0 | if (type == SimdConvolutionActivationRestrictRange || |
200 | 0 | type == SimdConvolutionActivationHswish || |
201 | 0 | type == SimdConvolutionActivationHardSigmoid) |
202 | 0 | _params[1] = _mm_set1_ps(params[1]); |
203 | |
|
204 | 0 | for (size_t dc = 0; dc < dstC; dc += F) |
205 | 0 | { |
206 | 0 | size_t dC = Simd::Min(F, dstC - dc); |
207 | 0 | _bias[0] = _mm_loadu_ps(bias + dc); |
208 | 0 | if (type == ::SimdConvolutionActivationPrelu) |
209 | 0 | _params[0] = _mm_loadu_ps(params + dc); |
210 | 0 | const float * s = src + yBeg * p.srcW * p.srcC; |
211 | 0 | float * d = dst + yBeg * p.strideY * p.dstW * p.dstC; |
212 | 0 | const float * w0 = weight + 0 * p.kernelX * srcC * F; |
213 | 0 | const float * w1 = weight + 1 * p.kernelX * srcC * F; |
214 | 0 | for (size_t sy = yBeg; sy < yEnd; sy += 1, s += p.srcW * p.srcC) |
215 | 0 | { |
216 | 0 | for (size_t sx = 0; sx < srcWb; sx += body) |
217 | 0 | bodyKernel(s + sx * p.srcC, p, srcC, dC, w0, _bias, _params, d, first), d += body * p.strideX * p.dstC; |
218 | 0 | if(tail) |
219 | 0 | tailKernel(s + srcWb * p.srcC, p, srcC, dC, w0, _bias, _params, d, first), d += tail * p.strideX * p.dstC; |
220 | 0 | for (size_t sx = 0; sx < srcWb; sx += body) |
221 | 0 | bodyKernel(s + sx * p.srcC, p, srcC, dC, w1, _bias, _params, d, first), d += body * p.strideX * p.dstC; |
222 | 0 | if (tail) |
223 | 0 | tailKernel(s + srcWb * p.srcC, p, srcC, dC, w1, _bias, _params, d, first), d += tail * p.strideX * p.dstC; |
224 | 0 | } |
225 | 0 | weight += p.kernelY * p.kernelX*srcC*F; |
226 | 0 | dst += F; |
227 | 0 | } |
228 | 0 | } Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)3>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)1, (SimdConvolutionActivationType)0>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)4>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)5>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)6>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)7>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)8>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)9>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(Simd::TermType)0, (SimdConvolutionActivationType)10>(float const*, Simd::DeconvParam const&, unsigned long, unsigned long, unsigned long, unsigned long, float const*, float const*, float const*, float*, int) |
229 | | |
230 | | template<SimdConvolutionActivationType type> void DeconvolutionNhwcDirect2x2(const float * src, const DeconvParam & p, |
231 | | const SynetDeconvolution32fNhwcDirect2x2::AlgParam & a, const float * weight, const float * bias, const float * params, float * dst) |
232 | 0 | { |
233 | 0 | for (size_t dc = 0; dc < p.dstC; dc += a.macroD) |
234 | 0 | { |
235 | 0 | size_t macroD = Simd::Min(p.dstC, dc + a.macroD) - dc; |
236 | 0 | for (size_t sc = 0; sc < p.srcC; sc += a.macroC) |
237 | 0 | { |
238 | 0 | size_t macroC = Simd::Min(p.srcC, sc + a.macroC) - sc; |
239 | 0 | size_t macroK = p.kernelY * p.kernelX * macroC; |
240 | 0 | for (size_t yBeg = 0; yBeg < p.srcH;) |
241 | 0 | { |
242 | 0 | size_t yEnd = Simd::Min(yBeg + a.macroH, p.srcH); |
243 | 0 | if (a.macroC == p.srcC) |
244 | 0 | DeconvolutionNhwcDirect2x2<TermLast, type>(src + sc, p, macroD, yBeg, yEnd, macroC, weight, bias + dc, params, dst + dc, 1); |
245 | 0 | else if (sc == 0) |
246 | 0 | DeconvolutionNhwcDirect2x2<TermInterim, SimdConvolutionActivationIdentity>(src + sc, p, macroD, yBeg, yEnd, macroC, weight, bias + dc, params, dst + dc, 1); |
247 | 0 | else if (sc + macroC == p.srcC) |
248 | 0 | DeconvolutionNhwcDirect2x2<TermLast, type>(src + sc, p, macroD, yBeg, yEnd, macroC, weight, bias + dc, params, dst + dc, 0); |
249 | 0 | else |
250 | 0 | DeconvolutionNhwcDirect2x2<TermInterim, SimdConvolutionActivationIdentity>(src + sc, p, macroD, yBeg, yEnd, macroC, weight, bias + dc, params, dst + dc, 0); |
251 | 0 | yBeg = yEnd; |
252 | 0 | } |
253 | 0 | weight += AlignHiAny(macroD, a.microD)*macroK; |
254 | 0 | } |
255 | 0 | if (type == ::SimdConvolutionActivationPrelu) |
256 | 0 | params += macroD; |
257 | 0 | } |
258 | 0 | } Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)3>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)4>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)5>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)6>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)7>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)8>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)9>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) Unexecuted instantiation: void Simd::Sse41::DeconvolutionNhwcDirect2x2<(SimdConvolutionActivationType)10>(float const*, Simd::DeconvParam const&, Simd::Base::SynetDeconvolution32fNhwcDirect2x2::AlgParam const&, float const*, float const*, float const*, float*) |
259 | | |
260 | | SynetDeconvolution32fNhwcDirect2x2::SynetDeconvolution32fNhwcDirect2x2(const DeconvParam & p) |
261 | 0 | : Base::SynetDeconvolution32fNhwcDirect2x2(p) |
262 | 0 | { |
263 | 0 | switch (p.activation) |
264 | 0 | { |
265 | 0 | case SimdConvolutionActivationIdentity: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationRestrictRange>; break; |
266 | 0 | case SimdConvolutionActivationRelu: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationRestrictRange>; break; |
267 | 0 | case SimdConvolutionActivationLeakyRelu: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationPrelu>; break; |
268 | 0 | case SimdConvolutionActivationRestrictRange: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationRestrictRange>; break; |
269 | 0 | case SimdConvolutionActivationPrelu: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationPrelu>; break; |
270 | 0 | case SimdConvolutionActivationElu: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationElu>; break; |
271 | 0 | case SimdConvolutionActivationHswish: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationHswish>; break; |
272 | 0 | case SimdConvolutionActivationMish: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationMish>; break; |
273 | 0 | case SimdConvolutionActivationHardSigmoid: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationHardSigmoid>; break; |
274 | 0 | case SimdConvolutionActivationSwish: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationSwish>; break; |
275 | 0 | case SimdConvolutionActivationGelu: _deconvolution = DeconvolutionNhwcDirect2x2<SimdConvolutionActivationGelu>; break; |
276 | 0 | default: assert(0); |
277 | 0 | } |
278 | 0 | SetAlgParam(F, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3()); |
279 | 0 | } |
280 | | |
281 | | bool SynetDeconvolution32fNhwcDirect2x2::Preferable(const DeconvParam & p) |
282 | 0 | { |
283 | 0 | return p.IsPad(0) && p.IsDilation(1) && p.IsKernel(2) && p.IsStride(2) && p.group == 1 && p.trans; |
284 | 0 | } |
285 | | |
286 | | //------------------------------------------------------------------------------------------------- |
287 | | |
288 | | void * SynetDeconvolution32fInit(size_t batch, const SimdConvolutionParameters * conv, SimdSynetCompatibilityType compatibility) |
289 | 0 | { |
290 | 0 | DeconvParam param(batch, conv, compatibility); |
291 | 0 | if (!param.Valid(SimdTensorData32f)) |
292 | 0 | return NULL; |
293 | 0 | if (SynetDeconvolution32fNhwcDirect2x2::Preferable(param)) |
294 | 0 | return new SynetDeconvolution32fNhwcDirect2x2(param); |
295 | 0 | else |
296 | 0 | return new SynetDeconvolution32fGemmNN(param); |
297 | 0 | } |
298 | | } |
299 | | #endif |
300 | | } |