/src/Simd/src/Simd/SimdSse41SynetMergedConvolution32f.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2024 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdSynetMergedConvolution32f.h" |
25 | | #include "Simd/SimdSynetConvolution32fCommon.h" |
26 | | #include "Simd/SimdUpdate.h" |
27 | | #include "Simd/SimdCpu.h" |
28 | | |
29 | | namespace Simd |
30 | | { |
31 | | #if defined(SIMD_SSE41_ENABLE) && defined(SIMD_SYNET_ENABLE) |
32 | | namespace Sse41 |
33 | | { |
34 | | SynetMergedConvolution32fCdc::SynetMergedConvolution32fCdc(const MergConvParam& p) |
35 | 0 | : Base::SynetMergedConvolution32fCdc(p) |
36 | 0 | { |
37 | 0 | SetSize(Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F); |
38 | 0 | SetInput(p.conv[0], _convolution + 0); |
39 | 0 | SetDepthwise(p.conv[1], false, _convolution + 1); |
40 | 0 | SetOutput(p.conv[2], _convolution + 2); |
41 | 0 | } |
42 | | |
43 | | //------------------------------------------------------------------------------------------------- |
44 | | |
45 | | SynetMergedConvolution32fCd::SynetMergedConvolution32fCd(const MergConvParam& p) |
46 | 0 | : Base::SynetMergedConvolution32fCd(p) |
47 | 0 | { |
48 | 0 | SetSize(Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F); |
49 | 0 | SetInput(_param.conv[0], _convolution + 0); |
50 | 0 | SetDepthwise(_param.conv[1], true, _convolution + 1); |
51 | 0 | } |
52 | | |
53 | | //------------------------------------------------------------------------------------------------- |
54 | | |
55 | | SynetMergedConvolution32fDc::SynetMergedConvolution32fDc(const MergConvParam& p) |
56 | 0 | : Base::SynetMergedConvolution32fDc(p) |
57 | 0 | { |
58 | 0 | SetSize(Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F); |
59 | 0 | SetDepthwise(p.conv[0], false, _convolution + 0); |
60 | 0 | SetOutput(p.conv[1], _convolution + 1); |
61 | 0 | } |
62 | | |
63 | | //------------------------------------------------------------------------------------------------- |
64 | | |
65 | | void* SynetMergedConvolution32fInit(size_t batch, const SimdConvolutionParameters* convs, size_t count, SimdBool add) |
66 | 0 | { |
67 | 0 | MergConvParam param(batch, convs, count, add, SimdSynetCompatibilityDefault); |
68 | 0 | if (!param.Valid(SimdTensorData32f)) |
69 | 0 | return NULL; |
70 | 0 | if (SynetMergedConvolution32fCdc::Preferable(param)) |
71 | 0 | return new Sse41::SynetMergedConvolution32fCdc(param); |
72 | 0 | else if (SynetMergedConvolution32fCd::Preferable(param)) |
73 | 0 | return new Sse41::SynetMergedConvolution32fCd(param); |
74 | 0 | else if (SynetMergedConvolution32fDc::Preferable(param)) |
75 | 0 | return new Sse41::SynetMergedConvolution32fDc(param); |
76 | 0 | else |
77 | 0 | return new Base::SynetMergedConvolution32f(param); |
78 | 0 | } |
79 | | } |
80 | | #endif |
81 | | } |