Coverage Report

Created: 2026-04-09 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdBaseSynetConvolution32fNhwcGrouped.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2024 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdSynetConvolution32f.h"
25
#include "Simd/SimdSynetConvolution32fCommon.h"
26
#include "Simd/SimdSynet.h"
27
#include "Simd/SimdBase.h"
28
#include "Simd/SimdCpu.h"
29
30
namespace Simd
31
{
32
#if defined(SIMD_SYNET_ENABLE)
33
    namespace Base
34
    {
35
        static void ConvolutionNhwcGroupedBlock1x2(const float* src, const ConvParam& p, const float* weight, const float* bias, const float* params, float* dst)
36
0
        {
37
0
            size_t dW = p.kernelY * p.kernelX * p.srcC, srcC = p.srcC;
38
0
            for (size_t dy = 0; dy < p.dstH; ++dy)
39
0
            {
40
0
                for (size_t dx = 0; dx < p.dstW; ++dx)
41
0
                {
42
0
                    memset(dst, 0, p.dstC * sizeof(float));
43
0
                    for (size_t ky = 0; ky < p.kernelY; ++ky)
44
0
                    {
45
0
                        size_t sy = dy * p.strideY + ky * p.dilationY - p.padY;
46
0
                        if (sy < p.srcH)
47
0
                        {
48
0
                            for (size_t kx = 0; kx < p.kernelX; ++kx)
49
0
                            {
50
0
                                size_t sx = dx * p.strideX + kx * p.dilationX - p.padX;
51
0
                                if (sx < p.srcW)
52
0
                                {
53
0
                                    const float* pw0 = weight + (ky * p.kernelX + kx) * srcC, *pw1 = pw0 + dW;
54
0
                                    const float* ps = src + (sy * p.srcW + sx) * p.srcC;
55
0
                                    float* pd = dst;
56
0
                                    for (size_t c = 0; c < srcC; ++c, pd += 2)
57
0
                                    {
58
0
                                        pd[0] += ps[c] * pw0[c];
59
0
                                        pd[1] += ps[c] * pw1[c];
60
0
                                    }
61
0
                                }
62
0
                            }
63
0
                        }
64
0
                    }
65
0
                    ConvolutionBiasAndActivation(bias, p.dstC, 1, p.activation, params, ::SimdTrue, dst);
66
0
                    dst += p.dstC;
67
0
                }
68
0
            }
69
0
        }
70
71
        // Builds the convolution object: stores the batch count, the per-image element counts
        // of the source and destination tensors, and selects the scalar 1x2 grouped kernel.
        SynetConvolution32fNhwcGroupedBlock1x2::SynetConvolution32fNhwcGroupedBlock1x2(const ConvParam& p)
            : SynetConvolution32f(p)
        {
            // Kernel invoked once per image by Forward().
            _convolution = ConvolutionNhwcGroupedBlock1x2;
            // Number of images processed per Forward() call.
            _batch = p.batch;
            // Float counts of a single destination / source image; used as per-image strides.
            _sizeD = p.dstC * p.dstH * p.dstW;
            _sizeS = p.srcC * p.srcH * p.srcW;
        }
79
80
        void SynetConvolution32fNhwcGroupedBlock1x2::SetParams(const float* weight, SimdBool* internal, const float* bias, const float* params)
81
0
        {
82
0
            SynetConvolution32f::SetParams(weight, internal, bias, params);
83
0
            const ConvParam& p = _param;
84
0
            size_t size = p.kernelY * p.kernelX * p.srcC;
85
0
            _rWeight.Resize(size * 2);
86
0
            const float* src = _weight;
87
0
            float* dst0 = _rWeight.data, *dst1 = dst0 + size;
88
0
            for (size_t i = 0; i < size; ++i)
89
0
            {
90
0
                dst0[i] = src[0];
91
0
                dst1[i] = src[1];
92
0
                src += 2;
93
0
            }
94
0
            _weight = _rWeight.data;
95
0
            if (_bias == NULL)
96
0
            {
97
0
                _rBias.Resize(p.dstC, true);
98
0
                _bias = _rBias.data;
99
0
            }
100
0
        }
101
102
        void SynetConvolution32fNhwcGroupedBlock1x2::Forward(const float* src, float* buf, float* dst)
103
0
        {
104
0
            for (size_t b = 0; b < _batch; ++b)
105
0
            {
106
0
                _convolution(src, _param, _weight, _bias, _params, dst);
107
0
                src += _sizeS;
108
0
                dst += _sizeD;
109
0
            }
110
0
        }
111
112
        bool SynetConvolution32fNhwcGroupedBlock1x2::Preferable(const ConvParam& p)
113
0
        {
114
0
            if (p.trans == 0 || p.group == 1 || p.IsDepthwise())
115
0
                return false;
116
0
            return p.group == p.srcC && p.dstC == 2 * p.srcC;
117
0
        }
118
    }
119
#endif
120
}