Coverage Report

Created: 2025-12-31 07:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdBaseSynetPooling.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2025 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdArray.h"
25
#include "Simd/SimdPow.h"
26
#include "Simd/SimdSynet.h"
27
28
namespace Simd
29
{
30
#if defined(SIMD_SYNET_ENABLE)
31
    namespace Base
32
    {
33
        void SynetPoolingAverage(const float* src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX,
34
            size_t strideY, size_t strideX, size_t padY, size_t padX, float* dst, size_t dstH, size_t dstW, SimdBool excludePad, SimdTensorFormatType format)
35
0
        {
36
0
            if (format == SimdTensorFormatNhwc)
37
0
            {
38
0
                for (size_t ph = 0; ph < dstH; ++ph)
39
0
                {
40
0
                    size_t hStart = ph * strideY - padY;
41
0
                    size_t hEnd = Simd::Min(hStart + kernelY, srcH);
42
0
                    hStart = Simd::Max<ptrdiff_t>(0, hStart);
43
0
                    for (size_t pw = 0; pw < dstW; ++pw)
44
0
                    {
45
0
                        size_t wStart = pw * strideX - padX;
46
0
                        size_t wEnd = Simd::Min(wStart + kernelX, srcW);
47
0
                        wStart = Simd::Max<ptrdiff_t>(0, wStart);
48
0
                        for (size_t c = 0; c < srcC; ++c)
49
0
                            dst[c] = 0.0f;
50
0
                        for (size_t h = hStart; h < hEnd; ++h)
51
0
                        {
52
0
                            for (size_t w = wStart; w < wEnd; ++w)
53
0
                            {
54
0
                                const float* pc = src + (h * srcW + w) * srcC;
55
0
                                for (size_t c = 0; c < srcC; ++c)
56
0
                                    dst[c] += pc[c];
57
0
                            }
58
0
                        }
59
0
                        if (excludePad)
60
0
                            for (size_t c = 0; c < srcC; ++c)
61
0
                                dst[c] = dst[c] / float((hEnd - hStart) * (wEnd - wStart));
62
0
                        else
63
0
                            for (size_t c = 0; c < srcC; ++c)
64
0
                                dst[c] = dst[c] / float(kernelY * kernelX);
65
0
                        dst += srcC;
66
0
                    }
67
0
                }
68
0
            }
69
0
            else if (format == SimdTensorFormatNchw)
70
0
            {
71
0
                if (kernelY == 2 && kernelX == 2 && strideY == 2 && strideX == 2 && padY == 0 && padX == 0)
72
0
                {
73
0
                    size_t dstH2 = srcH / 2, dstW2 = srcW / 2;
74
0
                    float mainA = 0.25f, edgeA = excludePad ? 0.5f : 0.25f, cornA = excludePad ? 1.0f : 0.25f;
75
0
                    for (size_t c = 0; c < srcC; ++c)
76
0
                    {
77
0
                        size_t dy = 0;
78
0
                        const float* src0 = src;
79
0
                        for (; dy < dstH2; ++dy)
80
0
                        {
81
0
                            size_t dx = 0, sx = 0;
82
0
                            const float* src1 = src0 + srcW;
83
0
                            for (; dx < dstW2; ++dx, sx += 2)
84
0
                                dst[dx] = (src0[sx] + src0[sx + 1] + src1[sx] + src1[sx + 1]) * mainA;
85
0
                            if (dx < dstW)
86
0
                                dst[dx] = (src0[sx] + src1[sx]) * edgeA;
87
0
                            src0 += srcW * 2;
88
0
                            dst += dstW;
89
0
                        }
90
0
                        for (; dy < dstH; ++dy)
91
0
                        {
92
0
                            size_t dx = 0, sx = 0;
93
0
                            for (; dx < dstW2; ++dx, sx += 2)
94
0
                                dst[dx] = (src0[sx] + src0[sx + 1]) * edgeA;
95
0
                            if (dx < dstW)
96
0
                                dst[dx] = src0[sx] * cornA;
97
0
                            src0 += srcW;
98
0
                            dst += dstW;
99
0
                        }
100
0
                        src += srcW * srcH;
101
0
                    }
102
0
                }
103
0
                else
104
0
                {
105
0
                    for (size_t c = 0; c < srcC; ++c)
106
0
                    {
107
0
                        for (size_t ph = 0; ph < dstH; ++ph)
108
0
                        {
109
0
                            size_t hStart = ph * strideY - padY;
110
0
                            size_t hEnd = Simd::Min(hStart + kernelY, srcH);
111
0
                            hStart = Simd::Max<ptrdiff_t>(0, hStart);
112
0
                            for (size_t pw = 0; pw < dstW; ++pw)
113
0
                            {
114
0
                                size_t wStart = pw * strideX - padX;
115
0
                                size_t wEnd = Simd::Min(wStart + kernelX, srcW);
116
0
                                wStart = Simd::Max<ptrdiff_t>(0, wStart);
117
0
                                float sum = 0.0f;
118
0
                                for (size_t h = hStart; h < hEnd; ++h)
119
0
                                    for (size_t w = wStart; w < wEnd; ++w)
120
0
                                        sum += src[h * srcW + w];
121
0
                                if (excludePad)
122
0
                                    dst[ph * dstW + pw] = sum / float((hEnd - hStart) * (wEnd - wStart));
123
0
                                else
124
0
                                    dst[ph * dstW + pw] = sum / float(kernelY * kernelX);
125
0
                            }
126
0
                        }
127
0
                        src += srcW * srcH;
128
0
                        dst += dstW * dstH;
129
0
                    }
130
0
                }
131
0
            }
132
0
            else
133
0
                assert(0);
134
0
        }
135
136
        //---------------------------------------------------------------------
137
138
        template<class T> void SynetPoolingMax2D(const T* src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX,
139
            size_t strideY, size_t strideX, size_t padY, size_t padX, T* dst, size_t dstH, size_t dstW, SimdTensorFormatType format)
140
0
        {
141
0
            if (format == SimdTensorFormatNhwc)
142
0
            {
143
0
                for (size_t dh = 0; dh < dstH; ++dh)
144
0
                {
145
0
                    size_t hBeg = dh * strideY - padY;
146
0
                    size_t hEnd = Simd::Min(hBeg + kernelY, srcH);
147
0
                    hBeg = Simd::Max<ptrdiff_t>(0, hBeg);
148
0
                    for (size_t dw = 0; dw < dstW; ++dw)
149
0
                    {
150
0
                        size_t wBeg = dw * strideX - padX;
151
0
                        size_t wEnd = Simd::Min(wBeg + kernelX, srcW);
152
0
                        wBeg = Simd::Max<ptrdiff_t>(0, wBeg);
153
0
                        for (size_t c = 0; c < srcC; ++c)
154
0
                            dst[c] = std::numeric_limits<T>::lowest();
155
0
                        for (size_t sh = hBeg; sh < hEnd; ++sh)
156
0
                        {
157
0
                            for (size_t sw = wBeg; sw < wEnd; ++sw)
158
0
                            {
159
0
                                const T * ps = src + (sh * srcW + sw) * srcC;
160
0
                                for (size_t c = 0; c < srcC; ++c)
161
0
                                    dst[c] = Simd::Max(dst[c], ps[c]);
162
0
                            }
163
0
                        }
164
0
                        dst += srcC;
165
0
                    }
166
0
                }
167
0
            }
168
0
            else if (format == SimdTensorFormatNchw)
169
0
            {
170
0
                for (size_t c = 0; c < srcC; ++c)
171
0
                {
172
0
                    for (size_t dh = 0; dh < dstH; ++dh)
173
0
                    {
174
0
                        size_t hBeg = dh * strideY - padY;
175
0
                        size_t hEnd = Simd::Min(hBeg + kernelY, srcH);
176
0
                        hBeg = Simd::Max<ptrdiff_t>(0, hBeg);
177
0
                        for (size_t dw = 0; dw < dstW; ++dw)
178
0
                        {
179
0
                            size_t wBeg = dw * strideX - padX;
180
0
                            size_t wEnd = Simd::Min(wBeg + kernelX, srcW);
181
0
                            wBeg = Simd::Max<ptrdiff_t>(0, wBeg);
182
0
                            T max = std::numeric_limits<T>::lowest();;
183
0
                            for (size_t sh = hBeg; sh < hEnd; ++sh)
184
0
                                for (size_t sw = wBeg; sw < wEnd; ++sw)
185
0
                                    max = Simd::Max(max, src[sh * srcW + sw]);
186
0
                            dst[dh * dstW + dw] = max;
187
0
                        }
188
0
                    }
189
0
                    src += srcW * srcH;
190
0
                    dst += dstW * dstH;
191
0
                }
192
0
            }
193
0
            else
194
0
                assert(0);
195
0
        }
Unexecuted instantiation: void Simd::Base::SynetPoolingMax2D<float>(float const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, float*, unsigned long, unsigned long, SimdTensorFormatType)
Unexecuted instantiation: void Simd::Base::SynetPoolingMax2D<unsigned char>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, SimdTensorFormatType)
196
197
        template <class T> void SynetPoolingMax3D(const T* src, size_t srcC, size_t srcH, size_t srcW, 
198
            size_t kernelC, size_t kernelY, size_t kernelX, size_t strideC, size_t strideY, size_t strideX, 
199
            size_t padC, size_t padY, size_t padX, T* dst, size_t dstC, size_t dstH, size_t dstW, SimdTensorFormatType format)
200
0
        {
201
0
            if (format == SimdTensorFormatNhwc)
202
0
            {
203
0
                for (size_t dh = 0; dh < dstH; ++dh)
204
0
                {
205
0
                    size_t hBeg = dh * strideY - padY;
206
0
                    size_t hEnd = Simd::Min(hBeg + kernelY, srcH);
207
0
                    hBeg = Simd::Max<ptrdiff_t>(0, hBeg);
208
0
                    for (size_t dw = 0; dw < dstW; ++dw)
209
0
                    {
210
0
                        size_t wBeg = dw * strideX - padX;
211
0
                        size_t wEnd = Simd::Min(wBeg + kernelX, srcW);
212
0
                        wBeg = Simd::Max<ptrdiff_t>(0, wBeg);
213
0
                        for (size_t dc = 0; dc < dstC; ++dc)
214
0
                        {
215
0
                            size_t cBeg = dc * strideC - padC;
216
0
                            size_t cEnd = Simd::Min(cBeg + kernelC, srcC);
217
0
                            cBeg = Simd::Max<ptrdiff_t>(0, cBeg);
218
0
                            T max = std::numeric_limits<T>::lowest();
219
0
                            for (size_t sh = hBeg; sh < hEnd; ++sh)
220
0
                            {
221
0
                                for (size_t sw = wBeg; sw < wEnd; ++sw)
222
0
                                {
223
0
                                    const T* ps = src + (sh * srcW + sw) * srcC;
224
0
                                    for (size_t c = cBeg; c < cEnd; ++c)
225
0
                                        max = Simd::Max(max, ps[c]);
226
0
                                }
227
0
                            }
228
0
                            dst[(dh * dstW + dw) * dstC + dc] = max;
229
0
                        }
230
0
                    }
231
0
                }
232
0
            }
233
0
            else if (format == SimdTensorFormatNchw)
234
0
            {
235
0
                for (size_t dc = 0; dc < dstC; ++dc)
236
0
                {
237
0
                    size_t cBeg = dc * strideC - padC;
238
0
                    size_t cEnd = Simd::Min(cBeg + kernelC, srcC);
239
0
                    cBeg = Simd::Max<ptrdiff_t>(0, cBeg);
240
0
                    for (size_t dh = 0; dh < dstH; ++dh)
241
0
                    {
242
0
                        size_t hBeg = dh * strideY - padY;
243
0
                        size_t hEnd = Simd::Min(hBeg + kernelY, srcH);
244
0
                        hBeg = Simd::Max<ptrdiff_t>(0, hBeg);
245
0
                        for (size_t dw = 0; dw < dstW; ++dw)
246
0
                        {
247
0
                            size_t wBeg = dw * strideX - padX;
248
0
                            size_t wEnd = Simd::Min(wBeg + kernelX, srcW);
249
0
                            wBeg = Simd::Max<ptrdiff_t>(0, wBeg);
250
0
                            T max = std::numeric_limits<T>::lowest();
251
0
                            for (size_t sc = cBeg; sc < cEnd; ++sc)
252
0
                                for (size_t sh = hBeg; sh < hEnd; ++sh)
253
0
                                    for (size_t sw = wBeg; sw < wEnd; ++sw)
254
0
                                        max = Simd::Max(max, src[(sc * srcH + sh) * srcW + sw]);
255
0
                            dst[(dc * dstH + dh) * dstW + dw] = max;
256
0
                        }
257
0
                    }
258
0
                }
259
0
            }
260
0
            else
261
0
                assert(0);
262
0
        }
263
264
        void SynetPoolingMax32f(const float* src, size_t srcC, size_t srcH, size_t srcW,
265
            size_t kernelC, size_t kernelY, size_t kernelX, size_t strideC, size_t strideY, size_t strideX,
266
            size_t padC, size_t padY, size_t padX, float* dst, size_t dstC, size_t dstH, size_t dstW, SimdTensorFormatType format)
267
0
        {
268
0
            if(kernelC == 1 && strideC == 1 && padC == 0 && srcC == dstC)
269
0
                SynetPoolingMax2D(src, srcC, srcH, srcW, kernelY, kernelX, 
270
0
                    strideY, strideX, padY, padX, dst, dstH, dstW, format);
271
0
            else
272
0
                SynetPoolingMax3D(src, srcC, srcH, srcW, kernelC, kernelY, kernelX, 
273
0
                    strideC, strideY, strideX, padC, padY, padX, dst, dstC, dstH, dstW, format);
274
0
        }
275
276
        void SynetPoolingMax8u(const uint8_t* src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX,
277
            size_t strideY, size_t strideX, size_t padY, size_t padX, uint8_t* dst, size_t dstH, size_t dstW, SimdTensorFormatType format)
278
0
        {
279
0
            SynetPoolingMax2D(src, srcC, srcH, srcW, kernelY, kernelX, strideY, strideX, padY, padX, dst, dstH, dstW, format);
280
0
        }
281
    }
282
#endif
283
}