/src/Simd/src/Simd/SimdBaseSynetPooling.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2025 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdArray.h" |
25 | | #include "Simd/SimdPow.h" |
26 | | #include "Simd/SimdSynet.h" |
27 | | |
28 | | namespace Simd |
29 | | { |
30 | | #if defined(SIMD_SYNET_ENABLE) |
31 | | namespace Base |
32 | | { |
33 | | void SynetPoolingAverage(const float* src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX, |
34 | | size_t strideY, size_t strideX, size_t padY, size_t padX, float* dst, size_t dstH, size_t dstW, SimdBool excludePad, SimdTensorFormatType format) |
35 | 0 | { |
36 | 0 | if (format == SimdTensorFormatNhwc) |
37 | 0 | { |
38 | 0 | for (size_t ph = 0; ph < dstH; ++ph) |
39 | 0 | { |
40 | 0 | size_t hStart = ph * strideY - padY; |
41 | 0 | size_t hEnd = Simd::Min(hStart + kernelY, srcH); |
42 | 0 | hStart = Simd::Max<ptrdiff_t>(0, hStart); |
43 | 0 | for (size_t pw = 0; pw < dstW; ++pw) |
44 | 0 | { |
45 | 0 | size_t wStart = pw * strideX - padX; |
46 | 0 | size_t wEnd = Simd::Min(wStart + kernelX, srcW); |
47 | 0 | wStart = Simd::Max<ptrdiff_t>(0, wStart); |
48 | 0 | for (size_t c = 0; c < srcC; ++c) |
49 | 0 | dst[c] = 0.0f; |
50 | 0 | for (size_t h = hStart; h < hEnd; ++h) |
51 | 0 | { |
52 | 0 | for (size_t w = wStart; w < wEnd; ++w) |
53 | 0 | { |
54 | 0 | const float* pc = src + (h * srcW + w) * srcC; |
55 | 0 | for (size_t c = 0; c < srcC; ++c) |
56 | 0 | dst[c] += pc[c]; |
57 | 0 | } |
58 | 0 | } |
59 | 0 | if (excludePad) |
60 | 0 | for (size_t c = 0; c < srcC; ++c) |
61 | 0 | dst[c] = dst[c] / float((hEnd - hStart) * (wEnd - wStart)); |
62 | 0 | else |
63 | 0 | for (size_t c = 0; c < srcC; ++c) |
64 | 0 | dst[c] = dst[c] / float(kernelY * kernelX); |
65 | 0 | dst += srcC; |
66 | 0 | } |
67 | 0 | } |
68 | 0 | } |
69 | 0 | else if (format == SimdTensorFormatNchw) |
70 | 0 | { |
71 | 0 | if (kernelY == 2 && kernelX == 2 && strideY == 2 && strideX == 2 && padY == 0 && padX == 0) |
72 | 0 | { |
73 | 0 | size_t dstH2 = srcH / 2, dstW2 = srcW / 2; |
74 | 0 | float mainA = 0.25f, edgeA = excludePad ? 0.5f : 0.25f, cornA = excludePad ? 1.0f : 0.25f; |
75 | 0 | for (size_t c = 0; c < srcC; ++c) |
76 | 0 | { |
77 | 0 | size_t dy = 0; |
78 | 0 | const float* src0 = src; |
79 | 0 | for (; dy < dstH2; ++dy) |
80 | 0 | { |
81 | 0 | size_t dx = 0, sx = 0; |
82 | 0 | const float* src1 = src0 + srcW; |
83 | 0 | for (; dx < dstW2; ++dx, sx += 2) |
84 | 0 | dst[dx] = (src0[sx] + src0[sx + 1] + src1[sx] + src1[sx + 1]) * mainA; |
85 | 0 | if (dx < dstW) |
86 | 0 | dst[dx] = (src0[sx] + src1[sx]) * edgeA; |
87 | 0 | src0 += srcW * 2; |
88 | 0 | dst += dstW; |
89 | 0 | } |
90 | 0 | for (; dy < dstH; ++dy) |
91 | 0 | { |
92 | 0 | size_t dx = 0, sx = 0; |
93 | 0 | for (; dx < dstW2; ++dx, sx += 2) |
94 | 0 | dst[dx] = (src0[sx] + src0[sx + 1]) * edgeA; |
95 | 0 | if (dx < dstW) |
96 | 0 | dst[dx] = src0[sx] * cornA; |
97 | 0 | src0 += srcW; |
98 | 0 | dst += dstW; |
99 | 0 | } |
100 | 0 | src += srcW * srcH; |
101 | 0 | } |
102 | 0 | } |
103 | 0 | else |
104 | 0 | { |
105 | 0 | for (size_t c = 0; c < srcC; ++c) |
106 | 0 | { |
107 | 0 | for (size_t ph = 0; ph < dstH; ++ph) |
108 | 0 | { |
109 | 0 | size_t hStart = ph * strideY - padY; |
110 | 0 | size_t hEnd = Simd::Min(hStart + kernelY, srcH); |
111 | 0 | hStart = Simd::Max<ptrdiff_t>(0, hStart); |
112 | 0 | for (size_t pw = 0; pw < dstW; ++pw) |
113 | 0 | { |
114 | 0 | size_t wStart = pw * strideX - padX; |
115 | 0 | size_t wEnd = Simd::Min(wStart + kernelX, srcW); |
116 | 0 | wStart = Simd::Max<ptrdiff_t>(0, wStart); |
117 | 0 | float sum = 0.0f; |
118 | 0 | for (size_t h = hStart; h < hEnd; ++h) |
119 | 0 | for (size_t w = wStart; w < wEnd; ++w) |
120 | 0 | sum += src[h * srcW + w]; |
121 | 0 | if (excludePad) |
122 | 0 | dst[ph * dstW + pw] = sum / float((hEnd - hStart) * (wEnd - wStart)); |
123 | 0 | else |
124 | 0 | dst[ph * dstW + pw] = sum / float(kernelY * kernelX); |
125 | 0 | } |
126 | 0 | } |
127 | 0 | src += srcW * srcH; |
128 | 0 | dst += dstW * dstH; |
129 | 0 | } |
130 | 0 | } |
131 | 0 | } |
132 | 0 | else |
133 | 0 | assert(0); |
134 | 0 | } |
135 | | |
136 | | //--------------------------------------------------------------------- |
137 | | |
138 | | template<class T> void SynetPoolingMax2D(const T* src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX, |
139 | | size_t strideY, size_t strideX, size_t padY, size_t padX, T* dst, size_t dstH, size_t dstW, SimdTensorFormatType format) |
140 | 0 | { |
141 | 0 | if (format == SimdTensorFormatNhwc) |
142 | 0 | { |
143 | 0 | for (size_t dh = 0; dh < dstH; ++dh) |
144 | 0 | { |
145 | 0 | size_t hBeg = dh * strideY - padY; |
146 | 0 | size_t hEnd = Simd::Min(hBeg + kernelY, srcH); |
147 | 0 | hBeg = Simd::Max<ptrdiff_t>(0, hBeg); |
148 | 0 | for (size_t dw = 0; dw < dstW; ++dw) |
149 | 0 | { |
150 | 0 | size_t wBeg = dw * strideX - padX; |
151 | 0 | size_t wEnd = Simd::Min(wBeg + kernelX, srcW); |
152 | 0 | wBeg = Simd::Max<ptrdiff_t>(0, wBeg); |
153 | 0 | for (size_t c = 0; c < srcC; ++c) |
154 | 0 | dst[c] = std::numeric_limits<T>::lowest(); |
155 | 0 | for (size_t sh = hBeg; sh < hEnd; ++sh) |
156 | 0 | { |
157 | 0 | for (size_t sw = wBeg; sw < wEnd; ++sw) |
158 | 0 | { |
159 | 0 | const T * ps = src + (sh * srcW + sw) * srcC; |
160 | 0 | for (size_t c = 0; c < srcC; ++c) |
161 | 0 | dst[c] = Simd::Max(dst[c], ps[c]); |
162 | 0 | } |
163 | 0 | } |
164 | 0 | dst += srcC; |
165 | 0 | } |
166 | 0 | } |
167 | 0 | } |
168 | 0 | else if (format == SimdTensorFormatNchw) |
169 | 0 | { |
170 | 0 | for (size_t c = 0; c < srcC; ++c) |
171 | 0 | { |
172 | 0 | for (size_t dh = 0; dh < dstH; ++dh) |
173 | 0 | { |
174 | 0 | size_t hBeg = dh * strideY - padY; |
175 | 0 | size_t hEnd = Simd::Min(hBeg + kernelY, srcH); |
176 | 0 | hBeg = Simd::Max<ptrdiff_t>(0, hBeg); |
177 | 0 | for (size_t dw = 0; dw < dstW; ++dw) |
178 | 0 | { |
179 | 0 | size_t wBeg = dw * strideX - padX; |
180 | 0 | size_t wEnd = Simd::Min(wBeg + kernelX, srcW); |
181 | 0 | wBeg = Simd::Max<ptrdiff_t>(0, wBeg); |
182 | 0 | T max = std::numeric_limits<T>::lowest();; |
183 | 0 | for (size_t sh = hBeg; sh < hEnd; ++sh) |
184 | 0 | for (size_t sw = wBeg; sw < wEnd; ++sw) |
185 | 0 | max = Simd::Max(max, src[sh * srcW + sw]); |
186 | 0 | dst[dh * dstW + dw] = max; |
187 | 0 | } |
188 | 0 | } |
189 | 0 | src += srcW * srcH; |
190 | 0 | dst += dstW * dstH; |
191 | 0 | } |
192 | 0 | } |
193 | 0 | else |
194 | 0 | assert(0); |
195 | 0 | } Unexecuted instantiation: void Simd::Base::SynetPoolingMax2D<float>(float const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, float*, unsigned long, unsigned long, SimdTensorFormatType) Unexecuted instantiation: void Simd::Base::SynetPoolingMax2D<unsigned char>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, SimdTensorFormatType) |
196 | | |
197 | | template <class T> void SynetPoolingMax3D(const T* src, size_t srcC, size_t srcH, size_t srcW, |
198 | | size_t kernelC, size_t kernelY, size_t kernelX, size_t strideC, size_t strideY, size_t strideX, |
199 | | size_t padC, size_t padY, size_t padX, T* dst, size_t dstC, size_t dstH, size_t dstW, SimdTensorFormatType format) |
200 | 0 | { |
201 | 0 | if (format == SimdTensorFormatNhwc) |
202 | 0 | { |
203 | 0 | for (size_t dh = 0; dh < dstH; ++dh) |
204 | 0 | { |
205 | 0 | size_t hBeg = dh * strideY - padY; |
206 | 0 | size_t hEnd = Simd::Min(hBeg + kernelY, srcH); |
207 | 0 | hBeg = Simd::Max<ptrdiff_t>(0, hBeg); |
208 | 0 | for (size_t dw = 0; dw < dstW; ++dw) |
209 | 0 | { |
210 | 0 | size_t wBeg = dw * strideX - padX; |
211 | 0 | size_t wEnd = Simd::Min(wBeg + kernelX, srcW); |
212 | 0 | wBeg = Simd::Max<ptrdiff_t>(0, wBeg); |
213 | 0 | for (size_t dc = 0; dc < dstC; ++dc) |
214 | 0 | { |
215 | 0 | size_t cBeg = dc * strideC - padC; |
216 | 0 | size_t cEnd = Simd::Min(cBeg + kernelC, srcC); |
217 | 0 | cBeg = Simd::Max<ptrdiff_t>(0, cBeg); |
218 | 0 | T max = std::numeric_limits<T>::lowest(); |
219 | 0 | for (size_t sh = hBeg; sh < hEnd; ++sh) |
220 | 0 | { |
221 | 0 | for (size_t sw = wBeg; sw < wEnd; ++sw) |
222 | 0 | { |
223 | 0 | const T* ps = src + (sh * srcW + sw) * srcC; |
224 | 0 | for (size_t c = cBeg; c < cEnd; ++c) |
225 | 0 | max = Simd::Max(max, ps[c]); |
226 | 0 | } |
227 | 0 | } |
228 | 0 | dst[(dh * dstW + dw) * dstC + dc] = max; |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } |
232 | 0 | } |
233 | 0 | else if (format == SimdTensorFormatNchw) |
234 | 0 | { |
235 | 0 | for (size_t dc = 0; dc < dstC; ++dc) |
236 | 0 | { |
237 | 0 | size_t cBeg = dc * strideC - padC; |
238 | 0 | size_t cEnd = Simd::Min(cBeg + kernelC, srcC); |
239 | 0 | cBeg = Simd::Max<ptrdiff_t>(0, cBeg); |
240 | 0 | for (size_t dh = 0; dh < dstH; ++dh) |
241 | 0 | { |
242 | 0 | size_t hBeg = dh * strideY - padY; |
243 | 0 | size_t hEnd = Simd::Min(hBeg + kernelY, srcH); |
244 | 0 | hBeg = Simd::Max<ptrdiff_t>(0, hBeg); |
245 | 0 | for (size_t dw = 0; dw < dstW; ++dw) |
246 | 0 | { |
247 | 0 | size_t wBeg = dw * strideX - padX; |
248 | 0 | size_t wEnd = Simd::Min(wBeg + kernelX, srcW); |
249 | 0 | wBeg = Simd::Max<ptrdiff_t>(0, wBeg); |
250 | 0 | T max = std::numeric_limits<T>::lowest(); |
251 | 0 | for (size_t sc = cBeg; sc < cEnd; ++sc) |
252 | 0 | for (size_t sh = hBeg; sh < hEnd; ++sh) |
253 | 0 | for (size_t sw = wBeg; sw < wEnd; ++sw) |
254 | 0 | max = Simd::Max(max, src[(sc * srcH + sh) * srcW + sw]); |
255 | 0 | dst[(dc * dstH + dh) * dstW + dw] = max; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | } |
259 | 0 | } |
260 | 0 | else |
261 | 0 | assert(0); |
262 | 0 | } |
263 | | |
264 | | void SynetPoolingMax32f(const float* src, size_t srcC, size_t srcH, size_t srcW, |
265 | | size_t kernelC, size_t kernelY, size_t kernelX, size_t strideC, size_t strideY, size_t strideX, |
266 | | size_t padC, size_t padY, size_t padX, float* dst, size_t dstC, size_t dstH, size_t dstW, SimdTensorFormatType format) |
267 | 0 | { |
268 | 0 | if(kernelC == 1 && strideC == 1 && padC == 0 && srcC == dstC) |
269 | 0 | SynetPoolingMax2D(src, srcC, srcH, srcW, kernelY, kernelX, |
270 | 0 | strideY, strideX, padY, padX, dst, dstH, dstW, format); |
271 | 0 | else |
272 | 0 | SynetPoolingMax3D(src, srcC, srcH, srcW, kernelC, kernelY, kernelX, |
273 | 0 | strideC, strideY, strideX, padC, padY, padX, dst, dstC, dstH, dstW, format); |
274 | 0 | } |
275 | | |
276 | | void SynetPoolingMax8u(const uint8_t* src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX, |
277 | | size_t strideY, size_t strideX, size_t padY, size_t padX, uint8_t* dst, size_t dstH, size_t dstW, SimdTensorFormatType format) |
278 | 0 | { |
279 | 0 | SynetPoolingMax2D(src, srcC, srcH, srcW, kernelY, kernelX, strideY, strideX, padY, padX, dst, dstH, dstW, format); |
280 | 0 | } |
281 | | } |
282 | | #endif |
283 | | } |