/src/Simd/src/Simd/SimdBaseHog.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Simd Library (http://ermig1979.github.io/Simd).  | 
3  |  | *  | 
4  |  | * Copyright (c) 2011-2019 Yermalayeu Ihar.  | 
5  |  | *  | 
6  |  | * Permission is hereby granted, free of charge, to any person obtaining a copy  | 
7  |  | * of this software and associated documentation files (the "Software"), to deal  | 
8  |  | * in the Software without restriction, including without limitation the rights  | 
9  |  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  | 
10  |  | * copies of the Software, and to permit persons to whom the Software is  | 
11  |  | * furnished to do so, subject to the following conditions:  | 
12  |  | *  | 
13  |  | * The above copyright notice and this permission notice shall be included in  | 
14  |  | * all copies or substantial portions of the Software.  | 
15  |  | *  | 
16  |  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  | 
17  |  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  | 
18  |  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  | 
19  |  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  | 
20  |  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  | 
21  |  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  | 
22  |  | * SOFTWARE.  | 
23  |  | */  | 
24  |  | #include "Simd/SimdArray.h"  | 
25  |  |  | 
26  |  | namespace Simd  | 
27  |  | { | 
28  |  |     namespace Base  | 
29  |  |     { | 
30  |  |         namespace  | 
31  |  |         { | 
32  |  |             struct Buffer  | 
33  |  |             { | 
34  |  |                 const int size;  | 
35  |  |                 float * cos, *sin;  | 
36  |  |                 int * index;  | 
37  |  |                 float * value;  | 
38  |  |  | 
39  |  |                 Buffer(size_t width, size_t quantization)  | 
40  |  |                     : size((int)quantization / 2)  | 
41  | 0  |                 { | 
42  | 0  |                     _p = Allocate(width*(sizeof(int) + sizeof(float)) + sizeof(float) * 2 * size);  | 
43  | 0  |                     index = (int*)_p;  | 
44  | 0  |                     value = (float*)index + width;  | 
45  | 0  |                     cos = value + width;  | 
46  | 0  |                     sin = cos + size;  | 
47  | 0  |                     for (int i = 0; i < size; ++i)  | 
48  | 0  |                     { | 
49  | 0  |                         cos[i] = (float)::cos(i*M_PI / size);  | 
50  | 0  |                         sin[i] = (float)::sin(i*M_PI / size);  | 
51  | 0  |                     }  | 
52  | 0  |                 }  | 
53  |  |  | 
54  |  |                 ~Buffer()  | 
55  | 0  |                 { | 
56  | 0  |                     Free(_p);  | 
57  | 0  |                 }  | 
58  |  |  | 
59  |  |             private:  | 
60  |  |                 void *_p;  | 
61  |  |             };  | 
62  |  |         }  | 
63  |  |  | 
64  |  |         void AddRowToHistograms(int * indexes, float * values, size_t row, size_t width, size_t height, size_t cellX, size_t cellY, size_t quantization, float * histograms)  | 
65  | 0  |         { | 
66  | 0  |             int blockX = int(width / cellX);  | 
67  | 0  |             int blockY = int(height / cellY);  | 
68  | 0  |             int blockStride = int(quantization*blockX);  | 
69  |  | 
  | 
70  | 0  |             float yp = ((float)row + 0.5f) / (float)cellY - 0.5f;  | 
71  | 0  |             int iyp = (int)floor(yp);  | 
72  | 0  |             float vy0 = yp - iyp;  | 
73  | 0  |             float vy1 = 1.0f - vy0;  | 
74  |  | 
  | 
75  | 0  |             size_t noseEnd = cellX / 2;  | 
76  | 0  |             size_t bodyEnd = width - cellX / 2;  | 
77  |  | 
  | 
78  | 0  |             if (iyp < 0)  | 
79  | 0  |             { | 
80  | 0  |                 float * h = histograms + (iyp + 1)*blockStride;  | 
81  | 0  |                 for (size_t col = 1; col < width - 1; ++col)  | 
82  | 0  |                 { | 
83  | 0  |                     float value = values[col];  | 
84  | 0  |                     int index = indexes[col];  | 
85  |  | 
  | 
86  | 0  |                     float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;  | 
87  | 0  |                     int ixp = (int)floor(xp);  | 
88  | 0  |                     float vx0 = xp - ixp;  | 
89  | 0  |                     float vx1 = 1.0f - vx0;  | 
90  |  | 
  | 
91  | 0  |                     if (ixp >= 0)  | 
92  | 0  |                         h[ixp*quantization + index] += vx1 * vy0*value;  | 
93  | 0  |                     if (ixp + 1 < blockX)  | 
94  | 0  |                         h[(ixp + 1)*quantization + index] += vx0 * vy0*value;  | 
95  | 0  |                 }  | 
96  | 0  |             }  | 
97  | 0  |             else if (iyp + 1 == blockY)  | 
98  | 0  |             { | 
99  | 0  |                 float * h = histograms + iyp * blockStride;  | 
100  | 0  |                 for (size_t col = 1; col < width - 1; ++col)  | 
101  | 0  |                 { | 
102  | 0  |                     float value = values[col];  | 
103  | 0  |                     int index = indexes[col];  | 
104  |  | 
  | 
105  | 0  |                     float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;  | 
106  | 0  |                     int ixp = (int)floor(xp);  | 
107  | 0  |                     float vx0 = xp - ixp;  | 
108  | 0  |                     float vx1 = 1.0f - vx0;  | 
109  |  | 
  | 
110  | 0  |                     if (ixp >= 0)  | 
111  | 0  |                         h[ixp*quantization + index] += vx1 * vy1*value;  | 
112  | 0  |                     if (ixp + 1 < blockX)  | 
113  | 0  |                         h[(ixp + 1)*quantization + index] += vx0 * vy1*value;  | 
114  | 0  |                 }  | 
115  | 0  |             }  | 
116  | 0  |             else  | 
117  | 0  |             { | 
118  | 0  |                 float * h0 = histograms + iyp * blockStride;  | 
119  | 0  |                 float * h1 = histograms + (iyp + 1)*blockStride;  | 
120  | 0  |                 size_t col = 1;  | 
121  | 0  |                 for (; col < noseEnd; ++col)  | 
122  | 0  |                 { | 
123  | 0  |                     float value = values[col];  | 
124  | 0  |                     int index = indexes[col];  | 
125  |  | 
  | 
126  | 0  |                     float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;  | 
127  | 0  |                     int ixp = (int)floor(xp);  | 
128  | 0  |                     float vx0 = xp - ixp;  | 
129  |  | 
  | 
130  | 0  |                     h0[(ixp + 1)*quantization + index] += vx0 * vy1*value;  | 
131  | 0  |                     h1[(ixp + 1)*quantization + index] += vx0 * vy0*value;  | 
132  | 0  |                 }  | 
133  |  | 
  | 
134  | 0  |                 for (; col < bodyEnd; ++col)  | 
135  | 0  |                 { | 
136  | 0  |                     float value = values[col];  | 
137  | 0  |                     int index = indexes[col];  | 
138  |  | 
  | 
139  | 0  |                     float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;  | 
140  | 0  |                     int ixp = (int)floor(xp);  | 
141  | 0  |                     float vx0 = xp - ixp;  | 
142  | 0  |                     float vx1 = 1.0f - vx0;  | 
143  |  | 
  | 
144  | 0  |                     h0[ixp*quantization + index] += vx1 * vy1*value;  | 
145  | 0  |                     h1[ixp*quantization + index] += vx1 * vy0*value;  | 
146  | 0  |                     h0[(ixp + 1)*quantization + index] += vx0 * vy1*value;  | 
147  | 0  |                     h1[(ixp + 1)*quantization + index] += vx0 * vy0*value;  | 
148  | 0  |                 }  | 
149  |  | 
  | 
150  | 0  |                 for (; col < width - 1; ++col)  | 
151  | 0  |                 { | 
152  | 0  |                     float value = values[col];  | 
153  | 0  |                     int index = indexes[col];  | 
154  |  | 
  | 
155  | 0  |                     float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;  | 
156  | 0  |                     int ixp = (int)floor(xp);  | 
157  | 0  |                     float vx0 = xp - ixp;  | 
158  | 0  |                     float vx1 = 1.0f - vx0;  | 
159  |  | 
  | 
160  | 0  |                     h0[ixp*quantization + index] += vx1 * vy1*value;  | 
161  | 0  |                     h1[ixp*quantization + index] += vx1 * vy0*value;  | 
162  | 0  |                 }  | 
163  | 0  |             }  | 
164  | 0  |         }  | 
165  |  |  | 
166  |  |         void HogDirectionHistograms(const uint8_t * src, size_t stride, size_t width, size_t height,  | 
167  |  |             size_t cellX, size_t cellY, size_t quantization, float * histograms)  | 
168  | 0  |         { | 
169  | 0  |             assert(width%cellX == 0 && height%cellY == 0 && quantization % 2 == 0);  | 
170  |  | 
  | 
171  | 0  |             Buffer buffer(width, quantization);  | 
172  |  | 
  | 
173  | 0  |             memset(histograms, 0, quantization*(width / cellX)*(height / cellY) * sizeof(float));  | 
174  |  | 
  | 
175  | 0  |             for (size_t row = 1; row < height - 1; ++row)  | 
176  | 0  |             { | 
177  | 0  |                 const uint8_t * src1 = src + stride * row;  | 
178  | 0  |                 const uint8_t * src0 = src1 - stride;  | 
179  | 0  |                 const uint8_t * src2 = src1 + stride;  | 
180  |  | 
  | 
181  | 0  | #if 1  | 
182  | 0  |                 for (size_t col = 1; col < width - 1; ++col)  | 
183  | 0  |                 { | 
184  | 0  |                     float dy = (float)(src2[col] - src0[col]);  | 
185  | 0  |                     float dx = (float)(src1[col + 1] - src1[col - 1]);  | 
186  | 0  |                     float value = (float)::sqrt(dx*dx + dy * dy);  | 
187  |  | 
  | 
188  | 0  |                     float bestDot = 0;  | 
189  | 0  |                     int index = 0;  | 
190  | 0  |                     for (int direction = 0; direction < buffer.size; direction++)  | 
191  | 0  |                     { | 
192  | 0  |                         float dot = buffer.cos[direction] * dx + buffer.sin[direction] * dy;  | 
193  | 0  |                         if (dot > bestDot)  | 
194  | 0  |                         { | 
195  | 0  |                             bestDot = dot;  | 
196  | 0  |                             index = direction;  | 
197  | 0  |                         }  | 
198  | 0  |                         else if (-dot > bestDot)  | 
199  | 0  |                         { | 
200  | 0  |                             bestDot = -dot;  | 
201  | 0  |                             index = direction + buffer.size;  | 
202  | 0  |                         }  | 
203  | 0  |                     }  | 
204  |  | 
  | 
205  | 0  |                     buffer.value[col] = value;  | 
206  | 0  |                     buffer.index[col] = index;  | 
207  | 0  |                 }  | 
208  |  | #else  | 
209  |  |                 size_t size = (buffer.size + 1) / 2;  | 
210  |  |                 for (size_t col = 1; col < width - 1; ++col)  | 
211  |  |                 { | 
212  |  |                     float dy = (float)(src2[col] - src0[col]);  | 
213  |  |                     float dx = (float)(src1[col + 1] - src1[col - 1]);  | 
214  |  |                     float value = (float)::sqrt(dx*dx + dy * dy);  | 
215  |  |                     float ady = Simd::Abs(dy);  | 
216  |  |                     float adx = Simd::Abs(dx);  | 
217  |  |  | 
218  |  |                     float bestDot = 0;  | 
219  |  |                     int index = 0;  | 
220  |  |                     for (int direction = 0; direction < size; direction++)  | 
221  |  |                     { | 
222  |  |                         float dot = buffer.cos[direction] * adx + buffer.sin[direction] * ady;  | 
223  |  |                         if (dot > bestDot)  | 
224  |  |                         { | 
225  |  |                             bestDot = dot;  | 
226  |  |                             index = direction;  | 
227  |  |                         }  | 
228  |  |                     }  | 
229  |  |                     if (dx < 0)  | 
230  |  |                         index = buffer.size - index;  | 
231  |  |                     if (dy < 0 && index != 0)  | 
232  |  |                         index = buffer.size * 2 - index - (dx == 0);  | 
233  |  |  | 
234  |  |                     buffer.value[col] = value;  | 
235  |  |                     buffer.index[col] = index;  | 
236  |  |                 }  | 
237  |  | #endif  | 
238  |  | 
  | 
239  | 0  |                 AddRowToHistograms(buffer.index, buffer.value, row, width, height, cellX, cellY, quantization, histograms);  | 
240  | 0  |             }  | 
241  | 0  |         }  | 
242  |  |  | 
243  |  |         class HogFeatureExtractor  | 
244  |  |         { | 
245  |  |             static const size_t C = 8;  | 
246  |  |             static const size_t Q = 9;  | 
247  |  |             static const size_t Q2 = 18;  | 
248  |  |  | 
249  |  |             size_t _sx, _sy, _hs;  | 
250  |  |  | 
251  |  |             float _cos[5];  | 
252  |  |             float _sin[5];  | 
253  |  |             float _k[C];  | 
254  |  |  | 
255  |  |             Array32i _index;  | 
256  |  |             Array32f _value;  | 
257  |  |             Array32f _histogram;  | 
258  |  |             Array32f _norm;  | 
259  |  |  | 
260  |  |             void Init(size_t w, size_t h)  | 
261  | 0  |             { | 
262  | 0  |                 _sx = w / C;  | 
263  | 0  |                 _hs = _sx + 2;  | 
264  | 0  |                 _sy = h / C;  | 
265  | 0  |                 for (int i = 0; i < 5; ++i)  | 
266  | 0  |                 { | 
267  | 0  |                     _cos[i] = (float)::cos(i*M_PI / Q);  | 
268  | 0  |                     _sin[i] = (float)::sin(i*M_PI / Q);  | 
269  | 0  |                 }  | 
270  | 0  |                 for (int i = 0; i < C; ++i)  | 
271  | 0  |                     _k[i] = float((1 + i * 2) / 16.0f);  | 
272  | 0  |                 _index.Resize(w);  | 
273  | 0  |                 _value.Resize(w);  | 
274  | 0  |                 _histogram.Resize((_sx + 2)*(_sy + 2)*Q2);  | 
275  | 0  |                 _norm.Resize((_sx + 2)*(_sy + 2));  | 
276  | 0  |             }  | 
277  |  |  | 
278  |  |             void AddRowToHistogram(size_t row, size_t width, size_t height)  | 
279  | 0  |             { | 
280  | 0  |                 size_t iyp = (row - 4) / C;  | 
281  | 0  |                 float vy0 = _k[(row + 4) & 7];  | 
282  | 0  |                 float vy1 = 1.0f - vy0;  | 
283  | 0  |                 float * h0 = _histogram.data + ((iyp + 1)*_hs + 0)*Q2;  | 
284  | 0  |                 float * h1 = _histogram.data + ((iyp + 2)*_hs + 0)*Q2;  | 
285  | 0  |                 for (size_t col = 1, n = C, i = 5; col < width - 1; i = 0, n = Simd::Min<size_t>(C, width - col - 1))  | 
286  | 0  |                 { | 
287  | 0  |                     for (; i < n; ++i, ++col)  | 
288  | 0  |                     { | 
289  | 0  |                         float value = _value[col];  | 
290  | 0  |                         int index = _index[col];  | 
291  | 0  |                         float vx0 = _k[i];  | 
292  | 0  |                         float vx1 = 1.0f - vx0;  | 
293  | 0  |                         h0[index] += vx1 * vy1*value;  | 
294  | 0  |                         h1[index] += vx1 * vy0*value;  | 
295  | 0  |                         h0[Q2 + index] += vx0 * vy1*value;  | 
296  | 0  |                         h1[Q2 + index] += vx0 * vy0*value;  | 
297  | 0  |                     }  | 
298  | 0  |                     h0 += Q2;  | 
299  | 0  |                     h1 += Q2;  | 
300  | 0  |                 }  | 
301  | 0  |             }  | 
302  |  |  | 
303  |  |             void EstimateHistogram(const uint8_t * src, size_t stride, size_t width, size_t height)  | 
304  | 0  |             { | 
305  | 0  |                 _histogram.Clear();  | 
306  | 0  |                 for (size_t row = 1; row < height - 1; ++row)  | 
307  | 0  |                 { | 
308  | 0  |                     const uint8_t * src1 = src + stride * row;  | 
309  | 0  |                     const uint8_t * src0 = src1 - stride;  | 
310  | 0  |                     const uint8_t * src2 = src1 + stride;  | 
311  |  | 
  | 
312  | 0  |                     for (size_t col = 1; col < width - 1; ++col)  | 
313  | 0  |                     { | 
314  | 0  |                         float dy = (float)(src2[col] - src0[col]);  | 
315  | 0  |                         float dx = (float)(src1[col + 1] - src1[col - 1]);  | 
316  | 0  |                         float value = (float)::sqrt(dx*dx + dy * dy);  | 
317  | 0  |                         float ady = Simd::Abs(dy);  | 
318  | 0  |                         float adx = Simd::Abs(dx);  | 
319  |  | 
  | 
320  | 0  |                         float bestDot = 0;  | 
321  | 0  |                         int index = 0;  | 
322  | 0  |                         for (int direction = 0; direction < 5; direction++)  | 
323  | 0  |                         { | 
324  | 0  |                             float dot = _cos[direction] * adx + _sin[direction] * ady;  | 
325  | 0  |                             if (dot > bestDot)  | 
326  | 0  |                             { | 
327  | 0  |                                 bestDot = dot;  | 
328  | 0  |                                 index = direction;  | 
329  | 0  |                             }  | 
330  | 0  |                         }  | 
331  | 0  |                         if (dx < 0)  | 
332  | 0  |                             index = Q - index;  | 
333  | 0  |                         if (dy < 0 && index != 0)  | 
334  | 0  |                             index = Q2 - index - (dx == 0);  | 
335  |  | 
  | 
336  | 0  |                         _value[col] = value;  | 
337  | 0  |                         _index[col] = index;  | 
338  | 0  |                     }  | 
339  |  | 
  | 
340  | 0  |                     AddRowToHistogram(row, width, height);  | 
341  | 0  |                 }  | 
342  | 0  |             }  | 
343  |  |  | 
344  |  |             void EstimateNorm()  | 
345  | 0  |             { | 
346  | 0  |                 _norm.Clear();  | 
347  | 0  |                 for (size_t y = 0; y < _sy; ++y)  | 
348  | 0  |                 { | 
349  | 0  |                     const float * ph = _histogram.data + ((y + 1)*_hs + 1)*Q2;  | 
350  | 0  |                     float * pn = _norm.data + (y + 1)*_hs + 1;  | 
351  | 0  |                     for (size_t x = 0; x < _sx; ++x)  | 
352  | 0  |                     { | 
353  | 0  |                         const float * h = ph + x * Q2;  | 
354  | 0  |                         for (int o = 0; o < Q; ++o)  | 
355  | 0  |                             pn[x] += Simd::Square(h[o] + h[o + Q]);  | 
356  | 0  |                     }  | 
357  | 0  |                 }  | 
358  | 0  |             }  | 
359  |  |  | 
360  |  |             void ExtractFeatures(float * features)  | 
361  | 0  |             { | 
362  | 0  |                 float eps = 0.0001f;  | 
363  | 0  |                 for (size_t y = 0; y < _sy; y++)  | 
364  | 0  |                 { | 
365  | 0  |                     for (size_t x = 0; x < _sx; x++)  | 
366  | 0  |                     { | 
367  | 0  |                         float * dst = features + (y*_sx + x) * 31;  | 
368  |  | 
  | 
369  | 0  |                         float *psrc, n1, n2, n3, n4;  | 
370  |  | 
  | 
371  | 0  |                         float * p0 = _norm.data + y * _hs + x;  | 
372  | 0  |                         float * p1 = p0 + _hs;  | 
373  | 0  |                         float * p2 = p1 + _hs;  | 
374  |  | 
  | 
375  | 0  |                         n1 = 1.0f / sqrt(p1[1] + p1[2] + p2[1] + p2[2] + eps);  | 
376  | 0  |                         n2 = 1.0f / sqrt(p0[1] + p0[2] + p1[1] + p1[2] + eps);  | 
377  | 0  |                         n3 = 1.0f / sqrt(p1[0] + p1[1] + p2[0] + p2[1] + eps);  | 
378  | 0  |                         n4 = 1.0f / sqrt(p0[0] + p0[1] + p1[0] + p1[1] + eps);  | 
379  |  | 
  | 
380  | 0  |                         float t1 = 0;  | 
381  | 0  |                         float t2 = 0;  | 
382  | 0  |                         float t3 = 0;  | 
383  | 0  |                         float t4 = 0;  | 
384  |  | 
  | 
385  | 0  |                         psrc = _histogram.data + ((y + 1)*_hs + x + 1)*Q2;  | 
386  | 0  |                         for (int o = 0; o < Q2; o++)  | 
387  | 0  |                         { | 
388  | 0  |                             float h1 = Simd::Min(*psrc * n1, 0.2f);  | 
389  | 0  |                             float h2 = Simd::Min(*psrc * n2, 0.2f);  | 
390  | 0  |                             float h3 = Simd::Min(*psrc * n3, 0.2f);  | 
391  | 0  |                             float h4 = Simd::Min(*psrc * n4, 0.2f);  | 
392  | 0  |                             *dst = 0.5f * (h1 + h2 + h3 + h4);  | 
393  | 0  |                             t1 += h1;  | 
394  | 0  |                             t2 += h2;  | 
395  | 0  |                             t3 += h3;  | 
396  | 0  |                             t4 += h4;  | 
397  | 0  |                             dst++;  | 
398  | 0  |                             psrc++;  | 
399  | 0  |                         }  | 
400  |  | 
  | 
401  | 0  |                         psrc = _histogram.data + ((y + 1)*_hs + x + 1)*Q2;  | 
402  | 0  |                         for (int o = 0; o < Q; o++)  | 
403  | 0  |                         { | 
404  | 0  |                             float sum = *psrc + *(psrc + Q);  | 
405  | 0  |                             float h1 = Simd::Min(sum * n1, 0.2f);  | 
406  | 0  |                             float h2 = Simd::Min(sum * n2, 0.2f);  | 
407  | 0  |                             float h3 = Simd::Min(sum * n3, 0.2f);  | 
408  | 0  |                             float h4 = Simd::Min(sum * n4, 0.2f);  | 
409  | 0  |                             *dst = 0.5f * (h1 + h2 + h3 + h4);  | 
410  | 0  |                             dst++;  | 
411  | 0  |                             psrc++;  | 
412  | 0  |                         }  | 
413  |  | 
  | 
414  | 0  |                         *dst = 0.2357f * t1;  | 
415  | 0  |                         dst++;  | 
416  | 0  |                         *dst = 0.2357f * t2;  | 
417  | 0  |                         dst++;  | 
418  | 0  |                         *dst = 0.2357f * t3;  | 
419  | 0  |                         dst++;  | 
420  | 0  |                         *dst = 0.2357f * t4;  | 
421  | 0  |                     }  | 
422  | 0  |                 }  | 
423  | 0  |             }  | 
424  |  |  | 
425  |  |         public:  | 
426  |  |             void Run(const uint8_t * src, size_t stride, size_t width, size_t height, float * features)  | 
427  | 0  |             { | 
428  | 0  |                 Init(width, height);  | 
429  |  | 
  | 
430  | 0  |                 EstimateHistogram(src, stride, width, height);  | 
431  |  | 
  | 
432  | 0  |                 EstimateNorm();  | 
433  |  | 
  | 
434  | 0  |                 ExtractFeatures(features);  | 
435  | 0  |             }  | 
436  |  |         };  | 
437  |  |  | 
438  |  |         void HogExtractFeatures(const uint8_t * src, size_t stride, size_t width, size_t height, float * features)  | 
439  | 0  |         { | 
440  | 0  |             assert(width % 8 == 0 && height % 8 == 0 && width >= 16 && height >= 16);  | 
441  |  | 
  | 
442  | 0  |             HogFeatureExtractor extractor;  | 
443  | 0  |             extractor.Run(src, stride, width, height, features);  | 
444  | 0  |         }  | 
445  |  |  | 
446  |  |         namespace HogSeparableFilter_Detail  | 
447  |  |         { | 
448  |  |             template <int add> void Set(float & dst, float value);  | 
449  |  |  | 
450  |  |             template <> SIMD_INLINE void Set<0>(float & dst, float value)  | 
451  | 0  |             { | 
452  | 0  |                 dst = value;  | 
453  | 0  |             }  | 
454  |  |  | 
455  |  |             template <> SIMD_INLINE void Set<1>(float & dst, float value)  | 
456  | 0  |             { | 
457  | 0  |                 dst += value;  | 
458  | 0  |             }  | 
459  |  |         }  | 
460  |  |  | 
461  |  |         void HogDeinterleave(const float * src, size_t srcStride, size_t width, size_t height, size_t count, float ** dst, size_t dstStride)  | 
462  | 0  |         { | 
463  | 0  |             for (size_t row = 0; row < height; ++row)  | 
464  | 0  |             { | 
465  | 0  |                 const float * psrc = src + row * srcStride;  | 
466  | 0  |                 size_t offset = row * dstStride;  | 
467  | 0  |                 for (size_t col = 0; col < width; ++col)  | 
468  | 0  |                 { | 
469  | 0  |                     for (size_t i = 0; i < count; ++i)  | 
470  | 0  |                         dst[i][offset + col] = *psrc++;  | 
471  | 0  |                 }  | 
472  | 0  |             }  | 
473  | 0  |         }  | 
474  |  |  | 
475  |  |         class HogSeparableFilter  | 
476  |  |         { | 
477  |  |             typedef Array<float> Array32f;  | 
478  |  |  | 
479  |  |             size_t _w, _h;  | 
480  |  |             Array32f _buffer;  | 
481  |  |  | 
482  |  |             void Init(size_t w, size_t h, size_t rs, size_t cs)  | 
483  | 0  |             { | 
484  | 0  |                 _w = w - rs + 1;  | 
485  | 0  |                 _h = h - cs + 1;  | 
486  | 0  |                 _buffer.Resize(_w*h);  | 
487  | 0  |             }  | 
488  |  |  | 
489  |  |             void FilterRows(const float * src, size_t srcStride, size_t width, size_t height, const float * filter, size_t size, float * dst, size_t dstStride)  | 
490  | 0  |             { | 
491  | 0  |                 for (size_t row = 0; row < height; ++row)  | 
492  | 0  |                 { | 
493  | 0  |                     for (size_t col = 0; col < width; ++col)  | 
494  | 0  |                     { | 
495  | 0  |                         const float * s = src + col;  | 
496  | 0  |                         float sum = 0;  | 
497  | 0  |                         for (size_t i = 0; i < size; ++i)  | 
498  | 0  |                             sum += s[i] * filter[i];  | 
499  | 0  |                         dst[col] = sum;  | 
500  | 0  |                     }  | 
501  | 0  |                     src += srcStride;  | 
502  | 0  |                     dst += dstStride;  | 
503  | 0  |                 }  | 
504  | 0  |             }  | 
505  |  |  | 
506  |  |  | 
507  |  |             template <int add> void FilterCols(const float * src, size_t srcStride, size_t width, size_t height, const float * filter, size_t size, float * dst, size_t dstStride)  | 
508  | 0  |             { | 
509  | 0  |                 for (size_t row = 0; row < height; ++row)  | 
510  | 0  |                 { | 
511  | 0  |                     for (size_t col = 0; col < width; ++col)  | 
512  | 0  |                     { | 
513  | 0  |                         const float * s = src + col;  | 
514  | 0  |                         float sum = 0;  | 
515  | 0  |                         for (size_t i = 0; i < size; ++i)  | 
516  | 0  |                             sum += s[i*srcStride] * filter[i];  | 
517  | 0  |                         HogSeparableFilter_Detail::Set<add>(dst[col], sum);  | 
518  | 0  |                     }  | 
519  | 0  |                     src += srcStride;  | 
520  | 0  |                     dst += dstStride;  | 
521  | 0  |                 }  | 
522  | 0  |             } Unexecuted instantiation: void Simd::Base::HogSeparableFilter::FilterCols<1>(float const*, unsigned long, unsigned long, unsigned long, float const*, unsigned long, float*, unsigned long) Unexecuted instantiation: void Simd::Base::HogSeparableFilter::FilterCols<0>(float const*, unsigned long, unsigned long, unsigned long, float const*, unsigned long, float*, unsigned long)  | 
523  |  |  | 
524  |  |         public:  | 
525  |  |  | 
526  |  |             void Run(const float * src, size_t srcStride, size_t width, size_t height,  | 
527  |  |                 const float * rowFilter, size_t rowSize, const float * colFilter, size_t colSize, float * dst, size_t dstStride, int add)  | 
528  | 0  |             { | 
529  | 0  |                 Init(width, height, rowSize, colSize);  | 
530  |  | 
  | 
531  | 0  |                 FilterRows(src, srcStride, _w, height, rowFilter, rowSize, _buffer.data, _w);  | 
532  |  | 
  | 
533  | 0  |                 if (add)  | 
534  | 0  |                     FilterCols<1>(_buffer.data, _w, _w, _h, colFilter, colSize, dst, dstStride);  | 
535  | 0  |                 else  | 
536  | 0  |                     FilterCols<0>(_buffer.data, _w, _w, _h, colFilter, colSize, dst, dstStride);  | 
537  | 0  |             }  | 
538  |  |         };  | 
539  |  |  | 
540  |  |         void HogFilterSeparable(const float * src, size_t srcStride, size_t width, size_t height,  | 
541  |  |             const float * rowFilter, size_t rowSize, const float * colFilter, size_t colSize, float * dst, size_t dstStride, int add)  | 
542  | 0  |         { | 
543  | 0  |             assert(width >= rowSize - 1 && height >= colSize - 1);  | 
544  |  | 
  | 
545  | 0  |             HogSeparableFilter filter;  | 
546  | 0  |             filter.Run(src, srcStride, width, height, rowFilter, rowSize, colFilter, colSize, dst, dstStride, add);  | 
547  | 0  |         }  | 
548  |  |     }  | 
549  |  | }  |