Coverage Report

Created: 2024-10-01 06:54

/src/Simd/src/Simd/SimdBaseHog.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2019 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdArray.h"
25
26
namespace Simd
27
{
28
    namespace Base
29
    {
30
        namespace
31
        {
32
            struct Buffer
33
            {
34
                const int size;
35
                float * cos, *sin;
36
                int * index;
37
                float * value;
38
39
                Buffer(size_t width, size_t quantization)
40
                    : size((int)quantization / 2)
41
0
                {
42
0
                    _p = Allocate(width*(sizeof(int) + sizeof(float)) + sizeof(float) * 2 * size);
43
0
                    index = (int*)_p;
44
0
                    value = (float*)index + width;
45
0
                    cos = value + width;
46
0
                    sin = cos + size;
47
0
                    for (int i = 0; i < size; ++i)
48
0
                    {
49
0
                        cos[i] = (float)::cos(i*M_PI / size);
50
0
                        sin[i] = (float)::sin(i*M_PI / size);
51
0
                    }
52
0
                }
53
54
                ~Buffer()
55
0
                {
56
0
                    Free(_p);
57
0
                }
58
59
            private:
60
                void *_p;
61
            };
62
        }
63
64
        void AddRowToHistograms(int * indexes, float * values, size_t row, size_t width, size_t height, size_t cellX, size_t cellY, size_t quantization, float * histograms)
65
0
        {
66
0
            int blockX = int(width / cellX);
67
0
            int blockY = int(height / cellY);
68
0
            int blockStride = int(quantization*blockX);
69
70
0
            float yp = ((float)row + 0.5f) / (float)cellY - 0.5f;
71
0
            int iyp = (int)floor(yp);
72
0
            float vy0 = yp - iyp;
73
0
            float vy1 = 1.0f - vy0;
74
75
0
            size_t noseEnd = cellX / 2;
76
0
            size_t bodyEnd = width - cellX / 2;
77
78
0
            if (iyp < 0)
79
0
            {
80
0
                float * h = histograms + (iyp + 1)*blockStride;
81
0
                for (size_t col = 1; col < width - 1; ++col)
82
0
                {
83
0
                    float value = values[col];
84
0
                    int index = indexes[col];
85
86
0
                    float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;
87
0
                    int ixp = (int)floor(xp);
88
0
                    float vx0 = xp - ixp;
89
0
                    float vx1 = 1.0f - vx0;
90
91
0
                    if (ixp >= 0)
92
0
                        h[ixp*quantization + index] += vx1 * vy0*value;
93
0
                    if (ixp + 1 < blockX)
94
0
                        h[(ixp + 1)*quantization + index] += vx0 * vy0*value;
95
0
                }
96
0
            }
97
0
            else if (iyp + 1 == blockY)
98
0
            {
99
0
                float * h = histograms + iyp * blockStride;
100
0
                for (size_t col = 1; col < width - 1; ++col)
101
0
                {
102
0
                    float value = values[col];
103
0
                    int index = indexes[col];
104
105
0
                    float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;
106
0
                    int ixp = (int)floor(xp);
107
0
                    float vx0 = xp - ixp;
108
0
                    float vx1 = 1.0f - vx0;
109
110
0
                    if (ixp >= 0)
111
0
                        h[ixp*quantization + index] += vx1 * vy1*value;
112
0
                    if (ixp + 1 < blockX)
113
0
                        h[(ixp + 1)*quantization + index] += vx0 * vy1*value;
114
0
                }
115
0
            }
116
0
            else
117
0
            {
118
0
                float * h0 = histograms + iyp * blockStride;
119
0
                float * h1 = histograms + (iyp + 1)*blockStride;
120
0
                size_t col = 1;
121
0
                for (; col < noseEnd; ++col)
122
0
                {
123
0
                    float value = values[col];
124
0
                    int index = indexes[col];
125
126
0
                    float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;
127
0
                    int ixp = (int)floor(xp);
128
0
                    float vx0 = xp - ixp;
129
130
0
                    h0[(ixp + 1)*quantization + index] += vx0 * vy1*value;
131
0
                    h1[(ixp + 1)*quantization + index] += vx0 * vy0*value;
132
0
                }
133
134
0
                for (; col < bodyEnd; ++col)
135
0
                {
136
0
                    float value = values[col];
137
0
                    int index = indexes[col];
138
139
0
                    float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;
140
0
                    int ixp = (int)floor(xp);
141
0
                    float vx0 = xp - ixp;
142
0
                    float vx1 = 1.0f - vx0;
143
144
0
                    h0[ixp*quantization + index] += vx1 * vy1*value;
145
0
                    h1[ixp*quantization + index] += vx1 * vy0*value;
146
0
                    h0[(ixp + 1)*quantization + index] += vx0 * vy1*value;
147
0
                    h1[(ixp + 1)*quantization + index] += vx0 * vy0*value;
148
0
                }
149
150
0
                for (; col < width - 1; ++col)
151
0
                {
152
0
                    float value = values[col];
153
0
                    int index = indexes[col];
154
155
0
                    float xp = ((float)col + 0.5f) / (float)cellX - 0.5f;
156
0
                    int ixp = (int)floor(xp);
157
0
                    float vx0 = xp - ixp;
158
0
                    float vx1 = 1.0f - vx0;
159
160
0
                    h0[ixp*quantization + index] += vx1 * vy1*value;
161
0
                    h1[ixp*quantization + index] += vx1 * vy0*value;
162
0
                }
163
0
            }
164
0
        }
165
166
        void HogDirectionHistograms(const uint8_t * src, size_t stride, size_t width, size_t height,
167
            size_t cellX, size_t cellY, size_t quantization, float * histograms)
168
0
        {
169
0
            assert(width%cellX == 0 && height%cellY == 0 && quantization % 2 == 0);
170
171
0
            Buffer buffer(width, quantization);
172
173
0
            memset(histograms, 0, quantization*(width / cellX)*(height / cellY) * sizeof(float));
174
175
0
            for (size_t row = 1; row < height - 1; ++row)
176
0
            {
177
0
                const uint8_t * src1 = src + stride * row;
178
0
                const uint8_t * src0 = src1 - stride;
179
0
                const uint8_t * src2 = src1 + stride;
180
181
0
#if 1
182
0
                for (size_t col = 1; col < width - 1; ++col)
183
0
                {
184
0
                    float dy = (float)(src2[col] - src0[col]);
185
0
                    float dx = (float)(src1[col + 1] - src1[col - 1]);
186
0
                    float value = (float)::sqrt(dx*dx + dy * dy);
187
188
0
                    float bestDot = 0;
189
0
                    int index = 0;
190
0
                    for (int direction = 0; direction < buffer.size; direction++)
191
0
                    {
192
0
                        float dot = buffer.cos[direction] * dx + buffer.sin[direction] * dy;
193
0
                        if (dot > bestDot)
194
0
                        {
195
0
                            bestDot = dot;
196
0
                            index = direction;
197
0
                        }
198
0
                        else if (-dot > bestDot)
199
0
                        {
200
0
                            bestDot = -dot;
201
0
                            index = direction + buffer.size;
202
0
                        }
203
0
                    }
204
205
0
                    buffer.value[col] = value;
206
0
                    buffer.index[col] = index;
207
0
                }
208
#else
209
                size_t size = (buffer.size + 1) / 2;
210
                for (size_t col = 1; col < width - 1; ++col)
211
                {
212
                    float dy = (float)(src2[col] - src0[col]);
213
                    float dx = (float)(src1[col + 1] - src1[col - 1]);
214
                    float value = (float)::sqrt(dx*dx + dy * dy);
215
                    float ady = Simd::Abs(dy);
216
                    float adx = Simd::Abs(dx);
217
218
                    float bestDot = 0;
219
                    int index = 0;
220
                    for (int direction = 0; direction < size; direction++)
221
                    {
222
                        float dot = buffer.cos[direction] * adx + buffer.sin[direction] * ady;
223
                        if (dot > bestDot)
224
                        {
225
                            bestDot = dot;
226
                            index = direction;
227
                        }
228
                    }
229
                    if (dx < 0)
230
                        index = buffer.size - index;
231
                    if (dy < 0 && index != 0)
232
                        index = buffer.size * 2 - index - (dx == 0);
233
234
                    buffer.value[col] = value;
235
                    buffer.index[col] = index;
236
                }
237
#endif
238
239
0
                AddRowToHistograms(buffer.index, buffer.value, row, width, height, cellX, cellY, quantization, histograms);
240
0
            }
241
0
        }
242
243
        class HogFeatureExtractor
244
        {
245
            static const size_t C = 8;
246
            static const size_t Q = 9;
247
            static const size_t Q2 = 18;
248
249
            size_t _sx, _sy, _hs;
250
251
            float _cos[5];
252
            float _sin[5];
253
            float _k[C];
254
255
            Array32i _index;
256
            Array32f _value;
257
            Array32f _histogram;
258
            Array32f _norm;
259
260
            void Init(size_t w, size_t h)
261
0
            {
262
0
                _sx = w / C;
263
0
                _hs = _sx + 2;
264
0
                _sy = h / C;
265
0
                for (int i = 0; i < 5; ++i)
266
0
                {
267
0
                    _cos[i] = (float)::cos(i*M_PI / Q);
268
0
                    _sin[i] = (float)::sin(i*M_PI / Q);
269
0
                }
270
0
                for (int i = 0; i < C; ++i)
271
0
                    _k[i] = float((1 + i * 2) / 16.0f);
272
0
                _index.Resize(w);
273
0
                _value.Resize(w);
274
0
                _histogram.Resize((_sx + 2)*(_sy + 2)*Q2);
275
0
                _norm.Resize((_sx + 2)*(_sy + 2));
276
0
            }
277
278
            void AddRowToHistogram(size_t row, size_t width, size_t height)
279
0
            {
280
0
                size_t iyp = (row - 4) / C;
281
0
                float vy0 = _k[(row + 4) & 7];
282
0
                float vy1 = 1.0f - vy0;
283
0
                float * h0 = _histogram.data + ((iyp + 1)*_hs + 0)*Q2;
284
0
                float * h1 = _histogram.data + ((iyp + 2)*_hs + 0)*Q2;
285
0
                for (size_t col = 1, n = C, i = 5; col < width - 1; i = 0, n = Simd::Min<size_t>(C, width - col - 1))
286
0
                {
287
0
                    for (; i < n; ++i, ++col)
288
0
                    {
289
0
                        float value = _value[col];
290
0
                        int index = _index[col];
291
0
                        float vx0 = _k[i];
292
0
                        float vx1 = 1.0f - vx0;
293
0
                        h0[index] += vx1 * vy1*value;
294
0
                        h1[index] += vx1 * vy0*value;
295
0
                        h0[Q2 + index] += vx0 * vy1*value;
296
0
                        h1[Q2 + index] += vx0 * vy0*value;
297
0
                    }
298
0
                    h0 += Q2;
299
0
                    h1 += Q2;
300
0
                }
301
0
            }
302
303
            void EstimateHistogram(const uint8_t * src, size_t stride, size_t width, size_t height)
304
0
            {
305
0
                _histogram.Clear();
306
0
                for (size_t row = 1; row < height - 1; ++row)
307
0
                {
308
0
                    const uint8_t * src1 = src + stride * row;
309
0
                    const uint8_t * src0 = src1 - stride;
310
0
                    const uint8_t * src2 = src1 + stride;
311
312
0
                    for (size_t col = 1; col < width - 1; ++col)
313
0
                    {
314
0
                        float dy = (float)(src2[col] - src0[col]);
315
0
                        float dx = (float)(src1[col + 1] - src1[col - 1]);
316
0
                        float value = (float)::sqrt(dx*dx + dy * dy);
317
0
                        float ady = Simd::Abs(dy);
318
0
                        float adx = Simd::Abs(dx);
319
320
0
                        float bestDot = 0;
321
0
                        int index = 0;
322
0
                        for (int direction = 0; direction < 5; direction++)
323
0
                        {
324
0
                            float dot = _cos[direction] * adx + _sin[direction] * ady;
325
0
                            if (dot > bestDot)
326
0
                            {
327
0
                                bestDot = dot;
328
0
                                index = direction;
329
0
                            }
330
0
                        }
331
0
                        if (dx < 0)
332
0
                            index = Q - index;
333
0
                        if (dy < 0 && index != 0)
334
0
                            index = Q2 - index - (dx == 0);
335
336
0
                        _value[col] = value;
337
0
                        _index[col] = index;
338
0
                    }
339
340
0
                    AddRowToHistogram(row, width, height);
341
0
                }
342
0
            }
343
344
            void EstimateNorm()
345
0
            {
346
0
                _norm.Clear();
347
0
                for (size_t y = 0; y < _sy; ++y)
348
0
                {
349
0
                    const float * ph = _histogram.data + ((y + 1)*_hs + 1)*Q2;
350
0
                    float * pn = _norm.data + (y + 1)*_hs + 1;
351
0
                    for (size_t x = 0; x < _sx; ++x)
352
0
                    {
353
0
                        const float * h = ph + x * Q2;
354
0
                        for (int o = 0; o < Q; ++o)
355
0
                            pn[x] += Simd::Square(h[o] + h[o + Q]);
356
0
                    }
357
0
                }
358
0
            }
359
360
            void ExtractFeatures(float * features)
361
0
            {
362
0
                float eps = 0.0001f;
363
0
                for (size_t y = 0; y < _sy; y++)
364
0
                {
365
0
                    for (size_t x = 0; x < _sx; x++)
366
0
                    {
367
0
                        float * dst = features + (y*_sx + x) * 31;
368
369
0
                        float *psrc, n1, n2, n3, n4;
370
371
0
                        float * p0 = _norm.data + y * _hs + x;
372
0
                        float * p1 = p0 + _hs;
373
0
                        float * p2 = p1 + _hs;
374
375
0
                        n1 = 1.0f / sqrt(p1[1] + p1[2] + p2[1] + p2[2] + eps);
376
0
                        n2 = 1.0f / sqrt(p0[1] + p0[2] + p1[1] + p1[2] + eps);
377
0
                        n3 = 1.0f / sqrt(p1[0] + p1[1] + p2[0] + p2[1] + eps);
378
0
                        n4 = 1.0f / sqrt(p0[0] + p0[1] + p1[0] + p1[1] + eps);
379
380
0
                        float t1 = 0;
381
0
                        float t2 = 0;
382
0
                        float t3 = 0;
383
0
                        float t4 = 0;
384
385
0
                        psrc = _histogram.data + ((y + 1)*_hs + x + 1)*Q2;
386
0
                        for (int o = 0; o < Q2; o++)
387
0
                        {
388
0
                            float h1 = Simd::Min(*psrc * n1, 0.2f);
389
0
                            float h2 = Simd::Min(*psrc * n2, 0.2f);
390
0
                            float h3 = Simd::Min(*psrc * n3, 0.2f);
391
0
                            float h4 = Simd::Min(*psrc * n4, 0.2f);
392
0
                            *dst = 0.5f * (h1 + h2 + h3 + h4);
393
0
                            t1 += h1;
394
0
                            t2 += h2;
395
0
                            t3 += h3;
396
0
                            t4 += h4;
397
0
                            dst++;
398
0
                            psrc++;
399
0
                        }
400
401
0
                        psrc = _histogram.data + ((y + 1)*_hs + x + 1)*Q2;
402
0
                        for (int o = 0; o < Q; o++)
403
0
                        {
404
0
                            float sum = *psrc + *(psrc + Q);
405
0
                            float h1 = Simd::Min(sum * n1, 0.2f);
406
0
                            float h2 = Simd::Min(sum * n2, 0.2f);
407
0
                            float h3 = Simd::Min(sum * n3, 0.2f);
408
0
                            float h4 = Simd::Min(sum * n4, 0.2f);
409
0
                            *dst = 0.5f * (h1 + h2 + h3 + h4);
410
0
                            dst++;
411
0
                            psrc++;
412
0
                        }
413
414
0
                        *dst = 0.2357f * t1;
415
0
                        dst++;
416
0
                        *dst = 0.2357f * t2;
417
0
                        dst++;
418
0
                        *dst = 0.2357f * t3;
419
0
                        dst++;
420
0
                        *dst = 0.2357f * t4;
421
0
                    }
422
0
                }
423
0
            }
424
425
        public:
426
            void Run(const uint8_t * src, size_t stride, size_t width, size_t height, float * features)
427
0
            {
428
0
                Init(width, height);
429
430
0
                EstimateHistogram(src, stride, width, height);
431
432
0
                EstimateNorm();
433
434
0
                ExtractFeatures(features);
435
0
            }
436
        };
437
438
        void HogExtractFeatures(const uint8_t * src, size_t stride, size_t width, size_t height, float * features)
439
0
        {
440
0
            assert(width % 8 == 0 && height % 8 == 0 && width >= 16 && height >= 16);
441
442
0
            HogFeatureExtractor extractor;
443
0
            extractor.Run(src, stride, width, height, features);
444
0
        }
445
446
        namespace HogSeparableFilter_Detail
447
        {
448
            template <int add> void Set(float & dst, float value);
449
450
            template <> SIMD_INLINE void Set<0>(float & dst, float value)
451
0
            {
452
0
                dst = value;
453
0
            }
454
455
            template <> SIMD_INLINE void Set<1>(float & dst, float value)
456
0
            {
457
0
                dst += value;
458
0
            }
459
        }
460
461
        void HogDeinterleave(const float * src, size_t srcStride, size_t width, size_t height, size_t count, float ** dst, size_t dstStride)
462
0
        {
463
0
            for (size_t row = 0; row < height; ++row)
464
0
            {
465
0
                const float * psrc = src + row * srcStride;
466
0
                size_t offset = row * dstStride;
467
0
                for (size_t col = 0; col < width; ++col)
468
0
                {
469
0
                    for (size_t i = 0; i < count; ++i)
470
0
                        dst[i][offset + col] = *psrc++;
471
0
                }
472
0
            }
473
0
        }
474
475
        class HogSeparableFilter
476
        {
477
            typedef Array<float> Array32f;
478
479
            size_t _w, _h;
480
            Array32f _buffer;
481
482
            void Init(size_t w, size_t h, size_t rs, size_t cs)
483
0
            {
484
0
                _w = w - rs + 1;
485
0
                _h = h - cs + 1;
486
0
                _buffer.Resize(_w*h);
487
0
            }
488
489
            void FilterRows(const float * src, size_t srcStride, size_t width, size_t height, const float * filter, size_t size, float * dst, size_t dstStride)
490
0
            {
491
0
                for (size_t row = 0; row < height; ++row)
492
0
                {
493
0
                    for (size_t col = 0; col < width; ++col)
494
0
                    {
495
0
                        const float * s = src + col;
496
0
                        float sum = 0;
497
0
                        for (size_t i = 0; i < size; ++i)
498
0
                            sum += s[i] * filter[i];
499
0
                        dst[col] = sum;
500
0
                    }
501
0
                    src += srcStride;
502
0
                    dst += dstStride;
503
0
                }
504
0
            }
505
506
507
            template <int add> void FilterCols(const float * src, size_t srcStride, size_t width, size_t height, const float * filter, size_t size, float * dst, size_t dstStride)
508
0
            {
509
0
                for (size_t row = 0; row < height; ++row)
510
0
                {
511
0
                    for (size_t col = 0; col < width; ++col)
512
0
                    {
513
0
                        const float * s = src + col;
514
0
                        float sum = 0;
515
0
                        for (size_t i = 0; i < size; ++i)
516
0
                            sum += s[i*srcStride] * filter[i];
517
0
                        HogSeparableFilter_Detail::Set<add>(dst[col], sum);
518
0
                    }
519
0
                    src += srcStride;
520
0
                    dst += dstStride;
521
0
                }
522
0
            }
Unexecuted instantiation: void Simd::Base::HogSeparableFilter::FilterCols<1>(float const*, unsigned long, unsigned long, unsigned long, float const*, unsigned long, float*, unsigned long)
Unexecuted instantiation: void Simd::Base::HogSeparableFilter::FilterCols<0>(float const*, unsigned long, unsigned long, unsigned long, float const*, unsigned long, float*, unsigned long)
523
524
        public:
525
526
            void Run(const float * src, size_t srcStride, size_t width, size_t height,
527
                const float * rowFilter, size_t rowSize, const float * colFilter, size_t colSize, float * dst, size_t dstStride, int add)
528
0
            {
529
0
                Init(width, height, rowSize, colSize);
530
531
0
                FilterRows(src, srcStride, _w, height, rowFilter, rowSize, _buffer.data, _w);
532
533
0
                if (add)
534
0
                    FilterCols<1>(_buffer.data, _w, _w, _h, colFilter, colSize, dst, dstStride);
535
0
                else
536
0
                    FilterCols<0>(_buffer.data, _w, _w, _h, colFilter, colSize, dst, dstStride);
537
0
            }
538
        };
539
540
        void HogFilterSeparable(const float * src, size_t srcStride, size_t width, size_t height,
541
            const float * rowFilter, size_t rowSize, const float * colFilter, size_t colSize, float * dst, size_t dstStride, int add)
542
0
        {
543
0
            assert(width >= rowSize - 1 && height >= colSize - 1);
544
545
0
            HogSeparableFilter filter;
546
0
            filter.Run(src, srcStride, width, height, rowFilter, rowSize, colFilter, colSize, dst, dstStride, add);
547
0
        }
548
    }
549
}