Coverage Report

Created: 2026-04-09 07:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdSse41ResizerNearest.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2026 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
#include "Simd/SimdResizer.h"
27
#include "Simd/SimdResizerCommon.h"
28
#include "Simd/SimdCopy.h"
29
#include "Simd/SimdParallel.hpp"
30
31
namespace Simd
32
{
33
#ifdef SIMD_SSE41_ENABLE
34
    namespace Sse41
35
    {
36
        // Builds the SSE4.1 nearest-neighbour resizer on top of the Base implementation.
        // _blocks starts at zero; EstimateParams() treats a zero value as
        // "shuffle tables not prepared yet".
        ResizerNearest::ResizerNearest(const ResParam& param)
            : Base::ResizerNearest(param), _blocks(0)
        {
        }
41
42
        size_t ResizerNearest::BlockCountMax(size_t align)
43
0
        {
44
0
            return (size_t)::ceil(float(Simd::Max(_param.srcW, _param.dstW) * _param.PixelSize()) / (align - _param.PixelSize()));
45
0
        }
46
47
        void ResizerNearest::EstimateParams()
48
0
        {
49
0
            if (_blocks)
50
0
                return;
51
0
            Base::ResizerNearest::EstimateParams();
52
0
            const size_t pixelSize = _param.PixelSize();
53
0
            if (pixelSize *_param.dstW < A || pixelSize * _param.srcW < A)
54
0
                return;
55
0
            if (pixelSize < 4 && _param.srcW < 4 * _param.dstW)
56
0
                _blocks = BlockCountMax(A);
57
0
            float scale = (float)_param.srcW / _param.dstW;
58
0
            if (_blocks)
59
0
            {
60
0
                _tails = 0;
61
0
                _ix16x1.Resize(_blocks);
62
0
                _tail16x1.Resize((size_t)::ceil(A * scale / pixelSize));
63
0
                size_t dstRowSize = _param.dstW * pixelSize;
64
0
                int block = 0;
65
0
                _ix16x1[0].src = 0;
66
0
                _ix16x1[0].dst = 0;
67
0
                for (int dstIndex = 0; dstIndex < (int)_param.dstW; ++dstIndex)
68
0
                {
69
0
                    int srcIndex = _ix[dstIndex] / (int)pixelSize;
70
0
                    int dst = dstIndex * (int)pixelSize - _ix16x1[block].dst;
71
0
                    int src = srcIndex * (int)pixelSize - _ix16x1[block].src;
72
0
                    if (src >= int(A - pixelSize) || dst >= int(A - pixelSize))
73
0
                    {
74
0
                        block++;
75
0
                        _ix16x1[block].src = srcIndex * (int)pixelSize;
76
0
                        _ix16x1[block].dst = dstIndex * (int)pixelSize;
77
0
                        if (_ix16x1[block].dst > int(dstRowSize - A))
78
0
                        {
79
0
                            _tail16x1[_tails] = LeftNotZero8i(dstRowSize - _ix16x1[block].dst);
80
0
                            _tails++;
81
0
                        }
82
0
                        dst = 0;
83
0
                        src = srcIndex * (int)pixelSize - _ix16x1[block].src;
84
0
                    }
85
0
                    for(size_t i = 0; i < pixelSize; ++i)
86
0
                        _ix16x1[block].shuffle[dst + i] = uint8_t(src + i);
87
0
                }
88
0
                _blocks = block + 1;
89
0
            }
90
0
        }
91
92
        void ResizerNearest::Shuffle16x1(const uint8_t* src, size_t srcStride, size_t dyBeg, size_t dyEnd, uint8_t* dst, size_t dstStride)
93
0
        {
94
0
            size_t body = _blocks - _tails;
95
0
            for (size_t dy = dyBeg; dy < dyEnd; dy++)
96
0
            {
97
0
                const uint8_t* srcRow = src + _iy[dy] * srcStride;
98
0
                size_t i = 0, t = 0;
99
0
                for (; i < body; ++i)
100
0
                {
101
0
                   const IndexShuffle16x1& index = _ix16x1[i];
102
0
                    __m128i _src = _mm_loadu_si128((__m128i*)(srcRow + index.src));
103
0
                    __m128i _shuffle = _mm_loadu_si128((__m128i*) & index.shuffle);
104
0
                    _mm_storeu_si128((__m128i*)(dst + index.dst), _mm_shuffle_epi8(_src, _shuffle));
105
0
                }
106
0
                for (; i < _blocks; ++i, t++)
107
0
                {
108
0
                    const IndexShuffle16x1& index = _ix16x1[i];
109
0
                    __m128i _src = _mm_loadu_si128((__m128i*)(srcRow + index.src));
110
0
                    __m128i _shuffle = _mm_loadu_si128((__m128i*) & index.shuffle);
111
0
                    StoreMasked<false>((__m128i*)(dst + index.dst), _mm_shuffle_epi8(_src, _shuffle), _tail16x1[t]);
112
0
                }
113
0
                dst += dstStride;
114
0
            }
115
0
        }
116
117
        // Fast copy of one 12-byte pixel done as a single unaligned 16-byte move.
        // Note that 16 bytes are read from src and 16 bytes are written to dst, so
        // the caller must make sure the 4 bytes after the pixel are accessible on
        // both sides.
        SIMD_INLINE void CopyPixel12(const uint8_t* src, uint8_t* dst)
        {
            _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
        }
122
        
123
        void ResizerNearest::Resize12(const uint8_t* src, size_t srcStride, size_t dyBeg, size_t dyEnd, uint8_t* dst, size_t dstStride)
124
0
        {
125
0
            size_t body = _param.dstW - 1;
126
0
            for (size_t dy = dyBeg; dy < dyEnd; dy++)
127
0
            {
128
0
                const uint8_t* srcRow = src + _iy[dy] * srcStride;
129
0
                size_t dx = 0, offset = 0;
130
0
                for (; dx < body; dx++, offset += 12)
131
0
                    CopyPixel12(srcRow + _ix[dx], dst + offset);
132
0
                Base::CopyPixel<12>(srcRow + _ix[dx], dst + offset);
133
0
                dst += dstStride;
134
0
            }
135
0
        }
136
137
        void ResizerNearest::Run(const uint8_t* src, size_t srcStride, uint8_t* dst, size_t dstStride)
138
0
        {
139
0
            EstimateParams();
140
0
            if (_blocks)
141
0
            {
142
0
                Simd::Parallel(0, _param.dstH, [&](size_t thread, size_t dstBeg, size_t dstEnd)
143
0
                {
144
0
                    this->Shuffle16x1(src, srcStride, dstBeg, dstEnd, dst + dstBeg * dstStride, dstStride);
145
0
                }, _threads, 1);
146
0
            }
147
0
            else if (_pixelSize == 12)
148
0
            {
149
0
                Simd::Parallel(0, _param.dstH, [&](size_t thread, size_t dstBeg, size_t dstEnd)
150
0
                {
151
0
                    this->Resize12(src, srcStride, dstBeg, dstEnd, dst + dstBeg * dstStride, dstStride);
152
0
                }, _threads, 1);
153
0
            }
154
0
            else
155
0
                Base::ResizerNearest::Run(src, srcStride, dst, dstStride);
156
0
        }
157
    }
158
#endif
159
}
160