Coverage Report

Created: 2025-08-11 07:29

/src/Simd/src/Simd/SimdSse41Texture.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2022 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
#include "Simd/SimdSet.h"
27
#include "Simd/SimdExtract.h"
28
#include "Simd/SimdBase.h"
29
#include "Simd/SimdUnpack.h"
30
31
namespace Simd
32
{
33
#ifdef SIMD_SSE41_ENABLE    
34
    namespace Sse41
35
    {
36
        SIMD_INLINE __m128i TextureBoostedSaturatedGradient16(__m128i difference, __m128i saturation, const __m128i & boost)
37
0
        {
38
0
            return _mm_mullo_epi16(_mm_max_epi16(K_ZERO, _mm_add_epi16(saturation, _mm_min_epi16(difference, saturation))), boost);
39
0
        }
40
41
        SIMD_INLINE __m128i TextureBoostedSaturatedGradient8(__m128i a, __m128i b, __m128i saturation, const __m128i & boost)
42
0
        {
43
0
            __m128i lo = TextureBoostedSaturatedGradient16(SubUnpackedU8<0>(b, a), saturation, boost);
44
0
            __m128i hi = TextureBoostedSaturatedGradient16(SubUnpackedU8<1>(b, a), saturation, boost);
45
0
            return _mm_packus_epi16(lo, hi);
46
0
        }
47
48
        template<bool align> SIMD_INLINE void TextureBoostedSaturatedGradient(const uint8_t * src, uint8_t * dx, uint8_t * dy,
49
            size_t stride, __m128i saturation, __m128i boost)
50
0
        {
51
0
            const __m128i s10 = Load<false>((__m128i*)(src - 1));
52
0
            const __m128i s12 = Load<false>((__m128i*)(src + 1));
53
0
            const __m128i s01 = Load<align>((__m128i*)(src - stride));
54
0
            const __m128i s21 = Load<align>((__m128i*)(src + stride));
55
0
            Store<align>((__m128i*)dx, TextureBoostedSaturatedGradient8(s10, s12, saturation, boost));
56
0
            Store<align>((__m128i*)dy, TextureBoostedSaturatedGradient8(s01, s21, saturation, boost));
57
0
        }
Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<true>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(2), long long __vector(2))
Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<false>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(2), long long __vector(2))
58
59
        template<bool align> void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height,
60
            uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride)
61
0
        {
62
0
            assert(width >= A && int(2)*saturation*boost <= 0xFF);
63
0
            if (align)
64
0
            {
65
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dx) && Aligned(dxStride) && Aligned(dy) && Aligned(dyStride));
66
0
            }
67
68
0
            size_t alignedWidth = AlignLo(width, A);
69
0
            __m128i _saturation = _mm_set1_epi16(saturation);
70
0
            __m128i _boost = _mm_set1_epi16(boost);
71
72
0
            memset(dx, 0, width);
73
0
            memset(dy, 0, width);
74
0
            src += srcStride;
75
0
            dx += dxStride;
76
0
            dy += dyStride;
77
0
            for (size_t row = 2; row < height; ++row)
78
0
            {
79
0
                for (size_t col = 0; col < alignedWidth; col += A)
80
0
                    TextureBoostedSaturatedGradient<align>(src + col, dx + col, dy + col, srcStride, _saturation, _boost);
81
0
                if (width != alignedWidth)
82
0
                    TextureBoostedSaturatedGradient<false>(src + width - A, dx + width - A, dy + width - A, srcStride, _saturation, _boost);
83
84
0
                dx[0] = 0;
85
0
                dy[0] = 0;
86
0
                dx[width - 1] = 0;
87
0
                dy[width - 1] = 0;
88
89
0
                src += srcStride;
90
0
                dx += dxStride;
91
0
                dy += dyStride;
92
0
            }
93
0
            memset(dx, 0, width);
94
0
            memset(dy, 0, width);
95
0
        }
Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char, unsigned char*, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char, unsigned char*, unsigned long, unsigned char*, unsigned long)
96
97
        void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height,
98
            uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride)
99
0
        {
100
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(dx) && Aligned(dxStride) && Aligned(dy) && Aligned(dyStride))
101
0
                TextureBoostedSaturatedGradient<true>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
102
0
            else
103
0
                TextureBoostedSaturatedGradient<false>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
104
0
        }
105
106
        //-----------------------------------------------------------------------------------------
107
108
        template<bool align> SIMD_INLINE void TextureBoostedUv(const uint8_t* src, uint8_t* dst, __m128i min8, __m128i max8, __m128i boost16)
109
0
        {
110
0
            const __m128i _src = Load<align>((__m128i*)src);
111
0
            const __m128i saturated = _mm_sub_epi8(_mm_max_epu8(min8, _mm_min_epu8(max8, _src)), min8);
112
0
            const __m128i lo = _mm_mullo_epi16(_mm_unpacklo_epi8(saturated, K_ZERO), boost16);
113
0
            const __m128i hi = _mm_mullo_epi16(_mm_unpackhi_epi8(saturated, K_ZERO), boost16);
114
0
            Store<align>((__m128i*)dst, _mm_packus_epi16(lo, hi));
115
0
        }
Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<true>(unsigned char const*, unsigned char*, long long __vector(2), long long __vector(2), long long __vector(2))
Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<false>(unsigned char const*, unsigned char*, long long __vector(2), long long __vector(2), long long __vector(2))
116
117
        template<bool align> void TextureBoostedUv(const uint8_t* src, size_t srcStride, size_t width, size_t height,
118
            uint8_t boost, uint8_t* dst, size_t dstStride)
119
0
        {
120
0
            assert(width >= A && boost < 0x80);
121
0
            if (align)
122
0
            {
123
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride));
124
0
            }
125
126
0
            size_t alignedWidth = AlignLo(width, A);
127
0
            int min = 128 - (128 / boost);
128
0
            int max = 255 - min;
129
130
0
            __m128i min8 = _mm_set1_epi8(min);
131
0
            __m128i max8 = _mm_set1_epi8(max);
132
0
            __m128i boost16 = _mm_set1_epi16(boost);
133
134
0
            for (size_t row = 0; row < height; ++row)
135
0
            {
136
0
                for (size_t col = 0; col < alignedWidth; col += A)
137
0
                    TextureBoostedUv<align>(src + col, dst + col, min8, max8, boost16);
138
0
                if (width != alignedWidth)
139
0
                    TextureBoostedUv<false>(src + width - A, dst + width - A, min8, max8, boost16);
140
141
0
                src += srcStride;
142
0
                dst += dstStride;
143
0
            }
144
0
        }
Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char*, unsigned long)
145
146
        void TextureBoostedUv(const uint8_t* src, size_t srcStride, size_t width, size_t height,
147
            uint8_t boost, uint8_t* dst, size_t dstStride)
148
0
        {
149
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride))
150
0
                TextureBoostedUv<true>(src, srcStride, width, height, boost, dst, dstStride);
151
0
            else
152
0
                TextureBoostedUv<false>(src, srcStride, width, height, boost, dst, dstStride);
153
0
        }
154
155
        //-----------------------------------------------------------------------------------------
156
157
        template <bool align> SIMD_INLINE void TextureGetDifferenceSum(const uint8_t* src, const uint8_t* lo, const uint8_t* hi,
158
            __m128i& positive, __m128i& negative, const __m128i& mask)
159
0
        {
160
0
            const __m128i _src = Load<align>((__m128i*)src);
161
0
            const __m128i _lo = Load<align>((__m128i*)lo);
162
0
            const __m128i _hi = Load<align>((__m128i*)hi);
163
0
            const __m128i average = _mm_and_si128(mask, _mm_avg_epu8(_lo, _hi));
164
0
            const __m128i current = _mm_and_si128(mask, _src);
165
0
            positive = _mm_add_epi64(positive, _mm_sad_epu8(_mm_subs_epu8(current, average), K_ZERO));
166
0
            negative = _mm_add_epi64(negative, _mm_sad_epu8(_mm_subs_epu8(average, current), K_ZERO));
167
0
        }
Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<true>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(2)&, long long __vector(2)&, long long __vector(2) const&)
Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<false>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(2)&, long long __vector(2)&, long long __vector(2) const&)
168
169
        template <bool align> void TextureGetDifferenceSum(const uint8_t* src, size_t srcStride, size_t width, size_t height,
170
            const uint8_t* lo, size_t loStride, const uint8_t* hi, size_t hiStride, int64_t* sum)
171
0
        {
172
0
            assert(width >= A && sum != NULL);
173
0
            if (align)
174
0
            {
175
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(lo) && Aligned(loStride) && Aligned(hi) && Aligned(hiStride));
176
0
            }
177
178
0
            size_t alignedWidth = AlignLo(width, A);
179
0
            __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth);
180
0
            __m128i positive = _mm_setzero_si128();
181
0
            __m128i negative = _mm_setzero_si128();
182
0
            for (size_t row = 0; row < height; ++row)
183
0
            {
184
0
                for (size_t col = 0; col < alignedWidth; col += A)
185
0
                    TextureGetDifferenceSum<align>(src + col, lo + col, hi + col, positive, negative, K_INV_ZERO);
186
0
                if (width != alignedWidth)
187
0
                    TextureGetDifferenceSum<false>(src + width - A, lo + width - A, hi + width - A, positive, negative, tailMask);
188
0
                src += srcStride;
189
0
                lo += loStride;
190
0
                hi += hiStride;
191
0
            }
192
0
            *sum = ExtractInt64Sum(positive) - ExtractInt64Sum(negative);
193
0
        }
Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, long*)
Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, long*)
194
195
        void TextureGetDifferenceSum(const uint8_t* src, size_t srcStride, size_t width, size_t height,
196
            const uint8_t* lo, size_t loStride, const uint8_t* hi, size_t hiStride, int64_t* sum)
197
0
        {
198
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(lo) && Aligned(loStride) && Aligned(hi) && Aligned(hiStride))
199
0
                TextureGetDifferenceSum<true>(src, srcStride, width, height, lo, loStride, hi, hiStride, sum);
200
0
            else
201
0
                TextureGetDifferenceSum<false>(src, srcStride, width, height, lo, loStride, hi, hiStride, sum);
202
0
        }
203
204
        //-----------------------------------------------------------------------------------------
205
206
        template <bool align> void TexturePerformCompensation(const uint8_t* src, size_t srcStride, size_t width, size_t height,
207
            int shift, uint8_t* dst, size_t dstStride)
208
0
        {
209
0
            assert(width >= A && shift > -0xFF && shift < 0xFF && shift != 0);
210
0
            if (align)
211
0
            {
212
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride));
213
0
            }
214
215
0
            size_t alignedWidth = AlignLo(width, A);
216
0
            __m128i tailMask = src == dst ? ShiftLeft(K_INV_ZERO, A - width + alignedWidth) : K_INV_ZERO;
217
0
            if (shift > 0)
218
0
            {
219
0
                __m128i _shift = _mm_set1_epi8((char)shift);
220
0
                for (size_t row = 0; row < height; ++row)
221
0
                {
222
0
                    for (size_t col = 0; col < alignedWidth; col += A)
223
0
                    {
224
0
                        const __m128i _src = Load<align>((__m128i*) (src + col));
225
0
                        Store<align>((__m128i*) (dst + col), _mm_adds_epu8(_src, _shift));
226
0
                    }
227
0
                    if (width != alignedWidth)
228
0
                    {
229
0
                        const __m128i _src = Load<false>((__m128i*) (src + width - A));
230
0
                        Store<false>((__m128i*) (dst + width - A), _mm_adds_epu8(_src, _mm_and_si128(_shift, tailMask)));
231
0
                    }
232
0
                    src += srcStride;
233
0
                    dst += dstStride;
234
0
                }
235
0
            }
236
0
            if (shift < 0)
237
0
            {
238
0
                __m128i _shift = _mm_set1_epi8((char)-shift);
239
0
                for (size_t row = 0; row < height; ++row)
240
0
                {
241
0
                    for (size_t col = 0; col < alignedWidth; col += A)
242
0
                    {
243
0
                        const __m128i _src = Load<align>((__m128i*) (src + col));
244
0
                        Store<align>((__m128i*) (dst + col), _mm_subs_epu8(_src, _shift));
245
0
                    }
246
0
                    if (width != alignedWidth)
247
0
                    {
248
0
                        const __m128i _src = Load<false>((__m128i*) (src + width - A));
249
0
                        Store<false>((__m128i*) (dst + width - A), _mm_subs_epu8(_src, _mm_and_si128(_shift, tailMask)));
250
0
                    }
251
0
                    src += srcStride;
252
0
                    dst += dstStride;
253
0
                }
254
0
            }
255
0
        }
Unexecuted instantiation: void Simd::Sse41::TexturePerformCompensation<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, int, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::TexturePerformCompensation<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, int, unsigned char*, unsigned long)
256
257
        void TexturePerformCompensation(const uint8_t* src, size_t srcStride, size_t width, size_t height,
258
            int shift, uint8_t* dst, size_t dstStride)
259
0
        {
260
0
            if (shift == 0)
261
0
            {
262
0
                if (src != dst)
263
0
                    Base::Copy(src, srcStride, width, height, 1, dst, dstStride);
264
0
                return;
265
0
            }
266
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride))
267
0
                TexturePerformCompensation<true>(src, srcStride, width, height, shift, dst, dstStride);
268
0
            else
269
0
                TexturePerformCompensation<false>(src, srcStride, width, height, shift, dst, dstStride);
270
0
        }
271
    }
272
#endif
273
}