Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdAvx512bwTexture.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2025 Yermalayeu Ihar,
5
*               2025-2025 Ger Hobbelt.
6
*
7
* Permission is hereby granted, free of charge, to any person obtaining a copy
8
* of this software and associated documentation files (the "Software"), to deal
9
* in the Software without restriction, including without limitation the rights
10
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
* copies of the Software, and to permit persons to whom the Software is
12
* furnished to do so, subject to the following conditions:
13
*
14
* The above copyright notice and this permission notice shall be included in
15
* all copies or substantial portions of the Software.
16
*
17
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
* SOFTWARE.
24
*/
25
#include "Simd/SimdMemory.h"
26
#include "Simd/SimdStore.h"
27
#include "Simd/SimdSet.h"
28
#include "Simd/SimdExtract.h"
29
#include "Simd/SimdBase.h"
30
#include "Simd/SimdUnpack.h"
31
32
namespace Simd
33
{
34
#ifdef SIMD_AVX512BW_ENABLE    
35
    namespace Avx512bw
36
    {
37
        // Converts signed 16-bit differences into boosted saturated gradient values.
        // Per lane: max(0, saturation + min(difference, saturation)) * boost,
        // i.e. the difference is limited to +/-saturation, offset by saturation and scaled by boost.
        SIMD_INLINE __m512i TextureBoostedSaturatedGradient16(const __m512i & difference, const __m512i & saturation, const __m512i & boost)
        {
            const __m512i upperLimited = _mm512_min_epi16(difference, saturation);
            const __m512i offset = _mm512_add_epi16(saturation, upperLimited);
            const __m512i nonNegative = _mm512_max_epi16(K_ZERO, offset);
            return _mm512_mullo_epi16(nonNegative, boost);
        }
41
42
        // Computes the boosted saturated gradient of two vectors of 8-bit pixels.
        // The 16-bit differences come from SubUnpackedU8<0>/<1>(b, a) (presumably b - a for the
        // low/high unpacked halves - confirm in SimdUnpack.h); both halves are converted with
        // TextureBoostedSaturatedGradient16 and packed back to bytes with unsigned saturation.
        SIMD_INLINE __m512i TextureBoostedSaturatedGradient8(const __m512i & a, const __m512i & b, const __m512i & saturation, const __m512i & boost)
        {
            const __m512i gradientLo = TextureBoostedSaturatedGradient16(SubUnpackedU8<0>(b, a), saturation, boost);
            const __m512i gradientHi = TextureBoostedSaturatedGradient16(SubUnpackedU8<1>(b, a), saturation, boost);
            const __m512i packed = _mm512_packus_epi16(gradientLo, gradientHi);
            return packed;
        }
48
49
        // Processes one vector of pixels: writes the horizontal gradient (from src - 1 and src + 1)
        // to dx and the vertical gradient (from src - stride and src + stride) to dy.
        // The horizontal neighbours are always loaded unaligned because they are offset by one byte;
        // 'tail' is forwarded to the masked Load/Store variants when 'mask' is true.
        template<bool align, bool mask> SIMD_INLINE void TextureBoostedSaturatedGradient(const uint8_t * src, uint8_t * dx, uint8_t * dy,
            size_t stride, const __m512i & saturation, const __m512i & boost, __mmask64 tail = -1)
        {
            // All four neighbours are read before anything is written.
            const __m512i left = Load<false, mask>(src - 1, tail);
            const __m512i right = Load<false, mask>(src + 1, tail);
            const __m512i upper = Load<align, mask>(src - stride, tail);
            const __m512i lower = Load<align, mask>(src + stride, tail);

            const __m512i horizontal = TextureBoostedSaturatedGradient8(left, right, saturation, boost);
            Store<align, mask>(dx, horizontal, tail);

            const __m512i vertical = TextureBoostedSaturatedGradient8(upper, lower, saturation, boost);
            Store<align, mask>(dy, vertical, tail);
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedSaturatedGradient<true, false>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(8) const&, long long __vector(8) const&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedSaturatedGradient<false, true>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(8) const&, long long __vector(8) const&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedSaturatedGradient<false, false>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(8) const&, long long __vector(8) const&, unsigned long long)
59
60
        template<bool align> void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height,
61
            uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride)
62
0
        {
63
0
            assert(int(2)*saturation*boost <= 0xFF);
64
0
            if (align)
65
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dx) && Aligned(dxStride) && Aligned(dy) && Aligned(dyStride));
66
67
0
            size_t alignedWidth = AlignLo(width, A);
68
0
            __mmask64 tailMask = TailMask64(width - alignedWidth);
69
0
            __m512i _saturation = _mm512_set1_epi16(saturation);
70
0
            __m512i _boost = _mm512_set1_epi16(boost);
71
72
0
            memset(dx, 0, width);
73
0
            memset(dy, 0, width);
74
0
            src += srcStride;
75
0
            dx += dxStride;
76
0
            dy += dyStride;
77
0
            for (size_t row = 2; row < height; ++row)
78
0
            {
79
0
                size_t col = 0;
80
0
                for (; col < alignedWidth; col += A)
81
0
                    TextureBoostedSaturatedGradient<align, false>(src + col, dx + col, dy + col, srcStride, _saturation, _boost);
82
0
                if (col < width)
83
0
                    TextureBoostedSaturatedGradient<false, true>(src + col, dx + col, dy + col, srcStride, _saturation, _boost, tailMask);
84
85
0
                dx[0] = 0;
86
0
                dy[0] = 0;
87
0
                dx[width - 1] = 0;
88
0
                dy[width - 1] = 0;
89
90
0
                src += srcStride;
91
0
                dx += dxStride;
92
0
                dy += dyStride;
93
0
            }
94
0
            memset(dx, 0, width);
95
0
            memset(dy, 0, width);
96
0
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedSaturatedGradient<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char, unsigned char*, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedSaturatedGradient<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char, unsigned char*, unsigned long, unsigned char*, unsigned long)
97
98
        void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height,
99
            uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride)
100
0
        {
101
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(dx) && Aligned(dxStride) && Aligned(dy) && Aligned(dyStride))
102
0
                TextureBoostedSaturatedGradient<true>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
103
0
            else
104
0
                TextureBoostedSaturatedGradient<false>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride);
105
0
        }
106
107
        // Processes one vector of pixels: limits each byte to [min8, max8], subtracts min8,
        // multiplies by boost16 in 16-bit precision and stores the result packed back to bytes
        // with unsigned saturation. 'tail' is forwarded to the masked Load/Store when 'mask' is true.
        template<bool align, bool mask> SIMD_INLINE void TextureBoostedUv(const uint8_t * src, uint8_t * dst,
            const __m512i & min8, const __m512i & max8, const __m512i & boost16, __mmask64 tail = -1)
        {
            const __m512i pixels = Load<align, mask>(src, tail);
            const __m512i limited = _mm512_max_epu8(min8, _mm512_min_epu8(max8, pixels));
            const __m512i rebased = _mm512_sub_epi8(limited, min8);
            // Widen to 16 bits (zero extension) so the multiplication cannot wrap in 8 bits.
            const __m512i boostedLo = _mm512_mullo_epi16(_mm512_unpacklo_epi8(rebased, K_ZERO), boost16);
            const __m512i boostedHi = _mm512_mullo_epi16(_mm512_unpackhi_epi8(rebased, K_ZERO), boost16);
            Store<align, mask>(dst, _mm512_packus_epi16(boostedLo, boostedHi), tail);
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedUv<true, false>(unsigned char const*, unsigned char*, long long __vector(8) const&, long long __vector(8) const&, long long __vector(8) const&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedUv<false, true>(unsigned char const*, unsigned char*, long long __vector(8) const&, long long __vector(8) const&, long long __vector(8) const&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedUv<false, false>(unsigned char const*, unsigned char*, long long __vector(8) const&, long long __vector(8) const&, long long __vector(8) const&, unsigned long long)
116
117
        template<bool align> void TextureBoostedUv(const uint8_t * src, size_t srcStride, size_t width, size_t height,
118
            uint8_t boost, uint8_t * dst, size_t dstStride)
119
0
        {
120
0
            assert(boost < 0x80);
121
0
            if (align)
122
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride));
123
124
0
            size_t alignedWidth = AlignLo(width, A);
125
0
            __mmask64 tailMask = TailMask64(width - alignedWidth);
126
0
            int min = 128 - (128 / boost);
127
0
            int max = 255 - min;
128
0
            __m512i min8 = _mm512_set1_epi8(min);
129
0
            __m512i max8 = _mm512_set1_epi8(max);
130
0
            __m512i boost16 = _mm512_set1_epi16(boost);
131
0
            for (size_t row = 0; row < height; ++row)
132
0
            {
133
0
                size_t col = 0;
134
0
                for (; col < alignedWidth; col += A)
135
0
                    TextureBoostedUv<align, false>(src + col, dst + col, min8, max8, boost16);
136
0
                if (col < width)
137
0
                    TextureBoostedUv<false, true>(src + col, dst + col, min8, max8, boost16, tailMask);
138
0
                src += srcStride;
139
0
                dst += dstStride;
140
0
            }
141
0
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedUv<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::TextureBoostedUv<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char*, unsigned long)
142
143
        void TextureBoostedUv(const uint8_t * src, size_t srcStride, size_t width, size_t height,
144
            uint8_t boost, uint8_t * dst, size_t dstStride)
145
0
        {
146
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride))
147
0
                TextureBoostedUv<true>(src, srcStride, width, height, boost, dst, dstStride);
148
0
            else
149
0
                TextureBoostedUv<false>(src, srcStride, width, height, boost, dst, dstStride);
150
0
        }
151
152
        // Accumulates the byte-wise differences between 'current' and 'average' into two 64-bit accumulators:
        // 'positive' receives the sums of (current - average) where current > average,
        // 'negative' receives the sums of (average - current) where average > current.
        // Saturating subtraction yields zero for the opposite sign; SAD against zero sums the bytes.
        SIMD_INLINE void TextureGetDifferenceSum(const __m512i & current, const __m512i & average, __m512i & positive, __m512i & negative)
        {
            const __m512i excess = _mm512_sad_epu8(_mm512_subs_epu8(current, average), K_ZERO);
            positive = _mm512_add_epi64(positive, excess);
            const __m512i deficit = _mm512_sad_epu8(_mm512_subs_epu8(average, current), K_ZERO);
            negative = _mm512_add_epi64(negative, deficit);
        }
157
158
        // Processes one vector of pixels: loads src, lo and hi, forms the rounded byte average of lo and hi
        // and accumulates the difference between src and that average into positive / negative.
        // 'tail' is forwarded to the masked Load when 'mask' is true.
        template <bool align, bool mask> SIMD_INLINE void TextureGetDifferenceSum(const uint8_t * src, const uint8_t * lo, const uint8_t * hi,
            __m512i & positive, __m512i & negative, __mmask64 tail = -1)
        {
            const __m512i pixels = Load<align, mask>(src, tail);
            const __m512i lower = Load<align, mask>(lo, tail);
            const __m512i upper = Load<align, mask>(hi, tail);
            TextureGetDifferenceSum(pixels, _mm512_avg_epu8(lower, upper), positive, negative);
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum<true, false>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(8)&, long long __vector(8)&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum<true, true>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(8)&, long long __vector(8)&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum<false, false>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(8)&, long long __vector(8)&, unsigned long long)
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum<false, true>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(8)&, long long __vector(8)&, unsigned long long)
167
168
        // Processes four consecutive vectors (4 * A bytes) of src, lo and hi without masking,
        // accumulating the difference between src and avg(hi, lo) into positive / negative.
        template <bool align> SIMD_INLINE void TextureGetDifferenceSum4(const uint8_t * src, const uint8_t * lo, const uint8_t * hi, __m512i & positive, __m512i & negative)
        {
            for (size_t block = 0; block < 4; ++block)
            {
                const size_t offset = block * A;
                const __m512i pixels = Load<align>(src + offset);
                const __m512i average = _mm512_avg_epu8(Load<align>(hi + offset), Load<align>(lo + offset));
                TextureGetDifferenceSum(pixels, average, positive, negative);
            }
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum4<true>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(8)&, long long __vector(8)&)
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum4<false>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(8)&, long long __vector(8)&)
175
176
        template <bool align> void TextureGetDifferenceSum(const uint8_t * src, size_t srcStride, size_t width, size_t height,
177
            const uint8_t * lo, size_t loStride, const uint8_t * hi, size_t hiStride, int64_t * sum)
178
0
        {
179
0
            assert(sum != nullptr);
180
0
            if (align)
181
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(lo) && Aligned(loStride) && Aligned(hi) && Aligned(hiStride));
182
183
0
            size_t alignedWidth = AlignLo(width, A);
184
0
            size_t fullAlignedWidth = AlignLo(width, QA);
185
0
            __mmask64 tailMask = TailMask64(width - alignedWidth);
186
0
            __m512i positive = _mm512_setzero_si512();
187
0
            __m512i negative = _mm512_setzero_si512();
188
0
            for (size_t row = 0; row < height; ++row)
189
0
            {
190
0
                size_t col = 0;
191
0
                for (; col < fullAlignedWidth; col += QA)
192
0
                    TextureGetDifferenceSum4<align>(src + col, lo + col, hi + col, positive, negative);
193
0
                for (; col < alignedWidth; col += A)
194
0
                    TextureGetDifferenceSum<align, false>(src + col, lo + col, hi + col, positive, negative);
195
0
                if (col < width)
196
0
                    TextureGetDifferenceSum<align, true>(src + col, lo + col, hi + col, positive, negative, tailMask);
197
0
                src += srcStride;
198
0
                lo += loStride;
199
0
                hi += hiStride;
200
0
            }
201
0
            *sum = ExtractSum<int64_t>(positive) - ExtractSum<int64_t>(negative);
202
0
        }
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, long*)
Unexecuted instantiation: void Simd::Avx512bw::TextureGetDifferenceSum<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, long*)
203
204
        void TextureGetDifferenceSum(const uint8_t * src, size_t srcStride, size_t width, size_t height,
205
            const uint8_t * lo, size_t loStride, const uint8_t * hi, size_t hiStride, int64_t * sum)
206
0
        {
207
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(lo) && Aligned(loStride) && Aligned(hi) && Aligned(hiStride))
208
0
                TextureGetDifferenceSum<true>(src, srcStride, width, height, lo, loStride, hi, hiStride, sum);
209
0
            else
210
0
                TextureGetDifferenceSum<false>(src, srcStride, width, height, lo, loStride, hi, hiStride, sum);
211
0
        }
212
213
        template <bool align> void TexturePerformCompensation(const uint8_t * src, size_t srcStride, size_t width, size_t height, int shift, uint8_t * dst, size_t dstStride)
214
0
        {
215
0
            assert(shift > -0xFF && shift < 0xFF && shift != 0);
216
0
            if (align)
217
0
                assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride));
218
219
0
            size_t alignedWidth = AlignLo(width, A);
220
0
            size_t fullAlignedWidth = AlignLo(width, QA);
221
0
            __mmask64 tailMask = TailMask64(width - alignedWidth);
222
0
            if (shift > 0)
223
0
            {
224
0
                __m512i _shift = _mm512_set1_epi8((char)shift);
225
0
                for (size_t row = 0; row < height; ++row)
226
0
                {
227
0
                    size_t col = 0;
228
0
                    for (; col < fullAlignedWidth; col += QA)
229
0
                    {
230
0
                        Store<align>(dst + col + 0 * A, _mm512_adds_epu8(Load<align>(src + col + 0 * A), _shift));
231
0
                        Store<align>(dst + col + 1 * A, _mm512_adds_epu8(Load<align>(src + col + 1 * A), _shift));
232
0
                        Store<align>(dst + col + 2 * A, _mm512_adds_epu8(Load<align>(src + col + 2 * A), _shift));
233
0
                        Store<align>(dst + col + 3 * A, _mm512_adds_epu8(Load<align>(src + col + 3 * A), _shift));
234
0
                    }
235
0
                    for (; col < alignedWidth; col += A)
236
0
                        Store<align>(dst + col, _mm512_adds_epu8(Load<align>(src + col), _shift));
237
0
                    if (col < width)
238
0
                        Store<align, true>(dst + col, _mm512_adds_epu8((Load<align, true>(src + col, tailMask)), _shift), tailMask);
239
0
                    src += srcStride;
240
0
                    dst += dstStride;
241
0
                }
242
0
            }
243
0
            if (shift < 0)
244
0
            {
245
0
                __m512i _shift = _mm512_set1_epi8((char)-shift);
246
0
                for (size_t row = 0; row < height; ++row)
247
0
                {
248
0
                    size_t col = 0;
249
0
                    for (; col < fullAlignedWidth; col += QA)
250
0
                    {
251
0
                        Store<align>(dst + col + 0 * A, _mm512_subs_epu8(Load<align>(src + col + 0 * A), _shift));
252
0
                        Store<align>(dst + col + 1 * A, _mm512_subs_epu8(Load<align>(src + col + 1 * A), _shift));
253
0
                        Store<align>(dst + col + 2 * A, _mm512_subs_epu8(Load<align>(src + col + 2 * A), _shift));
254
0
                        Store<align>(dst + col + 3 * A, _mm512_subs_epu8(Load<align>(src + col + 3 * A), _shift));
255
0
                    }
256
0
                    for (; col < alignedWidth; col += A)
257
0
                        Store<align>(dst + col, _mm512_subs_epu8(Load<align>(src + col), _shift));
258
0
                    if (col < width)
259
0
                        Store<align, true>(dst + col, _mm512_subs_epu8((Load<align, true>(src + col, tailMask)), _shift), tailMask);
260
0
                    src += srcStride;
261
0
                    dst += dstStride;
262
0
                }
263
0
            }
264
0
        }
Unexecuted instantiation: void Simd::Avx512bw::TexturePerformCompensation<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, int, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::TexturePerformCompensation<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, int, unsigned char*, unsigned long)
265
266
        void TexturePerformCompensation(const uint8_t * src, size_t srcStride, size_t width, size_t height,
267
            int shift, uint8_t * dst, size_t dstStride)
268
0
        {
269
0
            if (shift == 0)
270
0
            {
271
0
                if (src != dst)
272
0
                    Base::Copy(src, srcStride, width, height, 1, dst, dstStride);
273
0
                return;
274
0
            }
275
0
            if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride))
276
0
                TexturePerformCompensation<true>(src, srcStride, width, height, shift, dst, dstStride);
277
0
            else
278
0
                TexturePerformCompensation<false>(src, srcStride, width, height, shift, dst, dstStride);
279
0
        }
280
    }
281
#endif// SIMD_AVX512BW_ENABLE
282
}