Coverage Report

Created: 2025-08-11 07:29

/src/Simd/src/Simd/SimdAvx512bwInterleave.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2022 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
#include "Simd/SimdConversion.h"
27
#include "Simd/SimdInterleave.h"
28
#include "Simd/SimdUnpack.h"
29
30
namespace Simd
31
{
32
#ifdef SIMD_AVX512BW_ENABLE    
33
    namespace Avx512bw
34
    {
35
        // Interleaves one vector of U bytes with one vector of V bytes and writes two vectors to uv.
        //   u, v  - source pointers; both loads receive tails[2] as their mask argument.
        //   uv    - destination; the store at uv receives tails[0], the store at uv + A receives tails[1].
        //   align, mask - forwarded unchanged to Load and Store.
        // NOTE(review): the masks are presumably ignored by Load/Store when mask == false - confirm in SimdStore.h.
        template <bool align, bool mask> SIMD_INLINE void InterleaveUv(const uint8_t * u, const uint8_t * v, uint8_t * uv, const __mmask64 * tails)
        {
            const __mmask64 srcTail = tails[2];

            // Both inputs get the same 64-bit element permutation before the byte unpack.
            // NOTE(review): presumably this compensates for UnpackU8 working per 128-bit lane - confirm.
            const __m512i planeU = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, (Load<align, mask>(u, srcTail)));
            const __m512i planeV = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, (Load<align, mask>(v, srcTail)));

            const __m512i firstHalf = UnpackU8<0>(planeU, planeV);
            const __m512i secondHalf = UnpackU8<1>(planeU, planeV);

            Store<align, mask>(uv, firstHalf, tails[0]);
            Store<align, mask>(uv + A, secondHalf, tails[1]);
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
42
43
        // Unmasked kernel that processes two consecutive source vectors per plane (offsets 0 and A)
        // and writes four consecutive destination vectors (offsets 0, A, 2 * A and 3 * A).
        //   u, v  - source pointers.
        //   uv    - destination pointer.
        //   align - forwarded unchanged to Load and Store.
        template <bool align> SIMD_INLINE void InterleaveUv2(const uint8_t * u, const uint8_t * v, uint8_t * uv)
        {
            // Each pass loads one vector from every plane and stores its two interleaved vectors
            // before the next pair of loads is issued.
            for (size_t part = 0; part < 2; ++part)
            {
                const size_t srcOffset = part * A;
                uint8_t * dst = uv + 2 * srcOffset;

                const __m512i planeU = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u + srcOffset));
                const __m512i planeV = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v + srcOffset));

                Store<align>(dst, UnpackU8<0>(planeU, planeV));
                Store<align>(dst + A, UnpackU8<1>(planeU, planeV));
            }
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv2<true>(unsigned char const*, unsigned char const*, unsigned char*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv2<false>(unsigned char const*, unsigned char const*, unsigned char*)
54
55
        template <bool align> void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
56
            size_t width, size_t height, uint8_t * uv, size_t uvStride)
57
0
        {
58
0
            if (align)
59
0
                assert(Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride));
60
61
0
            size_t alignedWidth = AlignLo(width, A);
62
0
            size_t fullAlignedWidth = AlignLo(width, DA);
63
0
            __mmask64 tailMasks[3];
64
0
            for (size_t c = 0; c < 2; ++c)
65
0
                tailMasks[c] = TailMask64((width - alignedWidth) * 2 - A*c);
66
0
            tailMasks[2] = TailMask64(width - alignedWidth);
67
0
            for (size_t row = 0; row < height; ++row)
68
0
            {
69
0
                size_t col = 0;
70
0
                for (; col < fullAlignedWidth; col += DA)
71
0
                    InterleaveUv2<align>(u + col, v + col, uv + col * 2);
72
0
                for (; col < alignedWidth; col += A)
73
0
                    InterleaveUv<align, false>(u + col, v + col, uv + col * 2, tailMasks);
74
0
                if (col < width)
75
0
                    InterleaveUv<align, true>(u + col, v + col, uv + col * 2, tailMasks);
76
0
                uv += uvStride;
77
0
                u += uStride;
78
0
                v += vStride;
79
0
            }
80
0
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
81
82
        void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride, size_t width, size_t height, uint8_t * uv, size_t uvStride)
83
0
        {
84
0
            if (Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride))
85
0
                InterleaveUv<true>(u, uStride, v, vStride, width, height, uv, uvStride);
86
0
            else
87
0
                InterleaveUv<false>(u, uStride, v, vStride, width, height, uv, uvStride);
88
0
        }
89
90
        // Interleaves one vector from each of the b, g and r planes and writes three vectors to bgr.
        //   b, g, r - source pointers; all three loads receive tails[3] as their mask argument.
        //   bgr     - destination; the stores at bgr, bgr + A and bgr + 2 * A receive tails[0], tails[1], tails[2].
        //   align, mask - forwarded unchanged to Load and Store.
        // NOTE(review): the masks are presumably ignored by Load/Store when mask == false - confirm in SimdStore.h.
        template <bool align, bool mask> SIMD_INLINE void InterleaveBgr(const uint8_t * b, const uint8_t * g, const uint8_t * r, uint8_t * bgr, const __mmask64 * tails)
        {
            const __mmask64 srcTail = tails[3];
            const __m512i blue = Load<align, mask>(b, srcTail);
            const __m512i green = Load<align, mask>(g, srcTail);
            const __m512i red = Load<align, mask>(r, srcTail);

            // InterleaveBgr<N> here is the register-level helper that yields the N-th output vector.
            const __m512i out0 = InterleaveBgr<0>(blue, green, red);
            Store<align, mask>(bgr, out0, tails[0]);

            const __m512i out1 = InterleaveBgr<1>(blue, green, red);
            Store<align, mask>(bgr + A, out1, tails[1]);

            const __m512i out2 = InterleaveBgr<2>(blue, green, red);
            Store<align, mask>(bgr + 2 * A, out2, tails[2]);
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
99
100
        template <bool align> void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride)
101
0
        {
102
0
            if (align)
103
0
            {
104
0
                assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride));
105
0
                assert(Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride));
106
0
            }
107
108
0
            size_t alignedWidth = AlignLo(width, A);
109
0
            __mmask64 tailMasks[4];
110
0
            for (size_t c = 0; c < 3; ++c)
111
0
                tailMasks[c] = TailMask64((width - alignedWidth) * 3 - A*c);
112
0
            tailMasks[3] = TailMask64(width - alignedWidth);
113
0
            for (size_t row = 0; row < height; ++row)
114
0
            {
115
0
                size_t col = 0;
116
0
                for (; col < alignedWidth; col += A)
117
0
                    InterleaveBgr<align, false>(b + col, g + col, r + col, bgr + col * 3, tailMasks);
118
0
                if (col < width)
119
0
                    InterleaveBgr<align, true>(b + col, g + col, r + col, bgr + col * 3, tailMasks);
120
0
                b += bStride;
121
0
                g += gStride;
122
0
                r += rStride;
123
0
                bgr += bgrStride;
124
0
            }
125
0
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
126
127
        void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride)
128
0
        {
129
0
            if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride)
130
0
                && Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride))
131
0
                InterleaveBgr<true>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride);
132
0
            else
133
0
                InterleaveBgr<false>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride);
134
0
        }
135
136
        // Interleaves one vector from each of the b, g, r and a planes and writes four vectors to bgra.
        //   b, g, r, a - source pointers; all four loads receive tails[4] as their mask argument.
        //   bgra       - destination; the stores at bgra + k * A (k = 0..3) receive tails[k].
        //   align, mask - forwarded unchanged to Load and Store.
        // NOTE(review): the masks are presumably ignored by Load/Store when mask == false - confirm in SimdStore.h.
        template <bool align, bool mask> SIMD_INLINE void InterleaveBgra(const uint8_t * b, const uint8_t * g, const uint8_t * r, const uint8_t * a, uint8_t * bgra, const __mmask64 * tails)
        {
            const __mmask64 srcTail = tails[4];

            // Every plane gets the same 32-bit element permutation before the two unpack stages.
            // NOTE(review): presumably this compensates for the unpacks working per 128-bit lane - confirm.
            const __m512i blue = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(b, srcTail)));
            const __m512i green = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(g, srcTail)));
            const __m512i red = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(r, srcTail)));
            const __m512i alpha = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(a, srcTail)));

            // Stage 1: byte unpack builds (b, g) and (r, a) pairs.
            const __m512i blueGreenLo = UnpackU8<0>(blue, green);
            const __m512i blueGreenHi = UnpackU8<1>(blue, green);
            const __m512i redAlphaLo = UnpackU8<0>(red, alpha);
            const __m512i redAlphaHi = UnpackU8<1>(red, alpha);

            // Stage 2: 16-bit unpack joins the pairs; results are stored in destination order.
            Store<align, mask>(bgra, UnpackU16<0>(blueGreenLo, redAlphaLo), tails[0]);
            Store<align, mask>(bgra + A, UnpackU16<1>(blueGreenLo, redAlphaLo), tails[1]);
            Store<align, mask>(bgra + 2 * A, UnpackU16<0>(blueGreenHi, redAlphaHi), tails[2]);
            Store<align, mask>(bgra + 3 * A, UnpackU16<1>(blueGreenHi, redAlphaHi), tails[3]);
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
151
152
        template <bool align> void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride)
153
0
        {
154
0
            if (align)
155
0
            {
156
0
                assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride));
157
0
                assert(Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride));
158
0
            }
159
160
0
            size_t alignedWidth = AlignLo(width, A);
161
0
            __mmask64 tailMasks[5];
162
0
            for (size_t c = 0; c < 4; ++c)
163
0
                tailMasks[c] = TailMask64((width - alignedWidth) * 4 - A*c);
164
0
            tailMasks[4] = TailMask64(width - alignedWidth);
165
0
            for (size_t row = 0; row < height; ++row)
166
0
            {
167
0
                size_t col = 0;
168
0
                for (; col < alignedWidth; col += A)
169
0
                    InterleaveBgra<align, false>(b + col, g + col, r + col, a + col, bgra + col * 4, tailMasks);
170
0
                if (col < width)
171
0
                    InterleaveBgra<align, true>(b + col, g + col, r + col, a + col, bgra + col * 4, tailMasks);
172
0
                b += bStride;
173
0
                g += gStride;
174
0
                r += rStride;
175
0
                a += aStride;
176
0
                bgra += bgraStride;
177
0
            }
178
0
        }
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
179
180
        void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride)
181
0
        {
182
0
            if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride) &&
183
0
                Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride))
184
0
                InterleaveBgra<true>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride);
185
0
            else
186
0
                InterleaveBgra<false>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride);
187
0
        }
188
    }
189
#endif// SIMD_AVX512BW_ENABLE
190
}