/src/Simd/src/Simd/SimdAvx512bwInterleave.cpp

Source (jump to first uncovered line)
/*
* Simd Library (http://ermig1979.github.io/Simd).
*
* Copyright (c) 2011-2022 Yermalayeu Ihar.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "Simd/SimdMemory.h"
#include "Simd/SimdStore.h"
#include "Simd/SimdConversion.h"
#include "Simd/SimdInterleave.h"
#include "Simd/SimdUnpack.h"

namespace Simd
{
#ifdef SIMD_AVX512BW_ENABLE    
    namespace Avx512bw
    {
        // Interleaves one register of U bytes with one register of V bytes.
        //
        // Both planes are loaded under tails[2]; the two results of the byte
        // unpack are written to uv and uv + A under tails[0] and tails[1].
        // NOTE(review): with mask == false the tail masks are presumably ignored
        // by Load/Store - confirm against SimdStore.h.
        template <bool align, bool mask> SIMD_INLINE void InterleaveUv(const uint8_t * u, const uint8_t * v, uint8_t * uv, const __mmask64 * tails)
        {
            // The loads are kept in named temporaries so that the template comma
            // never appears inside the intrinsic's argument list.
            const __m512i rawU = Load<align, mask>(u, tails[2]);
            const __m512i permU = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, rawU);
            const __m512i rawV = Load<align, mask>(v, tails[2]);
            const __m512i permV = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, rawV);

            const __m512i uvFirst = UnpackU8<0>(permU, permV);
            Store<align, mask>(uv, uvFirst, tails[0]);
            const __m512i uvSecond = UnpackU8<1>(permU, permV);
            Store<align, mask>(uv + A, uvSecond, tails[1]);
        }

        // Unmasked variant that handles two consecutive registers of U and V
        // (offsets 0 and A) and writes four registers of UV (offsets 0..3 * A).
        // The first pair is fully stored before the second pair is loaded.
        template <bool align> SIMD_INLINE void InterleaveUv2(const uint8_t * u, const uint8_t * v, uint8_t * uv)
        {
            // First register of each plane -> uv[0 .. 2 * A).
            const __m512i firstU = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u));
            const __m512i firstV = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v));
            Store<align>(uv, UnpackU8<0>(firstU, firstV));
            Store<align>(uv + A, UnpackU8<1>(firstU, firstV));

            // Second register of each plane -> uv[2 * A .. 4 * A).
            const __m512i secondU = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u + A));
            const __m512i secondV = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v + A));
            Store<align>(uv + 2 * A, UnpackU8<0>(secondU, secondV));
            Store<align>(uv + 3 * A, UnpackU8<1>(secondU, secondV));
        }

        // Row driver: interleaves the U and V planes into a UV plane, row by row.
        //
        // Each row is processed in three stages: blocks of DA pixels through
        // InterleaveUv2, remaining blocks of A pixels through the unmasked
        // kernel, and finally one masked call for whatever is left of the row.
        // tailMasks[0..1] guard the two output stores of the masked call,
        // tailMasks[2] guards its input loads.
        template <bool align> void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
            size_t width, size_t height, uint8_t * uv, size_t uvStride)
        {
            if (align)
                assert(Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride));

            const size_t singleEnd = AlignLo(width, A);
            const size_t doubleEnd = AlignLo(width, DA);
            const size_t tailIn = width - singleEnd;
            const size_t tailOut = tailIn * 2;

            // The subtraction may wrap when the tail output fits in the first store;
            // NOTE(review): TailMask64 presumably maps such values to an empty mask - confirm.
            __mmask64 tailMasks[3];
            tailMasks[0] = TailMask64(tailOut);
            tailMasks[1] = TailMask64(tailOut - A);
            tailMasks[2] = TailMask64(tailIn);

            for (size_t rowsLeft = height; rowsLeft > 0; --rowsLeft)
            {
                size_t x = 0;
                while (x < doubleEnd)
                {
                    InterleaveUv2<align>(u + x, v + x, uv + x * 2);
                    x += DA;
                }
                while (x < singleEnd)
                {
                    InterleaveUv<align, false>(u + x, v + x, uv + x * 2, tailMasks);
                    x += A;
                }
                if (x < width)
                    InterleaveUv<align, true>(u + x, v + x, uv + x * 2, tailMasks);

                u += uStride;
                v += vStride;
                uv += uvStride;
            }
        }

        // Entry point: picks the aligned implementation only when every pointer
        // and every stride passes Aligned(); otherwise uses the unaligned one.
        void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride, size_t width, size_t height, uint8_t * uv, size_t uvStride)
        {
            const bool allAligned = Aligned(uv) && Aligned(uvStride)
                && Aligned(u) && Aligned(uStride)
                && Aligned(v) && Aligned(vStride);

            if (!allAligned)
            {
                InterleaveUv<false>(u, uStride, v, vStride, width, height, uv, uvStride);
                return;
            }
            InterleaveUv<true>(u, uStride, v, vStride, width, height, uv, uvStride);
        }

        // Interleaves one register each of B, G and R bytes into three registers
        // of packed BGR output.
        //
        // All three planes are loaded under tails[3]; output register i is
        // produced by the InterleaveBgr<i> helper and stored at bgr + i * A
        // under tails[i].
        template <bool align, bool mask> SIMD_INLINE void InterleaveBgr(const uint8_t * b, const uint8_t * g, const uint8_t * r, uint8_t * bgr, const __mmask64 * tails)
        {
            const __m512i blue = Load<align, mask>(b, tails[3]);
            const __m512i green = Load<align, mask>(g, tails[3]);
            const __m512i red = Load<align, mask>(r, tails[3]);

            // All inputs are already in registers, so the outputs can be formed up front.
            const __m512i out0 = InterleaveBgr<0>(blue, green, red);
            const __m512i out1 = InterleaveBgr<1>(blue, green, red);
            const __m512i out2 = InterleaveBgr<2>(blue, green, red);

            Store<align, mask>(bgr, out0, tails[0]);
            Store<align, mask>(bgr + A, out1, tails[1]);
            Store<align, mask>(bgr + 2 * A, out2, tails[2]);
        }

        // Row driver: interleaves separate B, G and R planes into a packed BGR
        // plane, row by row.
        //
        // Full blocks of A pixels use the unmasked kernel; the remainder of each
        // row is handled by a single masked call. tailMasks[0..2] guard the three
        // output stores of that call, tailMasks[3] guards its input loads.
        template <bool align> void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride)
        {
            if (align)
            {
                assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride));
                assert(Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride));
            }

            const size_t bodyEnd = AlignLo(width, A);
            const size_t tailIn = width - bodyEnd;
            const size_t tailOut = tailIn * 3;

            // The subtractions may wrap when the tail output ends before a given store;
            // NOTE(review): TailMask64 presumably maps such values to an empty mask - confirm.
            __mmask64 tailMasks[4];
            tailMasks[0] = TailMask64(tailOut);
            tailMasks[1] = TailMask64(tailOut - A);
            tailMasks[2] = TailMask64(tailOut - A * 2);
            tailMasks[3] = TailMask64(tailIn);

            for (size_t rowsLeft = height; rowsLeft > 0; --rowsLeft)
            {
                size_t x = 0;
                while (x < bodyEnd)
                {
                    InterleaveBgr<align, false>(b + x, g + x, r + x, bgr + x * 3, tailMasks);
                    x += A;
                }
                if (x < width)
                    InterleaveBgr<align, true>(b + x, g + x, r + x, bgr + x * 3, tailMasks);

                bgr += bgrStride;
                b += bStride;
                g += gStride;
                r += rStride;
            }
        }

        // Entry point: picks the aligned implementation only when every pointer
        // and every stride passes Aligned(); otherwise uses the unaligned one.
        void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride)
        {
            const bool allAligned = Aligned(b) && Aligned(bStride)
                && Aligned(g) && Aligned(gStride)
                && Aligned(r) && Aligned(rStride)
                && Aligned(bgr) && Aligned(bgrStride);

            if (!allAligned)
            {
                InterleaveBgr<false>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride);
                return;
            }
            InterleaveBgr<true>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride);
        }

        // Interleaves one register each of B, G, R and A bytes into four
        // registers of packed BGRA output.
        //
        // Every plane is loaded under tails[4] and permuted with
        // K32_PERMUTE_FOR_TWO_UNPACK. Bytes are first paired (B with G, R with A),
        // then the 16-bit pairs are merged; the four results are stored at
        // bgra + i * A under tails[i].
        template <bool align, bool mask> SIMD_INLINE void InterleaveBgra(const uint8_t * b, const uint8_t * g, const uint8_t * r, const uint8_t * a, uint8_t * bgra, const __mmask64 * tails)
        {
            // Named temporaries keep the template comma out of the intrinsic's argument list.
            const __m512i rawB = Load<align, mask>(b, tails[4]);
            const __m512i rawG = Load<align, mask>(g, tails[4]);
            const __m512i rawR = Load<align, mask>(r, tails[4]);
            const __m512i rawA = Load<align, mask>(a, tails[4]);

            const __m512i permB = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, rawB);
            const __m512i permG = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, rawG);
            const __m512i permR = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, rawR);
            const __m512i permA = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, rawA);

            // First half of the byte pairs feeds output registers 0 and 1.
            const __m512i bgFirst = UnpackU8<0>(permB, permG);
            const __m512i raFirst = UnpackU8<0>(permR, permA);
            Store<align, mask>(bgra, UnpackU16<0>(bgFirst, raFirst), tails[0]);
            Store<align, mask>(bgra + A, UnpackU16<1>(bgFirst, raFirst), tails[1]);

            // Second half of the byte pairs feeds output registers 2 and 3.
            const __m512i bgSecond = UnpackU8<1>(permB, permG);
            const __m512i raSecond = UnpackU8<1>(permR, permA);
            Store<align, mask>(bgra + 2 * A, UnpackU16<0>(bgSecond, raSecond), tails[2]);
            Store<align, mask>(bgra + 3 * A, UnpackU16<1>(bgSecond, raSecond), tails[3]);
        }

        // Row driver: interleaves separate B, G, R and A planes into a packed
        // BGRA plane, row by row.
        //
        // Full blocks of A pixels use the unmasked kernel; the remainder of each
        // row is handled by a single masked call. tailMasks[0..3] guard the four
        // output stores of that call, tailMasks[4] guards its input loads.
        template <bool align> void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride)
        {
            if (align)
            {
                assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride));
                assert(Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride));
            }

            const size_t bodyEnd = AlignLo(width, A);
            const size_t tailIn = width - bodyEnd;
            const size_t tailOut = tailIn * 4;

            // The subtractions may wrap when the tail output ends before a given store;
            // NOTE(review): TailMask64 presumably maps such values to an empty mask - confirm.
            __mmask64 tailMasks[5];
            tailMasks[0] = TailMask64(tailOut);
            tailMasks[1] = TailMask64(tailOut - A);
            tailMasks[2] = TailMask64(tailOut - A * 2);
            tailMasks[3] = TailMask64(tailOut - A * 3);
            tailMasks[4] = TailMask64(tailIn);

            for (size_t rowsLeft = height; rowsLeft > 0; --rowsLeft)
            {
                size_t x = 0;
                while (x < bodyEnd)
                {
                    InterleaveBgra<align, false>(b + x, g + x, r + x, a + x, bgra + x * 4, tailMasks);
                    x += A;
                }
                if (x < width)
                    InterleaveBgra<align, true>(b + x, g + x, r + x, a + x, bgra + x * 4, tailMasks);

                bgra += bgraStride;
                b += bStride;
                g += gStride;
                r += rStride;
                a += aStride;
            }
        }

        // Entry point: picks the aligned implementation only when every pointer
        // and every stride passes Aligned(); otherwise uses the unaligned one.
        void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride)
        {
            const bool allAligned = Aligned(b) && Aligned(bStride)
                && Aligned(g) && Aligned(gStride)
                && Aligned(r) && Aligned(rStride)
                && Aligned(a) && Aligned(aStride)
                && Aligned(bgra) && Aligned(bgraStride);

            if (!allAligned)
            {
                InterleaveBgra<false>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride);
                return;
            }
            InterleaveBgra<true>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride);
        }
    }
#endif// SIMD_AVX512BW_ENABLE
}

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Simd Library (http://ermig1979.github.io/Simd).
3		*
4		* Copyright (c) 2011-2022 Yermalayeu Ihar.
5		*
6		* Permission is hereby granted, free of charge, to any person obtaining a copy
7		* of this software and associated documentation files (the "Software"), to deal
8		* in the Software without restriction, including without limitation the rights
9		* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10		* copies of the Software, and to permit persons to whom the Software is
11		* furnished to do so, subject to the following conditions:
12		*
13		* The above copyright notice and this permission notice shall be included in
14		* all copies or substantial portions of the Software.
15		*
16		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19		* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21		* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22		* SOFTWARE.
23		*/
24		#include "Simd/SimdMemory.h"
25		#include "Simd/SimdStore.h"
26		#include "Simd/SimdConversion.h"
27		#include "Simd/SimdInterleave.h"
28		#include "Simd/SimdUnpack.h"
29
30		namespace Simd
31		{
32		#ifdef SIMD_AVX512BW_ENABLE
33		namespace Avx512bw
34		{
35		template <bool align, bool mask> SIMD_INLINE void InterleaveUv(const uint8_t * u, const uint8_t * v, uint8_t * uv, const __mmask64 * tails)
36	0	{
37	0	__m512i _u = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, (Load<align, mask>(u, tails[2])));
38	0	__m512i _v = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, (Load<align, mask>(v, tails[2])));
39	0	Store<align, mask>(uv + 0, UnpackU8<0>(_u, _v), tails[0]);
40	0	Store<align, mask>(uv + A, UnpackU8<1>(_u, _v), tails[1]);
41	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true, false>(unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true, true>(unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false, false>(unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false, true>(unsigned char const, unsigned char const, unsigned char, unsigned long long const)
42
43		template <bool align> SIMD_INLINE void InterleaveUv2(const uint8_t * u, const uint8_t * v, uint8_t * uv)
44	0	{
45	0	__m512i u0 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u + 0));
46	0	__m512i v0 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v + 0));
47	0	Store<align>(uv + 0 * A, UnpackU8<0>(u0, v0));
48	0	Store<align>(uv + 1 * A, UnpackU8<1>(u0, v0));
49	0	__m512i u1 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u + A));
50	0	__m512i v1 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v + A));
51	0	Store<align>(uv + 2 * A, UnpackU8<0>(u1, v1));
52	0	Store<align>(uv + 3 * A, UnpackU8<1>(u1, v1));
53	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv2<true>(unsigned char const, unsigned char const, unsigned char) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv2<false>(unsigned char const, unsigned char const, unsigned char)
54
55		template <bool align> void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
56		size_t width, size_t height, uint8_t * uv, size_t uvStride)
57	0	{
58	0	if (align)
59	0	assert(Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride));
60
61	0	size_t alignedWidth = AlignLo(width, A);
62	0	size_t fullAlignedWidth = AlignLo(width, DA);
63	0	__mmask64 tailMasks[3];
64	0	for (size_t c = 0; c < 2; ++c)
65	0	tailMasks[c] = TailMask64((width - alignedWidth) * 2 - A*c);
66	0	tailMasks[2] = TailMask64(width - alignedWidth);
67	0	for (size_t row = 0; row < height; ++row)
68	0	{
69	0	size_t col = 0;
70	0	for (; col < fullAlignedWidth; col += DA)
71	0	InterleaveUv2<align>(u + col, v + col, uv + col * 2);
72	0	for (; col < alignedWidth; col += A)
73	0	InterleaveUv<align, false>(u + col, v + col, uv + col * 2, tailMasks);
74	0	if (col < width)
75	0	InterleaveUv<align, true>(u + col, v + col, uv + col * 2, tailMasks);
76	0	uv += uvStride;
77	0	u += uStride;
78	0	v += vStride;
79	0	}
80	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true>(unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned long, unsigned long, unsigned char, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false>(unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned long, unsigned long, unsigned char, unsigned long)
81
82		void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride, size_t width, size_t height, uint8_t * uv, size_t uvStride)
83	0	{
84	0	if (Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride))
85	0	InterleaveUv<true>(u, uStride, v, vStride, width, height, uv, uvStride);
86	0	else
87	0	InterleaveUv<false>(u, uStride, v, vStride, width, height, uv, uvStride);
88	0	}
89
90		template <bool align, bool mask> SIMD_INLINE void InterleaveBgr(const uint8_t * b, const uint8_t * g, const uint8_t * r, uint8_t * bgr, const __mmask64 * tails)
91	0	{
92	0	__m512i _b = Load<align, mask>(b, tails[3]);
93	0	__m512i _g = Load<align, mask>(g, tails[3]);
94	0	__m512i _r = Load<align, mask>(r, tails[3]);
95	0	Store<align, mask>(bgr + 0 * A, InterleaveBgr<0>(_b, _g, _r), tails[0]);
96	0	Store<align, mask>(bgr + 1 * A, InterleaveBgr<1>(_b, _g, _r), tails[1]);
97	0	Store<align, mask>(bgr + 2 * A, InterleaveBgr<2>(_b, _g, _r), tails[2]);
98	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true, false>(unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true, true>(unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false, false>(unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false, true>(unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const)
99
100		template <bool align> void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride)
101	0	{
102	0	if (align)
103	0	{
104	0	assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride));
105	0	assert(Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride));
106	0	}
107
108	0	size_t alignedWidth = AlignLo(width, A);
109	0	__mmask64 tailMasks[4];
110	0	for (size_t c = 0; c < 3; ++c)
111	0	tailMasks[c] = TailMask64((width - alignedWidth) * 3 - A*c);
112	0	tailMasks[3] = TailMask64(width - alignedWidth);
113	0	for (size_t row = 0; row < height; ++row)
114	0	{
115	0	size_t col = 0;
116	0	for (; col < alignedWidth; col += A)
117	0	InterleaveBgr<align, false>(b + col, g + col, r + col, bgr + col * 3, tailMasks);
118	0	if (col < width)
119	0	InterleaveBgr<align, true>(b + col, g + col, r + col, bgr + col * 3, tailMasks);
120	0	b += bStride;
121	0	g += gStride;
122	0	r += rStride;
123	0	bgr += bgrStride;
124	0	}
125	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true>(unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned long, unsigned long, unsigned char, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false>(unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned long, unsigned long, unsigned char, unsigned long)
126
127		void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride)
128	0	{
129	0	if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride)
130	0	&& Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride))
131	0	InterleaveBgr<true>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride);
132	0	else
133	0	InterleaveBgr<false>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride);
134	0	}
135
136		template <bool align, bool mask> SIMD_INLINE void InterleaveBgra(const uint8_t * b, const uint8_t * g, const uint8_t * r, const uint8_t * a, uint8_t * bgra, const __mmask64 * tails)
137	0	{
138	0	__m512i _b = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(b, tails[4])));
139	0	__m512i _g = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(g, tails[4])));
140	0	__m512i _r = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(r, tails[4])));
141	0	__m512i _a = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(a, tails[4])));
142	0	__m512i bg0 = UnpackU8<0>(_b, _g);
143	0	__m512i bg1 = UnpackU8<1>(_b, _g);
144	0	__m512i ra0 = UnpackU8<0>(_r, _a);
145	0	__m512i ra1 = UnpackU8<1>(_r, _a);
146	0	Store<align, mask>(bgra + 0 * A, UnpackU16<0>(bg0, ra0), tails[0]);
147	0	Store<align, mask>(bgra + 1 * A, UnpackU16<1>(bg0, ra0), tails[1]);
148	0	Store<align, mask>(bgra + 2 * A, UnpackU16<0>(bg1, ra1), tails[2]);
149	0	Store<align, mask>(bgra + 3 * A, UnpackU16<1>(bg1, ra1), tails[3]);
150	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true, false>(unsigned char const, unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true, true>(unsigned char const, unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false, false>(unsigned char const, unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false, true>(unsigned char const, unsigned char const, unsigned char const, unsigned char const, unsigned char, unsigned long long const)
151
152		template <bool align> void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride)
153	0	{
154	0	if (align)
155	0	{
156	0	assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride));
157	0	assert(Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride));
158	0	}
159
160	0	size_t alignedWidth = AlignLo(width, A);
161	0	__mmask64 tailMasks[5];
162	0	for (size_t c = 0; c < 4; ++c)
163	0	tailMasks[c] = TailMask64((width - alignedWidth) * 4 - A*c);
164	0	tailMasks[4] = TailMask64(width - alignedWidth);
165	0	for (size_t row = 0; row < height; ++row)
166	0	{
167	0	size_t col = 0;
168	0	for (; col < alignedWidth; col += A)
169	0	InterleaveBgra<align, false>(b + col, g + col, r + col, a + col, bgra + col * 4, tailMasks);
170	0	if (col < width)
171	0	InterleaveBgra<align, true>(b + col, g + col, r + col, a + col, bgra + col * 4, tailMasks);
172	0	b += bStride;
173	0	g += gStride;
174	0	r += rStride;
175	0	a += aStride;
176	0	bgra += bgraStride;
177	0	}
178	0	} Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true>(unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned long, unsigned long, unsigned char, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false>(unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned char const, unsigned long, unsigned long, unsigned long, unsigned char, unsigned long)
179
180		void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride)
181	0	{
182	0	if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride) &&
183	0	Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride))
184	0	InterleaveBgra<true>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride);
185	0	else
186	0	InterleaveBgra<false>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride);
187	0	}
188		}
189		#endif// SIMD_AVX512BW_ENABLE
190		}

Coverage Report

Created: 2025-08-11 07:29