/src/Simd/src/Simd/SimdAvx512bwInterleave.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2022 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | #include "Simd/SimdConversion.h" |
27 | | #include "Simd/SimdInterleave.h" |
28 | | #include "Simd/SimdUnpack.h" |
29 | | |
30 | | namespace Simd |
31 | | { |
32 | | #ifdef SIMD_AVX512BW_ENABLE |
33 | | namespace Avx512bw |
34 | | { |
35 | | template <bool align, bool mask> SIMD_INLINE void InterleaveUv(const uint8_t * u, const uint8_t * v, uint8_t * uv, const __mmask64 * tails) |
36 | 0 | { |
37 | 0 | __m512i _u = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, (Load<align, mask>(u, tails[2]))); |
38 | 0 | __m512i _v = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, (Load<align, mask>(v, tails[2]))); |
39 | 0 | Store<align, mask>(uv + 0, UnpackU8<0>(_u, _v), tails[0]); |
40 | 0 | Store<align, mask>(uv + A, UnpackU8<1>(_u, _v), tails[1]); |
41 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) |
42 | | |
43 | | template <bool align> SIMD_INLINE void InterleaveUv2(const uint8_t * u, const uint8_t * v, uint8_t * uv) |
44 | 0 | { |
45 | 0 | __m512i u0 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u + 0)); |
46 | 0 | __m512i v0 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v + 0)); |
47 | 0 | Store<align>(uv + 0 * A, UnpackU8<0>(u0, v0)); |
48 | 0 | Store<align>(uv + 1 * A, UnpackU8<1>(u0, v0)); |
49 | 0 | __m512i u1 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(u + A)); |
50 | 0 | __m512i v1 = _mm512_permutexvar_epi64(K64_PERMUTE_FOR_UNPACK, Load<align>(v + A)); |
51 | 0 | Store<align>(uv + 2 * A, UnpackU8<0>(u1, v1)); |
52 | 0 | Store<align>(uv + 3 * A, UnpackU8<1>(u1, v1)); |
53 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv2<true>(unsigned char const*, unsigned char const*, unsigned char*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv2<false>(unsigned char const*, unsigned char const*, unsigned char*) |
54 | | |
55 | | template <bool align> void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride, |
56 | | size_t width, size_t height, uint8_t * uv, size_t uvStride) |
57 | 0 | { |
58 | 0 | if (align) |
59 | 0 | assert(Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride)); |
60 | |
|
61 | 0 | size_t alignedWidth = AlignLo(width, A); |
62 | 0 | size_t fullAlignedWidth = AlignLo(width, DA); |
63 | 0 | __mmask64 tailMasks[3]; |
64 | 0 | for (size_t c = 0; c < 2; ++c) |
65 | 0 | tailMasks[c] = TailMask64((width - alignedWidth) * 2 - A*c); |
66 | 0 | tailMasks[2] = TailMask64(width - alignedWidth); |
67 | 0 | for (size_t row = 0; row < height; ++row) |
68 | 0 | { |
69 | 0 | size_t col = 0; |
70 | 0 | for (; col < fullAlignedWidth; col += DA) |
71 | 0 | InterleaveUv2<align>(u + col, v + col, uv + col * 2); |
72 | 0 | for (; col < alignedWidth; col += A) |
73 | 0 | InterleaveUv<align, false>(u + col, v + col, uv + col * 2, tailMasks); |
74 | 0 | if (col < width) |
75 | 0 | InterleaveUv<align, true>(u + col, v + col, uv + col * 2, tailMasks); |
76 | 0 | uv += uvStride; |
77 | 0 | u += uStride; |
78 | 0 | v += vStride; |
79 | 0 | } |
80 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::InterleaveUv<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
81 | | |
82 | | void InterleaveUv(const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride, size_t width, size_t height, uint8_t * uv, size_t uvStride) |
83 | 0 | { |
84 | 0 | if (Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride)) |
85 | 0 | InterleaveUv<true>(u, uStride, v, vStride, width, height, uv, uvStride); |
86 | 0 | else |
87 | 0 | InterleaveUv<false>(u, uStride, v, vStride, width, height, uv, uvStride); |
88 | 0 | } |
89 | | |
90 | | template <bool align, bool mask> SIMD_INLINE void InterleaveBgr(const uint8_t * b, const uint8_t * g, const uint8_t * r, uint8_t * bgr, const __mmask64 * tails) |
91 | 0 | { |
92 | 0 | __m512i _b = Load<align, mask>(b, tails[3]); |
93 | 0 | __m512i _g = Load<align, mask>(g, tails[3]); |
94 | 0 | __m512i _r = Load<align, mask>(r, tails[3]); |
95 | 0 | Store<align, mask>(bgr + 0 * A, InterleaveBgr<0>(_b, _g, _r), tails[0]); |
96 | 0 | Store<align, mask>(bgr + 1 * A, InterleaveBgr<1>(_b, _g, _r), tails[1]); |
97 | 0 | Store<align, mask>(bgr + 2 * A, InterleaveBgr<2>(_b, _g, _r), tails[2]); |
98 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) |
99 | | |
100 | | template <bool align> void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride) |
101 | 0 | { |
102 | 0 | if (align) |
103 | 0 | { |
104 | 0 | assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride)); |
105 | 0 | assert(Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride)); |
106 | 0 | } |
107 | |
|
108 | 0 | size_t alignedWidth = AlignLo(width, A); |
109 | 0 | __mmask64 tailMasks[4]; |
110 | 0 | for (size_t c = 0; c < 3; ++c) |
111 | 0 | tailMasks[c] = TailMask64((width - alignedWidth) * 3 - A*c); |
112 | 0 | tailMasks[3] = TailMask64(width - alignedWidth); |
113 | 0 | for (size_t row = 0; row < height; ++row) |
114 | 0 | { |
115 | 0 | size_t col = 0; |
116 | 0 | for (; col < alignedWidth; col += A) |
117 | 0 | InterleaveBgr<align, false>(b + col, g + col, r + col, bgr + col * 3, tailMasks); |
118 | 0 | if (col < width) |
119 | 0 | InterleaveBgr<align, true>(b + col, g + col, r + col, bgr + col * 3, tailMasks); |
120 | 0 | b += bStride; |
121 | 0 | g += gStride; |
122 | 0 | r += rStride; |
123 | 0 | bgr += bgrStride; |
124 | 0 | } |
125 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgr<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
126 | | |
127 | | void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride) |
128 | 0 | { |
129 | 0 | if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) |
130 | 0 | && Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride)) |
131 | 0 | InterleaveBgr<true>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride); |
132 | 0 | else |
133 | 0 | InterleaveBgr<false>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride); |
134 | 0 | } |
135 | | |
136 | | template <bool align, bool mask> SIMD_INLINE void InterleaveBgra(const uint8_t * b, const uint8_t * g, const uint8_t * r, const uint8_t * a, uint8_t * bgra, const __mmask64 * tails) |
137 | 0 | { |
138 | 0 | __m512i _b = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(b, tails[4]))); |
139 | 0 | __m512i _g = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(g, tails[4]))); |
140 | 0 | __m512i _r = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(r, tails[4]))); |
141 | 0 | __m512i _a = _mm512_permutexvar_epi32(K32_PERMUTE_FOR_TWO_UNPACK, (Load<align, mask>(a, tails[4]))); |
142 | 0 | __m512i bg0 = UnpackU8<0>(_b, _g); |
143 | 0 | __m512i bg1 = UnpackU8<1>(_b, _g); |
144 | 0 | __m512i ra0 = UnpackU8<0>(_r, _a); |
145 | 0 | __m512i ra1 = UnpackU8<1>(_r, _a); |
146 | 0 | Store<align, mask>(bgra + 0 * A, UnpackU16<0>(bg0, ra0), tails[0]); |
147 | 0 | Store<align, mask>(bgra + 1 * A, UnpackU16<1>(bg0, ra0), tails[1]); |
148 | 0 | Store<align, mask>(bgra + 2 * A, UnpackU16<0>(bg1, ra1), tails[2]); |
149 | 0 | Store<align, mask>(bgra + 3 * A, UnpackU16<1>(bg1, ra1), tails[3]); |
150 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false, false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false, true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) |
151 | | |
152 | | template <bool align> void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride) |
153 | 0 | { |
154 | 0 | if (align) |
155 | 0 | { |
156 | 0 | assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride)); |
157 | 0 | assert(Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride)); |
158 | 0 | } |
159 | |
|
160 | 0 | size_t alignedWidth = AlignLo(width, A); |
161 | 0 | __mmask64 tailMasks[5]; |
162 | 0 | for (size_t c = 0; c < 4; ++c) |
163 | 0 | tailMasks[c] = TailMask64((width - alignedWidth) * 4 - A*c); |
164 | 0 | tailMasks[4] = TailMask64(width - alignedWidth); |
165 | 0 | for (size_t row = 0; row < height; ++row) |
166 | 0 | { |
167 | 0 | size_t col = 0; |
168 | 0 | for (; col < alignedWidth; col += A) |
169 | 0 | InterleaveBgra<align, false>(b + col, g + col, r + col, a + col, bgra + col * 4, tailMasks); |
170 | 0 | if (col < width) |
171 | 0 | InterleaveBgra<align, true>(b + col, g + col, r + col, a + col, bgra + col * 4, tailMasks); |
172 | 0 | b += bStride; |
173 | 0 | g += gStride; |
174 | 0 | r += rStride; |
175 | 0 | a += aStride; |
176 | 0 | bgra += bgraStride; |
177 | 0 | } |
178 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::InterleaveBgra<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
179 | | |
180 | | void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride) |
181 | 0 | { |
182 | 0 | if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride) && |
183 | 0 | Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride)) |
184 | 0 | InterleaveBgra<true>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride); |
185 | 0 | else |
186 | 0 | InterleaveBgra<false>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride); |
187 | 0 | } |
188 | | } |
189 | | #endif// SIMD_AVX512BW_ENABLE |
190 | | } |