/src/Simd/src/Simd/SimdSse41Interleave.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2022 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | #include "Simd/SimdInterleave.h" |
27 | | |
28 | | namespace Simd |
29 | | { |
30 | | #ifdef SIMD_SSE41_ENABLE |
31 | | namespace Sse41 |
32 | | { |
33 | | template <bool align> SIMD_INLINE void InterleaveUv(const uint8_t* u, const uint8_t* v, uint8_t* uv) |
34 | 0 | { |
35 | 0 | __m128i _u = Load<align>((__m128i*)u); |
36 | 0 | __m128i _v = Load<align>((__m128i*)v); |
37 | 0 | Store<align>((__m128i*)uv + 0, _mm_unpacklo_epi8(_u, _v)); |
38 | 0 | Store<align>((__m128i*)uv + 1, _mm_unpackhi_epi8(_u, _v)); |
39 | 0 | } Unexecuted instantiation: void Simd::Sse41::InterleaveUv<true>(unsigned char const*, unsigned char const*, unsigned char*) Unexecuted instantiation: void Simd::Sse41::InterleaveUv<false>(unsigned char const*, unsigned char const*, unsigned char*) |
40 | | |
41 | | template <bool align> void InterleaveUv(const uint8_t* u, size_t uStride, const uint8_t* v, size_t vStride, size_t width, size_t height, uint8_t* uv, size_t uvStride) |
42 | 0 | { |
43 | 0 | assert(width >= A); |
44 | 0 | if (align) |
45 | 0 | { |
46 | 0 | assert(Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride)); |
47 | 0 | } |
48 | |
|
49 | 0 | size_t bodyWidth = AlignLo(width, A); |
50 | 0 | size_t tail = width - bodyWidth; |
51 | 0 | for (size_t row = 0; row < height; ++row) |
52 | 0 | { |
53 | 0 | for (size_t col = 0, offset = 0; col < bodyWidth; col += A, offset += DA) |
54 | 0 | InterleaveUv<align>(u + col, v + col, uv + offset); |
55 | 0 | if (tail) |
56 | 0 | { |
57 | 0 | size_t col = width - A; |
58 | 0 | size_t offset = 2 * col; |
59 | 0 | InterleaveUv<false>(u + col, v + col, uv + offset); |
60 | 0 | } |
61 | 0 | u += uStride; |
62 | 0 | v += vStride; |
63 | 0 | uv += uvStride; |
64 | 0 | } |
65 | 0 | } Unexecuted instantiation: void Simd::Sse41::InterleaveUv<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::InterleaveUv<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
66 | | |
67 | | void InterleaveUv(const uint8_t* u, size_t uStride, const uint8_t* v, size_t vStride, size_t width, size_t height, uint8_t* uv, size_t uvStride) |
68 | 0 | { |
69 | 0 | if (Aligned(uv) && Aligned(uvStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride)) |
70 | 0 | InterleaveUv<true>(u, uStride, v, vStride, width, height, uv, uvStride); |
71 | 0 | else |
72 | 0 | InterleaveUv<false>(u, uStride, v, vStride, width, height, uv, uvStride); |
73 | 0 | } |
74 | | |
75 | | //----------------------------------------------------------------------------------------- |
76 | | |
77 | | template <bool align> SIMD_INLINE void InterleaveBgr(const uint8_t * b, const uint8_t * g, const uint8_t * r, size_t offset, uint8_t * bgr) |
78 | 0 | { |
79 | 0 | __m128i _b = Load<align>((__m128i*)(b + offset)); |
80 | 0 | __m128i _g = Load<align>((__m128i*)(g + offset)); |
81 | 0 | __m128i _r = Load<align>((__m128i*)(r + offset)); |
82 | 0 | Store<align>((__m128i*)bgr + 0, InterleaveBgr<0>(_b, _g, _r)); |
83 | 0 | Store<align>((__m128i*)bgr + 1, InterleaveBgr<1>(_b, _g, _r)); |
84 | 0 | Store<align>((__m128i*)bgr + 2, InterleaveBgr<2>(_b, _g, _r)); |
85 | 0 | } Unexecuted instantiation: void Simd::Sse41::InterleaveBgr<true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned long, unsigned char*) Unexecuted instantiation: void Simd::Sse41::InterleaveBgr<false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned long, unsigned char*) |
86 | | |
87 | | template <bool align> void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride) |
88 | 0 | { |
89 | 0 | assert(width >= A); |
90 | 0 | if (align) |
91 | 0 | { |
92 | 0 | assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride)); |
93 | 0 | assert(Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride)); |
94 | 0 | } |
95 | |
|
96 | 0 | size_t alignedWidth = AlignLo(width, A); |
97 | 0 | size_t tail = width - alignedWidth; |
98 | 0 | size_t A3 = A * 3; |
99 | 0 | for (size_t row = 0; row < height; ++row) |
100 | 0 | { |
101 | 0 | for (size_t col = 0, offset = 0; col < alignedWidth; col += A, offset += A3) |
102 | 0 | InterleaveBgr<align>(b, g, r, col, bgr + offset); |
103 | 0 | if (tail) |
104 | 0 | InterleaveBgr<false>(b, g, r, width - A, bgr + 3 * (width - A)); |
105 | 0 | b += bStride; |
106 | 0 | g += gStride; |
107 | 0 | r += rStride; |
108 | 0 | bgr += bgrStride; |
109 | 0 | } |
110 | 0 | } Unexecuted instantiation: void Simd::Sse41::InterleaveBgr<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::InterleaveBgr<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
111 | | |
112 | | void InterleaveBgr(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, size_t width, size_t height, uint8_t * bgr, size_t bgrStride) |
113 | 0 | { |
114 | 0 | if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) |
115 | 0 | && Aligned(r) && Aligned(rStride) && Aligned(bgr) && Aligned(bgrStride)) |
116 | 0 | InterleaveBgr<true>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride); |
117 | 0 | else |
118 | 0 | InterleaveBgr<false>(b, bStride, g, gStride, r, rStride, width, height, bgr, bgrStride); |
119 | 0 | } |
120 | | |
121 | | //----------------------------------------------------------------------------------------- |
122 | | |
123 | | template <bool align> SIMD_INLINE void InterleaveBgra(const uint8_t * b, const uint8_t * g, const uint8_t * r, const uint8_t * a, size_t offset, uint8_t * bgra) |
124 | 0 | { |
125 | 0 | __m128i _b = Load<align>((__m128i*)(b + offset)); |
126 | 0 | __m128i _g = Load<align>((__m128i*)(g + offset)); |
127 | 0 | __m128i _r = Load<align>((__m128i*)(r + offset)); |
128 | 0 | __m128i _a = Load<align>((__m128i*)(a + offset)); |
129 | 0 | __m128i bg0 = _mm_unpacklo_epi8(_b, _g); |
130 | 0 | __m128i bg1 = _mm_unpackhi_epi8(_b, _g); |
131 | 0 | __m128i ra0 = _mm_unpacklo_epi8(_r, _a); |
132 | 0 | __m128i ra1 = _mm_unpackhi_epi8(_r, _a); |
133 | 0 | Store<align>((__m128i*)bgra + 0, _mm_unpacklo_epi16(bg0, ra0)); |
134 | 0 | Store<align>((__m128i*)bgra + 1, _mm_unpackhi_epi16(bg0, ra0)); |
135 | 0 | Store<align>((__m128i*)bgra + 2, _mm_unpacklo_epi16(bg1, ra1)); |
136 | 0 | Store<align>((__m128i*)bgra + 3, _mm_unpackhi_epi16(bg1, ra1)); |
137 | 0 | } Unexecuted instantiation: void Simd::Sse41::InterleaveBgra<true>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned long, unsigned char*) Unexecuted instantiation: void Simd::Sse41::InterleaveBgra<false>(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, unsigned long, unsigned char*) |
138 | | |
139 | | template <bool align> void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride) |
140 | 0 | { |
141 | 0 | assert(width >= A); |
142 | 0 | if (align) |
143 | 0 | { |
144 | 0 | assert(Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride)); |
145 | 0 | assert(Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride)); |
146 | 0 | } |
147 | |
|
148 | 0 | size_t alignedWidth = AlignLo(width, A); |
149 | 0 | size_t tail = width - alignedWidth; |
150 | 0 | for (size_t row = 0; row < height; ++row) |
151 | 0 | { |
152 | 0 | for (size_t col = 0, offset = 0; col < alignedWidth; col += A, offset += QA) |
153 | 0 | InterleaveBgra<align>(b, g, r, a, col, bgra + offset); |
154 | 0 | if (tail) |
155 | 0 | InterleaveBgra<false>(b, g, r, a, width - A, bgra + 4 * (width - A)); |
156 | 0 | b += bStride; |
157 | 0 | g += gStride; |
158 | 0 | r += rStride; |
159 | 0 | a += aStride; |
160 | 0 | bgra += bgraStride; |
161 | 0 | } |
162 | 0 | } Unexecuted instantiation: void Simd::Sse41::InterleaveBgra<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::InterleaveBgra<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
163 | | |
164 | | void InterleaveBgra(const uint8_t * b, size_t bStride, const uint8_t * g, size_t gStride, const uint8_t * r, size_t rStride, const uint8_t * a, size_t aStride, size_t width, size_t height, uint8_t * bgra, size_t bgraStride) |
165 | 0 | { |
166 | 0 | if (Aligned(b) && Aligned(bStride) && Aligned(g) && Aligned(gStride) && Aligned(r) && Aligned(rStride) && |
167 | 0 | Aligned(a) && Aligned(aStride) && Aligned(bgra) && Aligned(bgraStride)) |
168 | 0 | InterleaveBgra<true>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride); |
169 | 0 | else |
170 | 0 | InterleaveBgra<false>(b, bStride, g, gStride, r, rStride, a, aStride, width, height, bgra, bgraStride); |
171 | 0 | } |
172 | | } |
173 | | #endif |
174 | | } |