/src/Simd/src/Simd/SimdSse41Operation.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2022 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | #include "Simd/SimdAlphaBlending.h" |
27 | | |
28 | | namespace Simd |
29 | | { |
30 | | #ifdef SIMD_SSE41_ENABLE |
31 | | namespace Sse41 |
32 | | { |
33 | | template <SimdOperationBinary8uType type> SIMD_INLINE __m128i OperationBinary8u(const __m128i & a, const __m128i & b); |
34 | | |
35 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uAverage>(const __m128i & a, const __m128i & b) |
36 | 0 | { |
37 | 0 | return _mm_avg_epu8(a, b); |
38 | 0 | } |
39 | | |
40 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uAnd>(const __m128i & a, const __m128i & b) |
41 | 0 | { |
42 | 0 | return _mm_and_si128(a, b); |
43 | 0 | } |
44 | | |
45 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uOr>(const __m128i & a, const __m128i & b) |
46 | 0 | { |
47 | 0 | return _mm_or_si128(a, b); |
48 | 0 | } |
49 | | |
50 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uMaximum>(const __m128i & a, const __m128i & b) |
51 | 0 | { |
52 | 0 | return _mm_max_epu8(a, b); |
53 | 0 | } |
54 | | |
55 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uMinimum>(const __m128i & a, const __m128i & b) |
56 | 0 | { |
57 | 0 | return _mm_min_epu8(a, b); |
58 | 0 | } |
59 | | |
60 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uSaturatedSubtraction>(const __m128i & a, const __m128i & b) |
61 | 0 | { |
62 | 0 | return _mm_subs_epu8(a, b); |
63 | 0 | } |
64 | | |
65 | | template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uSaturatedAddition>(const __m128i & a, const __m128i & b) |
66 | 0 | { |
67 | 0 | return _mm_adds_epu8(a, b); |
68 | 0 | } |
69 | | |
70 | | template <bool align, SimdOperationBinary8uType type> void OperationBinary8u(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, |
71 | | size_t width, size_t height, size_t channelCount, uint8_t * dst, size_t dstStride) |
72 | 0 | { |
73 | 0 | assert(width*channelCount >= A); |
74 | 0 | if (align) |
75 | 0 | assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride)); |
76 | |
|
77 | 0 | size_t size = channelCount*width; |
78 | 0 | size_t alignedSize = Simd::AlignLo(size, A); |
79 | 0 | for (size_t row = 0; row < height; ++row) |
80 | 0 | { |
81 | 0 | for (size_t offset = 0; offset < alignedSize; offset += A) |
82 | 0 | { |
83 | 0 | const __m128i a_ = Load<align>((__m128i*)(a + offset)); |
84 | 0 | const __m128i b_ = Load<align>((__m128i*)(b + offset)); |
85 | 0 | Store<align>((__m128i*)(dst + offset), OperationBinary8u<type>(a_, b_)); |
86 | 0 | } |
87 | 0 | if (alignedSize != size) |
88 | 0 | { |
89 | 0 | const __m128i a_ = Load<false>((__m128i*)(a + size - A)); |
90 | 0 | const __m128i b_ = Load<false>((__m128i*)(b + size - A)); |
91 | 0 | Store<false>((__m128i*)(dst + size - A), OperationBinary8u<type>(a_, b_)); |
92 | 0 | } |
93 | 0 | a += aStride; |
94 | 0 | b += bStride; |
95 | 0 | dst += dstStride; |
96 | 0 | } |
97 | 0 | } Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)2>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)3>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)4>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)5>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)6>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)2>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)3>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)4>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)5>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)6>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
98 | | |
99 | | template <bool align> void OperationBinary8u(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, |
100 | | size_t width, size_t height, size_t channelCount, uint8_t * dst, size_t dstStride, SimdOperationBinary8uType type) |
101 | 0 | { |
102 | 0 | switch (type) |
103 | 0 | { |
104 | 0 | case SimdOperationBinary8uAverage: |
105 | 0 | return OperationBinary8u<align, SimdOperationBinary8uAverage>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
106 | 0 | case SimdOperationBinary8uAnd: |
107 | 0 | return OperationBinary8u<align, SimdOperationBinary8uAnd>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
108 | 0 | case SimdOperationBinary8uOr: |
109 | 0 | return OperationBinary8u<align, SimdOperationBinary8uOr>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
110 | 0 | case SimdOperationBinary8uMaximum: |
111 | 0 | return OperationBinary8u<align, SimdOperationBinary8uMaximum>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
112 | 0 | case SimdOperationBinary8uMinimum: |
113 | 0 | return OperationBinary8u<align, SimdOperationBinary8uMinimum>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
114 | 0 | case SimdOperationBinary8uSaturatedSubtraction: |
115 | 0 | return OperationBinary8u<align, SimdOperationBinary8uSaturatedSubtraction>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
116 | 0 | case SimdOperationBinary8uSaturatedAddition: |
117 | 0 | return OperationBinary8u<align, SimdOperationBinary8uSaturatedAddition>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride); |
118 | 0 | default: |
119 | 0 | assert(0); |
120 | 0 | } |
121 | 0 | } Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary8uType) Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary8uType) |
122 | | |
123 | | void OperationBinary8u(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, |
124 | | size_t width, size_t height, size_t channelCount, uint8_t * dst, size_t dstStride, SimdOperationBinary8uType type) |
125 | 0 | { |
126 | 0 | if (Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride)) |
127 | 0 | OperationBinary8u<true>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride, type); |
128 | 0 | else |
129 | 0 | OperationBinary8u<false>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride, type); |
130 | 0 | } |
131 | | |
132 | | //----------------------------------------------------------------------------------------- |
133 | | |
134 | | template <SimdOperationBinary16iType type> SIMD_INLINE __m128i OperationBinary16i(const __m128i & a, const __m128i & b); |
135 | | |
136 | | template <> SIMD_INLINE __m128i OperationBinary16i<SimdOperationBinary16iAddition>(const __m128i & a, const __m128i & b) |
137 | 0 | { |
138 | 0 | return _mm_add_epi16(a, b); |
139 | 0 | } |
140 | | |
141 | | template <> SIMD_INLINE __m128i OperationBinary16i<SimdOperationBinary16iSubtraction>(const __m128i & a, const __m128i & b) |
142 | 0 | { |
143 | 0 | return _mm_sub_epi16(a, b); |
144 | 0 | } |
145 | | |
146 | | template <bool align, SimdOperationBinary16iType type> void OperationBinary16i(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, |
147 | | size_t width, size_t height, uint8_t * dst, size_t dstStride) |
148 | 0 | { |
149 | 0 | assert(width * sizeof(uint16_t) >= A); |
150 | 0 | if (align) |
151 | 0 | assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride)); |
152 | |
|
153 | 0 | size_t size = width * sizeof(int16_t); |
154 | 0 | size_t alignedSize = Simd::AlignLo(size, A); |
155 | 0 | for (size_t row = 0; row < height; ++row) |
156 | 0 | { |
157 | 0 | for (size_t offset = 0; offset < alignedSize; offset += A) |
158 | 0 | { |
159 | 0 | const __m128i a_ = Load<align>((__m128i*)(a + offset)); |
160 | 0 | const __m128i b_ = Load<align>((__m128i*)(b + offset)); |
161 | 0 | Store<align>((__m128i*)(dst + offset), OperationBinary16i<type>(a_, b_)); |
162 | 0 | } |
163 | 0 | if (alignedSize != size) |
164 | 0 | { |
165 | 0 | const __m128i a_ = Load<false>((__m128i*)(a + size - A)); |
166 | 0 | const __m128i b_ = Load<false>((__m128i*)(b + size - A)); |
167 | 0 | Store<false>((__m128i*)(dst + size - A), OperationBinary16i<type>(a_, b_)); |
168 | 0 | } |
169 | 0 | a += aStride; |
170 | 0 | b += bStride; |
171 | 0 | dst += dstStride; |
172 | 0 | } |
173 | 0 | } Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<true, (SimdOperationBinary16iType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<true, (SimdOperationBinary16iType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<false, (SimdOperationBinary16iType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<false, (SimdOperationBinary16iType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long) |
174 | | |
175 | | template <bool align> void OperationBinary16i(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, |
176 | | size_t width, size_t height, uint8_t * dst, size_t dstStride, SimdOperationBinary16iType type) |
177 | 0 | { |
178 | 0 | switch (type) |
179 | 0 | { |
180 | 0 | case SimdOperationBinary16iAddition: |
181 | 0 | return OperationBinary16i<align, SimdOperationBinary16iAddition>(a, aStride, b, bStride, width, height, dst, dstStride); |
182 | 0 | case SimdOperationBinary16iSubtraction: |
183 | 0 | return OperationBinary16i<align, SimdOperationBinary16iSubtraction>(a, aStride, b, bStride, width, height, dst, dstStride); |
184 | 0 | default: |
185 | 0 | assert(0); |
186 | 0 | } |
187 | 0 | } Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary16iType) Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary16iType) |
188 | | |
189 | | void OperationBinary16i(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride, |
190 | | size_t width, size_t height, uint8_t * dst, size_t dstStride, SimdOperationBinary16iType type) |
191 | 0 | { |
192 | 0 | if (Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride)) |
193 | 0 | OperationBinary16i<true>(a, aStride, b, bStride, width, height, dst, dstStride, type); |
194 | 0 | else |
195 | 0 | OperationBinary16i<false>(a, aStride, b, bStride, width, height, dst, dstStride, type); |
196 | 0 | } |
197 | | |
198 | | //----------------------------------------------------------------------------------------- |
199 | | |
200 | | template <bool align> SIMD_INLINE void VectorProduct(const __m128i & vertical, const uint8_t * horizontal, uint8_t * dst) |
201 | 0 | { |
202 | 0 | __m128i _horizontal = Load<align>((__m128i*)horizontal); |
203 | 0 | __m128i lo = Divide16uBy255(_mm_mullo_epi16(vertical, _mm_unpacklo_epi8(_horizontal, K_ZERO))); |
204 | 0 | __m128i hi = Divide16uBy255(_mm_mullo_epi16(vertical, _mm_unpackhi_epi8(_horizontal, K_ZERO))); |
205 | 0 | Store<align>((__m128i*)dst, _mm_packus_epi16(lo, hi)); |
206 | 0 | } Unexecuted instantiation: void Simd::Sse41::VectorProduct<true>(long long __vector(2) const&, unsigned char const*, unsigned char*) Unexecuted instantiation: void Simd::Sse41::VectorProduct<false>(long long __vector(2) const&, unsigned char const*, unsigned char*) |
207 | | |
208 | | template <bool align> void VectorProduct(const uint8_t * vertical, const uint8_t * horizontal, uint8_t * dst, size_t stride, size_t width, size_t height) |
209 | 0 | { |
210 | 0 | assert(width >= A); |
211 | 0 | if (align) |
212 | 0 | assert(Aligned(horizontal) && Aligned(dst) && Aligned(stride)); |
213 | |
|
214 | 0 | size_t alignedWidth = Simd::AlignLo(width, A); |
215 | 0 | for (size_t row = 0; row < height; ++row) |
216 | 0 | { |
217 | 0 | __m128i _vertical = _mm_set1_epi16(vertical[row]); |
218 | 0 | for (size_t col = 0; col < alignedWidth; col += A) |
219 | 0 | VectorProduct<align>(_vertical, horizontal + col, dst + col); |
220 | 0 | if (alignedWidth != width) |
221 | 0 | VectorProduct<false>(_vertical, horizontal + width - A, dst + width - A); |
222 | 0 | dst += stride; |
223 | 0 | } |
224 | 0 | } Unexecuted instantiation: void Simd::Sse41::VectorProduct<true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: void Simd::Sse41::VectorProduct<false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long, unsigned long, unsigned long) |
225 | | |
226 | | void VectorProduct(const uint8_t * vertical, const uint8_t * horizontal, uint8_t * dst, size_t stride, size_t width, size_t height) |
227 | 0 | { |
228 | 0 | if (Aligned(horizontal) && Aligned(dst) && Aligned(stride)) |
229 | 0 | VectorProduct<true>(vertical, horizontal, dst, stride, width, height); |
230 | 0 | else |
231 | 0 | VectorProduct<false>(vertical, horizontal, dst, stride, width, height); |
232 | 0 | } |
233 | | } |
234 | | #endif |
235 | | } |