/src/Simd/src/Simd/SimdSse41Texture.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2022 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | #include "Simd/SimdSet.h" |
27 | | #include "Simd/SimdExtract.h" |
28 | | #include "Simd/SimdBase.h" |
29 | | #include "Simd/SimdUnpack.h" |
30 | | |
31 | | namespace Simd |
32 | | { |
33 | | #ifdef SIMD_SSE41_ENABLE |
34 | | namespace Sse41 |
35 | | { |
36 | | SIMD_INLINE __m128i TextureBoostedSaturatedGradient16(__m128i difference, __m128i saturation, const __m128i & boost) |
37 | 0 | { |
38 | 0 | return _mm_mullo_epi16(_mm_max_epi16(K_ZERO, _mm_add_epi16(saturation, _mm_min_epi16(difference, saturation))), boost); |
39 | 0 | } |
40 | | |
41 | | SIMD_INLINE __m128i TextureBoostedSaturatedGradient8(__m128i a, __m128i b, __m128i saturation, const __m128i & boost) |
42 | 0 | { |
43 | 0 | __m128i lo = TextureBoostedSaturatedGradient16(SubUnpackedU8<0>(b, a), saturation, boost); |
44 | 0 | __m128i hi = TextureBoostedSaturatedGradient16(SubUnpackedU8<1>(b, a), saturation, boost); |
45 | 0 | return _mm_packus_epi16(lo, hi); |
46 | 0 | } |
47 | | |
48 | | template<bool align> SIMD_INLINE void TextureBoostedSaturatedGradient(const uint8_t * src, uint8_t * dx, uint8_t * dy, |
49 | | size_t stride, __m128i saturation, __m128i boost) |
50 | 0 | { |
51 | 0 | const __m128i s10 = Load<false>((__m128i*)(src - 1)); |
52 | 0 | const __m128i s12 = Load<false>((__m128i*)(src + 1)); |
53 | 0 | const __m128i s01 = Load<align>((__m128i*)(src - stride)); |
54 | 0 | const __m128i s21 = Load<align>((__m128i*)(src + stride)); |
55 | 0 | Store<align>((__m128i*)dx, TextureBoostedSaturatedGradient8(s10, s12, saturation, boost)); |
56 | 0 | Store<align>((__m128i*)dy, TextureBoostedSaturatedGradient8(s01, s21, saturation, boost)); |
57 | 0 | } Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<true>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(2), long long __vector(2)) Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<false>(unsigned char const*, unsigned char*, unsigned char*, unsigned long, long long __vector(2), long long __vector(2)) |
58 | | |
59 | | template<bool align> void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height, |
60 | | uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride) |
61 | 0 | { |
62 | 0 | assert(width >= A && int(2)*saturation*boost <= 0xFF); |
63 | 0 | if (align) |
64 | 0 | { |
65 | 0 | assert(Aligned(src) && Aligned(srcStride) && Aligned(dx) && Aligned(dxStride) && Aligned(dy) && Aligned(dyStride)); |
66 | 0 | } |
67 | |
|
68 | 0 | size_t alignedWidth = AlignLo(width, A); |
69 | 0 | __m128i _saturation = _mm_set1_epi16(saturation); |
70 | 0 | __m128i _boost = _mm_set1_epi16(boost); |
71 | |
|
72 | 0 | memset(dx, 0, width); |
73 | 0 | memset(dy, 0, width); |
74 | 0 | src += srcStride; |
75 | 0 | dx += dxStride; |
76 | 0 | dy += dyStride; |
77 | 0 | for (size_t row = 2; row < height; ++row) |
78 | 0 | { |
79 | 0 | for (size_t col = 0; col < alignedWidth; col += A) |
80 | 0 | TextureBoostedSaturatedGradient<align>(src + col, dx + col, dy + col, srcStride, _saturation, _boost); |
81 | 0 | if (width != alignedWidth) |
82 | 0 | TextureBoostedSaturatedGradient<false>(src + width - A, dx + width - A, dy + width - A, srcStride, _saturation, _boost); |
83 | |
|
84 | 0 | dx[0] = 0; |
85 | 0 | dy[0] = 0; |
86 | 0 | dx[width - 1] = 0; |
87 | 0 | dy[width - 1] = 0; |
88 | |
|
89 | 0 | src += srcStride; |
90 | 0 | dx += dxStride; |
91 | 0 | dy += dyStride; |
92 | 0 | } |
93 | 0 | memset(dx, 0, width); |
94 | 0 | memset(dy, 0, width); |
95 | 0 | } Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char, unsigned char*, unsigned long, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::TextureBoostedSaturatedGradient<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char, unsigned char*, unsigned long, unsigned char*, unsigned long) |
96 | | |
97 | | void TextureBoostedSaturatedGradient(const uint8_t * src, size_t srcStride, size_t width, size_t height, |
98 | | uint8_t saturation, uint8_t boost, uint8_t * dx, size_t dxStride, uint8_t * dy, size_t dyStride) |
99 | 0 | { |
100 | 0 | if (Aligned(src) && Aligned(srcStride) && Aligned(dx) && Aligned(dxStride) && Aligned(dy) && Aligned(dyStride)) |
101 | 0 | TextureBoostedSaturatedGradient<true>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride); |
102 | 0 | else |
103 | 0 | TextureBoostedSaturatedGradient<false>(src, srcStride, width, height, saturation, boost, dx, dxStride, dy, dyStride); |
104 | 0 | } |
105 | | |
106 | | //----------------------------------------------------------------------------------------- |
107 | | |
108 | | template<bool align> SIMD_INLINE void TextureBoostedUv(const uint8_t* src, uint8_t* dst, __m128i min8, __m128i max8, __m128i boost16) |
109 | 0 | { |
110 | 0 | const __m128i _src = Load<align>((__m128i*)src); |
111 | 0 | const __m128i saturated = _mm_sub_epi8(_mm_max_epu8(min8, _mm_min_epu8(max8, _src)), min8); |
112 | 0 | const __m128i lo = _mm_mullo_epi16(_mm_unpacklo_epi8(saturated, K_ZERO), boost16); |
113 | 0 | const __m128i hi = _mm_mullo_epi16(_mm_unpackhi_epi8(saturated, K_ZERO), boost16); |
114 | 0 | Store<align>((__m128i*)dst, _mm_packus_epi16(lo, hi)); |
115 | 0 | } Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<true>(unsigned char const*, unsigned char*, long long __vector(2), long long __vector(2), long long __vector(2)) Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<false>(unsigned char const*, unsigned char*, long long __vector(2), long long __vector(2), long long __vector(2)) |
116 | | |
117 | | template<bool align> void TextureBoostedUv(const uint8_t* src, size_t srcStride, size_t width, size_t height, |
118 | | uint8_t boost, uint8_t* dst, size_t dstStride) |
119 | 0 | { |
120 | 0 | assert(width >= A && boost < 0x80); |
121 | 0 | if (align) |
122 | 0 | { |
123 | 0 | assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)); |
124 | 0 | } |
125 | |
|
126 | 0 | size_t alignedWidth = AlignLo(width, A); |
127 | 0 | int min = 128 - (128 / boost); |
128 | 0 | int max = 255 - min; |
129 | |
|
130 | 0 | __m128i min8 = _mm_set1_epi8(min); |
131 | 0 | __m128i max8 = _mm_set1_epi8(max); |
132 | 0 | __m128i boost16 = _mm_set1_epi16(boost); |
133 | |
|
134 | 0 | for (size_t row = 0; row < height; ++row) |
135 | 0 | { |
136 | 0 | for (size_t col = 0; col < alignedWidth; col += A) |
137 | 0 | TextureBoostedUv<align>(src + col, dst + col, min8, max8, boost16); |
138 | 0 | if (width != alignedWidth) |
139 | 0 | TextureBoostedUv<false>(src + width - A, dst + width - A, min8, max8, boost16); |
140 | |
|
141 | 0 | src += srcStride; |
142 | 0 | dst += dstStride; |
143 | 0 | } |
144 | 0 | } Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::TextureBoostedUv<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char, unsigned char*, unsigned long) |
145 | | |
146 | | void TextureBoostedUv(const uint8_t* src, size_t srcStride, size_t width, size_t height, |
147 | | uint8_t boost, uint8_t* dst, size_t dstStride) |
148 | 0 | { |
149 | 0 | if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)) |
150 | 0 | TextureBoostedUv<true>(src, srcStride, width, height, boost, dst, dstStride); |
151 | 0 | else |
152 | 0 | TextureBoostedUv<false>(src, srcStride, width, height, boost, dst, dstStride); |
153 | 0 | } |
154 | | |
155 | | //----------------------------------------------------------------------------------------- |
156 | | |
157 | | template <bool align> SIMD_INLINE void TextureGetDifferenceSum(const uint8_t* src, const uint8_t* lo, const uint8_t* hi, |
158 | | __m128i& positive, __m128i& negative, const __m128i& mask) |
159 | 0 | { |
160 | 0 | const __m128i _src = Load<align>((__m128i*)src); |
161 | 0 | const __m128i _lo = Load<align>((__m128i*)lo); |
162 | 0 | const __m128i _hi = Load<align>((__m128i*)hi); |
163 | 0 | const __m128i average = _mm_and_si128(mask, _mm_avg_epu8(_lo, _hi)); |
164 | 0 | const __m128i current = _mm_and_si128(mask, _src); |
165 | 0 | positive = _mm_add_epi64(positive, _mm_sad_epu8(_mm_subs_epu8(current, average), K_ZERO)); |
166 | 0 | negative = _mm_add_epi64(negative, _mm_sad_epu8(_mm_subs_epu8(average, current), K_ZERO)); |
167 | 0 | } Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<true>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(2)&, long long __vector(2)&, long long __vector(2) const&) Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<false>(unsigned char const*, unsigned char const*, unsigned char const*, long long __vector(2)&, long long __vector(2)&, long long __vector(2) const&) |
168 | | |
169 | | template <bool align> void TextureGetDifferenceSum(const uint8_t* src, size_t srcStride, size_t width, size_t height, |
170 | | const uint8_t* lo, size_t loStride, const uint8_t* hi, size_t hiStride, int64_t* sum) |
171 | 0 | { |
172 | 0 | assert(width >= A && sum != NULL); |
173 | 0 | if (align) |
174 | 0 | { |
175 | 0 | assert(Aligned(src) && Aligned(srcStride) && Aligned(lo) && Aligned(loStride) && Aligned(hi) && Aligned(hiStride)); |
176 | 0 | } |
177 | |
|
178 | 0 | size_t alignedWidth = AlignLo(width, A); |
179 | 0 | __m128i tailMask = ShiftLeft(K_INV_ZERO, A - width + alignedWidth); |
180 | 0 | __m128i positive = _mm_setzero_si128(); |
181 | 0 | __m128i negative = _mm_setzero_si128(); |
182 | 0 | for (size_t row = 0; row < height; ++row) |
183 | 0 | { |
184 | 0 | for (size_t col = 0; col < alignedWidth; col += A) |
185 | 0 | TextureGetDifferenceSum<align>(src + col, lo + col, hi + col, positive, negative, K_INV_ZERO); |
186 | 0 | if (width != alignedWidth) |
187 | 0 | TextureGetDifferenceSum<false>(src + width - A, lo + width - A, hi + width - A, positive, negative, tailMask); |
188 | 0 | src += srcStride; |
189 | 0 | lo += loStride; |
190 | 0 | hi += hiStride; |
191 | 0 | } |
192 | 0 | *sum = ExtractInt64Sum(positive) - ExtractInt64Sum(negative); |
193 | 0 | } Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, long*) Unexecuted instantiation: void Simd::Sse41::TextureGetDifferenceSum<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, long*) |
194 | | |
195 | | void TextureGetDifferenceSum(const uint8_t* src, size_t srcStride, size_t width, size_t height, |
196 | | const uint8_t* lo, size_t loStride, const uint8_t* hi, size_t hiStride, int64_t* sum) |
197 | 0 | { |
198 | 0 | if (Aligned(src) && Aligned(srcStride) && Aligned(lo) && Aligned(loStride) && Aligned(hi) && Aligned(hiStride)) |
199 | 0 | TextureGetDifferenceSum<true>(src, srcStride, width, height, lo, loStride, hi, hiStride, sum); |
200 | 0 | else |
201 | 0 | TextureGetDifferenceSum<false>(src, srcStride, width, height, lo, loStride, hi, hiStride, sum); |
202 | 0 | } |
203 | | |
204 | | //----------------------------------------------------------------------------------------- |
205 | | |
206 | | template <bool align> void TexturePerformCompensation(const uint8_t* src, size_t srcStride, size_t width, size_t height, |
207 | | int shift, uint8_t* dst, size_t dstStride) |
208 | 0 | { |
209 | 0 | assert(width >= A && shift > -0xFF && shift < 0xFF && shift != 0); |
210 | 0 | if (align) |
211 | 0 | { |
212 | 0 | assert(Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)); |
213 | 0 | } |
214 | |
|
215 | 0 | size_t alignedWidth = AlignLo(width, A); |
216 | 0 | __m128i tailMask = src == dst ? ShiftLeft(K_INV_ZERO, A - width + alignedWidth) : K_INV_ZERO; |
217 | 0 | if (shift > 0) |
218 | 0 | { |
219 | 0 | __m128i _shift = _mm_set1_epi8((char)shift); |
220 | 0 | for (size_t row = 0; row < height; ++row) |
221 | 0 | { |
222 | 0 | for (size_t col = 0; col < alignedWidth; col += A) |
223 | 0 | { |
224 | 0 | const __m128i _src = Load<align>((__m128i*) (src + col)); |
225 | 0 | Store<align>((__m128i*) (dst + col), _mm_adds_epu8(_src, _shift)); |
226 | 0 | } |
227 | 0 | if (width != alignedWidth) |
228 | 0 | { |
229 | 0 | const __m128i _src = Load<false>((__m128i*) (src + width - A)); |
230 | 0 | Store<false>((__m128i*) (dst + width - A), _mm_adds_epu8(_src, _mm_and_si128(_shift, tailMask))); |
231 | 0 | } |
232 | 0 | src += srcStride; |
233 | 0 | dst += dstStride; |
234 | 0 | } |
235 | 0 | } |
236 | 0 | if (shift < 0) |
237 | 0 | { |
238 | 0 | __m128i _shift = _mm_set1_epi8((char)-shift); |
239 | 0 | for (size_t row = 0; row < height; ++row) |
240 | 0 | { |
241 | 0 | for (size_t col = 0; col < alignedWidth; col += A) |
242 | 0 | { |
243 | 0 | const __m128i _src = Load<align>((__m128i*) (src + col)); |
244 | 0 | Store<align>((__m128i*) (dst + col), _mm_subs_epu8(_src, _shift)); |
245 | 0 | } |
246 | 0 | if (width != alignedWidth) |
247 | 0 | { |
248 | 0 | const __m128i _src = Load<false>((__m128i*) (src + width - A)); |
249 | 0 | Store<false>((__m128i*) (dst + width - A), _mm_subs_epu8(_src, _mm_and_si128(_shift, tailMask))); |
250 | 0 | } |
251 | 0 | src += srcStride; |
252 | 0 | dst += dstStride; |
253 | 0 | } |
254 | 0 | } |
255 | 0 | } Unexecuted instantiation: void Simd::Sse41::TexturePerformCompensation<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, int, unsigned char*, unsigned long) Unexecuted instantiation: void Simd::Sse41::TexturePerformCompensation<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, int, unsigned char*, unsigned long) |
256 | | |
257 | | void TexturePerformCompensation(const uint8_t* src, size_t srcStride, size_t width, size_t height, |
258 | | int shift, uint8_t* dst, size_t dstStride) |
259 | 0 | { |
260 | 0 | if (shift == 0) |
261 | 0 | { |
262 | 0 | if (src != dst) |
263 | 0 | Base::Copy(src, srcStride, width, height, 1, dst, dstStride); |
264 | 0 | return; |
265 | 0 | } |
266 | 0 | if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)) |
267 | 0 | TexturePerformCompensation<true>(src, srcStride, width, height, shift, dst, dstStride); |
268 | 0 | else |
269 | 0 | TexturePerformCompensation<false>(src, srcStride, width, height, shift, dst, dstStride); |
270 | 0 | } |
271 | | } |
272 | | #endif |
273 | | } |