/src/Simd/src/Simd/SimdAvx2Reduce.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2020 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | |
27 | | namespace Simd |
28 | | { |
29 | | #ifdef SIMD_AVX2_ENABLE |
30 | | namespace Avx2 |
31 | | { |
32 | | #ifdef SIMD_MADDUBS_ERROR |
33 | | SIMD_INLINE __m256i Average8(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11) |
34 | | { |
35 | | __m256i lo = Average16( |
36 | | _mm256_and_si256(s00, K16_00FF), |
37 | | _mm256_and_si256(_mm256_srli_si256(s00, 1), K16_00FF), |
38 | | _mm256_and_si256(s10, K16_00FF), |
39 | | _mm256_and_si256(_mm256_srli_si256(s10, 1), K16_00FF)); |
40 | | __m256i hi = Average16( |
41 | | _mm256_and_si256(s01, K16_00FF), |
42 | | _mm256_and_si256(_mm256_srli_si256(s01, 1), K16_00FF), |
43 | | _mm256_and_si256(s11, K16_00FF), |
44 | | _mm256_and_si256(_mm256_srli_si256(s11, 1), K16_00FF)); |
45 | | return PackI16ToU8(lo, hi); |
46 | | } |
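As listed, this SIMD_MADDUBS_ERROR branch calls a four-argument Average16 overload that does not appear in the file. A minimal sketch of what that overload must compute, assuming the same rounding convention as the two-argument version below (per 16-bit lane, (sum + 2) >> 2):

    SIMD_INLINE __m256i Average16(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11)
    {
        // Rounded mean of four 16-bit vectors: (s00 + s01 + s10 + s11 + 2) >> 2 per lane.
        return _mm256_srli_epi16(_mm256_add_epi16(
            _mm256_add_epi16(_mm256_add_epi16(s00, s01), _mm256_add_epi16(s10, s11)), K16_0002), 2);
    }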
47 | | #else |
48 | | SIMD_INLINE __m256i Average16(const __m256i & s0, const __m256i & s1) |
49 | | { |
50 | | return _mm256_srli_epi16(_mm256_add_epi16(_mm256_add_epi16(_mm256_maddubs_epi16(s0, K8_01), _mm256_maddubs_epi16(s1, K8_01)), K16_0002), 2); |
51 | | } |
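The _mm256_maddubs_epi16(s, K8_01) calls above multiply each unsigned byte by 1 and add adjacent pairs into 16-bit lanes, so Average16 produces, per lane, the rounded mean of two horizontal byte pairs taken from two rows, i.e. of a 2x2 block. A scalar model of the arithmetic (a sketch, not library code):

    // One output sample per 2x2 input block, rounded to nearest:
    static inline uint8_t Average2x2(uint8_t s00, uint8_t s01, uint8_t s10, uint8_t s11)
    {
        return (uint8_t)((s00 + s01 + s10 + s11 + 2) >> 2);
    }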
52 | | |
53 | | SIMD_INLINE __m256i Average8(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11) |
54 | 0 | { |
55 | 0 | return PackI16ToU8(Average16(s00, s10), Average16(s01, s11)); |
56 | 0 | } |
57 | | #endif |
58 | | |
59 | | template <size_t channelCount> __m256i Average8(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11); |
60 | | |
61 | | template<> SIMD_INLINE __m256i Average8<1>(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11) |
62 | 0 | { |
63 | 0 | return Average8(s00, s01, s10, s11); |
64 | 0 | } |
65 | | |
66 | | const __m256i K8_RC2 = SIMD_MM256_SETR_EPI8( |
67 | | 0x0, 0x2, 0x1, 0x3, 0x4, 0x6, 0x5, 0x7, 0x8, 0xA, 0x9, 0xB, 0xC, 0xE, 0xD, 0xF, |
68 | | 0x0, 0x2, 0x1, 0x3, 0x4, 0x6, 0x5, 0x7, 0x8, 0xA, 0x9, 0xB, 0xC, 0xE, 0xD, 0xF); |
69 | | |
70 | | template<> SIMD_INLINE __m256i Average8<2>(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11) |
71 | 0 | { |
72 | 0 | return Average8(_mm256_shuffle_epi8(s00, K8_RC2), _mm256_shuffle_epi8(s01, K8_RC2), _mm256_shuffle_epi8(s10, K8_RC2), _mm256_shuffle_epi8(s11, K8_RC2)); |
73 | 0 | } |
74 | | |
75 | | const __m256i K8_RC4 = SIMD_MM256_SETR_EPI8( |
76 | | 0x0, 0x4, 0x1, 0x5, 0x2, 0x6, 0x3, 0x7, 0x8, 0xC, 0x9, 0xD, 0xA, 0xE, 0xB, 0xF, |
77 | | 0x0, 0x4, 0x1, 0x5, 0x2, 0x6, 0x3, 0x7, 0x8, 0xC, 0x9, 0xD, 0xA, 0xE, 0xB, 0xF); |
78 | | |
79 | | template<> SIMD_INLINE __m256i Average8<4>(const __m256i & s00, const __m256i & s01, const __m256i & s10, const __m256i & s11) |
80 | 0 | { |
81 | 0 | return Average8(_mm256_shuffle_epi8(s00, K8_RC4), _mm256_shuffle_epi8(s01, K8_RC4), _mm256_shuffle_epi8(s10, K8_RC4), _mm256_shuffle_epi8(s11, K8_RC4)); |
82 | 0 | } |
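The K8_RC2 and K8_RC4 masks are needed because, in interleaved multi-channel images, horizontally adjacent samples of one channel lie channelCount bytes apart, while the pairwise add inside _mm256_maddubs_epi16 sums adjacent bytes. Each mask regroups a 16-byte lane so same-channel neighbours become adjacent, and the averaged 16-bit results then come back out in the original interleaved order. For the first lane with channelCount == 2 (hypothetical sample names):

    // input bytes:               b0 g0 b1 g1 b2 g2 b3 g3 ...
    // after shuffle with K8_RC2: b0 b1 g0 g1 b2 b3 g2 g3 ...
    // maddubs pair sums:         b0+b1, g0+g1, b2+b3, g2+g3, ... (16-bit lanes)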
83 | | |
84 | | template <size_t channelCount, bool align> SIMD_INLINE void ReduceColor2x2(const uint8_t * src0, const uint8_t * src1, uint8_t * dst) |
85 | 0 | { |
86 | 0 | __m256i s00 = Load<align>((__m256i*)src0 + 0); |
87 | 0 | __m256i s01 = Load<align>((__m256i*)src0 + 1); |
88 | 0 | __m256i s10 = Load<align>((__m256i*)src1 + 0); |
89 | 0 | __m256i s11 = Load<align>((__m256i*)src1 + 1); |
90 | 0 | Store<align>((__m256i*)dst, Average8<channelCount>(s00, s01, s10, s11)); |
91 | 0 | }
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<1ul, true>(unsigned char const*, unsigned char const*, unsigned char*)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<1ul, false>(unsigned char const*, unsigned char const*, unsigned char*)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<2ul, true>(unsigned char const*, unsigned char const*, unsigned char*)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<2ul, false>(unsigned char const*, unsigned char const*, unsigned char*)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<4ul, true>(unsigned char const*, unsigned char const*, unsigned char*)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<4ul, false>(unsigned char const*, unsigned char const*, unsigned char*)
92 | | |
93 | | template <size_t channelCount, bool align> void ReduceColor2x2(const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride, uint8_t * dst, size_t dstStride) |
94 | 0 | { |
95 | 0 | size_t evenWidth = AlignLo(srcWidth, 2); |
96 | 0 | size_t evenSize = evenWidth * channelCount; |
97 | 0 | size_t alignedSize = AlignLo(evenSize, DA); |
98 | 0 | for (size_t srcRow = 0; srcRow < srcHeight; srcRow += 2) |
99 | 0 | { |
100 | 0 | const uint8_t *src0 = src; |
101 | 0 | const uint8_t *src1 = (srcRow == srcHeight - 1 ? src : src + srcStride); |
102 | 0 | size_t srcOffset = 0, dstOffset = 0; |
103 | 0 | for (; srcOffset < alignedSize; srcOffset += DA, dstOffset += A) |
104 | 0 | ReduceColor2x2<channelCount, align>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset); |
105 | 0 | if (alignedSize != evenSize) |
106 | 0 | { |
107 | 0 | srcOffset = evenSize - DA; |
108 | 0 | dstOffset = srcOffset / 2; |
109 | 0 | ReduceColor2x2<channelCount, false>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset); |
110 | 0 | } |
111 | 0 | if (evenWidth != srcWidth) |
112 | 0 | { |
113 | 0 | for (size_t c = 0; c < channelCount; ++c) |
114 | 0 | dst[evenSize / 2 + c] = Base::Average(src0[evenSize + c], src1[evenSize + c]); |
115 | 0 | } |
116 | 0 | src += 2 * srcStride; |
117 | 0 | dst += dstStride; |
118 | 0 | } |
119 | 0 | }
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<1ul, true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<2ul, true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<4ul, true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<1ul, false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<2ul, false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<4ul, false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
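A note on the tail handling above: the vector loop advances DA = 64 source bytes (two 32-byte registers) per step, and when evenSize is not a multiple of 64 the final full vector is reprocessed unaligned, rewriting some destination bytes with identical values, which is harmless. An odd trailing column is averaged from the two source rows only, and for an odd srcHeight the last row is paired with itself (src1 == src0). A worked example, assuming srcWidth = 100 and channelCount = 1:

    // evenWidth   = AlignLo(100, 2)  = 100, so evenSize = 100
    // alignedSize = AlignLo(100, 64) = 64  -> vector loop covers src bytes [0, 64)
    // fix-up call: srcOffset = 100 - 64 = 36, dstOffset = 18
    //              covers src bytes [36, 100); dst bytes [18, 32) are rewritten identically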
120 | | |
121 | | const __m256i K8_BGR0 = SIMD_MM256_SETR_EPI8( |
122 | | 0x0, 0x3, 0x1, 0x4, 0x2, 0x5, 0x6, 0x9, 0x7, 0xA, 0x8, 0xB, 0xC, 0xF, 0xD, -1, |
123 | | -1, 0x1, 0x2, 0x5, 0x3, 0x6, 0x4, 0x7, 0x8, 0xB, 0x9, 0xC, 0xA, 0xD, 0xE, -1); |
124 | | const __m256i K8_BGR1 = SIMD_MM256_SETR_EPI8( |
125 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
126 | | 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); |
127 | | const __m256i K8_BGR2 = SIMD_MM256_SETR_EPI8( |
128 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, |
129 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1); |
130 | | const __m256i K8_BGR3 = SIMD_MM256_SETR_EPI8( |
131 | | -1, 0x2, 0x0, 0x3, 0x4, 0x7, 0x5, 0x8, 0x6, 0x9, 0xA, 0xD, 0xB, 0xE, 0xC, 0xF, |
132 | | 0x0, 0x3, 0x1, 0x4, 0x2, 0x5, 0x6, 0x9, 0x7, 0xA, 0x8, 0xB, 0xC, 0xF, 0xD, -1); |
133 | | const __m256i K8_BGR4 = SIMD_MM256_SETR_EPI8( |
134 | | 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
135 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); |
136 | | const __m256i K8_BGR5 = SIMD_MM256_SETR_EPI8( |
137 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
138 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0); |
139 | | const __m256i K8_BGR6 = SIMD_MM256_SETR_EPI8( |
140 | | -1, 0x1, 0x2, 0x5, 0x3, 0x6, 0x4, 0x7, 0x8, 0xB, 0x9, 0xC, 0xA, 0xD, 0xE, -1, |
141 | | -1, 0x2, 0x0, 0x3, 0x4, 0x7, 0x5, 0x8, 0x6, 0x9, 0xA, 0xD, 0xB, 0xE, 0xC, 0xF); |
142 | | const __m256i K8_BGR7 = SIMD_MM256_SETR_EPI8( |
143 | | 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
144 | | 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); |
145 | | const __m256i K8_BGR8 = SIMD_MM256_SETR_EPI8( |
146 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, |
147 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); |
148 | | |
149 | | template <bool align> SIMD_INLINE void ReduceBgr2x2(const uint8_t * src0, const uint8_t * src1, uint8_t * dst) |
150 | 0 | { |
151 | 0 | __m256i s001 = Load<align>((__m256i*)src0 + 0); |
152 | 0 | __m256i s023 = Load<align>((__m256i*)src0 + 1); |
153 | 0 | __m256i s045 = Load<align>((__m256i*)src0 + 2); |
154 | 0 | __m256i s101 = Load<align>((__m256i*)src1 + 0); |
155 | 0 | __m256i s123 = Load<align>((__m256i*)src1 + 1); |
156 | 0 | __m256i s145 = Load<align>((__m256i*)src1 + 2); |
157 | 0 | __m256i s000 = _mm256_permute2x128_si256(s001, s001, 0x00); |
158 | 0 | __m256i s100 = _mm256_permute2x128_si256(s101, s101, 0x00); |
159 | 0 | __m256i s012 = _mm256_permute2x128_si256(s001, s023, 0x21); |
160 | 0 | __m256i s112 = _mm256_permute2x128_si256(s101, s123, 0x21); |
161 | 0 | __m256i s034 = _mm256_permute2x128_si256(s023, s045, 0x21); |
162 | 0 | __m256i s134 = _mm256_permute2x128_si256(s123, s145, 0x21); |
163 | 0 | __m256i m00 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s001, K8_BGR0), _mm256_shuffle_epi8(s000, K8_BGR1)), _mm256_shuffle_epi8(s012, K8_BGR2)); |
164 | 0 | __m256i m01 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s023, K8_BGR3), _mm256_shuffle_epi8(s012, K8_BGR4)), _mm256_shuffle_epi8(s034, K8_BGR5)); |
165 | 0 | __m256i m10 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s101, K8_BGR0), _mm256_shuffle_epi8(s100, K8_BGR1)), _mm256_shuffle_epi8(s112, K8_BGR2)); |
166 | 0 | __m256i m11 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s123, K8_BGR3), _mm256_shuffle_epi8(s112, K8_BGR4)), _mm256_shuffle_epi8(s134, K8_BGR5)); |
167 | 0 | Store<align>((__m256i*)dst + 0, Average8(m00, m01, m10, m11)); |
168 | 0 | __m256i s067 = Load<align>((__m256i*)src0 + 3); |
169 | 0 | __m256i s089 = Load<align>((__m256i*)src0 + 4); |
170 | 0 | __m256i s167 = Load<align>((__m256i*)src1 + 3); |
171 | 0 | __m256i s189 = Load<align>((__m256i*)src1 + 4); |
172 | 0 | __m256i s056 = _mm256_permute2x128_si256(s045, s067, 0x21); |
173 | 0 | __m256i s156 = _mm256_permute2x128_si256(s145, s167, 0x21); |
174 | 0 | __m256i s078 = _mm256_permute2x128_si256(s067, s089, 0x21); |
175 | 0 | __m256i s178 = _mm256_permute2x128_si256(s167, s189, 0x21); |
176 | 0 | __m256i m02 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s045, K8_BGR6), _mm256_shuffle_epi8(s034, K8_BGR7)), _mm256_shuffle_epi8(s056, K8_BGR8)); |
177 | 0 | __m256i m03 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s067, K8_BGR0), _mm256_shuffle_epi8(s056, K8_BGR1)), _mm256_shuffle_epi8(s078, K8_BGR2)); |
178 | 0 | __m256i m12 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s145, K8_BGR6), _mm256_shuffle_epi8(s134, K8_BGR7)), _mm256_shuffle_epi8(s156, K8_BGR8)); |
179 | 0 | __m256i m13 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s167, K8_BGR0), _mm256_shuffle_epi8(s156, K8_BGR1)), _mm256_shuffle_epi8(s178, K8_BGR2)); |
180 | 0 | Store<align>((__m256i*)dst + 1, Average8(m02, m03, m12, m13)); |
181 | 0 | __m256i s0ab = Load<align>((__m256i*)src0 + 5); |
182 | 0 | __m256i s1ab = Load<align>((__m256i*)src1 + 5); |
183 | 0 | __m256i s09a = _mm256_permute2x128_si256(s089, s0ab, 0x21); |
184 | 0 | __m256i s19a = _mm256_permute2x128_si256(s189, s1ab, 0x21); |
185 | 0 | __m256i s0bb = _mm256_permute2x128_si256(s0ab, s0ab, 0x33); |
186 | 0 | __m256i s1bb = _mm256_permute2x128_si256(s1ab, s1ab, 0x33); |
187 | 0 | __m256i m04 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s089, K8_BGR3), _mm256_shuffle_epi8(s078, K8_BGR4)), _mm256_shuffle_epi8(s09a, K8_BGR5)); |
188 | 0 | __m256i m05 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s0ab, K8_BGR6), _mm256_shuffle_epi8(s09a, K8_BGR7)), _mm256_shuffle_epi8(s0bb, K8_BGR8)); |
189 | 0 | __m256i m14 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s189, K8_BGR3), _mm256_shuffle_epi8(s178, K8_BGR4)), _mm256_shuffle_epi8(s19a, K8_BGR5)); |
190 | 0 | __m256i m15 = _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s1ab, K8_BGR6), _mm256_shuffle_epi8(s19a, K8_BGR7)), _mm256_shuffle_epi8(s1bb, K8_BGR8)); |
191 | 0 | Store<align>((__m256i*)dst + 2, Average8(m04, m05, m14, m15)); |
192 | 0 | }
   Unexecuted instantiation: void Simd::Avx2::ReduceBgr2x2<true>(unsigned char const*, unsigned char const*, unsigned char*)
   Unexecuted instantiation: void Simd::Avx2::ReduceBgr2x2<false>(unsigned char const*, unsigned char const*, unsigned char*)
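The BGR kernel is more intricate because a pixel spans 3 bytes and _mm256_shuffle_epi8 moves bytes only within each 128-bit lane. The code therefore builds lane-shifted views of the source with _mm256_permute2x128_si256 and assembles every regrouped vector by OR-ing three shuffles; a mask byte of -1 makes the shuffle emit zero, so each of the three contributes only the bytes it owns. The permute selectors used above behave as follows:

    // _mm256_permute2x128_si256(a, b, 0x00) -> [ a.low128,  a.low128  ]  (s000, s100)
    // _mm256_permute2x128_si256(a, b, 0x21) -> [ a.high128, b.low128  ]  (s012, s034, ...)
    // _mm256_permute2x128_si256(a, b, 0x33) -> [ b.high128, b.high128 ]  (s0bb, s1bb)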
193 | | |
194 | | template <bool align> void ReduceBgr2x2(const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride, uint8_t * dst, size_t dstStride) |
195 | 0 | { |
196 | 0 | size_t evenWidth = AlignLo(srcWidth, 2); |
197 | 0 | size_t alignedWidth = AlignLo(srcWidth, DA); |
198 | 0 | size_t evenSize = evenWidth * 3; |
199 | 0 | size_t alignedSize = alignedWidth * 3; |
200 | 0 | size_t srcStep = DA * 3, dstStep = A * 3; |
201 | 0 | for (size_t srcRow = 0; srcRow < srcHeight; srcRow += 2) |
202 | 0 | { |
203 | 0 | const uint8_t *src0 = src; |
204 | 0 | const uint8_t *src1 = (srcRow == srcHeight - 1 ? src : src + srcStride); |
205 | 0 | size_t srcOffset = 0, dstOffset = 0; |
206 | 0 | for (; srcOffset < alignedSize; srcOffset += srcStep, dstOffset += dstStep) |
207 | 0 | ReduceBgr2x2<align>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset); |
208 | 0 | if (alignedSize != evenSize) |
209 | 0 | { |
210 | 0 | srcOffset = evenSize - srcStep; |
211 | 0 | dstOffset = srcOffset / 2; |
212 | 0 | ReduceBgr2x2<false>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset); |
213 | 0 | } |
214 | 0 | if (evenWidth != srcWidth) |
215 | 0 | { |
216 | 0 | for (size_t c = 0; c < 3; ++c) |
217 | 0 | dst[evenSize / 2 + c] = Base::Average(src0[evenSize + c], src1[evenSize + c]); |
218 | 0 | } |
219 | 0 | src += 2 * srcStride; |
220 | 0 | dst += dstStride; |
221 | 0 | } |
222 | 0 | }
   Unexecuted instantiation: void Simd::Avx2::ReduceBgr2x2<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceBgr2x2<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
223 | | |
224 | | template <bool align> void ReduceColor2x2(const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride, |
225 | | uint8_t * dst, size_t dstWidth, size_t dstHeight, size_t dstStride, size_t channelCount) |
226 | 0 | { |
227 | 0 | assert((srcWidth + 1) / 2 == dstWidth && (srcHeight + 1) / 2 == dstHeight && srcWidth >= DA); |
228 | 0 | if (align) |
229 | 0 | { |
230 | 0 | assert(Aligned(src) && Aligned(srcStride)); |
231 | 0 | assert(Aligned(dst) && Aligned(dstStride)); |
232 | 0 | } |
233 | |
234 | 0 | switch (channelCount) |
235 | 0 | { |
236 | 0 | case 1: ReduceColor2x2<1, align>(src, srcWidth, srcHeight, srcStride, dst, dstStride); break; |
237 | 0 | case 2: ReduceColor2x2<2, align>(src, srcWidth, srcHeight, srcStride, dst, dstStride); break; |
238 | 0 | case 3: ReduceBgr2x2<align>(src, srcWidth, srcHeight, srcStride, dst, dstStride); break; |
239 | 0 | case 4: ReduceColor2x2<4, align>(src, srcWidth, srcHeight, srcStride, dst, dstStride); break; |
240 | 0 | default: assert(0); |
241 | 0 | } |
242 | 0 | }
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, unsigned long, unsigned long)
   Unexecuted instantiation: void Simd::Avx2::ReduceColor2x2<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, unsigned long, unsigned long)
243 | | |
244 | | void ReduceColor2x2(const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride, |
245 | | uint8_t * dst, size_t dstWidth, size_t dstHeight, size_t dstStride, size_t channelCount) |
246 | 0 | { |
247 | 0 | if (Aligned(src) && Aligned(srcStride) && Aligned(dst) && Aligned(dstStride)) |
248 | 0 | ReduceColor2x2<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride, channelCount); |
249 | 0 | else |
250 | 0 | ReduceColor2x2<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride, channelCount); |
251 | 0 | } |
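A minimal usage sketch of this exported entry point (a hypothetical wrapper, not library code; buffer sizing and the srcWidth >= 64 precondition from the assert above are the caller's responsibility):

    #include <cstdint>
    #include <vector>

    // Halve a packed BGR image with Simd::Avx2::ReduceColor2x2.
    void HalveBgr(const uint8_t * src, size_t srcW, size_t srcH, std::vector<uint8_t> & dst)
    {
        const size_t channels = 3;
        const size_t dstW = (srcW + 1) / 2, dstH = (srcH + 1) / 2;
        dst.resize(dstW * channels * dstH);
        Simd::Avx2::ReduceColor2x2(src, srcW, srcH, srcW * channels,
            dst.data(), dstW, dstH, dstW * channels, channels);
    }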
252 | | } |
253 | | #endif// SIMD_AVX2_ENABLE |
254 | | } |