/src/Simd/src/Simd/SimdAvx512bwReduceGray2x2.cpp
Line | Count | Source |
1 | | /* |
2 | | * Simd Library (http://ermig1979.github.io/Simd). |
3 | | * |
4 | | * Copyright (c) 2011-2018 Yermalayeu Ihar. |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | | * of this software and associated documentation files (the "Software"), to deal |
8 | | * in the Software without restriction, including without limitation the rights |
9 | | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | | * copies of the Software, and to permit persons to whom the Software is |
11 | | * furnished to do so, subject to the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be included in |
14 | | * all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
22 | | * SOFTWARE. |
23 | | */ |
24 | | #include "Simd/SimdMemory.h" |
25 | | #include "Simd/SimdStore.h" |
26 | | |
27 | | namespace Simd |
28 | | { |
29 | | #ifdef SIMD_AVX512BW_ENABLE |
30 | | namespace Avx512bw |
31 | | { |
32 | | SIMD_INLINE __m512i Reduce16(const __m512i & s0, const __m512i & s1) |
33 | | { |
34 | | return _mm512_srli_epi16(_mm512_add_epi16(_mm512_add_epi16(_mm512_maddubs_epi16(s0, K8_01), _mm512_maddubs_epi16(s1, K8_01)), K16_0002), 2); |
35 | | } |
36 | | |
37 | | SIMD_INLINE __m512i Reduce8(const __m512i & s00, const __m512i & s01, const __m512i & s10, const __m512i & s11) |
38 | | { |
39 | | return _mm512_permutexvar_epi64(K64_PERMUTE_FOR_PACK, _mm512_packus_epi16(Reduce16(s00, s10), Reduce16(s01, s11))); |
40 | | } |
41 | | |
42 | | template <bool align, bool mask> SIMD_INLINE void ReduceGray2x2(const uint8_t * src0, const uint8_t * src1, uint8_t * dst, const __mmask64 * tails) |
43 | 0 | { |
44 | 0 | const __m512i s00 = Load<align, mask>(src0 + 0, tails[0]); |
45 | 0 | const __m512i s01 = Load<align, mask>(src0 + A, tails[1]); |
46 | 0 | const __m512i s10 = Load<align, mask>(src1 + 0, tails[0]); |
47 | 0 | const __m512i s11 = Load<align, mask>(src1 + A, tails[1]); |
48 | 0 | Store<align, mask>(dst, Reduce8(s00, s01, s10, s11), tails[2]); |
49 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<true, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<true, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<false, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<false, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*) |
50 | | |
51 | | template <bool align> void ReduceGray2x2( |
52 | | const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride, |
53 | | uint8_t * dst, size_t dstWidth, size_t dstHeight, size_t dstStride) |
54 | 0 | { |
55 | 0 | assert((srcWidth + 1) / 2 == dstWidth && (srcHeight + 1) / 2 == dstHeight); |
56 | 0 | if (align) |
57 | 0 | { |
58 | 0 | assert(Aligned(src) && Aligned(srcStride)); |
59 | 0 | assert(Aligned(dst) && Aligned(dstStride) && Aligned(dstWidth)); |
60 | 0 | } |
61 | |
|
62 | 0 | size_t alignedWidth = AlignLo(srcWidth, DA); |
63 | 0 | __mmask64 tailMasks[3]; |
64 | 0 | for (size_t c = 0; c < 2; ++c) |
65 | 0 | tailMasks[c] = TailMask64(srcWidth - alignedWidth - A*c); |
66 | 0 | tailMasks[2] = TailMask64((srcWidth - alignedWidth) / 2); |
67 | 0 | size_t evenWidth = AlignLo(srcWidth, 2); |
68 | |
|
69 | 0 | for (size_t srcRow = 0; srcRow < srcHeight; srcRow += 2) |
70 | 0 | { |
71 | 0 | const uint8_t *src0 = src; |
72 | 0 | const uint8_t *src1 = (srcRow == srcHeight - 1 ? src : src + srcStride); |
73 | 0 | size_t srcOffset = 0, dstOffset = 0; |
74 | 0 | for (; srcOffset < alignedWidth; srcOffset += DA, dstOffset += A) |
75 | 0 | ReduceGray2x2<align, false>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset, tailMasks); |
76 | 0 | if (srcOffset < srcWidth) |
77 | 0 | { |
78 | 0 | ReduceGray2x2<align, true>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset, tailMasks); |
79 | 0 | if (evenWidth != srcWidth) |
80 | 0 | dst[dstWidth - 1] = Base::Average(src0[evenWidth], src1[evenWidth]); |
81 | 0 | } |
82 | 0 | src += 2 * srcStride; |
83 | 0 | dst += dstStride; |
84 | 0 | } |
85 | 0 | } Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, unsigned long) |
86 | | |
87 | | void ReduceGray2x2(const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride, |
88 | | uint8_t * dst, size_t dstWidth, size_t dstHeight, size_t dstStride) |
89 | 0 | { |
90 | 0 | if (Aligned(src) && Aligned(srcWidth) && Aligned(srcStride) && Aligned(dst) && Aligned(dstWidth) && Aligned(dstStride)) |
91 | 0 | ReduceGray2x2<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride); |
92 | 0 | else |
93 | 0 | ReduceGray2x2<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride); |
94 | 0 | } |
95 | | } |
96 | | #endif// SIMD_AVX512BW_ENABLE |
97 | | } |