Coverage Report

Created: 2026-02-14 07:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdAvx512bwReduceGray2x2.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2018 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
27
namespace Simd
28
{
29
#ifdef SIMD_AVX512BW_ENABLE    
30
    namespace Avx512bw
31
    {
32
        // Combines two rows of 8-bit samples into 16-bit lanes holding
        // (horizontal pair sum of s0 + horizontal pair sum of s1 + K16_0002) >> 2.
        // NOTE(review): assumes K8_01 is 1 in every byte and K16_0002 is 2 in every
        // 16-bit lane, which would make this a rounded 2x2 average - confirm against
        // the constant definitions.
        SIMD_INLINE __m512i Reduce16(const __m512i & s0, const __m512i & s1)
        {
            // maddubs multiplies byte pairs and adds neighbours into 16-bit lanes.
            const __m512i pairSum0 = _mm512_maddubs_epi16(s0, K8_01);
            const __m512i pairSum1 = _mm512_maddubs_epi16(s1, K8_01);
            const __m512i blockSum = _mm512_add_epi16(pairSum0, pairSum1);
            const __m512i biased = _mm512_add_epi16(blockSum, K16_0002);
            return _mm512_srli_epi16(biased, 2);
        }
36
37
        // Reduces two adjacent 64-byte column spans (s?0 and s?1) of two rows
        // (s0? and s1?) and packs both 16-bit results into one vector of bytes.
        SIMD_INLINE __m512i Reduce8(const __m512i & s00, const __m512i & s01, const __m512i & s10, const __m512i & s11)
        {
            const __m512i first = Reduce16(s00, s10);
            const __m512i second = Reduce16(s01, s11);
            // packus interleaves its operands per 128-bit lane; the 64-bit permute
            // is presumably what restores linear order (confirm K64_PERMUTE_FOR_PACK).
            const __m512i packed = _mm512_packus_epi16(first, second);
            return _mm512_permutexvar_epi64(K64_PERMUTE_FOR_PACK, packed);
        }
41
42
        // Kernel for one step: loads two consecutive vectors from each of two source
        // rows, reduces them with Reduce8 and stores a single vector to dst.
        //   tails[0], tails[1] - masks passed to the first / second load of each row;
        //   tails[2]           - mask passed to the store.
        // The masks are forwarded to Load/Store together with the 'mask' template flag.
        template <bool align, bool mask> SIMD_INLINE void ReduceGray2x2(const uint8_t * src0, const uint8_t * src1, uint8_t * dst, const __mmask64 * tails)
        {
            const __m512i row0First = Load<align, mask>(src0, tails[0]);
            const __m512i row0Second = Load<align, mask>(src0 + A, tails[1]);
            const __m512i row1First = Load<align, mask>(src1, tails[0]);
            const __m512i row1Second = Load<align, mask>(src1 + A, tails[1]);
            const __m512i reduced = Reduce8(row0First, row0Second, row1First, row1Second);
            Store<align, mask>(dst, reduced, tails[2]);
        }
Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<true, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<true, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<false, false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<false, true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long long const*)
50
51
        template <bool align> void ReduceGray2x2(
52
            const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride,
53
            uint8_t * dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
54
0
        {
55
0
            assert((srcWidth + 1) / 2 == dstWidth && (srcHeight + 1) / 2 == dstHeight);
56
0
            if (align)
57
0
            {
58
0
                assert(Aligned(src) && Aligned(srcStride));
59
0
                assert(Aligned(dst) && Aligned(dstStride) && Aligned(dstWidth));
60
0
            }
61
62
0
            size_t alignedWidth = AlignLo(srcWidth, DA);
63
0
            __mmask64 tailMasks[3];
64
0
            for (size_t c = 0; c < 2; ++c)
65
0
                tailMasks[c] = TailMask64(srcWidth - alignedWidth - A*c);
66
0
            tailMasks[2] = TailMask64((srcWidth - alignedWidth) / 2);
67
0
            size_t evenWidth = AlignLo(srcWidth, 2);
68
69
0
            for (size_t srcRow = 0; srcRow < srcHeight; srcRow += 2)
70
0
            {
71
0
                const uint8_t *src0 = src;
72
0
                const uint8_t *src1 = (srcRow == srcHeight - 1 ? src : src + srcStride);
73
0
                size_t srcOffset = 0, dstOffset = 0;
74
0
                for (; srcOffset < alignedWidth; srcOffset += DA, dstOffset += A)
75
0
                    ReduceGray2x2<align, false>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset, tailMasks);
76
0
                if (srcOffset < srcWidth)
77
0
                {
78
0
                    ReduceGray2x2<align, true>(src0 + srcOffset, src1 + srcOffset, dst + dstOffset, tailMasks);
79
0
                    if (evenWidth != srcWidth)
80
0
                        dst[dstWidth - 1] = Base::Average(src0[evenWidth], src1[evenWidth]);
81
0
                }
82
0
                src += 2 * srcStride;
83
0
                dst += dstStride;
84
0
            }
85
0
        }
Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: void Simd::Avx512bw::ReduceGray2x2<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, unsigned long, unsigned long)
86
87
        void ReduceGray2x2(const uint8_t * src, size_t srcWidth, size_t srcHeight, size_t srcStride,
88
            uint8_t * dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
89
0
        {
90
0
            if (Aligned(src) && Aligned(srcWidth) && Aligned(srcStride) && Aligned(dst) && Aligned(dstWidth) && Aligned(dstStride))
91
0
                ReduceGray2x2<true>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride);
92
0
            else
93
0
                ReduceGray2x2<false>(src, srcWidth, srcHeight, srcStride, dst, dstWidth, dstHeight, dstStride);
94
0
        }
95
    }
96
#endif// SIMD_AVX512BW_ENABLE
97
}