Coverage Report

Created: 2024-10-01 06:54

/src/Simd/src/Simd/SimdAvx2YuvToHue.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2017 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdStore.h"
25
#include "Simd/SimdMemory.h"
26
#include "Simd/SimdConversion.h"
27
28
namespace Simd
29
{
30
#ifdef SIMD_AVX2_ENABLE    
31
    namespace Avx2
32
    {
33
        // Per-lane (KF_255_DIV_6 * dividend) / divisor on eight 32-bit integers.
        // Both operands are converted to single-precision float, the scaled quotient
        // is computed in float and converted back to int32 with truncation.
        SIMD_INLINE __m256i MulDiv32(__m256i dividend, __m256i divisor, const __m256 & KF_255_DIV_6)
        {
            const __m256 numerator = _mm256_mul_ps(KF_255_DIV_6, _mm256_cvtepi32_ps(dividend));
            const __m256 denominator = _mm256_cvtepi32_ps(divisor);
            return _mm256_cvttps_epi32(_mm256_div_ps(numerator, denominator));
        }
37
38
        // 16-bit variant of MulDiv32: each 16-bit lane of dividend/divisor is widened to
        // 32 bits by interleaving with K_ZERO, divided via MulDiv32, and the two 32-bit
        // quotient vectors are packed back to 16 bits with signed saturation.
        SIMD_INLINE __m256i MulDiv16(__m256i dividend, __m256i divisor, const __m256 & KF_255_DIV_6)
        {
            const __m256i dividendLo = _mm256_unpacklo_epi16(dividend, K_ZERO);
            const __m256i divisorLo = _mm256_unpacklo_epi16(divisor, K_ZERO);
            const __m256i dividendHi = _mm256_unpackhi_epi16(dividend, K_ZERO);
            const __m256i divisorHi = _mm256_unpackhi_epi16(divisor, K_ZERO);

            return _mm256_packs_epi32(
                MulDiv32(dividendLo, divisorLo, KF_255_DIV_6),
                MulDiv32(dividendHi, divisorHi, KF_255_DIV_6));
        }
44
45
        // Computes hue for 16-bit lanes of already adjusted Y, U, V values.
        // The channels are converted to red/green/blue, the dominant channel is chosen
        // (red wins ties, then green, then blue), and the result is
        // MulDiv16(channelDifference + range * {6, 2, 4}, range) masked with K16_00FF.
        // Lanes whose range (max - min) equals K_ZERO are forced to zero.
        SIMD_INLINE __m256i AdjustedYuvToHue16(__m256i y, __m256i u, __m256i v, const __m256 & KF_255_DIV_6)
        {
            const __m256i r = AdjustedYuvToRed16(y, v);
            const __m256i g = AdjustedYuvToGreen16(y, u, v);
            const __m256i b = AdjustedYuvToBlue16(y, u);

            const __m256i top = MaxI16(r, g, b);
            const __m256i bottom = MinI16(r, g, b);
            const __m256i span = _mm256_subs_epi16(top, bottom);

            // Mutually exclusive selection masks: a lane is "green" only if it is not
            // "red", and "blue" only if it is neither.
            // NOTE(review): relies on K_INV_ZERO being all ones - confirm in SimdConst.h.
            const __m256i isRed = _mm256_cmpeq_epi16(r, top);
            const __m256i isGreen = _mm256_andnot_si256(isRed, _mm256_cmpeq_epi16(g, top));
            const __m256i notGreen = _mm256_andnot_si256(isGreen, K_INV_ZERO);
            const __m256i isBlue = _mm256_andnot_si256(isRed, notGreen);

            // Un-normalized hue numerators for each possible dominant channel.
            const __m256i redTerm = _mm256_add_epi16(_mm256_sub_epi16(g, b), _mm256_mullo_epi16(span, K16_0006));
            const __m256i greenTerm = _mm256_add_epi16(_mm256_sub_epi16(b, r), _mm256_mullo_epi16(span, K16_0002));
            const __m256i blueTerm = _mm256_add_epi16(_mm256_sub_epi16(r, g), _mm256_mullo_epi16(span, K16_0004));

            // Exactly one mask is set per lane, so OR-ing the masked terms selects one.
            __m256i dividend = _mm256_and_si256(isRed, redTerm);
            dividend = _mm256_or_si256(dividend, _mm256_and_si256(isGreen, greenTerm));
            dividend = _mm256_or_si256(dividend, _mm256_and_si256(isBlue, blueTerm));

            const __m256i hue = _mm256_and_si256(MulDiv16(dividend, span, KF_255_DIV_6), K16_00FF);
            const __m256i zeroSpan = _mm256_cmpeq_epi16(span, K_ZERO);
            return _mm256_andnot_si256(zeroSpan, hue);
        }
68
69
        // Hue for 16-bit lanes of raw Y, U, V: applies AdjustY16 / AdjustUV16 to the
        // inputs and forwards the results to AdjustedYuvToHue16.
        SIMD_INLINE __m256i YuvToHue16(__m256i y, __m256i u, __m256i v, const __m256 & KF_255_DIV_6)
        {
            const __m256i adjustedY = AdjustY16(y);
            const __m256i adjustedU = AdjustUV16(u);
            const __m256i adjustedV = AdjustUV16(v);
            return AdjustedYuvToHue16(adjustedY, adjustedU, adjustedV, KF_255_DIV_6);
        }
73
74
        // Hue for 8-bit lanes of Y, U, V: the bytes are widened to 16 bits by
        // interleaving with K_ZERO (low and high halves separately), converted with
        // YuvToHue16, and packed back to bytes with unsigned saturation.
        SIMD_INLINE __m256i YuvToHue8(__m256i y, __m256i u, __m256i v, const __m256 & KF_255_DIV_6)
        {
            const __m256i hueLo = YuvToHue16(
                _mm256_unpacklo_epi8(y, K_ZERO),
                _mm256_unpacklo_epi8(u, K_ZERO),
                _mm256_unpacklo_epi8(v, K_ZERO),
                KF_255_DIV_6);
            const __m256i hueHi = YuvToHue16(
                _mm256_unpackhi_epi8(y, K_ZERO),
                _mm256_unpackhi_epi8(u, K_ZERO),
                _mm256_unpackhi_epi8(v, K_ZERO),
                KF_255_DIV_6);
            return _mm256_packus_epi16(hueLo, hueHi);
        }
80
81
        // Converts two consecutive vectors of luma (at y and y + A) to hue and stores
        // them at hue and hue + A. Each chroma byte of u/v is duplicated with
        // unpacklo/unpackhi so one chroma sample serves two horizontally adjacent pixels.
        // The 'align' parameter is forwarded to Load/Store.
        template <bool align> SIMD_INLINE void Yuv420pToHue(const uint8_t * y, __m256i u, __m256i v, uint8_t * hue, const __m256 & KF_255_DIV_6)
        {
            // First vector: uses the chroma produced by the low unpack.
            const __m256i luma0 = Load<align>((__m256i*)(y));
            const __m256i hue0 = YuvToHue8(luma0, _mm256_unpacklo_epi8(u, u), _mm256_unpacklo_epi8(v, v), KF_255_DIV_6);
            Store<align>((__m256i*)(hue), hue0);

            // Second vector: uses the chroma produced by the high unpack.
            const __m256i luma1 = Load<align>((__m256i*)(y + A));
            const __m256i hue1 = YuvToHue8(luma1, _mm256_unpackhi_epi8(u, u), _mm256_unpackhi_epi8(v, v), KF_255_DIV_6);
            Store<align>((__m256i*)(hue + A), hue1);
        }
Unexecuted instantiation: void Simd::Avx2::Yuv420pToHue<true>(unsigned char const*, long long __vector(4), long long __vector(4), unsigned char*, float __vector(8) const&)
Unexecuted instantiation: void Simd::Avx2::Yuv420pToHue<false>(unsigned char const*, long long __vector(4), long long __vector(4), unsigned char*, float __vector(8) const&)
88
89
        template <bool align> void Yuv420pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
90
            size_t width, size_t height, uint8_t * hue, size_t hueStride)
91
0
        {
92
0
            assert((width % 2 == 0) && (height % 2 == 0) && (width >= DA) && (height >= 2));
93
0
            if (align)
94
0
            {
95
0
                assert(Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride));
96
0
                assert(Aligned(v) && Aligned(vStride) && Aligned(hue) && Aligned(hueStride));
97
0
            }
98
99
0
            const __m256 KF_255_DIV_6 = _mm256_set1_ps(Base::KF_255_DIV_6);
100
101
0
            size_t bodyWidth = AlignLo(width, DA);
102
0
            size_t tail = width - bodyWidth;
103
0
            for (size_t row = 0; row < height; row += 2)
104
0
            {
105
0
                for (size_t colUV = 0, colY = 0, col_hue = 0; colY < bodyWidth; colY += DA, colUV += A, col_hue += DA)
106
0
                {
107
0
                    __m256i u_ = LoadPermuted<align>((__m256i*)(u + colUV));
108
0
                    __m256i v_ = LoadPermuted<align>((__m256i*)(v + colUV));
109
0
                    Yuv420pToHue<align>(y + colY, u_, v_, hue + col_hue, KF_255_DIV_6);
110
0
                    Yuv420pToHue<align>(y + yStride + colY, u_, v_, hue + hueStride + col_hue, KF_255_DIV_6);
111
0
                }
112
0
                if (tail)
113
0
                {
114
0
                    size_t offset = width - DA;
115
0
                    __m256i u_ = LoadPermuted<false>((__m256i*)(u + offset / 2));
116
0
                    __m256i v_ = LoadPermuted<false>((__m256i*)(v + offset / 2));
117
0
                    Yuv420pToHue<false>(y + offset, u_, v_, hue + offset, KF_255_DIV_6);
118
0
                    Yuv420pToHue<false>(y + yStride + offset, u_, v_, hue + hueStride + offset, KF_255_DIV_6);
119
0
                }
120
0
                y += 2 * yStride;
121
0
                u += uStride;
122
0
                v += vStride;
123
0
                hue += 2 * hueStride;
124
0
            }
125
0
        }
Unexecuted instantiation: void Simd::Avx2::Yuv420pToHue<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx2::Yuv420pToHue<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
126
127
        template <bool align> void Yuv444pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
128
            size_t width, size_t height, uint8_t * hue, size_t hueStride)
129
0
        {
130
0
            assert(width >= A);
131
0
            if (align)
132
0
            {
133
0
                assert(Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride));
134
0
                assert(Aligned(v) && Aligned(vStride) && Aligned(hue) && Aligned(hueStride));
135
0
            }
136
137
0
            const __m256 KF_255_DIV_6 = _mm256_set1_ps(Base::KF_255_DIV_6);
138
139
0
            size_t bodyWidth = AlignLo(width, A);
140
0
            size_t tail = width - bodyWidth;
141
0
            for (size_t row = 0; row < height; row += 1)
142
0
            {
143
0
                for (size_t col = 0; col < bodyWidth; col += A)
144
0
                {
145
0
                    Store<align>((__m256i*)(hue + col), YuvToHue8(Load<align>((__m256i*)(y + col)),
146
0
                        Load<align>((__m256i*)(u + col)), Load<align>((__m256i*)(v + col)), KF_255_DIV_6));
147
0
                }
148
0
                if (tail)
149
0
                {
150
0
                    size_t offset = width - A;
151
0
                    Store<false>((__m256i*)(hue + offset), YuvToHue8(Load<false>((__m256i*)(y + offset)),
152
0
                        Load<false>((__m256i*)(u + offset)), Load<false>((__m256i*)(v + offset)), KF_255_DIV_6));
153
0
                }
154
0
                y += yStride;
155
0
                u += uStride;
156
0
                v += vStride;
157
0
                hue += hueStride;
158
0
            }
159
0
        }
Unexecuted instantiation: void Simd::Avx2::Yuv444pToHue<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Avx2::Yuv444pToHue<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
160
161
        void Yuv420pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
162
            size_t width, size_t height, uint8_t * hue, size_t hueStride)
163
0
        {
164
0
            if (Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride) && Aligned(hue) && Aligned(hueStride))
165
0
                Yuv420pToHue<true>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
166
0
            else
167
0
                Yuv420pToHue<false>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
168
0
        }
169
170
        void Yuv444pToHue(const uint8_t * y, size_t yStride, const uint8_t * u, size_t uStride, const uint8_t * v, size_t vStride,
171
            size_t width, size_t height, uint8_t * hue, size_t hueStride)
172
0
        {
173
0
            if (Aligned(y) && Aligned(yStride) && Aligned(u) && Aligned(uStride) && Aligned(v) && Aligned(vStride) && Aligned(hue) && Aligned(hueStride))
174
0
                Yuv444pToHue<true>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
175
0
            else
176
0
                Yuv444pToHue<false>(y, yStride, u, uStride, v, vStride, width, height, hue, hueStride);
177
0
        }
178
    }
179
#endif// SIMD_AVX2_ENABLE
180
}