Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdAvx2BgrToRgb.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2021 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
27
namespace Simd
28
{
29
#ifdef SIMD_AVX2_ENABLE    
30
    namespace Avx2
31
    {
32
        const __m256i K8_SHFL_0S0 = SIMD_MM256_SETR_EPI8(0x2, 0x1, 0x0, 0x5, 0x4, 0x3, 0x8, 0x7, 0x6, 0xB, 0xA, 0x9, 0xE, 0xD, 0xC, -1, 
33
            0x0, -1, 0x4, 0x3, 0x2, 0x7, 0x6, 0x5, 0xA, 0x9, 0x8, 0xD, 0xC, 0xB, -1, 0xF);
34
        const __m256i K8_SHFL_0P0 = SIMD_MM256_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x9,
35
            -1, 0x7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
36
        const __m256i K8_SHFL_0P1 = SIMD_MM256_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x8, -1);
38
        const __m256i K8_SHFL_1S1 = SIMD_MM256_SETR_EPI8(-1, 0x3, 0x2, 0x1, 0x6, 0x5, 0x4, 0x9, 0x8, 0x7, 0xC, 0xB, 0xA, 0xF, 0xE, 0xD,
39
            0x2, 0x1, 0x0, 0x5, 0x4, 0x3, 0x8, 0x7, 0x6, 0xB, 0xA, 0x9, 0xE, 0xD, 0xC, -1);
40
        const __m256i K8_SHFL_1P0 = SIMD_MM256_SETR_EPI8(0x6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
42
        const __m256i K8_SHFL_1P2 = SIMD_MM256_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
43
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x9);
44
        const __m256i K8_SHFL_2S2 = SIMD_MM256_SETR_EPI8(0x0, -1, 0x4, 0x3, 0x2, 0x7, 0x6, 0x5, 0xA, 0x9, 0x8, 0xD, 0xC, 0xB, -1, 0xF,
45
            -1, 0x3, 0x2, 0x1, 0x6, 0x5, 0x4, 0x9, 0x8, 0x7, 0xC, 0xB, 0xA, 0xF, 0xE, 0xD);
46
        const __m256i K8_SHFL_2P1 = SIMD_MM256_SETR_EPI8(-1, 0x7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
47
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
48
        const __m256i K8_SHFL_2P2 = SIMD_MM256_SETR_EPI8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x8, -1,
49
            0x6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
50
// Kernel: reads three consecutive 256-bit vectors (96 bytes, i.e. 32 three-byte pixels) from
// src, swaps the first and third byte of every 3-byte pixel, and writes 96 bytes to dst.
//   align - forwarded to Load<align>/Store<align>; those helpers are defined elsewhere and
//           presumably select aligned vs. unaligned memory access - confirm.
//   src   - start of the 96 input bytes.
//   dst   - start of the 96 output bytes.
51
        template <bool align> SIMD_INLINE void BgrToRgb(const uint8_t * src, uint8_t * dst)
52
179k
        {
53
179k
            __m256i s0 = Load<align>((__m256i*)src + 0);
54
179k
            __m256i s1 = Load<align>((__m256i*)src + 1);
55
179k
            __m256i s2 = Load<align>((__m256i*)src + 2);
            // 0x1B reverses the order of the four 64-bit elements, so each 128-bit lane of pN
            // holds bytes from the opposite lane of sN. This is needed because
            // _mm256_shuffle_epi8 can only pick bytes from within the same 128-bit lane.
56
179k
            __m256i p0 = _mm256_permute4x64_epi64(s0, 0x1B);
57
179k
            __m256i p1 = _mm256_permute4x64_epi64(s1, 0x1B);
58
179k
            __m256i p2 = _mm256_permute4x64_epi64(s2, 0x1B);
            // Each output vector is the OR of three shuffles: the in-lane swap of its own
            // source vector plus two masks that fill the zeroed positions with bytes taken
            // from the permuted copies (bytes that crossed a lane or vector boundary).
59
179k
            Store<align>((__m256i*)dst + 0, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s0, K8_SHFL_0S0), 
60
179k
                _mm256_shuffle_epi8(p0, K8_SHFL_0P0)), _mm256_shuffle_epi8(p1, K8_SHFL_0P1)));
61
179k
            Store<align>((__m256i*)dst + 1, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s1, K8_SHFL_1S1),
62
179k
                _mm256_shuffle_epi8(p0, K8_SHFL_1P0)), _mm256_shuffle_epi8(p2, K8_SHFL_1P2)));
63
179k
            Store<align>((__m256i*)dst + 2, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s2, K8_SHFL_2S2),
64
179k
                _mm256_shuffle_epi8(p1, K8_SHFL_2P1)), _mm256_shuffle_epi8(p2, K8_SHFL_2P2)));
65
179k
        }
void Simd::Avx2::BgrToRgb<true>(unsigned char const*, unsigned char*)
Line
Count
Source
52
6.82k
        {
53
6.82k
            __m256i s0 = Load<align>((__m256i*)src + 0);
54
6.82k
            __m256i s1 = Load<align>((__m256i*)src + 1);
55
6.82k
            __m256i s2 = Load<align>((__m256i*)src + 2);
56
6.82k
            __m256i p0 = _mm256_permute4x64_epi64(s0, 0x1B);
57
6.82k
            __m256i p1 = _mm256_permute4x64_epi64(s1, 0x1B);
58
            __m256i p2 = _mm256_permute4x64_epi64(s2, 0x1B);
59
6.82k
            Store<align>((__m256i*)dst + 0, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s0, K8_SHFL_0S0), 
60
6.82k
                _mm256_shuffle_epi8(p0, K8_SHFL_0P0)), _mm256_shuffle_epi8(p1, K8_SHFL_0P1)));
61
6.82k
            Store<align>((__m256i*)dst + 1, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s1, K8_SHFL_1S1),
62
6.82k
                _mm256_shuffle_epi8(p0, K8_SHFL_1P0)), _mm256_shuffle_epi8(p2, K8_SHFL_1P2)));
63
6.82k
            Store<align>((__m256i*)dst + 2, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s2, K8_SHFL_2S2),
64
6.82k
                _mm256_shuffle_epi8(p1, K8_SHFL_2P1)), _mm256_shuffle_epi8(p2, K8_SHFL_2P2)));
65
6.82k
        }
void Simd::Avx2::BgrToRgb<false>(unsigned char const*, unsigned char*)
Line
Count
Source
52
172k
        {
53
172k
            __m256i s0 = Load<align>((__m256i*)src + 0);
54
172k
            __m256i s1 = Load<align>((__m256i*)src + 1);
55
172k
            __m256i s2 = Load<align>((__m256i*)src + 2);
56
172k
            __m256i p0 = _mm256_permute4x64_epi64(s0, 0x1B);
57
172k
            __m256i p1 = _mm256_permute4x64_epi64(s1, 0x1B);
58
            __m256i p2 = _mm256_permute4x64_epi64(s2, 0x1B);
59
172k
            Store<align>((__m256i*)dst + 0, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s0, K8_SHFL_0S0), 
60
172k
                _mm256_shuffle_epi8(p0, K8_SHFL_0P0)), _mm256_shuffle_epi8(p1, K8_SHFL_0P1)));
61
172k
            Store<align>((__m256i*)dst + 1, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s1, K8_SHFL_1S1),
62
172k
                _mm256_shuffle_epi8(p0, K8_SHFL_1P0)), _mm256_shuffle_epi8(p2, K8_SHFL_1P2)));
63
172k
            Store<align>((__m256i*)dst + 2, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s2, K8_SHFL_2S2),
64
172k
                _mm256_shuffle_epi8(p1, K8_SHFL_2P1)), _mm256_shuffle_epi8(p2, K8_SHFL_2P2)));
65
172k
        }
66
// Row driver: runs the 96-byte kernel over every row of a 3-bytes-per-pixel image.
//   align     - when true, the pointers and strides are asserted to be aligned and the
//               main loop uses the aligned kernel instantiation.
//   bgr       - first byte of the input image; advanced by bgrStride after each row.
//   width     - pixels per row; asserted to be at least A.
//   height    - number of rows.
//   bgrStride - byte step between input rows.
//   rgb       - first byte of the output image; advanced by rgbStride after each row.
//   rgbStride - byte step between output rows.
// NOTE(review): A is defined elsewhere; since the kernel consumes 96 bytes per call and the
// loop steps by A * 3, A is presumably 32 - confirm.
67
        template <bool align> void BgrToRgb(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * rgb, size_t rgbStride)
68
119k
        {
69
119k
            assert(width >= A);
70
119k
            if (align)
71
119k
                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
72
            // A3: bytes handled by one kernel call. size: bytes of pixel data per row.
            // aligned: bytes covered by whole kernel steps (AlignLo is defined elsewhere;
            // presumably it rounds width down to a multiple of A - confirm).
73
119k
            const size_t A3 = A * 3;
74
119k
            size_t size = width * 3;
75
119k
            size_t aligned = AlignLo(width, A) * 3;
76
77
239k
            for (size_t row = 0; row < height; ++row)
78
120k
            {
79
180k
                for (size_t i = 0; i < aligned; i += A3)
80
60.0k
                    BgrToRgb<align>(bgr + i, rgb + i);
                // Tail: convert the last A3 bytes of the row with the <false> kernel. This
                // window ends exactly at the row end and overlaps bytes already converted by
                // the loop above; the overlap is re-read from bgr and written again to rgb.
                // NOTE(review): this relies on bgr and rgb not referring to the same memory;
                // with in-place use the overlapped pixels would be swapped twice - confirm
                // whether callers ever pass bgr == rgb.
81
120k
                if (aligned < size)
82
119k
                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
83
120k
                bgr += bgrStride;
84
120k
                rgb += rgbStride;
85
120k
            }
86
119k
        }
void Simd::Avx2::BgrToRgb<true>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Line
Count
Source
68
39
        {
69
39
            assert(width >= A);
70
39
            if (align)
71
39
                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
72
73
39
            const size_t A3 = A * 3;
74
39
            size_t size = width * 3;
75
39
            size_t aligned = AlignLo(width, A) * 3;
76
77
78
            for (size_t row = 0; row < height; ++row)
78
39
            {
79
6.86k
                for (size_t i = 0; i < aligned; i += A3)
80
6.82k
                    BgrToRgb<align>(bgr + i, rgb + i);
81
39
                if (aligned < size)
82
0
                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
83
39
                bgr += bgrStride;
84
39
                rgb += rgbStride;
85
39
            }
86
39
        }
void Simd::Avx2::BgrToRgb<false>(unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Line
Count
Source
68
119k
        {
69
119k
            assert(width >= A);
70
119k
            if (align)
71
119k
                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
72
73
119k
            const size_t A3 = A * 3;
74
119k
            size_t size = width * 3;
75
119k
            size_t aligned = AlignLo(width, A) * 3;
76
77
239k
            for (size_t row = 0; row < height; ++row)
78
119k
            {
79
173k
                for (size_t i = 0; i < aligned; i += A3)
80
53.2k
                    BgrToRgb<align>(bgr + i, rgb + i);
81
119k
                if (aligned < size)
82
119k
                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
83
119k
                bgr += bgrStride;
84
119k
                rgb += rgbStride;
85
119k
            }
86
119k
        }
87
// Entry point: forwards to the <true> instantiation of the row driver when both pointers
// and both strides pass Aligned(), otherwise to the <false> instantiation. All arguments
// are passed through unchanged.
// NOTE(review): Aligned() is defined elsewhere; the alignment it tests for is not visible
// here - confirm.
88
        void BgrToRgb(const uint8_t * bgr, size_t width, size_t height, size_t bgrStride, uint8_t * rgb, size_t rgbStride)
89
119k
        {
90
119k
            if (Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride))
91
39
                BgrToRgb<true>(bgr, width, height, bgrStride, rgb, rgbStride);
92
119k
            else
93
119k
                BgrToRgb<false>(bgr, width, height, bgrStride, rgb, rgbStride);
94
119k
        }
95
    }
96
#endif//SIMD_AVX2_ENABLE
97
}