Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdAvx512bwYuvToUyvy.cpp
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2022 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
27
namespace Simd
28
{
29
#ifdef SIMD_AVX512BW_ENABLE    
30
    namespace Avx512bw
31
    {
32
        // Converts one horizontal chunk of two adjacent image rows from planar Y/U/V input
        // to packed output, writing up to 128 bytes per row.
        //
        //   y0, yStride       - first luma row; the second row is read at y0 + yStride.
        //   u, v              - chroma inputs; the single uv register built from them is
        //                       reused for both output rows.
        //   uyvy0, uyvyStride - first output row; the second row is written at uyvy0 + uyvyStride.
        //   yuvMask           - mask passed to Load(u, v, ...) and to the 16-bit masked luma loads.
        //   uyvyMask0/1       - 16-bit-element store masks for output bytes [0, 64) and [64, 128).
        //
        // NOTE(review): Load(u, v, mask) is defined elsewhere; presumably it places the masked
        // U bytes in the lower 256 bits and the V bytes in the upper 256 bits. The U-Y-V-Y byte
        // order described below depends on that - confirm against the helper.
        SIMD_INLINE void Yuv420pToUyvy422(const uint8_t* y0, size_t yStride, const uint8_t* u, const uint8_t* v, 
33
            uint8_t* uyvy0, size_t uyvyStride, __mmask32 yuvMask, __mmask32 uyvyMask0, __mmask32 uyvyMask1)
34
0
        {
35
0
            static const __m512i PRM0 = SIMD_MM512_SETR_EPI32(0x00, 0x08, 0x10, 0x11, 0x01, 0x09, 0x12, 0x13, 0x02, 0x0A, 0x14, 0x15, 0x03, 0x0B, 0x16, 0x17); // dword indices: 0x00-0x0F pick from uv, 0x10-0x1F pick from the Y register; each 128-bit lane = { uv[k], uv[k + 8], y[2k], y[2k + 1] }, k = 0..3
36
0
            static const __m512i PRM1 = SIMD_MM512_SETR_EPI32(0x04, 0x0C, 0x18, 0x19, 0x05, 0x0D, 0x1A, 0x1B, 0x06, 0x0E, 0x1C, 0x1D, 0x07, 0x0F, 0x1E, 0x1F); // same lane layout as PRM0 for k = 4..7 (second half of the inputs)
37
0
            static const __m512i SHFL = SIMD_MM512_SETR_EPI8( // per-lane byte order: byte of dword 0, Y byte, byte of dword 1, Y byte, repeated four times
38
0
                0x0, 0x8, 0x4, 0x9, 0x1, 0xA, 0x5, 0xB, 0x2, 0xC, 0x6, 0xD, 0x3, 0xE, 0x7, 0xF,
39
0
                0x0, 0x8, 0x4, 0x9, 0x1, 0xA, 0x5, 0xB, 0x2, 0xC, 0x6, 0xD, 0x3, 0xE, 0x7, 0xF,
40
0
                0x0, 0x8, 0x4, 0x9, 0x1, 0xA, 0x5, 0xB, 0x2, 0xC, 0x6, 0xD, 0x3, 0xE, 0x7, 0xF,
41
0
                0x0, 0x8, 0x4, 0x9, 0x1, 0xA, 0x5, 0xB, 0x2, 0xC, 0x6, 0xD, 0x3, 0xE, 0x7, 0xF);
42
0
            __m512i uv = Load(u, v, yuvMask); // combined chroma register, shared by both rows below
43
0
            __m512i _y0 = _mm512_maskz_loadu_epi16(yuvMask, y0); // each mask bit covers one 16-bit element, i.e. two luma bytes; masked-off elements are zeroed
44
0
            _mm512_mask_storeu_epi16(uyvy0 + 0 * 64, uyvyMask0, _mm512_shuffle_epi8(_mm512_permutex2var_epi32(uv, PRM0, _y0), SHFL)); // first row, output bytes [0, 64)
45
0
            _mm512_mask_storeu_epi16(uyvy0 + 1 * 64, uyvyMask1, _mm512_shuffle_epi8(_mm512_permutex2var_epi32(uv, PRM1, _y0), SHFL)); // first row, output bytes [64, 128)
46
0
            __m512i _y1 = _mm512_maskz_loadu_epi16(yuvMask, y0 + yStride); // luma of the second row
47
0
            uint8_t* uyvy1 = uyvy0 + uyvyStride; // second output row
48
0
            _mm512_mask_storeu_epi16(uyvy1 + 0 * 64, uyvyMask0, _mm512_shuffle_epi8(_mm512_permutex2var_epi32(uv, PRM0, _y1), SHFL)); // second row, output bytes [0, 64)
49
0
            _mm512_mask_storeu_epi16(uyvy1 + 1 * 64, uyvyMask1, _mm512_shuffle_epi8(_mm512_permutex2var_epi32(uv, PRM1, _y1), SHFL)); // second row, output bytes [64, 128)
50
0
        }
51
52
        // Converts a whole image from three planes (y, u, v) to a packed output buffer (uyvy),
        // processing two image rows per outer-loop iteration.
        //
        //   y, yStride       - luma plane and its row stride; advanced by 2 * yStride per iteration.
        //   u, uStride       - U plane and its row stride; advanced by uStride per iteration.
        //   v, vStride       - V plane and its row stride; advanced by vStride per iteration.
        //   width, height    - image size; both are asserted to be even, and width >= 2 * A.
        //   uyvy, uyvyStride - output buffer and its row stride; advanced by 2 * uyvyStride per iteration.
        //
        // Each full-width kernel call consumes 64 luma bytes and 32 bytes from each of u and v,
        // and produces 128 output bytes for each of the two rows. A final masked call handles
        // the remaining (width / 2) % 32 chroma samples, if any.
        void Yuv420pToUyvy422(const uint8_t* y, size_t yStride, const uint8_t* u, size_t uStride, 
53
            const uint8_t* v, size_t vStride, size_t width, size_t height, uint8_t* uyvy, size_t uyvyStride)
54
0
        {
55
0
            assert((width % 2 == 0) && (height % 2 == 0) && width >= 2 * A); // A is defined elsewhere - presumably the 64-byte register width; confirm
56
57
0
            size_t size = width / 2; // number of U (and V) bytes consumed per chroma row
58
0
            size_t size32 = AlignLo(size, 32); // presumably size rounded down to a multiple of 32 - confirm against AlignLo
59
0
            size_t tail = size - size32; // chroma samples left over for the masked call
60
0
            __mmask32 yuvMask = TailMask32(tail); // presumably sets the low `tail` bits - confirm against TailMask32
61
0
            __mmask32 uyvyMask0 = TailMask32(tail * 2 - 32 * 0); // output holds 2 * tail 16-bit elements; first store covers elements [0, 32)
62
0
            __mmask32 uyvyMask1 = TailMask32(tail * 2 - 32 * 1); // NOTE(review): size_t arithmetic wraps when tail < 16; presumably TailMask32 takes a signed argument and returns 0 for non-positive values - confirm
63
64
0
            for (size_t row = 0; row < height; row += 2) // two image rows share one chroma row
65
0
            {
66
0
                size_t colY = 0, colUV = 0, colUyvy = 0; // byte offsets into the luma, chroma and output rows
67
0
                for (; colUV < size32; colY += 64, colUV += 32, colUyvy += 128) // full chunks, all mask bits set
68
0
                    Yuv420pToUyvy422(y + colY, yStride, u + colUV, v + colUV, uyvy + colUyvy, uyvyStride, __mmask32(-1), __mmask32(-1), __mmask32(-1));
69
0
                if (tail) // partial last chunk, restricted by the precomputed tail masks
70
0
                    Yuv420pToUyvy422(y + colY, yStride, u + colUV, v + colUV, uyvy + colUyvy, uyvyStride, yuvMask, uyvyMask0, uyvyMask1);
71
0
                y += 2 * yStride;
72
0
                u += uStride;
73
0
                v += vStride;
74
0
                uyvy += 2 * uyvyStride;
75
0
            }            
76
0
        }
77
    }
78
#endif
79
}