Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdSet.h
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2025 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#ifndef __SimdSet_h__
25
#define __SimdSet_h__
26
27
#include "Simd/SimdDefs.h"
28
#include "Simd/SimdConst.h"
29
30
namespace Simd
31
{
32
    namespace Base
33
    {
34
        SIMD_INLINE void SetZero(uint16_t* dst, size_t size)
35
0
        {
36
0
            for (size_t i = 0; i < size; ++i)
37
0
                dst[i] = 0;
38
0
        }
39
    }
40
41
#ifdef SIMD_SSE41_ENABLE
42
    namespace Sse41
43
    {
44
        SIMD_INLINE __m128i SetInt8(char a0, char a1)
45
0
        {
46
0
            return _mm_unpacklo_epi8(_mm_set1_epi8(a0), _mm_set1_epi8(a1));
47
0
        }
48
49
        SIMD_INLINE __m128i SetInt16(short a0, short a1)
50
0
        {
51
0
            return _mm_unpacklo_epi16(_mm_set1_epi16(a0), _mm_set1_epi16(a1));
52
0
        }
53
54
        SIMD_INLINE __m128i SetInt32(int a0, int a1)
55
0
        {
56
0
            return _mm_unpacklo_epi32(_mm_set1_epi32(a0), _mm_set1_epi32(a1));
57
0
        }
58
59
        SIMD_INLINE __m128 SetFloat(float a0, float a1)
60
0
        {
61
0
            return _mm_unpacklo_ps(_mm_set_ps1(a0), _mm_set_ps1(a1));
62
0
        }
63
64
        //-------------------------------------------------------------------------------------------------
65
66
        SIMD_INLINE void SetZero(uint16_t* dst)
67
0
        {
68
0
            _mm_storeu_si128((__m128i*)dst, _mm_setzero_si128());
69
0
        }
70
    }
71
#endif
72
73
#ifdef SIMD_AVX2_ENABLE
74
    namespace Avx2
75
    {
76
        SIMD_INLINE __m256 Set(__m128 a0, __m128 a1)
77
0
        {
78
0
            return _mm256_insertf128_ps(_mm256_castps128_ps256(a0), a1, 1);
79
0
        }
80
81
        SIMD_INLINE __m256 Set(__m128 a)
82
0
        {
83
0
            return _mm256_insertf128_ps(_mm256_castps128_ps256(a), a, 1);
84
0
        }
85
86
        SIMD_INLINE __m256i SetInt8(char a0, char a1)
87
0
        {
88
0
            return _mm256_unpacklo_epi8(_mm256_set1_epi8(a0), _mm256_set1_epi8(a1));
89
0
        }
90
91
        SIMD_INLINE __m256i SetInt16(short a0, short a1)
92
0
        {
93
0
            return _mm256_unpacklo_epi16(_mm256_set1_epi16(a0), _mm256_set1_epi16(a1));
94
0
        }
95
96
        SIMD_INLINE __m256i SetInt32(int a0, int a1)
97
0
        {
98
0
            return _mm256_unpacklo_epi32(_mm256_set1_epi32(a0), _mm256_set1_epi32(a1));
99
0
        }
100
101
        SIMD_INLINE __m256 SetFloat(float a0, float a1)
102
0
        {
103
0
            return _mm256_unpacklo_ps(_mm256_set1_ps(a0), _mm256_set1_ps(a1));
104
0
        }
105
106
        SIMD_INLINE __m256i Set(__m128i a0, __m128i a1)
107
0
        {
108
0
            return _mm256_inserti128_si256(_mm256_castsi128_si256(a0), a1, 1);
109
0
        }
110
111
        SIMD_INLINE __m256i Set(__m128i a)
112
0
        {
113
0
            return _mm256_inserti128_si256(_mm256_castsi128_si256(a), a, 1);
114
0
        }
115
116
        template <class T> SIMD_INLINE __m256i SetMask(T first, size_t position, T second)
117
0
        {
118
0
            const size_t size = A / sizeof(T);
119
0
            assert(position <= size);
120
0
            T mask[size];
121
0
            for (size_t i = 0; i < position; ++i)
122
0
                mask[i] = first;
123
0
            for (size_t i = position; i < size; ++i)
124
0
                mask[i] = second;
125
0
            return _mm256_loadu_si256((__m256i*)mask);
126
0
        }
Unexecuted instantiation: long long __vector(4) Simd::Avx2::SetMask<unsigned char>(unsigned char, unsigned long, unsigned char)
Unexecuted instantiation: long long __vector(4) Simd::Avx2::SetMask<unsigned short>(unsigned short, unsigned long, unsigned short)
127
128
        //-------------------------------------------------------------------------------------------------
129
130
        SIMD_INLINE void SetZero(uint16_t* dst)
131
0
        {
132
0
            _mm256_storeu_si256((__m256i*)dst, _mm256_setzero_si256());
133
0
        }
134
135
        SIMD_INLINE void SetZero2(uint16_t* dst)
136
0
        {
137
0
            _mm256_storeu_si256((__m256i*)dst + 0, _mm256_setzero_si256());
138
0
            _mm256_storeu_si256((__m256i*)dst + 1, _mm256_setzero_si256());
139
0
        }
140
    }
141
#endif
142
143
#ifdef SIMD_AVX512BW_ENABLE
144
    namespace Avx512bw
145
    {
146
        SIMD_INLINE __m512i SetInt8(char a0, char a1)
147
0
        {
148
0
            return _mm512_unpacklo_epi8(_mm512_set1_epi8(a0), _mm512_set1_epi8(a1));
149
0
        }
150
151
        SIMD_INLINE __m512i SetInt16(short a0, short a1)
152
0
        {
153
0
            return _mm512_unpacklo_epi16(_mm512_set1_epi16(a0), _mm512_set1_epi16(a1));
154
0
        }
155
156
        SIMD_INLINE __m512i SetInt32(int a0, int a1)
157
0
        {
158
0
            return _mm512_unpacklo_epi32(_mm512_set1_epi32(a0), _mm512_set1_epi32(a1));
159
0
        }
160
161
        SIMD_INLINE __m512 SetFloat(float a0, float a1)
162
0
        {
163
0
            return _mm512_unpacklo_ps(_mm512_set1_ps(a0), _mm512_set1_ps(a1));
164
0
        }
165
166
        SIMD_INLINE __m512 Set(__m256 a0, __m256 a1)
167
0
        {
168
0
            return _mm512_insertf32x8(_mm512_castps256_ps512(a0), a1, 1);
169
0
        }
170
171
        SIMD_INLINE __m512i Set(__m256i a0, __m256i a1)
172
0
        {
173
0
            return _mm512_inserti32x8(_mm512_castsi256_si512(a0), a1, 1);
174
0
        }
175
176
        SIMD_INLINE __m512i Set(const __m128i& a0, const __m128i& a1, const __m128i& a2, const __m128i& a3)
177
0
        {
178
0
            return _mm512_inserti32x4(_mm512_inserti32x4(_mm512_inserti32x4(_mm512_castsi128_si512(a0), a1, 1), a2, 2), a3, 3);
179
0
        }
180
181
        //-------------------------------------------------------------------------------------------------
182
183
        SIMD_INLINE void SetZero(float* dst, __mmask16 mask = __mmask16(-1))
184
0
        {
185
0
            _mm512_mask_storeu_ps(dst, mask, _mm512_setzero_ps());
186
0
        }
187
188
        //-------------------------------------------------------------------------------------------------
189
190
        SIMD_INLINE void SetZero(uint16_t* dst, __mmask32 mask = __mmask32(-1))
191
0
        {
192
0
            _mm512_mask_storeu_epi16(dst, mask, _mm512_setzero_si512());
193
0
        }
194
195
        SIMD_INLINE void SetZeros(uint16_t* dst, size_t size32, __mmask32 tail)
196
0
        {
197
0
            size_t i = 0;
198
0
            __m512i zero = _mm512_setzero_si512();
199
0
            for (; i < size32; i += 32)
200
0
                _mm512_storeu_si512(dst + i, zero);
201
0
            if (tail)
202
0
                _mm512_mask_storeu_epi16(dst + i, tail, zero);
203
0
        }
204
205
        SIMD_INLINE void SetZeros(uint16_t* dst, size_t size)
206
0
        {
207
0
            size_t tail = size & 31;
208
0
            SetZeros(dst, size & (~31), tail ? __mmask32(-1) >> (32 - tail) : 0);
209
0
        }
210
211
        //-------------------------------------------------------------------------------------------------
212
213
        SIMD_INLINE void SetZero(uint8_t* dst, __m512i zero = _mm512_setzero_si512(), __mmask64 mask = __mmask64(-1))
214
0
        {
215
0
            _mm512_mask_storeu_epi8(dst, mask, zero);
216
0
        }
217
218
        SIMD_INLINE void SetZeros(uint8_t* dst, __m512i zero, size_t size64, __mmask64 tail)
219
0
        {
220
0
            size_t i = 0;
221
0
            for (; i < size64; i += 64)
222
0
                _mm512_storeu_si512(dst + i, zero);
223
0
            if (tail)
224
0
                _mm512_mask_storeu_epi8(dst + i, tail, zero);
225
0
        }
226
227
        SIMD_INLINE void SetZeros(uint8_t* dst, __m512i zero, size_t size)
228
0
        {
229
0
            size_t tail = size & 63;
230
0
            SetZeros(dst, zero, size & (~63), tail ? __mmask64(-1) >> (64 - tail) : 0);
231
0
        }
232
    }
233
#endif
234
235
#ifdef SIMD_NEON_ENABLE
236
    namespace Neon
237
    {
238
        SIMD_INLINE float32x4_t SetF32(float a0, float a1, float a2, float a3)
239
        {
240
            const float a[4] = { a0, a1, a2, a3 };
241
            return vld1q_f32(a);
242
        }
243
244
        SIMD_INLINE int32x4_t SetI32(int32_t a0, int32_t a1, int32_t a2, int32_t a3)
245
        {
246
            const int32_t a[4] = { a0, a1, a2, a3 };
247
            return vld1q_s32(a);
248
        }
249
    }
250
#endif
251
}
252
253
#endif