Coverage Report

Created: 2025-07-23 07:53

/src/Simd/src/Simd/SimdSse41Operation.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2022 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
#include "Simd/SimdAlphaBlending.h"
27
28
namespace Simd
29
{
30
#ifdef SIMD_SSE41_ENABLE    
31
    namespace Sse41
32
    {
33
        // Primary template for the per-vector 8-bit binary kernel. Only declared here;
        // each supported SimdOperationBinary8uType value gets an explicit specialization
        // below that combines two 128-bit vectors and returns the result.
        template <SimdOperationBinary8uType type> SIMD_INLINE __m128i OperationBinary8u(const __m128i & a, const __m128i & b);
34
35
        // Average kernel: per-byte unsigned average of a and b via _mm_avg_epu8
        // (the intrinsic rounds up, i.e. (a + b + 1) >> 1 for every byte).
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uAverage>(const __m128i & a, const __m128i & b)
        {
            const __m128i averaged = _mm_avg_epu8(a, b);
            return averaged;
        }
39
40
        // AND kernel: bitwise AND of the two 128-bit vectors.
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uAnd>(const __m128i & a, const __m128i & b)
        {
            const __m128i masked = _mm_and_si128(a, b);
            return masked;
        }
44
45
        // OR kernel: bitwise OR of the two 128-bit vectors.
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uOr>(const __m128i & a, const __m128i & b)
        {
            const __m128i merged = _mm_or_si128(a, b);
            return merged;
        }
49
50
        // Maximum kernel: per-byte unsigned maximum of a and b.
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uMaximum>(const __m128i & a, const __m128i & b)
        {
            const __m128i larger = _mm_max_epu8(a, b);
            return larger;
        }
54
55
        // Minimum kernel: per-byte unsigned minimum of a and b.
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uMinimum>(const __m128i & a, const __m128i & b)
        {
            const __m128i smaller = _mm_min_epu8(a, b);
            return smaller;
        }
59
60
        // Saturated-subtraction kernel: per-byte unsigned a - b via _mm_subs_epu8,
        // which clamps at 0 instead of wrapping. Operand order matters: a is the minuend.
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uSaturatedSubtraction>(const __m128i & a, const __m128i & b)
        {
            const __m128i difference = _mm_subs_epu8(a, b);
            return difference;
        }
64
65
        // Saturated-addition kernel: per-byte unsigned a + b via _mm_adds_epu8,
        // which clamps at 255 instead of wrapping.
        template <> SIMD_INLINE __m128i OperationBinary8u<SimdOperationBinary8uSaturatedAddition>(const __m128i & a, const __m128i & b)
        {
            const __m128i sum = _mm_adds_epu8(a, b);
            return sum;
        }
69
70
        template <bool align, SimdOperationBinary8uType type> void OperationBinary8u(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride,
71
            size_t width, size_t height, size_t channelCount, uint8_t * dst, size_t dstStride)
72
0
        {
73
0
            assert(width*channelCount >= A);
74
0
            if (align)
75
0
                assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride));
76
77
0
            size_t size = channelCount*width;
78
0
            size_t alignedSize = Simd::AlignLo(size, A);
79
0
            for (size_t row = 0; row < height; ++row)
80
0
            {
81
0
                for (size_t offset = 0; offset < alignedSize; offset += A)
82
0
                {
83
0
                    const __m128i a_ = Load<align>((__m128i*)(a + offset));
84
0
                    const __m128i b_ = Load<align>((__m128i*)(b + offset));
85
0
                    Store<align>((__m128i*)(dst + offset), OperationBinary8u<type>(a_, b_));
86
0
                }
87
0
                if (alignedSize != size)
88
0
                {
89
0
                    const __m128i a_ = Load<false>((__m128i*)(a + size - A));
90
0
                    const __m128i b_ = Load<false>((__m128i*)(b + size - A));
91
0
                    Store<false>((__m128i*)(dst + size - A), OperationBinary8u<type>(a_, b_));
92
0
                }
93
0
                a += aStride;
94
0
                b += bStride;
95
0
                dst += dstStride;
96
0
            }
97
0
        }
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)2>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)3>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)4>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)5>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true, (SimdOperationBinary8uType)6>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)2>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)3>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)4>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)5>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false, (SimdOperationBinary8uType)6>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
98
99
        template <bool align> void OperationBinary8u(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride,
100
            size_t width, size_t height, size_t channelCount, uint8_t * dst, size_t dstStride, SimdOperationBinary8uType type)
101
0
        {
102
0
            switch (type)
103
0
            {
104
0
            case SimdOperationBinary8uAverage:
105
0
                return OperationBinary8u<align, SimdOperationBinary8uAverage>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
106
0
            case SimdOperationBinary8uAnd:
107
0
                return OperationBinary8u<align, SimdOperationBinary8uAnd>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
108
0
            case SimdOperationBinary8uOr:
109
0
                return OperationBinary8u<align, SimdOperationBinary8uOr>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
110
0
            case SimdOperationBinary8uMaximum:
111
0
                return OperationBinary8u<align, SimdOperationBinary8uMaximum>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
112
0
            case SimdOperationBinary8uMinimum:
113
0
                return OperationBinary8u<align, SimdOperationBinary8uMinimum>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
114
0
            case SimdOperationBinary8uSaturatedSubtraction:
115
0
                return OperationBinary8u<align, SimdOperationBinary8uSaturatedSubtraction>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
116
0
            case SimdOperationBinary8uSaturatedAddition:
117
0
                return OperationBinary8u<align, SimdOperationBinary8uSaturatedAddition>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride);
118
0
            default:
119
0
                assert(0);
120
0
            }
121
0
        }
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary8uType)
Unexecuted instantiation: void Simd::Sse41::OperationBinary8u<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary8uType)
122
123
        void OperationBinary8u(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride,
124
            size_t width, size_t height, size_t channelCount, uint8_t * dst, size_t dstStride, SimdOperationBinary8uType type)
125
0
        {
126
0
            if (Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride))
127
0
                OperationBinary8u<true>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride, type);
128
0
            else
129
0
                OperationBinary8u<false>(a, aStride, b, bStride, width, height, channelCount, dst, dstStride, type);
130
0
        }
131
132
        //-----------------------------------------------------------------------------------------
133
134
        // Primary template for the per-vector 16-bit binary kernel. Only declared here;
        // the explicit specializations below supply the addition and subtraction variants.
        template <SimdOperationBinary16iType type> SIMD_INLINE __m128i OperationBinary16i(const __m128i & a, const __m128i & b);
135
136
        // Addition kernel: per-lane 16-bit a + b via _mm_add_epi16 (wraps on overflow,
        // no saturation).
        template <> SIMD_INLINE __m128i OperationBinary16i<SimdOperationBinary16iAddition>(const __m128i & a, const __m128i & b)
        {
            const __m128i sum = _mm_add_epi16(a, b);
            return sum;
        }
140
141
        // Subtraction kernel: per-lane 16-bit a - b via _mm_sub_epi16 (wraps on overflow,
        // no saturation). Operand order matters: a is the minuend.
        template <> SIMD_INLINE __m128i OperationBinary16i<SimdOperationBinary16iSubtraction>(const __m128i & a, const __m128i & b)
        {
            const __m128i difference = _mm_sub_epi16(a, b);
            return difference;
        }
145
146
        template <bool align, SimdOperationBinary16iType type> void OperationBinary16i(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride,
147
            size_t width, size_t height, uint8_t * dst, size_t dstStride)
148
0
        {
149
0
            assert(width * sizeof(uint16_t) >= A);
150
0
            if (align)
151
0
                assert(Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride));
152
153
0
            size_t size = width * sizeof(int16_t);
154
0
            size_t alignedSize = Simd::AlignLo(size, A);
155
0
            for (size_t row = 0; row < height; ++row)
156
0
            {
157
0
                for (size_t offset = 0; offset < alignedSize; offset += A)
158
0
                {
159
0
                    const __m128i a_ = Load<align>((__m128i*)(a + offset));
160
0
                    const __m128i b_ = Load<align>((__m128i*)(b + offset));
161
0
                    Store<align>((__m128i*)(dst + offset), OperationBinary16i<type>(a_, b_));
162
0
                }
163
0
                if (alignedSize != size)
164
0
                {
165
0
                    const __m128i a_ = Load<false>((__m128i*)(a + size - A));
166
0
                    const __m128i b_ = Load<false>((__m128i*)(b + size - A));
167
0
                    Store<false>((__m128i*)(dst + size - A), OperationBinary16i<type>(a_, b_));
168
0
                }
169
0
                a += aStride;
170
0
                b += bStride;
171
0
                dst += dstStride;
172
0
            }
173
0
        }
Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<true, (SimdOperationBinary16iType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<true, (SimdOperationBinary16iType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<false, (SimdOperationBinary16iType)0>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<false, (SimdOperationBinary16iType)1>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long)
174
175
        template <bool align> void OperationBinary16i(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride,
176
            size_t width, size_t height, uint8_t * dst, size_t dstStride, SimdOperationBinary16iType type)
177
0
        {
178
0
            switch (type)
179
0
            {
180
0
            case SimdOperationBinary16iAddition:
181
0
                return OperationBinary16i<align, SimdOperationBinary16iAddition>(a, aStride, b, bStride, width, height, dst, dstStride);
182
0
            case SimdOperationBinary16iSubtraction:
183
0
                return OperationBinary16i<align, SimdOperationBinary16iSubtraction>(a, aStride, b, bStride, width, height, dst, dstStride);
184
0
            default:
185
0
                assert(0);
186
0
            }
187
0
        }
Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<true>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary16iType)
Unexecuted instantiation: void Simd::Sse41::OperationBinary16i<false>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, unsigned long, unsigned char*, unsigned long, SimdOperationBinary16iType)
188
189
        void OperationBinary16i(const uint8_t * a, size_t aStride, const uint8_t * b, size_t bStride,
190
            size_t width, size_t height, uint8_t * dst, size_t dstStride, SimdOperationBinary16iType type)
191
0
        {
192
0
            if (Aligned(a) && Aligned(aStride) && Aligned(b) && Aligned(bStride) && Aligned(dst) && Aligned(dstStride))
193
0
                OperationBinary16i<true>(a, aStride, b, bStride, width, height, dst, dstStride, type);
194
0
            else
195
0
                OperationBinary16i<false>(a, aStride, b, bStride, width, height, dst, dstStride, type);
196
0
        }
197
198
        //-----------------------------------------------------------------------------------------
199
200
        // Produces one block of output bytes: widens the bytes loaded from 'horizontal'
        // to 16-bit lanes, multiplies each lane by the matching 16-bit lane of 'vertical',
        // passes the products through Divide16uBy255 (defined elsewhere; by its name it
        // divides each 16-bit lane by 255), and packs the results back to bytes with
        // unsigned saturation before storing them to 'dst'.
        template <bool align> SIMD_INLINE void VectorProduct(const __m128i & vertical, const uint8_t * horizontal, uint8_t * dst)
        {
            const __m128i bytes = Load<align>((__m128i*)horizontal);
            // Interleaving with zero widens unsigned bytes to 16-bit lanes.
            const __m128i loWide = _mm_unpacklo_epi8(bytes, K_ZERO);
            const __m128i hiWide = _mm_unpackhi_epi8(bytes, K_ZERO);
            const __m128i loScaled = Divide16uBy255(_mm_mullo_epi16(vertical, loWide));
            const __m128i hiScaled = Divide16uBy255(_mm_mullo_epi16(vertical, hiWide));
            Store<align>((__m128i*)dst, _mm_packus_epi16(loScaled, hiScaled));
        }
Unexecuted instantiation: void Simd::Sse41::VectorProduct<true>(long long __vector(2) const&, unsigned char const*, unsigned char*)
Unexecuted instantiation: void Simd::Sse41::VectorProduct<false>(long long __vector(2) const&, unsigned char const*, unsigned char*)
207
208
        template <bool align> void VectorProduct(const uint8_t * vertical, const uint8_t * horizontal, uint8_t * dst, size_t stride, size_t width, size_t height)
209
0
        {
210
0
            assert(width >= A);
211
0
            if (align)
212
0
                assert(Aligned(horizontal) && Aligned(dst) && Aligned(stride));
213
214
0
            size_t alignedWidth = Simd::AlignLo(width, A);
215
0
            for (size_t row = 0; row < height; ++row)
216
0
            {
217
0
                __m128i _vertical = _mm_set1_epi16(vertical[row]);
218
0
                for (size_t col = 0; col < alignedWidth; col += A)
219
0
                    VectorProduct<align>(_vertical, horizontal + col, dst + col);
220
0
                if (alignedWidth != width)
221
0
                    VectorProduct<false>(_vertical, horizontal + width - A, dst + width - A);
222
0
                dst += stride;
223
0
            }
224
0
        }
Unexecuted instantiation: void Simd::Sse41::VectorProduct<true>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: void Simd::Sse41::VectorProduct<false>(unsigned char const*, unsigned char const*, unsigned char*, unsigned long, unsigned long, unsigned long)
225
226
        void VectorProduct(const uint8_t * vertical, const uint8_t * horizontal, uint8_t * dst, size_t stride, size_t width, size_t height)
227
0
        {
228
0
            if (Aligned(horizontal) && Aligned(dst) && Aligned(stride))
229
0
                VectorProduct<true>(vertical, horizontal, dst, stride, width, height);
230
0
            else
231
0
                VectorProduct<false>(vertical, horizontal, dst, stride, width, height);
232
0
        }
233
    }
234
#endif
235
}