Coverage Report

Created: 2025-08-11 07:29

/src/Simd/src/Simd/SimdSse41Float32.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2022 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#include "Simd/SimdMemory.h"
25
#include "Simd/SimdStore.h"
26
#include "Simd/SimdExtract.h"
27
#include "Simd/SimdUnpack.h"
28
29
namespace Simd
30
{
31
#ifdef SIMD_SSE41_ENABLE    
32
    namespace Sse41
33
    {
34
        template<bool align> void CosineDistance32f(const float* a, const float* b, size_t size, float* distance)
35
0
        {
36
0
            if (align)
37
0
                assert(Aligned(a) && Aligned(b));
38
39
0
            size_t partialAlignedSize = AlignLo(size, F);
40
0
            size_t fullAlignedSize = AlignLo(size, DF);
41
0
            size_t i = 0;
42
0
            __m128 _aa[2] = { _mm_setzero_ps(), _mm_setzero_ps() };
43
0
            __m128 _ab[2] = { _mm_setzero_ps(), _mm_setzero_ps() };
44
0
            __m128 _bb[2] = { _mm_setzero_ps(), _mm_setzero_ps() };
45
0
            if (fullAlignedSize)
46
0
            {
47
0
                for (; i < fullAlignedSize; i += DF)
48
0
                {
49
0
                    __m128 a0 = Load<align>(a + i + 0 * F);
50
0
                    __m128 b0 = Load<align>(b + i + 0 * F);
51
0
                    _aa[0] = _mm_add_ps(_aa[0], _mm_mul_ps(a0, a0));
52
0
                    _ab[0] = _mm_add_ps(_ab[0], _mm_mul_ps(a0, b0));
53
0
                    _bb[0] = _mm_add_ps(_bb[0], _mm_mul_ps(b0, b0));
54
0
                    __m128 a1 = Load<align>(a + i + 1 * F);
55
0
                    __m128 b1 = Load<align>(b + i + 1 * F);
56
0
                    _aa[1] = _mm_add_ps(_aa[1], _mm_mul_ps(a1, a1));
57
0
                    _ab[1] = _mm_add_ps(_ab[1], _mm_mul_ps(a1, b1));
58
0
                    _bb[1] = _mm_add_ps(_bb[1], _mm_mul_ps(b1, b1));
59
0
                }
60
0
                _aa[0] = _mm_add_ps(_aa[0], _aa[1]);
61
0
                _ab[0] = _mm_add_ps(_ab[0], _ab[1]);
62
0
                _bb[0] = _mm_add_ps(_bb[0], _bb[1]);
63
0
            }
64
0
            for (; i < partialAlignedSize; i += F)
65
0
            {
66
0
                __m128 a0 = Load<align>(a + i);
67
0
                __m128 b0 = Load<align>(b + i);
68
0
                _aa[0] = _mm_add_ps(_aa[0], _mm_mul_ps(a0, a0));
69
0
                _ab[0] = _mm_add_ps(_ab[0], _mm_mul_ps(a0, b0));
70
0
                _bb[0] = _mm_add_ps(_bb[0], _mm_mul_ps(b0, b0));
71
0
            }
72
0
            float aa = ExtractSum(_aa[0]), ab = ExtractSum(_ab[0]), bb = ExtractSum(_bb[0]);
73
0
            for (; i < size; ++i)
74
0
            {
75
0
                float _a = a[i];
76
0
                float _b = b[i];
77
0
                aa += _a * _a;
78
0
                ab += _a * _b;
79
0
                bb += _b * _b;
80
0
            }
81
0
            *distance = 1.0f - ab / ::sqrt(aa * bb);
82
0
        }
Unexecuted instantiation: void Simd::Sse41::CosineDistance32f<true>(float const*, float const*, unsigned long, float*)
Unexecuted instantiation: void Simd::Sse41::CosineDistance32f<false>(float const*, float const*, unsigned long, float*)
83
84
        void CosineDistance32f(const float* a, const float* b, size_t size, float* distance)
85
0
        {
86
0
            if (Aligned(a) && Aligned(b))
87
0
                CosineDistance32f<true>(a, b, size, distance);
88
0
            else
89
0
                CosineDistance32f<false>(a, b, size, distance);
90
0
        }
91
92
        //-----------------------------------------------------------------------------------------
93
94
        // Loads one __m128 of floats from src, clamps each lane to [lower, upper],
        // shifts by -lower, scales by boost and converts to 32-bit integers
        // with _mm_cvtps_epi32. Returns the four converted lanes.
        template <bool align> SIMD_INLINE __m128i Float32ToUint8(const float * src, const __m128 & lower, const __m128 & upper, const __m128 & boost)
        {
            const __m128 value = Load<align>(src);
            const __m128 clamped = _mm_min_ps(_mm_max_ps(value, lower), upper);
            const __m128 shifted = _mm_sub_ps(clamped, lower);
            const __m128 scaled = _mm_mul_ps(shifted, boost);
            return _mm_cvtps_epi32(scaled);
        }
Unexecuted instantiation: long long __vector(2) Simd::Sse41::Float32ToUint8<true>(float const*, float __vector(4) const&, float __vector(4) const&, float __vector(4) const&)
Unexecuted instantiation: long long __vector(2) Simd::Sse41::Float32ToUint8<false>(float const*, float __vector(4) const&, float __vector(4) const&, float __vector(4) const&)
98
99
        // Converts four consecutive __m128 groups of floats (at src + F * 0..3) to bytes
        // and stores the resulting 16 bytes at dst as a single __m128i.
        // The 32-bit results are narrowed with signed saturation to 16 bits and then
        // with unsigned saturation to 8 bits.
        template <bool align> SIMD_INLINE void Float32ToUint8(const float * src, const __m128 & lower, const __m128 & upper, const __m128 & boost, uint8_t * dst)
        {
            const __m128i q0 = Float32ToUint8<align>(src + 0 * F, lower, upper, boost);
            const __m128i q1 = Float32ToUint8<align>(src + 1 * F, lower, upper, boost);
            const __m128i q2 = Float32ToUint8<align>(src + 2 * F, lower, upper, boost);
            const __m128i q3 = Float32ToUint8<align>(src + 3 * F, lower, upper, boost);

            const __m128i words01 = _mm_packs_epi32(q0, q1);
            const __m128i words23 = _mm_packs_epi32(q2, q3);
            const __m128i bytes = _mm_packus_epi16(words01, words23);

            Store<align>((__m128i*)dst, bytes);
        }
Unexecuted instantiation: void Simd::Sse41::Float32ToUint8<true>(float const*, float __vector(4) const&, float __vector(4) const&, float __vector(4) const&, unsigned char*)
Unexecuted instantiation: void Simd::Sse41::Float32ToUint8<false>(float const*, float __vector(4) const&, float __vector(4) const&, float __vector(4) const&, unsigned char*)
107
108
        template <bool align> void Float32ToUint8(const float * src, size_t size, const float * lower, const float * upper, uint8_t * dst)
109
0
        {
110
0
            assert(size >= A);
111
0
            if (align)
112
0
                assert(Aligned(src) && Aligned(dst));
113
114
0
            __m128 _lower = _mm_set1_ps(lower[0]);
115
0
            __m128 _upper = _mm_set1_ps(upper[0]);
116
0
            __m128 boost = _mm_set1_ps(255.0f / (upper[0] - lower[0]));
117
118
0
            size_t alignedSize = AlignLo(size, A);
119
0
            for (size_t i = 0; i < alignedSize; i += A)
120
0
                Float32ToUint8<align>(src + i, _lower, _upper, boost, dst + i);
121
0
            if (alignedSize != size)
122
0
                Float32ToUint8<false>(src + size - A, _lower, _upper, boost, dst + size - A);
123
0
        }
Unexecuted instantiation: void Simd::Sse41::Float32ToUint8<true>(float const*, unsigned long, float const*, float const*, unsigned char*)
Unexecuted instantiation: void Simd::Sse41::Float32ToUint8<false>(float const*, unsigned long, float const*, float const*, unsigned char*)
124
125
        void Float32ToUint8(const float * src, size_t size, const float * lower, const float * upper, uint8_t * dst)
126
0
        {
127
0
            if (Aligned(src) && Aligned(dst))
128
0
                Float32ToUint8<true>(src, size, lower, upper, dst);
129
0
            else
130
0
                Float32ToUint8<false>(src, size, lower, upper, dst);
131
0
        }
132
133
        //-----------------------------------------------------------------------------------------
134
135
        // Converts four 32-bit integer lanes to float and applies value * boost + lower per lane.
        SIMD_INLINE __m128 Uint8ToFloat32(const __m128i & value, const __m128 & lower, const __m128 & boost)
        {
            const __m128 asFloat = _mm_cvtepi32_ps(value);
            const __m128 scaled = _mm_mul_ps(asFloat, boost);
            return _mm_add_ps(scaled, lower);
        }
139
140
        // Loads one __m128i of bytes from src, widens it in two steps (UnpackU8, then UnpackU16)
        // into four vectors, converts each to scaled floats and stores them at dst + F * 0..3.
        // NOTE(review): UnpackU8/UnpackU16<0|1> presumably zero-extend the low/high half - confirm in SimdUnpack.h.
        template <bool align> SIMD_INLINE void Uint8ToFloat32(const uint8_t * src, const __m128 & lower, const __m128 & boost, float * dst)
        {
            const __m128i bytes = Load<align>((__m128i*)src);

            const __m128i wordsLo = UnpackU8<0>(bytes);
            const __m128i wordsHi = UnpackU8<1>(bytes);

            const __m128 f0 = Uint8ToFloat32(UnpackU16<0>(wordsLo), lower, boost);
            Store<align>(dst + 0 * F, f0);
            const __m128 f1 = Uint8ToFloat32(UnpackU16<1>(wordsLo), lower, boost);
            Store<align>(dst + 1 * F, f1);
            const __m128 f2 = Uint8ToFloat32(UnpackU16<0>(wordsHi), lower, boost);
            Store<align>(dst + 2 * F, f2);
            const __m128 f3 = Uint8ToFloat32(UnpackU16<1>(wordsHi), lower, boost);
            Store<align>(dst + 3 * F, f3);
        }
Unexecuted instantiation: void Simd::Sse41::Uint8ToFloat32<true>(unsigned char const*, float __vector(4) const&, float __vector(4) const&, float*)
Unexecuted instantiation: void Simd::Sse41::Uint8ToFloat32<false>(unsigned char const*, float __vector(4) const&, float __vector(4) const&, float*)
150
151
        template <bool align> void Uint8ToFloat32(const uint8_t * src, size_t size, const float * lower, const float * upper, float * dst)
152
0
        {
153
0
            assert(size >= A);
154
0
            if (align)
155
0
                assert(Aligned(src) && Aligned(dst));
156
157
0
            __m128 _lower = _mm_set1_ps(lower[0]);
158
0
            __m128 boost = _mm_set1_ps((upper[0] - lower[0]) / 255.0f);
159
160
0
            size_t alignedSize = AlignLo(size, A);
161
0
            for (size_t i = 0; i < alignedSize; i += A)
162
0
                Uint8ToFloat32<align>(src + i, _lower, boost, dst + i);
163
0
            if (alignedSize != size)
164
0
                Uint8ToFloat32<false>(src + size - A, _lower, boost, dst + size - A);
165
0
        }
Unexecuted instantiation: void Simd::Sse41::Uint8ToFloat32<true>(unsigned char const*, unsigned long, float const*, float const*, float*)
Unexecuted instantiation: void Simd::Sse41::Uint8ToFloat32<false>(unsigned char const*, unsigned long, float const*, float const*, float*)
166
167
        void Uint8ToFloat32(const uint8_t * src, size_t size, const float * lower, const float * upper, float * dst)
168
0
        {
169
0
            if (Aligned(src) && Aligned(dst))
170
0
                Uint8ToFloat32<true>(src, size, lower, upper, dst);
171
0
            else
172
0
                Uint8ToFloat32<false>(src, size, lower, upper, dst);
173
0
        }
174
    }
175
#endif
176
}