Coverage Report

Created: 2025-12-10 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/Simd/src/Simd/SimdFmadd.h
Line
Count
Source
1
/*
2
* Simd Library (http://ermig1979.github.io/Simd).
3
*
4
* Copyright (c) 2011-2023 Yermalayeu Ihar.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a copy
7
* of this software and associated documentation files (the "Software"), to deal
8
* in the Software without restriction, including without limitation the rights
9
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the Software is
11
* furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included in
14
* all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
#ifndef __SimdFmadd_h__
25
#define __SimdFmadd_h__
26
27
#include "Simd/SimdDefs.h"
28
#include "Simd/SimdConvert.h"
29
30
namespace Simd
31
{
32
    namespace Base
33
    {
34
        // Scalar multiply-add returning a * b + c, with the arithmetic chosen at compile time by 'nofma':
        //   nofma == false : the product and sum are evaluated in double and narrowed to float at the end;
        //   nofma == true  : the product and sum are evaluated directly in float.
        // Only the two explicit specializations below provide a definition.
        template<bool nofma> SIMD_INLINE float Fmadd(float a, float b, float c);

        // nofma == false: widen every operand to double, multiply, add, then narrow the result to float.
        template <> SIMD_INLINE float Fmadd<false>(float a, float b, float c)
        {
            const double product = static_cast<double>(a) * static_cast<double>(b);
            return static_cast<float>(product + static_cast<double>(c));
        }

        // nofma == true: plain single-precision multiply followed by an add.
        template <> SIMD_INLINE float Fmadd<true>(float a, float b, float c)
        {
            // NOTE(review): whether the compiler may contract this expression into one fused
            // instruction depends on the build's floating-point contraction settings - confirm
            // the flags if a strictly unfused result is required here.
            return a * b + c;
        }
45
    }
46
47
#ifdef SIMD_SSE41_ENABLE
48
    namespace Sse41
49
    {
50
        // 128-bit multiply-add returning a * b + c per float lane, with the arithmetic chosen by 'nofma':
        //   nofma == false : operands are converted with Fp32ToFp64, combined as doubles and converted back;
        //   nofma == true  : a single-precision multiply followed by a single-precision add.
        // Only the two explicit specializations below provide a definition.
        template<bool nofma> SIMD_INLINE __m128 Fmadd(__m128 a, __m128 b, __m128 c);

        // nofma == false: evaluate a * b + c on double-precision halves and repack with Fp64ToFp32.
        // Fp32ToFp64<0>/<1> presumably widen the lower/upper pair of float lanes - see SimdConvert.h.
        template <> SIMD_INLINE __m128 Fmadd<false>(__m128 a, __m128 b, __m128 c)
        {
            const __m128d productLo = _mm_mul_pd(Fp32ToFp64<0>(a), Fp32ToFp64<0>(b));
            const __m128d sumLo = _mm_add_pd(productLo, Fp32ToFp64<0>(c));
            const __m128d productHi = _mm_mul_pd(Fp32ToFp64<1>(a), Fp32ToFp64<1>(b));
            const __m128d sumHi = _mm_add_pd(productHi, Fp32ToFp64<1>(c));
            return Fp64ToFp32(sumLo, sumHi);
        }

        // nofma == true: separate packed multiply and packed add in single precision.
        template <> SIMD_INLINE __m128 Fmadd<true>(__m128 a, __m128 b, __m128 c)
        {
            const __m128 product = _mm_mul_ps(a, b);
            return _mm_add_ps(product, c);
        }
63
    }
64
#endif
65
66
#ifdef SIMD_AVX2_ENABLE
67
    namespace Avx2
68
    {
69
        // 128-bit multiply-add returning a * b + c per float lane:
        //   nofma == false : one _mm_fmadd_ps instruction;
        //   nofma == true  : a packed multiply and a packed add issued as separate operations.
        // Only the two explicit specializations below provide a definition.
        template<bool nofma> SIMD_INLINE __m128 Fmadd(__m128 a, __m128 b, __m128 c);

        // nofma == false: use the fused multiply-add intrinsic directly.
        template <> SIMD_INLINE __m128 Fmadd<false>(__m128 a, __m128 b, __m128 c)
        {
            return _mm_fmadd_ps(a, b, c);
        }

        // nofma == true: multiply, then add as a distinct step.
        template <> SIMD_INLINE __m128 Fmadd<true>(__m128 a, __m128 b, __m128 c)
        {
            const __m128 product = _mm_mul_ps(a, b);
            // OR with all-zero bits leaves the product unchanged; presumably it is here to keep
            // the compiler from merging the multiply and the add back into an FMA - confirm.
            const __m128 guarded = _mm_or_ps(product, _mm_setzero_ps());
            return _mm_add_ps(guarded, c);
        }
80
81
        //-----------------------------------------------------------------------------------------
82
83
        // 128-bit four-operand form returning a * b + c * d per float lane:
        //   nofma == false : c * d is computed with a packed multiply and used as the addend of _mm_fmadd_ps;
        //   nofma == true  : both products are computed with packed multiplies and then added.
        // NOTE(review): 'd' is taken by const reference while a, b and c are taken by value;
        // presumably a by-value vector argument limitation on some compiler - confirm.
        template<bool nofma> SIMD_INLINE __m128 Fmadd(__m128 a, __m128 b, __m128 c, const __m128 & d);

        // nofma == false: fused a * b + (c * d).
        template <> SIMD_INLINE __m128 Fmadd<false>(__m128 a, __m128 b, __m128 c, const __m128 & d)
        {
            const __m128 addend = _mm_mul_ps(c, d);
            return _mm_fmadd_ps(a, b, addend);
        }

        // nofma == true: two independent products followed by an add.
        template <> SIMD_INLINE __m128 Fmadd<true>(__m128 a, __m128 b, __m128 c, const __m128 & d)
        {
            // Each product is OR-ed with all-zero bits, which leaves its value unchanged; presumably
            // this keeps the compiler from fusing either multiply with the final add - confirm.
            const __m128 productAb = _mm_or_ps(_mm_mul_ps(a, b), _mm_setzero_ps());
            const __m128 productCd = _mm_or_ps(_mm_mul_ps(c, d), _mm_setzero_ps());
            return _mm_add_ps(productAb, productCd);
        }
94
95
        //-----------------------------------------------------------------------------------------
96
97
        // 256-bit multiply-add returning a * b + c per float lane:
        //   nofma == false : one _mm256_fmadd_ps instruction;
        //   nofma == true  : a packed multiply and a packed add issued as separate operations.
        // Only the two explicit specializations below provide a definition.
        template<bool nofma> SIMD_INLINE __m256 Fmadd(__m256 a, __m256 b, __m256 c);

        // nofma == false: use the fused multiply-add intrinsic directly.
        template <> SIMD_INLINE __m256 Fmadd<false>(__m256 a, __m256 b, __m256 c)
        {
            return _mm256_fmadd_ps(a, b, c);
        }

        // nofma == true: multiply, then add as a distinct step.
        template <> SIMD_INLINE __m256 Fmadd<true>(__m256 a, __m256 b, __m256 c)
        {
            const __m256 product = _mm256_mul_ps(a, b);
            // OR with all-zero bits leaves the product unchanged; presumably it is here to keep
            // the compiler from merging the multiply and the add back into an FMA - confirm.
            const __m256 guarded = _mm256_or_ps(product, _mm256_setzero_ps());
            return _mm256_add_ps(guarded, c);
        }
108
109
        //-----------------------------------------------------------------------------------------
110
111
        // 256-bit four-operand form returning a * b + c * d per float lane:
        //   nofma == false : c * d is computed with a packed multiply and used as the addend of _mm256_fmadd_ps;
        //   nofma == true  : both products are computed with packed multiplies and then added.
        // NOTE(review): 'd' is taken by const reference while a, b and c are taken by value;
        // presumably a by-value vector argument limitation on some compiler - confirm.
        template<bool nofma> SIMD_INLINE __m256 Fmadd(__m256 a, __m256 b, __m256 c, const __m256 & d);

        // nofma == false: fused a * b + (c * d).
        template <> SIMD_INLINE __m256 Fmadd<false>(__m256 a, __m256 b, __m256 c, const __m256 & d)
        {
            const __m256 addend = _mm256_mul_ps(c, d);
            return _mm256_fmadd_ps(a, b, addend);
        }

        // nofma == true: two independent products followed by an add.
        template <> SIMD_INLINE __m256 Fmadd<true>(__m256 a, __m256 b, __m256 c, const __m256 & d)
        {
            // Each product is OR-ed with all-zero bits, which leaves its value unchanged; presumably
            // this keeps the compiler from fusing either multiply with the final add - confirm.
            const __m256 productAb = _mm256_or_ps(_mm256_mul_ps(a, b), _mm256_setzero_ps());
            const __m256 productCd = _mm256_or_ps(_mm256_mul_ps(c, d), _mm256_setzero_ps());
            return _mm256_add_ps(productAb, productCd);
        }
122
    }
123
#endif
124
125
#ifdef SIMD_AVX512BW_ENABLE    
126
    namespace Avx512bw
127
    {
128
        // 512-bit multiply-add returning a * b + c per float lane:
        //   nofma == false : one _mm512_fmadd_ps instruction;
        //   nofma == true  : the product and the sum are produced by two separate operations.
        // Only the two explicit specializations below provide a definition.
        template<bool nofma> SIMD_INLINE __m512 Fmadd(__m512 a, __m512 b, __m512 c);

        // nofma == false: use the fused multiply-add intrinsic directly.
        template <> SIMD_INLINE __m512 Fmadd<false>(__m512 a, __m512 b, __m512 c)
        {
            return _mm512_fmadd_ps(a, b, c);
        }

        // nofma == true: form the product first, then add c in a second operation.
        // The two compiler-specific spellings presumably exist to stop the optimizer from
        // recombining the steps into a single FMA - confirm.
        template <> SIMD_INLINE __m512 Fmadd<true>(__m512 a, __m512 b, __m512 c)
        {
#ifdef _MSC_VER
            // MSVC: the product comes from an FMA with a zero addend, followed by an ordinary add.
            const __m512 product = _mm512_fmadd_ps(a, b, _mm512_setzero_ps());
            return _mm512_add_ps(product, c);
#else
            // Other compilers: ordinary multiply, then a zero-masked add whose mask (-1) has every bit set.
            const __m512 product = _mm512_mul_ps(a, b);
            return _mm512_maskz_add_ps(-1, product, c);
#endif
        }
143
    }
144
#endif
145
}
146
147
#endif