/src/Simd/src/Simd/SimdFloat16.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Simd Library (http://ermig1979.github.io/Simd).  | 
3  |  | *  | 
4  |  | * Copyright (c) 2011-2021 Yermalayeu Ihar.  | 
5  |  | *  | 
6  |  | * Permission is hereby granted, free of charge, to any person obtaining a copy  | 
7  |  | * of this software and associated documentation files (the "Software"), to deal  | 
8  |  | * in the Software without restriction, including without limitation the rights  | 
9  |  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  | 
10  |  | * copies of the Software, and to permit persons to whom the Software is  | 
11  |  | * furnished to do so, subject to the following conditions:  | 
12  |  | *  | 
13  |  | * The above copyright notice and this permission notice shall be included in  | 
14  |  | * all copies or substantial portions of the Software.  | 
15  |  | *  | 
16  |  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  | 
17  |  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  | 
18  |  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  | 
19  |  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  | 
20  |  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  | 
21  |  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  | 
22  |  | * SOFTWARE.  | 
23  |  | */  | 
24  |  | #ifndef __SimdFloat16_h__  | 
25  |  | #define __SimdFloat16_h__  | 
26  |  |  | 
27  |  | #include "Simd/SimdInit.h"  | 
28  |  |  | 
29  |  | namespace Simd  | 
30  |  | { | 
31  |  |     namespace Base  | 
32  |  |     { | 
33  |  |         namespace Fp16  | 
34  |  |         { | 
35  |  |             union Bits  | 
36  |  |             { | 
37  |  |                 float f;  | 
38  |  |                 int32_t si;  | 
39  |  |                 uint32_t ui;  | 
40  |  |             };  | 
41  |  |  | 
42  |  |             const int SHIFT = 13;  | 
43  |  |             const int SHIFT_SIGN = 16;  | 
44  |  |  | 
45  |  |             const int32_t INF_N = 0x7F800000; // flt32 infinity  | 
46  |  |             const int32_t MAX_N = 0x477FE000; // max flt16 normal as a flt32  | 
47  |  |             const int32_t MIN_N = 0x38800000; // min flt16 normal as a flt32  | 
48  |  |             const int32_t SIGN_N = 0x80000000; // flt32 sign bit  | 
49  |  |  | 
50  |  |             const int32_t INF_C = INF_N >> SHIFT;  | 
51  |  |             const int32_t NAN_N = (INF_C + 1) << SHIFT; // minimum flt16 nan as a flt32  | 
52  |  |             const int32_t MAX_C = MAX_N >> SHIFT;  | 
53  |  |             const int32_t MIN_C = MIN_N >> SHIFT;  | 
54  |  |             const int32_t SIGN_C = SIGN_N >> SHIFT_SIGN; // flt16 sign bit  | 
55  |  |  | 
56  |  |             const int32_t MUL_N = 0x52000000; // (1 << 23) / MIN_N  | 
57  |  |             const int32_t MUL_C = 0x33800000; // MIN_N / (1 << (23 - shift))  | 
58  |  |  | 
59  |  |             const int32_t SUB_C = 0x003FF; // max flt32 subnormal down shifted  | 
60  |  |             const int32_t NOR_C = 0x00400; // min flt32 normal down shifted  | 
61  |  |  | 
62  |  |             const int32_t MAX_D = INF_C - MAX_C - 1;  | 
63  |  |             const int32_t MIN_D = MIN_C - SUB_C - 1;  | 
64  |  |         }  | 
65  |  |  | 
66  |  |         SIMD_INLINE uint16_t Float32ToFloat16(float value)  | 
67  | 0  |         { | 
68  | 0  |             Fp16::Bits v, s;  | 
69  | 0  |             v.f = value;  | 
70  | 0  |             uint32_t sign = v.si & Fp16::SIGN_N;  | 
71  | 0  |             v.si ^= sign;  | 
72  | 0  |             sign >>= Fp16::SHIFT_SIGN; // logical shift  | 
73  | 0  |             s.si = Fp16::MUL_N;  | 
74  | 0  |             s.si = int32_t(s.f * v.f); // correct subnormals  | 
75  | 0  |             v.si ^= (s.si ^ v.si) & -(Fp16::MIN_N > v.si);  | 
76  | 0  |             v.si ^= (Fp16::INF_N ^ v.si) & -((Fp16::INF_N > v.si) & (v.si > Fp16::MAX_N));  | 
77  | 0  |             v.si ^= (Fp16::NAN_N ^ v.si) & -((Fp16::NAN_N > v.si) & (v.si > Fp16::INF_N));  | 
78  | 0  |             v.ui >>= Fp16::SHIFT; // logical shift  | 
79  | 0  |             v.si ^= ((v.si - Fp16::MAX_D) ^ v.si) & -(v.si > Fp16::MAX_C);  | 
80  | 0  |             v.si ^= ((v.si - Fp16::MIN_D) ^ v.si) & -(v.si > Fp16::SUB_C);  | 
81  | 0  |             return v.ui | sign;  | 
82  | 0  |         }  | 
83  |  |  | 
84  |  |         SIMD_INLINE float Float16ToFloat32(uint16_t value)  | 
85  | 0  |         { | 
86  | 0  |             Fp16::Bits v;  | 
87  | 0  |             v.ui = value;  | 
88  | 0  |             int32_t sign = v.si & Fp16::SIGN_C;  | 
89  | 0  |             v.si ^= sign;  | 
90  | 0  |             sign <<= Fp16::SHIFT_SIGN;  | 
91  | 0  |             v.si ^= ((v.si + Fp16::MIN_D) ^ v.si) & -(v.si > Fp16::SUB_C);  | 
92  | 0  |             v.si ^= ((v.si + Fp16::MAX_D) ^ v.si) & -(v.si > Fp16::MAX_C);  | 
93  | 0  |             Fp16::Bits s;  | 
94  | 0  |             s.si = Fp16::MUL_C;  | 
95  | 0  |             s.f *= v.si;  | 
96  | 0  |             int32_t mask = -(Fp16::NOR_C > v.si);  | 
97  | 0  |             v.si <<= Fp16::SHIFT;  | 
98  | 0  |             v.si ^= (s.si ^ v.si) & mask;  | 
99  | 0  |             v.si |= sign;  | 
100  | 0  |             return v.f;  | 
101  | 0  |         }  | 
102  |  |     }  | 
103  |  |  | 
104  |  | #ifdef SIMD_SSE41_ENABLE      | 
105  |  |     namespace Sse41  | 
106  |  |     { | 
107  |  |         namespace Fp16  | 
108  |  |         { | 
109  |  |             union Bits  | 
110  |  |             { | 
111  |  |                 __m128 f;  | 
112  |  |                 __m128i i;  | 
113  |  |             };  | 
114  |  |  | 
115  |  |             const __m128i INF_N = SIMD_MM_SET1_EPI32(Base::Fp16::INF_N);  | 
116  |  |             const __m128i MAX_N = SIMD_MM_SET1_EPI32(Base::Fp16::MAX_N);  | 
117  |  |             const __m128i MIN_N = SIMD_MM_SET1_EPI32(Base::Fp16::MIN_N);  | 
118  |  |             const __m128i SIGN_N = SIMD_MM_SET1_EPI32(Base::Fp16::SIGN_N);  | 
119  |  |  | 
120  |  |             const __m128i INF_C = SIMD_MM_SET1_EPI32(Base::Fp16::INF_C);  | 
121  |  |             const __m128i NAN_N = SIMD_MM_SET1_EPI32(Base::Fp16::NAN_N);  | 
122  |  |             const __m128i MAX_C = SIMD_MM_SET1_EPI32(Base::Fp16::MAX_C);  | 
123  |  |             const __m128i MIN_C = SIMD_MM_SET1_EPI32(Base::Fp16::MIN_C);  | 
124  |  |             const __m128i SIGN_C = SIMD_MM_SET1_EPI32(Base::Fp16::SIGN_C);  | 
125  |  |  | 
126  |  |             const __m128i MUL_N = SIMD_MM_SET1_EPI32(Base::Fp16::MUL_N);  | 
127  |  |             const __m128i MUL_C = SIMD_MM_SET1_EPI32(Base::Fp16::MUL_C);  | 
128  |  |  | 
129  |  |             const __m128i SUB_C = SIMD_MM_SET1_EPI32(Base::Fp16::SUB_C);  | 
130  |  |             const __m128i NOR_C = SIMD_MM_SET1_EPI32(Base::Fp16::NOR_C);  | 
131  |  |  | 
132  |  |             const __m128i MAX_D = SIMD_MM_SET1_EPI32(Base::Fp16::MAX_D);  | 
133  |  |             const __m128i MIN_D = SIMD_MM_SET1_EPI32(Base::Fp16::MIN_D);  | 
134  |  |         }  | 
135  |  |  | 
136  |  |         SIMD_INLINE __m128i Float32ToFloat16(__m128 value)  | 
137  | 0  |         { | 
138  | 0  |             Fp16::Bits v, s;  | 
139  | 0  |             v.f = value;  | 
140  | 0  |             __m128i sign = _mm_and_si128(v.i, Fp16::SIGN_N);  | 
141  | 0  |             v.i = _mm_xor_si128(v.i, sign);  | 
142  | 0  |             sign = _mm_srli_epi32(sign, Base::Fp16::SHIFT_SIGN);  | 
143  | 0  |             s.i = Fp16::MUL_N;  | 
144  | 0  |             s.i = _mm_cvtps_epi32(_mm_floor_ps(_mm_mul_ps(s.f, v.f)));   | 
145  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(s.i, v.i), _mm_cmpgt_epi32(Fp16::MIN_N, v.i)));  | 
146  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(Fp16::INF_N, v.i), _mm_and_si128(_mm_cmpgt_epi32(Fp16::INF_N, v.i), _mm_cmpgt_epi32(v.i, Fp16::MAX_N))));  | 
147  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(Fp16::NAN_N, v.i), _mm_and_si128(_mm_cmpgt_epi32(Fp16::NAN_N, v.i), _mm_cmpgt_epi32(v.i, Fp16::INF_N))));  | 
148  | 0  |             v.i = _mm_srli_epi32(v.i, Base::Fp16::SHIFT);   | 
149  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(_mm_sub_epi32(v.i, Fp16::MAX_D), v.i), _mm_cmpgt_epi32(v.i, Fp16::MAX_C)));  | 
150  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(_mm_sub_epi32(v.i, Fp16::MIN_D), v.i), _mm_cmpgt_epi32(v.i, Fp16::SUB_C)));  | 
151  | 0  |             return _mm_or_si128(v.i, sign);  | 
152  | 0  |         }  | 
153  |  |  | 
154  |  |         SIMD_INLINE __m128 Float16ToFloat32(__m128i value)  | 
155  | 0  |         { | 
156  | 0  |             Fp16::Bits v;  | 
157  | 0  |             v.i = value;  | 
158  | 0  |             __m128i sign = _mm_and_si128(v.i, Fp16::SIGN_C);  | 
159  | 0  |             v.i = _mm_xor_si128(v.i, sign);  | 
160  | 0  |             sign = _mm_slli_epi32(sign, Base::Fp16::SHIFT_SIGN);  | 
161  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(_mm_add_epi32(v.i, Fp16::MIN_D), v.i), _mm_cmpgt_epi32(v.i, Fp16::SUB_C)));  | 
162  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(_mm_add_epi32(v.i, Fp16::MAX_D), v.i), _mm_cmpgt_epi32(v.i, Fp16::MAX_C)));  | 
163  | 0  |             Fp16::Bits s;  | 
164  | 0  |             s.i = Fp16::MUL_C;  | 
165  | 0  |             s.f = _mm_mul_ps(s.f, _mm_cvtepi32_ps(v.i));  | 
166  | 0  |             __m128i mask = _mm_cmpgt_epi32(Fp16::NOR_C, v.i);  | 
167  | 0  |             v.i = _mm_slli_epi32(v.i, Base::Fp16::SHIFT);  | 
168  | 0  |             v.i = _mm_xor_si128(v.i, _mm_and_si128(_mm_xor_si128(s.i, v.i), mask));  | 
169  | 0  |             v.i = _mm_or_si128(v.i, sign);  | 
170  | 0  |             return v.f;  | 
171  | 0  |         }  | 
172  |  |     }  | 
173  |  | #endif     | 
174  |  | }  | 
175  |  |  | 
176  |  | #endif//__SimdFloat16_h__  |