/usr/local/include/Imath/half.h
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // SPDX-License-Identifier: BSD-3-Clause |
3 | | // Copyright Contributors to the OpenEXR Project. |
4 | | // |
5 | | |
6 | | // |
7 | | // Primary original authors: |
8 | | // Florian Kainz <kainz@ilm.com> |
9 | | // Rod Bogart <rgb@ilm.com> |
10 | | // |
11 | | |
12 | | #ifndef IMATH_HALF_H_ |
13 | | #define IMATH_HALF_H_ |
14 | | |
15 | | #include "ImathExport.h" |
16 | | #include "ImathNamespace.h" |
17 | | #include "ImathPlatform.h" |
18 | | |
19 | | /// @file half.h |
20 | | /// The half type is a 16-bit floating-point number, compatible with the |
21 | | /// IEEE 754-2008 binary16 type. |
22 | | /// |
23 | | /// **Representation of a 32-bit float:** |
24 | | /// |
25 | | /// We assume that a float, f, is an IEEE 754 single-precision |
26 | | /// floating point number, whose bits are arranged as follows: |
27 | | /// |
28 | | ///     31 (msb) |
29 | | ///     | |
30 | | ///     | 30     23 |
31 | | ///     | |      | |
32 | | ///     | |      | 22                    0 (lsb) |
33 | | ///     | |      | |                     | |
34 | | ///     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX |
35 | | /// |
36 | | ///     s e        m |
37 | | /// |
38 | | /// s is the sign-bit, e is the exponent and m is the significand. |
39 | | /// |
40 | | /// If e is between 1 and 254, f is a normalized number: |
41 | | /// |
42 | | /// |
43 | | /// f = (-1)^s * 2^(e-127) * 1.m |
44 | | /// |
45 | | /// If e is 0, and m is not zero, f is a denormalized number: |
46 | | /// |
47 | | /// |
48 | | /// f = (-1)^s * 2^(-126) * 0.m |
49 | | /// |
50 | | /// If e and m are both zero, f is zero: |
51 | | /// |
52 | | /// f = 0.0 |
53 | | /// |
54 | | /// If e is 255, f is an "infinity" or "not a number" (NAN), |
55 | | /// depending on whether m is zero or not. |
56 | | /// |
57 | | /// Examples: |
58 | | /// |
59 | | /// 0 00000000 00000000000000000000000 = 0.0 |
60 | | /// 0 01111110 00000000000000000000000 = 0.5 |
61 | | /// 0 01111111 00000000000000000000000 = 1.0 |
62 | | /// 0 10000000 00000000000000000000000 = 2.0 |
63 | | /// 0 10000000 10000000000000000000000 = 3.0 |
64 | | /// 1 10000101 11110000010000000000000 = -124.0625 |
65 | | /// 0 11111111 00000000000000000000000 = +infinity |
66 | | /// 1 11111111 00000000000000000000000 = -infinity |
67 | | /// 0 11111111 10000000000000000000000 = NAN |
68 | | /// 1 11111111 11111111111111111111111 = NAN |
69 | | /// |
70 | | /// **Representation of a 16-bit half:** |
71 | | /// |
72 | | /// Here is the bit-layout for a half number, h: |
73 | | /// |
74 | | ///     15 (msb) |
75 | | ///     | |
76 | | ///     | 14  10 |
77 | | ///     | |   | |
78 | | ///     | |   | 9        0 (lsb) |
79 | | ///     | |   | |        | |
80 | | ///     X XXXXX XXXXXXXXXX |
81 | | /// |
82 | | ///     s e     m |
83 | | /// |
84 | | /// s is the sign-bit, e is the exponent and m is the significand. |
85 | | /// |
86 | | /// If e is between 1 and 30, h is a normalized number: |
87 | | /// |
88 | | /// |
89 | | /// h = (-1)^s * 2^(e-15) * 1.m |
90 | | /// |
91 | | /// If e is 0, and m is not zero, h is a denormalized number: |
92 | | /// |
93 | | /// |
94 | | /// h = (-1)^s * 2^(-14) * 0.m |
95 | | /// |
96 | | /// If e and m are both zero, h is zero: |
97 | | /// |
98 | | /// h = 0.0 |
99 | | /// |
100 | | /// If e is 31, h is an "infinity" or "not a number" (NAN), |
101 | | /// depending on whether m is zero or not. |
102 | | /// |
103 | | /// Examples: |
104 | | /// |
105 | | /// 0 00000 0000000000 = 0.0 |
106 | | /// 0 01110 0000000000 = 0.5 |
107 | | /// 0 01111 0000000000 = 1.0 |
108 | | /// 0 10000 0000000000 = 2.0 |
109 | | /// 0 10000 1000000000 = 3.0 |
110 | | /// 1 10101 1111000001 = -124.0625 |
111 | | /// 0 11111 0000000000 = +infinity |
112 | | /// 1 11111 0000000000 = -infinity |
113 | | /// 0 11111 1000000000 = NAN |
114 | | /// 1 11111 1111111111 = NAN |
115 | | /// |
116 | | /// **Conversion via Lookup Table:** |
117 | | /// |
118 | | /// Converting from half to float is performed by default using a |
119 | | /// lookup table. There are only 65,536 different half numbers; each |
120 | | /// of these numbers has been converted and stored in a table pointed |
121 | | /// to by the ``imath_half_to_float_table`` pointer. |
122 | | /// |
123 | | /// Prior to Imath v3.1, conversion from float to half was |
124 | | /// accomplished with the help of an exponent lookup table, but this is |
125 | | /// now replaced with explicit bit shifting. |
126 | | /// |
127 | | /// **Conversion via Hardware:** |
128 | | /// |
129 | | /// For Imath v3.1, the conversion routines have been extended to use |
130 | | /// F16C SSE instructions whenever present and enabled by compiler |
131 | | /// flags. |
132 | | /// |
133 | | /// **Conversion via Bit-Shifting** |
134 | | /// |
135 | | /// If F16C SSE instructions are not available, conversion can be |
136 | | /// accomplished by a bit-shifting algorithm. For half-to-float |
137 | | /// conversion, this is generally slower than the lookup table, but it |
138 | | /// may be preferable when memory limits preclude storing of the |
139 | | /// 65,536-entry lookup table. |
140 | | /// |
141 | | /// The lookup table symbol is included in the compilation even if |
142 | | /// ``IMATH_HALF_USE_LOOKUP_TABLE`` is false, because application code |
143 | | /// using the exported ``half.h`` may choose to enable the use of the table. |
144 | | /// |
145 | | /// An implementation can eliminate the table from compilation by |
146 | | /// defining the ``IMATH_HALF_NO_LOOKUP_TABLE`` preprocessor symbol. |
147 | | /// Simply add: |
148 | | /// |
149 | | /// #define IMATH_HALF_NO_LOOKUP_TABLE |
150 | | /// |
151 | | /// before including ``half.h``, or define the symbol on the compile |
152 | | /// command line. |
153 | | /// |
154 | | /// Furthermore, an implementation wishing to receive ``FE_OVERFLOW`` |
155 | | /// and ``FE_UNDERFLOW`` floating point exceptions when converting |
156 | | /// float to half by the bit-shift algorithm can define the |
157 | | /// preprocessor symbol ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` prior to |
158 | | /// including ``half.h``: |
159 | | /// |
160 | | /// #define IMATH_HALF_ENABLE_FP_EXCEPTIONS |
161 | | /// |
162 | | /// **Conversion Performance Comparison:** |
163 | | /// |
164 | | /// Testing on a Core i9, the timings are approximately: |
165 | | /// |
166 | | /// half-to-float: |
167 | | /// - table: 0.71 ns / call |
168 | | /// - no table: 1.06 ns / call |
169 | | /// - f16c: 0.45 ns / call |
170 | | /// |
171 | | /// float-to-half: |
172 | | /// - original: 5.2 ns / call |
173 | | /// - no exp table + opt: 1.27 ns / call |
174 | | /// - f16c: 0.45 ns / call |
175 | | /// |
176 | | /// **Note:** the timing above depends on the distribution of the |
177 | | /// floats in question. |
178 | | /// |
179 | | |
180 | | #ifdef __CUDA_ARCH__ |
181 | | // do not include intrinsics headers on Cuda |
182 | | #elif defined(_WIN32) |
183 | | # include <intrin.h> |
184 | | #elif defined(__x86_64__) |
185 | | # include <x86intrin.h> |
186 | | #elif defined(__F16C__) |
187 | | # include <immintrin.h> |
188 | | #endif |
189 | | |
190 | | #include <stdint.h> |
191 | | #include <stdio.h> |
192 | | |
193 | | #ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS |
194 | | # include <fenv.h> |
195 | | #endif |
196 | | |
197 | | //------------------------------------------------------------------------- |
198 | | // Limits |
199 | | // |
200 | | // Visual C++ will complain if HALF_DENORM_MIN, HALF_NRM_MIN etc. are not float |
201 | | // constants, but at least one other compiler (gcc 2.96) produces incorrect |
202 | | // results if they are. |
203 | | //------------------------------------------------------------------------- |
204 | | |
205 | | #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER |
206 | | |
207 | | /// Smallest positive denormalized half |
208 | | # define HALF_DENORM_MIN 5.96046448e-08f |
209 | | /// Smallest positive normalized half |
210 | | # define HALF_NRM_MIN 6.10351562e-05f |
211 | | /// Smallest positive normalized half |
212 | | # define HALF_MIN 6.10351562e-05f |
213 | | /// Largest positive half |
214 | | # define HALF_MAX 65504.0f |
215 | | /// Smallest positive e for which ``half(1.0 + e) != half(1.0)`` |
216 | | # define HALF_EPSILON 0.00097656f |
217 | | #else |
218 | | /// Smallest positive denormalized half |
219 | | # define HALF_DENORM_MIN 5.96046448e-08 |
220 | | /// Smallest positive normalized half |
221 | | # define HALF_NRM_MIN 6.10351562e-05 |
222 | | /// Smallest positive normalized half |
223 | | # define HALF_MIN 6.10351562e-05f |
224 | | /// Largest positive half |
225 | | # define HALF_MAX 65504.0 |
226 | | /// Smallest positive e for which ``half(1.0 + e) != half(1.0)`` |
227 | | # define HALF_EPSILON 0.00097656 |
228 | | #endif |
229 | | |
230 | | /// Number of digits in mantissa (significand + hidden leading 1) |
231 | | #define HALF_MANT_DIG 11 |
232 | | /// Number of base 10 digits that can be represented without change: |
233 | | /// |
234 | | /// ``floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3`` |
235 | | #define HALF_DIG 3 |
236 | | /// Number of base-10 digits that are necessary to uniquely represent |
237 | | /// all distinct values: |
238 | | /// |
239 | | /// ``ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5`` |
240 | | #define HALF_DECIMAL_DIG 5 |
241 | | /// Base of the exponent |
242 | | #define HALF_RADIX 2 |
243 | | /// Minimum negative integer such that ``HALF_RADIX`` raised to the power |
244 | | /// of one less than that integer is a normalized half |
245 | | #define HALF_DENORM_MIN_EXP -13 |
246 | | /// Maximum positive integer such that ``HALF_RADIX`` raised to the power |
247 | | /// of one less than that integer is a normalized half |
248 | | #define HALF_MAX_EXP 16 |
249 | | /// Minimum negative integer such that 10 raised to that power is a |
250 | | /// normalized half |
251 | | #define HALF_DENORM_MIN_10_EXP -4 |
252 | | /// Maximum positive integer such that 10 raised to that power is a |
253 | | /// normalized half |
254 | | #define HALF_MAX_10_EXP 4 |
255 | | |
256 | | /// a type for both C-only programs and C++ to use the same utilities |
257 | | typedef union imath_half_uif |
258 | | { |
259 | | uint32_t i; |
260 | | float f; |
261 | | } imath_half_uif_t; |
262 | | |
263 | | /// a type for both C-only programs and C++ to use the same utilities |
264 | | typedef uint16_t imath_half_bits_t; |
265 | | |
266 | | #if !defined(__cplusplus) && !defined(__CUDACC__) |
267 | | /// if we're in a C-only context, alias the half bits type to half |
268 | | typedef imath_half_bits_t half; |
269 | | #endif |
270 | | |
271 | | #if !defined(IMATH_HALF_NO_LOOKUP_TABLE) |
272 | | # if defined(__cplusplus) |
273 | | extern "C" |
274 | | # else |
275 | | extern |
276 | | # endif |
277 | | IMATH_EXPORT const imath_half_uif_t* imath_half_to_float_table; |
278 | | #endif |
279 | | |
280 | | /// |
281 | | /// Convert half to float |
282 | | /// |
283 | | |
284 | | static inline float |
285 | | imath_half_to_float (imath_half_bits_t h) |
286 | 990M | { |
287 | | #if defined(__F16C__) |
288 | | // NB: The intel implementation does seem to treat NaN slightly |
289 | | // different than the original toFloat table does (i.e. where the |
290 | | // 1 bits are, meaning the signalling or not bits). This seems |
291 | | // benign, given that the original library didn't really deal with |
292 | | // signalling vs non-signalling NaNs |
293 | | # ifdef _MSC_VER |
294 | | /* msvc does not seem to have cvtsh_ss :( */ |
295 | | return _mm_cvtss_f32 (_mm_cvtph_ps (_mm_set1_epi16 (h))); |
296 | | # else |
297 | | return _cvtsh_ss (h); |
298 | | # endif |
299 | | #elif defined(IMATH_HALF_USE_LOOKUP_TABLE) && !defined(IMATH_HALF_NO_LOOKUP_TABLE) |
300 | | return imath_half_to_float_table[h].f; |
301 | | #else |
302 | | imath_half_uif_t v; |
303 | | // this code would be clearer, although it does appear to be faster |
304 | | // (1.06 vs 1.08 ns/call) to avoid the constants and just do 4 |
305 | | // shifts. |
306 | | // |
307 | | uint32_t hexpmant = ( (uint32_t)(h) << 17 ) >> 4; |
308 | | v.i = ((uint32_t)(h >> 15)) << 31; |
309 | | |
310 | | // the likely really does help if most of your numbers are "normal" half numbers |
311 | | if (IMATH_LIKELY ((hexpmant >= 0x00800000))) |
312 | | { |
313 | | v.i |= hexpmant; |
314 | | // either we are a normal number, in which case add in the bias difference |
315 | | // otherwise make sure all exponent bits are set |
316 | | if (IMATH_LIKELY ((hexpmant < 0x0f800000))) |
317 | | v.i += 0x38000000; |
318 | | else |
319 | | v.i |= 0x7f800000; |
320 | | } |
321 | | else if (hexpmant != 0) |
322 | | { |
323 | | // exponent is 0 because we're denormal, don't have to extract |
324 | | // the mantissa, can just use as is |
325 | | // |
326 | | // |
327 | | // other compilers may provide count-leading-zeros primitives, |
328 | | // but we need the community to inform us of the variants |
329 | | uint32_t lc; |
330 | | # if defined(_MSC_VER) |
331 | | // The direct intrinsic for this is __lzcnt, but that is not supported |
332 | | // on older x86_64 hardware or ARM. Instead use the bsr instruction |
333 | | // and one additional subtraction. This assumes hexpmant != 0; for 0, |
334 | | // bsr and lzcnt would behave differently. |
335 | | unsigned long bsr; |
336 | | _BitScanReverse (&bsr, hexpmant); |
337 | | lc = (31 - bsr); |
338 | | # elif defined(__GNUC__) || defined(__clang__) |
339 | | lc = (uint32_t) __builtin_clz (hexpmant); |
340 | | # else |
341 | | lc = 0; |
342 | | while (0 == ((hexpmant << lc) & 0x80000000)) |
343 | | ++lc; |
344 | | # endif |
345 | | lc -= 8; |
346 | | // so nominally we want to remove that extra bit we shifted |
347 | | // up, but we are going to add that bit back in, then subtract |
348 | | // from it with the 0x38800000 - (lc << 23).... |
349 | | // |
350 | | // by combining, this allows us to skip the & operation (and |
351 | | // remove a constant) |
352 | | // |
353 | | // hexpmant &= ~0x00800000; |
354 | | v.i |= 0x38800000; |
355 | | // lc is now x, where the desired exponent is then |
356 | | // -14 - lc |
357 | | // + 127 -> new exponent |
358 | | v.i |= (hexpmant << lc); |
359 | | v.i -= (lc << 23); |
360 | | } |
361 | | return v.f; |
362 | | #endif |
363 | 990M | } |
364 | | |
365 | | /// |
366 | | /// Convert float to half |
367 | | /// |
368 | | /// Note: This only supports the "round to even" rounding mode, which |
369 | | /// was the only mode supported by the original OpenEXR library |
370 | | /// |
371 | | |
372 | | static inline imath_half_bits_t |
373 | | imath_float_to_half (float f) |
374 | 493M | { |
375 | | #if defined(__F16C__) |
376 | | # ifdef _MSC_VER |
377 | | // msvc does not seem to have cvtss_sh :( |
378 | | return _mm_extract_epi16 ( |
379 | | _mm_cvtps_ph (_mm_set_ss (f), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)), |
380 | | 0); |
381 | | # else |
382 | | // preserve the fixed rounding mode to nearest |
383 | | return _cvtss_sh (f, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); |
384 | | # endif |
385 | | #else |
386 | 493M | imath_half_uif_t v; |
387 | 493M | imath_half_bits_t ret; |
388 | 493M | uint32_t e, m, ui, r, shift; |
389 | | |
390 | 493M | v.f = f; |
391 | | |
392 | 493M | ui = (v.i & ~0x80000000); |
393 | 493M | ret = ((v.i >> 16) & 0x8000); |
394 | | |
395 | | // exponent large enough to result in a normal number, round and return |
396 | 493M | if (ui >= 0x38800000) |
397 | 271M | { |
398 | | // inf or nan |
399 | 271M | if (IMATH_UNLIKELY (ui >= 0x7f800000)) |
400 | 191M | { |
401 | 191M | ret |= 0x7c00; |
402 | 191M | if (ui == 0x7f800000) |
403 | 317k | return ret; |
404 | 191M | m = (ui & 0x7fffff) >> 13; |
405 | | // make sure we have at least one bit after shift to preserve nan-ness |
406 | 191M | return ret | (uint16_t)m | (uint16_t)(m == 0); |
407 | 191M | } |
408 | | |
409 | | // too large, round to infinity |
410 | 79.2M | if (IMATH_UNLIKELY (ui > 0x477fefff)) |
411 | 1.38M | { |
412 | | # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS |
413 | | feraiseexcept (FE_OVERFLOW); |
414 | | # endif |
415 | 1.38M | return ret | 0x7c00; |
416 | 1.38M | } |
417 | | |
418 | 77.9M | ui -= 0x38000000; |
419 | 77.9M | ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13); |
420 | 77.9M | return ret | (uint16_t)ui; |
421 | 79.2M | } |
422 | | |
423 | | // zero or flush to 0 |
424 | 222M | if (ui < 0x33000001) |
425 | 117M | { |
426 | | # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS |
427 | | if (ui == 0) |
428 | | return ret; |
429 | | feraiseexcept (FE_UNDERFLOW); |
430 | | # endif |
431 | 117M | return ret; |
432 | 117M | } |
433 | | |
434 | | // produce a denormalized half |
435 | 105M | e = (ui >> 23); |
436 | 105M | shift = 0x7e - e; |
437 | 105M | m = 0x800000 | (ui & 0x7fffff); |
438 | 105M | r = m << (32 - shift); |
439 | 105M | ret |= (m >> shift); |
440 | 105M | if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0)) |
441 | 48.2M | ++ret; |
442 | 105M | return ret; |
443 | 222M | #endif |
444 | 222M | } |
445 | | |
446 | | //////////////////////////////////////// |
447 | | |
448 | | #ifdef __cplusplus |
449 | | |
450 | | # include <iostream> |
451 | | |
452 | | IMATH_INTERNAL_NAMESPACE_HEADER_ENTER |
453 | | |
454 | | /// |
455 | | /// |
456 | | /// class half represents a 16-bit floating point number |
457 | | /// |
458 | | /// Type half can represent positive and negative numbers whose |
459 | | /// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative |
460 | | /// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented |
461 | | /// with an absolute error of 6.0e-8. All integers from -2048 to |
462 | | /// +2048 can be represented exactly. |
463 | | /// |
464 | | /// Type half behaves (almost) like the built-in C++ floating point |
465 | | /// types. In arithmetic expressions, half, float and double can be |
466 | | /// mixed freely. Here are a few examples: |
467 | | /// |
468 | | /// half a (3.5); |
469 | | /// float b (a + sqrt (a)); |
470 | | /// a += b; |
471 | | /// b += a; |
472 | | /// b = a + 7; |
473 | | /// |
474 | | /// Conversions from half to float are lossless; all half numbers |
475 | | /// are exactly representable as floats. |
476 | | /// |
477 | | /// Conversions from float to half may not preserve a float's value |
478 | | /// exactly. If a float is not representable as a half, then the |
479 | | /// float value is rounded to the nearest representable half. If a |
480 | | /// float value is exactly in the middle between the two closest |
481 | | /// representable half values, then the float value is rounded to |
482 | | /// the closest half whose least significant bit is zero. |
483 | | /// |
484 | | /// A float too large to be represented as a half converts to an |
485 | | /// infinity of the same sign; the bit-shift conversion also raises |
486 | | /// FE_OVERFLOW when IMATH_HALF_ENABLE_FP_EXCEPTIONS is defined. |
487 | | /// Float infinities and NANs convert to half infinities and NANs. |
488 | | /// |
489 | | /// The implementation of type half makes the following assumptions |
490 | | /// about the implementation of the built-in C++ types: |
491 | | /// |
492 | | /// * float is an IEEE 754 single-precision number |
493 | | /// * sizeof (float) == 4 |
494 | | /// * sizeof (unsigned int) == sizeof (float) |
495 | | /// * alignof (unsigned int) == alignof (float) |
496 | | /// * sizeof (uint16_t) == 2 |
497 | | /// |
498 | | |
499 | | class IMATH_EXPORT_TYPE half |
500 | | { |
501 | | public: |
502 | | /// A special tag that lets us initialize a half from the raw bits. |
503 | | enum IMATH_EXPORT_ENUM FromBitsTag |
504 | | { |
505 | | FromBits |
506 | | }; |
507 | | |
508 | | /// @{ |
509 | | /// @name Constructors |
510 | | |
511 | | /// Default construction provides no initialization (hence it is |
512 | | /// not constexpr). |
513 | | half() IMATH_NOEXCEPT = default; |
514 | | |
515 | | /// Construct from float |
516 | | half (float f) IMATH_NOEXCEPT; |
517 | | |
518 | | /// Construct from bit-vector |
519 | | constexpr half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT; |
520 | | |
521 | | /// Copy constructor |
522 | | constexpr half (const half&) IMATH_NOEXCEPT = default; |
523 | | |
524 | | /// Move constructor |
525 | | constexpr half (half&&) IMATH_NOEXCEPT = default; |
526 | | |
527 | | /// Destructor |
528 | | ~half() IMATH_NOEXCEPT = default; |
529 | | |
530 | | /// @} |
531 | | |
532 | | /// Conversion to float |
533 | | operator float() const IMATH_NOEXCEPT; |
534 | | |
535 | | /// @{ |
536 | | /// @name Basic Algebra |
537 | | |
538 | | /// Unary minus |
539 | | constexpr half operator-() const IMATH_NOEXCEPT; |
540 | | |
541 | | /// Assignment |
542 | | half& operator= (const half& h) IMATH_NOEXCEPT = default; |
543 | | |
544 | | /// Move assignment |
545 | | half& operator= (half&& h) IMATH_NOEXCEPT = default; |
546 | | |
547 | | /// Assignment from float |
548 | | half& operator= (float f) IMATH_NOEXCEPT; |
549 | | |
550 | | /// Addition assignment |
551 | | half& operator+= (half h) IMATH_NOEXCEPT; |
552 | | |
553 | | /// Addition assignment from float |
554 | | half& operator+= (float f) IMATH_NOEXCEPT; |
555 | | |
556 | | /// Subtraction assignment |
557 | | half& operator-= (half h) IMATH_NOEXCEPT; |
558 | | |
559 | | /// Subtraction assignment from float |
560 | | half& operator-= (float f) IMATH_NOEXCEPT; |
561 | | |
562 | | /// Multiplication assignment |
563 | | half& operator*= (half h) IMATH_NOEXCEPT; |
564 | | |
565 | | /// Multiplication assignment from float |
566 | | half& operator*= (float f) IMATH_NOEXCEPT; |
567 | | |
568 | | /// Division assignment |
569 | | half& operator/= (half h) IMATH_NOEXCEPT; |
570 | | |
571 | | /// Division assignment from float |
572 | | half& operator/= (float f) IMATH_NOEXCEPT; |
573 | | |
574 | | /// @} |
575 | | |
576 | | /// Round to n-bit precision (n should be between 0 and 10). |
577 | | /// After rounding, the significand's 10-n least significant |
578 | | /// bits will be zero. |
579 | | IMATH_CONSTEXPR14 half round (unsigned int n) const IMATH_NOEXCEPT; |
580 | | |
581 | | /// @{ |
582 | | /// @name Classification |
583 | | |
584 | | /// Return true if a normalized number, a denormalized number, or |
585 | | /// zero. |
586 | | constexpr bool isFinite() const IMATH_NOEXCEPT; |
587 | | |
588 | | /// Return true if a normalized number. |
589 | | constexpr bool isNormalized() const IMATH_NOEXCEPT; |
590 | | |
591 | | /// Return true if a denormalized number. |
592 | | constexpr bool isDenormalized() const IMATH_NOEXCEPT; |
593 | | |
594 | | /// Return true if zero. |
595 | | constexpr bool isZero() const IMATH_NOEXCEPT; |
596 | | |
597 | | /// Return true if NAN. |
598 | | constexpr bool isNan() const IMATH_NOEXCEPT; |
599 | | |
600 | | /// Return true if a positive or a negative infinity |
601 | | constexpr bool isInfinity() const IMATH_NOEXCEPT; |
602 | | |
603 | | /// Return true if the sign bit is set (negative) |
604 | | constexpr bool isNegative() const IMATH_NOEXCEPT; |
605 | | |
606 | | /// @} |
607 | | |
608 | | /// @{ |
609 | | /// @name Special values |
610 | | |
611 | | /// Return +infinity |
612 | | static constexpr half posInf() IMATH_NOEXCEPT; |
613 | | |
614 | | /// Return -infinity |
615 | | static constexpr half negInf() IMATH_NOEXCEPT; |
616 | | |
617 | | /// Returns a NAN with the bit pattern 0111111111111111 |
618 | | static constexpr half qNan() IMATH_NOEXCEPT; |
619 | | |
620 | | /// Return a NAN with the bit pattern 0111110111111111 |
621 | | static constexpr half sNan() IMATH_NOEXCEPT; |
622 | | |
623 | | /// @} |
624 | | |
625 | | /// @{ |
626 | | /// @name Access to the internal representation |
627 | | |
628 | | /// Return the bit pattern |
629 | | constexpr uint16_t bits () const IMATH_NOEXCEPT; |
630 | | |
631 | | /// Set the bit pattern |
632 | | IMATH_CONSTEXPR14 void setBits (uint16_t bits) IMATH_NOEXCEPT; |
633 | | |
634 | | /// @} |
635 | | |
636 | | public: |
637 | | static_assert (sizeof (float) == sizeof (uint32_t), |
638 | | "Assumption about the size of floats correct"); |
639 | | using uif = imath_half_uif; |
640 | | |
641 | | private: |
642 | | |
643 | | constexpr uint16_t mantissa() const IMATH_NOEXCEPT; |
644 | | constexpr uint16_t exponent() const IMATH_NOEXCEPT; |
645 | | |
646 | | uint16_t _h; |
647 | | }; |
648 | | |
649 | | //---------------------------- |
650 | | // Half-from-float constructor |
651 | | //---------------------------- |
652 | | |
653 | | inline half::half (float f) IMATH_NOEXCEPT |
654 | 1.27G | : _h (imath_float_to_half (f)) |
655 | 1.27G | { |
656 | 1.27G | } |
657 | | |
658 | | //------------------------------------------ |
659 | | // Half from raw bits constructor |
660 | | //------------------------------------------ |
661 | | |
662 | | inline constexpr half::half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT : _h (bits) |
663 | | {} |
664 | | |
665 | | //------------------------- |
666 | | // Half-to-float conversion |
667 | | //------------------------- |
668 | | |
669 | | inline half::operator float() const IMATH_NOEXCEPT |
670 | 7.99G | { |
671 | 7.99G | return imath_half_to_float (_h); |
672 | 7.99G | } |
673 | | |
674 | | //------------------------- |
675 | | // Round to n-bit precision |
676 | | //------------------------- |
677 | | |
678 | | inline IMATH_CONSTEXPR14 half |
679 | | half::round (unsigned int n) const IMATH_NOEXCEPT |
680 | | { |
681 | | // |
682 | | // Parameter check. |
683 | | // |
684 | | |
685 | | if (n >= 10) |
686 | | return *this; |
687 | | |
688 | | // |
689 | | // Disassemble h into the sign, s, |
690 | | // and the combined exponent and significand, e. |
691 | | // |
692 | | |
693 | | uint16_t s = _h & 0x8000; |
694 | | uint16_t e = _h & 0x7fff; |
695 | | |
696 | | // |
697 | | // Round the exponent and significand to the nearest value |
698 | | // whose (10-n) least significant bits are zero. |
699 | | // Note that the exponent adjusts automatically if rounding |
700 | | // up causes the significand to overflow. |
701 | | // |
702 | | |
703 | | e >>= 9 - n; |
704 | | e += e & 1; |
705 | | e <<= 9 - n; |
706 | | |
707 | | // |
708 | | // Check for exponent overflow. |
709 | | // |
710 | | |
711 | | if (e >= 0x7c00) |
712 | | { |
713 | | // |
714 | | // Overflow occurred -- truncate instead of rounding. |
715 | | // |
716 | | |
717 | | e = _h; |
718 | | e >>= 10 - n; |
719 | | e <<= 10 - n; |
720 | | } |
721 | | |
722 | | // |
723 | | // Put the original sign bit back. |
724 | | // |
725 | | |
726 | | half h (FromBits, s | e); |
727 | | |
728 | | return h; |
729 | | } |
730 | | |
731 | | //----------------------- |
732 | | // Other inline functions |
733 | | //----------------------- |
734 | | |
735 | | inline constexpr half |
736 | | half::operator-() const IMATH_NOEXCEPT |
737 | 0 | { |
738 | 0 | return half (FromBits, bits() ^ 0x8000); |
739 | 0 | } |
740 | | |
741 | | inline half& |
742 | | half::operator= (float f) IMATH_NOEXCEPT |
743 | 488M | { |
744 | 488M | *this = half (f); |
745 | 488M | return *this; |
746 | 488M | } |
747 | | |
748 | | inline half& |
749 | | half::operator+= (half h) IMATH_NOEXCEPT |
750 | 0 | { |
751 | 0 | *this = half (float (*this) + float (h)); |
752 | 0 | return *this; |
753 | 0 | } |
754 | | |
755 | | inline half& |
756 | | half::operator+= (float f) IMATH_NOEXCEPT |
757 | 0 | { |
758 | 0 | *this = half (float (*this) + f); |
759 | 0 | return *this; |
760 | 0 | } |
761 | | |
762 | | inline half& |
763 | | half::operator-= (half h) IMATH_NOEXCEPT |
764 | 0 | { |
765 | 0 | *this = half (float (*this) - float (h)); |
766 | 0 | return *this; |
767 | 0 | } |
768 | | |
769 | | inline half& |
770 | | half::operator-= (float f) IMATH_NOEXCEPT |
771 | 0 | { |
772 | 0 | *this = half (float (*this) - f); |
773 | 0 | return *this; |
774 | 0 | } |
775 | | |
776 | | inline half& |
777 | | half::operator*= (half h) IMATH_NOEXCEPT |
778 | 0 | { |
779 | 0 | *this = half (float (*this) * float (h)); |
780 | 0 | return *this; |
781 | 0 | } |
782 | | |
783 | | inline half& |
784 | | half::operator*= (float f) IMATH_NOEXCEPT |
785 | | { |
786 | | *this = half (float (*this) * f); |
787 | | return *this; |
788 | | } |
789 | | |
790 | | inline half& |
791 | | half::operator/= (half h) IMATH_NOEXCEPT |
792 | 0 | { |
793 | 0 | *this = half (float (*this) / float (h)); |
794 | 0 | return *this; |
795 | 0 | } |
796 | | |
797 | | inline half& |
798 | | half::operator/= (float f) IMATH_NOEXCEPT |
799 | 0 | { |
800 | 0 | *this = half (float (*this) / f); |
801 | 0 | return *this; |
802 | 0 | } |
803 | | |
804 | | inline constexpr uint16_t |
805 | | half::mantissa() const IMATH_NOEXCEPT |
806 | | { |
807 | | return _h & 0x3ff; |
808 | | } |
809 | | |
810 | | inline constexpr uint16_t |
811 | | half::exponent() const IMATH_NOEXCEPT |
812 | | { |
813 | | return (_h >> 10) & 0x001f; |
814 | | } |
815 | | |
816 | | inline constexpr bool |
817 | | half::isFinite() const IMATH_NOEXCEPT |
818 | | { |
819 | | return exponent() < 31; |
820 | | } |
821 | | |
822 | | inline constexpr bool |
823 | | half::isNormalized() const IMATH_NOEXCEPT |
824 | 0 | { |
825 | 0 | return exponent() > 0 && exponent() < 31; |
826 | 0 | } |
827 | | |
828 | | inline constexpr bool |
829 | | half::isDenormalized() const IMATH_NOEXCEPT |
830 | 0 | { |
831 | 0 | return exponent() == 0 && mantissa() != 0; |
832 | 0 | } |
833 | | |
834 | | inline constexpr bool |
835 | | half::isZero() const IMATH_NOEXCEPT |
836 | 0 | { |
837 | 0 | return (_h & 0x7fff) == 0; |
838 | 0 | } |
839 | | |
840 | | inline constexpr bool |
841 | | half::isNan() const IMATH_NOEXCEPT |
842 | | { |
843 | | return exponent() == 31 && mantissa() != 0; |
844 | | } |
845 | | |
846 | | inline constexpr bool |
847 | | half::isInfinity() const IMATH_NOEXCEPT |
848 | | { |
849 | | return exponent() == 31 && mantissa() == 0; |
850 | | } |
851 | | |
852 | | inline constexpr bool |
853 | | half::isNegative() const IMATH_NOEXCEPT |
854 | | { |
855 | | return (_h & 0x8000) != 0; |
856 | | } |
857 | | |
858 | | inline constexpr half |
859 | | half::posInf() IMATH_NOEXCEPT |
860 | | { |
861 | | return half (FromBits, 0x7c00); |
862 | | } |
863 | | |
864 | | inline constexpr half |
865 | | half::negInf() IMATH_NOEXCEPT |
866 | | { |
867 | | return half (FromBits, 0xfc00); |
868 | | } |
869 | | |
870 | | inline constexpr half |
871 | | half::qNan() IMATH_NOEXCEPT |
872 | 0 | { |
873 | 0 | return half (FromBits, 0x7fff); |
874 | 0 | } |
875 | | |
876 | | inline constexpr half |
877 | | half::sNan() IMATH_NOEXCEPT |
878 | 0 | { |
879 | 0 | return half (FromBits, 0x7dff); |
880 | 0 | } |
881 | | |
882 | | inline constexpr uint16_t |
883 | | half::bits() const IMATH_NOEXCEPT |
884 | | { |
885 | | return _h; |
886 | | } |
887 | | |
888 | | inline IMATH_CONSTEXPR14 void |
889 | | half::setBits (uint16_t bits) IMATH_NOEXCEPT |
890 | | { |
891 | | _h = bits; |
892 | | } |
893 | | |
894 | | IMATH_INTERNAL_NAMESPACE_HEADER_EXIT |
895 | | |
896 | | /// Output h to os, formatted as a float |
897 | | IMATH_EXPORT std::ostream& operator<< (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h); |
898 | | |
899 | | /// Input h from is |
900 | | IMATH_EXPORT std::istream& operator>> (std::istream& is, IMATH_INTERNAL_NAMESPACE::half& h); |
901 | | |
902 | | #include <limits> |
903 | | |
904 | | namespace std |
905 | | { |
906 | | |
907 | | template <> class numeric_limits<IMATH_INTERNAL_NAMESPACE::half> |
908 | | { |
909 | | public: |
910 | | static const bool is_specialized = true; |
911 | | |
912 | | static constexpr IMATH_INTERNAL_NAMESPACE::half min () IMATH_NOEXCEPT |
913 | 0 | { |
914 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0400); /*HALF_MIN*/ |
915 | 0 | } |
916 | | static constexpr IMATH_INTERNAL_NAMESPACE::half max () IMATH_NOEXCEPT |
917 | | { |
918 | | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7bff); /*HALF_MAX*/ |
919 | | } |
920 | | static constexpr IMATH_INTERNAL_NAMESPACE::half lowest () |
921 | | { |
922 | | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0xfbff); /* -HALF_MAX */ |
923 | | } |
924 | | |
925 | | static constexpr int digits = HALF_MANT_DIG; |
926 | | static constexpr int digits10 = HALF_DIG; |
927 | | static constexpr int max_digits10 = HALF_DECIMAL_DIG; |
928 | | static constexpr bool is_signed = true; |
929 | | static constexpr bool is_integer = false; |
930 | | static constexpr bool is_exact = false; |
931 | | static constexpr int radix = HALF_RADIX; |
932 | | static constexpr IMATH_INTERNAL_NAMESPACE::half epsilon () IMATH_NOEXCEPT |
933 | 0 | { |
934 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x1400); /*HALF_EPSILON*/ |
935 | 0 | } |
936 | | static constexpr IMATH_INTERNAL_NAMESPACE::half round_error () IMATH_NOEXCEPT |
937 | 0 | { |
938 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x3800); /*0.5*/ |
939 | 0 | } |
940 | | |
941 | | static constexpr int min_exponent = HALF_DENORM_MIN_EXP; |
942 | | static constexpr int min_exponent10 = HALF_DENORM_MIN_10_EXP; |
943 | | static constexpr int max_exponent = HALF_MAX_EXP; |
944 | | static constexpr int max_exponent10 = HALF_MAX_10_EXP; |
945 | | |
946 | | static constexpr bool has_infinity = true; |
947 | | static constexpr bool has_quiet_NaN = true; |
948 | | static constexpr bool has_signaling_NaN = true; |
949 | | static constexpr float_denorm_style has_denorm = denorm_present; |
950 | | static constexpr bool has_denorm_loss = false; |
951 | | static constexpr IMATH_INTERNAL_NAMESPACE::half infinity () IMATH_NOEXCEPT |
952 | 0 | { |
953 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7c00); /*half::posInf()*/ |
954 | 0 | } |
955 | | static constexpr IMATH_INTERNAL_NAMESPACE::half quiet_NaN () IMATH_NOEXCEPT |
956 | 0 | { |
957 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7fff); /*half::qNan()*/ |
958 | 0 | } |
959 | | static constexpr IMATH_INTERNAL_NAMESPACE::half signaling_NaN () IMATH_NOEXCEPT |
960 | 0 | { |
961 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7dff); /*half::sNan()*/ |
962 | 0 | } |
963 | | static constexpr IMATH_INTERNAL_NAMESPACE::half denorm_min () IMATH_NOEXCEPT |
964 | 0 | { |
965 | 0 | return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0001); /*HALF_DENORM_MIN*/ |
966 | 0 | } |
967 | | |
968 | | static constexpr bool is_iec559 = false; |
969 | | static constexpr bool is_bounded = false; |
970 | | static constexpr bool is_modulo = false; |
971 | | |
972 | | static constexpr bool traps = true; |
973 | | static constexpr bool tinyness_before = false; |
974 | | static constexpr float_round_style round_style = round_to_nearest; |
975 | | }; |
976 | | |
977 | | } // namespace std |
978 | | |
979 | | //---------- |
980 | | // Debugging |
981 | | //---------- |
982 | | |
983 | | IMATH_EXPORT void printBits (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h); |
984 | | IMATH_EXPORT void printBits (std::ostream& os, float f); |
985 | | IMATH_EXPORT void printBits (char c[19], IMATH_INTERNAL_NAMESPACE::half h); |
986 | | IMATH_EXPORT void printBits (char c[35], float f); |
987 | | |
988 | | #if !defined(__CUDACC__) && !defined(__CUDA_FP16_HPP__) && !defined(__HIP__) |
989 | | using half = IMATH_INTERNAL_NAMESPACE::half; |
990 | | #elif defined(__CUDACC__) || defined(__CUDA_FP16_HPP__) |
991 | | #include <cuda_fp16.h> |
992 | | #elif defined(__HIP__) |
993 | | #include <hip/amd_detail/amd_hip_fp16.h> |
994 | | #endif |
995 | | |
996 | | #endif // __cplusplus |
997 | | |
998 | | #endif // IMATH_HALF_H_ |