/src/freeimage-svn/FreeImage/trunk/Source/OpenEXR/Half/half.h

Source
///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
// 
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission. 
// 
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////

// Primary authors:
//     Florian Kainz <kainz@ilm.com>
//     Rod Bogart <rgb@ilm.com>

//---------------------------------------------------------------------------
//
//  half -- a 16-bit floating point number class:
//
//  Type half can represent positive and negative numbers whose
//  magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
//  error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
//  with an absolute error of 6.0e-8.  All integers from -2048 to
//  +2048 can be represented exactly.
//
//  Type half behaves (almost) like the built-in C++ floating point
//  types.  In arithmetic expressions, half, float and double can be
//  mixed freely.  Here are a few examples:
//
//      half a (3.5);
//      float b (a + sqrt (a));
//      a += b;
//      b += a;
//      b = a + 7;
//
//  Conversions from half to float are lossless; all half numbers
//  are exactly representable as floats.
//
//  Conversions from float to half may not preserve a float's value
//  exactly.  If a float is not representable as a half, then the
//  float value is rounded to the nearest representable half.  If a
//  float value is exactly in the middle between the two closest
//  representable half values, then the float value is rounded to
//  the closest half whose least significant bit is zero.
//
//  Overflows during float-to-half conversions cause arithmetic
//  exceptions.  An overflow occurs when the float value to be
//  converted is too large to be represented as a half, or if the
//  float value is an infinity or a NAN.
//
//  The implementation of type half makes the following assumptions
//  about the implementation of the built-in C++ types:
//
//      float is an IEEE 754 single-precision number
//      sizeof (float) == 4
//      sizeof (unsigned int) == sizeof (float)
//      alignof (unsigned int) == alignof (float)
//      sizeof (unsigned short) == 2
//
//---------------------------------------------------------------------------

#ifndef _HALF_H_
#define _HALF_H_

#include "halfExport.h"    // for definition of HALF_EXPORT
#include <iostream>

//---------------------------------------------------------------------------
// half keeps a 16-bit floating point number as a raw bit pattern in one
// unsigned short (_h).  Conversion to float is a lookup in _toFloat;
// conversion from float goes through _eLut and, for the uncommon cases,
// the out-of-line convert() function.  The compound assignment operators
// compute in float and convert the result back to half.
//---------------------------------------------------------------------------
class half
{
  public:

    //-------------
    // Constructors
    //-------------

    half ();      // leaves the value uninitialized
    half (float f);   // implicit float-to-half conversion (may round)


    //--------------------
    // Conversion to float
    //--------------------

    operator    float () const;


    //------------
    // Unary minus
    //------------

    half    operator - () const;


    //-----------
    // Assignment
    //-----------

    half &    operator = (half  h);
    half &    operator = (float f);

    half &    operator += (half  h);
    half &    operator += (float f);

    half &    operator -= (half  h);
    half &    operator -= (float f);

    half &    operator *= (half  h);
    half &    operator *= (float f);

    half &    operator /= (half  h);
    half &    operator /= (float f);


    //---------------------------------------------------------
    // Round to n-bit precision (n should be between 0 and 10).
    // After rounding, the significand's 10-n least significant
    // bits will be zero.  For n >= 10 the number is returned
    // unchanged.
    //---------------------------------------------------------

    half    round (unsigned int n) const;


    //--------------------------------------------------------------------
    // Classification:
    //
    //  h.isFinite()    returns true if h is a normalized number,
    //        a denormalized number or zero
    //
    //  h.isNormalized()  returns true if h is a normalized number
    //
    //  h.isDenormalized()  returns true if h is a denormalized number
    //
    //  h.isZero()    returns true if h is zero (either sign)
    //
    //  h.isNan()   returns true if h is a NAN
    //
    //  h.isInfinity()    returns true if h is a positive
    //        or a negative infinity
    //
    //  h.isNegative()    returns true if the sign bit of h
    //        is set (negative)
    //--------------------------------------------------------------------

    bool    isFinite () const;
    bool    isNormalized () const;
    bool    isDenormalized () const;
    bool    isZero () const;
    bool    isNan () const;
    bool    isInfinity () const;
    bool    isNegative () const;


    //--------------------------------------------
    // Special values
    //
    //  posInf()  returns +infinity
    //
    //  negInf()  returns -infinity
    //
    //  qNan()    returns a NAN with the bit
    //      pattern 0111111111111111
    //
    //  sNan()    returns a NAN with the bit
    //      pattern 0111110111111111
    //--------------------------------------------

    static half   posInf ();
    static half   negInf ();
    static half   qNan ();
    static half   sNan ();


    //--------------------------------------
    // Access to the internal representation
    // (the raw 16-bit pattern stored in _h)
    //--------------------------------------

    HALF_EXPORT unsigned short  bits () const;
    HALF_EXPORT void    setBits (unsigned short bits);


  public:

    //-----------------------------------------------------------
    // Overlay of a float and an unsigned int, used to get at the
    // bit pattern of a float.  Also the element type of _toFloat.
    //-----------------------------------------------------------

    union uif
    {
  unsigned int  i;
  float   f;
    };

  private:

    //-----------------------------------------------------------
    // convert() is the non-inline float-to-half path; the float
    // constructor calls it with the float's bit pattern whenever
    // the _eLut lookup yields zero.
    //
    // overflow() is not referenced in this header.
    // NOTE(review): presumably used by convert() to raise the
    // arithmetic exception on overflow -- confirm in half.cpp.
    //-----------------------------------------------------------

    HALF_EXPORT static short                  convert (int i);
    HALF_EXPORT static float                  overflow ();

    unsigned short                            _h;   // raw half bit pattern

    //-----------------------------------------------------------
    // _toFloat is indexed by a half bit pattern and holds the
    // corresponding float.
    //
    // _eLut is indexed by the 9-bit sign-and-exponent field of a
    // float; a non-zero entry is the sign and exponent of the
    // resulting half for the simple, normalized case.
    //-----------------------------------------------------------

    HALF_EXPORT static const uif              _toFloat[1 << 16];
    HALF_EXPORT static const unsigned short   _eLut[1 << 9];
};



//-----------
// Stream I/O
//-----------

// Write h to os / read a value from is into h.  Only declared here;
// the definitions live outside this header.
HALF_EXPORT std::ostream &      operator << (std::ostream &os, half  h);
HALF_EXPORT std::istream &      operator >> (std::istream &is, half &h);


//----------
// Debugging
//----------

// Output the bit pattern of a half or a float, either to a stream or
// into a caller-supplied character buffer.  Only declared here.
// NOTE(review): the buffer sizes 19 and 35 presumably correspond to
// 16 or 32 bit characters plus two separators and a terminating null,
// matching the "s e m" layout shown below -- confirm in half.cpp.
HALF_EXPORT void        printBits   (std::ostream &os, half  h);
HALF_EXPORT void        printBits   (std::ostream &os, float f);
HALF_EXPORT void        printBits   (char  c[19], half  h);
HALF_EXPORT void        printBits   (char  c[35], float f);


//-------------------------------------------------------------------------
// Limits
//
// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
// constants, but at least one other compiler (gcc 2.96) produces incorrect
// results if they are.
//-------------------------------------------------------------------------

#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

  // Visual C++: the limits are float constants (f suffix).

  #define HALF_MIN  5.96046448e-08f // Smallest positive half

  #define HALF_NRM_MIN  6.10351562e-05f // Smallest positive normalized half

  #define HALF_MAX  65504.0f  // Largest positive half

  #define HALF_EPSILON  0.00097656f // Smallest positive e for which
          // half (1.0 + e) != half (1.0)
#else

  // All other compilers: the same values as double constants.

  #define HALF_MIN  5.96046448e-08  // Smallest positive half

  #define HALF_NRM_MIN  6.10351562e-05  // Smallest positive normalized half

  #define HALF_MAX  65504.0    // Largest positive half

  #define HALF_EPSILON  0.00097656  // Smallest positive e for which
          // half (1.0 + e) != half (1.0)
#endif


#define HALF_MANT_DIG 11    // Number of digits in mantissa
          // (significand + hidden leading 1)

#define HALF_DIG  2   // Number of base 10 digits that
          // can be represented without change
          // NOTE(review): floor ((HALF_MANT_DIG - 1)
          // * log10 (2)) is 3; confirm whether 2
          // is intentionally conservative

#define HALF_RADIX  2   // Base of the exponent

#define HALF_MIN_EXP  -13   // Minimum negative integer such that
          // HALF_RADIX raised to the power of
          // one less than that integer is a
          // normalized half

#define HALF_MAX_EXP  16    // Maximum positive integer such that
          // HALF_RADIX raised to the power of
          // one less than that integer is a
          // normalized half

#define HALF_MIN_10_EXP -4    // Minimum negative integer such
          // that 10 raised to that power is
          // a normalized half

#define HALF_MAX_10_EXP 4   // Maximum positive integer such
          // that 10 raised to that power is
          // a normalized half


//---------------------------------------------------------------------------
//
// Implementation --
//
// Representation of a float:
//
//  We assume that a float, f, is an IEEE 754 single-precision
//  floating point number, whose bits are arranged as follows:
//
//      31 (msb)
//      | 
//      | 30     23
//      | |      | 
//      | |      | 22                    0 (lsb)
//      | |      | |                     |
//      X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
//
//      s e        m
//
//  S is the sign-bit, e is the exponent and m is the significand.
//
//  If e is between 1 and 254, f is a normalized number:
//
//              s    e-127
//      f = (-1)  * 2      * 1.m
//
//  If e is 0, and m is not zero, f is a denormalized number:
//
//              s    -126
//      f = (-1)  * 2      * 0.m
//
//  If e and m are both zero, f is zero:
//
//      f = 0.0
//
//  If e is 255, f is an "infinity" or "not a number" (NAN),
//  depending on whether m is zero or not.
//
//  Examples:
//
//      0 00000000 00000000000000000000000 = 0.0
//      0 01111110 00000000000000000000000 = 0.5
//      0 01111111 00000000000000000000000 = 1.0
//      0 10000000 00000000000000000000000 = 2.0
//      0 10000000 10000000000000000000000 = 3.0
//      1 10000101 11110000010000000000000 = -124.0625
//      0 11111111 00000000000000000000000 = +infinity
//      1 11111111 00000000000000000000000 = -infinity
//      0 11111111 10000000000000000000000 = NAN
//      1 11111111 11111111111111111111111 = NAN
//
// Representation of a half:
//
//  Here is the bit-layout for a half number, h:
//
//      15 (msb)
//      | 
//      | 14  10
//      | |   |
//      | |   | 9        0 (lsb)
//      | |   | |        |
//      X XXXXX XXXXXXXXXX
//
//      s e     m
//
//  S is the sign-bit, e is the exponent and m is the significand.
//
//  If e is between 1 and 30, h is a normalized number:
//
//              s    e-15
//      h = (-1)  * 2     * 1.m
//
//  If e is 0, and m is not zero, h is a denormalized number:
//
//              s    -14
//      h = (-1)  * 2     * 0.m
//
//  If e and m are both zero, h is zero:
//
//      h = 0.0
//
//  If e is 31, h is an "infinity" or "not a number" (NAN),
//  depending on whether m is zero or not.
//
//  Examples:
//
//      0 00000 0000000000 = 0.0
//      0 01110 0000000000 = 0.5
//      0 01111 0000000000 = 1.0
//      0 10000 0000000000 = 2.0
//      0 10000 1000000000 = 3.0
//      1 10101 1111000001 = -124.0625
//      0 11111 0000000000 = +infinity
//      1 11111 0000000000 = -infinity
//      0 11111 1000000000 = NAN
//      1 11111 1111111111 = NAN
//
// Conversion:
//
//  Converting from a float to a half requires some non-trivial bit
//  manipulations.  In some cases, this makes conversion relatively
//  slow, but the most common case is accelerated via table lookups.
//
//  Converting back from a half to a float is easier because we don't
//  have to do any rounding.  In addition, there are only 65536
//  different half numbers; we can convert each of those numbers once
//  and store the results in a table.  Later, all conversions can be
//  done using only simple table lookups.
//
//---------------------------------------------------------------------------


//--------------------
// Simple constructors
//--------------------

inline
half::half ()
{
    // Deliberately empty: _h is not initialized here, so a
    // default-constructed half has no defined value until one
    // is assigned to it.
}


//----------------------------
// Half-from-float constructor
//----------------------------

inline
half::half (float f)
{
    //
    // Construct a half from a float.  The float's bit pattern is read
    // through the uif union (this relies on the assumptions about float
    // and unsigned int listed at the top of this file).
    //

    uif x;

    x.f = f;

    if (f == 0)
    {
	//
	// Common special case - zero.
	// Preserve the zero's sign bit: the upper 16 bits of the float
	// are all zero except possibly the sign, which lands in bit 15.
	//

	_h = (x.i >> 16);
    }
    else
    {
	//
	// We extract the combined sign and exponent, e, from our
	// floating-point number, f.  Then we convert e to the sign
	// and exponent of the half number via a table lookup.
	//
	// For the most common case, where a normalized half is produced,
	// the table lookup returns a non-zero value; in this case, all
	// we have to do is round f's significand to 10 bits and combine
	// the result with e.
	//
	// For all other cases (overflow, zeroes, denormalized numbers
	// resulting from underflow, infinities and NANs), the table
	// lookup returns zero, and we call a longer, non-inline function
	// to do the float-to-half conversion.
	//
	// Note: the locals below used to carry the "register" storage
	// class specifier; it was removed from the language in C++17 and
	// had no effect on the generated code, so it is omitted here.
	//

	int e = (x.i >> 23) & 0x000001ff;

	e = _eLut[e];

	if (e)
	{
	    //
	    // Simple case - round the significand, m, to 10
	    // bits and combine it with the sign and exponent.
	    // Adding 0x0fff plus the lowest kept bit rounds to
	    // nearest, with ties going to the even significand;
	    // a carry out of the significand bumps the exponent.
	    //

	    int m = x.i & 0x007fffff;
	    _h = e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13);
	}
	else
	{
	    //
	    // Difficult case - call a function.
	    //

	    _h = convert (x.i);
	}
    }
}


//------------------------------------------
// Half-to-float conversion via table lookup
//------------------------------------------

inline
half::operator float () const
{
    // The bit pattern itself is the index into the precomputed table.
    const uif &entry = _toFloat[_h];
    return entry.f;
}


//-------------------------
// Round to n-bit precision
//-------------------------

inline half
half::round (unsigned int n) const
{
    //
    // Asking for 10 or more bits leaves nothing to discard.
    //

    if (n >= 10)
	return *this;

    //
    // Separate the sign bit from the magnitude (exponent and
    // significand treated together as one 15-bit integer).
    //

    const unsigned short signBit = _h & 0x8000;
    unsigned short magnitude = _h & 0x7fff;

    //
    // Shift out all but the most significant of the bits to be
    // discarded, add that bit to the value (a set bit carries into
    // the bits we keep), and shift back.  A carry out of the
    // significand increments the exponent automatically.
    //

    const unsigned int roundShift = 9 - n;

    magnitude >>= roundShift;
    magnitude  += magnitude & 1;
    magnitude <<= roundShift;

    if (magnitude >= 0x7c00)
    {
	//
	// The exponent field is all ones after rounding: fall back
	// to clearing the low 10-n bits of the original pattern.
	//

	const unsigned int lowMask = (1u << (10 - n)) - 1u;
	magnitude = _h & ~lowMask;
    }

    //
    // Reattach the sign.
    //

    half rounded;
    rounded._h = signBit | magnitude;

    return rounded;
}


//-----------------------
// Other inline functions
//-----------------------

inline half
half::operator - () const
{
    // Negation only flips the sign bit; everything else is copied.
    half negated (*this);
    negated._h ^= 0x8000;
    return negated;
}


inline half &
half::operator = (half h)
{
    // Plain copy of the 16-bit pattern.
    const unsigned short sourceBits = h._h;
    this->_h = sourceBits;
    return *this;
}


inline half &
half::operator = (float f)
{
    // Convert first, then reuse the half-to-half assignment.
    const half converted (f);
    return operator = (converted);
}


inline half &
half::operator += (half h)
{
    // Add in float, then convert the sum back to half.
    const float sum = float (*this) + float (h);
    return *this = half (sum);
}


inline half &
half::operator += (float f)
{
    // Add in float, then convert the sum back to half.
    const float sum = float (*this) + f;
    return *this = half (sum);
}


inline half &
half::operator -= (half h)
{
    // Subtract in float, then convert the difference back to half.
    const float difference = float (*this) - float (h);
    return *this = half (difference);
}


inline half &
half::operator -= (float f)
{
    // Subtract in float, then convert the difference back to half.
    const float difference = float (*this) - f;
    return *this = half (difference);
}


inline half &
half::operator *= (half h)
{
    // Multiply in float, then convert the product back to half.
    const float product = float (*this) * float (h);
    return *this = half (product);
}


inline half &
half::operator *= (float f)
{
    // Multiply in float, then convert the product back to half.
    const float product = float (*this) * f;
    return *this = half (product);
}


inline half &
half::operator /= (half h)
{
    // Divide in float, then convert the quotient back to half.
    const float quotient = float (*this) / float (h);
    return *this = half (quotient);
}


inline half &
half::operator /= (float f)
{
    // Divide in float, then convert the quotient back to half.
    const float quotient = float (*this) / f;
    return *this = half (quotient);
}


inline bool 
half::isFinite () const
{
    // Finite means the 5-bit exponent field is not all ones.
    return (_h & 0x7c00) != 0x7c00;
}


inline bool
half::isNormalized () const
{
    // Normalized means the exponent field is neither all zeros
    // nor all ones.
    const unsigned int exponentField = _h & 0x7c00;
    return exponentField != 0 && exponentField != 0x7c00;
}


inline bool
half::isDenormalized () const
{
    // With the sign masked off, a denormalized number is any non-zero
    // pattern below 0x0400, i.e. exponent zero and significand non-zero.
    const unsigned int magnitude = _h & 0x7fff;
    return magnitude != 0 && magnitude < 0x0400;
}


inline bool
half::isZero () const
{
    // Ignore the sign bit so that both +0 and -0 count as zero.
    const unsigned int magnitude = _h & 0x7fff;
    return magnitude == 0;
}


inline bool
half::isNan () const
{
    // Exponent all ones with a non-zero significand: with the sign
    // masked off, that is every pattern above infinity (0x7c00).
    const unsigned int magnitude = _h & 0x7fff;
    return magnitude > 0x7c00;
}


inline bool
half::isInfinity () const
{
    // Exponent all ones with a zero significand, either sign.
    const unsigned int magnitude = _h & 0x7fff;
    return magnitude == 0x7c00;
}


inline bool 
half::isNegative () const
{
    // Bit 15 is the sign bit.
    const unsigned int sign = (_h >> 15) & 1;
    return sign != 0;
}


inline half
half::posInf ()
{
    // Sign 0, exponent all ones, significand zero.
    const unsigned short pattern = 0x7c00;

    half result;
    result._h = pattern;
    return result;
}


inline half
half::negInf ()
{
    // Sign 1, exponent all ones, significand zero.
    const unsigned short pattern = 0xfc00;

    half result;
    result._h = pattern;
    return result;
}


inline half
half::qNan ()
{
    // Bit pattern 0111111111111111.
    const unsigned short pattern = 0x7fff;

    half result;
    result._h = pattern;
    return result;
}


inline half
half::sNan ()
{
    // Bit pattern 0111110111111111.
    const unsigned short pattern = 0x7dff;

    half result;
    result._h = pattern;
    return result;
}


inline unsigned short
half::bits () const
{
    return _h;
}


inline void
half::setBits (unsigned short bits)
{
    _h = bits;
}

#endif

Coverage Report

Created: 2025-10-28 06:24

Line	Count	Source
1		///////////////////////////////////////////////////////////////////////////
2		//
3		// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4		// Digital Ltd. LLC
5		//
6		// All rights reserved.
7		//
8		// Redistribution and use in source and binary forms, with or without
9		// modification, are permitted provided that the following conditions are
10		// met:
11		// * Redistributions of source code must retain the above copyright
12		// notice, this list of conditions and the following disclaimer.
13		// * Redistributions in binary form must reproduce the above
14		// copyright notice, this list of conditions and the following disclaimer
15		// in the documentation and/or other materials provided with the
16		// distribution.
17		// * Neither the name of Industrial Light & Magic nor the names of
18		// its contributors may be used to endorse or promote products derived
19		// from this software without specific prior written permission.
20		//
21		// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22		// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23		// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24		// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25		// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26		// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27		// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28		// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29		// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30		// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31		// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32		//
33		///////////////////////////////////////////////////////////////////////////
34
35		// Primary authors:
36		// Florian Kainz <kainz@ilm.com>
37		// Rod Bogart <rgb@ilm.com>
38
39		//---------------------------------------------------------------------------
40		//
41		// half -- a 16-bit floating point number class:
42		//
43		// Type half can represent positive and negative numbers whose
44		// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
45		// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46		// with an absolute error of 6.0e-8. All integers from -2048 to
47		// +2048 can be represented exactly.
48		//
49		// Type half behaves (almost) like the built-in C++ floating point
50		// types. In arithmetic expressions, half, float and double can be
51		// mixed freely. Here are a few examples:
52		//
53		// half a (3.5);
54		// float b (a + sqrt (a));
55		// a += b;
56		// b += a;
57		// b = a + 7;
58		//
59		// Conversions from half to float are lossless; all half numbers
60		// are exactly representable as floats.
61		//
62		// Conversions from float to half may not preserve a float's value
63		// exactly. If a float is not representable as a half, then the
64		// float value is rounded to the nearest representable half. If a
65		// float value is exactly in the middle between the two closest
66		// representable half values, then the float value is rounded to
67		// the closest half whose least significant bit is zero.
68		//
69		// Overflows during float-to-half conversions cause arithmetic
70		// exceptions. An overflow occurs when the float value to be
71		// converted is too large to be represented as a half, or if the
72		// float value is an infinity or a NAN.
73		//
74		// The implementation of type half makes the following assumptions
75		// about the implementation of the built-in C++ types:
76		//
77		// float is an IEEE 754 single-precision number
78		// sizeof (float) == 4
79		// sizeof (unsigned int) == sizeof (float)
80		// alignof (unsigned int) == alignof (float)
81		// sizeof (unsigned short) == 2
82		//
83		//---------------------------------------------------------------------------
84
85		#ifndef _HALF_H_
86		#define _HALF_H_
87
88		#include "halfExport.h" // for definition of HALF_EXPORT
89		#include <iostream>
90
91		class half
92		{
93		public:
94
95		//-------------
96		// Constructors
97		//-------------
98
99		half (); // no initialization
100		half (float f);
101
102
103		//--------------------
104		// Conversion to float
105		//--------------------
106
107		operator float () const;
108
109
110		//------------
111		// Unary minus
112		//------------
113
114		half operator - () const;
115
116
117		//-----------
118		// Assignment
119		//-----------
120
121		half & operator = (half h);
122		half & operator = (float f);
123
124		half & operator += (half h);
125		half & operator += (float f);
126
127		half & operator -= (half h);
128		half & operator -= (float f);
129
130		half & operator *= (half h);
131		half & operator *= (float f);
132
133		half & operator /= (half h);
134		half & operator /= (float f);
135
136
137		//---------------------------------------------------------
138		// Round to n-bit precision (n should be between 0 and 10).
139		// After rounding, the significand's 10-n least significant
140		// bits will be zero.
141		//---------------------------------------------------------
142
143		half round (unsigned int n) const;
144
145
146		//--------------------------------------------------------------------
147		// Classification:
148		//
149		// h.isFinite() returns true if h is a normalized number,
150		// a denormalized number or zero
151		//
152		// h.isNormalized() returns true if h is a normalized number
153		//
154		// h.isDenormalized() returns true if h is a denormalized number
155		//
156		// h.isZero() returns true if h is zero
157		//
158		// h.isNan() returns true if h is a NAN
159		//
160		// h.isInfinity() returns true if h is a positive
161		// or a negative infinity
162		//
163		// h.isNegative() returns true if the sign bit of h
164		// is set (negative)
165		//--------------------------------------------------------------------
166
167		bool isFinite () const;
168		bool isNormalized () const;
169		bool isDenormalized () const;
170		bool isZero () const;
171		bool isNan () const;
172		bool isInfinity () const;
173		bool isNegative () const;
174
175
176		//--------------------------------------------
177		// Special values
178		//
179		// posInf() returns +infinity
180		//
181		// negInf() returns -infinity
182		//
183		// qNan() returns a NAN with the bit
184		// pattern 0111111111111111
185		//
186		// sNan() returns a NAN with the bit
187		// pattern 0111110111111111
188		//--------------------------------------------
189
190		static half posInf ();
191		static half negInf ();
192		static half qNan ();
193		static half sNan ();
194
195
196		//--------------------------------------
197		// Access to the internal representation
198		//--------------------------------------
199
200		HALF_EXPORT unsigned short bits () const;
201		HALF_EXPORT void setBits (unsigned short bits);
202
203
204		public:
205
206		union uif
207		{
208		unsigned int i;
209		float f;
210		};
211
212		private:
213
214		HALF_EXPORT static short convert (int i);
215		HALF_EXPORT static float overflow ();
216
217		unsigned short _h;
218
219		HALF_EXPORT static const uif _toFloat[1 << 16];
220		HALF_EXPORT static const unsigned short _eLut[1 << 9];
221		};
222
223
224
225		//-----------
226		// Stream I/O
227		//-----------
228
229		HALF_EXPORT std::ostream & operator << (std::ostream &os, half h);
230		HALF_EXPORT std::istream & operator >> (std::istream &is, half &h);
231
232
233		//----------
234		// Debugging
235		//----------
236
237		HALF_EXPORT void printBits (std::ostream &os, half h);
238		HALF_EXPORT void printBits (std::ostream &os, float f);
239		HALF_EXPORT void printBits (char c[19], half h);
240		HALF_EXPORT void printBits (char c[35], float f);
241
242
243		//-------------------------------------------------------------------------
244		// Limits
245		//
246		// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
247		// constants, but at least one other compiler (gcc 2.96) produces incorrect
248		// results if they are.
249		//-------------------------------------------------------------------------
250
251		#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
252
253		#define HALF_MIN 5.96046448e-08f // Smallest positive half
254
255		#define HALF_NRM_MIN 6.10351562e-05f // Smallest positive normalized half
256
257		#define HALF_MAX 65504.0f // Largest positive half
258
259		#define HALF_EPSILON 0.00097656f // Smallest positive e for which
260		// half (1.0 + e) != half (1.0)
261		#else
262
263		#define HALF_MIN 5.96046448e-08 // Smallest positive half
264
265		#define HALF_NRM_MIN 6.10351562e-05 // Smallest positive normalized half
266
267	0	#define HALF_MAX 65504.0 // Largest positive half
268
269		#define HALF_EPSILON 0.00097656 // Smallest positive e for which
270		// half (1.0 + e) != half (1.0)
271		#endif
272
273
274		#define HALF_MANT_DIG 11 // Number of digits in mantissa
275		// (significand + hidden leading 1)
276
277		#define HALF_DIG 2 // Number of base 10 digits that
278		// can be represented without change
279
280		#define HALF_RADIX 2 // Base of the exponent
281
282		#define HALF_MIN_EXP -13 // Minimum negative integer such that
283		// HALF_RADIX raised to the power of
284		// one less than that integer is a
285		// normalized half
286
287		#define HALF_MAX_EXP 16 // Maximum positive integer such that
288		// HALF_RADIX raised to the power of
289		// one less than that integer is a
290		// normalized half
291
292		#define HALF_MIN_10_EXP -4 // Minimum positive integer such
293		// that 10 raised to that power is
294		// a normalized half
295
296		#define HALF_MAX_10_EXP 4 // Maximum positive integer such
297		// that 10 raised to that power is
298		// a normalized half
299
300
301		//---------------------------------------------------------------------------
302		//
303		// Implementation --
304		//
305		// Representation of a float:
306		//
307		// We assume that a float, f, is an IEEE 754 single-precision
308		// floating point number, whose bits are arranged as follows:
309		//
310		// 31 (msb)
311		// \|
312		// \| 30 23
313		// \| \| \|
314		// \| \| \| 22 0 (lsb)
315		// \| \| \| \| \|
316		// X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
317		//
318		// s e m
319		//
320		// S is the sign-bit, e is the exponent and m is the significand.
321		//
322		// If e is between 1 and 254, f is a normalized number:
323		//
324		// s e-127
325		// f = (-1) * 2 * 1.m
326		//
327		// If e is 0, and m is not zero, f is a denormalized number:
328		//
329		// s -126
330		// f = (-1) * 2 * 0.m
331		//
332		// If e and m are both zero, f is zero:
333		//
334		// f = 0.0
335		//
336		// If e is 255, f is an "infinity" or "not a number" (NAN),
337		// depending on whether m is zero or not.
338		//
339		// Examples:
340		//
341		// 0 00000000 00000000000000000000000 = 0.0
342		// 0 01111110 00000000000000000000000 = 0.5
343		// 0 01111111 00000000000000000000000 = 1.0
344		// 0 10000000 00000000000000000000000 = 2.0
345		// 0 10000000 10000000000000000000000 = 3.0
346		// 1 10000101 11110000010000000000000 = -124.0625
347		// 0 11111111 00000000000000000000000 = +infinity
348		// 1 11111111 00000000000000000000000 = -infinity
349		// 0 11111111 10000000000000000000000 = NAN
350		// 1 11111111 11111111111111111111111 = NAN
351		//
352		// Representation of a half:
353		//
354		// Here is the bit-layout for a half number, h:
355		//
356		// 15 (msb)
357		// \|
358		// \| 14 10
359		// \| \| \|
360		// \| \| \| 9 0 (lsb)
361		// \| \| \| \| \|
362		// X XXXXX XXXXXXXXXX
363		//
364		// s e m
365		//
366		// S is the sign-bit, e is the exponent and m is the significand.
367		//
368		// If e is between 1 and 30, h is a normalized number:
369		//
370		// s e-15
371		// h = (-1) * 2 * 1.m
372		//
373		// If e is 0, and m is not zero, h is a denormalized number:
374		//
375		// S -14
376		// h = (-1) * 2 * 0.m
377		//
378		// If e and m are both zero, h is zero:
379		//
380		// h = 0.0
381		//
382		// If e is 31, h is an "infinity" or "not a number" (NAN),
383		// depending on whether m is zero or not.
384		//
385		// Examples:
386		//
387		// 0 00000 0000000000 = 0.0
388		// 0 01110 0000000000 = 0.5
389		// 0 01111 0000000000 = 1.0
390		// 0 10000 0000000000 = 2.0
391		// 0 10000 1000000000 = 3.0
392		// 1 10101 1111000001 = -124.0625
393		// 0 11111 0000000000 = +infinity
394		// 1 11111 0000000000 = -infinity
395		// 0 11111 1000000000 = NAN
396		// 1 11111 1111111111 = NAN
397		//
398		// Conversion:
399		//
400		// Converting from a float to a half requires some non-trivial bit
401		// manipulations. In some cases, this makes conversion relatively
402		// slow, but the most common case is accelerated via table lookups.
403		//
404		// Converting back from a half to a float is easier because we don't
405		// have to do any rounding. In addition, there are only 65536
406		// different half numbers; we can convert each of those numbers once
407		// and store the results in a table. Later, all conversions can be
408		// done using only simple table lookups.
409		//
410		//---------------------------------------------------------------------------
411
412
413		//--------------------
414		// Simple constructors
415		//--------------------
416
417		inline
418		half::half ()
419	0	{
420		// no initialization
421	0	}
422
423
424		//----------------------------
425		// Half-from-float constructor
426		//----------------------------
427
428		inline
429		half::half (float f)
430	0	{
431	0	uif x;
432
433	0	x.f = f;
434
435	0	if (f == 0)
436	0	{
437		//
438		// Common special case - zero.
439		// Preserve the zero's sign bit.
440		//
441
442	0	_h = (x.i >> 16);
443	0	}
444	0	else
445	0	{
446		//
447		// We extract the combined sign and exponent, e, from our
448		// floating-point number, f. Then we convert e to the sign
449		// and exponent of the half number via a table lookup.
450		//
451		// For the most common case, where a normalized half is produced,
452		// the table lookup returns a non-zero value; in this case, all
453		// we have to do is round f's significand to 10 bits and combine
454		// the result with e.
455		//
456		// For all other cases (overflow, zeroes, denormalized numbers
457		// resulting from underflow, infinities and NANs), the table
458		// lookup returns zero, and we call a longer, non-inline function
459		// to do the float-to-half conversion.
460		//
461
462	0	register int e = (x.i >> 23) & 0x000001ff;
463
464	0	e = _eLut[e];
465
466	0	if (e)
467	0	{
468		//
469		// Simple case - round the significand, m, to 10
470		// bits and combine it with the sign and exponent.
471		//
472
473	0	register int m = x.i & 0x007fffff;
474	0	_h = e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13);
475	0	}
476	0	else
477	0	{
478		//
479		// Difficult case - call a function.
480		//
481
482	0	_h = convert (x.i);
483	0	}
484	0	}
485	0	}
486
487
488		//------------------------------------------
489		// Half-to-float conversion via table lookup
490		//------------------------------------------
491
492		inline
493		half::operator float () const
494	0	{
495	0	return _toFloat[_h].f;
496	0	}
497
498
499		//-------------------------
500		// Round to n-bit precision
501		//-------------------------
502
503		inline half
504		half::round (unsigned int n) const
505	0	{
506		//
507		// Parameter check.
508		//
509
510	0	if (n >= 10)
511	0	return *this;
512
513		//
514		// Disassemble h into the sign, s,
515		// and the combined exponent and significand, e.
516		//
517
518	0	unsigned short s = _h & 0x8000;
519	0	unsigned short e = _h & 0x7fff;
520
521		//
522		// Round the exponent and significand to the nearest value
523		// where ones occur only in the (10-n) most significant bits.
524		// Note that the exponent adjusts automatically if rounding
525		// up causes the significand to overflow.
526		//
527
528	0	e >>= 9 - n;
529	0	e += e & 1;
530	0	e <<= 9 - n;
531
532		//
533		// Check for exponent overflow.
534		//
535
536	0	if (e >= 0x7c00)
537	0	{
538		//
539		// Overflow occurred -- truncate instead of rounding.
540		//
541
542	0	e = _h;
543	0	e >>= 10 - n;
544	0	e <<= 10 - n;
545	0	}
546
547		//
548		// Put the original sign bit back.
549		//
550
551	0	half h;
552	0	h._h = s \| e;
553
554	0	return h;
555	0	}
556
557
558		//-----------------------
559		// Other inline functions
560		//-----------------------
561
562		inline half
563		half::operator - () const
564	0	{
565	0	half h;
566	0	h._h = _h ^ 0x8000;
567	0	return h;
568	0	}
569
570
571		inline half &
572		half::operator = (half h)
573	0	{
574	0	_h = h._h;
575	0	return *this;
576	0	}
577
578
579		inline half &
580		half::operator = (float f)
581	0	{
582	0	*this = half (f);
583	0	return *this;
584	0	}
585
586
587		inline half &
588		half::operator += (half h)
589	0	{
590	0	this = half (float (this) + float (h));
591	0	return *this;
592	0	}
593
594
595		inline half &
596		half::operator += (float f)
597	0	{
598	0	this = half (float (this) + f);
599	0	return *this;
600	0	}
601
602
603		inline half &
604		half::operator -= (half h)
605	0	{
606	0	this = half (float (this) - float (h));
607	0	return *this;
608	0	}
609
610
611		inline half &
612		half::operator -= (float f)
613	0	{
614	0	this = half (float (this) - f);
615	0	return *this;
616	0	}
617
618
619		inline half &
620		half::operator *= (half h)
621	0	{
622	0	this = half (float (this) * float (h));
623	0	return *this;
624	0	}
625
626
627		inline half &
628		half::operator *= (float f)
629	0	{
630	0	this = half (float (this) * f);
631	0	return *this;
632	0	}
633
634
635		inline half &
636		half::operator /= (half h)
637	0	{
638	0	this = half (float (this) / float (h));
639	0	return *this;
640	0	}
641
642
643		inline half &
644		half::operator /= (float f)
645	0	{
646	0	this = half (float (this) / f);
647	0	return *this;
648	0	}
649
650
651		inline bool
652		half::isFinite () const
653	0	{
654	0	unsigned short e = (_h >> 10) & 0x001f;
655	0	return e < 31;
656	0	}
657
658
659		inline bool
660		half::isNormalized () const
661	0	{
662	0	unsigned short e = (_h >> 10) & 0x001f;
663	0	return e > 0 && e < 31;
664	0	}
665
666
667		inline bool
668		half::isDenormalized () const
669	0	{
670	0	unsigned short e = (_h >> 10) & 0x001f;
671	0	unsigned short m = _h & 0x3ff;
672	0	return e == 0 && m != 0;
673	0	}
674
675
676		inline bool
677		half::isZero () const
678	0	{
679	0	return (_h & 0x7fff) == 0;
680	0	}
681
682
683		inline bool
684		half::isNan () const
685	0	{
686	0	unsigned short e = (_h >> 10) & 0x001f;
687	0	unsigned short m = _h & 0x3ff;
688	0	return e == 31 && m != 0;
689	0	}
690
691
692		inline bool
693		half::isInfinity () const
694	0	{
695	0	unsigned short e = (_h >> 10) & 0x001f;
696	0	unsigned short m = _h & 0x3ff;
697	0	return e == 31 && m == 0;
698	0	}
699
700
701		inline bool
702		half::isNegative () const
703	0	{
704	0	return (_h & 0x8000) != 0;
705	0	}
706
707
708		inline half
709		half::posInf ()
710	0	{
711	0	half h;
712	0	h._h = 0x7c00;
713	0	return h;
714	0	}
715
716
717		inline half
718		half::negInf ()
719	0	{
720	0	half h;
721	0	h._h = 0xfc00;
722	0	return h;
723	0	}
724
725
726		inline half
727		half::qNan ()
728	0	{
729	0	half h;
730	0	h._h = 0x7fff;
731	0	return h;
732	0	}
733
734
735		inline half
736		half::sNan ()
737	0	{
738	0	half h;
739	0	h._h = 0x7dff;
740	0	return h;
741	0	}
742
743
744		inline unsigned short
745		half::bits () const
746	0	{
747	0	return _h;
748	0	}
749
750
751		inline void
752		half::setBits (unsigned short bits)
753	0	{
754	0	_h = bits;
755	0	}
756
757		#endif