Coverage Report

Created: 2026-06-04 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/astc-encoder/Source/astcenc_mathlib_softfloat.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2021,2025 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Soft-float library for IEEE-754.
20
 */
21
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
22
23
#include "astcenc_mathlib.h"
24
25
/*  sized soft-float types. These are mapped to the sized integer
26
    types of C99, instead of C's floating-point types; this is because
27
    the library needs to maintain exact, bit-level control on all
28
    operations on these data types. */
29
typedef uint16_t sf16;
30
typedef uint32_t sf32;
31
32
/******************************************
33
  helper functions
34
 ******************************************/
35
36
/* Idiomatic count-leading zeros, generates native instruction on modern compilers. */
37
static uint32_t clz32(uint32_t inp)
38
4.25k
{
39
4.25k
    uint32_t count = 32;
40
42.0k
    while (inp)
41
37.7k
    {
42
37.7k
        inp >>= 1;
43
37.7k
        count--;
44
37.7k
    }
45
4.25k
    return count;
46
4.25k
}
47
48
/* the five rounding modes that IEEE-754r defines */
49
typedef enum
50
{
51
  SF_UP = 0,        /* round towards positive infinity */
52
  SF_DOWN = 1,      /* round towards negative infinity */
53
  SF_TOZERO = 2,      /* round towards zero */
54
  SF_NEARESTEVEN = 3,   /* round toward nearest value; if mid-between, round to even value */
55
  SF_NEARESTAWAY = 4    /* round toward nearest value; if mid-between, round away from zero */
56
} roundmode;
57
58
59
static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
60
0
{
61
0
  uint32_t vl1 = UINT32_C(1) << shamt;
62
0
  uint32_t inp2 = inp + (vl1 >> 1); /* added 0.5 ULP */
63
0
  uint32_t msk = (inp | UINT32_C(1)) & vl1; /* nonzero if odd. '| 1' forces it to 1 if the shamt is 0. */
64
0
  msk--;            /* negative if even, nonnegative if odd. */
65
0
  inp2 -= (msk >> 31);    /* subtract epsilon before shift if even. */
66
0
  inp2 >>= shamt;
67
0
  return inp2;
68
0
}
69
70
static uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
71
0
{
72
0
  uint32_t vl1 = (UINT32_C(1) << shamt) >> 1;
73
0
  inp += vl1;
74
0
  inp >>= shamt;
75
0
  return inp;
76
0
}
77
78
static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
79
0
{
80
0
  uint32_t vl1 = UINT32_C(1) << shamt;
81
0
  inp += vl1;
82
0
  inp--;
83
0
  inp >>= shamt;
84
0
  return inp;
85
0
}
86
87
/* convert from FP16 to FP32. */
88
static sf32 sf16_to_sf32(sf16 inp)
89
817k
{
90
817k
  uint32_t inpx = inp;
91
92
  /*
93
    This table contains, for every FP16 sign/exponent value combination,
94
    the difference between the input FP16 value and the value obtained
95
    by shifting the correct FP32 result right by 13 bits.
96
    This table allows us to handle every case except denormals and NaN
97
    with just 1 table lookup, 2 shifts and 1 add.
98
  */
99
100
4.08M
  #define WITH_MSB(a) (UINT32_C(a) | (1u << 31))
101
817k
  static const uint32_t tbl[64] =
102
817k
  {
103
817k
    WITH_MSB(0x00000), 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,          0x1C000,
104
817k
             0x1C000,  0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,          0x1C000,
105
817k
             0x1C000,  0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,          0x1C000,
106
817k
             0x1C000,  0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, WITH_MSB(0x38000),
107
817k
    WITH_MSB(0x38000), 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,          0x54000,
108
817k
             0x54000,  0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,          0x54000,
109
817k
             0x54000,  0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,          0x54000,
110
817k
             0x54000,  0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, WITH_MSB(0x70000)
111
817k
  };
112
113
817k
  uint32_t res = tbl[inpx >> 10];
114
817k
  res += inpx;
115
116
  /* Normal cases: MSB of 'res' not set. */
117
817k
  if ((res & WITH_MSB(0)) == 0)
118
694k
  {
119
694k
    return res << 13;
120
694k
  }
121
122
  /* Infinity and Zero: 10 LSB of 'res' not set. */
123
122k
  if ((res & 0x3FF) == 0)
124
118k
  {
125
118k
    return res << 13;
126
118k
  }
127
128
  /* NaN: the exponent field of 'inp' is non-zero. */
129
4.39k
  if ((inpx & 0x7C00) != 0)
130
133
  {
131
    /* All NaNs are quietened. */
132
133
    return (res << 13) | 0x400000;
133
133
  }
134
135
  /* Denormal cases */
136
4.25k
  uint32_t sign = (inpx & 0x8000) << 16;
137
4.25k
  uint32_t mskval = inpx & 0x7FFF;
138
4.25k
  uint32_t leadingzeroes = clz32(mskval);
139
4.25k
  mskval <<= leadingzeroes;
140
4.25k
  return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
141
4.39k
}
142
143
/* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */
144
static sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
145
24
{
146
  /* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */
147
24
  static const uint8_t tab[512] {
148
24
    0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
149
24
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
150
24
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
151
24
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
152
24
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
153
24
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
154
24
    10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
155
24
    20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
156
24
    30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
157
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
158
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
159
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
160
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
161
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
162
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
163
24
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,
164
165
24
    5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
166
24
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
167
24
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
168
24
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
169
24
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
170
24
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
171
24
    15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
172
24
    25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
173
24
    35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
174
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
175
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
176
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
177
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
178
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
179
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
180
24
    45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
181
24
  };
182
183
  /* many of the cases below use a case-dependent magic constant. So we look up a magic constant before actually performing the switch. This table allows us to group cases, thereby minimizing code
184
     size. */
185
24
  static const uint32_t tabx[60] {
186
24
    UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
187
24
    UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
188
24
    UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
189
24
    UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
190
24
    UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
191
24
    UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
192
24
    UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
193
24
    UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
194
24
    UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
195
24
  };
196
197
24
  uint32_t p;
198
24
  uint32_t idx = rmode + tab[inp >> 23];
199
24
  uint32_t vlx = tabx[idx];
200
24
  switch (idx)
201
24
  {
202
    /*
203
      Positive number which may be Infinity or NaN.
204
      We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
205
      (If we don't do this quieting, then a NaN  that is distinguished only by having
206
      its low-order bits set, would be turned into an INF. */
207
0
  case 50:
208
0
  case 51:
209
0
  case 52:
210
0
  case 53:
211
0
  case 54:
212
0
  case 55:
213
0
  case 56:
214
0
  case 57:
215
0
  case 58:
216
0
  case 59:
217
    /*
218
      the input value is 0x7F800000 or 0xFF800000 if it is INF.
219
      By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
220
      For NaNs, however, this operation will keep bit 23 with the value 1.
221
      We can then extract bit 23, and logical-OR bit 9 of the result with this
222
      bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
223
      of the mantissa is set.)
224
    */
225
0
    p = (inp - 1) & UINT32_C(0x800000); /* zero if INF, nonzero if NaN. */
226
0
    return static_cast<sf16>(((inp + vlx) >> 13) | (p >> 14));
227
    /*
228
      positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
229
      If it is, then return 0, else return 1 (the smallest representable nonzero number)
230
    */
231
0
  case 0:
232
    /*
233
      -inp will set the MSB if the input number is nonzero.
234
      Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
235
    */
236
0
    return static_cast<sf16>(static_cast<uint32_t>((-static_cast<int32_t>(inp))) >> 31);
237
238
    /*
239
      negative, exponent = , round-mode == DOWN, need to check whether number is
240
      actually 0. If it is, return 0x8000 ( float -0.0 )
241
      Else return the smallest negative number ( 0x8001 ) */
242
0
  case 6:
243
    /*
244
      in this case 'vlx' is 0x80000000. By subtracting the input value from it,
245
      we obtain a value that is 0 if the input value is in fact zero and has
246
      the MSB set if it isn't. We then right-shift the value by 31 places to
247
      get a value that is 0 if the input is -0.0 and 1 otherwise.
248
    */
249
0
    return static_cast<sf16>(((vlx - inp) >> 31) + UINT32_C(0x8000));
250
251
    /*
252
      for all other cases involving underflow/overflow, we don't need to
253
      do actual tests; we just return 'vlx'.
254
    */
255
0
  case 1:
256
0
  case 2:
257
14
  case 3:
258
14
  case 4:
259
14
  case 5:
260
14
  case 7:
261
14
  case 8:
262
14
  case 9:
263
14
  case 10:
264
14
  case 11:
265
14
  case 12:
266
14
  case 13:
267
14
  case 14:
268
14
  case 15:
269
14
  case 16:
270
14
  case 17:
271
14
  case 18:
272
14
  case 19:
273
14
  case 40:
274
14
  case 41:
275
14
  case 42:
276
14
  case 43:
277
14
  case 44:
278
14
  case 45:
279
14
  case 46:
280
14
  case 47:
281
14
  case 48:
282
14
  case 49:
283
14
    return static_cast<sf16>(vlx);
284
285
    /*
286
      for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
287
      FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
288
      baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
289
      from zero. for round-to-nearest away, the constant is 2^12, causing roundoff away from zero.
290
      for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
291
      except for odd input numbers. For odd input numbers, we need to add 1 to the constant. */
292
293
    /* normal number, all rounding modes except round-to-nearest-even: */
294
0
  case 30:
295
0
  case 31:
296
0
  case 32:
297
0
  case 34:
298
0
  case 35:
299
0
  case 36:
300
0
  case 37:
301
0
  case 39:
302
0
    return static_cast<sf16>((inp + vlx) >> 13);
303
304
    /* normal number, round-to-nearest-even. */
305
10
  case 33:
306
10
  case 38:
307
10
    p = inp + vlx;
308
10
    p += (inp >> 13) & 1;
309
10
    return static_cast<sf16>(p >> 13);
310
311
    /*
312
      the various denormal cases. These are not expected to be common, so their performance is a bit
313
      less important. For each of these cases, we need to extract an exponent and a mantissa
314
      (including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
315
      depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
316
      sign of the resulting denormal number.
317
    */
318
0
  case 21:
319
0
  case 22:
320
0
  case 25:
321
0
  case 27:
322
    /* denormal, round towards zero. */
323
0
    p = 126 - ((inp >> 23) & 0xFF);
324
0
    return static_cast<sf16>((((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000)) >> p) | vlx);
325
0
  case 20:
326
0
  case 26:
327
    /* denormal, round away from zero. */
328
0
    p = 126 - ((inp >> 23) & 0xFF);
329
0
    return static_cast<sf16>(rtup_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
330
0
  case 24:
331
0
  case 29:
332
    /* denormal, round to nearest-away */
333
0
    p = 126 - ((inp >> 23) & 0xFF);
334
0
    return static_cast<sf16>(rtna_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
335
0
  case 23:
336
0
  case 28:
337
    /* denormal, round to nearest-even. */
338
0
    p = 126 - ((inp >> 23) & 0xFF);
339
0
    return static_cast<sf16>(rtne_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
340
24
  }
341
342
0
  return 0;
343
24
}
344
345
/* convert from soft-float to native-float */
346
float sf16_to_float(uint16_t p)
347
817k
{
348
817k
  return astc::uint_as_float(sf16_to_sf32(p));
349
817k
}
350
351
/* convert from native-float to soft-float */
352
uint16_t float_to_sf16(float p)
353
24
{
354
24
  unsigned int ip = astc::float_as_uint(p);
355
24
  return sf32_to_sf16(ip, SF_NEARESTEVEN);
356
24
}
357
358
#endif