Coverage Report

Created: 2025-11-16 07:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/AK/FloatingPoint.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include <AK/BitCast.h>
10
#include <AK/StdLibExtras.h>
11
#include <AK/Types.h>
12
13
namespace AK {
14
15
template<typename T>
16
struct FloatExtractor;
17
18
#ifdef AK_HAS_FLOAT_128
19
template<>
20
struct FloatExtractor<f128> {
21
    static constexpr FloatExtractor<f128> from_float(f128 f) { return bit_cast<FloatExtractor<f128>>(f); }
22
    constexpr f128 to_float() const { return bit_cast<f128>(*this); }
23
24
    using ComponentType = unsigned __int128;
25
    static constexpr int mantissa_bits = 112;
26
    static constexpr ComponentType mantissa_max = (((ComponentType)1) << 112) - 1;
27
    static constexpr int exponent_bias = 16383;
28
    static constexpr int exponent_bits = 15;
29
    static constexpr unsigned exponent_max = 32767;
30
31
    ComponentType mantissa : 112;
32
    ComponentType exponent : 15;
33
    ComponentType sign : 1;
34
};
35
// Validate that f128 and the FloatExtractor struct are 128 bits.
36
static_assert(AssertSize<f128, 16>());
37
static_assert(AssertSize<FloatExtractor<f128>, sizeof(f128)>());
38
#endif
39
40
#ifdef AK_HAS_FLOAT_80
41
template<>
42
struct FloatExtractor<f80> {
43
0
    static constexpr FloatExtractor<f80> from_float(f80 f) { return bit_cast<FloatExtractor<f80>>(f); }
44
0
    constexpr f80 to_float() const { return bit_cast<f80>(*this); }
45
46
    using ComponentType = unsigned long long;
47
    static constexpr int mantissa_bits = 64;
48
    static constexpr ComponentType mantissa_max = ~0ull;
49
    static constexpr int exponent_bias = 16383;
50
    static constexpr int exponent_bits = 15;
51
    static constexpr unsigned exponent_max = 32767;
52
53
    // This is technically wrong: Extended floating point values really only have 63 bits of mantissa
54
    // and an "integer bit" that behaves in various strange, unintuitive and non-IEEE-754 ways.
55
    // However, since all bit-fiddling float code assumes IEEE floats, it cannot handle this properly.
56
    // If we pretend that 80-bit floats are IEEE floats with 64-bit mantissas, almost everything works correctly
57
    // and we just need a few special cases.
58
    ComponentType mantissa : 64;
59
    ComponentType exponent : 15;
60
    ComponentType sign : 1;
61
};
62
static_assert(AssertSize<FloatExtractor<f80>, sizeof(f80)>());
63
#endif
64
65
template<>
66
struct FloatExtractor<f64> {
67
0
    static constexpr FloatExtractor<f64> from_float(f64 f) { return bit_cast<FloatExtractor<f64>>(f); }
68
0
    constexpr f64 to_float() const { return bit_cast<f64>(*this); }
69
70
    using ComponentType = unsigned long long;
71
    static constexpr int mantissa_bits = 52;
72
    static constexpr ComponentType mantissa_max = (1ull << 52) - 1;
73
    static constexpr int exponent_bias = 1023;
74
    static constexpr int exponent_bits = 11;
75
    static constexpr unsigned exponent_max = 2047;
76
77
    // FIXME: These types have to all be the same, otherwise this struct
78
    //        goes from being a bitfield describing the layout of an f64
79
    //        into being a multibyte mess on windows.
80
    //        Technically, '-mno-ms-bitfields' is supposed to disable this
81
    //        very intuitive and portable behaviour on windows, but it doesn't
82
    //        work with the msvc ABI.
83
    //        See <https://github.com/llvm/llvm-project/issues/24757>
84
    ComponentType mantissa : 52;
85
    ComponentType exponent : 11;
86
    ComponentType sign : 1;
87
};
88
static_assert(AssertSize<FloatExtractor<f64>, sizeof(f64)>());
89
90
template<>
91
struct FloatExtractor<f32> {
92
4.17M
    static constexpr FloatExtractor<f32> from_float(f32 f) { return bit_cast<FloatExtractor<f32>>(f); }
93
570k
    constexpr f32 to_float() const { return bit_cast<f32>(*this); }
94
95
    using ComponentType = unsigned;
96
    static constexpr int mantissa_bits = 23;
97
    static constexpr ComponentType mantissa_max = (1 << 23) - 1;
98
    static constexpr int exponent_bias = 127;
99
    static constexpr int exponent_bits = 8;
100
    static constexpr ComponentType exponent_max = 255;
101
102
    ComponentType mantissa : 23;
103
    ComponentType exponent : 8;
104
    ComponentType sign : 1;
105
};
106
static_assert(AssertSize<FloatExtractor<f32>, sizeof(f32)>());
107
108
template<size_t S, size_t E, size_t M>
109
requires(S <= 1 && E >= 1 && M >= 1 && (S + E + M) <= 64) class FloatingPointBits final {
110
public:
111
    static size_t const signbit = S;
112
    static size_t const exponentbits = E;
113
    static size_t const mantissabits = M;
114
115
    template<typename T>
116
    requires(IsIntegral<T> && IsUnsigned<T> && sizeof(T) <= 8) constexpr FloatingPointBits(T bits)
117
        : m_bits(bits)
118
    {
119
    }
120
121
    constexpr FloatingPointBits(double value)
122
        : m_bits(bit_cast<u64>(value))
123
    {
124
    }
125
126
    constexpr FloatingPointBits(float value)
127
        : m_bits(bit_cast<u32>(value))
128
    {
129
    }
130
131
    double as_double() const
132
    requires(S == 1 && E == 11 && M == 52)
133
    {
134
        return bit_cast<double>(m_bits);
135
    }
136
    float as_float() const
137
    requires(S == 1 && E == 8 && M == 23)
138
    {
139
        return bit_cast<float>(static_cast<u32>(m_bits));
140
    }
141
    u64 bits() const { return m_bits; }
142
143
private:
144
    u64 m_bits;
145
};
146
147
// Bit layouts matching the native float (1/8/23) and double (1/11/52) types.
using SingleFloatingPointBits = FloatingPointBits<1, 8, 23>;
using DoubleFloatingPointBits = FloatingPointBits<1, 11, 52>;
149
150
/**
 * Convert between two IEEE 754 floating point types in any arrangement of sign, exponent and mantissa bits.
 *
 * From and To must expose `signbit`, `exponentbits` and `mantissabits` constants, To must be
 * constructible from an unsigned 64-bit pattern and From must provide `bits()`.
 *
 * Conversion rules:
 *  - NaN stays NaN (the mantissa collapses to 1), +/-Inf stays +/-Inf.
 *  - Negative inputs become 0 if the target has no sign bit (negative NaN stays NaN).
 *  - Values too large for the target clamp to the largest finite target value.
 *  - Values too small for a normalized target number become denormalized, or 0 if
 *    they are smaller than the smallest target denormal.
 *  - Excess mantissa bits are truncated, not rounded.
 */
template<typename To, typename From>
constexpr To float_to_float(From const input)
{
    constexpr size_t from_exponent_bits = From::exponentbits;
    constexpr size_t from_mantissa_bits = From::mantissabits;
    constexpr size_t to_exponent_bits = To::exponentbits;
    constexpr size_t to_mantissa_bits = To::mantissabits;

    constexpr u64 from_exponent_nonnumber = (1ull << from_exponent_bits) - 1;
    constexpr u64 from_mantissa_mask = (1ull << from_mantissa_bits) - 1;
    constexpr u64 from_implicit_bit = 1ull << from_mantissa_bits;
    constexpr u64 to_exponent_nonnumber = (1ull << to_exponent_bits) - 1;
    constexpr u64 to_mantissa_max = (1ull << to_mantissa_bits) - 1;

    // Exponent arithmetic is done in signed integers: the difference between the two
    // biases is negative whenever the target has the smaller exponent range, and
    // unsigned math would wrap around and misclassify tiny values as overflow.
    constexpr i64 from_exponent_bias = static_cast<i64>((1ull << (from_exponent_bits - 1)) - 1);
    constexpr i64 to_exponent_bias = static_cast<i64>((1ull << (to_exponent_bits - 1)) - 1);
    constexpr i64 to_exponent_max = static_cast<i64>(to_exponent_nonnumber) - 1;

    // Assemble the target bit pattern from its three components.
    auto pack = [](u64 sign, u64 exponent, u64 mantissa) {
        u64 bits = (exponent << To::mantissabits) | mantissa;
        // Without a sign bit there is nothing to place; the shift could also be 64 bits wide.
        if constexpr (To::signbit != 0)
            bits |= sign << (To::exponentbits + To::mantissabits);
        return To(bits);
    };

    // Re-align a source-width mantissa to the target's mantissa width (truncating).
    auto shift_mantissa = [](u64 mantissa) -> u64 {
        if constexpr (From::mantissabits < To::mantissabits)
            return mantissa << (To::mantissabits - From::mantissabits);
        else
            return mantissa >> (From::mantissabits - To::mantissabits);
    };

    // Deconstruct input bits to float components
    u64 from_sign = 0;
    if constexpr (From::signbit != 0)
        from_sign = (input.bits() >> (from_exponent_bits + from_mantissa_bits)) & 1;
    u64 const from_exponent = (input.bits() >> from_mantissa_bits) & from_exponent_nonnumber;
    u64 const from_mantissa = input.bits() & from_mantissa_mask;

    u64 const to_sign = from_sign & To::signbit;

    // If target is unsigned and source is negative, clamp to 0 or keep NaN
    if constexpr (To::signbit == 0) {
        if (from_sign == 1) {
            bool const is_nan = from_exponent == from_exponent_nonnumber && from_mantissa > 0;
            if (is_nan)
                return pack(0, to_exponent_nonnumber, 1);
            return pack(0, 0, 0);
        }
    }

    // If the source is NaN or +/-Inf, keep it that way
    if (from_exponent == from_exponent_nonnumber)
        return pack(to_sign, to_exponent_nonnumber, (from_mantissa == 0) ? 0 : 1);

    // +/-0 stays +/-0
    if (from_exponent == 0 && from_mantissa == 0)
        return pack(to_sign, 0, 0);

    // Bring the source into the form significand * 2^(source_exponent - bias - mantissabits),
    // where the significand carries an explicit leading 1 at bit position `mantissabits`.
    // Normalized numbers only need their implicit 1 added. Denormalized numbers are
    // shifted left until the leading 1 shows up; their exponent then drops below 1.
    i64 source_exponent = static_cast<i64>(from_exponent);
    u64 significand = from_mantissa;
    if (from_exponent == 0) {
        source_exponent = 1;
        while ((significand & from_implicit_bit) == 0) {
            significand <<= 1;
            --source_exponent;
        }
    } else {
        significand |= from_implicit_bit;
    }

    // Determine the target exponent
    i64 const target_exponent = to_exponent_bias - from_exponent_bias + source_exponent;

    // If the calculated exponent exceeds the target's capacity, clamp both the exponent and the
    // mantissa to their maximum values.
    if (target_exponent > to_exponent_max)
        return pack(to_sign, static_cast<u64>(to_exponent_max), to_mantissa_max);

    // New exponent fits; drop the leading 1 again and shift the mantissa to fit as well
    if (target_exponent >= 1)
        return pack(to_sign, static_cast<u64>(target_exponent), shift_mantissa(significand & from_mantissa_mask));

    // The new exponent is less than 1, so we can only represent this value as a denormalized
    // number: keep the leading 1 and shift the significand right by the missing exponent
    // steps. The mantissa width change is folded into the same shift so no bits are lost
    // needlessly, and shifts of 64 or more (which would be undefined) simply yield 0.
    i64 const right_shift = (1 - target_exponent) + static_cast<i64>(from_mantissa_bits) - static_cast<i64>(to_mantissa_bits);
    u64 to_mantissa = 0;
    if (right_shift < 0)
        to_mantissa = significand << static_cast<u64>(-right_shift);
    else if (right_shift < 64)
        to_mantissa = significand >> static_cast<u64>(right_shift);
    return pack(to_sign, 0, to_mantissa);
}
271
272
template<typename O>
273
constexpr O convert_from_native_double(double input) { return float_to_float<O>(DoubleFloatingPointBits(input)); }
274
275
template<typename O>
276
constexpr O convert_from_native_float(float input) { return float_to_float<O>(SingleFloatingPointBits(input)); }
277
278
template<typename I>
279
constexpr double convert_to_native_double(I input) { return float_to_float<DoubleFloatingPointBits>(input).as_double(); }
280
281
template<typename I>
282
constexpr float convert_to_native_float(I input) { return float_to_float<SingleFloatingPointBits>(input).as_float(); }
283
284
}
285
286
#if USING_AK_GLOBALLY
287
using AK::DoubleFloatingPointBits;
288
using AK::FloatExtractor;
289
using AK::FloatingPointBits;
290
using AK::SingleFloatingPointBits;
291
292
using AK::convert_from_native_double;
293
using AK::convert_from_native_float;
294
using AK::convert_to_native_double;
295
using AK::convert_to_native_float;
296
using AK::float_to_float;
297
#endif