/src/rapidjson/include/rapidjson/internal/diyfp.h
Line | Count | Source |
1 | | // Tencent is pleased to support the open source community by making RapidJSON available. |
2 | | // |
3 | | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. |
4 | | // |
5 | | // Licensed under the MIT License (the "License"); you may not use this file except |
6 | | // in compliance with the License. You may obtain a copy of the License at |
7 | | // |
8 | | // http://opensource.org/licenses/MIT |
9 | | // |
10 | | // Unless required by applicable law or agreed to in writing, software distributed |
11 | | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
12 | | // CONDITIONS OF ANY KIND, either express or implied. See the License for the |
13 | | // specific language governing permissions and limitations under the License. |
14 | | |
15 | | // This is a C++ header-only implementation of Grisu2 algorithm from the publication: |
16 | | // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with |
17 | | // integers." ACM Sigplan Notices 45.6 (2010): 233-243. |
18 | | |
19 | | #ifndef RAPIDJSON_DIYFP_H_ |
20 | | #define RAPIDJSON_DIYFP_H_ |
21 | | |
22 | | #include "../rapidjson.h" |
23 | | #include "clzll.h" |
24 | | #include <limits> |
25 | | |
26 | | #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER) |
27 | | #include <intrin.h> |
28 | | #if !defined(_ARM64EC_) |
29 | | #pragma intrinsic(_umul128) |
30 | | #else |
31 | | #pragma comment(lib,"softintrin") |
32 | | #endif |
33 | | #endif |
34 | | |
35 | | RAPIDJSON_NAMESPACE_BEGIN |
36 | | namespace internal { |
37 | | |
38 | | #ifdef __GNUC__ |
39 | | RAPIDJSON_DIAG_PUSH |
40 | | RAPIDJSON_DIAG_OFF(effc++) |
41 | | #endif |
42 | | |
43 | | #ifdef __clang__ |
44 | | RAPIDJSON_DIAG_PUSH |
45 | | RAPIDJSON_DIAG_OFF(padded) |
46 | | #endif |
47 | | |
48 | | struct DiyFp { |
49 | 18 | DiyFp() : f(), e() {} |
50 | | |
51 | 3.61k | DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {} |
52 | | |
53 | 9 | explicit DiyFp(double d) { |
54 | 9 | union { |
55 | 9 | double d; |
56 | 9 | uint64_t u64; |
57 | 9 | } u = { d }; |
58 | | |
59 | 9 | int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize); |
60 | 9 | uint64_t significand = (u.u64 & kDpSignificandMask); |
61 | 9 | if (biased_e != 0) { |
62 | 9 | f = significand + kDpHiddenBit; |
63 | 9 | e = biased_e - kDpExponentBias; |
64 | 9 | } |
65 | 0 | else { |
66 | 0 | f = significand; |
67 | 0 | e = kDpMinExponent + 1; |
68 | 0 | } |
69 | 9 | } |
70 | | |
71 | 9 | DiyFp operator-(const DiyFp& rhs) const { |
72 | 9 | return DiyFp(f - rhs.f, e); |
73 | 9 | } |
74 | | |
75 | 992 | DiyFp operator*(const DiyFp& rhs) const { |
76 | | #if defined(_MSC_VER) && defined(_M_AMD64) |
77 | | uint64_t h; |
78 | | uint64_t l = _umul128(f, rhs.f, &h); |
79 | | if (l & (uint64_t(1) << 63)) // rounding |
80 | | h++; |
81 | | return DiyFp(h, e + rhs.e + 64); |
82 | | #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) |
83 | | __extension__ typedef unsigned __int128 uint128; |
84 | | uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f); |
85 | | uint64_t h = static_cast<uint64_t>(p >> 64); |
86 | | uint64_t l = static_cast<uint64_t>(p); |
87 | | if (l & (uint64_t(1) << 63)) // rounding |
88 | | h++; |
89 | | return DiyFp(h, e + rhs.e + 64); |
90 | | #else |
91 | 992 | const uint64_t M32 = 0xFFFFFFFF; |
92 | 992 | const uint64_t a = f >> 32; |
93 | 992 | const uint64_t b = f & M32; |
94 | 992 | const uint64_t c = rhs.f >> 32; |
95 | 992 | const uint64_t d = rhs.f & M32; |
96 | 992 | const uint64_t ac = a * c; |
97 | 992 | const uint64_t bc = b * c; |
98 | 992 | const uint64_t ad = a * d; |
99 | 992 | const uint64_t bd = b * d; |
100 | 992 | uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32); |
101 | 992 | tmp += 1U << 31; /// mult_round |
102 | 992 | return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64); |
103 | 992 | #endif |
104 | 992 | } |
105 | | |
106 | 1.03k | DiyFp Normalize() const { |
107 | 1.03k | int s = static_cast<int>(clzll(f)); |
108 | 1.03k | return DiyFp(f << s, e - s); |
109 | 1.03k | } |
110 | | |
111 | 9 | DiyFp NormalizeBoundary() const { |
112 | 9 | DiyFp res = *this; |
113 | 9 | while (!(res.f & (kDpHiddenBit << 1))) { |
114 | 0 | res.f <<= 1; |
115 | 0 | res.e--; |
116 | 0 | } |
117 | 9 | res.f <<= (kDiySignificandSize - kDpSignificandSize - 2); |
118 | 9 | res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2); |
119 | 9 | return res; |
120 | 9 | } |
121 | | |
122 | 9 | void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const { |
123 | 9 | DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary(); |
124 | 9 | DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1); |
125 | 9 | mi.f <<= mi.e - pl.e; |
126 | 9 | mi.e = pl.e; |
127 | 9 | *plus = pl; |
128 | 9 | *minus = mi; |
129 | 9 | } |
130 | | |
131 | 513 | double ToDouble() const { |
132 | 513 | union { |
133 | 513 | double d; |
134 | 513 | uint64_t u64; |
135 | 513 | }u; |
136 | 513 | RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask); |
137 | 513 | if (e < kDpDenormalExponent) { |
138 | | // Underflow. |
139 | 0 | return 0.0; |
140 | 0 | } |
141 | 513 | if (e >= kDpMaxExponent) { |
142 | | // Overflow. |
143 | 0 | return std::numeric_limits<double>::infinity(); |
144 | 0 | } |
145 | 513 | const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 : |
146 | 513 | static_cast<uint64_t>(e + kDpExponentBias); |
147 | 513 | u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize); |
148 | 513 | return u.d; |
149 | 513 | } |
150 | | |
151 | | static const int kDiySignificandSize = 64; |
152 | | static const int kDpSignificandSize = 52; |
153 | | static const int kDpExponentBias = 0x3FF + kDpSignificandSize; |
154 | | static const int kDpMaxExponent = 0x7FF - kDpExponentBias; |
155 | | static const int kDpMinExponent = -kDpExponentBias; |
156 | | static const int kDpDenormalExponent = -kDpExponentBias + 1; |
157 | | static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); |
158 | | static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); |
159 | | static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); |
160 | | |
161 | | uint64_t f; |
162 | | int e; |
163 | | }; |
164 | | |
165 | 522 | inline DiyFp GetCachedPowerByIndex(size_t index) { |
166 | | // 10^-348, 10^-340, ..., 10^340 |
167 | 522 | static const uint64_t kCachedPowers_F[] = { |
168 | 522 | RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76), |
169 | 522 | RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea), |
170 | 522 | RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df), |
171 | 522 | RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f), |
172 | 522 | RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c), |
173 | 522 | RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5), |
174 | 522 | RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d), |
175 | 522 | RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637), |
176 | 522 | RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7), |
177 | 522 | RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5), |
178 | 522 | RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b), |
179 | 522 | RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996), |
180 | 522 | RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6), |
181 | 522 | RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8), |
182 | 522 | RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053), |
183 | 522 | RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd), |
184 | 522 | RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94), |
185 | 522 | RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b), |
186 | 522 | RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac), |
187 | 522 | RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3), |
188 | 522 | RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb), |
189 | 522 | RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c), |
190 | 522 | RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000), |
191 | 522 | RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984), |
192 | 522 | RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70), |
193 | 522 | RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245), |
194 | 522 | RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8), |
195 | 522 | RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a), |
196 | 522 | RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea), |
197 | 522 | RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85), |
198 | 522 | RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2), |
199 | 522 | RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3), |
200 | 522 | RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25), |
201 | 522 | RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece), |
202 | 522 | RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5), |
203 | 522 | RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a), |
204 | 522 | RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c), |
205 | 522 | RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a), |
206 | 522 | RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129), |
207 | 522 | RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429), |
208 | 522 | RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d), |
209 | 522 | RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841), |
210 | 522 | RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9), |
211 | 522 | RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b) |
212 | 522 | }; |
213 | 522 | static const int16_t kCachedPowers_E[] = { |
214 | 522 | -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, |
215 | 522 | -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, |
216 | 522 | -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, |
217 | 522 | -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, |
218 | 522 | -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, |
219 | 522 | 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, |
220 | 522 | 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, |
221 | 522 | 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, |
222 | 522 | 907, 933, 960, 986, 1013, 1039, 1066 |
223 | 522 | }; |
224 | 522 | RAPIDJSON_ASSERT(index < 87); |
225 | 522 | return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]); |
226 | 522 | } |
227 | | |
228 | 9 | inline DiyFp GetCachedPower(int e, int* K) { |
229 | | |
230 | | //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374; |
231 | 9 | double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive |
232 | 9 | int k = static_cast<int>(dk); |
233 | 9 | if (dk - k > 0.0) |
234 | 9 | k++; |
235 | | |
236 | 9 | unsigned index = static_cast<unsigned>((k >> 3) + 1); |
237 | 9 | *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table |
238 | | |
239 | 9 | return GetCachedPowerByIndex(index); |
240 | 9 | } |
241 | | |
242 | 513 | inline DiyFp GetCachedPower10(int exp, int *outExp) { |
243 | 513 | RAPIDJSON_ASSERT(exp >= -348); |
244 | 513 | unsigned index = static_cast<unsigned>(exp + 348) / 8u; |
245 | 513 | *outExp = -348 + static_cast<int>(index) * 8; |
246 | 513 | return GetCachedPowerByIndex(index); |
247 | 513 | } |
248 | | |
249 | | #ifdef __GNUC__ |
250 | | RAPIDJSON_DIAG_POP |
251 | | #endif |
252 | | |
253 | | #ifdef __clang__ |
254 | | RAPIDJSON_DIAG_POP |
255 | | RAPIDJSON_DIAG_OFF(padded) |
256 | | #endif |
257 | | |
258 | | } // namespace internal |
259 | | RAPIDJSON_NAMESPACE_END |
260 | | |
261 | | #endif // RAPIDJSON_DIYFP_H_ |