/src/spotify-json/include/spotify/json/codec/number.hpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2015-2019 Spotify AB |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
5 | | * use this file except in compliance with the License. You may obtain a copy of |
6 | | * the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
12 | | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
13 | | * License for the specific language governing permissions and limitations under |
14 | | * the License. |
15 | | */ |
16 | | |
17 | | #pragma once |
18 | | |
19 | | #include <algorithm> |
20 | | #include <cmath> |
21 | | #include <cstdlib> |
22 | | #include <type_traits> |
23 | | #include <double-conversion/double-conversion.h> |
24 | | #include <spotify/json/decode_context.hpp> |
25 | | #include <spotify/json/default_codec.hpp> |
26 | | #include <spotify/json/detail/decode_helpers.hpp> |
27 | | #include <spotify/json/detail/encode_helpers.hpp> |
28 | | #include <spotify/json/detail/encode_integer.hpp> |
29 | | #include <spotify/json/detail/macros.hpp> |
30 | | #include <spotify/json/encode_context.hpp> |
31 | | |
32 | | namespace spotify { |
33 | | namespace json { |
34 | | namespace detail { |
35 | | |
36 | | float decode_float(decode_context &context); |
37 | | double decode_double(decode_context &context); |
38 | | void encode_float(encode_context &context, float value); |
39 | | void encode_double(encode_context &context, double value); |
40 | | |
41 | | template <typename T> T decode_floating_point(decode_context &context); |
42 | | template <typename T> void encode_floating_point(encode_context &context, T value); |
43 | | |
44 | | template <> |
45 | 0 | json_force_inline float decode_floating_point(decode_context &context) { |
46 | 0 | return decode_float(context); |
47 | 0 | } |
48 | | |
49 | | template <> |
50 | 0 | json_force_inline double decode_floating_point(decode_context &context) { |
51 | 0 | return decode_double(context); |
52 | 0 | } |
53 | | |
54 | | template <> |
55 | 0 | json_force_inline void encode_floating_point(encode_context &context, float value) { |
56 | 0 | encode_float(context, value); |
57 | 0 | } |
58 | | |
59 | | template <> |
60 | 0 | json_force_inline void encode_floating_point(encode_context &context, double value) { |
61 | 0 | encode_double(context, value); |
62 | 0 | } |
63 | | |
64 | | template <typename T> |
65 | | class floating_point_t { |
66 | | public: |
67 | | using object_type = T; |
68 | | |
69 | | json_force_inline object_type decode(decode_context &context) const { |
70 | | return decode_floating_point<object_type>(context); |
71 | | } |
72 | | |
73 | | json_force_inline void encode(encode_context &context, const object_type &value) const { |
74 | | encode_floating_point<object_type>(context, value); |
75 | | } |
76 | | }; |
77 | | |
78 | | template <typename T, bool is_positive> |
79 | | struct integer_ops { |
80 | | static json_force_inline bool is_overflow(T old_value, T new_value) { |
81 | | return (new_value < old_value); |
82 | | } |
83 | | |
84 | | static json_force_inline T accumulate(T old_value, T acc_value) { |
85 | | return (old_value + acc_value); |
86 | | } |
87 | | }; |
88 | | |
89 | | template <typename T> |
90 | | struct integer_ops<T, false> { |
91 | | static json_force_inline bool is_overflow(T old_value, T new_value) { |
92 | | return (new_value > old_value); |
93 | | } |
94 | | |
95 | | static json_force_inline T accumulate(T old_value, T acc_value) { |
96 | | return (old_value - acc_value); |
97 | | } |
98 | | }; |
99 | | |
100 | | template <typename T> |
101 | | json_force_inline bool is_invalid_digit(T digit) { |
102 | | const auto forced_positive = static_cast<unsigned>(digit); |
103 | | return (forced_positive > 9); // negative number -> very large positive number |
104 | | } |
105 | | |
106 | | template <typename T> |
107 | | json_force_inline T to_integer(const char c) { |
108 | | return T(c - '0'); |
109 | | } |
110 | | |
111 | | template <typename iterator_type> |
112 | | json_force_inline iterator_type find_non_digit(const iterator_type begin, const iterator_type end) { |
113 | | return std::find_if_not(begin, end, [](const char c) { |
114 | | return (c >= '0' && c <= '9'); |
115 | | }); |
116 | | } |
117 | | |
118 | | /** |
119 | | * Calculate 'exp_10(e, v) = v * 10^e', throwing a decode_exception if the value |
120 | | * overflows the integer type. This function executes in linear time over the |
121 | | * value of 'e', so it will not be very efficient for large exponents, although |
122 | | * the value will overflow rather quickly so the runtime is bounded (a value of |
123 | | * zero is specifically handled to avoid a semi-infinite loop). Note that the |
124 | | * arguments are reversed to make the function easier to call from the functions |
125 | | * below (only decode_with_positive_exponent_and_too_few_decimal_digits(...)). |
126 | | */ |
127 | | template <typename T, bool is_positive> |
128 | | json_force_inline T exp_10( |
129 | | decode_context &context, |
130 | | const unsigned exponent, |
131 | | const T initial_value) { |
132 | | T value = initial_value; |
133 | | if (json_likely(value)) { |
134 | | using intops = integer_ops<T, is_positive>; |
135 | | for (unsigned i = 0; i < exponent; i++) { |
136 | | const auto old_value = value; |
137 | | value *= 10; |
138 | | fail_if(context, intops::is_overflow(old_value, value), "Integer overflow"); |
139 | | } |
140 | | } |
141 | | return value; |
142 | | } |
143 | | |
144 | | /** |
145 | | * Decode an integer specified in a byte range. The range must be known to only |
146 | | * contain digits characters ('0' through '9'). If it contains anything else, |
147 | | * the result is undefined. The output variable 'did_overflow' will be set when |
148 | | * the parsed value overflows the integer type. |
149 | | */ |
150 | | template <typename T, bool is_positive> |
151 | | json_never_inline T decode_integer_range_with_overflow( |
152 | | decode_context & /*context*/, |
153 | | const char *begin, |
154 | | const char *end, |
155 | | const T initial_value, |
156 | | bool &did_overflow) { |
157 | | T value = initial_value; |
158 | | |
159 | | using intops = integer_ops<T, is_positive>; |
160 | | for (auto it = begin; it != end; ++it) { |
161 | | const auto c = (*it); |
162 | | const auto i = to_integer<T>(c); |
163 | | const auto old_value = value; |
164 | | value = intops::accumulate(value * 10, i); |
165 | | if (json_unlikely(intops::is_overflow(old_value, value))) { |
166 | | did_overflow = true; |
167 | | return old_value; |
168 | | } |
169 | | } |
170 | | |
171 | | return value; |
172 | | } |
173 | | |
174 | | /** |
175 | | * Decode an integer specified in a byte range. The range must be known to only |
176 | | * contain digits characters ('0' through '9'). If it contains anything else, |
177 | | * the result is undefined. A decode_exception is thrown if the value overflows |
178 | | * the integer type. |
179 | | */ |
180 | | template <typename T, bool is_positive> |
181 | | json_never_inline T decode_integer_range( |
182 | | decode_context &context, |
183 | | const char *begin, |
184 | | const char *end, |
185 | | const T initial_value = 0) { |
186 | | bool did_overflow = false; |
187 | | const T value = decode_integer_range_with_overflow<T, is_positive>( |
188 | | context, |
189 | | begin, |
190 | | end, |
191 | | initial_value, |
192 | | did_overflow); |
193 | | fail_if(context, did_overflow, "Integer overflow"); |
194 | | return value; |
195 | | } |
196 | | |
197 | | /** |
198 | | * Decode an integer that has a negative exponent. We can easily do this by |
199 | | * cutting off the least significant digits of the integer part of the number. |
200 | | * If the negative exponent is larger than the number of integer digits, zero is |
201 | | * returned. If the parsed number is too large to fit in the given integer type, |
202 | | * a decode_exception is thrown. |
203 | | */ |
204 | | template <typename T, bool is_positive> |
205 | | json_never_inline T decode_with_negative_exponent( |
206 | | decode_context &context, |
207 | | const unsigned exponent, |
208 | | const char *int_beg, |
209 | | const char *int_end) { |
210 | | const auto num_int_digits = static_cast<unsigned>(int_end - int_beg); |
211 | | const auto lshift_int_end = (int_end - exponent); |
212 | | return (json_likely(num_int_digits > exponent) ? |
213 | | decode_integer_range<T, is_positive>(context, int_beg, lshift_int_end) : |
214 | | 0); // the negative exponent is larger than the number of digits, nothing left |
215 | | } |
216 | | |
217 | | /** |
218 | | * Decode an integer that has a positive exponent. We can easily do this by |
219 | | * pretending that the decimal digits are just a continuation of the integer |
220 | | * digits, until the exponent has been used up. If there are not enough decimal |
221 | | * digits, the remaining exponent is used to multiply the parsed (from both the |
222 | | * integer and decimal digits) value appropriately. If the parsed number is too |
223 | | * large to fit in the given integer type, a decode_exception is thrown. |
224 | | */ |
225 | | template <typename T, bool is_positive> |
226 | | json_never_inline T decode_with_positive_exponent( |
227 | | decode_context &context, |
228 | | const unsigned exponent, |
229 | | const char *int_beg, |
230 | | const char *int_end, |
231 | | const char *dec_beg, |
232 | | const char *dec_end) { |
233 | | T value; |
234 | | const auto num_dec_digits = static_cast<unsigned>(dec_end - dec_beg); |
235 | | if (num_dec_digits >= exponent) { |
236 | | value = decode_integer_range<T, is_positive>(context, int_beg, int_end); |
237 | | value = decode_integer_range<T, is_positive>(context, dec_beg, dec_beg + exponent, value); |
238 | | } else { |
239 | | value = decode_integer_range<T, is_positive>(context, int_beg, int_end); |
240 | | value = decode_integer_range<T, is_positive>(context, dec_beg, dec_end, value); |
241 | | value = exp_10<T, is_positive>(context, exponent - num_dec_digits, value); |
242 | | } |
243 | | return value; |
244 | | } |
245 | | |
246 | | /** |
247 | | * It is possible that the exponent overflows, but this does not necessary mean |
248 | | * that the parsed integer would overflow as well, e.g., a zero integer with a |
249 | | * very large exponent is still a zero integer; so when we catch an overflow |
250 | | * error for the exponent, we do some special case handling to figure out which |
251 | | * integer value to return. |
252 | | */ |
253 | | template <typename T> |
254 | | json_never_inline T handle_overflowing_exponent( |
255 | | decode_context &context, |
256 | | const bool exp_is_positive, |
257 | | const char *int_beg, |
258 | | const char *int_end, |
259 | | const char *dec_beg, |
260 | | const char *dec_end) { |
261 | | bool ignore; |
262 | | const auto i = decode_integer_range_with_overflow<unsigned, true>(context, int_beg, int_end, 0, ignore); |
263 | | const auto d = decode_integer_range_with_overflow<unsigned, true>(context, dec_beg, dec_end, 0, ignore); |
264 | | fail_if(context, exp_is_positive && (i || d), "Integer overflow"); |
265 | | return 0; |
266 | | } |
267 | | |
268 | | /** |
269 | | * Decode the "tricky" integer at the given context position. By tricky we mean |
270 | | * an integer that has decimal digits or an exponent or both. This function is |
271 | | * carefully constructed to not overflow unless the parsed integer (taking the |
272 | | * exponent into account) overflows the given type, e.g., 52e-1 = 5. It also |
273 | | * makes sure to not discard the decimal digits until the exponent has been |
274 | | * taken into account, e.g., 5.2e1 = 52. If the parsed number is too large to |
275 | | * fit in the given integer type, a decode_exception is thrown. |
276 | | */ |
277 | | template <typename T, bool is_positive> |
278 | | json_never_inline T decode_integer_tricky(decode_context &context, const char *int_beg) { |
279 | | // Find [xxxx].yyyyE±zzzz |
280 | | auto int_end = find_non_digit(int_beg, context.end); |
281 | | context.position = int_end; |
282 | | |
283 | | // Find xxxx.[yyyy]E±zzzz |
284 | | auto dec_beg = int_end; |
285 | | auto dec_end = int_end; |
286 | | if (peek(context) == '.') { |
287 | | skip_unchecked_1(context); |
288 | | dec_beg = context.position; |
289 | | dec_end = find_non_digit(dec_beg, context.end); |
290 | | fail_if(context, dec_beg == dec_end, "Invalid digits after decimal point"); |
291 | | context.position = dec_end; |
292 | | } |
293 | | |
294 | | // Find xxxx.yyyyE[±zzzz] |
295 | | auto exp_is_positive = true; |
296 | | auto exp_beg = dec_end; |
297 | | auto exp_end = dec_end; |
298 | | const auto e = peek(context); |
299 | | if (e == 'e' || e == 'E') { |
300 | | skip_unchecked_1(context); |
301 | | const auto sign = peek(context); |
302 | | if (sign == '-' || sign == '+') { |
303 | | exp_is_positive = (sign == '+'); |
304 | | skip_unchecked_1(context); |
305 | | } |
306 | | exp_beg = context.position; |
307 | | exp_end = find_non_digit(exp_beg, context.end); |
308 | | fail_if(context, exp_beg == exp_end, "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"); |
309 | | context.position = exp_end; |
310 | | } |
311 | | |
312 | | bool did_overflow = false; |
313 | | const auto exp = decode_integer_range_with_overflow<unsigned, true>(context, exp_beg, exp_end, 0, did_overflow); |
314 | | if (json_unlikely(did_overflow)) { |
315 | | return handle_overflowing_exponent<T>(context, exp_is_positive, int_beg, int_end, dec_beg, dec_end); |
316 | | } |
317 | | |
318 | | return (json_likely(exp_is_positive) ? |
319 | | decode_with_positive_exponent<T, is_positive>(context, exp, int_beg, int_end, dec_beg, dec_end) : |
320 | | decode_with_negative_exponent<T, is_positive>(context, exp, int_beg, int_end)); |
321 | | } |
322 | | |
323 | | /** |
324 | | * Decode the integer at the context position. The integer can be specified |
325 | | * either as a pure integer: 'xxxx', where 'x' is a digit character between '0' |
326 | | * and '9'; and 'xxxx.yyyyE±zzzz', where 'y' and 'z' also are digit characters. |
327 | | * Pure integers are easy and fast to parse, but if we run into a decimal point |
328 | | * or an "exponent E", we need to switch over to a more complex parser. The same |
329 | | * thing happens if we overflow, because we cannot yet know if this was a true |
330 | | * overflow or if a negative exponent will reduce the integer into range again. |
331 | | * If the parsed number is too large to fit in the given integer type, a |
332 | | * decode_exception is thrown. Decimal digits are simply discarded if they are |
333 | | * not used, i.e., if there is no positive exponent. |
334 | | */ |
335 | | template <typename T, bool is_positive> |
336 | | json_never_inline T decode_integer(decode_context &context) { |
337 | | using intops = integer_ops<T, is_positive>; |
338 | | const auto pos = context.position; |
339 | | const auto next_char = next(context); |
340 | | const auto next_char_as_int = to_integer<T>(next_char); |
341 | | fail_if(context, is_invalid_digit(next_char_as_int), "Invalid integer"); |
342 | | T value = intops::accumulate(0, next_char_as_int); |
343 | | |
344 | | while (json_likely(context.remaining())) { |
345 | | const auto next_unchecked = peek_unchecked(context); |
346 | | const auto next_unchecked_as_int = to_integer<T>(next_unchecked); |
347 | | if (is_invalid_digit(next_unchecked_as_int)) { |
348 | | const auto is_tricky = ((next_unchecked == '.') | (next_unchecked == 'e') | (next_unchecked == 'E')); |
349 | | return (json_unlikely(is_tricky) ? decode_integer_tricky<T, is_positive>(context, pos) : value); |
350 | | } |
351 | | |
352 | | skip_unchecked_1(context); |
353 | | const auto old_value = value; |
354 | | value = intops::accumulate(value * 10, next_unchecked_as_int); |
355 | | if (json_unlikely(intops::is_overflow(old_value, value))) { |
356 | | return decode_integer_tricky<T, is_positive>(context, pos); |
357 | | } |
358 | | } |
359 | | |
360 | | return value; |
361 | | } |
362 | | |
363 | | template <typename T> |
364 | | json_force_inline T decode_negative_integer(decode_context &context) { |
365 | | skip_unchecked_1(context); // Skip past leading '-' character (checked in decode(...)). |
366 | | return decode_integer<T, false>(context); |
367 | | } |
368 | | |
369 | | template <typename T> |
370 | | json_force_inline T decode_positive_integer(decode_context &context) { |
371 | | return decode_integer<T, true>(context); |
372 | | } |
373 | | |
374 | | template <typename T, bool is_integer, bool is_signed> |
375 | | class integer_t; |
376 | | |
377 | | template <typename T> |
378 | | class integer_t<T, true, false> { |
379 | | public: |
380 | | using object_type = T; |
381 | | |
382 | | json_force_inline object_type decode(decode_context &context) const { |
383 | | return decode_positive_integer<object_type>(context); |
384 | | } |
385 | | |
386 | | json_force_inline void encode(encode_context &context, const object_type value) const { |
387 | | encode_positive_integer(context, value); |
388 | | } |
389 | | }; |
390 | | |
391 | | template <typename T> |
392 | | class integer_t<T, true, true> { |
393 | | public: |
394 | | using object_type = T; |
395 | | |
396 | | json_force_inline object_type decode(decode_context &context) const { |
397 | | return (peek(context) == '-' ? |
398 | | decode_negative_integer<object_type>(context) : |
399 | | decode_positive_integer<object_type>(context)); |
400 | | } |
401 | | |
402 | | json_force_inline void encode(encode_context &context, const object_type value) const { |
403 | | if (value < 0) { |
404 | | encode_negative_integer(context, value); |
405 | | } else { |
406 | | encode_positive_integer(context, value); |
407 | | } |
408 | | } |
409 | | }; |
410 | | |
411 | | template <typename T> |
412 | | using is_bool = std::is_same<typename std::remove_cv<T>::type, bool>; |
413 | | |
414 | | } // namespace detail |
415 | | |
416 | | namespace codec { |
417 | | |
418 | | template <typename T> |
419 | | class number_t final : public detail::integer_t<T, |
420 | | std::is_integral<T>::value, |
421 | | std::is_signed<T>::value> { |
422 | | static_assert( |
423 | | std::is_integral<T>::value && !detail::is_bool<T>::value, |
424 | | "Trying to use number_t codec for boolean"); |
425 | | }; |
426 | | |
427 | | template <> |
428 | | class number_t<float> final : public detail::floating_point_t<float> { |
429 | | }; |
430 | | |
431 | | template <> |
432 | | class number_t<double> final : public detail::floating_point_t<double> { |
433 | | }; |
434 | | |
435 | | template <typename T> |
436 | | number_t<T> number() { |
437 | | return number_t<T>(); |
438 | | } |
439 | | |
440 | | } // namespace codec |
441 | | |
442 | | template <typename T> |
443 | | struct default_codec_t { |
444 | | static_assert( |
445 | | (std::is_integral<T>::value || std::is_floating_point<T>::value) && |
446 | | !detail::is_bool<T>::value, |
447 | | "No default_codec_t specialization for type T"); |
448 | | |
449 | | static codec::number_t<T> codec() { |
450 | | return codec::number_t<T>(); |
451 | | } |
452 | | }; |
453 | | |
454 | | } // namespace json |
455 | | } // namespace spotify |