/src/abseil-cpp/absl/strings/numbers.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright 2017 The Abseil Authors.  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | // you may not use this file except in compliance with the License.  | 
5  |  | // You may obtain a copy of the License at  | 
6  |  | //  | 
7  |  | //      https://www.apache.org/licenses/LICENSE-2.0  | 
8  |  | //  | 
9  |  | // Unless required by applicable law or agreed to in writing, software  | 
10  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
11  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
12  |  | // See the License for the specific language governing permissions and  | 
13  |  | // limitations under the License.  | 
14  |  | //  | 
15  |  | // -----------------------------------------------------------------------------  | 
16  |  | // File: numbers.h  | 
17  |  | // -----------------------------------------------------------------------------  | 
18  |  | //  | 
19  |  | // This package contains functions for converting strings to numbers. For  | 
20  |  | // converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,  | 
21  |  | // which automatically detect and convert most number values appropriately.  | 
22  |  |  | 
23  |  | #ifndef ABSL_STRINGS_NUMBERS_H_  | 
24  |  | #define ABSL_STRINGS_NUMBERS_H_  | 
25  |  |  | 
26  |  | #ifdef __SSSE3__  | 
27  |  | #include <tmmintrin.h>  | 
28  |  | #endif  | 
29  |  |  | 
30  |  | #ifdef _MSC_VER  | 
31  |  | #include <intrin.h>  | 
32  |  | #endif  | 
33  |  |  | 
34  |  | #include <cstddef>  | 
35  |  | #include <cstdlib>  | 
36  |  | #include <cstring>  | 
37  |  | #include <ctime>  | 
38  |  | #include <limits>  | 
39  |  | #include <string>  | 
40  |  | #include <type_traits>  | 
41  |  |  | 
42  |  | #include "absl/base/config.h"  | 
43  |  | #include "absl/base/internal/endian.h"  | 
44  |  | #include "absl/base/macros.h"  | 
45  |  | #include "absl/base/port.h"  | 
46  |  | #include "absl/numeric/bits.h"  | 
47  |  | #include "absl/numeric/int128.h"  | 
48  |  | #include "absl/strings/string_view.h"  | 
49  |  |  | 
50  |  | namespace absl { | 
51  |  | ABSL_NAMESPACE_BEGIN  | 
52  |  |  | 
53  |  | // SimpleAtoi()  | 
54  |  | //  | 
55  |  | // Converts the given string (optionally followed or preceded by ASCII  | 
56  |  | // whitespace) into an integer value, returning `true` if successful. The string  | 
57  |  | // must reflect a base-10 integer whose value falls within the range of the  | 
58  |  | // integer type (optionally preceded by a `+` or `-`). If any errors are  | 
59  |  | // encountered, this function returns `false`, leaving `out` in an unspecified  | 
60  |  | // state.  | 
61  |  | template <typename int_type>  | 
62  |  | ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out);  | 
63  |  |  | 
64  |  | // SimpleAtof()  | 
65  |  | //  | 
66  |  | // Converts the given string (optionally followed or preceded by ASCII  | 
67  |  | // whitespace) into a float, which may be rounded on overflow or underflow,  | 
68  |  | // returning `true` if successful.  | 
69  |  | // See https://en.cppreference.com/w/c/string/byte/strtof for details about the  | 
70  |  | // allowed formats for `str`, except SimpleAtof() is locale-independent and will  | 
71  |  | // always use the "C" locale. If any errors are encountered, this function  | 
72  |  | // returns `false`, leaving `out` in an unspecified state.  | 
73  |  | ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);  | 
74  |  |  | 
75  |  | // SimpleAtod()  | 
76  |  | //  | 
77  |  | // Converts the given string (optionally followed or preceded by ASCII  | 
78  |  | // whitespace) into a double, which may be rounded on overflow or underflow,  | 
79  |  | // returning `true` if successful.  | 
80  |  | // See https://en.cppreference.com/w/c/string/byte/strtof for details about the  | 
81  |  | // allowed formats for `str`, except SimpleAtod is locale-independent and will  | 
82  |  | // always use the "C" locale. If any errors are encountered, this function  | 
83  |  | // returns `false`, leaving `out` in an unspecified state.  | 
84  |  | ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out);  | 
85  |  |  | 
86  |  | // SimpleAtob()  | 
87  |  | //  | 
88  |  | // Converts the given string into a boolean, returning `true` if successful.  | 
89  |  | // The following case-insensitive strings are interpreted as boolean `true`:  | 
90  |  | // "true", "t", "yes", "y", "1". The following case-insensitive strings  | 
91  |  | // are interpreted as boolean `false`: "false", "f", "no", "n", "0". If any  | 
92  |  | // errors are encountered, this function returns `false`, leaving `out` in an  | 
93  |  | // unspecified state.  | 
94  |  | ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out);  | 
95  |  |  | 
96  |  | // SimpleHexAtoi()  | 
97  |  | //  | 
98  |  | // Converts a hexadecimal string (optionally followed or preceded by ASCII  | 
99  |  | // whitespace) to an integer, returning `true` if successful. Only valid base-16  | 
100  |  | // hexadecimal integers whose value falls within the range of the integer type  | 
101  |  | // (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal  | 
102  |  | // value may include both upper and lowercase character symbols, and may  | 
103  |  | // optionally include a leading "0x" (or "0X") number prefix, which is ignored  | 
104  |  | // by this function. If any errors are encountered, this function returns  | 
105  |  | // `false`, leaving `out` in an unspecified state.  | 
106  |  | template <typename int_type>  | 
107  |  | ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out);  | 
108  |  |  | 
109  |  | // Overloads of SimpleHexAtoi() for 128 bit integers.  | 
110  |  | ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str,  | 
111  |  |                                                absl::int128* out);  | 
112  |  | ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str,  | 
113  |  |                                                absl::uint128* out);  | 
114  |  |  | 
115  |  | ABSL_NAMESPACE_END  | 
116  |  | }  // namespace absl  | 
117  |  |  | 
118  |  | // End of public API.  Implementation details follow.  | 
119  |  |  | 
120  |  | namespace absl { | 
121  |  | ABSL_NAMESPACE_BEGIN  | 
122  |  | namespace numbers_internal { | 
123  |  |  | 
124  |  | // Digit conversion.  | 
125  |  | ABSL_DLL extern const char kHexChar[17];  // 0123456789abcdef  | 
126  |  | ABSL_DLL extern const char  | 
127  |  |     kHexTable[513];  // 000102030405060708090a0b0c0d0e0f1011...  | 
128  |  |  | 
129  |  | // Writes a two-character representation of 'i' to 'buf'. 'i' must be in the  | 
130  |  | // range 0 <= i < 100, and buf must have space for two characters. Example:  | 
131  |  | //   char buf[2];  | 
132  |  | //   PutTwoDigits(42, buf);  | 
133  |  | //   // buf[0] == '4'  | 
134  |  | //   // buf[1] == '2'  | 
135  |  | void PutTwoDigits(uint32_t i, char* buf);  | 
136  |  |  | 
137  |  | // safe_strto?() functions for implementing SimpleAtoi()  | 
138  |  |  | 
139  |  | bool safe_strto32_base(absl::string_view text, int32_t* value, int base);  | 
140  |  | bool safe_strto64_base(absl::string_view text, int64_t* value, int base);  | 
141  |  | bool safe_strto128_base(absl::string_view text, absl::int128* value,  | 
142  |  |                          int base);  | 
143  |  | bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base);  | 
144  |  | bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base);  | 
145  |  | bool safe_strtou128_base(absl::string_view text, absl::uint128* value,  | 
146  |  |                          int base);  | 
147  |  |  | 
148  |  | static const int kFastToBufferSize = 32;  | 
149  |  | static const int kSixDigitsToBufferSize = 16;  | 
150  |  |  | 
151  |  | // Helper function for fast formatting of floating-point values.  | 
152  |  | // The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six  | 
153  |  | // significant digits are returned, trailing zeros are removed, and numbers  | 
154  |  | // outside the range 0.0001-999999 are output using scientific notation  | 
155  |  | // (1.23456e+06). This routine is heavily optimized.  | 
156  |  | // Required buffer size is `kSixDigitsToBufferSize`.  | 
157  |  | size_t SixDigitsToBuffer(double d, char* buffer);  | 
158  |  |  | 
159  |  | // These functions are intended for speed. All functions take an output buffer  | 
160  |  | // as an argument and return a pointer to the last byte they wrote, which is the  | 
161  |  | // terminating '\0'. At most `kFastToBufferSize` bytes are written.  | 
162  |  | char* FastIntToBuffer(int32_t, char*);  | 
163  |  | char* FastIntToBuffer(uint32_t, char*);  | 
164  |  | char* FastIntToBuffer(int64_t, char*);  | 
165  |  | char* FastIntToBuffer(uint64_t, char*);  | 
166  |  |  | 
// Generic entry point for enums and for integer types that are not an exact
// match for one of the four fixed-width FastIntToBuffer() overloads declared
// above. The argument is cast to the fixed-width type with the same signedness
// (64-bit if it is wider than 32 bits, 32-bit otherwise) and forwarded to the
// corresponding overload, whose return value is passed back unchanged.
template <typename int_type>
char* FastIntToBuffer(int_type i, char* buffer) {
  static_assert(sizeof(i) <= 64 / 8,
                "FastIntToBuffer works only with 64-bit-or-less integers.");
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  // Every condition below is a constexpr bool to suppress MSVC warning C4127.
  constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0;
  constexpr bool kUse64Bit = sizeof(i) > 32 / 8;
  constexpr bool kSigned64 = kIsSigned && kUse64Bit;
  constexpr bool kSigned32 = kIsSigned && !kUse64Bit;
  constexpr bool kUnsigned64 = !kIsSigned && kUse64Bit;

  if (kSigned64) {
    return FastIntToBuffer(static_cast<int64_t>(i), buffer);
  }
  if (kSigned32) {
    return FastIntToBuffer(static_cast<int32_t>(i), buffer);
  }
  if (kUnsigned64) {
    return FastIntToBuffer(static_cast<uint64_t>(i), buffer);
  }
  // Remaining case: unsigned and at most 32 bits wide.
  return FastIntToBuffer(static_cast<uint32_t>(i), buffer);
}
193  |  |  | 
194  |  | // Implementation of SimpleAtoi, generalized to support arbitrary base (used  | 
195  |  | // with base different from 10 elsewhere in Abseil implementation).  | 
196  |  | template <typename int_type>  | 
197  |  | ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out,  | 
198  | 0  |                                            int base) { | 
199  | 0  |   static_assert(sizeof(*out) == 4 || sizeof(*out) == 8,  | 
200  | 0  |                 "SimpleAtoi works only with 32-bit or 64-bit integers.");  | 
201  | 0  |   static_assert(!std::is_floating_point<int_type>::value,  | 
202  | 0  |                 "Use SimpleAtof or SimpleAtod instead.");  | 
203  | 0  |   bool parsed;  | 
204  |  |   // TODO(jorg): This signed-ness check is used because it works correctly  | 
205  |  |   // with enums, and it also serves to check that int_type is not a pointer.  | 
206  |  |   // If one day something like std::is_signed<enum E> works, switch to it.  | 
207  |  |   // These conditions are constexpr bools to suppress MSVC warning C4127.  | 
208  | 0  |   constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0;  | 
209  | 0  |   constexpr bool kUse64Bit = sizeof(*out) == 64 / 8;  | 
210  | 0  |   if (kIsSigned) { | 
211  | 0  |     if (kUse64Bit) { | 
212  | 0  |       int64_t val;  | 
213  | 0  |       parsed = numbers_internal::safe_strto64_base(s, &val, base);  | 
214  | 0  |       *out = static_cast<int_type>(val);  | 
215  | 0  |     } else { | 
216  | 0  |       int32_t val;  | 
217  | 0  |       parsed = numbers_internal::safe_strto32_base(s, &val, base);  | 
218  | 0  |       *out = static_cast<int_type>(val);  | 
219  | 0  |     }  | 
220  | 0  |   } else { | 
221  | 0  |     if (kUse64Bit) { | 
222  | 0  |       uint64_t val;  | 
223  | 0  |       parsed = numbers_internal::safe_strtou64_base(s, &val, base);  | 
224  | 0  |       *out = static_cast<int_type>(val);  | 
225  | 0  |     } else { | 
226  | 0  |       uint32_t val;  | 
227  | 0  |       parsed = numbers_internal::safe_strtou32_base(s, &val, base);  | 
228  | 0  |       *out = static_cast<int_type>(val);  | 
229  | 0  |     }  | 
230  | 0  |   }  | 
231  | 0  |   return parsed;  | 
232  | 0  | } Unexecuted instantiation: bool absl::numbers_internal::safe_strtoi_base<int>(absl::string_view, int*, int) Unexecuted instantiation: bool absl::numbers_internal::safe_strtoi_base<unsigned int>(absl::string_view, unsigned int*, int) Unexecuted instantiation: bool absl::numbers_internal::safe_strtoi_base<long>(absl::string_view, long*, int) Unexecuted instantiation: bool absl::numbers_internal::safe_strtoi_base<unsigned long>(absl::string_view, unsigned long*, int) Unexecuted instantiation: bool absl::numbers_internal::safe_strtoi_base<long long>(absl::string_view, long long*, int) Unexecuted instantiation: bool absl::numbers_internal::safe_strtoi_base<unsigned long long>(absl::string_view, unsigned long long*, int)  | 
233  |  |  | 
234  |  | // FastHexToBufferZeroPad16()  | 
235  |  | //  | 
236  |  | // Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but  | 
237  |  | // without the terminating null character. Thus `out` must be of length >= 16.  | 
238  |  | // Returns the number of non-pad digits of the output (it can never be zero  | 
239  |  | // since 0 has one digit).  | 
240  | 0  | inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { | 
241  | 0  | #ifdef ABSL_INTERNAL_HAVE_SSSE3  | 
242  | 0  |   uint64_t be = absl::big_endian::FromHost64(val);  | 
243  | 0  |   const auto kNibbleMask = _mm_set1_epi8(0xf);  | 
244  | 0  |   const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', | 
245  | 0  |                                         '8', '9', 'a', 'b', 'c', 'd', 'e', 'f');  | 
246  | 0  |   auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be));  // load lo dword  | 
247  | 0  |   auto v4 = _mm_srli_epi64(v, 4);                            // shift 4 right  | 
248  | 0  |   auto il = _mm_unpacklo_epi8(v4, v);                        // interleave bytes  | 
249  | 0  |   auto m = _mm_and_si128(il, kNibbleMask);                   // mask out nibbles  | 
250  | 0  |   auto hexchars = _mm_shuffle_epi8(kHexDigits, m);           // hex chars  | 
251  | 0  |   _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars);  | 
252  | 0  | #else  | 
253  | 0  |   for (int i = 0; i < 8; ++i) { | 
254  | 0  |     auto byte = (val >> (56 - 8 * i)) & 0xFF;  | 
255  | 0  |     auto* hex = &absl::numbers_internal::kHexTable[byte * 2];  | 
256  | 0  |     std::memcpy(out + 2 * i, hex, 2);  | 
257  | 0  |   }  | 
258  | 0  | #endif  | 
259  | 0  |   // | 0x1 so that even 0 has 1 digit.  | 
260  | 0  |   return 16 - static_cast<size_t>(countl_zero(val | 0x1) / 4);  | 
261  | 0  | }  | 
262  |  |  | 
263  |  | }  // namespace numbers_internal  | 
264  |  |  | 
265  |  | template <typename int_type>  | 
266  |  | ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) { | 
267  |  |   return numbers_internal::safe_strtoi_base(str, out, 10);  | 
268  |  | }  | 
269  |  |  | 
270  |  | ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str,  | 
271  | 0  |                                             absl::int128* out) { | 
272  | 0  |   return numbers_internal::safe_strto128_base(str, out, 10);  | 
273  | 0  | }  | 
274  |  |  | 
275  |  | ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str,  | 
276  | 0  |                                             absl::uint128* out) { | 
277  | 0  |   return numbers_internal::safe_strtou128_base(str, out, 10);  | 
278  | 0  | }  | 
279  |  |  | 
280  |  | template <typename int_type>  | 
281  |  | ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out) { | 
282  |  |   return numbers_internal::safe_strtoi_base(str, out, 16);  | 
283  |  | }  | 
284  |  |  | 
285  |  | ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str,  | 
286  | 0  |                                                absl::int128* out) { | 
287  | 0  |   return numbers_internal::safe_strto128_base(str, out, 16);  | 
288  | 0  | }  | 
289  |  |  | 
290  |  | ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str,  | 
291  | 0  |                                                absl::uint128* out) { | 
292  | 0  |   return numbers_internal::safe_strtou128_base(str, out, 16);  | 
293  | 0  | }  | 
294  |  |  | 
295  |  | ABSL_NAMESPACE_END  | 
296  |  | }  // namespace absl  | 
297  |  |  | 
298  |  | #endif  // ABSL_STRINGS_NUMBERS_H_  |