/src/parallel-hashmap/parallel_hashmap/phmap_bits.h
Line  | Count  | Source  | 
1  |  | #if !defined(phmap_bits_h_guard_)  | 
2  |  | #define phmap_bits_h_guard_  | 
3  |  |  | 
4  |  | // ---------------------------------------------------------------------------  | 
5  |  | // Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com  | 
6  |  | //  | 
7  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
8  |  | // you may not use this file except in compliance with the License.  | 
9  |  | // You may obtain a copy of the License at  | 
10  |  | //  | 
11  |  | //      https://www.apache.org/licenses/LICENSE-2.0  | 
12  |  | //  | 
13  |  | // Unless required by applicable law or agreed to in writing, software  | 
14  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
15  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
16  |  | // See the License for the specific language governing permissions and  | 
17  |  | // limitations under the License.  | 
18  |  | //  | 
19  |  | // Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp)  | 
20  |  | // with modifications.  | 
21  |  | //   | 
22  |  | // Copyright 2018 The Abseil Authors.  | 
23  |  | //  | 
24  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
25  |  | // you may not use this file except in compliance with the License.  | 
26  |  | // You may obtain a copy of the License at  | 
27  |  | //  | 
28  |  | //      https://www.apache.org/licenses/LICENSE-2.0  | 
29  |  | //  | 
30  |  | // Unless required by applicable law or agreed to in writing, software  | 
31  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
32  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
33  |  | // See the License for the specific language governing permissions and  | 
34  |  | // limitations under the License.  | 
35  |  | // ---------------------------------------------------------------------------  | 
36  |  |  | 
37  |  | // The following guarantees declaration of the byte swap functions  | 
38  |  | #ifdef _MSC_VER  | 
39  |  |     #include <stdlib.h>  // NOLINT(build/include)  | 
40  |  | #elif defined(__APPLE__)  | 
41  |  |     // Mac OS X / Darwin features  | 
42  |  |     #include <libkern/OSByteOrder.h>  | 
43  |  | #elif defined(__FreeBSD__)  | 
44  |  |     #include <sys/endian.h>  | 
45  |  | #elif defined(__GLIBC__)  | 
46  |  |     #include <byteswap.h>  // IWYU pragma: export  | 
47  |  | #endif  | 
48  |  |  | 
49  |  | #include <string.h>  | 
50  |  | #include <cstdint>  | 
51  |  | #include "phmap_config.h"  | 
52  |  |  | 
53  |  | #ifdef _MSC_VER  | 
54  |  |     #pragma warning(push)    | 
55  |  |     #pragma warning(disable : 4514) // unreferenced inline function has been removed  | 
56  |  | #endif  | 
57  |  |  | 
58  |  | // -----------------------------------------------------------------------------  | 
59  |  | // unaligned APIs  | 
60  |  | // -----------------------------------------------------------------------------  | 
61  |  | // Portable handling of unaligned loads, stores, and copies.  | 
62  |  | // On some platforms, like ARM, the copy functions can be more efficient  | 
63  |  | // than a load and a store.  | 
64  |  | // -----------------------------------------------------------------------------  | 
65  |  |  | 
66  |  | #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||\  | 
67  |  |     defined(MEMORY_SANITIZER)  | 
68  |  | #include <stdint.h>  | 
69  |  |  | 
70  |  | extern "C" { | 
71  |  |     uint16_t __sanitizer_unaligned_load16(const void *p);  | 
72  |  |     uint32_t __sanitizer_unaligned_load32(const void *p);  | 
73  |  |     uint64_t __sanitizer_unaligned_load64(const void *p);  | 
74  |  |     void __sanitizer_unaligned_store16(void *p, uint16_t v);  | 
75  |  |     void __sanitizer_unaligned_store32(void *p, uint32_t v);  | 
76  |  |     void __sanitizer_unaligned_store64(void *p, uint64_t v);  | 
77  |  | }  // extern "C"  | 
78  |  |  | 
79  |  | namespace phmap { | 
80  |  | namespace bits { | 
81  |  |  | 
82  |  | inline uint16_t UnalignedLoad16(const void *p) { | 
83  |  |   return __sanitizer_unaligned_load16(p);  | 
84  |  | }  | 
85  |  |  | 
86  |  | inline uint32_t UnalignedLoad32(const void *p) { | 
87  |  |   return __sanitizer_unaligned_load32(p);  | 
88  |  | }  | 
89  |  |  | 
90  |  | inline uint64_t UnalignedLoad64(const void *p) { | 
91  |  |   return __sanitizer_unaligned_load64(p);  | 
92  |  | }  | 
93  |  |  | 
94  |  | inline void UnalignedStore16(void *p, uint16_t v) { | 
95  |  |   __sanitizer_unaligned_store16(p, v);  | 
96  |  | }  | 
97  |  |  | 
98  |  | inline void UnalignedStore32(void *p, uint32_t v) { | 
99  |  |   __sanitizer_unaligned_store32(p, v);  | 
100  |  | }  | 
101  |  |  | 
102  |  | inline void UnalignedStore64(void *p, uint64_t v) { | 
103  |  |   __sanitizer_unaligned_store64(p, v);  | 
104  |  | }  | 
105  |  |  | 
106  |  | }  // namespace bits  | 
107  |  | }  // namespace phmap  | 
108  |  |  | 
109  |  | #define PHMAP_INTERNAL_UNALIGNED_LOAD16(_p) (phmap::bits::UnalignedLoad16(_p))  | 
110  |  | #define PHMAP_INTERNAL_UNALIGNED_LOAD32(_p) (phmap::bits::UnalignedLoad32(_p))  | 
111  |  | #define PHMAP_INTERNAL_UNALIGNED_LOAD64(_p) (phmap::bits::UnalignedLoad64(_p))  | 
112  |  |  | 
113  |  | #define PHMAP_INTERNAL_UNALIGNED_STORE16(_p, _val) (phmap::bits::UnalignedStore16(_p, _val))  | 
114  |  | #define PHMAP_INTERNAL_UNALIGNED_STORE32(_p, _val) (phmap::bits::UnalignedStore32(_p, _val))  | 
115  |  | #define PHMAP_INTERNAL_UNALIGNED_STORE64(_p, _val) (phmap::bits::UnalignedStore64(_p, _val))  | 
116  |  |  | 
117  |  | #else  | 
118  |  |  | 
119  |  | namespace phmap { | 
120  |  | namespace bits { | 
121  |  |  | 
122  | 0  | inline uint16_t UnalignedLoad16(const void *p) { | 
123  | 0  |   uint16_t t;  | 
124  | 0  |   memcpy(&t, p, sizeof t);  | 
125  | 0  |   return t;  | 
126  | 0  | }  | 
127  |  |  | 
128  | 0  | inline uint32_t UnalignedLoad32(const void *p) { | 
129  | 0  |   uint32_t t;  | 
130  | 0  |   memcpy(&t, p, sizeof t);  | 
131  | 0  |   return t;  | 
132  | 0  | }  | 
133  |  |  | 
134  | 0  | inline uint64_t UnalignedLoad64(const void *p) { | 
135  | 0  |   uint64_t t;  | 
136  | 0  |   memcpy(&t, p, sizeof t);  | 
137  | 0  |   return t;  | 
138  | 0  | }  | 
139  |  |  | 
140  | 0  | inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); } | 
141  |  |  | 
142  | 0  | inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); } | 
143  |  |  | 
144  | 0  | inline void UnalignedStore64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } | 
145  |  |  | 
146  |  | }  // namespace bits  | 
147  |  | }  // namespace phmap  | 
148  |  |  | 
149  |  | #define PHMAP_INTERNAL_UNALIGNED_LOAD16(_p) (phmap::bits::UnalignedLoad16(_p))  | 
150  |  | #define PHMAP_INTERNAL_UNALIGNED_LOAD32(_p) (phmap::bits::UnalignedLoad32(_p))  | 
151  |  | #define PHMAP_INTERNAL_UNALIGNED_LOAD64(_p) (phmap::bits::UnalignedLoad64(_p))  | 
152  |  |  | 
153  |  | #define PHMAP_INTERNAL_UNALIGNED_STORE16(_p, _val) (phmap::bits::UnalignedStore16(_p, _val))  | 
154  |  | #define PHMAP_INTERNAL_UNALIGNED_STORE32(_p, _val) (phmap::bits::UnalignedStore32(_p, _val))  | 
155  |  | #define PHMAP_INTERNAL_UNALIGNED_STORE64(_p, _val) (phmap::bits::UnalignedStore64(_p, _val))  | 
156  |  |  | 
157  |  | #endif  | 
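
A minimal usage sketch (the function and buffer below are illustrative, not part of this header): the memcpy-based helpers and the macros above let a caller read and write an integer at a misaligned offset without casting to a misaligned pointer, which would be undefined behavior.

    // Requires <cassert>; UnalignedRoundTripExample is a hypothetical name.
    inline void UnalignedRoundTripExample() {
        unsigned char buf[8] = {0};
        phmap::bits::UnalignedStore32(buf + 1, 0xDEADBEEFu);               // store at an odd address
        assert(PHMAP_INTERNAL_UNALIGNED_LOAD32(buf + 1) == 0xDEADBEEFu);   // load it back
    }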
158  |  |  | 
159  |  | // -----------------------------------------------------------------------------  | 
160  |  | // File: optimization.h  | 
161  |  | // -----------------------------------------------------------------------------  | 
162  |  |  | 
163  |  | #if defined(__pnacl__)  | 
164  |  |     #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } | 
165  |  | #elif defined(__clang__)  | 
166  |  |     // Clang will not tail call given inline volatile assembly.  | 
167  |  |     #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") | 
168  |  | #elif defined(__GNUC__)  | 
169  |  |     // GCC will not tail call given inline volatile assembly.  | 
170  |  |     #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") | 
171  |  | #elif defined(_MSC_VER)  | 
172  |  |     #include <intrin.h>  | 
173  |  |     // The __nop() intrinsic blocks the optimization.  | 
174  |  |     #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __nop()  | 
175  |  | #else  | 
176  |  |     #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } | 
177  |  | #endif  | 
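
A hedged sketch of how this macro is typically used (the function below is illustrative): placing it between a call and the return keeps the calling frame alive, so it still appears in stack traces taken inside the callee.

    inline int CountDownExample(int n) {
        if (n == 0) return 0;
        int result = CountDownExample(n - 1);
        PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION();   // the recursive call above cannot become a tail call
        return result;
    }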
178  |  |  | 
179  |  | #if defined(__GNUC__)  | 
180  |  |     // Cache line alignment  | 
181  |  |     #if defined(__i386__) || defined(__x86_64__)  | 
182  |  |         #define PHMAP_CACHELINE_SIZE 64  | 
183  |  |     #elif defined(__powerpc64__)  | 
184  |  |         #define PHMAP_CACHELINE_SIZE 128  | 
185  |  |     #elif defined(__aarch64__)  | 
186  |  |         // We would need to read special register ctr_el0 to find out L1 dcache size.  | 
187  |  |         // This value is a good estimate based on a real aarch64 machine.  | 
188  |  |         #define PHMAP_CACHELINE_SIZE 64  | 
189  |  |     #elif defined(__arm__)  | 
190  |  |         // Cache line sizes for ARM: These values are not strictly correct since  | 
191  |  |         // cache line sizes depend on implementations, not architectures.  There  | 
192  |  |         // are even implementations with cache line sizes configurable at boot  | 
193  |  |         // time.  | 
194  |  |         #if defined(__ARM_ARCH_5T__)  | 
195  |  |             #define PHMAP_CACHELINE_SIZE 32  | 
196  |  |         #elif defined(__ARM_ARCH_7A__)  | 
197  |  |             #define PHMAP_CACHELINE_SIZE 64  | 
198  |  |         #endif  | 
199  |  |     #endif  | 
200  |  |  | 
201  |  |     #ifndef PHMAP_CACHELINE_SIZE  | 
202  |  |         // A reasonable default guess.  Note that overestimates tend to waste more  | 
203  |  |         // space, while underestimates tend to waste more time.  | 
204  |  |         #define PHMAP_CACHELINE_SIZE 64  | 
205  |  |     #endif  | 
206  |  |  | 
207  |  |     #define PHMAP_CACHELINE_ALIGNED __attribute__((aligned(PHMAP_CACHELINE_SIZE)))  | 
208  |  | #elif defined(_MSC_VER)  | 
209  |  |     #define PHMAP_CACHELINE_SIZE 64  | 
210  |  |     #define PHMAP_CACHELINE_ALIGNED __declspec(align(PHMAP_CACHELINE_SIZE))  | 
211  |  | #else  | 
212  |  |     #define PHMAP_CACHELINE_SIZE 64  | 
213  |  |     #define PHMAP_CACHELINE_ALIGNED  | 
214  |  | #endif  | 
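
A sketch of the intended use (requires <atomic>; the struct name is illustrative): aligning per-thread data to PHMAP_CACHELINE_SIZE keeps writers on different instances from false-sharing a cache line.

    struct PHMAP_CACHELINE_ALIGNED PaddedCounterExample {
        std::atomic<uint64_t> value{0};
        // pad to a full line in case the alignment attribute is a no-op on this compiler
        char padding[PHMAP_CACHELINE_SIZE - sizeof(std::atomic<uint64_t>)];
    };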
215  |  |  | 
216  |  |  | 
217  |  | #if PHMAP_HAVE_BUILTIN(__builtin_expect) || \  | 
218  |  |     (defined(__GNUC__) && !defined(__clang__))  | 
219  | 541k  |     #define PHMAP_PREDICT_FALSE(x) (__builtin_expect(x, 0))  | 
220  | 15.5M  |     #define PHMAP_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))  | 
221  |  | #else  | 
222  |  |     #define PHMAP_PREDICT_FALSE(x) (x)  | 
223  |  |     #define PHMAP_PREDICT_TRUE(x) (x)  | 
224  |  | #endif  | 
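
A small usage sketch (illustrative function): PHMAP_PREDICT_FALSE marks the resize check as the cold branch, so the compiler lays out the common "no grow" path as straight-line code.

    inline size_t NextCapacityExample(size_t size, size_t capacity) {
        if (PHMAP_PREDICT_FALSE(size == capacity))
            return capacity ? capacity * 2 : 8;   // rare, slow path: grow
        return capacity;                          // common, fast path
    }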
225  |  |  | 
226  |  | // -----------------------------------------------------------------------------  | 
227  |  | // File: bits.h  | 
228  |  | // -----------------------------------------------------------------------------  | 
229  |  |  | 
230  |  | #if defined(_MSC_VER)  | 
231  |  |     // We can achieve something similar to attribute((always_inline)) with MSVC by  | 
232  |  | // using the __forceinline keyword; however, this is not perfect. MSVC is  | 
233  |  | // much less aggressive about inlining, even with the __forceinline keyword.  | 
234  |  |     #define PHMAP_BASE_INTERNAL_FORCEINLINE __forceinline  | 
235  |  | #else  | 
236  |  |     // Use default attribute inline.  | 
237  |  |     #define PHMAP_BASE_INTERNAL_FORCEINLINE inline PHMAP_ATTRIBUTE_ALWAYS_INLINE  | 
238  |  | #endif  | 
239  |  |  | 
240  |  |  | 
241  |  | namespace phmap { | 
242  |  |  | 
243  |  | #if defined(__GNUC__)  | 
244  |  |     #pragma GCC diagnostic push  | 
245  |  |     #pragma GCC diagnostic ignored "-Wpedantic"  | 
246  |  | #endif  | 
247  |  |  | 
248  |  | #ifdef PHMAP_HAVE_INTRINSIC_INT128  | 
249  |  |     __extension__ typedef unsigned __int128 phmap_uint128;  | 
250  |  |     inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high)  | 
251  | 12.8M  |     { | 
252  | 12.8M  |         auto result = static_cast<phmap_uint128>(a) * static_cast<phmap_uint128>(b);  | 
253  | 12.8M  |         *high = static_cast<uint64_t>(result >> 64);  | 
254  | 12.8M  |         return static_cast<uint64_t>(result);  | 
255  | 12.8M  |     }  | 
256  |  |     #define PHMAP_HAS_UMUL128 1  | 
257  |  | #elif (defined(_MSC_VER))  | 
258  |  |     #if defined(_M_X64)  | 
259  |  |         #pragma intrinsic(_umul128)  | 
260  |  |         inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high)  | 
261  |  |         { | 
262  |  |             return _umul128(a, b, high);  | 
263  |  |         }  | 
264  |  |         #define PHMAP_HAS_UMUL128 1  | 
265  |  |     #endif  | 
266  |  | #endif  | 
267  |  |  | 
268  |  | #if defined(__GNUC__)  | 
269  |  |     #pragma GCC diagnostic pop  | 
270  |  | #endif  | 
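
Where the full 128-bit product is available, callers usually keep only the high half, for example to reduce a 64-bit hash into [0, n) without a division. A minimal sketch, assuming PHMAP_HAS_UMUL128 is defined (the function name is illustrative):

    #ifdef PHMAP_HAS_UMUL128
    inline uint64_t MapHashToRangeExample(uint64_t hash, uint64_t n) {
        uint64_t high;
        (void)umul128(hash, n, &high);   // discard the low 64 bits of the product
        return high;                     // maps hash into [0, n)
    }
    #endif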
271  |  |  | 
272  |  | namespace base_internal { | 
273  |  |  | 
274  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros64Slow(uint64_t n) { | 
275  | 0  |     int zeroes = 60;  | 
276  | 0  |     if (n >> 32) zeroes -= 32, n >>= 32;  | 
277  | 0  |     if (n >> 16) zeroes -= 16, n >>= 16;  | 
278  | 0  |     if (n >> 8) zeroes -= 8, n >>= 8;  | 
279  | 0  |     if (n >> 4) zeroes -= 4, n >>= 4;  | 
280  | 0  |     return (uint32_t)("\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes); | 
281  | 0  | }  | 
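// Illustrative trace of the table-based fallback above, for n = 0x10000:
//   n >> 32 == 0                  -> zeroes stays 60
//   n >> 16 != 0                  -> zeroes = 44, n = 1
//   n >> 8 == 0 and n >> 4 == 0   -> zeroes stays 44
//   "\4\3\2\2..."[1] == 3         -> returns 44 + 3 = 47,
// matching the 47 leading zero bits of 0x10000 in a 64-bit word.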
282  |  |  | 
283  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros64(uint64_t n) { | 
284  | 0  | #if defined(_MSC_VER) && defined(_M_X64)  | 
285  | 0  |     // MSVC does not have __builtin_clzll. Use _BitScanReverse64.  | 
286  | 0  |     unsigned long result = 0;  // NOLINT(runtime/int)  | 
287  | 0  |     if (_BitScanReverse64(&result, n)) { | 
288  | 0  |         return (uint32_t)(63 - result);  | 
289  | 0  |     }  | 
290  | 0  |     return 64;  | 
291  | 0  | #elif defined(_MSC_VER) && !defined(__clang__)  | 
292  | 0  |     // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse  | 
293  | 0  |     unsigned long result = 0;  // NOLINT(runtime/int)  | 
294  | 0  |     if ((n >> 32) && _BitScanReverse(&result, (unsigned long)(n >> 32))) { | 
295  | 0  |         return  (uint32_t)(31 - result);  | 
296  | 0  |     }  | 
297  | 0  |     if (_BitScanReverse(&result, (unsigned long)n)) { | 
298  | 0  |         return (uint32_t)(63 - result);  | 
299  | 0  |     }  | 
300  | 0  |     return 64;  | 
301  | 0  | #elif defined(__GNUC__) || defined(__clang__)  | 
302  | 0  |     // Use __builtin_clzll, which uses the following instructions:  | 
303  | 0  |     //  x86: bsr  | 
304  | 0  |     //  ARM64: clz  | 
305  | 0  |     //  PPC: cntlzd  | 
306  | 0  |     static_assert(sizeof(unsigned long long) == sizeof(n),  // NOLINT(runtime/int)  | 
307  | 0  |                   "__builtin_clzll does not take 64-bit arg");  | 
308  | 0  |  | 
309  | 0  |     // Handle 0 as a special case because __builtin_clzll(0) is undefined.  | 
310  | 0  |     if (n == 0) { | 
311  | 0  |         return 64;  | 
312  | 0  |     }  | 
313  | 0  |     return  (uint32_t)__builtin_clzll(n);  | 
314  | 0  | #else  | 
315  | 0  |     return CountLeadingZeros64Slow(n);  | 
316  | 0  | #endif  | 
317  | 0  | }  | 
318  |  |  | 
319  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros32Slow(uint64_t n) { | 
320  | 0  |     uint32_t zeroes = 28;  | 
321  | 0  |     if (n >> 16) zeroes -= 16, n >>= 16;  | 
322  | 0  |     if (n >> 8) zeroes -= 8, n >>= 8;  | 
323  | 0  |     if (n >> 4) zeroes -= 4, n >>= 4;  | 
324  | 0  |     return static_cast<uint32_t>("\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n]) + zeroes; | 
325  | 0  | }  | 
326  |  |  | 
327  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros32(uint32_t n) { | 
328  | 0  | #if defined(_MSC_VER) && !defined(__clang__)  | 
329  | 0  |     unsigned long result = 0;  // NOLINT(runtime/int)  | 
330  | 0  |     if (_BitScanReverse(&result, n)) { | 
331  | 0  |         return (uint32_t)(31 - result);  | 
332  | 0  |     }  | 
333  | 0  |     return 32;  | 
334  | 0  | #elif defined(__GNUC__) || defined(__clang__)  | 
335  | 0  |     // Use __builtin_clz, which uses the following instructions:  | 
336  | 0  |     //  x86: bsr  | 
337  | 0  |     //  ARM64: clz  | 
338  | 0  |     //  PPC: cntlzd  | 
339  | 0  |     static_assert(sizeof(int) == sizeof(n),  | 
340  | 0  |                   "__builtin_clz does not take 32-bit arg");  | 
341  | 0  |  | 
342  | 0  |     // Handle 0 as a special case because __builtin_clz(0) is undefined.  | 
343  | 0  |     if (n == 0) { | 
344  | 0  |         return 32;  | 
345  | 0  |     }  | 
346  | 0  |     return static_cast<uint32_t>(__builtin_clz(n));  | 
347  | 0  | #else  | 
348  | 0  |     return CountLeadingZeros32Slow(n);  | 
349  | 0  | #endif  | 
350  | 0  | }  | 
351  |  |  | 
352  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero64Slow(uint64_t n) { | 
353  | 0  |     uint32_t c = 63;  | 
354  | 0  |     n &= ~n + 1;  | 
355  | 0  |     if (n & 0x00000000FFFFFFFF) c -= 32;  | 
356  | 0  |     if (n & 0x0000FFFF0000FFFF) c -= 16;  | 
357  | 0  |     if (n & 0x00FF00FF00FF00FF) c -= 8;  | 
358  | 0  |     if (n & 0x0F0F0F0F0F0F0F0F) c -= 4;  | 
359  | 0  |     if (n & 0x3333333333333333) c -= 2;  | 
360  | 0  |     if (n & 0x5555555555555555) c -= 1;  | 
361  | 0  |     return c;  | 
362  | 0  | }  | 
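// Illustrative trace of the mask-based fallback above, for n = 40 (0b101000):
//   n &= ~n + 1 isolates the lowest set bit   -> n = 8
//   8 & 0x00000000FFFFFFFF != 0               -> c = 63 - 32 = 31
//   8 & 0x0000FFFF0000FFFF != 0               -> c = 15
//   8 & 0x00FF00FF00FF00FF != 0               -> c = 7
//   8 & 0x0F0F0F0F0F0F0F0F != 0               -> c = 3
//   the last two masks do not match           -> returns 3,
// matching the 3 trailing zero bits of 40.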
363  |  |  | 
364  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero64(uint64_t n) { | 
365  | 0  | #if defined(_MSC_VER) && !defined(__clang__) && defined(_M_X64)  | 
366  | 0  |     unsigned long result = 0;  // NOLINT(runtime/int)  | 
367  | 0  |     _BitScanForward64(&result, n);  | 
368  | 0  |     return (uint32_t)result;  | 
369  | 0  | #elif defined(_MSC_VER) && !defined(__clang__)  | 
370  | 0  |     unsigned long result = 0;  // NOLINT(runtime/int)  | 
371  | 0  |     if (static_cast<uint32_t>(n) == 0) { | 
372  | 0  |         _BitScanForward(&result, (unsigned long)(n >> 32));  | 
373  | 0  |         return result + 32;  | 
374  | 0  |     }  | 
375  | 0  |     _BitScanForward(&result, (unsigned long)n);  | 
376  | 0  |     return result;  | 
377  | 0  | #elif defined(__GNUC__) || defined(__clang__)  | 
378  | 0  |     static_assert(sizeof(unsigned long long) == sizeof(n),  // NOLINT(runtime/int)  | 
379  | 0  |                   "__builtin_ctzll does not take 64-bit arg");  | 
380  | 0  |     return static_cast<uint32_t>(__builtin_ctzll(n));  | 
381  | 0  | #else  | 
382  | 0  |     return CountTrailingZerosNonZero64Slow(n);  | 
383  | 0  | #endif  | 
384  | 0  | }  | 
385  |  |  | 
386  | 0  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero32Slow(uint32_t n) { | 
387  | 0  |     uint32_t c = 31;  | 
388  | 0  |     n &= ~n + 1;  | 
389  | 0  |     if (n & 0x0000FFFF) c -= 16;  | 
390  | 0  |     if (n & 0x00FF00FF) c -= 8;  | 
391  | 0  |     if (n & 0x0F0F0F0F) c -= 4;  | 
392  | 0  |     if (n & 0x33333333) c -= 2;  | 
393  | 0  |     if (n & 0x55555555) c -= 1;  | 
394  | 0  |     return c;  | 
395  | 0  | }  | 
396  |  |  | 
397  | 15.6M  | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero32(uint32_t n) { | 
398  |  | #if defined(_MSC_VER) && !defined(__clang__)  | 
399  |  |     unsigned long result = 0;  // NOLINT(runtime/int)  | 
400  |  |     _BitScanForward(&result, n);  | 
401  |  |     return (uint32_t)result;  | 
402  |  | #elif defined(__GNUC__) || defined(__clang__)  | 
403  |  |     static_assert(sizeof(int) == sizeof(n),  | 
404  | 15.6M  |                   "__builtin_ctz does not take 32-bit arg");  | 
405  | 15.6M  |     return static_cast<uint32_t>(__builtin_ctz(n));  | 
406  |  | #else  | 
407  |  |     return CountTrailingZerosNonZero32Slow(n);  | 
408  |  | #endif  | 
409  | 15.6M  | }  | 
410  |  |  | 
411  |  | #undef PHMAP_BASE_INTERNAL_FORCEINLINE  | 
412  |  |  | 
413  |  | }  // namespace base_internal  | 
414  |  | }  // namespace phmap  | 
415  |  |  | 
416  |  | // -----------------------------------------------------------------------------  | 
417  |  | // File: endian.h  | 
418  |  | // -----------------------------------------------------------------------------  | 
419  |  |  | 
420  |  | namespace phmap { | 
421  |  |  | 
422  |  | // Use compiler byte-swapping intrinsics if they are available.  32-bit  | 
423  |  | // and 64-bit versions are available in Clang and GCC as of GCC 4.3.0.  | 
424  |  | // The 16-bit version is available in Clang and GCC only as of GCC 4.8.0.  | 
425  |  | // For simplicity, we enable them all only for GCC 4.8.0 or later.  | 
426  |  | #if defined(__clang__) || \  | 
427  |  |     (defined(__GNUC__) && \  | 
428  |  |      ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5))  | 
429  |  |  | 
430  | 0  |     inline uint64_t gbswap_64(uint64_t host_int) { | 
431  | 0  |         return __builtin_bswap64(host_int);  | 
432  | 0  |     }  | 
433  | 0  |     inline uint32_t gbswap_32(uint32_t host_int) { | 
434  | 0  |         return __builtin_bswap32(host_int);  | 
435  | 0  |     }  | 
436  | 0  |     inline uint16_t gbswap_16(uint16_t host_int) { | 
437  | 0  |         return __builtin_bswap16(host_int);  | 
438  | 0  |     }  | 
439  |  |  | 
440  |  | #elif defined(_MSC_VER)  | 
441  |  |  | 
442  |  |     inline uint64_t gbswap_64(uint64_t host_int) { | 
443  |  |         return _byteswap_uint64(host_int);  | 
444  |  |     }  | 
445  |  |     inline uint32_t gbswap_32(uint32_t host_int) { | 
446  |  |         return _byteswap_ulong(host_int);  | 
447  |  |     }  | 
448  |  |     inline uint16_t gbswap_16(uint16_t host_int) { | 
449  |  |         return _byteswap_ushort(host_int);  | 
450  |  |     }  | 
451  |  |  | 
452  |  | #elif defined(__APPLE__)  | 
453  |  |  | 
454  |  |     inline uint64_t gbswap_64(uint64_t host_int) { return OSSwapInt64(host_int); } | 
455  |  |     inline uint32_t gbswap_32(uint32_t host_int) { return OSSwapInt32(host_int); } | 
456  |  |     inline uint16_t gbswap_16(uint16_t host_int) { return OSSwapInt16(host_int); } | 
457  |  |  | 
458  |  | #else  | 
459  |  |  | 
460  |  |     inline uint64_t gbswap_64(uint64_t host_int) { | 
461  |  | #if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__)  | 
462  |  |         // Adapted from /usr/include/byteswap.h.  Not available on Mac.  | 
463  |  |         if (__builtin_constant_p(host_int)) { | 
464  |  |             return __bswap_constant_64(host_int);  | 
465  |  |         } else { | 
466  |  |             uint64_t result;  | 
467  |  |             __asm__("bswap %0" : "=r"(result) : "0"(host_int)); | 
468  |  |             return result;  | 
469  |  |         }  | 
470  |  | #elif defined(__GLIBC__)  | 
471  |  |         return bswap_64(host_int);  | 
472  |  | #else  | 
473  |  |         return (((host_int & uint64_t{0xFF}) << 56) | | 
474  |  |                 ((host_int & uint64_t{0xFF00}) << 40) | | 
475  |  |                 ((host_int & uint64_t{0xFF0000}) << 24) | | 
476  |  |                 ((host_int & uint64_t{0xFF000000}) << 8) | | 
477  |  |                 ((host_int & uint64_t{0xFF00000000}) >> 8) | | 
478  |  |                 ((host_int & uint64_t{0xFF0000000000}) >> 24) | | 
479  |  |                 ((host_int & uint64_t{0xFF000000000000}) >> 40) | | 
480  |  |                 ((host_int & uint64_t{0xFF00000000000000}) >> 56)); | 
481  |  | #endif  // bswap_64  | 
482  |  |     }  | 
483  |  |  | 
484  |  |     inline uint32_t gbswap_32(uint32_t host_int) { | 
485  |  | #if defined(__GLIBC__)  | 
486  |  |         return bswap_32(host_int);  | 
487  |  | #else  | 
488  |  |         return (((host_int & uint32_t{0xFF}) << 24) | | 
489  |  |                 ((host_int & uint32_t{0xFF00}) << 8) | | 
490  |  |                 ((host_int & uint32_t{0xFF0000}) >> 8) | | 
491  |  |                 ((host_int & uint32_t{0xFF000000}) >> 24)); | 
492  |  | #endif  | 
493  |  |     }  | 
494  |  |  | 
495  |  |     inline uint16_t gbswap_16(uint16_t host_int) { | 
496  |  | #if defined(__GLIBC__)  | 
497  |  |         return bswap_16(host_int);  | 
498  |  | #else  | 
499  |  |         return (((host_int & uint16_t{0xFF}) << 8) | | 
500  |  |                 ((host_int & uint16_t{0xFF00}) >> 8)); | 
501  |  | #endif  | 
502  |  |     }  | 
503  |  |  | 
504  |  | #endif  // intrinsics available  | 
505  |  |  | 
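
A quick sanity-check sketch of the helpers above (the function is illustrative and not part of the header): each swap fully reverses the byte order.

    inline bool BswapSelfCheckExample() {
        return gbswap_16(0x1234u) == 0x3412u &&
               gbswap_32(0x12345678u) == 0x78563412u &&
               gbswap_64(0x0102030405060708ull) == 0x0807060504030201ull;
    }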
506  |  | #ifdef PHMAP_IS_LITTLE_ENDIAN  | 
507  |  |  | 
508  |  |     // Definitions for ntohl etc. that don't require us to include  | 
509  |  |     // netinet/in.h. We wrap gbswap_32 and gbswap_16 in functions rather  | 
510  |  |     // than just #defining them because in debug mode, gcc doesn't  | 
511  |  |     // correctly handle the (rather involved) definitions of bswap_32.  | 
512  |  |     // gcc guarantees that inline functions are as fast as macros, so  | 
513  |  |     // this isn't a performance hit.  | 
514  | 0  |     inline uint16_t ghtons(uint16_t x) { return gbswap_16(x); } | 
515  | 0  |     inline uint32_t ghtonl(uint32_t x) { return gbswap_32(x); } | 
516  | 0  |     inline uint64_t ghtonll(uint64_t x) { return gbswap_64(x); } | 
517  |  |  | 
518  |  | #elif defined PHMAP_IS_BIG_ENDIAN  | 
519  |  |  | 
520  |  |     // These definitions are simpler on big-endian machines  | 
521  |  |     // These are functions instead of macros to avoid self-assignment warnings  | 
522  |  |     // on calls such as "i = ghtonl(i);".  This also provides type checking.  | 
523  |  |     inline uint16_t ghtons(uint16_t x) { return x; } | 
524  |  |     inline uint32_t ghtonl(uint32_t x) { return x; } | 
525  |  |     inline uint64_t ghtonll(uint64_t x) { return x; } | 
526  |  |  | 
527  |  | #else  | 
528  |  |     #error \  | 
529  |  |         "Unsupported byte order: Either PHMAP_IS_BIG_ENDIAN or " \  | 
530  |  |            "PHMAP_IS_LITTLE_ENDIAN must be defined"  | 
531  |  | #endif  // byte order  | 
532  |  |  | 
533  | 0  | inline uint16_t gntohs(uint16_t x) { return ghtons(x); } | 
534  | 0  | inline uint32_t gntohl(uint32_t x) { return ghtonl(x); } | 
535  | 0  | inline uint64_t gntohll(uint64_t x) { return ghtonll(x); } | 
536  |  |  | 
537  |  | // Utilities to convert numbers between the current host's native byte  | 
538  |  | // order and little-endian byte order  | 
539  |  | //  | 
540  |  | // Load/Store methods are alignment safe  | 
541  |  | namespace little_endian { | 
542  |  | // Conversion functions.  | 
543  |  | #ifdef PHMAP_IS_LITTLE_ENDIAN  | 
544  |  |  | 
545  | 0  |     inline uint16_t FromHost16(uint16_t x) { return x; } | 
546  | 0  |     inline uint16_t ToHost16(uint16_t x) { return x; } | 
547  |  |  | 
548  | 0  |     inline uint32_t FromHost32(uint32_t x) { return x; } | 
549  | 0  |     inline uint32_t ToHost32(uint32_t x) { return x; } | 
550  |  |  | 
551  | 0  |     inline uint64_t FromHost64(uint64_t x) { return x; } | 
552  | 0  |     inline uint64_t ToHost64(uint64_t x) { return x; } | 
553  |  |  | 
554  | 0  |     inline constexpr bool IsLittleEndian() { return true; } | 
555  |  |  | 
556  |  | #elif defined PHMAP_IS_BIG_ENDIAN  | 
557  |  |  | 
558  |  |     inline uint16_t FromHost16(uint16_t x) { return gbswap_16(x); } | 
559  |  |     inline uint16_t ToHost16(uint16_t x) { return gbswap_16(x); } | 
560  |  |  | 
561  |  |     inline uint32_t FromHost32(uint32_t x) { return gbswap_32(x); } | 
562  |  |     inline uint32_t ToHost32(uint32_t x) { return gbswap_32(x); } | 
563  |  |  | 
564  |  |     inline uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } | 
565  |  |     inline uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } | 
566  |  |  | 
567  |  |     inline constexpr bool IsLittleEndian() { return false; } | 
568  |  |  | 
569  |  | #endif /* ENDIAN */  | 
570  |  |  | 
571  |  | // Functions to do unaligned loads and stores in little-endian order.  | 
572  |  | // ------------------------------------------------------------------  | 
573  | 0  | inline uint16_t Load16(const void *p) { | 
574  | 0  |   return ToHost16(PHMAP_INTERNAL_UNALIGNED_LOAD16(p));  | 
575  | 0  | }  | 
576  |  |  | 
577  | 0  | inline void Store16(void *p, uint16_t v) { | 
578  | 0  |   PHMAP_INTERNAL_UNALIGNED_STORE16(p, FromHost16(v));  | 
579  | 0  | }  | 
580  |  |  | 
581  | 0  | inline uint32_t Load32(const void *p) { | 
582  | 0  |   return ToHost32(PHMAP_INTERNAL_UNALIGNED_LOAD32(p));  | 
583  | 0  | }  | 
584  |  |  | 
585  | 0  | inline void Store32(void *p, uint32_t v) { | 
586  | 0  |   PHMAP_INTERNAL_UNALIGNED_STORE32(p, FromHost32(v));  | 
587  | 0  | }  | 
588  |  |  | 
589  | 0  | inline uint64_t Load64(const void *p) { | 
590  | 0  |   return ToHost64(PHMAP_INTERNAL_UNALIGNED_LOAD64(p));  | 
591  | 0  | }  | 
592  |  |  | 
593  | 0  | inline void Store64(void *p, uint64_t v) { | 
594  | 0  |   PHMAP_INTERNAL_UNALIGNED_STORE64(p, FromHost64(v));  | 
595  | 0  | }  | 
596  |  |  | 
597  |  | }  // namespace little_endian  | 
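
A short usage sketch (hypothetical function names): these helpers serialize a value in little-endian order regardless of the host's byte order, and the load and store are alignment-safe.

    inline void WriteLenPrefixExample(unsigned char *out, uint32_t len) {
        little_endian::Store32(out, len);        // least-significant byte first
    }
    inline uint32_t ReadLenPrefixExample(const unsigned char *in) {
        return little_endian::Load32(in);
    }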
598  |  |  | 
599  |  | // Utilities to convert numbers between the current host's native byte  | 
600  |  | // order and big-endian byte order (same as network byte order)  | 
601  |  | //  | 
602  |  | // Load/Store methods are alignment safe  | 
603  |  | namespace big_endian { | 
604  |  | #ifdef PHMAP_IS_LITTLE_ENDIAN  | 
605  |  |  | 
606  | 0  |     inline uint16_t FromHost16(uint16_t x) { return gbswap_16(x); } | 
607  | 0  |     inline uint16_t ToHost16(uint16_t x) { return gbswap_16(x); } | 
608  |  |  | 
609  | 0  |     inline uint32_t FromHost32(uint32_t x) { return gbswap_32(x); } | 
610  | 0  |     inline uint32_t ToHost32(uint32_t x) { return gbswap_32(x); } | 
611  |  |  | 
612  | 0  |     inline uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } | 
613  | 0  |     inline uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } | 
614  |  |  | 
615  | 0  |     inline constexpr bool IsLittleEndian() { return true; } | 
616  |  |  | 
617  |  | #elif defined PHMAP_IS_BIG_ENDIAN  | 
618  |  |  | 
619  |  |     inline uint16_t FromHost16(uint16_t x) { return x; } | 
620  |  |     inline uint16_t ToHost16(uint16_t x) { return x; } | 
621  |  |  | 
622  |  |     inline uint32_t FromHost32(uint32_t x) { return x; } | 
623  |  |     inline uint32_t ToHost32(uint32_t x) { return x; } | 
624  |  |  | 
625  |  |     inline uint64_t FromHost64(uint64_t x) { return x; } | 
626  |  |     inline uint64_t ToHost64(uint64_t x) { return x; } | 
627  |  |  | 
628  |  |     inline constexpr bool IsLittleEndian() { return false; } | 
629  |  |  | 
630  |  | #endif /* ENDIAN */  | 
631  |  |  | 
632  |  | // Functions to do unaligned loads and stores in big-endian order.  | 
633  | 0  | inline uint16_t Load16(const void *p) { | 
634  | 0  |   return ToHost16(PHMAP_INTERNAL_UNALIGNED_LOAD16(p));  | 
635  | 0  | }  | 
636  |  |  | 
637  | 0  | inline void Store16(void *p, uint16_t v) { | 
638  | 0  |   PHMAP_INTERNAL_UNALIGNED_STORE16(p, FromHost16(v));  | 
639  | 0  | }  | 
640  |  |  | 
641  | 0  | inline uint32_t Load32(const void *p) { | 
642  | 0  |   return ToHost32(PHMAP_INTERNAL_UNALIGNED_LOAD32(p));  | 
643  | 0  | }  | 
644  |  |  | 
645  | 0  | inline void Store32(void *p, uint32_t v) { | 
646  | 0  |   PHMAP_INTERNAL_UNALIGNED_STORE32(p, FromHost32(v));  | 
647  | 0  | }  | 
648  |  |  | 
649  | 0  | inline uint64_t Load64(const void *p) { | 
650  | 0  |   return ToHost64(PHMAP_INTERNAL_UNALIGNED_LOAD64(p));  | 
651  | 0  | }  | 
652  |  |  | 
653  | 0  | inline void Store64(void *p, uint64_t v) { | 
654  | 0  |   PHMAP_INTERNAL_UNALIGNED_STORE64(p, FromHost64(v));  | 
655  | 0  | }  | 
656  |  |  | 
657  |  | }  // namespace big_endian  | 
658  |  |  | 
659  |  | }  // namespace phmap  | 
660  |  |  | 
661  |  | #ifdef _MSC_VER  | 
662  |  |      #pragma warning(pop)    | 
663  |  | #endif  | 
664  |  |  | 
665  |  | #endif // phmap_bits_h_guard_  |