/src/parallel-hashmap/parallel_hashmap/phmap_bits.h
Line | Count | Source |
1 | | #if !defined(phmap_bits_h_guard_) |
2 | | #define phmap_bits_h_guard_ |
3 | | |
4 | | // --------------------------------------------------------------------------- |
5 | | // Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com |
6 | | // |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // |
11 | | // https://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, software |
14 | | // distributed under the License is distributed on an "AS IS" BASIS, |
15 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | // See the License for the specific language governing permissions and |
17 | | // limitations under the License. |
18 | | // |
19 | | // Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp) |
20 | | // with modifications. |
21 | | // |
22 | | // Copyright 2018 The Abseil Authors. |
23 | | // |
24 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
25 | | // you may not use this file except in compliance with the License. |
26 | | // You may obtain a copy of the License at |
27 | | // |
28 | | // https://www.apache.org/licenses/LICENSE-2.0 |
29 | | // |
30 | | // Unless required by applicable law or agreed to in writing, software |
31 | | // distributed under the License is distributed on an "AS IS" BASIS, |
32 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
33 | | // See the License for the specific language governing permissions and |
34 | | // limitations under the License. |
35 | | // --------------------------------------------------------------------------- |
36 | | |
37 | | // The following guarantees declaration of the byte swap functions |
38 | | #ifdef _MSC_VER |
39 | | #include <stdlib.h> // NOLINT(build/include) |
40 | | #elif defined(__APPLE__) |
41 | | // Mac OS X / Darwin features |
42 | | #include <libkern/OSByteOrder.h> |
43 | | #elif defined(__FreeBSD__) |
44 | | #include <sys/endian.h> |
45 | | #elif defined(__GLIBC__) |
46 | | #include <byteswap.h> // IWYU pragma: export |
47 | | #endif |
48 | | |
49 | | #include <string.h> |
50 | | #include <cstdint> |
51 | | #include "phmap_config.h" |
52 | | |
53 | | #ifdef _MSC_VER |
54 | | #pragma warning(push) |
55 | | #pragma warning(disable : 4514) // unreferenced inline function has been removed |
56 | | #endif |
57 | | |
58 | | // ----------------------------------------------------------------------------- |
59 | | // unaligned APIs |
60 | | // ----------------------------------------------------------------------------- |
61 | | // Portable handling of unaligned loads, stores, and copies. |
62 | | // On some platforms, like ARM, the copy functions can be more efficient |
63 | | // than a load and a store. |
64 | | // ----------------------------------------------------------------------------- |
65 | | |
66 | | #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||\ |
67 | | defined(MEMORY_SANITIZER) |
68 | | #include <stdint.h> |
69 | | |
70 | | extern "C" { |
71 | | uint16_t __sanitizer_unaligned_load16(const void *p); |
72 | | uint32_t __sanitizer_unaligned_load32(const void *p); |
73 | | uint64_t __sanitizer_unaligned_load64(const void *p); |
74 | | void __sanitizer_unaligned_store16(void *p, uint16_t v); |
75 | | void __sanitizer_unaligned_store32(void *p, uint32_t v); |
76 | | void __sanitizer_unaligned_store64(void *p, uint64_t v); |
77 | | } // extern "C" |
78 | | |
79 | | namespace phmap { |
80 | | namespace bits { |
81 | | |
82 | | inline uint16_t UnalignedLoad16(const void *p) { |
83 | | return __sanitizer_unaligned_load16(p); |
84 | | } |
85 | | |
86 | | inline uint32_t UnalignedLoad32(const void *p) { |
87 | | return __sanitizer_unaligned_load32(p); |
88 | | } |
89 | | |
90 | | inline uint64_t UnalignedLoad64(const void *p) { |
91 | | return __sanitizer_unaligned_load64(p); |
92 | | } |
93 | | |
94 | | inline void UnalignedStore16(void *p, uint16_t v) { |
95 | | __sanitizer_unaligned_store16(p, v); |
96 | | } |
97 | | |
98 | | inline void UnalignedStore32(void *p, uint32_t v) { |
99 | | __sanitizer_unaligned_store32(p, v); |
100 | | } |
101 | | |
102 | | inline void UnalignedStore64(void *p, uint64_t v) { |
103 | | __sanitizer_unaligned_store64(p, v); |
104 | | } |
105 | | |
106 | | } // namespace bits |
107 | | } // namespace phmap |
108 | | |
109 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD16(_p) (phmap::bits::UnalignedLoad16(_p)) |
110 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD32(_p) (phmap::bits::UnalignedLoad32(_p)) |
111 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD64(_p) (phmap::bits::UnalignedLoad64(_p)) |
112 | | |
113 | | #define PHMAP_INTERNAL_UNALIGNED_STORE16(_p, _val) (phmap::bits::UnalignedStore16(_p, _val)) |
114 | | #define PHMAP_INTERNAL_UNALIGNED_STORE32(_p, _val) (phmap::bits::UnalignedStore32(_p, _val)) |
115 | | #define PHMAP_INTERNAL_UNALIGNED_STORE64(_p, _val) (phmap::bits::UnalignedStore64(_p, _val)) |
116 | | |
117 | | #else |
118 | | |
119 | | namespace phmap { |
120 | | namespace bits { |
121 | | |
122 | 0 | inline uint16_t UnalignedLoad16(const void *p) { |
123 | 0 | uint16_t t; |
124 | 0 | memcpy(&t, p, sizeof t); |
125 | 0 | return t; |
126 | 0 | } |
127 | | |
128 | 0 | inline uint32_t UnalignedLoad32(const void *p) { |
129 | 0 | uint32_t t; |
130 | 0 | memcpy(&t, p, sizeof t); |
131 | 0 | return t; |
132 | 0 | } |
133 | | |
134 | 0 | inline uint64_t UnalignedLoad64(const void *p) { |
135 | 0 | uint64_t t; |
136 | 0 | memcpy(&t, p, sizeof t); |
137 | 0 | return t; |
138 | 0 | } |
139 | | |
140 | 0 | inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); } |
141 | | |
142 | 0 | inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); } |
143 | | |
144 | 0 | inline void UnalignedStore64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } |
145 | | |
146 | | } // namespace bits |
147 | | } // namespace phmap |
148 | | |
149 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD16(_p) (phmap::bits::UnalignedLoad16(_p)) |
150 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD32(_p) (phmap::bits::UnalignedLoad32(_p)) |
151 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD64(_p) (phmap::bits::UnalignedLoad64(_p)) |
152 | | |
153 | | #define PHMAP_INTERNAL_UNALIGNED_STORE16(_p, _val) (phmap::bits::UnalignedStore16(_p, _val)) |
154 | | #define PHMAP_INTERNAL_UNALIGNED_STORE32(_p, _val) (phmap::bits::UnalignedStore32(_p, _val)) |
155 | | #define PHMAP_INTERNAL_UNALIGNED_STORE64(_p, _val) (phmap::bits::UnalignedStore64(_p, _val)) |
156 | | |
157 | | #endif |
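// Example (illustrative sketch; read_u32 is a hypothetical helper introduced
// only for this example). The memcpy-based helpers above make it safe to read
// a 32-bit value from an arbitrary byte offset of a buffer, where a plain
// pointer cast could trip undefined behavior on alignment-strict targets:
//
//   #include <cstddef>
//   #include <cstdint>
//
//   inline uint32_t read_u32(const unsigned char* buf, size_t offset) {
//       // Any alignment is fine: this expands to a memcpy (or, under the
//       // sanitizers, to the dedicated __sanitizer_unaligned_load32 call).
//       return PHMAP_INTERNAL_UNALIGNED_LOAD32(buf + offset);
//   }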
158 | | |
159 | | // ----------------------------------------------------------------------------- |
160 | | // File: optimization.h |
161 | | // ----------------------------------------------------------------------------- |
162 | | |
163 | | #if defined(__pnacl__) |
164 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } |
165 | | #elif defined(__clang__) |
166 | | // Clang will not tail call given inline volatile assembly. |
167 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") |
168 | | #elif defined(__GNUC__) |
169 | | // GCC will not tail call given inline volatile assembly. |
170 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") |
171 | | #elif defined(_MSC_VER) |
172 | | #include <intrin.h> |
173 | | // The __nop() intrinsic blocks the optimisation. |
174 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __nop() |
175 | | #else |
176 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } |
177 | | #endif |
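// Example (illustrative sketch; CallAndKeepFrame and DoWork are hypothetical
// names). The macro is placed after a call whose frame should stay visible in
// stack traces; without it, the compiler may turn the surrounding return
// sequence into a tail call and drop the frame:
//
//   int CallAndKeepFrame() {
//       int result = DoWork();
//       // Prevents the compiler from converting this into a tail call.
//       PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION();
//       return result;
//   }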
178 | | |
179 | | #if defined(__GNUC__) |
180 | | // Cache line alignment |
181 | | #if defined(__i386__) || defined(__x86_64__) |
182 | | #define PHMAP_CACHELINE_SIZE 64 |
183 | | #elif defined(__powerpc64__) |
184 | | #define PHMAP_CACHELINE_SIZE 128 |
185 | | #elif defined(__aarch64__) |
186 | | // We would need to read special register ctr_el0 to find out L1 dcache size. |
187 | | // This value is a good estimate based on a real aarch64 machine. |
188 | | #define PHMAP_CACHELINE_SIZE 64 |
189 | | #elif defined(__arm__) |
190 | | // Cache line sizes for ARM: These values are not strictly correct since |
191 | | // cache line sizes depend on implementations, not architectures. There |
192 | | // are even implementations with cache line sizes configurable at boot |
193 | | // time. |
194 | | #if defined(__ARM_ARCH_5T__) |
195 | | #define PHMAP_CACHELINE_SIZE 32 |
196 | | #elif defined(__ARM_ARCH_7A__) |
197 | | #define PHMAP_CACHELINE_SIZE 64 |
198 | | #endif |
199 | | #endif |
200 | | |
201 | | #ifndef PHMAP_CACHELINE_SIZE |
202 | | // A reasonable default guess. Note that overestimates tend to waste more |
203 | | // space, while underestimates tend to waste more time. |
204 | | #define PHMAP_CACHELINE_SIZE 64 |
205 | | #endif |
206 | | |
207 | | #define PHMAP_CACHELINE_ALIGNED __attribute__((aligned(PHMAP_CACHELINE_SIZE))) |
208 | | #elif defined(_MSC_VER) |
209 | | #define PHMAP_CACHELINE_SIZE 64 |
210 | | #define PHMAP_CACHELINE_ALIGNED __declspec(align(PHMAP_CACHELINE_SIZE)) |
211 | | #else |
212 | | #define PHMAP_CACHELINE_SIZE 64 |
213 | | #define PHMAP_CACHELINE_ALIGNED |
214 | | #endif |
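// Example (illustrative sketch; Counter is a hypothetical type). Aligning a
// per-thread counter to PHMAP_CACHELINE_SIZE keeps neighboring counters on
// separate cache lines, which avoids false sharing between writer threads:
//
//   #include <atomic>
//   #include <cstdint>
//
//   struct PHMAP_CACHELINE_ALIGNED Counter {
//       std::atomic<uint64_t> value{0};
//       // Explicit padding also covers toolchains where PHMAP_CACHELINE_ALIGNED
//       // expands to nothing (the last #else branch above).
//       char padding[PHMAP_CACHELINE_SIZE - sizeof(std::atomic<uint64_t>)];
//   };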
215 | | |
216 | | |
217 | | #if PHMAP_HAVE_BUILTIN(__builtin_expect) || \ |
218 | | (defined(__GNUC__) && !defined(__clang__)) |
219 | 616k | #define PHMAP_PREDICT_FALSE(x) (__builtin_expect(x, 0)) |
220 | 12.3M | #define PHMAP_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) |
221 | | #else |
222 | | #define PHMAP_PREDICT_FALSE(x) (x) |
223 | | #define PHMAP_PREDICT_TRUE(x) (x) |
224 | | #endif |
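// Example (illustrative sketch; Lookup, find_slot and HandleMiss are
// hypothetical names). The predict macros annotate branches whose outcome is
// heavily biased, so the compiler can lay out the cold path out of line:
//
//   int* Lookup(uint64_t key) {
//       int* slot = find_slot(key);
//       if (PHMAP_PREDICT_FALSE(slot == nullptr)) {
//           return HandleMiss(key);   // rare, cold path
//       }
//       return slot;                  // common, hot path
//   }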
225 | | |
226 | | // ----------------------------------------------------------------------------- |
227 | | // File: bits.h |
228 | | // ----------------------------------------------------------------------------- |
229 | | |
230 | | #if defined(_MSC_VER) |
231 | | // We can achieve something similar to attribute((always_inline)) with MSVC by |
232 | | // using the __forceinline keyword; however, this is not perfect. MSVC is |
233 | | // much less aggressive about inlining, even with the __forceinline keyword. |
234 | | #define PHMAP_BASE_INTERNAL_FORCEINLINE __forceinline |
235 | | #else |
236 | | // Use default attribute inline. |
237 | | #define PHMAP_BASE_INTERNAL_FORCEINLINE inline PHMAP_ATTRIBUTE_ALWAYS_INLINE |
238 | | #endif |
239 | | |
240 | | |
241 | | namespace phmap { |
242 | | |
243 | | #if defined(__GNUC__) |
244 | | #pragma GCC diagnostic push |
245 | | #pragma GCC diagnostic ignored "-Wpedantic" |
246 | | #endif |
247 | | |
248 | | #ifdef PHMAP_HAVE_INTRINSIC_INT128 |
249 | | __extension__ typedef unsigned __int128 phmap_uint128; |
250 | | inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high) |
251 | 10.2M | { |
252 | 10.2M | auto result = static_cast<phmap_uint128>(a) * static_cast<phmap_uint128>(b); |
253 | 10.2M | *high = static_cast<uint64_t>(result >> 64); |
254 | 10.2M | return static_cast<uint64_t>(result); |
255 | 10.2M | } |
256 | | #define PHMAP_HAS_UMUL128 1 |
257 | | #elif (defined(_MSC_VER)) |
258 | | #if defined(_M_X64) |
259 | | #pragma intrinsic(_umul128) |
260 | | inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high) |
261 | | { |
262 | | return _umul128(a, b, high); |
263 | | } |
264 | | #define PHMAP_HAS_UMUL128 1 |
265 | | #endif |
266 | | #endif |
267 | | |
268 | | #if defined(__GNUC__) |
269 | | #pragma GCC diagnostic pop |
270 | | #endif |
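// Example (illustrative sketch; map_to_range is a hypothetical helper).
// umul128 returns the low 64 bits of the full 128-bit product and writes the
// high 64 bits through `high`; one common use is mapping a 64-bit hash onto
// [0, n) with a multiply-shift instead of a modulo:
//
//   #ifdef PHMAP_HAS_UMUL128
//   inline uint64_t map_to_range(uint64_t hash, uint64_t n) {
//       uint64_t high;
//       (void)phmap::umul128(hash, n, &high);
//       return high;   // equals (hash * n) >> 64, a value in [0, n)
//   }
//   #endif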
271 | | |
272 | | namespace base_internal { |
273 | | |
274 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros64Slow(uint64_t n) { |
275 | 0 | int zeroes = 60; |
276 | 0 | if (n >> 32) zeroes -= 32, n >>= 32; |
277 | 0 | if (n >> 16) zeroes -= 16, n >>= 16; |
278 | 0 | if (n >> 8) zeroes -= 8, n >>= 8; |
279 | 0 | if (n >> 4) zeroes -= 4, n >>= 4; |
280 | 0 | return (uint32_t)("\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes); |
281 | 0 | } |
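// Worked example for the fallback above (illustrative): the shifts peel off
// 32, 16, 8 and 4 leading zero bits at a time, and the string literal is a
// 16-entry table of leading-zero counts for the remaining 4-bit value (index
// 15 reads the implicit '\0' terminator). For instance:
//   CountLeadingZeros64Slow(1)          == 60 + 3 == 63
//   CountLeadingZeros64Slow(1ull << 40) == 20 + 3 == 23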
282 | | |
283 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros64(uint64_t n) { |
284 | 0 | #if defined(_MSC_VER) && defined(_M_X64) |
285 | 0 | // MSVC does not have __builtin_clzll. Use _BitScanReverse64. |
286 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
287 | 0 | if (_BitScanReverse64(&result, n)) { |
288 | 0 | return (uint32_t)(63 - result); |
289 | 0 | } |
290 | 0 | return 64; |
291 | 0 | #elif defined(_MSC_VER) && !defined(__clang__) |
292 | 0 | // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse |
293 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
294 | 0 | if ((n >> 32) && _BitScanReverse(&result, (unsigned long)(n >> 32))) { |
295 | 0 | return (uint32_t)(31 - result); |
296 | 0 | } |
297 | 0 | if (_BitScanReverse(&result, (unsigned long)n)) { |
298 | 0 | return (uint32_t)(63 - result); |
299 | 0 | } |
300 | 0 | return 64; |
301 | 0 | #elif defined(__GNUC__) || defined(__clang__) |
302 | 0 | // Use __builtin_clzll, which uses the following instructions: |
303 | 0 | // x86: bsr |
304 | 0 | // ARM64: clz |
305 | 0 | // PPC: cntlzd |
306 | 0 | static_assert(sizeof(unsigned long long) == sizeof(n), // NOLINT(runtime/int) |
307 | 0 | "__builtin_clzll does not take 64-bit arg"); |
308 | 0 |
309 | 0 | // Handle 0 as a special case because __builtin_clzll(0) is undefined. |
310 | 0 | if (n == 0) { |
311 | 0 | return 64; |
312 | 0 | } |
313 | 0 | return (uint32_t)__builtin_clzll(n); |
314 | 0 | #else |
315 | 0 | return CountLeadingZeros64Slow(n); |
316 | 0 | #endif |
317 | 0 | } |
318 | | |
319 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros32Slow(uint64_t n) { |
320 | 0 | uint32_t zeroes = 28; |
321 | 0 | if (n >> 16) zeroes -= 16, n >>= 16; |
322 | 0 | if (n >> 8) zeroes -= 8, n >>= 8; |
323 | 0 | if (n >> 4) zeroes -= 4, n >>= 4; |
324 | 0 | return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes; |
325 | 0 | } |
326 | | |
327 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros32(uint32_t n) { |
328 | 0 | #if defined(_MSC_VER) && !defined(__clang__) |
329 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
330 | 0 | if (_BitScanReverse(&result, n)) { |
331 | 0 | return (uint32_t)(31 - result); |
332 | 0 | } |
333 | 0 | return 32; |
334 | 0 | #elif defined(__GNUC__) || defined(__clang__) |
335 | 0 | // Use __builtin_clz, which uses the following instructions: |
336 | 0 | // x86: bsr |
337 | 0 | // ARM64: clz |
338 | 0 | // PPC: cntlzw |
339 | 0 | static_assert(sizeof(int) == sizeof(n), |
340 | 0 | "__builtin_clz does not take 32-bit arg"); |
341 | 0 |
342 | 0 | // Handle 0 as a special case because __builtin_clz(0) is undefined. |
343 | 0 | if (n == 0) { |
344 | 0 | return 32; |
345 | 0 | } |
346 | 0 | return __builtin_clz(n); |
347 | 0 | #else |
348 | 0 | return CountLeadingZeros32Slow(n); |
349 | 0 | #endif |
350 | 0 | } |
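// Example (illustrative sketch; bit_width64 is a hypothetical helper). A
// typical use of the leading-zero counts is computing how many bits are
// needed to represent a value, i.e. 1 + floor(log2(n)) for n > 0:
//
//   inline uint32_t bit_width64(uint64_t n) {
//       return n == 0 ? 0 : 64 - phmap::base_internal::CountLeadingZeros64(n);
//   }
//   // bit_width64(1) == 1, bit_width64(255) == 8, bit_width64(256) == 9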
351 | | |
352 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero64Slow(uint64_t n) { |
353 | 0 | uint32_t c = 63; |
354 | 0 | n &= ~n + 1; |
355 | 0 | if (n & 0x00000000FFFFFFFF) c -= 32; |
356 | 0 | if (n & 0x0000FFFF0000FFFF) c -= 16; |
357 | 0 | if (n & 0x00FF00FF00FF00FF) c -= 8; |
358 | 0 | if (n & 0x0F0F0F0F0F0F0F0F) c -= 4; |
359 | 0 | if (n & 0x3333333333333333) c -= 2; |
360 | 0 | if (n & 0x5555555555555555) c -= 1; |
361 | 0 | return c; |
362 | 0 | } |
363 | | |
364 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero64(uint64_t n) { |
365 | 0 | #if defined(_MSC_VER) && !defined(__clang__) && defined(_M_X64) |
366 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
367 | 0 | _BitScanForward64(&result, n); |
368 | 0 | return (uint32_t)result; |
369 | 0 | #elif defined(_MSC_VER) && !defined(__clang__) |
370 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
371 | 0 | if (static_cast<uint32_t>(n) == 0) { |
372 | 0 | _BitScanForward(&result, (unsigned long)(n >> 32)); |
373 | 0 | return result + 32; |
374 | 0 | } |
375 | 0 | _BitScanForward(&result, (unsigned long)n); |
376 | 0 | return result; |
377 | 0 | #elif defined(__GNUC__) || defined(__clang__) |
378 | 0 | static_assert(sizeof(unsigned long long) == sizeof(n), // NOLINT(runtime/int) |
379 | 0 | "__builtin_ctzll does not take 64-bit arg"); |
380 | 0 | return __builtin_ctzll(n); |
381 | 0 | #else |
382 | 0 | return CountTrailingZerosNonZero64Slow(n); |
383 | 0 | #endif |
384 | 0 | } |
385 | | |
386 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero32Slow(uint32_t n) { |
387 | 0 | uint32_t c = 31; |
388 | 0 | n &= ~n + 1; |
389 | 0 | if (n & 0x0000FFFF) c -= 16; |
390 | 0 | if (n & 0x00FF00FF) c -= 8; |
391 | 0 | if (n & 0x0F0F0F0F) c -= 4; |
392 | 0 | if (n & 0x33333333) c -= 2; |
393 | 0 | if (n & 0x55555555) c -= 1; |
394 | 0 | return c; |
395 | 0 | } |
396 | | |
397 | 12.6M | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero32(uint32_t n) { |
398 | | #if defined(_MSC_VER) && !defined(__clang__) |
399 | | unsigned long result = 0; // NOLINT(runtime/int) |
400 | | _BitScanForward(&result, n); |
401 | | return (uint32_t)result; |
402 | | #elif defined(__GNUC__) || defined(__clang__) |
403 | | static_assert(sizeof(int) == sizeof(n), |
404 | 12.6M | "__builtin_ctz does not take 32-bit arg"); |
405 | 12.6M | return __builtin_ctz(n); |
406 | | #else |
407 | | return CountTrailingZerosNonZero32Slow(n); |
408 | | #endif |
409 | 12.6M | } |
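// Example (illustrative sketch; for_each_set_bit is a hypothetical helper).
// The trailing-zero count is what lets a bitmask of candidate slots be walked
// one set bit at a time, the usual way a group-match mask is consumed:
//
//   template <class F>
//   void for_each_set_bit(uint32_t mask, F f) {
//       while (mask != 0) {
//           f(phmap::base_internal::CountTrailingZerosNonZero32(mask));
//           mask &= mask - 1;   // clear the lowest set bit
//       }
//   }
//   // for_each_set_bit(0b10100u, f) calls f(2), then f(4)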
410 | | |
411 | | #undef PHMAP_BASE_INTERNAL_FORCEINLINE |
412 | | |
413 | | } // namespace base_internal |
414 | | } // namespace phmap |
415 | | |
416 | | // ----------------------------------------------------------------------------- |
417 | | // File: endian.h |
418 | | // ----------------------------------------------------------------------------- |
419 | | |
420 | | namespace phmap { |
421 | | |
422 | | // Use compiler byte-swapping intrinsics if they are available. The 32-bit |
423 | | // and 64-bit versions are available in Clang, and in GCC as of 4.3.0; the |
424 | | // 16-bit version only as of GCC 4.8.0. For simplicity, we enable them all |
425 | | // only for Clang, or for GCC 4.8.0 and later. |
426 | | #if defined(__clang__) || \ |
427 | | (defined(__GNUC__) && \ |
428 | | ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) |
429 | | |
430 | 0 | inline uint64_t gbswap_64(uint64_t host_int) { |
431 | 0 | return __builtin_bswap64(host_int); |
432 | 0 | } |
433 | 0 | inline uint32_t gbswap_32(uint32_t host_int) { |
434 | 0 | return __builtin_bswap32(host_int); |
435 | 0 | } |
436 | 0 | inline uint16_t gbswap_16(uint16_t host_int) { |
437 | 0 | return __builtin_bswap16(host_int); |
438 | 0 | } |
439 | | |
440 | | #elif defined(_MSC_VER) |
441 | | |
442 | | inline uint64_t gbswap_64(uint64_t host_int) { |
443 | | return _byteswap_uint64(host_int); |
444 | | } |
445 | | inline uint32_t gbswap_32(uint32_t host_int) { |
446 | | return _byteswap_ulong(host_int); |
447 | | } |
448 | | inline uint16_t gbswap_16(uint16_t host_int) { |
449 | | return _byteswap_ushort(host_int); |
450 | | } |
451 | | |
452 | | #elif defined(__APPLE__) |
453 | | |
454 | | inline uint64_t gbswap_64(uint64_t host_int) { return OSSwapInt64(host_int); } |
455 | | inline uint32_t gbswap_32(uint32_t host_int) { return OSSwapInt32(host_int); } |
456 | | inline uint16_t gbswap_16(uint16_t host_int) { return OSSwapInt16(host_int); } |
457 | | |
458 | | #else |
459 | | |
460 | | inline uint64_t gbswap_64(uint64_t host_int) { |
461 | | #if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__) |
462 | | // Adapted from /usr/include/byteswap.h. Not available on Mac. |
463 | | if (__builtin_constant_p(host_int)) { |
464 | | return __bswap_constant_64(host_int); |
465 | | } else { |
466 | | uint64_t result; |
467 | | __asm__("bswap %0" : "=r"(result) : "0"(host_int)); |
468 | | return result; |
469 | | } |
470 | | #elif defined(__GLIBC__) |
471 | | return bswap_64(host_int); |
472 | | #else |
473 | | return (((host_int & uint64_t{0xFF}) << 56) | |
474 | | ((host_int & uint64_t{0xFF00}) << 40) | |
475 | | ((host_int & uint64_t{0xFF0000}) << 24) | |
476 | | ((host_int & uint64_t{0xFF000000}) << 8) | |
477 | | ((host_int & uint64_t{0xFF00000000}) >> 8) | |
478 | | ((host_int & uint64_t{0xFF0000000000}) >> 24) | |
479 | | ((host_int & uint64_t{0xFF000000000000}) >> 40) | |
480 | | ((host_int & uint64_t{0xFF00000000000000}) >> 56)); |
481 | | #endif // bswap_64 |
482 | | } |
483 | | |
484 | | inline uint32_t gbswap_32(uint32_t host_int) { |
485 | | #if defined(__GLIBC__) |
486 | | return bswap_32(host_int); |
487 | | #else |
488 | | return (((host_int & uint32_t{0xFF}) << 24) | |
489 | | ((host_int & uint32_t{0xFF00}) << 8) | |
490 | | ((host_int & uint32_t{0xFF0000}) >> 8) | |
491 | | ((host_int & uint32_t{0xFF000000}) >> 24)); |
492 | | #endif |
493 | | } |
494 | | |
495 | | inline uint16_t gbswap_16(uint16_t host_int) { |
496 | | #if defined(__GLIBC__) |
497 | | return bswap_16(host_int); |
498 | | #else |
499 | | return (((host_int & uint16_t{0xFF}) << 8) | |
500 | | ((host_int & uint16_t{0xFF00}) >> 8)); |
501 | | #endif |
502 | | } |
503 | | |
504 | | #endif // intrinsics available |
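// Worked example (illustrative): whichever branch above is selected, the
// gbswap_* functions reverse byte order, e.g.
//   gbswap_16(0x1234)              == 0x3412
//   gbswap_32(0x01020304)          == 0x04030201
//   gbswap_64(0x0102030405060708)  == 0x0807060504030201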
505 | | |
506 | | #ifdef PHMAP_IS_LITTLE_ENDIAN |
507 | | |
508 | | // Definitions for ntohl etc. that don't require us to include |
509 | | // netinet/in.h. We wrap gbswap_32 and gbswap_16 in functions rather |
510 | | // than just #defining them because in debug mode, gcc doesn't |
511 | | // correctly handle the (rather involved) definitions of bswap_32. |
512 | | // gcc guarantees that inline functions are as fast as macros, so |
513 | | // this isn't a performance hit. |
514 | 0 | inline uint16_t ghtons(uint16_t x) { return gbswap_16(x); } |
515 | 0 | inline uint32_t ghtonl(uint32_t x) { return gbswap_32(x); } |
516 | 0 | inline uint64_t ghtonll(uint64_t x) { return gbswap_64(x); } |
517 | | |
518 | | #elif defined PHMAP_IS_BIG_ENDIAN |
519 | | |
520 | | // These definitions are simpler on big-endian machines |
521 | | // These are functions instead of macros to avoid self-assignment warnings |
522 | | // on calls such as "i = ghtonl(i);". This also provides type checking. |
523 | | inline uint16_t ghtons(uint16_t x) { return x; } |
524 | | inline uint32_t ghtonl(uint32_t x) { return x; } |
525 | | inline uint64_t ghtonll(uint64_t x) { return x; } |
526 | | |
527 | | #else |
528 | | #error \ |
529 | | "Unsupported byte order: Either PHMAP_IS_BIG_ENDIAN or " \ |
530 | | "PHMAP_IS_LITTLE_ENDIAN must be defined" |
531 | | #endif // byte order |
532 | | |
533 | 0 | inline uint16_t gntohs(uint16_t x) { return ghtons(x); } |
534 | 0 | inline uint32_t gntohl(uint32_t x) { return ghtonl(x); } |
535 | 0 | inline uint64_t gntohll(uint64_t x) { return ghtonll(x); } |
536 | | |
537 | | // Utilities to convert numbers between the current host's native byte |
538 | | // order and little-endian byte order |
539 | | // |
540 | | // Load/Store methods are alignment safe |
541 | | namespace little_endian { |
542 | | // Conversion functions. |
543 | | #ifdef PHMAP_IS_LITTLE_ENDIAN |
544 | | |
545 | 0 | inline uint16_t FromHost16(uint16_t x) { return x; } |
546 | 0 | inline uint16_t ToHost16(uint16_t x) { return x; } |
547 | | |
548 | 0 | inline uint32_t FromHost32(uint32_t x) { return x; } |
549 | 0 | inline uint32_t ToHost32(uint32_t x) { return x; } |
550 | | |
551 | 0 | inline uint64_t FromHost64(uint64_t x) { return x; } |
552 | 0 | inline uint64_t ToHost64(uint64_t x) { return x; } |
553 | | |
554 | 0 | inline constexpr bool IsLittleEndian() { return true; } |
555 | | |
556 | | #elif defined PHMAP_IS_BIG_ENDIAN |
557 | | |
558 | | inline uint16_t FromHost16(uint16_t x) { return gbswap_16(x); } |
559 | | inline uint16_t ToHost16(uint16_t x) { return gbswap_16(x); } |
560 | | |
561 | | inline uint32_t FromHost32(uint32_t x) { return gbswap_32(x); } |
562 | | inline uint32_t ToHost32(uint32_t x) { return gbswap_32(x); } |
563 | | |
564 | | inline uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } |
565 | | inline uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } |
566 | | |
567 | | inline constexpr bool IsLittleEndian() { return false; } |
568 | | |
569 | | #endif /* ENDIAN */ |
570 | | |
571 | | // Functions to do unaligned loads and stores in little-endian order. |
572 | | // ------------------------------------------------------------------ |
573 | 0 | inline uint16_t Load16(const void *p) { |
574 | 0 | return ToHost16(PHMAP_INTERNAL_UNALIGNED_LOAD16(p)); |
575 | 0 | } |
576 | | |
577 | 0 | inline void Store16(void *p, uint16_t v) { |
578 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE16(p, FromHost16(v)); |
579 | 0 | } |
580 | | |
581 | 0 | inline uint32_t Load32(const void *p) { |
582 | 0 | return ToHost32(PHMAP_INTERNAL_UNALIGNED_LOAD32(p)); |
583 | 0 | } |
584 | | |
585 | 0 | inline void Store32(void *p, uint32_t v) { |
586 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE32(p, FromHost32(v)); |
587 | 0 | } |
588 | | |
589 | 0 | inline uint64_t Load64(const void *p) { |
590 | 0 | return ToHost64(PHMAP_INTERNAL_UNALIGNED_LOAD64(p)); |
591 | 0 | } |
592 | | |
593 | 0 | inline void Store64(void *p, uint64_t v) { |
594 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE64(p, FromHost64(v)); |
595 | 0 | } |
596 | | |
597 | | } // namespace little_endian |
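// Example (illustrative sketch; append_u64 is a hypothetical helper). The
// little_endian helpers give a fixed byte layout regardless of the host's
// byte order, which makes them handy for serialization:
//
//   #include <string>
//
//   inline void append_u64(std::string& out, uint64_t v) {
//       char buf[8];
//       phmap::little_endian::Store64(buf, v);   // alignment-safe, LE layout
//       out.append(buf, sizeof buf);
//   }
//   // phmap::little_endian::Load64 on those 8 bytes round-trips v on any host.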
598 | | |
599 | | // Utilities to convert numbers between the current host's native byte |
600 | | // order and big-endian byte order (same as network byte order) |
601 | | // |
602 | | // Load/Store methods are alignment safe |
603 | | namespace big_endian { |
604 | | #ifdef PHMAP_IS_LITTLE_ENDIAN |
605 | | |
606 | 0 | inline uint16_t FromHost16(uint16_t x) { return gbswap_16(x); } |
607 | 0 | inline uint16_t ToHost16(uint16_t x) { return gbswap_16(x); } |
608 | | |
609 | 0 | inline uint32_t FromHost32(uint32_t x) { return gbswap_32(x); } |
610 | 0 | inline uint32_t ToHost32(uint32_t x) { return gbswap_32(x); } |
611 | | |
612 | 0 | inline uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } |
613 | 0 | inline uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } |
614 | | |
615 | 0 | inline constexpr bool IsLittleEndian() { return true; } |
616 | | |
617 | | #elif defined PHMAP_IS_BIG_ENDIAN |
618 | | |
619 | | inline uint16_t FromHost16(uint16_t x) { return x; } |
620 | | inline uint16_t ToHost16(uint16_t x) { return x; } |
621 | | |
622 | | inline uint32_t FromHost32(uint32_t x) { return x; } |
623 | | inline uint32_t ToHost32(uint32_t x) { return x; } |
624 | | |
625 | | inline uint64_t FromHost64(uint64_t x) { return x; } |
626 | | inline uint64_t ToHost64(uint64_t x) { return x; } |
627 | | |
628 | | inline constexpr bool IsLittleEndian() { return false; } |
629 | | |
630 | | #endif /* ENDIAN */ |
631 | | |
632 | | // Functions to do unaligned loads and stores in big-endian order. |
633 | 0 | inline uint16_t Load16(const void *p) { |
634 | 0 | return ToHost16(PHMAP_INTERNAL_UNALIGNED_LOAD16(p)); |
635 | 0 | } |
636 | | |
637 | 0 | inline void Store16(void *p, uint16_t v) { |
638 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE16(p, FromHost16(v)); |
639 | 0 | } |
640 | | |
641 | 0 | inline uint32_t Load32(const void *p) { |
642 | 0 | return ToHost32(PHMAP_INTERNAL_UNALIGNED_LOAD32(p)); |
643 | 0 | } |
644 | | |
645 | 0 | inline void Store32(void *p, uint32_t v) { |
646 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE32(p, FromHost32(v)); |
647 | 0 | } |
648 | | |
649 | 0 | inline uint64_t Load64(const void *p) { |
650 | 0 | return ToHost64(PHMAP_INTERNAL_UNALIGNED_LOAD64(p)); |
651 | 0 | } |
652 | | |
653 | 0 | inline void Store64(void *p, uint64_t v) { |
654 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE64(p, FromHost64(v)); |
655 | 0 | } |
656 | | |
657 | | } // namespace big_endian |
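// Example (illustrative sketch; write_frame_header is a hypothetical helper).
// big_endian produces network byte order, so a length prefix written with
// Store32 can be read by any peer, much like htonl followed by a memcpy:
//
//   inline void write_frame_header(unsigned char* dst, uint32_t payload_len) {
//       phmap::big_endian::Store32(dst, payload_len);
//   }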
658 | | |
659 | | } // namespace phmap |
660 | | |
661 | | #ifdef _MSC_VER |
662 | | #pragma warning(pop) |
663 | | #endif |
664 | | |
665 | | #endif // phmap_bits_h_guard_ |