/src/parallel-hashmap/parallel_hashmap/phmap_bits.h
Line | Count | Source |
1 | | #if !defined(phmap_bits_h_guard_) |
2 | | #define phmap_bits_h_guard_ |
3 | | |
4 | | // --------------------------------------------------------------------------- |
5 | | // Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com |
6 | | // |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // |
11 | | // https://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, software |
14 | | // distributed under the License is distributed on an "AS IS" BASIS, |
15 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | | // See the License for the specific language governing permissions and |
17 | | // limitations under the License. |
18 | | // |
19 | | // Includes work from abseil-cpp (https://github.com/abseil/abseil-cpp) |
20 | | // with modifications. |
21 | | // |
22 | | // Copyright 2018 The Abseil Authors. |
23 | | // |
24 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
25 | | // you may not use this file except in compliance with the License. |
26 | | // You may obtain a copy of the License at |
27 | | // |
28 | | // https://www.apache.org/licenses/LICENSE-2.0 |
29 | | // |
30 | | // Unless required by applicable law or agreed to in writing, software |
31 | | // distributed under the License is distributed on an "AS IS" BASIS, |
32 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
33 | | // See the License for the specific language governing permissions and |
34 | | // limitations under the License. |
35 | | // --------------------------------------------------------------------------- |
36 | | |
37 | | // The following guarantees declaration of the byte swap functions |
38 | | #ifdef _MSC_VER |
39 | | #include <stdlib.h> // NOLINT(build/include) |
40 | | #elif defined(__APPLE__) |
41 | | // Mac OS X / Darwin features |
42 | | #include <libkern/OSByteOrder.h> |
43 | | #elif defined(__FreeBSD__) |
44 | | #include <sys/endian.h> |
45 | | #elif defined(__GLIBC__) |
46 | | #include <byteswap.h> // IWYU pragma: export |
47 | | #endif |
48 | | |
49 | | #include <string.h> |
50 | | #include <cstdint> |
51 | | #include "phmap_config.h" |
52 | | |
53 | | #ifdef _MSC_VER |
54 | | #pragma warning(push) |
55 | | #pragma warning(disable : 4514) // unreferenced inline function has been removed |
56 | | #endif |
57 | | |
58 | | // ----------------------------------------------------------------------------- |
59 | | // unaligned APIs |
60 | | // ----------------------------------------------------------------------------- |
61 | | // Portable handling of unaligned loads, stores, and copies. |
62 | | // On some platforms, like ARM, the copy functions can be more efficient |
63 | | // than a load and a store. |
64 | | // ----------------------------------------------------------------------------- |
65 | | |
66 | | #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||\ |
67 | | defined(MEMORY_SANITIZER) |
68 | | #include <stdint.h> |
69 | | |
70 | | extern "C" { |
71 | | uint16_t __sanitizer_unaligned_load16(const void *p); |
72 | | uint32_t __sanitizer_unaligned_load32(const void *p); |
73 | | uint64_t __sanitizer_unaligned_load64(const void *p); |
74 | | void __sanitizer_unaligned_store16(void *p, uint16_t v); |
75 | | void __sanitizer_unaligned_store32(void *p, uint32_t v); |
76 | | void __sanitizer_unaligned_store64(void *p, uint64_t v); |
77 | | } // extern "C" |
78 | | |
79 | | namespace phmap { |
80 | | namespace bits { |
81 | | |
82 | | inline uint16_t UnalignedLoad16(const void *p) { |
83 | | return __sanitizer_unaligned_load16(p); |
84 | | } |
85 | | |
86 | | inline uint32_t UnalignedLoad32(const void *p) { |
87 | | return __sanitizer_unaligned_load32(p); |
88 | | } |
89 | | |
90 | | inline uint64_t UnalignedLoad64(const void *p) { |
91 | | return __sanitizer_unaligned_load64(p); |
92 | | } |
93 | | |
94 | | inline void UnalignedStore16(void *p, uint16_t v) { |
95 | | __sanitizer_unaligned_store16(p, v); |
96 | | } |
97 | | |
98 | | inline void UnalignedStore32(void *p, uint32_t v) { |
99 | | __sanitizer_unaligned_store32(p, v); |
100 | | } |
101 | | |
102 | | inline void UnalignedStore64(void *p, uint64_t v) { |
103 | | __sanitizer_unaligned_store64(p, v); |
104 | | } |
105 | | |
106 | | } // namespace bits |
107 | | } // namespace phmap |
108 | | |
109 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD16(_p) (phmap::bits::UnalignedLoad16(_p)) |
110 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD32(_p) (phmap::bits::UnalignedLoad32(_p)) |
111 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD64(_p) (phmap::bits::UnalignedLoad64(_p)) |
112 | | |
113 | | #define PHMAP_INTERNAL_UNALIGNED_STORE16(_p, _val) (phmap::bits::UnalignedStore16(_p, _val)) |
114 | | #define PHMAP_INTERNAL_UNALIGNED_STORE32(_p, _val) (phmap::bits::UnalignedStore32(_p, _val)) |
115 | | #define PHMAP_INTERNAL_UNALIGNED_STORE64(_p, _val) (phmap::bits::UnalignedStore64(_p, _val)) |
116 | | |
117 | | #else |
118 | | |
119 | | namespace phmap { |
120 | | namespace bits { |
121 | | |
122 | 0 | inline uint16_t UnalignedLoad16(const void *p) { |
123 | 0 | uint16_t t; |
124 | 0 | memcpy(&t, p, sizeof t); |
125 | 0 | return t; |
126 | 0 | } |
127 | | |
128 | 0 | inline uint32_t UnalignedLoad32(const void *p) { |
129 | 0 | uint32_t t; |
130 | 0 | memcpy(&t, p, sizeof t); |
131 | 0 | return t; |
132 | 0 | } |
133 | | |
134 | 0 | inline uint64_t UnalignedLoad64(const void *p) { |
135 | 0 | uint64_t t; |
136 | 0 | memcpy(&t, p, sizeof t); |
137 | 0 | return t; |
138 | 0 | } |
139 | | |
140 | 0 | inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); } |
141 | | |
142 | 0 | inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); } |
143 | | |
144 | 0 | inline void UnalignedStore64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } |
145 | | |
146 | | } // namespace bits |
147 | | } // namespace phmap |
148 | | |
149 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD16(_p) (phmap::bits::UnalignedLoad16(_p)) |
150 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD32(_p) (phmap::bits::UnalignedLoad32(_p)) |
151 | | #define PHMAP_INTERNAL_UNALIGNED_LOAD64(_p) (phmap::bits::UnalignedLoad64(_p)) |
152 | | |
153 | | #define PHMAP_INTERNAL_UNALIGNED_STORE16(_p, _val) (phmap::bits::UnalignedStore16(_p, _val)) |
154 | | #define PHMAP_INTERNAL_UNALIGNED_STORE32(_p, _val) (phmap::bits::UnalignedStore32(_p, _val)) |
155 | | #define PHMAP_INTERNAL_UNALIGNED_STORE64(_p, _val) (phmap::bits::UnalignedStore64(_p, _val)) |
156 | | |
157 | | #endif |
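// ---------------------------------------------------------------------------
// Illustrative sketch (editor's addition, not part of the upstream header):
// the wrappers above let callers read an integer from an arbitrary byte offset
// without casting to a misaligned pointer, which would be undefined behavior.
// The helper name below is hypothetical.
inline uint32_t example_read_u32(const unsigned char *buf, size_t offset) {
    // Safe even when (buf + offset) is not 4-byte aligned.
    return PHMAP_INTERNAL_UNALIGNED_LOAD32(buf + offset);
}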
158 | | |
159 | | // ----------------------------------------------------------------------------- |
160 | | // File: optimization.h |
161 | | // ----------------------------------------------------------------------------- |
162 | | |
163 | | #if defined(__pnacl__) |
164 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } |
165 | | #elif defined(__clang__) |
166 | | // Clang will not tail call given inline volatile assembly. |
167 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") |
168 | | #elif defined(__GNUC__) |
169 | | // GCC will not tail call given inline volatile assembly. |
170 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") |
171 | | #elif defined(_MSC_VER) |
172 | | #include <intrin.h> |
173 | | // The __nop() intrinsic blocks the optimization. |
174 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() __nop() |
175 | | #else |
176 | | #define PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } |
177 | | #endif |
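// Illustrative sketch (editor's addition, not part of the upstream header):
// keeping the caller's frame alive is useful when it must appear in a crash
// backtrace. Both function names below are hypothetical.
void example_log_and_abort(const char *msg);   // hypothetical reporting hook
inline void example_die(const char *msg) {
    example_log_and_abort(msg);                // would otherwise become a tail call
    PHMAP_BLOCK_TAIL_CALL_OPTIMIZATION();      // keeps example_die() in the backtrace
}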
178 | | |
179 | | #if defined(__GNUC__) |
180 | | #pragma GCC diagnostic push |
181 | | #pragma GCC diagnostic ignored "-Wpedantic" |
182 | | #endif |
183 | | |
184 | | #ifdef PHMAP_HAVE_INTRINSIC_INT128 |
185 | | __extension__ typedef unsigned __int128 phmap_uint128; |
186 | | inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high) |
187 | 10.3M | { |
188 | 10.3M | auto result = static_cast<phmap_uint128>(a) * static_cast<phmap_uint128>(b); |
189 | 10.3M | *high = static_cast<uint64_t>(result >> 64); |
190 | 10.3M | return static_cast<uint64_t>(result); |
191 | 10.3M | } |
192 | | #define PHMAP_HAS_UMUL128 1 |
193 | | #elif (defined(_MSC_VER)) |
194 | | #if defined(_M_X64) |
195 | | #pragma intrinsic(_umul128) |
196 | | inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high) |
197 | | { |
198 | | return _umul128(a, b, high); |
199 | | } |
200 | | #define PHMAP_HAS_UMUL128 1 |
201 | | #endif |
202 | | #endif |
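// Illustrative sketch (editor's addition, not part of the upstream header):
// umul128 returns the low 64 bits of the product and writes the high 64 bits
// through `high`. A common use is scaling a 64-bit hash into [0, n) without a
// modulo ("fastrange"); the helper name below is hypothetical.
#ifdef PHMAP_HAS_UMUL128
inline uint64_t example_map_hash_to_range(uint64_t hash, uint64_t n) {
    uint64_t high;
    (void)umul128(hash, n, &high);  // high == (hash * n) >> 64, which lies in [0, n)
    return high;
}
#endif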
203 | | |
204 | | #if defined(__GNUC__) |
205 | | #pragma GCC diagnostic pop |
206 | | #endif |
207 | | |
208 | | #if defined(__GNUC__) |
209 | | // Cache line alignment |
210 | | #if defined(__i386__) || defined(__x86_64__) |
211 | | #define PHMAP_CACHELINE_SIZE 64 |
212 | | #elif defined(__powerpc64__) |
213 | | #define PHMAP_CACHELINE_SIZE 128 |
214 | | #elif defined(__aarch64__) |
215 | | // We would need to read special register ctr_el0 to find out L1 dcache size. |
216 | | // This value is a good estimate based on a real aarch64 machine. |
217 | | #define PHMAP_CACHELINE_SIZE 64 |
218 | | #elif defined(__arm__) |
219 | | // Cache line sizes for ARM: These values are not strictly correct since |
220 | | // cache line sizes depend on implementations, not architectures. There |
221 | | // are even implementations with cache line sizes configurable at boot |
222 | | // time. |
223 | | #if defined(__ARM_ARCH_5T__) |
224 | | #define PHMAP_CACHELINE_SIZE 32 |
225 | | #elif defined(__ARM_ARCH_7A__) |
226 | | #define PHMAP_CACHELINE_SIZE 64 |
227 | | #endif |
228 | | #endif |
229 | | |
230 | | #ifndef PHMAP_CACHELINE_SIZE |
231 | | // A reasonable default guess. Note that overestimates tend to waste more |
232 | | // space, while underestimates tend to waste more time. |
233 | | #define PHMAP_CACHELINE_SIZE 64 |
234 | | #endif |
235 | | |
236 | | #define PHMAP_CACHELINE_ALIGNED __attribute__((aligned(PHMAP_CACHELINE_SIZE))) |
237 | | #elif defined(_MSC_VER) |
238 | | #define PHMAP_CACHELINE_SIZE 64 |
239 | | #define PHMAP_CACHELINE_ALIGNED __declspec(align(PHMAP_CACHELINE_SIZE)) |
240 | | #else |
241 | | #define PHMAP_CACHELINE_SIZE 64 |
242 | | #define PHMAP_CACHELINE_ALIGNED |
243 | | #endif |
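// Illustrative sketch (editor's addition, not part of the upstream header):
// aligning per-thread data to PHMAP_CACHELINE_SIZE keeps adjacent instances on
// separate cache lines and avoids false sharing. The struct name is hypothetical.
struct PHMAP_CACHELINE_ALIGNED ExamplePaddedCounter {
    uint64_t value;
};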
244 | | |
245 | | |
246 | | #if PHMAP_HAVE_BUILTIN(__builtin_expect) || \ |
247 | | (defined(__GNUC__) && !defined(__clang__)) |
248 | 594k | #define PHMAP_PREDICT_FALSE(x) (__builtin_expect(x, 0)) |
249 | 12.5M | #define PHMAP_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) |
250 | | #else |
251 | | #define PHMAP_PREDICT_FALSE(x) (x) |
252 | | #define PHMAP_PREDICT_TRUE(x) (x) |
253 | | #endif |
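// Illustrative sketch (editor's addition, not part of the upstream header):
// the predict macros annotate branch likelihood so the compiler lays out the
// hot path first. The helper name is hypothetical.
inline uint64_t example_checked_div(uint64_t a, uint64_t b) {
    if (PHMAP_PREDICT_FALSE(b == 0))  // division by zero is the cold path
        return 0;
    return a / b;
}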
254 | | |
255 | | // ----------------------------------------------------------------------------- |
256 | | // File: bits.h |
257 | | // ----------------------------------------------------------------------------- |
258 | | |
259 | | #if defined(_MSC_VER) |
260 | | // We can achieve something similar to attribute((always_inline)) with MSVC by |
261 | | // using the __forceinline keyword; however, this is not perfect. MSVC is |
262 | | // much less aggressive about inlining, even with the __forceinline keyword. |
263 | | #define PHMAP_BASE_INTERNAL_FORCEINLINE __forceinline |
264 | | #else |
265 | | // Use default attribute inline. |
266 | | #define PHMAP_BASE_INTERNAL_FORCEINLINE inline PHMAP_ATTRIBUTE_ALWAYS_INLINE |
267 | | #endif |
268 | | |
269 | | |
270 | | namespace phmap { |
271 | | namespace base_internal { |
272 | | |
273 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros64Slow(uint64_t n) { |
274 | 0 | int zeroes = 60; |
275 | 0 | if (n >> 32) zeroes -= 32, n >>= 32; |
276 | 0 | if (n >> 16) zeroes -= 16, n >>= 16; |
277 | 0 | if (n >> 8) zeroes -= 8, n >>= 8; |
278 | 0 | if (n >> 4) zeroes -= 4, n >>= 4; |
279 | 0 | return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes; |
280 | 0 | } |
281 | | |
282 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros64(uint64_t n) { |
283 | 0 | #if defined(_MSC_VER) && defined(_M_X64) |
284 | 0 | // MSVC does not have __builtin_clzll. Use _BitScanReverse64. |
285 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
286 | 0 | if (_BitScanReverse64(&result, n)) { |
287 | 0 | return (int)(63 - result); |
288 | 0 | } |
289 | 0 | return 64; |
290 | 0 | #elif defined(_MSC_VER) && !defined(__clang__) |
291 | 0 | // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse |
292 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
293 | 0 | if ((n >> 32) && _BitScanReverse(&result, (unsigned long)(n >> 32))) { |
294 | 0 | return 31 - result; |
295 | 0 | } |
296 | 0 | if (_BitScanReverse(&result, (unsigned long)n)) { |
297 | 0 | return 63 - result; |
298 | 0 | } |
299 | 0 | return 64; |
300 | 0 | #elif defined(__GNUC__) || defined(__clang__) |
301 | 0 | // Use __builtin_clzll, which uses the following instructions: |
302 | 0 | // x86: bsr |
303 | 0 | // ARM64: clz |
304 | 0 | // PPC: cntlzd |
305 | 0 | static_assert(sizeof(unsigned long long) == sizeof(n), // NOLINT(runtime/int) |
306 | 0 | "__builtin_clzll does not take 64-bit arg"); |
307 | 0 |
308 | 0 | // Handle 0 as a special case because __builtin_clzll(0) is undefined. |
309 | 0 | if (n == 0) { |
310 | 0 | return 64; |
311 | 0 | } |
312 | 0 | return __builtin_clzll(n); |
313 | 0 | #else |
314 | 0 | return CountLeadingZeros64Slow(n); |
315 | 0 | #endif |
316 | 0 | } |
317 | | |
318 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros32Slow(uint64_t n) { |
319 | 0 | uint32_t zeroes = 28; |
320 | 0 | if (n >> 16) zeroes -= 16, n >>= 16; |
321 | 0 | if (n >> 8) zeroes -= 8, n >>= 8; |
322 | 0 | if (n >> 4) zeroes -= 4, n >>= 4; |
323 | 0 | return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes; |
324 | 0 | } |
325 | | |
326 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountLeadingZeros32(uint32_t n) { |
327 | 0 | #if defined(_MSC_VER) && !defined(__clang__) |
328 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
329 | 0 | if (_BitScanReverse(&result, n)) { |
330 | 0 | return (uint32_t)(31 - result); |
331 | 0 | } |
332 | 0 | return 32; |
333 | 0 | #elif defined(__GNUC__) || defined(__clang__) |
334 | 0 | // Use __builtin_clz, which uses the following instructions: |
335 | 0 | // x86: bsr |
336 | 0 | // ARM64: clz |
337 | 0 | // PPC: cntlzd |
338 | 0 | static_assert(sizeof(int) == sizeof(n), |
339 | 0 | "__builtin_clz does not take 32-bit arg"); |
340 | 0 |
341 | 0 | // Handle 0 as a special case because __builtin_clz(0) is undefined. |
342 | 0 | if (n == 0) { |
343 | 0 | return 32; |
344 | 0 | } |
345 | 0 | return __builtin_clz(n); |
346 | 0 | #else |
347 | 0 | return CountLeadingZeros32Slow(n); |
348 | 0 | #endif |
349 | 0 | } |
350 | | |
351 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero64Slow(uint64_t n) { |
352 | 0 | uint32_t c = 63; |
353 | 0 | n &= ~n + 1; |
354 | 0 | if (n & 0x00000000FFFFFFFF) c -= 32; |
355 | 0 | if (n & 0x0000FFFF0000FFFF) c -= 16; |
356 | 0 | if (n & 0x00FF00FF00FF00FF) c -= 8; |
357 | 0 | if (n & 0x0F0F0F0F0F0F0F0F) c -= 4; |
358 | 0 | if (n & 0x3333333333333333) c -= 2; |
359 | 0 | if (n & 0x5555555555555555) c -= 1; |
360 | 0 | return c; |
361 | 0 | } |
362 | | |
363 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero64(uint64_t n) { |
364 | 0 | #if defined(_MSC_VER) && !defined(__clang__) && defined(_M_X64) |
365 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
366 | 0 | _BitScanForward64(&result, n); |
367 | 0 | return (uint32_t)result; |
368 | 0 | #elif defined(_MSC_VER) && !defined(__clang__) |
369 | 0 | unsigned long result = 0; // NOLINT(runtime/int) |
370 | 0 | if (static_cast<uint32_t>(n) == 0) { |
371 | 0 | _BitScanForward(&result, (unsigned long)(n >> 32)); |
372 | 0 | return result + 32; |
373 | 0 | } |
374 | 0 | _BitScanForward(&result, (unsigned long)n); |
375 | 0 | return result; |
376 | 0 | #elif defined(__GNUC__) || defined(__clang__) |
377 | 0 | static_assert(sizeof(unsigned long long) == sizeof(n), // NOLINT(runtime/int) |
378 | 0 | "__builtin_ctzll does not take 64-bit arg"); |
379 | 0 | return __builtin_ctzll(n); |
380 | 0 | #else |
381 | 0 | return CountTrailingZerosNonZero64Slow(n); |
382 | 0 | #endif |
383 | 0 | } |
384 | | |
385 | 0 | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero32Slow(uint32_t n) { |
386 | 0 | uint32_t c = 31; |
387 | 0 | n &= ~n + 1; |
388 | 0 | if (n & 0x0000FFFF) c -= 16; |
389 | 0 | if (n & 0x00FF00FF) c -= 8; |
390 | 0 | if (n & 0x0F0F0F0F) c -= 4; |
391 | 0 | if (n & 0x33333333) c -= 2; |
392 | 0 | if (n & 0x55555555) c -= 1; |
393 | 0 | return c; |
394 | 0 | } |
395 | | |
396 | 12.8M | PHMAP_BASE_INTERNAL_FORCEINLINE uint32_t CountTrailingZerosNonZero32(uint32_t n) { |
397 | | #if defined(_MSC_VER) && !defined(__clang__) |
398 | | unsigned long result = 0; // NOLINT(runtime/int) |
399 | | _BitScanForward(&result, n); |
400 | | return (uint32_t)result; |
401 | | #elif defined(__GNUC__) || defined(__clang__) |
402 | 12.8M | static_assert(sizeof(int) == sizeof(n), |
403 | 12.8M | "__builtin_ctz does not take 32-bit arg"); |
404 | 12.8M | return __builtin_ctz(n); |
405 | | #else |
406 | | return CountTrailingZerosNonZero32Slow(n); |
407 | | #endif |
408 | 12.8M | } |
409 | | |
410 | | #undef PHMAP_BASE_INTERNAL_FORCEINLINE |
411 | | |
412 | | } // namespace base_internal |
413 | | } // namespace phmap |
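// Illustrative sketch (editor's addition, not part of the upstream header):
// CountTrailingZerosNonZero32 gives the index of the lowest set bit, the usual
// building block for walking the set bits of a match mask. The helper name is
// hypothetical; the *NonZero* variants require a non-zero argument.
inline void example_for_each_set_bit(uint32_t mask, void (*visit)(uint32_t)) {
    while (mask != 0) {
        visit(phmap::base_internal::CountTrailingZerosNonZero32(mask));
        mask &= mask - 1;  // clear the lowest set bit
    }
}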
414 | | |
415 | | // ----------------------------------------------------------------------------- |
416 | | // File: endian.h |
417 | | // ----------------------------------------------------------------------------- |
418 | | |
419 | | namespace phmap { |
420 | | |
421 | | // Use compiler byte-swapping intrinsics if they are available. 32-bit |
422 | | // and 64-bit versions are available in Clang and GCC as of GCC 4.3.0. |
423 | | // The 16-bit version is available in Clang and GCC only as of GCC 4.8.0. |
424 | | // For simplicity, we enable them all only for GCC 4.8.0 or later. |
425 | | #if defined(__clang__) || \ |
426 | | (defined(__GNUC__) && \ |
427 | | ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) |
428 | | |
429 | 0 | inline uint64_t gbswap_64(uint64_t host_int) { |
430 | 0 | return __builtin_bswap64(host_int); |
431 | 0 | } |
432 | 0 | inline uint32_t gbswap_32(uint32_t host_int) { |
433 | 0 | return __builtin_bswap32(host_int); |
434 | 0 | } |
435 | 0 | inline uint16_t gbswap_16(uint16_t host_int) { |
436 | 0 | return __builtin_bswap16(host_int); |
437 | 0 | } |
438 | | |
439 | | #elif defined(_MSC_VER) |
440 | | |
441 | | inline uint64_t gbswap_64(uint64_t host_int) { |
442 | | return _byteswap_uint64(host_int); |
443 | | } |
444 | | inline uint32_t gbswap_32(uint32_t host_int) { |
445 | | return _byteswap_ulong(host_int); |
446 | | } |
447 | | inline uint16_t gbswap_16(uint16_t host_int) { |
448 | | return _byteswap_ushort(host_int); |
449 | | } |
450 | | |
451 | | #elif defined(__APPLE__) |
452 | | |
453 | | inline uint64_t gbswap_64(uint64_t host_int) { return OSSwapInt64(host_int); } |
454 | | inline uint32_t gbswap_32(uint32_t host_int) { return OSSwapInt32(host_int); } |
455 | | inline uint16_t gbswap_16(uint16_t host_int) { return OSSwapInt16(host_int); } |
456 | | |
457 | | #else |
458 | | |
459 | | inline uint64_t gbswap_64(uint64_t host_int) { |
460 | | #if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__) |
461 | | // Adapted from /usr/include/byteswap.h. Not available on Mac. |
462 | | if (__builtin_constant_p(host_int)) { |
463 | | return __bswap_constant_64(host_int); |
464 | | } else { |
465 | | uint64_t result; |
466 | | __asm__("bswap %0" : "=r"(result) : "0"(host_int)); |
467 | | return result; |
468 | | } |
469 | | #elif defined(__GLIBC__) |
470 | | return bswap_64(host_int); |
471 | | #else |
472 | | return (((host_int & uint64_t{0xFF}) << 56) | |
473 | | ((host_int & uint64_t{0xFF00}) << 40) | |
474 | | ((host_int & uint64_t{0xFF0000}) << 24) | |
475 | | ((host_int & uint64_t{0xFF000000}) << 8) | |
476 | | ((host_int & uint64_t{0xFF00000000}) >> 8) | |
477 | | ((host_int & uint64_t{0xFF0000000000}) >> 24) | |
478 | | ((host_int & uint64_t{0xFF000000000000}) >> 40) | |
479 | | ((host_int & uint64_t{0xFF00000000000000}) >> 56)); |
480 | | #endif // bswap_64 |
481 | | } |
482 | | |
483 | | inline uint32_t gbswap_32(uint32_t host_int) { |
484 | | #if defined(__GLIBC__) |
485 | | return bswap_32(host_int); |
486 | | #else |
487 | | return (((host_int & uint32_t{0xFF}) << 24) | |
488 | | ((host_int & uint32_t{0xFF00}) << 8) | |
489 | | ((host_int & uint32_t{0xFF0000}) >> 8) | |
490 | | ((host_int & uint32_t{0xFF000000}) >> 24)); |
491 | | #endif |
492 | | } |
493 | | |
494 | | inline uint16_t gbswap_16(uint16_t host_int) { |
495 | | #if defined(__GLIBC__) |
496 | | return bswap_16(host_int); |
497 | | #else |
498 | | return (((host_int & uint16_t{0xFF}) << 8) | |
499 | | ((host_int & uint16_t{0xFF00}) >> 8)); |
500 | | #endif |
501 | | } |
502 | | |
503 | | #endif // intrinsics available |
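// Illustrative sketch (editor's addition, not part of the upstream header):
// whichever implementation was selected above, gbswap_32 reverses byte order,
// so applying it twice restores the value. The helper name is hypothetical.
inline bool example_gbswap32_roundtrip(uint32_t x) {
    return gbswap_32(gbswap_32(x)) == x;  // e.g. gbswap_32(0x01020304u) == 0x04030201u
}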
504 | | |
505 | | #ifdef PHMAP_IS_LITTLE_ENDIAN |
506 | | |
507 | | // Definitions for ntohl etc. that don't require us to include |
508 | | // netinet/in.h. We wrap gbswap_32 and gbswap_16 in functions rather |
509 | | // than just #defining them because in debug mode, gcc doesn't |
510 | | // correctly handle the (rather involved) definitions of bswap_32. |
511 | | // gcc guarantees that inline functions are as fast as macros, so |
512 | | // this isn't a performance hit. |
513 | 0 | inline uint16_t ghtons(uint16_t x) { return gbswap_16(x); } |
514 | 0 | inline uint32_t ghtonl(uint32_t x) { return gbswap_32(x); } |
515 | 0 | inline uint64_t ghtonll(uint64_t x) { return gbswap_64(x); } |
516 | | |
517 | | #elif defined PHMAP_IS_BIG_ENDIAN |
518 | | |
519 | | // These definitions are simpler on big-endian machines |
520 | | // These are functions instead of macros to avoid self-assignment warnings |
521 | | // on calls such as "i = ghtonl(i);". This also provides type checking. |
522 | | inline uint16_t ghtons(uint16_t x) { return x; } |
523 | | inline uint32_t ghtonl(uint32_t x) { return x; } |
524 | | inline uint64_t ghtonll(uint64_t x) { return x; } |
525 | | |
526 | | #else |
527 | | #error \ |
528 | | "Unsupported byte order: Either PHMAP_IS_BIG_ENDIAN or " \ |
529 | | "PHMAP_IS_LITTLE_ENDIAN must be defined" |
530 | | #endif // byte order |
531 | | |
532 | 0 | inline uint16_t gntohs(uint16_t x) { return ghtons(x); } |
533 | 0 | inline uint32_t gntohl(uint32_t x) { return ghtonl(x); } |
534 | 0 | inline uint64_t gntohll(uint64_t x) { return ghtonll(x); } |
535 | | |
536 | | // Utilities to convert numbers between the current host's native byte |
537 | | // order and little-endian byte order |
538 | | // |
539 | | // Load/Store methods are alignment safe |
540 | | namespace little_endian { |
541 | | // Conversion functions. |
542 | | #ifdef PHMAP_IS_LITTLE_ENDIAN |
543 | | |
544 | 0 | inline uint16_t FromHost16(uint16_t x) { return x; } |
545 | 0 | inline uint16_t ToHost16(uint16_t x) { return x; } |
546 | | |
547 | 0 | inline uint32_t FromHost32(uint32_t x) { return x; } |
548 | 0 | inline uint32_t ToHost32(uint32_t x) { return x; } |
549 | | |
550 | 0 | inline uint64_t FromHost64(uint64_t x) { return x; } |
551 | 0 | inline uint64_t ToHost64(uint64_t x) { return x; } |
552 | | |
553 | 0 | inline constexpr bool IsLittleEndian() { return true; } |
554 | | |
555 | | #elif defined PHMAP_IS_BIG_ENDIAN |
556 | | |
557 | | inline uint16_t FromHost16(uint16_t x) { return gbswap_16(x); } |
558 | | inline uint16_t ToHost16(uint16_t x) { return gbswap_16(x); } |
559 | | |
560 | | inline uint32_t FromHost32(uint32_t x) { return gbswap_32(x); } |
561 | | inline uint32_t ToHost32(uint32_t x) { return gbswap_32(x); } |
562 | | |
563 | | inline uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } |
564 | | inline uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } |
565 | | |
566 | | inline constexpr bool IsLittleEndian() { return false; } |
567 | | |
568 | | #endif /* ENDIAN */ |
569 | | |
570 | | // Functions to do unaligned loads and stores in little-endian order. |
571 | | // ------------------------------------------------------------------ |
572 | 0 | inline uint16_t Load16(const void *p) { |
573 | 0 | return ToHost16(PHMAP_INTERNAL_UNALIGNED_LOAD16(p)); |
574 | 0 | } |
575 | | |
576 | 0 | inline void Store16(void *p, uint16_t v) { |
577 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE16(p, FromHost16(v)); |
578 | 0 | } |
579 | | |
580 | 0 | inline uint32_t Load32(const void *p) { |
581 | 0 | return ToHost32(PHMAP_INTERNAL_UNALIGNED_LOAD32(p)); |
582 | 0 | } |
583 | | |
584 | 0 | inline void Store32(void *p, uint32_t v) { |
585 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE32(p, FromHost32(v)); |
586 | 0 | } |
587 | | |
588 | 0 | inline uint64_t Load64(const void *p) { |
589 | 0 | return ToHost64(PHMAP_INTERNAL_UNALIGNED_LOAD64(p)); |
590 | 0 | } |
591 | | |
592 | 0 | inline void Store64(void *p, uint64_t v) { |
593 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE64(p, FromHost64(v)); |
594 | 0 | } |
595 | | |
596 | | } // namespace little_endian |
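// Illustrative sketch (editor's addition, not part of the upstream header):
// Load64/Store64 give an alignment-safe, fixed little-endian wire format that
// behaves identically on little- and big-endian hosts. The helper names are
// hypothetical.
inline void example_put_u64_le(void *dst, uint64_t v) {
    little_endian::Store64(dst, v);   // bytes land in little-endian order
}
inline uint64_t example_get_u64_le(const void *src) {
    return little_endian::Load64(src);
}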
597 | | |
598 | | // Utilities to convert numbers between the current host's native byte |
599 | | // order and big-endian byte order (same as network byte order) |
600 | | // |
601 | | // Load/Store methods are alignment safe |
602 | | namespace big_endian { |
603 | | #ifdef PHMAP_IS_LITTLE_ENDIAN |
604 | | |
605 | 0 | inline uint16_t FromHost16(uint16_t x) { return gbswap_16(x); } |
606 | 0 | inline uint16_t ToHost16(uint16_t x) { return gbswap_16(x); } |
607 | | |
608 | 0 | inline uint32_t FromHost32(uint32_t x) { return gbswap_32(x); } |
609 | 0 | inline uint32_t ToHost32(uint32_t x) { return gbswap_32(x); } |
610 | | |
611 | 0 | inline uint64_t FromHost64(uint64_t x) { return gbswap_64(x); } |
612 | 0 | inline uint64_t ToHost64(uint64_t x) { return gbswap_64(x); } |
613 | | |
614 | 0 | inline constexpr bool IsLittleEndian() { return true; } |
615 | | |
616 | | #elif defined PHMAP_IS_BIG_ENDIAN |
617 | | |
618 | | inline uint16_t FromHost16(uint16_t x) { return x; } |
619 | | inline uint16_t ToHost16(uint16_t x) { return x; } |
620 | | |
621 | | inline uint32_t FromHost32(uint32_t x) { return x; } |
622 | | inline uint32_t ToHost32(uint32_t x) { return x; } |
623 | | |
624 | | inline uint64_t FromHost64(uint64_t x) { return x; } |
625 | | inline uint64_t ToHost64(uint64_t x) { return x; } |
626 | | |
627 | | inline constexpr bool IsLittleEndian() { return false; } |
628 | | |
629 | | #endif /* ENDIAN */ |
630 | | |
631 | | // Functions to do unaligned loads and stores in big-endian order. |
632 | 0 | inline uint16_t Load16(const void *p) { |
633 | 0 | return ToHost16(PHMAP_INTERNAL_UNALIGNED_LOAD16(p)); |
634 | 0 | } |
635 | | |
636 | 0 | inline void Store16(void *p, uint16_t v) { |
637 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE16(p, FromHost16(v)); |
638 | 0 | } |
639 | | |
640 | 0 | inline uint32_t Load32(const void *p) { |
641 | 0 | return ToHost32(PHMAP_INTERNAL_UNALIGNED_LOAD32(p)); |
642 | 0 | } |
643 | | |
644 | 0 | inline void Store32(void *p, uint32_t v) { |
645 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE32(p, FromHost32(v)); |
646 | 0 | } |
647 | | |
648 | 0 | inline uint64_t Load64(const void *p) { |
649 | 0 | return ToHost64(PHMAP_INTERNAL_UNALIGNED_LOAD64(p)); |
650 | 0 | } |
651 | | |
652 | 0 | inline void Store64(void *p, uint64_t v) { |
653 | 0 | PHMAP_INTERNAL_UNALIGNED_STORE64(p, FromHost64(v)); |
654 | 0 | } |
655 | | |
656 | | } // namespace big_endian |
657 | | |
658 | | } // namespace phmap |
659 | | |
660 | | #ifdef _MSC_VER |
661 | | #pragma warning(pop) |
662 | | #endif |
663 | | |
664 | | #endif // phmap_bits_h_guard_ |