/src/abseil-cpp/absl/hash/internal/hash.cc
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/hash/internal/hash.h"

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/hash/internal/city.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {

namespace {

// Mixes a fixed 32-byte window starting at `ptr` into `current_state`.
// The two Mix() calls have no data dependency on each other, so they can
// execute in parallel.
uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) {
  uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
  uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
  uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
  uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);

  uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state);
  uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state);
  return cs0 ^ cs1;
}
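
// For orientation, a minimal sketch of the Mix() primitive used above. This
// is an assumption drawn from its declaration in hash.h, not a copy of the
// real definition: the usual shape is a widening 64x64->128 multiply whose
// halves are folded together, so every input bit can affect every output bit.
//
//   inline uint64_t MixSketch(uint64_t lhs, uint64_t rhs) {  // hypothetical
//     absl::uint128 m = absl::uint128{lhs} * rhs;  // widening multiply
//     return absl::Uint128Low64(m) ^ absl::Uint128High64(m);  // fold halves
//   }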

[[maybe_unused]] uint64_t LowLevelHashLenGt32(const void* data, size_t len,
                                              uint64_t seed) {
  assert(len > 32);
  const uint8_t* ptr = static_cast<const uint8_t*>(data);
  uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
  const uint8_t* last_32_ptr = ptr + len - 32;

  if (len > 64) {
    // If we have more than 64 bytes, we're going to handle chunks of 64
    // bytes at a time. We're going to build up four separate hash states
    // which we will then hash together. This avoids short dependency chains.
    uint64_t duplicated_state0 = current_state;
    uint64_t duplicated_state1 = current_state;
    uint64_t duplicated_state2 = current_state;

    do {
      // Pull data several cache lines ahead into cache before we need it.
      PrefetchToLocalCache(ptr + 5 * ABSL_CACHELINE_SIZE);

      uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
      uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
      uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
      uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
      uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32);
      uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40);
      uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48);
      uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56);

      current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state);
      duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0);

      duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1);
      duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2);

      ptr += 64;
      len -= 64;
    } while (len > 64);

    current_state = (current_state ^ duplicated_state0) ^
                    (duplicated_state1 + duplicated_state2);
  }

  // `ptr` now points at no more than 64 remaining bytes, and the current
  // state of the hashing state machine is stored in current_state.
  if (len > 32) {
    current_state = Mix32Bytes(ptr, current_state);
  }

  // At most 32 bytes remain, and since the input had more than 32 bytes in
  // total, it is safe to read the input's final 32 bytes through
  // `last_32_ptr` (which may overlap bytes already mixed).
  return Mix32Bytes(last_32_ptr, current_state);
}
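
// The tail handling above relies on an overlap trick: instead of running a
// variable-length loop over the final partial block, we always hash the last
// 32 bytes of the input, even though some of those bytes may already have
// been mixed. Re-mixing a few bytes is harmless; skipping bytes would not be.
// A hypothetical 40-byte input illustrates the two windows:
//
//   bytes [0, 32)  -> Mix32Bytes(ptr, state)          (the len > 32 branch)
//   bytes [8, 40)  -> Mix32Bytes(last_32_ptr, state)  (overlaps [8, 32))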

ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit(
    const unsigned char* data, size_t len, uint64_t state) {
  // TODO(b/417141985): expose and use CityHash32WithSeed.
  // Note: we can't use PrecombineLengthMix here because len can be up to 1024.
  return CombineRawImpl(
      state + len,
      hash_internal::CityHash32(reinterpret_cast<const char*>(data), len));
}
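
// A sketch of what the CombineRawImpl() step above accomplishes (an assumed
// shape, not the definition from hash.h): it folds an already-hashed 64-bit
// value into the running state with one more Mix() round, salted here with a
// hypothetical odd constant kSaltSketch.
//
//   inline uint64_t CombineRawSketch(uint64_t state, uint64_t value) {
//     return Mix(state ^ value, kSaltSketch);  // one multiplicative round
//   }
//
// Seeding it with `state + len` lets inputs of different lengths diverge
// even if CityHash32 alone were to collide on them.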

ABSL_ATTRIBUTE_NOINLINE uint64_t
SplitAndCombineOn32Bit(const unsigned char* first, size_t len, uint64_t state) {
  while (len >= PiecewiseChunkSize()) {
    state = HashBlockOn32Bit(first, PiecewiseChunkSize(), state);
    len -= PiecewiseChunkSize();
    first += PiecewiseChunkSize();
  }
  // Do not call CombineContiguousImpl for an empty range, since it modifies
  // the state even when given zero bytes.
  if (len == 0) {
    return state;
  }
  // Handle the remainder.
  return CombineContiguousImpl(state, first, len,
                               std::integral_constant<int, 4>{});
}
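
// Why cut at fixed PiecewiseChunkSize() boundaries: the piecewise hashing
// API (used by fragmented containers such as absl::Cord) must produce the
// same value as hashing one contiguous buffer, so both paths must reduce to
// an identical block sequence. Illustration for a hypothetical input of
// length 2.5 * PiecewiseChunkSize(), writing `chunk` for that size:
//
//   state = HashBlockOn32Bit(first,             chunk, seed);   // block 0
//   state = HashBlockOn32Bit(first + chunk,     chunk, state);  // block 1
//   state = CombineContiguousImpl(state, first + 2 * chunk,
//                                 chunk / 2, ...);              // remainder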

ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit(
    const unsigned char* data, size_t len, uint64_t state) {
#ifdef ABSL_HAVE_INTRINSIC_INT128
  return LowLevelHashLenGt32(data, len, state);
#else
  return hash_internal::CityHash64WithSeed(reinterpret_cast<const char*>(data),
                                           len, state);
#endif
}

ABSL_ATTRIBUTE_NOINLINE uint64_t
SplitAndCombineOn64Bit(const unsigned char* first, size_t len, uint64_t state) {
  while (len >= PiecewiseChunkSize()) {
    state = HashBlockOn64Bit(first, PiecewiseChunkSize(), state);
    len -= PiecewiseChunkSize();
    first += PiecewiseChunkSize();
  }
  // Do not call CombineContiguousImpl for an empty range, since it modifies
  // the state even when given zero bytes.
  if (len == 0) {
    return state;
  }
  // Handle the remainder.
  return CombineContiguousImpl(state, first, len,
                               std::integral_constant<int, 8>{});
}

}  // namespace

uint64_t CombineLargeContiguousImplOn32BitLengthGt8(const unsigned char* first,
                                                    size_t len,
                                                    uint64_t state) {
  assert(len > 8);
  assert(sizeof(size_t) == 4);  // NOLINT(misc-static-assert)
  if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
    return HashBlockOn32Bit(first, len, state);
  }
  return SplitAndCombineOn32Bit(first, len, state);
}

uint64_t CombineLargeContiguousImplOn64BitLengthGt32(const unsigned char* first,
                                                     size_t len,
                                                     uint64_t state) {
  assert(len > 32);
  assert(sizeof(size_t) == 8);  // NOLINT(misc-static-assert)
  if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
    return HashBlockOn64Bit(first, len, state);
  }
  return SplitAndCombineOn64Bit(first, len, state);
}
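
// Call-site sketch: these entry points are not meant to be used directly;
// they are the large-input slow path behind absl::Hash for contiguous byte
// ranges. For example (assuming a 64-bit size_t), hashing a long string ends
// up in the function above:
//
//   #include <string>
//   #include "absl/hash/hash.h"
//
//   size_t HashLongString() {
//     std::string s(100, 'x');           // 100 bytes > 32, takes this path
//     return absl::Hash<std::string>{}(s);
//   }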

ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed;
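
// kSeed is its own address: on platforms with ASLR, the variable lands at a
// different address in each process, giving absl::Hash a cheap per-process
// seed so hash values are not stable across runs. A sketch of how such a
// seed can be turned into an integer (hypothetical helper, not the hash.h
// definition):
//
//   inline uint64_t SeedSketch() {
//     return static_cast<uint64_t>(
//         reinterpret_cast<uintptr_t>(MixingHashState::kSeed));
//   }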

}  // namespace hash_internal
ABSL_NAMESPACE_END
}  // namespace absl