/src/abseil-cpp/absl/hash/internal/hash.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2018 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/hash/internal/hash.h" |
16 | | |
17 | | #include <cassert> |
18 | | #include <cstddef> |
19 | | #include <cstdint> |
20 | | #include <type_traits> |
21 | | |
22 | | #include "absl/base/attributes.h" |
23 | | #include "absl/base/config.h" |
24 | | #include "absl/base/internal/unaligned_access.h" |
25 | | #include "absl/base/optimization.h" |
26 | | #include "absl/base/prefetch.h" |
27 | | #include "absl/hash/internal/city.h" |
28 | | |
29 | | namespace absl { |
30 | | ABSL_NAMESPACE_BEGIN |
31 | | namespace hash_internal { |
32 | | |
33 | | namespace { |
34 | | |
35 | 4.40M | uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) { |
36 | 4.40M | uint64_t a = absl::base_internal::UnalignedLoad64(ptr); |
37 | 4.40M | uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); |
38 | 4.40M | uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); |
39 | 4.40M | uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); |
40 | | |
41 | 4.40M | uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state); |
42 | 4.40M | uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state); |
43 | 4.40M | return cs0 ^ cs1; |
44 | 4.40M | } |
45 | | |
46 | | [[maybe_unused]] uint64_t LowLevelHashLenGt32(const void* data, size_t len, |
47 | 2.30M | uint64_t seed) { |
48 | 2.30M | assert(len > 32); |
49 | 2.30M | const uint8_t* ptr = static_cast<const uint8_t*>(data); |
50 | 2.30M | uint64_t current_state = seed ^ kStaticRandomData[0] ^ len; |
51 | 2.30M | const uint8_t* last_32_ptr = ptr + len - 32; |
52 | | |
53 | 2.30M | if (len > 64) { |
54 | | // If we have more than 64 bytes, we're going to handle chunks of 64 |
55 | | // bytes at a time. We're going to build up four separate hash states |
56 | | // which we will then hash together. This avoids short dependency chains. |
57 | 2.22M | uint64_t duplicated_state0 = current_state; |
58 | 2.22M | uint64_t duplicated_state1 = current_state; |
59 | 2.22M | uint64_t duplicated_state2 = current_state; |
60 | | |
61 | 29.0M | do { |
62 | | // Always prefetch the next cacheline. |
63 | 29.0M | PrefetchToLocalCache(ptr + ABSL_CACHELINE_SIZE); |
64 | | |
65 | 29.0M | uint64_t a = absl::base_internal::UnalignedLoad64(ptr); |
66 | 29.0M | uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); |
67 | 29.0M | uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); |
68 | 29.0M | uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); |
69 | 29.0M | uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32); |
70 | 29.0M | uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40); |
71 | 29.0M | uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48); |
72 | 29.0M | uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56); |
73 | | |
74 | 29.0M | current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state); |
75 | 29.0M | duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0); |
76 | | |
77 | 29.0M | duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1); |
78 | 29.0M | duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2); |
79 | | |
80 | 29.0M | ptr += 64; |
81 | 29.0M | len -= 64; |
82 | 29.0M | } while (len > 64); |
83 | | |
84 | 2.22M | current_state = (current_state ^ duplicated_state0) ^ |
85 | 2.22M | (duplicated_state1 + duplicated_state2); |
86 | 2.22M | } |
87 | | |
88 | | // We now have a data `ptr` with at most 64 bytes and the current state |
89 | | // of the hashing state machine stored in current_state. |
90 | 2.30M | if (len > 32) { |
91 | 2.09M | current_state = Mix32Bytes(ptr, current_state); |
92 | 2.09M | } |
93 | | |
94 | | // We now have a data `ptr` with at most 32 bytes and the current state |
95 | | // of the hashing state machine stored in current_state. But we can |
96 | | // safely read from `ptr + len - 32`. |
97 | 2.30M | return Mix32Bytes(last_32_ptr, current_state); |
98 | 2.30M | } |
99 | | |
100 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit( |
101 | 0 | const unsigned char* data, size_t len, uint64_t state) { |
102 | 0 | // TODO(b/417141985): expose and use CityHash32WithSeed. |
103 | 0 | // Note: we can't use PrecombineLengthMix here because len can be up to 1024. |
104 | 0 | return CombineRawImpl( |
105 | 0 | state + len, |
106 | 0 | hash_internal::CityHash32(reinterpret_cast<const char*>(data), len)); |
107 | 0 | } |
108 | | |
109 | | ABSL_ATTRIBUTE_NOINLINE uint64_t |
110 | 0 | SplitAndCombineOn32Bit(const unsigned char* first, size_t len, uint64_t state) { |
111 | 0 | while (len >= PiecewiseChunkSize()) { |
112 | 0 | state = HashBlockOn32Bit(first, PiecewiseChunkSize(), state); |
113 | 0 | len -= PiecewiseChunkSize(); |
114 | 0 | first += PiecewiseChunkSize(); |
115 | 0 | } |
116 | 0 | // Do not call CombineContiguousImpl for empty range since it is modifying |
117 | 0 | // state. |
118 | 0 | if (len == 0) { |
119 | 0 | return state; |
120 | 0 | } |
121 | 0 | // Handle the remainder. |
122 | 0 | return CombineContiguousImpl(state, first, len, |
123 | 0 | std::integral_constant<int, 4>{}); |
124 | 0 | } |
125 | | |
126 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit( |
127 | 2.30M | const unsigned char* data, size_t len, uint64_t state) { |
128 | 2.30M | #ifdef ABSL_HAVE_INTRINSIC_INT128 |
129 | 2.30M | return LowLevelHashLenGt32(data, len, state); |
130 | | #else |
131 | | return hash_internal::CityHash64WithSeed(reinterpret_cast<const char*>(data), |
132 | | len, state); |
133 | | #endif |
134 | 2.30M | } |
135 | | |
136 | | ABSL_ATTRIBUTE_NOINLINE uint64_t |
137 | 170k | SplitAndCombineOn64Bit(const unsigned char* first, size_t len, uint64_t state) { |
138 | 1.95M | while (len >= PiecewiseChunkSize()) { |
139 | 1.78M | state = HashBlockOn64Bit(first, PiecewiseChunkSize(), state); |
140 | 1.78M | len -= PiecewiseChunkSize(); |
141 | 1.78M | first += PiecewiseChunkSize(); |
142 | 1.78M | } |
143 | | // Do not call CombineContiguousImpl for empty range since it is modifying |
144 | | // state. |
145 | 170k | if (len == 0) { |
146 | 383 | return state; |
147 | 383 | } |
148 | | // Handle the remainder. |
149 | 170k | return CombineContiguousImpl(state, first, len, |
150 | 170k | std::integral_constant<int, 8>{}); |
151 | 170k | } |
152 | | |
153 | | } // namespace |
154 | | |
155 | | uint64_t CombineLargeContiguousImplOn32BitLengthGt8(const unsigned char* first, |
156 | | size_t len, |
157 | 0 | uint64_t state) { |
158 | 0 | assert(len > 8); |
159 | 0 | assert(sizeof(size_t) == 4); // NOLINT(misc-static-assert) |
160 | 0 | if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { |
161 | 0 | return HashBlockOn32Bit(first, len, state); |
162 | 0 | } |
163 | 0 | return SplitAndCombineOn32Bit(first, len, state); |
164 | 0 | } |
165 | | |
166 | | uint64_t CombineLargeContiguousImplOn64BitLengthGt32(const unsigned char* first, |
167 | | size_t len, |
168 | 692k | uint64_t state) { |
169 | 692k | assert(len > 32); |
170 | 692k | assert(sizeof(size_t) == 8); // NOLINT(misc-static-assert) |
171 | 692k | if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { |
172 | 521k | return HashBlockOn64Bit(first, len, state); |
173 | 521k | } |
174 | 170k | return SplitAndCombineOn64Bit(first, len, state); |
175 | 692k | } |
176 | | |
177 | | ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed; |
178 | | |
179 | | } // namespace hash_internal |
180 | | ABSL_NAMESPACE_END |
181 | | } // namespace absl |