Coverage Report

Created: 2025-12-31 06:30

/src/abseil-cpp/absl/hash/internal/hash.cc
 Line|  Count|Source
    1|       |// Copyright 2018 The Abseil Authors.
    2|       |//
    3|       |// Licensed under the Apache License, Version 2.0 (the "License");
    4|       |// you may not use this file except in compliance with the License.
    5|       |// You may obtain a copy of the License at
    6|       |//
    7|       |//      https://www.apache.org/licenses/LICENSE-2.0
    8|       |//
    9|       |// Unless required by applicable law or agreed to in writing, software
   10|       |// distributed under the License is distributed on an "AS IS" BASIS,
   11|       |// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   12|       |// See the License for the specific language governing permissions and
   13|       |// limitations under the License.
   14|       |
   15|       |#include "absl/hash/internal/hash.h"
   16|       |
   17|       |#include <cassert>
   18|       |#include <cstddef>
   19|       |#include <cstdint>
   20|       |#include <type_traits>
   21|       |
   22|       |#include "absl/base/attributes.h"
   23|       |#include "absl/base/config.h"
   24|       |#include "absl/base/internal/unaligned_access.h"
   25|       |#include "absl/base/optimization.h"
   26|       |#include "absl/base/prefetch.h"
   27|       |#include "absl/hash/internal/city.h"
   28|       |
   29|       |#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
   30|       |#error ABSL_AES_INTERNAL_HAVE_X86_SIMD cannot be directly set
   31|       |#elif defined(__SSE4_2__) && defined(__AES__)
   32|       |#define ABSL_AES_INTERNAL_HAVE_X86_SIMD
   33|       |#endif
   34|       |
   35|       |
   36|       |#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
   37|       |#include <smmintrin.h>
   38|       |#include <wmmintrin.h>
   39|       |#include <xmmintrin.h>
   40|       |#endif  // ABSL_AES_INTERNAL_HAVE_X86_SIMD
   41|       |
   42|       |namespace absl {
   43|       |ABSL_NAMESPACE_BEGIN
   44|       |namespace hash_internal {
   45|       |
   46|       |namespace {
   47|       |
   48|  27.8M|void PrefetchFutureDataToLocalCache(const uint8_t* ptr) {
   49|  27.8M|  PrefetchToLocalCache(ptr + 5 * ABSL_CACHELINE_SIZE);
   50|  27.8M|}
   51|       |
   52|       |#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
   53|       |uint64_t Mix4x16Vectors(__m128i a, __m128i b, __m128i c, __m128i d) {
   54|       |  // res128 = encrypt(a + c, d) + decrypt(b - d, a)
   55|       |  auto res128 = _mm_add_epi64(_mm_aesenc_si128(_mm_add_epi64(a, c), d),
   56|       |                              _mm_aesdec_si128(_mm_sub_epi64(b, d), a));
   57|       |  auto x64 = static_cast<uint64_t>(_mm_cvtsi128_si64(res128));
   58|       |  auto y64 = static_cast<uint64_t>(_mm_extract_epi64(res128, 1));
   59|       |  return x64 ^ y64;
   60|       |}
   61|       |
   62|       |uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) {
   63|       |  assert(len > 32);
   64|       |  assert(len <= 64);
   65|       |  __m128i state =
   66|       |      _mm_set_epi64x(static_cast<int64_t>(seed), static_cast<int64_t>(len));
   67|       |  auto a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
   68|       |  auto b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr + 16));
   69|       |  auto* last32_ptr = ptr + len - 32;
   70|       |  auto c = _mm_loadu_si128(reinterpret_cast<const __m128i*>(last32_ptr));
   71|       |  auto d = _mm_loadu_si128(reinterpret_cast<const __m128i*>(last32_ptr + 16));
   72|       |
   73|       |  // Bits of the second argument to _mm_aesdec_si128/_mm_aesenc_si128 are
   74|       |  // XORed with the state argument after encryption.
   75|       |  // We use each value as the first argument to shuffle all the bits around.
   76|       |  // We do not add any salt to the state or loaded data, instead we vary
   77|       |  // instructions used to mix bits _mm_aesdec_si128/_mm_aesenc_si128 and
   78|       |  // _mm_add_epi64/_mm_sub_epi64.
   79|       |  // _mm_add_epi64/_mm_sub_epi64 are combined to one instruction with data
   80|       |  // loading like `vpaddq  xmm1, xmm0, xmmword ptr [rdi]`.
   81|       |  auto na = _mm_aesdec_si128(_mm_add_epi64(state, a), state);
   82|       |  auto nb = _mm_aesdec_si128(_mm_sub_epi64(state, b), state);
   83|       |  auto nc = _mm_aesenc_si128(_mm_add_epi64(state, c), state);
   84|       |  auto nd = _mm_aesenc_si128(_mm_sub_epi64(state, d), state);
   85|       |
   86|       |  // We perform another round of encryption to mix bits between two halves of
   87|       |  // the input.
   88|       |  return Mix4x16Vectors(na, nb, nc, nd);
   89|       |}
   90|       |
   91|       |[[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t
   92|       |LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
   93|       |  assert(len > 64);
   94|       |  const uint8_t* ptr = static_cast<const uint8_t*>(data);
   95|       |  const uint8_t* last_32_ptr = ptr + len - 32;
   96|       |
   97|       |  // If we have more than 64 bytes, we're going to handle chunks of 64
   98|       |  // bytes at a time. We're going to build up four separate hash states
   99|       |  // which we will then hash together. This avoids short dependency chains.
  100|       |  __m128i state0 =
  101|       |      _mm_set_epi64x(static_cast<int64_t>(seed), static_cast<int64_t>(len));
  102|       |  __m128i state1 = state0;
  103|       |  __m128i state2 = state1;
  104|       |  __m128i state3 = state2;
  105|       |
  106|       |  // Mixing two 128-bit vectors at a time with corresponding states.
  107|       |  // All variables are mixed slightly differently to avoid hash collision
  108|       |  // due to trivial byte rotation.
  109|       |  // We combine state and data with _mm_add_epi64/_mm_sub_epi64 before applying
  110|       |  // AES encryption to make hash function dependent on the order of the blocks.
  111|       |  // See comments in LowLevelHash33To64 for more considerations.
  112|       |  auto mix_ab = [&state0,
  113|       |                 &state1](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE {
  114|       |    // i128 a = *p;
  115|       |    // i128 b = *(p + 16);
  116|       |    // state0 = decrypt(state0 + a, state0);
  117|       |    // state1 = decrypt(state1 - b, state1);
  118|       |    auto a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
  119|       |    auto b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p + 16));
  120|       |    state0 = _mm_aesdec_si128(_mm_add_epi64(state0, a), state0);
  121|       |    state1 = _mm_aesdec_si128(_mm_sub_epi64(state1, b), state1);
  122|       |  };
  123|       |  auto mix_cd = [&state2,
  124|       |                 &state3](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE {
  125|       |    // i128 c = *p;
  126|       |    // i128 d = *(p + 16);
  127|       |    // state2 = encrypt(state2 + c, state2);
  128|       |    // state3 = encrypt(state3 - d, state3);
  129|       |    auto c = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
  130|       |    auto d = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p + 16));
  131|       |    state2 = _mm_aesenc_si128(_mm_add_epi64(state2, c), state2);
  132|       |    state3 = _mm_aesenc_si128(_mm_sub_epi64(state3, d), state3);
  133|       |  };
  134|       |
  135|       |  do {
  136|       |    PrefetchFutureDataToLocalCache(ptr);
  137|       |    mix_ab(ptr);
  138|       |    mix_cd(ptr + 32);
  139|       |
  140|       |    ptr += 64;
  141|       |    len -= 64;
  142|       |  } while (len > 64);
  143|       |
  144|       |  // We now have a data `ptr` with at most 64 bytes.
  145|       |  if (len > 32) {
  146|       |    mix_ab(ptr);
  147|       |  }
  148|       |  mix_cd(last_32_ptr);
  149|       |
  150|       |  return Mix4x16Vectors(state0, state1, state2, state3);
  151|       |}
  152|       |#else
  153|  4.09M|uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) {
  154|  4.09M|  uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
  155|  4.09M|  uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
  156|  4.09M|  uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
  157|  4.09M|  uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
  158|       |
  159|  4.09M|  uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state);
  160|  4.09M|  uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state);
  161|  4.09M|  return cs0 ^ cs1;
  162|  4.09M|}
  163|       |
  164|  81.4k|uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) {
  165|  81.4k|  assert(len > 32);
  166|  81.4k|  assert(len <= 64);
  167|  81.4k|  uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
  168|  81.4k|  const uint8_t* last_32_ptr = ptr + len - 32;
  169|  81.4k|  return Mix32Bytes(last_32_ptr, Mix32Bytes(ptr, current_state));
  170|  81.4k|}
  171|       |
  172|       |[[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t
  173|  2.03M|LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
  174|  2.03M|  assert(len > 64);
  175|  2.03M|  const uint8_t* ptr = static_cast<const uint8_t*>(data);
  176|  2.03M|  uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
  177|  2.03M|  const uint8_t* last_32_ptr = ptr + len - 32;
  178|       |  // If we have more than 64 bytes, we're going to handle chunks of 64
  179|       |  // bytes at a time. We're going to build up four separate hash states
  180|       |  // which we will then hash together. This avoids short dependency chains.
  181|  2.03M|  uint64_t duplicated_state0 = current_state;
  182|  2.03M|  uint64_t duplicated_state1 = current_state;
  183|  2.03M|  uint64_t duplicated_state2 = current_state;
  184|       |
  185|  27.8M|  do {
  186|  27.8M|    PrefetchFutureDataToLocalCache(ptr);
  187|       |
  188|  27.8M|    uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
  189|  27.8M|    uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
  190|  27.8M|    uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
  191|  27.8M|    uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
  192|  27.8M|    uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32);
  193|  27.8M|    uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40);
  194|  27.8M|    uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48);
  195|  27.8M|    uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56);
  196|       |
  197|  27.8M|    current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state);
  198|  27.8M|    duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0);
  199|       |
  200|  27.8M|    duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1);
  201|  27.8M|    duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2);
  202|       |
  203|  27.8M|    ptr += 64;
  204|  27.8M|    len -= 64;
  205|  27.8M|  } while (len > 64);
  206|       |
  207|  2.03M|  current_state = (current_state ^ duplicated_state0) ^
  208|  2.03M|                  (duplicated_state1 + duplicated_state2);
  209|       |  // We now have a data `ptr` with at most 64 bytes and the current state
  210|       |  // of the hashing state machine stored in current_state.
  211|  2.03M|  if (len > 32) {
  212|  1.89M|    current_state = Mix32Bytes(ptr, current_state);
  213|  1.89M|  }
  214|       |
  215|       |  // We now have a data `ptr` with at most 32 bytes and the current state
  216|       |  // of the hashing state machine stored in current_state. But we can
  217|       |  // safely read from `ptr + len - 32`.
  218|  2.03M|  return Mix32Bytes(last_32_ptr, current_state);
  219|  2.03M|}
  220|       |#endif  // ABSL_AES_INTERNAL_HAVE_X86_SIMD
  221|       |
  222|       |[[maybe_unused]] uint64_t LowLevelHashLenGt32(uint64_t seed, const void* data,
  223|  2.11M|                                              size_t len) {
  224|  2.11M|  assert(len > 32);
  225|  2.11M|  if (ABSL_PREDICT_FALSE(len > 64)) {
  226|  2.03M|    return LowLevelHashLenGt64(seed, data, len);
  227|  2.03M|  }
  228|  81.4k|  return LowLevelHash33To64(seed, static_cast<const uint8_t*>(data), len);
  229|  2.11M|}
  230|       |
  231|       |ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit(
  232|      0|    uint64_t state, const unsigned char* data, size_t len) {
  233|      0|  // TODO(b/417141985): expose and use CityHash32WithSeed.
  234|      0|  // Note: we can't use PrecombineLengthMix here because len can be up to 1024.
  235|      0|  return CombineRawImpl(
  236|      0|      state + len,
  237|      0|      hash_internal::CityHash32(reinterpret_cast<const char*>(data), len));
  238|      0|}
  239|       |
  240|       |ABSL_ATTRIBUTE_NOINLINE uint64_t
  241|      0|SplitAndCombineOn32Bit(uint64_t state, const unsigned char* first, size_t len) {
  242|      0|  while (len >= PiecewiseChunkSize()) {
  243|      0|    state = HashBlockOn32Bit(state, first, PiecewiseChunkSize());
  244|      0|    len -= PiecewiseChunkSize();
  245|      0|    first += PiecewiseChunkSize();
  246|      0|  }
  247|      0|  // Do not call CombineContiguousImpl for empty range since it is modifying
  248|      0|  // state.
  249|      0|  if (len == 0) {
  250|      0|    return state;
  251|      0|  }
  252|      0|  // Handle the remainder.
  253|      0|  return CombineContiguousImpl(state, first, len,
  254|      0|                               std::integral_constant<int, 4>{});
  255|      0|}
  256|       |
  257|       |ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit(
  258|  2.11M|    uint64_t state, const unsigned char* data, size_t len) {
  259|  2.11M|#ifdef ABSL_HAVE_INTRINSIC_INT128
  260|  2.11M|  return LowLevelHashLenGt32(state, data, len);
  261|       |#else
  262|       |  return hash_internal::CityHash64WithSeed(reinterpret_cast<const char*>(data),
  263|       |                                           len, state);
  264|       |#endif
  265|  2.11M|}
  266|       |
  267|       |ABSL_ATTRIBUTE_NOINLINE uint64_t
  268|   153k|SplitAndCombineOn64Bit(uint64_t state, const unsigned char* first, size_t len) {
  269|  1.92M|  while (len >= PiecewiseChunkSize()) {
  270|  1.76M|    state = HashBlockOn64Bit(state, first, PiecewiseChunkSize());
  271|  1.76M|    len -= PiecewiseChunkSize();
  272|  1.76M|    first += PiecewiseChunkSize();
  273|  1.76M|  }
  274|       |  // Do not call CombineContiguousImpl for empty range since it is modifying
  275|       |  // state.
  276|   153k|  if (len == 0) {
  277|    490|    return state;
  278|    490|  }
  279|       |  // Handle the remainder.
  280|   153k|  return CombineContiguousImpl(state, first, len,
  281|   153k|                               std::integral_constant<int, 8>{});
  282|   153k|}
  283|       |
  284|       |}  // namespace
  285|       |
  286|       |uint64_t CombineLargeContiguousImplOn32BitLengthGt8(uint64_t state,
  287|       |                                                    const unsigned char* first,
  288|      0|                                                    size_t len) {
  289|      0|  assert(len > 8);
  290|      0|  assert(sizeof(size_t) == 4);  // NOLINT(misc-static-assert)
  291|      0|  if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
  292|      0|    return HashBlockOn32Bit(state, first, len);
  293|      0|  }
  294|      0|  return SplitAndCombineOn32Bit(state, first, len);
  295|      0|}
  296|       |
  297|       |uint64_t CombineLargeContiguousImplOn64BitLengthGt32(uint64_t state,
  298|       |                                                     const unsigned char* first,
  299|   499k|                                                     size_t len) {
  300|   499k|  assert(len > 32);
  301|   499k|  assert(sizeof(size_t) == 8);  // NOLINT(misc-static-assert)
  302|   499k|  if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
  303|   345k|    return HashBlockOn64Bit(state, first, len);
  304|   345k|  }
  305|   153k|  return SplitAndCombineOn64Bit(state, first, len);
  306|   499k|}
  307|       |
  308|       |ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed;
  309|       |
  310|       |}  // namespace hash_internal
  311|       |ABSL_NAMESPACE_END
  312|       |}  // namespace absl
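
Note (not part of the report): the sketch below is a minimal, hypothetical driver showing how the hot paths counted above are reached through the public absl::Hash API. It assumes a 64-bit target with ABSL_HAVE_INTRINSIC_INT128 and without the x86 AES/SSE4.2 path, which is consistent with the counters here: the portable Mix-based branch and the sizeof(size_t) == 8 functions carry all the hits, while the SIMD branch is uncompiled and the 32-bit variants sit at zero. The string lengths and main() are illustrative; the function names and the 32/64-byte thresholds in the comments come from the listing, and the 1024-byte chunk size is taken from the comment at HashBlockOn32Bit.

// Minimal sketch: inputs sized to land in each of the covered 64-bit buckets.
#include <cstddef>
#include <string>

#include "absl/hash/hash.h"

int main() {
  absl::Hash<std::string> hasher;

  // 48 bytes: > 32 and <= 64, handled by LowLevelHash33To64.
  std::string mid(48, 'a');
  // 200 bytes: > 64, handled by LowLevelHashLenGt64's 64-byte main loop.
  std::string large(200, 'b');
  // 4096 bytes: above PiecewiseChunkSize() (1024 per the comment above), so
  // SplitAndCombineOn64Bit walks it chunk by chunk before the remainder.
  std::string huge(4096, 'c');

  std::size_t h = hasher(mid) ^ hasher(large) ^ hasher(huge);
  return static_cast<int>(h & 1);  // Keep the hashes observable.
}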