/src/abseil-cpp/absl/hash/internal/hash.cc
Line | Count | Source |
1 | | // Copyright 2018 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/hash/internal/hash.h" |
16 | | |
17 | | #include <cassert> |
18 | | #include <cstddef> |
19 | | #include <cstdint> |
20 | | #include <type_traits> |
21 | | |
22 | | #include "absl/base/attributes.h" |
23 | | #include "absl/base/config.h" |
24 | | #include "absl/base/internal/unaligned_access.h" |
25 | | #include "absl/base/optimization.h" |
26 | | #include "absl/base/prefetch.h" |
27 | | #include "absl/hash/internal/city.h" |
28 | | |
29 | | #ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD |
30 | | #error ABSL_AES_INTERNAL_HAVE_X86_SIMD cannot be directly set |
31 | | #elif defined(__SSE4_2__) && defined(__AES__) |
32 | | #define ABSL_AES_INTERNAL_HAVE_X86_SIMD |
33 | | #endif |
34 | | |
35 | | |
36 | | #ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD |
37 | | #include <smmintrin.h> |
38 | | #include <wmmintrin.h> |
39 | | #include <xmmintrin.h> |
40 | | #endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD |
41 | | |
42 | | #ifdef ABSL_AES_INTERNAL_HAVE_ARM_SIMD |
43 | | #error ABSL_AES_INTERNAL_HAVE_ARM_SIMD cannot be directly set |
44 | | #elif defined(ABSL_INTERNAL_HAVE_ARM_NEON) && defined(__ARM_FEATURE_CRYPTO) |
45 | | #include <arm_neon.h> |
46 | | #define ABSL_AES_INTERNAL_HAVE_ARM_SIMD |
47 | | #endif // ABSL_AES_INTERNAL_HAVE_ARM_SIMD
48 | | |
49 | | namespace absl { |
50 | | ABSL_NAMESPACE_BEGIN |
51 | | namespace hash_internal { |
52 | | |
53 | | namespace { |
54 | | |
55 | 14.3M | void PrefetchFutureDataToLocalCache(const uint8_t* ptr) { |
56 | 14.3M | PrefetchToLocalCache(ptr + 5 * ABSL_CACHELINE_SIZE); |
57 | 14.3M | } |
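
For orientation, here is a minimal sketch of how far ahead this helper reaches, assuming the common 64-byte value of ABSL_CACHELINE_SIZE (the actual constant comes from absl/base/optimization.h and can differ by platform):

// Illustrative sketch only; not part of hash.cc.
constexpr size_t kAssumedCacheLine = 64;                  // assumption for the example
constexpr size_t kPrefetchAhead = 5 * kAssumedCacheLine;  // 320 bytes ahead of `ptr`
// The main loops below consume 64 bytes per iteration, so the prefetch stays
// roughly five iterations ahead of the actual loads.
static_assert(kPrefetchAhead == 320, "prefetch distance under a 64-byte line");
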
58 | | |
59 | | #if defined(ABSL_AES_INTERNAL_HAVE_X86_SIMD) || \ |
60 | | defined(ABSL_AES_INTERNAL_HAVE_ARM_SIMD) |
61 | | |
62 | | #if defined(ABSL_AES_INTERNAL_HAVE_X86_SIMD) |
63 | | using Vector128 = __m128i; |
64 | | |
65 | | inline Vector128 Load128(const uint8_t* ptr) { |
66 | | return _mm_loadu_si128(reinterpret_cast<const Vector128*>(ptr)); |
67 | | } |
68 | | |
69 | | inline Vector128 Set128(uint64_t a, uint64_t b) { |
70 | | return _mm_set_epi64x(static_cast<int64_t>(a), static_cast<int64_t>(b)); |
71 | | } |
72 | | |
73 | | inline Vector128 Add128(Vector128 a, Vector128 b) { |
74 | | return _mm_add_epi64(a, b); |
75 | | } |
76 | | |
77 | | inline Vector128 Sub128(Vector128 a, Vector128 b) { |
78 | | return _mm_sub_epi64(a, b); |
79 | | } |
80 | | |
81 | | // Bits of the second argument to Encrypt128/Decrypt128 are XORed with the |
82 | | // first argument after encryption/decryption. |
83 | | |
84 | | inline Vector128 Encrypt128(Vector128 data, Vector128 key) { |
85 | | return _mm_aesenc_si128(data, key); |
86 | | } |
87 | | |
88 | | inline Vector128 Decrypt128(Vector128 data, Vector128 key) { |
89 | | return _mm_aesdec_si128(data, key); |
90 | | } |
91 | | |
92 | | // We pass each value as the first (data) argument so all of its bits get
93 | | // shuffled. We do not add any salt to the state or loaded data; instead, we
94 | | // vary the instructions used to mix the bits (Encrypt128/Decrypt128 and
95 | | // Add128/Sub128). On x86, Add128/Sub128 fuse with the data load into a
96 | | // single instruction, e.g. `vpaddq xmm1, xmm0, xmmword ptr [rdi]`.
97 | | |
98 | | inline Vector128 MixA(Vector128 a, Vector128 state) { |
99 | | return Decrypt128(Add128(state, a), state); |
100 | | } |
101 | | |
102 | | inline Vector128 MixB(Vector128 b, Vector128 state) { |
103 | | return Decrypt128(Sub128(state, b), state); |
104 | | } |
105 | | |
106 | | inline Vector128 MixC(Vector128 c, Vector128 state) { |
107 | | return Encrypt128(Add128(state, c), state); |
108 | | } |
109 | | |
110 | | inline Vector128 MixD(Vector128 d, Vector128 state) { |
111 | | return Encrypt128(Sub128(state, d), state); |
112 | | } |
113 | | |
114 | | inline uint64_t ExtractLow64(Vector128 v) { |
115 | | return static_cast<uint64_t>(_mm_cvtsi128_si64(v)); |
116 | | } |
117 | | |
118 | | inline uint64_t ExtractHigh64(Vector128 v) { |
119 | | return static_cast<uint64_t>(_mm_extract_epi64(v, 1)); |
120 | | } |
121 | | |
122 | | inline uint64_t Mix4x16Vectors(Vector128 a, Vector128 b, Vector128 c, |
123 | | Vector128 d) { |
124 | | Vector128 res128 = |
125 | | Add128(Encrypt128(Add128(a, c), d), Decrypt128(Sub128(b, d), a)); |
126 | | uint64_t x64 = ExtractLow64(res128); |
127 | | uint64_t y64 = ExtractHigh64(res128); |
128 | | return x64 ^ y64; |
129 | | } |
130 | | |
131 | | #else // ABSL_AES_INTERNAL_HAVE_ARM_SIMD |
132 | | |
133 | | using Vector128 = uint8x16_t; |
134 | | |
135 | | inline Vector128 Load128(const uint8_t* ptr) { return vld1q_u8(ptr); } |
136 | | |
137 | | inline Vector128 Set128(uint64_t a, uint64_t b) { |
138 | | return vreinterpretq_u8_u64(vsetq_lane_u64(a, vdupq_n_u64(b), 1)); |
139 | | } |
140 | | |
141 | | inline Vector128 Add128(Vector128 a, Vector128 b) { |
142 | | return vreinterpretq_u8_u64( |
143 | | vaddq_u64(vreinterpretq_u64_u8(a), vreinterpretq_u64_u8(b))); |
144 | | } |
145 | | |
146 | | // Bits of the second argument to Decrypt128/Encrypt128 are XORed with the |
147 | | // state argument BEFORE encryption (in the x86 version they are XORed after).
148 | | |
149 | | inline Vector128 Encrypt128(Vector128 data, Vector128 key) { |
150 | | return vaesmcq_u8(vaeseq_u8(data, key)); |
151 | | } |
152 | | |
153 | | inline Vector128 Decrypt128(Vector128 data, Vector128 key) { |
154 | | return vaesimcq_u8(vaesdq_u8(data, key)); |
155 | | } |
156 | | |
157 | | // We use decryption for a, b and encryption for c, d. That helps us avoid
158 | | // collisions for trivial byte rotations. Mix4x16Vectors later uses the
159 | | // encrypted/decrypted pairs differently to ensure that the order of blocks
160 | | // matters for the hash value.
161 | | // We also avoid the Add128/Sub128 instructions because the state is already
162 | | // mixed into the data before encryption/decryption. On ARM, there is no
163 | | // fusion of load and add/sub instructions, so using them would be more costly.
164 | | |
165 | | inline Vector128 MixA(Vector128 a, Vector128 state) { |
166 | | return Decrypt128(a, state); |
167 | | } |
168 | | |
169 | | inline Vector128 MixB(Vector128 b, Vector128 state) { |
170 | | return Decrypt128(b, state); |
171 | | } |
172 | | |
173 | | inline Vector128 MixC(Vector128 c, Vector128 state) { |
174 | | return Encrypt128(c, state); |
175 | | } |
176 | | |
177 | | inline Vector128 MixD(Vector128 d, Vector128 state) { |
178 | | return Encrypt128(d, state); |
179 | | } |
180 | | |
181 | | inline uint64_t ExtractLow64(Vector128 v) { |
182 | | return vgetq_lane_u64(vreinterpretq_u64_u8(v), 0); |
183 | | } |
184 | | |
185 | | inline uint64_t ExtractHigh64(Vector128 v) { |
186 | | return vgetq_lane_u64(vreinterpretq_u64_u8(v), 1); |
187 | | } |
188 | | |
189 | | uint64_t Mix4x16Vectors(Vector128 a, Vector128 b, Vector128 c, Vector128 d) { |
190 | | Vector128 res128 = Add128(Encrypt128(a, c), Decrypt128(b, d)); |
191 | | uint64_t x64 = ExtractLow64(res128); |
192 | | uint64_t y64 = ExtractHigh64(res128); |
193 | | return x64 ^ y64; |
194 | | } |
195 | | |
196 | | #endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD |
197 | | |
198 | | uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) { |
199 | | assert(len > 32); |
200 | | assert(len <= 64); |
201 | | Vector128 state = Set128(seed, len); |
202 | | Vector128 a = Load128(ptr); |
203 | | Vector128 b = Load128(ptr + 16); |
204 | | auto* last32_ptr = ptr + len - 32; |
205 | | Vector128 c = Load128(last32_ptr); |
206 | | Vector128 d = Load128(last32_ptr + 16); |
207 | | |
208 | | Vector128 na = MixA(a, state); |
209 | | Vector128 nb = MixB(b, state); |
210 | | Vector128 nc = MixC(c, state); |
211 | | Vector128 nd = MixD(d, state); |
212 | | |
213 | | // We perform another round of encryption to mix bits between two halves of |
214 | | // the input. |
215 | | return Mix4x16Vectors(na, nb, nc, nd); |
216 | | } |
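
The four loads above cover the whole 33-to-64-byte input as two possibly overlapping 32-byte windows. A hedged usage sketch, with a made-up buffer and seed (neither comes from Abseil):

// Illustrative sketch only; not part of hash.cc.
uint64_t Example33To64() {
  // For len == 40, a/b load bytes [0, 32) and c/d load bytes [8, 40), so the
  // middle 24 bytes are read twice; mixing `len` into the state via
  // Set128(seed, len) keeps different lengths from colliding on equal bytes.
  uint8_t buf[40] = {1, 2, 3};  // remaining bytes are zero-initialized
  return LowLevelHash33To64(/*seed=*/0x12345678u, buf, sizeof(buf));
}
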
217 | | |
218 | | [[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t |
219 | | LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) { |
220 | | assert(len > 64); |
221 | | const uint8_t* ptr = static_cast<const uint8_t*>(data); |
222 | | const uint8_t* last_32_ptr = ptr + len - 32; |
223 | | |
224 | | // If we have more than 64 bytes, we're going to handle chunks of 64
225 | | // bytes at a time. We're going to build up four separate hash states
226 | | // which we will then hash together. This keeps dependency chains short.
227 | | Vector128 state0 = Set128(seed, len); |
228 | | Vector128 state1 = state0; |
229 | | Vector128 state2 = state1; |
230 | | Vector128 state3 = state2; |
231 | | |
232 | | // Mix two 128-bit vectors at a time with the corresponding states.
233 | | // All variables are mixed slightly differently to avoid hash collisions
234 | | // due to trivial byte rotations.
235 | | // On x86, we combine state and data with Add128/Sub128 before applying AES
236 | | // encryption to make the hash function depend on the order of the blocks.
237 | | // See the comments in LowLevelHash33To64 for more considerations.
238 | | auto mix_ab = [&state0, |
239 | | &state1](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE { |
240 | | Vector128 a = Load128(p); |
241 | | Vector128 b = Load128(p + 16); |
242 | | state0 = MixA(a, state0); |
243 | | state1 = MixB(b, state1); |
244 | | }; |
245 | | auto mix_cd = [&state2, |
246 | | &state3](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE { |
247 | | Vector128 c = Load128(p); |
248 | | Vector128 d = Load128(p + 16); |
249 | | state2 = MixC(c, state2); |
250 | | state3 = MixD(d, state3); |
251 | | }; |
252 | | |
253 | | do { |
254 | | PrefetchFutureDataToLocalCache(ptr); |
255 | | mix_ab(ptr); |
256 | | mix_cd(ptr + 32); |
257 | | |
258 | | ptr += 64; |
259 | | len -= 64; |
260 | | } while (len > 64); |
261 | | |
262 | | // At this point `ptr` has at most 64 bytes of data left.
263 | | if (len > 32) { |
264 | | mix_ab(ptr); |
265 | | } |
266 | | mix_cd(last_32_ptr); |
267 | | |
268 | | return Mix4x16Vectors(state0, state1, state2, state3); |
269 | | } |
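
A worked trace of the loop and tail handling above, for a hypothetical 150-byte input (the size is chosen purely for illustration):

// Illustrative sketch only; not part of hash.cc.
uint64_t ExampleLenGt64() {
  // len = 150: the do/while mixes bytes [0, 64) and [64, 128) and exits with
  // len = 22. Since 22 <= 32, mix_ab(ptr) is skipped; mix_cd(last_32_ptr)
  // then mixes the final window [118, 150), which overlaps bytes already seen.
  uint8_t msg[150] = {};  // hypothetical input
  return LowLevelHashLenGt64(/*seed=*/0, msg, sizeof(msg));
}
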
270 | | #else |
271 | 2.42M | uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) { |
272 | 2.42M | uint64_t a = absl::base_internal::UnalignedLoad64(ptr); |
273 | 2.42M | uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); |
274 | 2.42M | uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); |
275 | 2.42M | uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); |
276 | | |
277 | 2.42M | uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state); |
278 | 2.42M | uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state); |
279 | 2.42M | return cs0 ^ cs1; |
280 | 2.42M | } |
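
Mix itself is declared in absl/hash/internal/hash.h. As a rough, assumption-labeled sketch of the underlying idea (a full 64x64-to-128-bit multiply whose halves are folded together; the exact Abseil definition may differ), it behaves roughly like this on compilers with __uint128_t support:

// Rough sketch of the multiply-and-fold idea behind Mix; NOT the real Abseil
// implementation, only an approximation to help read the code above.
inline uint64_t MixSketch(uint64_t lhs, uint64_t rhs) {
  __uint128_t m = lhs;
  m *= rhs;  // full 64x64 -> 128-bit product
  return static_cast<uint64_t>(m) ^ static_cast<uint64_t>(m >> 64);
}
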
281 | | |
282 | 61.0k | uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) { |
283 | 61.0k | assert(len > 32); |
284 | 61.0k | assert(len <= 64); |
285 | 61.0k | uint64_t current_state = seed ^ kStaticRandomData[0] ^ len; |
286 | 61.0k | const uint8_t* last_32_ptr = ptr + len - 32; |
287 | 61.0k | return Mix32Bytes(last_32_ptr, Mix32Bytes(ptr, current_state)); |
288 | 61.0k | } |
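
As in the SIMD variant, the two Mix32Bytes calls cover the 33-to-64-byte input as two possibly overlapping 32-byte windows; a short hedged trace with a made-up length:

// Illustrative sketch only; not part of hash.cc.
uint64_t ExampleScalar33To64() {
  // len = 48: the first Mix32Bytes consumes bytes [0, 32); the second consumes
  // bytes [16, 48) and takes the first result as its current_state, so the two
  // windows are chained rather than combined independently.
  uint8_t buf[48] = {};  // hypothetical input
  return LowLevelHash33To64(/*seed=*/0, buf, sizeof(buf));
}
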
289 | | |
290 | | [[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t |
291 | 1.25M | LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) { |
292 | 1.25M | assert(len > 64); |
293 | 1.25M | const uint8_t* ptr = static_cast<const uint8_t*>(data); |
294 | 1.25M | uint64_t current_state = seed ^ kStaticRandomData[0] ^ len; |
295 | 1.25M | const uint8_t* last_32_ptr = ptr + len - 32; |
296 | | // If we have more than 64 bytes, we're going to handle chunks of 64
297 | | // bytes at a time. We're going to build up four separate hash states
298 | | // which we will then hash together. This keeps dependency chains short.
299 | 1.25M | uint64_t duplicated_state0 = current_state; |
300 | 1.25M | uint64_t duplicated_state1 = current_state; |
301 | 1.25M | uint64_t duplicated_state2 = current_state; |
302 | | |
303 | 14.3M | do { |
304 | 14.3M | PrefetchFutureDataToLocalCache(ptr); |
305 | | |
306 | 14.3M | uint64_t a = absl::base_internal::UnalignedLoad64(ptr); |
307 | 14.3M | uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); |
308 | 14.3M | uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); |
309 | 14.3M | uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); |
310 | 14.3M | uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32); |
311 | 14.3M | uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40); |
312 | 14.3M | uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48); |
313 | 14.3M | uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56); |
314 | | |
315 | 14.3M | current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state); |
316 | 14.3M | duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0); |
317 | | |
318 | 14.3M | duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1); |
319 | 14.3M | duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2); |
320 | | |
321 | 14.3M | ptr += 64; |
322 | 14.3M | len -= 64; |
323 | 14.3M | } while (len > 64); |
324 | | |
325 | 1.25M | current_state = (current_state ^ duplicated_state0) ^ |
326 | 1.25M | (duplicated_state1 + duplicated_state2); |
327 | | // At this point `ptr` has at most 64 bytes of data left, and the current
328 | | // state of the hashing state machine is stored in current_state.
329 | 1.25M | if (len > 32) { |
330 | 1.04M | current_state = Mix32Bytes(ptr, current_state); |
331 | 1.04M | } |
332 | | |
333 | | // At most 32 bytes of the input remain unmixed, and the current state of
334 | | // the hashing state machine is stored in current_state. We can safely read
335 | | // the final 32 bytes from `ptr + len - 32`.
336 | 1.25M | return Mix32Bytes(last_32_ptr, current_state); |
337 | 1.25M | } |
338 | | #endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD || ABSL_AES_INTERNAL_HAVE_ARM_SIMD
339 | | |
340 | | [[maybe_unused]] uint64_t LowLevelHashLenGt32(uint64_t seed, const void* data, |
341 | 1.32M | size_t len) { |
342 | 1.32M | assert(len > 32); |
343 | 1.32M | if (ABSL_PREDICT_FALSE(len > 64)) { |
344 | 1.25M | return LowLevelHashLenGt64(seed, data, len); |
345 | 1.25M | } |
346 | 61.0k | return LowLevelHash33To64(seed, static_cast<const uint8_t*>(data), len); |
347 | 1.32M | } |
348 | | |
349 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit( |
350 | 0 | uint64_t state, const unsigned char* data, size_t len) { |
351 | 0 | // TODO(b/417141985): expose and use CityHash32WithSeed. |
352 | 0 | // Note: we can't use PrecombineLengthMix here because len can be up to 1024. |
353 | 0 | return CombineRawImpl( |
354 | 0 | state + len, |
355 | 0 | hash_internal::CityHash32(reinterpret_cast<const char*>(data), len)); |
356 | 0 | } |
357 | | |
358 | | ABSL_ATTRIBUTE_NOINLINE uint64_t |
359 | 0 | SplitAndCombineOn32Bit(uint64_t state, const unsigned char* first, size_t len) { |
360 | 0 | while (len >= PiecewiseChunkSize()) { |
361 | 0 | state = HashBlockOn32Bit(state, first, PiecewiseChunkSize()); |
362 | 0 | len -= PiecewiseChunkSize(); |
363 | 0 | first += PiecewiseChunkSize(); |
364 | 0 | } |
365 | 0 | // Do not call CombineContiguousImpl for an empty range, since it would
366 | 0 | // modify the state.
367 | 0 | if (len == 0) { |
368 | 0 | return state; |
369 | 0 | } |
370 | 0 | // Handle the remainder. |
371 | 0 | return CombineContiguousImpl(state, first, len, |
372 | 0 | std::integral_constant<int, 4>{}); |
373 | 0 | } |
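
A quick arithmetic sketch of how this splitting loop (and the identical 64-bit variant below) carves up a long buffer, assuming the 1024-byte chunk size implied by the "len can be up to 1024" note in HashBlockOn32Bit (PiecewiseChunkSize() is defined in hash.h):

// Illustrative sketch only; not part of hash.cc.
constexpr size_t kAssumedChunk = 1024;                       // assumed chunk size
constexpr size_t kExampleLen = 2500;                         // hypothetical input size
constexpr size_t kFullBlocks = kExampleLen / kAssumedChunk;  // 2 full blocks hashed in the loop
constexpr size_t kRemainder = kExampleLen % kAssumedChunk;   // 452-byte tail for CombineContiguousImpl
static_assert(kFullBlocks == 2 && kRemainder == 452, "2500 = 2 * 1024 + 452");
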
374 | | |
375 | | ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit( |
376 | 1.32M | uint64_t state, const unsigned char* data, size_t len) { |
377 | 1.32M | #ifdef ABSL_HAVE_INTRINSIC_INT128 |
378 | 1.32M | return LowLevelHashLenGt32(state, data, len); |
379 | | #else |
380 | | return hash_internal::CityHash64WithSeed(reinterpret_cast<const char*>(data), |
381 | | len, state); |
382 | | #endif |
383 | 1.32M | } |
384 | | |
385 | | ABSL_ATTRIBUTE_NOINLINE uint64_t |
386 | 96.0k | SplitAndCombineOn64Bit(uint64_t state, const unsigned char* first, size_t len) { |
387 | 941k | while (len >= PiecewiseChunkSize()) { |
388 | 845k | state = HashBlockOn64Bit(state, first, PiecewiseChunkSize()); |
389 | 845k | len -= PiecewiseChunkSize(); |
390 | 845k | first += PiecewiseChunkSize(); |
391 | 845k | } |
392 | | // Do not call CombineContiguousImpl for an empty range, since it would
393 | | // modify the state.
394 | 96.0k | if (len == 0) { |
395 | 177 | return state; |
396 | 177 | } |
397 | | // Handle the remainder. |
398 | 95.8k | return CombineContiguousImpl(state, first, len, |
399 | 95.8k | std::integral_constant<int, 8>{}); |
400 | 96.0k | } |
401 | | |
402 | | } // namespace |
403 | | |
404 | | uint64_t CombineLargeContiguousImplOn32BitLengthGt8(uint64_t state, |
405 | | const unsigned char* first, |
406 | 0 | size_t len) { |
407 | 0 | assert(len > 8); |
408 | 0 | assert(sizeof(size_t) == 4); // NOLINT(misc-static-assert) |
409 | 0 | if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { |
410 | 0 | return HashBlockOn32Bit(state, first, len); |
411 | 0 | } |
412 | 0 | return SplitAndCombineOn32Bit(state, first, len); |
413 | 0 | } |
414 | | |
415 | | uint64_t CombineLargeContiguousImplOn64BitLengthGt32(uint64_t state, |
416 | | const unsigned char* first, |
417 | 570k | size_t len) { |
418 | 570k | assert(len > 32); |
419 | 570k | assert(sizeof(size_t) == 8); // NOLINT(misc-static-assert) |
420 | 570k | if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { |
421 | 474k | return HashBlockOn64Bit(state, first, len); |
422 | 474k | } |
423 | 96.0k | return SplitAndCombineOn64Bit(state, first, len); |
424 | 570k | } |
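
A hedged usage sketch of the 64-bit entry point above; real callers live inside the absl::Hash combiners, and the buffer and state value here are arbitrary, chosen only for illustration:

// Illustrative sketch only; not part of hash.cc.
uint64_t ExampleCombineLarge() {
  unsigned char data[100] = {};  // hypothetical input with len > 32
  // 100 <= PiecewiseChunkSize(), so this takes the single-block
  // HashBlockOn64Bit path rather than SplitAndCombineOn64Bit.
  return CombineLargeContiguousImplOn64BitLengthGt32(/*state=*/0, data,
                                                     sizeof(data));
}
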
425 | | |
426 | | ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed; |
427 | | |
428 | | } // namespace hash_internal |
429 | | ABSL_NAMESPACE_END |
430 | | } // namespace absl |