Coverage Report

Created: 2024-07-27 06:53

/src/rocksdb/util/hash.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7
// Use of this source code is governed by a BSD-style license that can be
8
// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10
#include "util/hash.h"
11
12
#include <string>
13
14
#include "port/lang.h"
15
#include "util/coding.h"
16
#include "util/hash128.h"
17
#include "util/math128.h"
18
#include "util/xxhash.h"
19
#include "util/xxph3.h"
20
21
namespace ROCKSDB_NAMESPACE {
22
23
uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&) = &GetSliceHash64;
24
25
404k
uint32_t Hash(const char* data, size_t n, uint32_t seed) {
26
  // MurmurHash1 - fast but mediocre quality
27
  // https://github.com/aappleby/smhasher/wiki/MurmurHash1
28
  //
29
404k
  const uint32_t m = 0xc6a4a793;
30
404k
  const uint32_t r = 24;
31
404k
  const char* limit = data + n;
32
404k
  uint32_t h = static_cast<uint32_t>(seed ^ (n * m));
33
34
  // Pick up four bytes at a time
35
1.61M
  while (data + 4 <= limit) {
36
1.21M
    uint32_t w = DecodeFixed32(data);
37
1.21M
    data += 4;
38
1.21M
    h += w;
39
1.21M
    h *= m;
40
1.21M
    h ^= (h >> 16);
41
1.21M
  }
42
43
  // Pick up remaining bytes
44
404k
  switch (limit - data) {
45
    // Note: The original hash implementation used data[i] << shift, which
46
    // promotes the char to int and then performs the shift. If the char is
47
    // negative, the shift is undefined behavior in C++. The hash algorithm is
48
    // part of the format definition, so we cannot change it; to obtain the same
49
    // behavior in a legal way we just cast to uint32_t, which will do
50
    // sign-extension. To guarantee compatibility with architectures where chars
51
    // are unsigned we first cast the char to int8_t.
52
0
    case 3:
53
0
      h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16;
54
0
      FALLTHROUGH_INTENDED;
55
0
    case 2:
56
0
      h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8;
57
0
      FALLTHROUGH_INTENDED;
58
0
    case 1:
59
0
      h += static_cast<uint32_t>(static_cast<int8_t>(data[0]));
60
0
      h *= m;
61
0
      h ^= (h >> r);
62
0
      break;
63
404k
  }
64
404k
  return h;
65
404k
}
66
67
// We are standardizing on a preview release of XXH3, because that's
68
// the best available at time of standardizing.
69
//
70
// In testing (mostly Intel Skylake), this hash function is much more
71
// thorough than Hash32 and is almost universally faster. Hash() only
72
// seems faster when passing runtime-sized keys of the same small size
73
// (less than about 24 bytes) thousands of times in a row; this seems
74
// to allow the branch predictor to work some magic. XXH3's speed is
75
// much less dependent on branch prediction.
76
//
77
// Hashing with a prefix extractor is potentially a common case of
78
// hashing objects of small, predictable size. We could consider
79
// bundling hash functions specialized for particular lengths with
80
// the prefix extractors.
81
13.8M
uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
82
13.8M
  return XXPH3_64bits_withSeed(data, n, seed);
83
13.8M
}
84
85
0
uint64_t Hash64(const char* data, size_t n) {
86
  // Same as seed = 0
87
0
  return XXPH3_64bits(data, n);
88
0
}
89
90
0
uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed) {
91
  // TODO(ajkr): use XXH3 streaming APIs to avoid the copy/allocation.
92
0
  size_t concat_len = 0;
93
0
  for (int i = 0; i < data.num_parts; ++i) {
94
0
    concat_len += data.parts[i].size();
95
0
  }
96
0
  std::string concat_data;
97
0
  concat_data.reserve(concat_len);
98
0
  for (int i = 0; i < data.num_parts; ++i) {
99
0
    concat_data.append(data.parts[i].data(), data.parts[i].size());
100
0
  }
101
0
  assert(concat_data.size() == concat_len);
102
0
  return NPHash64(concat_data.data(), concat_len, seed);
103
0
}
104
105
0
Unsigned128 Hash128(const char* data, size_t n, uint64_t seed) {
106
0
  auto h = XXH3_128bits_withSeed(data, n, seed);
107
0
  return (Unsigned128{h.high64} << 64) | (h.low64);
108
0
}
109
110
0
Unsigned128 Hash128(const char* data, size_t n) {
111
  // Same as seed = 0
112
0
  auto h = XXH3_128bits(data, n);
113
0
  return (Unsigned128{h.high64} << 64) | (h.low64);
114
0
}
115
116
4
void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64) {
117
  // Same as seed = 0
118
4
  auto h = XXH3_128bits(data, n);
119
4
  *high64 = h.high64;
120
4
  *low64 = h.low64;
121
4
}
122
123
void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64,
124
82.7k
              uint64_t* low64) {
125
82.7k
  auto h = XXH3_128bits_withSeed(data, n, seed);
126
82.7k
  *high64 = h.high64;
127
82.7k
  *low64 = h.low64;
128
82.7k
}
129
130
namespace {
131
132
0
inline uint64_t XXH3_avalanche(uint64_t h64) {
133
0
  h64 ^= h64 >> 37;
134
0
  h64 *= 0x165667919E3779F9U;
135
0
  h64 ^= h64 >> 32;
136
0
  return h64;
137
0
}
138
139
0
inline uint64_t XXH3_unavalanche(uint64_t h64) {
140
0
  h64 ^= h64 >> 32;
141
0
  h64 *= 0x8da8ee41d6df849U;  // inverse of 0x165667919E3779F9U
142
0
  h64 ^= h64 >> 37;
143
0
  return h64;
144
0
}
145
146
}  // namespace
147
148
void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed,
149
0
                       uint64_t* out_high64, uint64_t* out_low64) {
150
  // Adapted from XXH3_len_9to16_128b
151
0
  const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed;
152
0
  const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed;
153
0
  Unsigned128 tmp128 =
154
0
      Multiply64to128(in_low64 ^ in_high64 ^ bitflipl, 0x9E3779B185EBCA87U);
155
0
  uint64_t lo = Lower64of128(tmp128);
156
0
  uint64_t hi = Upper64of128(tmp128);
157
0
  lo += 0x3c0000000000000U;  // (len - 1) << 54
158
0
  in_high64 ^= bitfliph;
159
0
  hi += in_high64 + (Lower32of64(in_high64) * uint64_t{0x85EBCA76});
160
0
  lo ^= EndianSwapValue(hi);
161
0
  tmp128 = Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU);
162
0
  lo = Lower64of128(tmp128);
163
0
  hi = Upper64of128(tmp128) + (hi * 0xC2B2AE3D27D4EB4FU);
164
0
  *out_low64 = XXH3_avalanche(lo);
165
0
  *out_high64 = XXH3_avalanche(hi);
166
0
}
167
168
void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed,
169
0
                         uint64_t* out_high64, uint64_t* out_low64) {
170
  // Inverted above (also consulting XXH3_len_9to16_128b)
171
0
  const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed;
172
0
  const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed;
173
0
  uint64_t lo = XXH3_unavalanche(in_low64);
174
0
  uint64_t hi = XXH3_unavalanche(in_high64);
175
0
  lo *= 0xba79078168d4baf;  // inverse of 0xC2B2AE3D27D4EB4FU
176
0
  hi -= Upper64of128(Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU));
177
0
  hi *= 0xba79078168d4baf;  // inverse of 0xC2B2AE3D27D4EB4FU
178
0
  lo ^= EndianSwapValue(hi);
179
0
  lo -= 0x3c0000000000000U;
180
0
  lo *= 0x887493432badb37U;  // inverse of 0x9E3779B185EBCA87U
181
0
  hi -= Upper64of128(Multiply64to128(lo, 0x9E3779B185EBCA87U));
182
0
  uint32_t tmp32 = Lower32of64(hi) * 0xb6c92f47;  // inverse of 0x85EBCA77
183
0
  hi -= tmp32;
184
0
  hi = (hi & 0xFFFFFFFF00000000U) -
185
0
       ((tmp32 * uint64_t{0x85EBCA76}) & 0xFFFFFFFF00000000U) + tmp32;
186
0
  hi ^= bitfliph;
187
0
  lo ^= hi ^ bitflipl;
188
0
  *out_high64 = hi;
189
0
  *out_low64 = lo;
190
0
}
191
192
void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64,
193
0
                       uint64_t* out_high64, uint64_t* out_low64) {
194
0
  BijectiveHash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64);
195
0
}
196
197
void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64,
198
0
                         uint64_t* out_high64, uint64_t* out_low64) {
199
0
  BijectiveUnhash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64);
200
0
}
201
}  // namespace ROCKSDB_NAMESPACE