/src/rocksdb/util/murmurhash.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | /* |
7 | | Murmurhash from http://sites.google.com/site/murmurhash/ |
8 | | |
9 | | All code is released to the public domain. For business purposes, Murmurhash |
10 | | is under the MIT license. |
11 | | */ |
12 | | #include "murmurhash.h" |
13 | | |
14 | | #include "port/lang.h" |
15 | | |
16 | | #if defined(__x86_64__) |
17 | | |
18 | | // ------------------------------------------------------------------- |
19 | | // |
20 | | // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment |
21 | | // and endian-ness issues if used across multiple platforms. |
22 | | // |
23 | | // 64-bit hash for 64-bit platforms |
24 | | |
25 | | #ifdef ROCKSDB_UBSAN_RUN |
26 | | #if defined(__clang__) |
27 | | __attribute__((__no_sanitize__("alignment"))) |
28 | | #elif defined(__GNUC__) |
29 | | __attribute__((__no_sanitize_undefined__)) |
30 | | #endif |
31 | | #endif |
32 | | // clang-format off |
33 | | uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ) |
34 | 0 | { |
35 | 0 | const uint64_t m = 0xc6a4a7935bd1e995; |
36 | 0 | const int r = 47; |
37 | |
|
38 | 0 | uint64_t h = seed ^ (len * m); |
39 | |
|
40 | 0 | const uint64_t * data = (const uint64_t *)key; |
41 | 0 | const uint64_t * end = data + (len/8); |
42 | |
|
43 | 0 | while(data != end) |
44 | 0 | { |
45 | 0 | uint64_t k = *data++; |
46 | |
|
47 | 0 | k *= m; |
48 | 0 | k ^= k >> r; |
49 | 0 | k *= m; |
50 | |
|
51 | 0 | h ^= k; |
52 | 0 | h *= m; |
53 | 0 | } |
54 | |
|
55 | 0 | const unsigned char * data2 = (const unsigned char*)data; |
56 | |
|
57 | 0 | switch(len & 7) |
58 | 0 | { |
59 | 0 | case 7: h ^= ((uint64_t)data2[6]) << 48; FALLTHROUGH_INTENDED; |
60 | 0 | case 6: h ^= ((uint64_t)data2[5]) << 40; FALLTHROUGH_INTENDED; |
61 | 0 | case 5: h ^= ((uint64_t)data2[4]) << 32; FALLTHROUGH_INTENDED; |
62 | 0 | case 4: h ^= ((uint64_t)data2[3]) << 24; FALLTHROUGH_INTENDED; |
63 | 0 | case 3: h ^= ((uint64_t)data2[2]) << 16; FALLTHROUGH_INTENDED; |
64 | 0 | case 2: h ^= ((uint64_t)data2[1]) << 8; FALLTHROUGH_INTENDED; |
65 | 0 | case 1: h ^= ((uint64_t)data2[0]); |
66 | 0 | h *= m; |
67 | 0 | } |
68 | |
|
69 | 0 | h ^= h >> r; |
70 | 0 | h *= m; |
71 | 0 | h ^= h >> r; |
72 | |
|
73 | 0 | return h; |
74 | 0 | } |
75 | | // clang-format on |
76 | | |
77 | | #elif defined(__i386__) |
78 | | |
79 | | // ------------------------------------------------------------------- |
80 | | // |
81 | | // Note - This code makes a few assumptions about how your machine behaves - |
82 | | // |
83 | | // 1. We can read a 4-byte value from any address without crashing |
84 | | // 2. sizeof(int) == 4 |
85 | | // |
86 | | // And it has a few limitations - |
87 | | // |
88 | | // 1. It will not work incrementally. |
89 | | // 2. It will not produce the same results on little-endian and big-endian |
90 | | // machines. |
91 | | // clang-format off |
92 | | unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) |
93 | | { |
94 | | // 'm' and 'r' are mixing constants generated offline. |
95 | | // They're not really 'magic', they just happen to work well. |
96 | | |
97 | | const unsigned int m = 0x5bd1e995; |
98 | | const int r = 24; |
99 | | |
100 | | // Initialize the hash to a 'random' value |
101 | | |
102 | | unsigned int h = seed ^ len; |
103 | | |
104 | | // Mix 4 bytes at a time into the hash |
105 | | |
106 | | const unsigned char * data = (const unsigned char *)key; |
107 | | |
108 | | while(len >= 4) |
109 | | { |
110 | | unsigned int k = *(unsigned int *)data; |
111 | | |
112 | | k *= m; |
113 | | k ^= k >> r; |
114 | | k *= m; |
115 | | |
116 | | h *= m; |
117 | | h ^= k; |
118 | | |
119 | | data += 4; |
120 | | len -= 4; |
121 | | } |
122 | | |
123 | | // Handle the last few bytes of the input array |
124 | | |
125 | | switch(len) |
126 | | { |
127 | | case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; |
128 | | case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; |
129 | | case 1: h ^= data[0]; |
130 | | h *= m; |
131 | | }; |
132 | | |
133 | | // Do a few final mixes of the hash to ensure the last few |
134 | | // bytes are well-incorporated. |
135 | | |
136 | | h ^= h >> 13; |
137 | | h *= m; |
138 | | h ^= h >> 15; |
139 | | |
140 | | return h; |
141 | | } |
142 | | // clang-format on |
143 | | |
144 | | #else |
145 | | |
146 | | // ------------------------------------------------------------------- |
147 | | // |
148 | | // Same as MurmurHash2, but endian- and alignment-neutral. |
149 | | // Half the speed though, alas. |
150 | | // clang-format off |
151 | | unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed ) |
152 | | { |
153 | | const unsigned int m = 0x5bd1e995; |
154 | | const int r = 24; |
155 | | |
156 | | unsigned int h = seed ^ len; |
157 | | |
158 | | const unsigned char * data = (const unsigned char *)key; |
159 | | |
160 | | while(len >= 4) |
161 | | { |
162 | | unsigned int k; |
163 | | |
164 | | k = data[0]; |
165 | | k |= data[1] << 8; |
166 | | k |= data[2] << 16; |
167 | | k |= data[3] << 24; |
168 | | |
169 | | k *= m; |
170 | | k ^= k >> r; |
171 | | k *= m; |
172 | | |
173 | | h *= m; |
174 | | h ^= k; |
175 | | |
176 | | data += 4; |
177 | | len -= 4; |
178 | | } |
179 | | |
180 | | switch(len) |
181 | | { |
182 | | case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; |
183 | | case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; |
184 | | case 1: h ^= data[0]; |
185 | | h *= m; |
186 | | }; |
187 | | |
188 | | h ^= h >> 13; |
189 | | h *= m; |
190 | | h ^= h >> 15; |
191 | | |
192 | | return h; |
193 | | } |
194 | | // clang-format on |
195 | | |
196 | | #endif |