/src/php-src/ext/hash/murmur/PMurHash128.c
/*-----------------------------------------------------------------------------
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain.
 *
 * This is a C implementation of MurmurHash3_128 with support for progressive
 * processing, based on the PMurHash implementation written by Shane Day.
 */

/*-----------------------------------------------------------------------------

If you want to understand the MurmurHash algorithm you would be much better
off reading the original source. Just point your browser at:
http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp


What this version provides:

1. Progressive data feeding. Useful when the entire payload to be hashed
does not fit in memory or when the data is streamed through the application.
Also useful when hashing a number of strings with a common prefix: a partial
hash of the prefix can be generated once and reused for each suffix, as in
the sketch below.
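
For example, a minimal sketch of prefix reuse using the x64 variant defined
below (the names and lengths here are illustrative, not part of the API).
Hash the shared prefix once, snapshot the running state, then finish each
suffix from its own copy:

  uint64_t h[2] = {0, 0}, carry[2] = {0, 0};
  PMurHash128x64_Process(h, carry, "common/prefix/", 14);

  uint64_t ha[2] = {h[0], h[1]}, ca[2] = {carry[0], carry[1]};
  uint64_t hb[2] = {h[0], h[1]}, cb[2] = {carry[0], carry[1]};
  uint64_t out_a[2], out_b[2];

  PMurHash128x64_Process(ha, ca, "alpha", 5);
  PMurHash128x64_Result(ha, ca, 14 + 5, out_a);

  PMurHash128x64_Process(hb, cb, "beta", 4);
  PMurHash128x64_Result(hb, cb, 14 + 4, out_b);

Note that _Result takes the total length of everything hashed so far, not
the length of the last piece.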

How does it work?

We can only process entire 128-bit chunks of input, except for the very end,
which may be shorter. So along with the partial hash we need to give back to
the caller a carry containing up to 15 bytes that we were unable to process.
This carry also needs to record the number of bytes it holds. I use the low
4 bits as a count (0..15) and shift the carry bytes into the high bytes in
stream order.
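
For example, feeding the three bytes "abc" into an empty x86 state leaves
n = 3 and the bytes parked at the top of the k1 carry word:

  k1 = 'a'<<24          = 0x61000000   after the first byte
  k1 = k1>>8 | 'b'<<24  = 0x62610000   after the second
  k1 = k1>>8 | 'c'<<24  = 0x63626100   after the third

and the count is stored in the low bits of the last carry word as
pcarry[3] = (k4 & ~0xff) | 3.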

To handle endianness I simply use a macro that reads an unsigned int and
define that macro to be a direct read on little-endian machines, and a
read-and-swap on big-endian machines.
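
Either way, for the byte sequence 01 02 03 04 both kinds of machine end up
with the 32-bit value 0x04030201: the little-endian read is free, and the
big-endian read is followed by a byte swap.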

-----------------------------------------------------------------------------*/


#include "PMurHash128.h"

/*-----------------------------------------------------------------------------
 * Endianness, misalignment capabilities and util macros
 *
 * The following seven macros are defined in this section. The other macros
 * defined are only needed to help derive these seven.
 *
 * READ_UINT32(x,i)  Read a little endian unsigned 32-bit int at index
 * READ_UINT64(x,i)  Read a little endian unsigned 64-bit int at index
 * UNALIGNED_SAFE    Defined if READ_UINTXX works on non-word boundaries
 * ROTL32(x,r)       Rotate x left by r bits
 * ROTL64(x,r)       Rotate x left by r bits
 * BIG_CONSTANT(x)   Portable way to write a 64-bit integer constant
 * FORCE_INLINE      Strongest available hint to inline a function
 */

/* I386 or AMD64 */
#if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \
  || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
#define UNALIGNED_SAFE
#endif

/* Find best way to ROTL */
#if defined(_MSC_VER)
#define FORCE_INLINE static __forceinline
#include <stdlib.h>  /* Microsoft put _rotl declaration in here */
#define ROTL32(x,y)  _rotl(x,y)
#define ROTL64(x,y)  _rotl64(x,y)
#define BIG_CONSTANT(x) (x)
#else
#define FORCE_INLINE static inline __attribute__((always_inline))
/* gcc recognises this code and generates a rotate instruction for CPUs with one */
#define ROTL32(x,r)  (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
#define ROTL64(x,r)  (((uint64_t)(x) << (r)) | ((uint64_t)(x) >> (64 - (r))))
#define BIG_CONSTANT(x) (x##LLU)
#endif
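
/* Quick sanity check for the fallback rotates (a sketch, not part of the
 * build): the high bit wraps around to bit 0, e.g.
 *   ROTL32(0x80000001u, 1) == 0x00000003
 *   ROTL64(BIG_CONSTANT(0x8000000000000000), 1) == 1
 */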

#include "endianness.h"

#define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i)
#define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i)

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}
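
/* A quick avalanche check for the finalizers (a standalone sketch, assuming
 * it is compiled together with this file; __builtin_popcountll is a
 * GCC/Clang builtin): consecutive inputs should come out differing in
 * roughly half of their 64 bits.
 *
 *   #include <stdio.h>
 *   int main(void) {
 *     int total = 0;
 *     for (uint64_t i = 0; i < 1000; i++)
 *       total += __builtin_popcountll(fmix64(i) ^ fmix64(i + 1));
 *     printf("avg differing bits: %.1f\n", total / 1000.0);  // expect ~32
 *     return 0;
 *   }
 */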

/*-----------------------------------------------------------------------------*
                                PMurHash128x86
*-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

static const uint32_t kC1 = 0x239b961b;
static const uint32_t kC2 = 0xab0e9789;
static const uint32_t kC3 = 0x38b34ae5;
static const uint32_t kC4 = 0xa1e38b93;

/* This is the main processing body of the algorithm. It operates
 * on each full 128 bits of input. */
#define doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4)\
do {\
  k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
\
  h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
\
  k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
\
  h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
\
  k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
\
  h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
\
  k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
\
  h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
} while(0)

/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1-h4=hash, k1-k4=carry, n=bytes in carry, ptr/len=payload */
#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
do {\
  unsigned __cnt = cnt;\
  for(;__cnt--; len--) {\
    switch(n) {\
      case 0: case 1: case 2: case 3:\
        k1 = k1>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 4: case 5: case 6: case 7:\
        k2 = k2>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 8: case 9: case 10: case 11:\
        k3 = k3>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 12: case 13: case 14:\
        k4 = k4>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 15:\
        k4 = k4>>8 | (uint32_t)*ptr++<<24;\
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
        n = 0; break;\
    }\
  }\
} while(0)

/* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */
void PMurHash128x86_Result(const uint32_t ph[4], const uint32_t pcarry[4], uint32_t total_length, uint32_t out[4])
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  uint32_t k1, k2, k3, k4 = pcarry[3]; /* only k4 is needed up front; k1..k3 are loaded on the paths that use them */

  int n = k4 & 15;
  switch(n) {
  case 1: case 2: case 3: case 4:
    k1 = pcarry[0] >> (4-n)*8;
    goto finrot_k1;

  case 5: case 6: case 7: case 8:
    k2 = pcarry[1] >> (8-n)*8;
    goto finrot_k21;

  case 9: case 10: case 11: case 12:
    k3 = pcarry[2] >> (12-n)*8;
    goto finrot_k321;

  case 13: case 14: case 15:
    k4 >>= (16-n)*8;
    goto finrot_k4321;

  default:
    goto skiprot;
  }
finrot_k4321:
  k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
  k3 = pcarry[2];
finrot_k321:
  k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
  k2 = pcarry[1];
finrot_k21:
  k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
  k1 = pcarry[0];
finrot_k1:
  k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
skiprot:

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;
  h3 ^= total_length; h4 ^= total_length;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  out[0] = h1;
  out[1] = h2;
  out[2] = h3;
  out[3] = h4;
}

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry[4] to {0,0,0,0} and, if a seed is
 * wanted, h[4] to an initial {seed,seed,seed,seed}. Both ph and pcarry are required arguments. */
void PMurHash128x86_Process(uint32_t ph[4], uint32_t pcarry[4], const void * const key, int len)
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  uint32_t k1 = pcarry[0];
  uint32_t k2 = pcarry[1];
  uint32_t k3 = pcarry[2];
  uint32_t k4 = pcarry[3];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of the carry value */
  int n = k4 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
  // #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT32(ptr, 0);
    k2 = READ_UINT32(ptr, 1);
    k3 = READ_UINT32(ptr, 2);
    k4 = READ_UINT32(ptr, 3);
    doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
  // #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(void *)ptr & 3;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count.
   * Diagram key: each character names one input byte in stream order (0..9, then a, b, ...),
   * kN=[....] shows the bytes held in that carry word, w=[....] the aligned words read per
   * iteration, b=[....] the bytes fed to doblock128x86, kN'=[....] the carry left for the
   * next iteration, and '-' marks an empty byte. */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in the carry */
  case 0: /*
  k1=[----] k2=[----] k3=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT32(ptr, 0);
      k2 = READ_UINT32(ptr, 1);
      k3 = READ_UINT32(ptr, 2);
      k4 = READ_UINT32(ptr, 3);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
    }
    break;
  case 1: case 2: case 3: /*
  k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
    {
      const int lshift = n*8, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k1>>rshift;      // --10
        k2 = READ_UINT32(ptr, 0);     // 5432
        c |= k2<<lshift;              // 3210.
        k1 = READ_UINT32(ptr, 1);     // 9876
        k2 = k1<<lshift | k2>>rshift; // 7654.
        k4 = READ_UINT32(ptr, 2);     // dcba
        k3 = k4<<lshift | k1>>rshift; // ba98.
        k1 = READ_UINT32(ptr, 3);     // hgfe.
        k4 = k1<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
      }
    }
    break;
  case 4: /*
  k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT32(ptr, 0);
      k3 = READ_UINT32(ptr, 1);
      k4 = READ_UINT32(ptr, 2);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 3);
    }
    break;
  case 5: case 6: case 7: /*
  k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
    {
      const int lshift = n*8-32, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k2>>rshift;      // --54
        k3 = READ_UINT32(ptr, 0);     // 9876
        c |= k3<<lshift;              // 7654.
        k4 = READ_UINT32(ptr, 1);     // dcba
        k3 = k4<<lshift | k3>>rshift; // ba98.
        k2 = READ_UINT32(ptr, 2);     // hgfe
        k4 = k2<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
        k1 = k2>>rshift;              // --hg
        k2 = READ_UINT32(ptr, 3);     // lkji.
        k1 |= k2<<lshift;             // jihg.
      }
    }
    break;
  case 8: /*
  k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
    for( ; ptr < end ; ptr+=16) {
      k3 = READ_UINT32(ptr, 0);
      k4 = READ_UINT32(ptr, 1);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 2);
      k2 = READ_UINT32(ptr, 3);
    }
    break;
  case 9: case 10: case 11: /*
  k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
    {
      const int lshift = n*8-64, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k3>>rshift;      // --98
        k4 = READ_UINT32(ptr, 0);     // dcba
        c |= k4<<lshift;              // ba98.
        k3 = READ_UINT32(ptr, 1);     // hgfe
        k4 = k3<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
        k2 = READ_UINT32(ptr, 2);     // lkji
        k1 = k2<<lshift | k3>>rshift; // jihg.
        k3 = READ_UINT32(ptr, 3);     // ponm.
        k2 = k3<<lshift | k2>>rshift; // nmlk.
      }
    }
    break;
  case 12: /*
  k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
    for( ; ptr < end ; ptr+=16) {
      k4 = READ_UINT32(ptr, 0);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 1);
      k2 = READ_UINT32(ptr, 2);
      k3 = READ_UINT32(ptr, 3);
    }
    break;
  default: /* 12 < n <= 15
  k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k4'=[ts--] */
    {
      const int lshift = n*8-96, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k4>>rshift;      // --dc
        k4 = READ_UINT32(ptr, 0);     // hgfe
        c |= k4<<lshift;              // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
        k3 = READ_UINT32(ptr, 1);     // lkji
        k1 = k3<<lshift | k4>>rshift; // jihg.
        c = READ_UINT32(ptr, 2);      // ponm
        k2 = c<<lshift | k3>>rshift;  // nmlk.
        k4 = READ_UINT32(ptr, 3);     // tsrq.
        k3 = k4<<lshift | c>>rshift;  // rqpo.
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15; /* i.e. len &= 15 */

  /* Append any remaining bytes into carry */
  dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  ph[2] = h3;
  ph[3] = h4;
  pcarry[0] = k1;
  pcarry[1] = k2;
  pcarry[2] = k3;
  pcarry[3] = (k4 & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* All in one go */

/* MurmurHash3_x86_128 api */
void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
{
  uint32_t carry[4] = {0, 0, 0, 0};
  uint32_t h[4] = {seed, seed, seed, seed};
  PMurHash128x86_Process(h, carry, key, len);
  PMurHash128x86_Result(h, carry, (uint32_t) len, (uint32_t *) out);
}
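
/* The incremental API gives the same digest as the one-shot call above; a
 * minimal sketch (assumes it is compiled together with this file, so the
 * prototypes from PMurHash128.h are in scope):
 *
 *   #include <stdio.h>
 *   #include <string.h>
 *
 *   int main(void) {
 *     const char *msg = "hello, murmur3!";
 *     uint32_t one_shot[4], incremental[4];
 *
 *     PMurHash128x86(msg, (int)strlen(msg), 0, one_shot);
 *
 *     uint32_t h[4] = {0, 0, 0, 0}, carry[4] = {0, 0, 0, 0};
 *     PMurHash128x86_Process(h, carry, msg, 5);                        // "hello"
 *     PMurHash128x86_Process(h, carry, msg + 5, (int)strlen(msg) - 5); // the rest
 *     PMurHash128x86_Result(h, carry, (uint32_t)strlen(msg), incremental);
 *
 *     printf("%d\n", memcmp(one_shot, incremental, sizeof one_shot) == 0); // prints 1
 *     return 0;
 *   }
 */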

/*-----------------------------------------------------------------------------*
                                PMurHash128x64
*-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);

/* This is the main processing body of the algorithm. It operates
 * on each full 128 bits of input. */
#define doblock128x64(h1, h2, k1, k2)\
do {\
  k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
\
  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
\
  k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
\
  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
} while(0)

/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1,h2=hash, k1,k2=carry, n=bytes in carry, ptr/len=payload */
#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
do {\
  unsigned __cnt = cnt;\
  for(;__cnt--; len--) {\
    switch(n) {\
      case 0: case 1: case 2: case 3:\
      case 4: case 5: case 6: case 7:\
        k1 = k1>>8 | (uint64_t)*ptr++<<56;\
        n++; break;\
\
      case 8: case 9: case 10: case 11:\
      case 12: case 13: case 14:\
        k2 = k2>>8 | (uint64_t)*ptr++<<56;\
        n++; break;\
\
      case 15:\
        k2 = k2>>8 | (uint64_t)*ptr++<<56;\
        doblock128x64(h1, h2, k1, k2);\
        n = 0; break;\
    }\
  }\
} while(0)

/* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */
void PMurHash128x64_Result(const uint64_t ph[2], const uint64_t pcarry[2],
    const uint32_t total_length, uint64_t out[2])
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1;
  uint64_t k2 = pcarry[1];

  int n = k2 & 15;
  if (n) {
    k1 = pcarry[0];
    if (n > 8) {
      k2 >>= (16-n)*8;
      k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;
    } else {
      k1 >>= (8-n)*8;
    }
    k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  out[0] = h1;
  out[1] = h2;
}

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry[2] to {0,0} and, if a seed is
 * wanted, h[2] to an initial {seed,seed}. Both ph and pcarry are required arguments. */
void PMurHash128x64_Process(uint64_t ph[2], uint64_t pcarry[2], const void * const key, int len)
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1 = pcarry[0];
  uint64_t k2 = pcarry[1];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of the carry value */
  int n = k2 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
  // #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT64(ptr, 0);
    k2 = READ_UINT64(ptr, 1);
    doblock128x64(h1, h2, k1, k2);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
  // #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(void *)ptr & 7;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count.
   * The diagrams use the same key as the x86 version above. */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in the carry */
  case 0: /*
  k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT64(ptr, 0);
      k2 = READ_UINT64(ptr, 1);
      doblock128x64(h1, h2, k1, k2);
    }
    break;
  case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
  k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
    {
      const int lshift = n*8, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k1>>rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2<<lshift;
        k1 = READ_UINT64(ptr, 1);
        k2 = k2>>rshift | k1<<lshift;
        doblock128x64(h1, h2, c, k2);
      }
    }
    break;
  case 8: /*
  k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1'=[nmlkjihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT64(ptr, 0);
      doblock128x64(h1, h2, k1, k2);
      k1 = READ_UINT64(ptr, 1);
    }
    break;
  default: /* 8 < n <= 15
  k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1'=[nmlkjihg] k2'=[po------] */
    {
      const int lshift = n*8-64, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k2 >> rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2 << lshift;
        doblock128x64(h1, h2, k1, c);
        k1 = k2 >> rshift;
        k2 = READ_UINT64(ptr, 1);
        k1 |= k2 << lshift;
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15; /* i.e. len &= 15 */

  /* Append any remaining bytes into carry */
  dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  pcarry[0] = k1;
  pcarry[1] = (k2 & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* All in one go */

/* MurmurHash3_x64_128 api */
void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
{
  uint64_t carry[2] = {0, 0};
  uint64_t h[2] = {seed, seed};
  PMurHash128x64_Process(h, carry, key, len);
  PMurHash128x64_Result(h, carry, (uint32_t) len, (uint64_t *) out);
}
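
/* Streaming sketch: hashing input that never sits in memory all at once.
 * The file name, buffer size, and helper name are illustrative; assumes
 * compilation together with this file:
 *
 *   #include <stdio.h>
 *
 *   int hash_file(const char *path, uint64_t out[2]) {
 *     FILE *f = fopen(path, "rb");
 *     if (!f) return -1;
 *     uint64_t h[2] = {0, 0}, carry[2] = {0, 0};
 *     uint32_t total = 0;
 *     char buf[4096];
 *     size_t got;
 *     while ((got = fread(buf, 1, sizeof buf, f)) > 0) {
 *       PMurHash128x64_Process(h, carry, buf, (int)got);
 *       total += (uint32_t)got;
 *     }
 *     fclose(f);
 *     PMurHash128x64_Result(h, carry, total, out);
 *     return 0;
 *   }
 */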