Coverage Report

Created: 2025-12-31 07:28

/src/php-src/ext/hash/murmur/PMurHash128.c
 Count| Source
      | /*-----------------------------------------------------------------------------
      |  * MurmurHash3 was written by Austin Appleby, and is placed in the public
      |  * domain.
      |  *
      |  * This is a C implementation of MurmurHash3_128 with support for progressive
      |  * processing, based on the PMurHash implementation written by Shane Day.
      |  */
      |
      | /*-----------------------------------------------------------------------------
      |
      | If you want to understand the MurmurHash algorithm you would be much better
      | off reading the original source. Just point your browser at:
      | http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
      |
      |
      | What does this version provide?
      |
      | 1. Progressive data feeding. Useful when the entire payload to be hashed
      | does not fit in memory or when the data is streamed through the application.
      | Also useful when hashing a number of strings with a common prefix: a partial
      | hash of the prefix can be generated once and reused for each suffix string.
      |
      | How does it work?
      |
      | We can only process entire 128-bit chunks of input, except for the very end,
      | which may be shorter. So along with the partial hash we need to give back to
      | the caller a carry containing up to 15 bytes that we were unable to process.
      | This carry also needs to record the number of bytes it holds. I use
      | the low 4 bits as a count (0..15), and the carry bytes are shifted into the
      | high byte in stream order.
      |
      | To handle endianness I simply use a macro that reads a uint and define
      | that macro to be a direct read on little-endian machines, and a read-and-swap
      | on big-endian machines.
      |
      | -----------------------------------------------------------------------------*/
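
The intended calling pattern, sketched as a minimal example (the two PMurHash128x86_* functions are defined later in this file; the buffers and the split point are hypothetical):

    #include <stdint.h>

    /* Feed "hello world" in two pieces; the 11 unprocessed bytes ride along
     * in the carry until _Result consumes them. */
    void example(uint32_t out[4])
    {
      uint32_t h[4]     = {0, 0, 0, 0};  /* seed replicated into each lane */
      uint32_t carry[4] = {0, 0, 0, 0};  /* empty carry: count bits are 0 */

      PMurHash128x86_Process(h, carry, "hello ", 6);
      PMurHash128x86_Process(h, carry, "world", 5);

      /* total_length must be the overall byte count to match MurmurHash3_x86_128 */
      PMurHash128x86_Result(h, carry, 6 + 5, out);
    }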
      |
      |
      | #include "PMurHash128.h"
      |
      | /*-----------------------------------------------------------------------------
      |  * Endianness, misalignment capabilities and util macros
      |  *
      |  * The following seven macros are defined in this section. The other macros
      |  * defined are only needed to help derive these seven.
      |  *
      |  * READ_UINT32(x,i) Read a little endian unsigned 32-bit int at index
      |  * READ_UINT64(x,i) Read a little endian unsigned 64-bit int at index
      |  * UNALIGNED_SAFE   Defined if READ_UINTXX works on non-word boundaries
      |  * ROTL32(x,r)      Rotate x left by r bits
      |  * ROTL64(x,r)      Rotate x left by r bits
      |  * BIG_CONSTANT
      |  * FORCE_INLINE
      |  */
      |
      | /* I386 or AMD64 */
      | #if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \
      |  || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
      |   #define UNALIGNED_SAFE
      | #endif
      |
      | /* Find best way to ROTL */
      | #if defined(_MSC_VER)
      |   #define FORCE_INLINE  static __forceinline
      |   #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
      |   #define ROTL32(x,y)  _rotl(x,y)
      |   #define ROTL64(x,y)  _rotl64(x,y)
      |   #define BIG_CONSTANT(x) (x)
      | #else
      |   #define FORCE_INLINE static inline __attribute__((always_inline))
      |   /* gcc recognises this code and generates a rotate instruction for CPUs with one */
  169k|   #define ROTL32(x,r)  (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
 17.6k|   #define ROTL64(x,r)  (((uint64_t)x << r) | ((uint64_t)x >> (64 - r)))
   104|   #define BIG_CONSTANT(x) (x##LLU)
      | #endif
      |
      | #include "endianness.h"
      |
 8.78k| #define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i)
 84.4k| #define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i)
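
endianness.h itself is not part of this report. Under the scheme described above, the getblock helpers are presumably plain indexed loads on little-endian targets, with a byte swap added on big-endian ones; a sketch of the little-endian case, stated as an assumption about that header:

    /* Assumed little-endian definitions (a big-endian build would byte-swap) */
    static inline uint32_t getblock32(const uint32_t *p, int i) { return p[i]; }
    static inline uint64_t getblock64(const uint64_t *p, int i) { return p[i]; }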
      |
      | //-----------------------------------------------------------------------------
      | // Finalization mix - force all bits of a hash block to avalanche
      |
      | FORCE_INLINE uint32_t fmix32 ( uint32_t h )
   148| {
   148|   h ^= h >> 16;
   148|   h *= 0x85ebca6b;
   148|   h ^= h >> 13;
   148|   h *= 0xc2b2ae35;
   148|   h ^= h >> 16;
      |
   148|   return h;
   148| }
      |
      | //----------
      |
      | FORCE_INLINE uint64_t fmix64 ( uint64_t k )
    52| {
    52|   k ^= k >> 33;
    52|   k *= BIG_CONSTANT(0xff51afd7ed558ccd);
    52|   k ^= k >> 33;
    52|   k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    52|   k ^= k >> 33;
      |
    52|   return k;
    52| }
      |
      | /*-----------------------------------------------------------------------------*
      |                                  PMurHash128x86
      |  *-----------------------------------------------------------------------------*/
      | /*-----------------------------------------------------------------------------
      |  * Core murmurhash algorithm macros */
      |
      | static const uint32_t kC1 = 0x239b961b;
      | static const uint32_t kC2 = 0xab0e9789;
      | static const uint32_t kC3 = 0x38b34ae5;
      | static const uint32_t kC4 = 0xa1e38b93;
      |
      | /* This is the main processing body of the algorithm. It operates
      |  * on each full 128-bits of input. */
 21.1k| #define doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4)\
 21.1k| do {\
 21.1k|   k1 *= kC1; k1  = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
 21.1k| \
 21.1k|   h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
 21.1k| \
 21.1k|   k2 *= kC2; k2  = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
 21.1k| \
 21.1k|   h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
 21.1k| \
 21.1k|   k3 *= kC3; k3  = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
 21.1k| \
 21.1k|   h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
 21.1k| \
 21.1k|   k4 *= kC4; k4  = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
 21.1k| \
 21.1k|   h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
 21.1k| } while(0)
      |
      | /* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
      | /* cnt=bytes to process, h1-h4=hash k1-k4=carry, n=bytes in carry, ptr/len=payload */
    60| #define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
    60| do {\
    60|   unsigned __cnt = cnt;\
   346|   for(;__cnt--; len--) {\
   286|     switch(n) {\
    66|       case  0: case  1: case  2: case  3:\
    66|         k1 = k1>>8 | (uint32_t)*ptr++<<24;\
    66|         ++n; break;\
    53| \
    53|       case  4: case  5: case  6: case  7:\
    51|         k2 = k2>>8 | (uint32_t)*ptr++<<24;\
    51|         ++n; break;\
    32| \
    78|       case  8: case  9: case 10: case 11:\
    78|         k3 = k3>>8 | (uint32_t)*ptr++<<24;\
    78|         ++n; break;\
    57| \
    68|       case 12: case 13: case 14:\
    68|         k4 = k4>>8 | (uint32_t)*ptr++<<24;\
    68|         ++n; break;\
    45| \
    45|       case 15:\
    23|         k4 = k4>>8 | (uint32_t)*ptr++<<24;\
    23|         doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
    23|         n = 0; break;\
   286|     }\
   286|   }\
    60| } while(0)
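
To see what dobytes128x86 leaves behind, trace three bytes 0xAA 0xBB 0xCC through an empty carry (a worked example, not code from this file):

    uint32_t k1 = 0;                  /* n = 0 */
    k1 = k1>>8 | (uint32_t)0xAA<<24;  /* k1 = 0xAA000000, n = 1 */
    k1 = k1>>8 | (uint32_t)0xBB<<24;  /* k1 = 0xBBAA0000, n = 2 */
    k1 = k1>>8 | (uint32_t)0xCC<<24;  /* k1 = 0xCCBBAA00, n = 3 */
    /* PMurHash128x86_Result then shifts right by (4-n)*8 = 8, giving 0x00CCBBAA:
     * the same word READ_UINT32 would produce for a 3-byte little-endian tail. */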
      |
      | /* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */
      | void PMurHash128x86_Result(const uint32_t ph[4], const uint32_t pcarry[4], uint32_t total_length, uint32_t out[4])
    37| {
    37|   uint32_t h1 = ph[0];
    37|   uint32_t h2 = ph[1];
    37|   uint32_t h3 = ph[2];
    37|   uint32_t h4 = ph[3];
      |
    37|   uint32_t k1, k2, k3, k4 = pcarry[3];
      |
    37|   int n = k4 & 15;
    37|   switch(n) {
    13|     case  1: case  2: case  3: case  4:
    13|       k1 = pcarry[0] >> (4-n)*8;
    13|       goto finrot_k1;
      |
     5|     case  5: case  6: case  7: case  8:
     5|       k2 = pcarry[1] >> (8-n)*8;
     5|       goto finrot_k21;
      |
     4|     case  9: case 10: case 11: case 12:
     4|       k3 = pcarry[2] >> (12-n)*8;
     4|       goto finrot_k321;
      |
     5|     case 13: case 14: case 15:
     5|       k4 >>= (16-n)*8;
     5|       goto finrot_k4321;
      |
    10|     default:
    10|       goto skiprot;
    37|   }
     5| finrot_k4321:
     5|   k4 *= kC4; k4  = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
     5|   k3 = pcarry[2];
     9| finrot_k321:
     9|   k3 *= kC3; k3  = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
     9|   k2 = pcarry[1];
    14| finrot_k21:
    14|   k2 *= kC2; k2  = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
    14|   k1 = pcarry[0];
    27| finrot_k1:
    27|   k1 *= kC1; k1  = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
    37| skiprot:
      |
      |   //----------
      |   // finalization
      |
    37|   h1 ^= total_length; h2 ^= total_length;
    37|   h3 ^= total_length; h4 ^= total_length;
      |
    37|   h1 += h2; h1 += h3; h1 += h4;
    37|   h2 += h1; h3 += h1; h4 += h1;
      |
    37|   h1 = fmix32(h1);
    37|   h2 = fmix32(h2);
    37|   h3 = fmix32(h3);
    37|   h4 = fmix32(h4);
      |
    37|   h1 += h2; h1 += h3; h1 += h4;
    37|   h2 += h1; h3 += h1; h4 += h1;
      |
    37|   out[0] = h1;
    37|   out[1] = h2;
    37|   out[2] = h3;
    37|   out[3] = h4;
    37| }
      |
      | /*---------------------------------------------------------------------------*/
      |
      | /* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed}
      |  * if wanted. Both ph and pcarry are required arguments. */
      | void PMurHash128x86_Process(uint32_t ph[4], uint32_t pcarry[4], const void * const key, int len)
    37| {
    37|   uint32_t h1 = ph[0];
    37|   uint32_t h2 = ph[1];
    37|   uint32_t h3 = ph[2];
    37|   uint32_t h4 = ph[3];
      |
    37|   uint32_t k1 = pcarry[0];
    37|   uint32_t k2 = pcarry[1];
    37|   uint32_t k3 = pcarry[2];
    37|   uint32_t k4 = pcarry[3];
      |
    37|   const uint8_t *ptr = (uint8_t*)key;
    37|   const uint8_t *end;
      |
      |   /* Extract carry count from low 4 bits of c value */
    37|   int n = k4 & 15;
      |
    37| #if defined(UNALIGNED_SAFE)
      |   /* This CPU handles unaligned word access */
      | // #pragma message ( "UNALIGNED_SAFE" )
      |   /* Consume any carry bytes */
    37|   int i = (16-n) & 15;
    37|   if(i && i <= len) {
    23|     dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
    23|   }
      |
      |   /* Process 128-bit chunks */
    37|   end = ptr + (len & ~15);
 21.1k|   for( ; ptr < end ; ptr+=16) {
 21.1k|     k1 = READ_UINT32(ptr, 0);
 21.1k|     k2 = READ_UINT32(ptr, 1);
 21.1k|     k3 = READ_UINT32(ptr, 2);
 21.1k|     k4 = READ_UINT32(ptr, 3);
 21.1k|     doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
 21.1k|   }
      |
      | #else /*UNALIGNED_SAFE*/
      |   /* This CPU does not handle unaligned word access */
      | // #pragma message ( "ALIGNED" )
      |   /* Consume enough so that the next data byte is word aligned */
      |   int i = -(intptr_t)(void *)ptr & 3;
      |   if(i && i <= len) {
      |     dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
      |   }
      |   /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
      |   end = ptr + (len & ~15);
      |
      |   switch(n) { /* how many bytes in c */
      |   case 0: /*
      |   k1=[----] k2=[----] k3=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
      |     for( ; ptr < end ; ptr+=16) {
      |       k1 = READ_UINT32(ptr, 0);
      |       k2 = READ_UINT32(ptr, 1);
      |       k3 = READ_UINT32(ptr, 2);
      |       k4 = READ_UINT32(ptr, 3);
      |       doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      |     }
      |     break;
      |   case 1: case 2: case 3: /*
      |   k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
      |     {
      |       const int lshift = n*8, rshift = 32-lshift;
      |       for( ; ptr < end ; ptr+=16) {
      |         uint32_t c = k1>>rshift;      // --10
      |         k2 = READ_UINT32(ptr, 0);     // 5432
      |         c |= k2<<lshift;              // 3210.
      |         k1 = READ_UINT32(ptr, 1);     // 9876
      |         k2 = k1<<lshift | k2>>rshift; // 7654.
      |         k4 = READ_UINT32(ptr, 2);     // dcba
      |         k3 = k4<<lshift | k1>>rshift; // ba98.
      |         k1 = READ_UINT32(ptr, 3);     // hgfe.
      |         k4 = k1<<lshift | k4>>rshift; // fedc.
      |         doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
      |       }
      |     }
      |     break;
      |   case 4: /*
      |   k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
      |     for( ; ptr < end ; ptr+=16) {
      |       k2 = READ_UINT32(ptr, 0);
      |       k3 = READ_UINT32(ptr, 1);
      |       k4 = READ_UINT32(ptr, 2);
      |       doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      |       k1 = READ_UINT32(ptr, 3);
      |     }
      |     break;
      |   case 5: case 6: case 7: /*
      |   k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
      |     {
      |       const int lshift = n*8-32, rshift = 32-lshift;
      |       for( ; ptr < end ; ptr+=16) {
      |         uint32_t c = k2>>rshift;      // --54
      |         k3 = READ_UINT32(ptr, 0);     // 9876
      |         c |= k3<<lshift;              // 7654.
      |         k4 = READ_UINT32(ptr, 1);     // dcba
      |         k3 = k4<<lshift | k3>>rshift; // ba98.
      |         k2 = READ_UINT32(ptr, 2);     // hgfe
      |         k4 = k2<<lshift | k4>>rshift; // fedc.
      |         doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
      |         k1 = k2>>rshift;              // --hg
      |         k2 = READ_UINT32(ptr, 3);     // lkji.
      |         k1 |= k2<<lshift;             // jihg.
      |       }
      |     }
      |     break;
      |   case 8: /*
      |   k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
      |     for( ; ptr < end ; ptr+=16) {
      |       k3 = READ_UINT32(ptr, 0);
      |       k4 = READ_UINT32(ptr, 1);
      |       doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      |       k1 = READ_UINT32(ptr, 2);
      |       k2 = READ_UINT32(ptr, 3);
      |     }
      |     break;
      |   case 9: case 10: case 11: /*
      |   k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
      |     {
      |       const int lshift = n*8-64, rshift = 32-lshift;
      |       for( ; ptr < end ; ptr+=16) {
      |         uint32_t c = k3>>rshift;      // --98
      |         k4 = READ_UINT32(ptr, 0);     // dcba
      |         c |= k4<<lshift;              // ba98.
      |         k3 = READ_UINT32(ptr, 1);     // hgfe
      |         k4 = k3<<lshift | k4>>rshift; // fedc.
      |         doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
      |         k2 = READ_UINT32(ptr, 2);     // lkji
      |         k1 = k2<<lshift | k3>>rshift; // jihg.
      |         k3 = READ_UINT32(ptr, 3);     // ponm.
      |         k2 = k3<<lshift | k2>>rshift; // nmlk.
      |       }
      |     }
      |     break;
      |   case 12: /*
      |   k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
      |     for( ; ptr < end ; ptr+=16) {
      |       k4 = READ_UINT32(ptr, 0);
      |       doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      |       k1 = READ_UINT32(ptr, 1);
      |       k2 = READ_UINT32(ptr, 2);
      |       k3 = READ_UINT32(ptr, 3);
      |     }
      |     break;
      |   default: /* 12 < n <= 15
      |   k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k4'=[ts--] */
      |     {
      |       const int lshift = n*8-96, rshift = 32-lshift;
      |       for( ; ptr < end ; ptr+=16) {
      |         uint32_t c = k4>>rshift;      // --dc
      |         k4 = READ_UINT32(ptr, 0);     // hgfe
      |         c |= k4<<lshift;              // fedc.
      |         doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
      |         k3 = READ_UINT32(ptr, 1);     // lkji
      |         k1 = k3<<lshift | k4>>rshift; // jihg.
      |         c  = READ_UINT32(ptr, 2);     // ponm
      |         k2 = c<<lshift | k3>>rshift;  // nmlk.
      |         k4 = READ_UINT32(ptr, 3);     // tsrq.
      |         k3 = k4<<lshift | c>>rshift;  // rqpo.
      |       }
      |     }
      |   }
      | #endif /*UNALIGNED_SAFE*/
      |
      |   /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
    37|   len -= len & ~15;
      |
      |   /* Append any remaining bytes into carry */
    37|   dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
      |
      |   /* Copy out new running hash and carry */
    37|   ph[0] = h1;
    37|   ph[1] = h2;
    37|   ph[2] = h3;
    37|   ph[3] = h4;
    37|   pcarry[0] = k1;
    37|   pcarry[1] = k2;
    37|   pcarry[2] = k3;
    37|   pcarry[3] = (k4 & ~0xff) | n;
    37| }
      |
      | /*---------------------------------------------------------------------------*/
      |
      | /* All in one go */
      |
      | /* MurmurHash3_x86_128 api */
      | void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
     0| {
     0|   uint32_t carry[4] = {0, 0, 0, 0};
     0|   uint32_t h[4] = {seed, seed, seed, seed};
     0|   PMurHash128x86_Process(h, carry, key, len);
     0|   PMurHash128x86_Result(h, carry, (uint32_t) len, (uint32_t *) out);
     0| }
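
A quick way to convince yourself the carry logic matches the one-shot path (a hypothetical test harness, not part of this file): split the same buffer at every offset and compare digests.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    void check_splits(const uint8_t *buf, int len, uint32_t seed)
    {
      uint32_t ref[4];
      PMurHash128x86(buf, len, seed, ref);          /* one-shot reference */

      for (int cut = 0; cut <= len; cut++) {
        uint32_t h[4] = {seed, seed, seed, seed};
        uint32_t carry[4] = {0, 0, 0, 0};
        uint32_t out[4];
        PMurHash128x86_Process(h, carry, buf, cut);
        PMurHash128x86_Process(h, carry, buf + cut, len - cut);
        PMurHash128x86_Result(h, carry, (uint32_t)len, out);
        assert(memcmp(out, ref, sizeof ref) == 0);  /* must agree at every split */
      }
    }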
      |
      | /*-----------------------------------------------------------------------------*
      |                                  PMurHash128x64
      |  *-----------------------------------------------------------------------------*/
      | /*-----------------------------------------------------------------------------
      |  * Core murmurhash algorithm macros */
      |
      | static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
      | static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);
      |
      | /* This is the main processing body of the algorithm. It operates
      |  * on each full 128-bits of input. */
 4.40k| #define doblock128x64(h1, h2, k1, k2)\
 4.40k| do {\
 4.40k|   k1 *= kC1L; k1  = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
 4.40k| \
 4.40k|   h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
 4.40k| \
 4.40k|   k2 *= kC2L; k2  = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
 4.40k| \
 4.40k|   h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
 4.40k| } while(0)
      |
      | /* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
      | /* cnt=bytes to process, h1,h2=hash k1,k2=carry, n=bytes in carry, ptr/len=payload */
    40| #define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
    40| do {\
    40|   unsigned __cnt = cnt;\
   205|   for(;__cnt--; len--) {\
   165|     switch(n) {\
    51|       case  0: case  1: case  2: case  3:\
    85|       case  4: case  5: case  6: case  7:\
    85|         k1 = k1>>8 | (uint64_t)*ptr++<<56;\
    85|         n++; break;\
    77| \
    77|       case  8: case  9: case 10: case 11:\
    66|       case 12: case 13: case 14:\
    66|         k2 = k2>>8 | (uint64_t)*ptr++<<56;\
    66|         n++; break;\
    54| \
    54|       case 15:\
    14|         k2 = k2>>8 | (uint64_t)*ptr++<<56;\
    14|         doblock128x64(h1, h2, k1, k2);\
    14|         n = 0; break;\
   165|     }\
   165|   }\
    40| } while(0)
      |
      | /* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */
      | void PMurHash128x64_Result(const uint64_t ph[2], const uint64_t pcarry[2],
      |                         const uint32_t total_length, uint64_t out[2])
    26| {
    26|   uint64_t h1 = ph[0];
    26|   uint64_t h2 = ph[1];
      |
    26|   uint64_t k1;
    26|   uint64_t k2 = pcarry[1];
      |
    26|   int n = k2 & 15;
    26|   if (n) {
    19|     k1 = pcarry[0];
    19|     if (n > 8) {
     3|       k2 >>= (16-n)*8;
     3|       k2 *= kC2L; k2  = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;
    16|     } else {
    16|       k1 >>= (8-n)*8;
    16|     }
    19|     k1 *= kC1L; k1  = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;
    19|   }
      |
      |   //----------
      |   // finalization
      |
    26|   h1 ^= total_length; h2 ^= total_length;
      |
    26|   h1 += h2;
    26|   h2 += h1;
      |
    26|   h1 = fmix64(h1);
    26|   h2 = fmix64(h2);
      |
    26|   h1 += h2;
    26|   h2 += h1;
      |
    26|   out[0] = h1;
    26|   out[1] = h2;
    26| }
      |
      | /*---------------------------------------------------------------------------*/
      |
      | /* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed}
      |  * if wanted. Both ph and pcarry are required arguments. */
      | void PMurHash128x64_Process(uint64_t ph[2], uint64_t pcarry[2], const void * const key, int len)
    26| {
    26|   uint64_t h1 = ph[0];
    26|   uint64_t h2 = ph[1];
      |
    26|   uint64_t k1 = pcarry[0];
    26|   uint64_t k2 = pcarry[1];
      |
    26|   const uint8_t *ptr = (uint8_t*)key;
    26|   const uint8_t *end;
      |
      |   /* Extract carry count from low 4 bits of c value */
    26|   int n = k2 & 15;
      |
    26| #if defined(UNALIGNED_SAFE)
      |   /* This CPU handles unaligned word access */
      | // #pragma message ( "UNALIGNED_SAFE" )
      |   /* Consume any carry bytes */
    26|   int i = (16-n) & 15;
    26|   if(i && i <= len) {
    14|     dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
    14|   }
      |
      |   /* Process 128-bit chunks */
    26|   end = ptr + (len & ~15);
 4.41k|   for( ; ptr < end ; ptr+=16) {
 4.39k|     k1 = READ_UINT64(ptr, 0);
 4.39k|     k2 = READ_UINT64(ptr, 1);
 4.39k|     doblock128x64(h1, h2, k1, k2);
 4.39k|   }
      |
      | #else /*UNALIGNED_SAFE*/
      |   /* This CPU does not handle unaligned word access */
      | // #pragma message ( "ALIGNED" )
      |   /* Consume enough so that the next data byte is word aligned */
      |   int i = -(intptr_t)(void *)ptr & 7;
      |   if(i && i <= len) {
      |     dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
      |   }
      |   /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
      |   end = ptr + (len & ~15);
      |
      |   switch(n) { /* how many bytes in c */
      |   case 0: /*
      |     k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
      |     for( ; ptr < end ; ptr+=16) {
      |       k1 = READ_UINT64(ptr, 0);
      |       k2 = READ_UINT64(ptr, 1);
      |       doblock128x64(h1, h2, k1, k2);
      |     }
      |     break;
      |   case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
      |     k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
      |     {
      |       const int lshift = n*8, rshift = 64-lshift;
      |       for( ; ptr < end ; ptr+=16) {
      |         uint64_t c = k1>>rshift;
      |         k2 = READ_UINT64(ptr, 0);
      |         c |= k2<<lshift;
      |         k1 = READ_UINT64(ptr, 1);
      |         k2 = k2>>rshift | k1<<lshift;
      |         doblock128x64(h1, h2, c, k2);
      |       }
      |     }
      |     break;
      |   case 8: /*
      |   k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1'=[nmlkjihg] */
      |     for( ; ptr < end ; ptr+=16) {
      |       k2 = READ_UINT64(ptr, 0);
      |       doblock128x64(h1, h2, k1, k2);
      |       k1 = READ_UINT64(ptr, 1);
      |     }
      |     break;
      |   default: /* 8 < n <= 15
      |   k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1'=[nmlkjihg] k2'=[po------] */
      |     {
      |       const int lshift = n*8-64, rshift = 64-lshift;
      |       for( ; ptr < end ; ptr+=16) {
      |         uint64_t c = k2 >> rshift;
      |         k2 = READ_UINT64(ptr, 0);
      |         c |= k2 << lshift;
      |         doblock128x64(h1, h2, k1, c);
      |         k1 = k2 >> rshift;
      |         k2 = READ_UINT64(ptr, 1);
      |         k1 |= k2 << lshift;
      |       }
      |     }
      |   }
      | #endif /*UNALIGNED_SAFE*/
      |
      |   /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
    26|   len -= len & ~15;
      |
      |   /* Append any remaining bytes into carry */
    26|   dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);
      |
      |   /* Copy out new running hash and carry */
    26|   ph[0] = h1;
    26|   ph[1] = h2;
    26|   pcarry[0] = k1;
    26|   pcarry[1] = (k2 & ~0xff) | n;
    26| }
      |
      | /*---------------------------------------------------------------------------*/
      |
      | /* All in one go */
      |
      | /* MurmurHash3_x64_128 api */
      | void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
     0| {
     0|   uint64_t carry[2] = {0, 0};
     0|   uint64_t h[2] = {seed, seed};
     0|   PMurHash128x64_Process(h, carry, key, len);
     0|   PMurHash128x64_Result(h, carry, (uint32_t) len, (uint64_t *) out);
     0| }
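
The common-prefix use case mentioned in the header comment maps onto the x64 API like this (hypothetical keys; snapshotting h/carry is the whole trick):

    #include <stdint.h>
    #include <string.h>

    void hash_with_shared_prefix(uint64_t out_alice[2], uint64_t out_bob[2])
    {
      uint64_t h0[2] = {0, 0}, c0[2] = {0, 0};
      PMurHash128x64_Process(h0, c0, "user:", 5);    /* prefix state, computed once */

      uint64_t h[2], c[2];

      memcpy(h, h0, sizeof h); memcpy(c, c0, sizeof c);
      PMurHash128x64_Process(h, c, "alice", 5);
      PMurHash128x64_Result(h, c, 5 + 5, out_alice); /* hash of "user:alice" */

      memcpy(h, h0, sizeof h); memcpy(c, c0, sizeof c);
      PMurHash128x64_Process(h, c, "bob", 3);
      PMurHash128x64_Result(h, c, 5 + 3, out_bob);   /* hash of "user:bob" */
    }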