/src/php-src/ext/hash/murmur/PMurHash128.c

Source
/*-----------------------------------------------------------------------------
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain.
 *
 * This is a C implementation of MurmurHash3_128 with support for progressive
 * processing, based on the PMurHash implementation written by Shane Day.
 */

/*-----------------------------------------------------------------------------

If you want to understand the MurmurHash algorithm you would be much better
off reading the original source. Just point your browser at:
http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp


What does this version provide?

1. Progressive data feeding. Useful when the entire payload to be hashed
does not fit in memory or when the data is streamed through the application.
Also useful when hashing a number of strings with a common prefix. A partial
hash of a prefix string can be generated and reused for each suffix string.

How does it work?

We can only process entire 128 bit chunks of input, except for the very end
that may be shorter. So along with the partial hash we need to give back to
the caller a carry containing up to 15 bytes that we were unable to process.
This carry also needs to record the number of bytes the carry holds. I use
the low 4 bits as a count (0..15) and the carry bytes are shifted into the
high byte in stream order.

To handle endianness I simply use a macro that reads a uint and define
that macro to be a direct read on little endian machines, and a read and swap
on big endian machines.

-----------------------------------------------------------------------------*/


#include "PMurHash128.h"

/*-----------------------------------------------------------------------------
 * Endianness, misalignment capabilities and util macros
 *
 * The following 7 macros are defined in this section. The other macros defined
 * are only needed to help derive these 7.
 *
 * READ_UINT32(x,i) Read a little endian unsigned 32-bit int at index
 * READ_UINT64(x,i) Read a little endian unsigned 64-bit int at index
 * UNALIGNED_SAFE   Defined if READ_UINTXX works on non-word boundaries
 * ROTL32(x,r)      Rotate x left by r bits
 * ROTL64(x,r)      Rotate x left by r bits
 * BIG_CONSTANT(x)  Spell x as an unsigned 64-bit integer literal
 * FORCE_INLINE     Specifiers for a static function that should always inline
 */

/* I386 or AMD64: on these targets the word reads used by the hashing loops
 * are allowed on non-word boundaries, so UNALIGNED_SAFE selects the simple
 * "read straight from the input" code path. */
#if defined(_M_I86) || defined(_M_IX86) || defined(_X86_) || defined(__i386__) || defined(__i386) || defined(i386) \
 || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
  #define UNALIGNED_SAFE
#endif

/* Find best way to ROTL */
#if defined(_MSC_VER)
  #define FORCE_INLINE  static __forceinline
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define ROTL32(x,y)  _rotl(x,y)
  #define ROTL64(x,y)  _rotl64(x,y)
  #define BIG_CONSTANT(x) (x)
#else
  #define FORCE_INLINE static inline __attribute__((always_inline))
  /* gcc recognises this code and generates a rotate instruction for CPUs with one.
   *
   * Both arguments are fully parenthesised so that compound expressions such
   * as ROTL32(a | b, s + t) rotate the whole value by the whole count; without
   * the parentheses the cast and the shifts would bind to only part of the
   * argument. Each argument is evaluated twice, so avoid side effects.
   * r must satisfy 0 < r < width: r == 0 would shift by the full width of the
   * type, which is undefined behaviour in C. */
  #define ROTL32(x,r)  (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
  #define ROTL64(x,r)  (((uint64_t)(x) << (r)) | ((uint64_t)(x) >> (64 - (r))))
  /* Append the unsigned long long suffix to a bare 64-bit hex literal. */
  #define BIG_CONSTANT(x) (x##LLU)
#endif

#include "endianness.h"

/* Word readers used by the block-processing loops: fetch the i-th 64-bit or
 * 32-bit word counted from ptr. The cast is applied to ptr without
 * parentheses, so pass a plain pointer variable rather than an expression.
 * getblock64/getblock32 are not defined in this file; presumably they come
 * from endianness.h and return the word in little endian order -- confirm
 * against that header. */
#define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i)
#define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i)

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
//
// 32-bit variant: two xor-shift/multiply rounds followed by a last xor-shift.
// Takes one hash lane and returns the mixed lane.

FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;

  return h ^ (h >> 16);
}

//----------
// 64-bit variant of the finalization mix: two xor-shift/multiply rounds
// followed by a last xor-shift. Takes one hash lane and returns the mixed lane.

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k = (k ^ (k >> 33)) * BIG_CONSTANT(0xff51afd7ed558ccd);
  k = (k ^ (k >> 33)) * BIG_CONSTANT(0xc4ceb9fe1a85ec53);

  return k ^ (k >> 33);
}

/*-----------------------------------------------------------------------------*
                                 PMurHash128x86
 *-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

/* Multiplier constants applied to the four 32-bit key words, both in the
 * per-block mix below and in the tail mixing of PMurHash128x86_Result. */
static const uint32_t kC1 = 0x239b961b;
static const uint32_t kC2 = 0xab0e9789;
static const uint32_t kC3 = 0x38b34ae5;
static const uint32_t kC4 = 0xa1e38b93;

/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input.
 *
 * h1..h4 are the running hash lanes and k1..k4 the four 32-bit words of the
 * block. All eight arguments are modified in place and each is expanded
 * several times, so pass plain uint32_t lvalues without side effects.
 * Statement order matters: every hash lane is mixed with its neighbour
 * (h1 with h2, h2 with h3, h3 with h4) and h4 is finally mixed with the
 * already-updated h1. */
#define doblock128x86(h1, h2, h3, h4, k1, k2, k3,k4)\
do {\
  k1 *= kC1; k1  = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
\
  h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
\
  k2 *= kC2; k2  = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
\
  h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
\
  k3 *= kC3; k3  = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
\
  h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
\
  k4 *= kC4; k4  = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
\
  h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
} while(0)

/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes.
 *
 * cnt=bytes to process, h1-h4=hash, k1-k4=carry, n=bytes in carry,
 * ptr/len=payload.
 *
 * cnt is copied once into a local counter. For every byte consumed, ptr is
 * advanced and len is decremented. The byte is shifted into the top byte of
 * the carry word selected by n (k1 for 0..3, k2 for 4..7, k3 for 8..11,
 * k4 for 12..15) while the word's previous contents move down by 8 bits.
 * When the 16th byte arrives (n == 15) the complete block is run through
 * doblock128x86 and n restarts at 0; otherwise n is incremented.
 * n, ptr, len, the carry words and the hash lanes are all modified in the
 * caller's scope.
 * NOTE(review): __cnt is an identifier reserved for the implementation. */
#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
do {\
  unsigned __cnt = cnt;\
  for(;__cnt--; len--) {\
    switch(n) {\
      case  0: case  1: case  2: case  3:\
        k1 = k1>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case  4: case  5: case  6: case  7:\
        k2 = k2>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case  8: case  9: case 10: case 11:\
        k3 = k3>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 12: case 13: case 14:\
        k4 = k4>>8 | (uint32_t)*ptr++<<24;\
        ++n; break;\
\
      case 15:\
        k4 = k4>>8 | (uint32_t)*ptr++<<24;\
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
        n = 0; break;\
    }\
  }\
} while(0)

/* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */
void PMurHash128x86_Result(const uint32_t ph[4], const uint32_t pcarry[4], uint32_t total_length, uint32_t out[4])
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  uint32_t k1, k2, k3, k4 = pcarry[3];

  int n = k4 & 15;
  switch(n) {
    case  1: case  2: case  3: case  4:
      k1 = pcarry[0] >> (4-n)*8;
      goto finrot_k1;

    case  5: case  6: case  7: case  8:
      k2 = pcarry[1] >> (8-n)*8;
      goto finrot_k21;

    case  9: case 10: case 11: case 12:
      k3 = pcarry[2] >> (12-n)*8;
      goto finrot_k321;

    case 13: case 14: case 15:
      k4 >>= (16-n)*8;
      goto finrot_k4321;

    default:
      goto skiprot;
  }
finrot_k4321:
  k4 *= kC4; k4  = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
  k3 = pcarry[2];
finrot_k321:
  k3 *= kC3; k3  = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
  k2 = pcarry[1];
finrot_k21:
  k2 *= kC2; k2  = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
  k1 = pcarry[0];
finrot_k1:
  k1 *= kC1; k1  = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
skiprot:

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;
  h3 ^= total_length; h4 ^= total_length;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  out[0] = h1;
  out[1] = h2;
  out[2] = h3;
  out[3] = h4;
}

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed}
 * if wanted. Both ph and pcarry are required arguments.
 *
 * ph     running hash lanes, read on entry and updated on return
 * pcarry carry state, read on entry and updated on return; the low 4 bits
 *        of pcarry[3] hold the number of bytes (0..15) not yet hashed and
 *        those bytes sit in the high bytes of the carry words
 * key    input bytes to add to the hash
 * len    number of bytes at key (expected to be non-negative)
 *
 * The function may be called repeatedly to feed the input piecewise. */
void PMurHash128x86_Process(uint32_t ph[4], uint32_t pcarry[4], const void * const key, int len)
{
  uint32_t h1 = ph[0];
  uint32_t h2 = ph[1];
  uint32_t h3 = ph[2];
  uint32_t h4 = ph[3];

  uint32_t k1 = pcarry[0];
  uint32_t k2 = pcarry[1];
  uint32_t k3 = pcarry[2];
  uint32_t k4 = pcarry[3];

  const uint8_t *ptr = (const uint8_t *)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of the last carry word */
  int n = k4 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
  /* Consume any carry bytes: top the carry up to a full block so that the
   * word loop below can start with an empty carry. */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT32(ptr, 0);
    k2 = READ_UINT32(ptr, 1);
    k3 = READ_UINT32(ptr, 2);
    k4 = READ_UINT32(ptr, 3);
    doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(const void *)ptr & 3;
  if(i && i <= len) {
    dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count.
   * In the diagrams: k = carry words on entry, w = words read from the
   * input, b = block handed to doblock128x86, k' = carry words afterwards.
   * Every case ends with an explicit break: each loop runs until ptr reaches
   * end, so no case may continue into the next one. */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
  k1=[----] k2=[----] k3=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT32(ptr, 0);
      k2 = READ_UINT32(ptr, 1);
      k3 = READ_UINT32(ptr, 2);
      k4 = READ_UINT32(ptr, 3);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
    }
    break;
  case 1: case 2: case 3: /*
  k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
    {
      const int lshift = n*8, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k1>>rshift;      // --10
        k2 = READ_UINT32(ptr, 0);     // 5432
        c |= k2<<lshift;              // 3210.
        k1 = READ_UINT32(ptr, 1);     // 9876
        k2 = k1<<lshift | k2>>rshift; // 7654.
        k4 = READ_UINT32(ptr, 2);     // dcba
        k3 = k4<<lshift | k1>>rshift; // ba98.
        k1 = READ_UINT32(ptr, 3);     // hgfe.
        k4 = k1<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
      }
    }
    break;
  case 4: /*
  k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT32(ptr, 0);
      k3 = READ_UINT32(ptr, 1);
      k4 = READ_UINT32(ptr, 2);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 3);
    }
    break;
  case 5: case 6: case 7: /*
  k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
    {
      const int lshift = n*8-32, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k2>>rshift;      // --54
        k3 = READ_UINT32(ptr, 0);     // 9876
        c |= k3<<lshift;              // 7654.
        k4 = READ_UINT32(ptr, 1);     // dcba
        k3 = k4<<lshift | k3>>rshift; // ba98.
        k2 = READ_UINT32(ptr, 2);     // hgfe
        k4 = k2<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
        k1 = k2>>rshift;              // --hg
        k2 = READ_UINT32(ptr, 3);     // lkji.
        k1 |= k2<<lshift;             // jihg.
      }
    }
    break;
  case 8: /*
  k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
    for( ; ptr < end ; ptr+=16) {
      k3 = READ_UINT32(ptr, 0);
      k4 = READ_UINT32(ptr, 1);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 2);
      k2 = READ_UINT32(ptr, 3);
    }
    break;
  case 9: case 10: case 11: /*
  k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
    {
      const int lshift = n*8-64, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k3>>rshift;      // --98
        k4 = READ_UINT32(ptr, 0);     // dcba
        c |= k4<<lshift;              // ba98.
        k3 = READ_UINT32(ptr, 1);     // hgfe
        k4 = k3<<lshift | k4>>rshift; // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
        k2 = READ_UINT32(ptr, 2);     // lkji
        k1 = k2<<lshift | k3>>rshift; // jihg.
        k3 = READ_UINT32(ptr, 3);     // ponm.
        k2 = k3<<lshift | k2>>rshift; // nmlk.
      }
    }
    break;
  case 12: /*
  k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
    for( ; ptr < end ; ptr+=16) {
      k4 = READ_UINT32(ptr, 0);
      doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
      k1 = READ_UINT32(ptr, 1);
      k2 = READ_UINT32(ptr, 2);
      k3 = READ_UINT32(ptr, 3);
    }
    break;
  default: /* 12 < n <= 15
  k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k4'=[ts--] */
    {
      const int lshift = n*8-96, rshift = 32-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint32_t c = k4>>rshift;      // --dc
        k4 = READ_UINT32(ptr, 0);     // hgfe
        c |= k4<<lshift;              // fedc.
        doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
        k3 = READ_UINT32(ptr, 1);     // lkji
        k1 = k3<<lshift | k4>>rshift; // jihg.
        c  = READ_UINT32(ptr, 2);     // ponm
        k2 = c<<lshift | k3>>rshift;  // nmlk.
        k4 = READ_UINT32(ptr, 3);     // tsrq.
        k3 = k4<<lshift | c>>rshift;  // rqpo.
      }
    }
    break;
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  ph[2] = h3;
  ph[3] = h4;
  pcarry[0] = k1;
  pcarry[1] = k2;
  pcarry[2] = k3;
  /* The low byte of the last carry word is reused to store the byte count. */
  pcarry[3] = (k4 & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* All in one go */

/* MurmurHash3_x86_128 api: hash len bytes at key in a single call.
 * Every hash lane starts at seed and the carry starts empty; the four
 * 32-bit result words are written through out. */
void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
{
  uint32_t state[4];
  uint32_t pending[4];
  int lane;

  for (lane = 0; lane < 4; lane++) {
    state[lane] = seed;
    pending[lane] = 0;
  }

  PMurHash128x86_Process(state, pending, key, len);
  PMurHash128x86_Result(state, pending, (uint32_t) len, (uint32_t *) out);
}

/*-----------------------------------------------------------------------------*
                                 PMurHash128x64
 *-----------------------------------------------------------------------------*/
/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

/* Multiplier constants applied to the two 64-bit key words, both in the
 * per-block mix below and in the tail mixing of PMurHash128x64_Result. */
static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);

/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input.
 *
 * h1,h2 are the running hash lanes and k1,k2 the two 64-bit words of the
 * block. All four arguments are modified in place and each is expanded
 * several times, so pass plain uint64_t lvalues without side effects.
 * Statement order matters: h1 is mixed with h2 first, then h2 is mixed with
 * the already-updated h1. */
#define doblock128x64(h1, h2, k1, k2)\
do {\
  k1 *= kC1L; k1  = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
\
  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
\
  k2 *= kC2L; k2  = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
\
  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
} while(0)

/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes.
 *
 * cnt=bytes to process, h1,h2=hash, k1,k2=carry, n=bytes in carry,
 * ptr/len=payload.
 *
 * cnt is copied once into a local counter. For every byte consumed, ptr is
 * advanced and len is decremented. The byte is shifted into the top byte of
 * k1 (n = 0..7) or k2 (n = 8..15) while the word's previous contents move
 * down by 8 bits. When the 16th byte arrives (n == 15) the complete block is
 * run through doblock128x64 and n restarts at 0; otherwise n is incremented.
 * n, ptr, len, the carry words and the hash lanes are all modified in the
 * caller's scope.
 * NOTE(review): __cnt is an identifier reserved for the implementation. */
#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
do {\
  unsigned __cnt = cnt;\
  for(;__cnt--; len--) {\
    switch(n) {\
      case  0: case  1: case  2: case  3:\
      case  4: case  5: case  6: case  7:\
        k1 = k1>>8 | (uint64_t)*ptr++<<56;\
        n++; break;\
\
      case  8: case  9: case 10: case 11:\
      case 12: case 13: case 14:\
        k2 = k2>>8 | (uint64_t)*ptr++<<56;\
        n++; break;\
\
      case 15:\
        k2 = k2>>8 | (uint64_t)*ptr++<<56;\
        doblock128x64(h1, h2, k1, k2);\
        n = 0; break;\
    }\
  }\
} while(0)

/* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */
void PMurHash128x64_Result(const uint64_t ph[2], const uint64_t pcarry[2],
                        const uint32_t total_length, uint64_t out[2])
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1;
  uint64_t k2 = pcarry[1];

  int n = k2 & 15;
  if (n) {
    k1 = pcarry[0];
    if (n > 8) {
      k2 >>= (16-n)*8;
      k2 *= kC2L; k2  = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;
    } else {
      k1 >>= (8-n)*8;
    }
    k1 *= kC1L; k1  = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= total_length; h2 ^= total_length;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  out[0] = h1;
  out[1] = h2;
}

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed}
 * if wanted. Both ph and pcarry are required arguments.
 *
 * ph     running hash lanes, read on entry and updated on return
 * pcarry carry state, read on entry and updated on return; the low 4 bits
 *        of pcarry[1] hold the number of bytes (0..15) not yet hashed and
 *        those bytes sit in the high bytes of the carry words
 * key    input bytes to add to the hash
 * len    number of bytes at key (presumably non-negative -- confirm callers)
 *
 * The function may be called repeatedly to feed the input piecewise. */
void PMurHash128x64_Process(uint64_t ph[2], uint64_t pcarry[2], const void * const key, int len)
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1 = pcarry[0];
  uint64_t k2 = pcarry[1];

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of the last carry word */
  int n = k2 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
// #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes: top the carry up to a full block so that the
   * word loop below can start with an empty carry. */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT64(ptr, 0);
    k2 = READ_UINT64(ptr, 1);
    doblock128x64(h1, h2, k1, k2);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
// #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned
   * (8-byte boundary, since the loops below read 64-bit words) */
  int i = -(intptr_t)(void *)ptr & 7;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count.
   * In the diagrams: k = carry words on entry, w = words read from the
   * input, b = block handed to doblock128x64, k' = carry words afterwards. */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
    k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT64(ptr, 0);
      k2 = READ_UINT64(ptr, 1);
      doblock128x64(h1, h2, k1, k2);
    }
    break;
  case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
    k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
    {
      const int lshift = n*8, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        /* c takes the carried bytes plus the low part of the first word;
         * k1 keeps the raw second word, whose high bytes are the new carry. */
        uint64_t c = k1>>rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2<<lshift;
        k1 = READ_UINT64(ptr, 1);
        k2 = k2>>rshift | k1<<lshift;
        doblock128x64(h1, h2, c, k2);
      }
    }
    break;
  case 8: /*
  k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1'=[nmlkjihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT64(ptr, 0);
      doblock128x64(h1, h2, k1, k2);
      k1 = READ_UINT64(ptr, 1);
    }
    break;
  default: /* 8 < n <= 15
  k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1'=[nmlkjihg] k2'=[po------] */
    {
      const int lshift = n*8-64, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        /* k1 is already complete; c completes the second word of the block.
         * The carry is then rebuilt from the two words just read. */
        uint64_t c = k2 >> rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2 << lshift;
        doblock128x64(h1, h2, k1, c);
        k1 = k2 >> rshift;
        k2 = READ_UINT64(ptr, 1);
        k1 |= k2 << lshift;
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  pcarry[0] = k1;
  /* The low byte of the last carry word is reused to store the byte count. */
  pcarry[1] = (k2 & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* All in one go */

/* MurmurHash3_x64_128 api: hash len bytes at key in a single call.
 * Both hash lanes start at seed and the carry starts empty; the two
 * 64-bit result words are written through out. */
void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
{
  uint64_t state[2];
  uint64_t pending[2];

  state[0] = seed;
  state[1] = seed;
  pending[0] = 0;
  pending[1] = 0;

  PMurHash128x64_Process(state, pending, key, len);
  PMurHash128x64_Result(state, pending, (uint32_t) len, (uint64_t *) out);
}

Coverage Report

Created: 2025-12-14 06:05

Line	Count	Source
1		/*-----------------------------------------------------------------------------
2		* MurmurHash3 was written by Austin Appleby, and is placed in the public
3		* domain.
4		*
5		* This is a c++ implementation of MurmurHash3_128 with support for progressive
6		* processing based on PMurHash implementation written by Shane Day.
7		*/
8
9		/*-----------------------------------------------------------------------------
10
11		If you want to understand the MurmurHash algorithm you would be much better
12		off reading the original source. Just point your browser at:
13		http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
14
15
16		What this version provides?
17
18		1. Progressive data feeding. Useful when the entire payload to be hashed
19		does not fit in memory or when the data is streamed through the application.
20		Also useful when hashing a number of strings with a common prefix. A partial
21		hash of a prefix string can be generated and reused for each suffix string.
22
23		How does it work?
24
25		We can only process entire 128 bit chunks of input, except for the very end
26		that may be shorter. So along with the partial hash we need to give back to
27		the caller a carry containing up to 15 bytes that we were unable to process.
28		This carry also needs to record the number of bytes the carry holds. I use
29		the low 4 bits as a count (0..15) and the carry bytes are shifted into the
30		high byte in stream order.
31
32		To handle endianess I simply use a macro that reads an uint and define
33		that macro to be a direct read on little endian machines, a read and swap
34		on big endian machines.
35
36		-----------------------------------------------------------------------------*/
37
38
39		#include "PMurHash128.h"
40
41		/*-----------------------------------------------------------------------------
42		* Endianess, misalignment capabilities and util macros
43		*
44		* The following 5 macros are defined in this section. The other macros defined
45		* are only needed to help derive these 5.
46		*
47		* READ_UINT32(x,i) Read a little endian unsigned 32-bit int at index
48		* READ_UINT64(x,i) Read a little endian unsigned 64-bit int at index
49		* UNALIGNED_SAFE Defined if READ_UINTXX works on non-word boundaries
50		* ROTL32(x,r) Rotate x left by r bits
51		* ROTL64(x,r) Rotate x left by r bits
52		* BIG_CONSTANT
53		* FORCE_INLINE
54		*/
55
56		/* I386 or AMD64 */
57		#if defined(_M_I86) \|\| defined(_M_IX86) \|\| defined(_X86_) \|\| defined(__i386__) \|\| defined(__i386) \|\| defined(i386) \
58		\|\| defined(_M_X64) \|\| defined(__x86_64__) \|\| defined(__x86_64) \|\| defined(__amd64__) \|\| defined(__amd64)
59		#define UNALIGNED_SAFE
60		#endif
61
62		/* Find best way to ROTL */
63		#if defined(_MSC_VER)
64		#define FORCE_INLINE static __forceinline
65		#include <stdlib.h> /* Microsoft put _rotl declaration in here */
66		#define ROTL32(x,y) _rotl(x,y)
67		#define ROTL64(x,y) _rotl64(x,y)
68		#define BIG_CONSTANT(x) (x)
69		#else
70		#define FORCE_INLINE static inline __attribute__((always_inline))
71		/* gcc recognises this code and generates a rotate instruction for CPUs with one */
72	0	#define ROTL32(x,r) (((uint32_t)x << r) \| ((uint32_t)x >> (32 - r)))
73	0	#define ROTL64(x,r) (((uint64_t)x << r) \| ((uint64_t)x >> (64 - r)))
74	0	#define BIG_CONSTANT(x) (x##LLU)
75		#endif
76
77		#include "endianness.h"
78
79	0	#define READ_UINT64(ptr,i) getblock64((uint64_t *)ptr,i)
80	0	#define READ_UINT32(ptr,i) getblock32((uint32_t *)ptr,i)
81
82		//-----------------------------------------------------------------------------
83		// Finalization mix - force all bits of a hash block to avalanche
84
85		FORCE_INLINE uint32_t fmix32 ( uint32_t h )
86	0	{
87	0	h ^= h >> 16;
88	0	h *= 0x85ebca6b;
89	0	h ^= h >> 13;
90	0	h *= 0xc2b2ae35;
91	0	h ^= h >> 16;
92
93	0	return h;
94	0	}
95
96		//----------
97
98		FORCE_INLINE uint64_t fmix64 ( uint64_t k )
99	0	{
100	0	k ^= k >> 33;
101	0	k *= BIG_CONSTANT(0xff51afd7ed558ccd);
102	0	k ^= k >> 33;
103	0	k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
104	0	k ^= k >> 33;
105
106	0	return k;
107	0	}
108
109		/*-----------------------------------------------------------------------------*
110		PMurHash128x86
111		*-----------------------------------------------------------------------------*/
112		/*-----------------------------------------------------------------------------
113		* Core murmurhash algorithm macros */
114
115		static const uint32_t kC1 = 0x239b961b;
116		static const uint32_t kC2 = 0xab0e9789;
117		static const uint32_t kC3 = 0x38b34ae5;
118		static const uint32_t kC4 = 0xa1e38b93;
119
120		/* This is the main processing body of the algorithm. It operates
121		* on each full 128-bits of input. */
122	0	#define doblock128x86(h1, h2, h3, h4, k1, k2, k3,k4)\
123	0	do {\
124	0	k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;\
125	0	\
126	0	h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;\
127	0	\
128	0	k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;\
129	0	\
130	0	h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;\
131	0	\
132	0	k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;\
133	0	\
134	0	h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;\
135	0	\
136	0	k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;\
137	0	\
138	0	h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;\
139	0	} while(0)
140
141		/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
142		/* cnt=bytes to process, h1-h4=hash k1-k4=carry, n=bytes in carry, ptr/len=payload */
143	0	#define dobytes128x86(cnt, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len)\
144	0	do {\
145	0	unsigned __cnt = cnt;\
146	0	for(;__cnt--; len--) {\
147	0	switch(n) {\
148	0	case 0: case 1: case 2: case 3:\
149	0	k1 = k1>>8 \| (uint32_t)*ptr++<<24;\
150	0	++n; break;\
151	0	\
152	0	case 4: case 5: case 6: case 7:\
153	0	k2 = k2>>8 \| (uint32_t)*ptr++<<24;\
154	0	++n; break;\
155	0	\
156	0	case 8: case 9: case 10: case 11:\
157	0	k3 = k3>>8 \| (uint32_t)*ptr++<<24;\
158	0	++n; break;\
159	0	\
160	0	case 12: case 13: case 14:\
161	0	k4 = k4>>8 \| (uint32_t)*ptr++<<24;\
162	0	++n; break;\
163	0	\
164	0	case 15:\
165	0	k4 = k4>>8 \| (uint32_t)*ptr++<<24;\
166	0	doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);\
167	0	n = 0; break;\
168	0	}\
169	0	}\
170	0	} while(0)
171
172		/* Finalize a hash. To match the original Murmur3_128x86 the total_length must be provided */
173		void PMurHash128x86_Result(const uint32_t ph[4], const uint32_t pcarry[4], uint32_t total_length, uint32_t out[4])
174	0	{
175	0	uint32_t h1 = ph[0];
176	0	uint32_t h2 = ph[1];
177	0	uint32_t h3 = ph[2];
178	0	uint32_t h4 = ph[3];
179
180	0	uint32_t k1, k2, k3, k4 = pcarry[3];
181
182	0	int n = k4 & 15;
183	0	switch(n) {
184	0	case 1: case 2: case 3: case 4:
185	0	k1 = pcarry[0] >> (4-n)*8;
186	0	goto finrot_k1;
187
188	0	case 5: case 6: case 7: case 8:
189	0	k2 = pcarry[1] >> (8-n)*8;
190	0	goto finrot_k21;
191
192	0	case 9: case 10: case 11: case 12:
193	0	k3 = pcarry[2] >> (12-n)*8;
194	0	goto finrot_k321;
195
196	0	case 13: case 14: case 15:
197	0	k4 >>= (16-n)*8;
198	0	goto finrot_k4321;
199
200	0	default:
201	0	goto skiprot;
202	0	}
203	0	finrot_k4321:
204	0	k4 *= kC4; k4 = ROTL32(k4,18); k4 *= kC1; h4 ^= k4;
205	0	k3 = pcarry[2];
206	0	finrot_k321:
207	0	k3 *= kC3; k3 = ROTL32(k3,17); k3 *= kC4; h3 ^= k3;
208	0	k2 = pcarry[1];
209	0	finrot_k21:
210	0	k2 *= kC2; k2 = ROTL32(k2,16); k2 *= kC3; h2 ^= k2;
211	0	k1 = pcarry[0];
212	0	finrot_k1:
213	0	k1 *= kC1; k1 = ROTL32(k1,15); k1 *= kC2; h1 ^= k1;
214	0	skiprot:
215
216		//----------
217		// finalization
218
219	0	h1 ^= total_length; h2 ^= total_length;
220	0	h3 ^= total_length; h4 ^= total_length;
221
222	0	h1 += h2; h1 += h3; h1 += h4;
223	0	h2 += h1; h3 += h1; h4 += h1;
224
225	0	h1 = fmix32(h1);
226	0	h2 = fmix32(h2);
227	0	h3 = fmix32(h3);
228	0	h4 = fmix32(h4);
229
230	0	h1 += h2; h1 += h3; h1 += h4;
231	0	h2 += h1; h3 += h1; h4 += h1;
232
233	0	out[0] = h1;
234	0	out[1] = h2;
235	0	out[2] = h3;
236	0	out[3] = h4;
237	0	}
238
239		/*---------------------------------------------------------------------------*/
240
241		/* Main hashing function. Initialise carry[4] to {0,0,0,0} and h[4] to an initial {seed,seed,seed,seed}
242		* if wanted. Both ph and pcarry are required arguments. */
243		void PMurHash128x86_Process(uint32_t ph[4], uint32_t pcarry[4], const void * const key, int len)
244	0	{
245	0	uint32_t h1 = ph[0];
246	0	uint32_t h2 = ph[1];
247	0	uint32_t h3 = ph[2];
248	0	uint32_t h4 = ph[3];
249
250	0	uint32_t k1 = pcarry[0];
251	0	uint32_t k2 = pcarry[1];
252	0	uint32_t k3 = pcarry[2];
253	0	uint32_t k4 = pcarry[3];
254
255	0	const uint8_t *ptr = (uint8_t*)key;
256	0	const uint8_t *end;
257
258		/* Extract carry count from low 4 bits of c value */
259	0	int n = k4 & 15;
260
261	0	#if defined(UNALIGNED_SAFE)
262		/* This CPU handles unaligned word access */
263		// #pragma message ( "UNALIGNED_SAFE" )
264		/* Consume any carry bytes */
265	0	int i = (16-n) & 15;
266	0	if(i && i <= len) {
267	0	dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
268	0	}
269
270		/* Process 128-bit chunks */
271	0	end = ptr + (len & ~15);
272	0	for( ; ptr < end ; ptr+=16) {
273	0	k1 = READ_UINT32(ptr, 0);
274	0	k2 = READ_UINT32(ptr, 1);
275	0	k3 = READ_UINT32(ptr, 2);
276	0	k4 = READ_UINT32(ptr, 3);
277	0	doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
278	0	}
279
280		#else /*UNALIGNED_SAFE*/
281		/* This CPU does not handle unaligned word access */
282		// #pragma message ( "ALIGNED" )
283		/* Consume enough so that the next data byte is word aligned */
284		int i = -(intptr_t)(void *)ptr & 3;
285		if(i && i <= len) {
286		dobytes128x86(i, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
287		}
288		/* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
289		end = ptr + (len & ~15);
290
291		switch(n) { /* how many bytes in c */
292		case 0: /*
293		k1=[----] k2=[----] k2=[----] k4=[----] w=[3210 7654 ba98 fedc] b=[3210 7654 ba98 fedc] */
294		for( ; ptr < end ; ptr+=16) {
295		k1 = READ_UINT32(ptr, 0);
296		k2 = READ_UINT32(ptr, 1);
297		k3 = READ_UINT32(ptr, 2);
298		k4 = READ_UINT32(ptr, 3);
299		doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
300		}
301		break;
302		case 1: case 2: case 3: /*
303		k1=[10--] k2=[----] k3=[----] k4=[----] w=[5432 9876 dcba hgfe] b=[3210 7654 ba98 fedc] k1'=[hg--] */
304		{
305		const int lshift = n*8, rshift = 32-lshift;
306		for( ; ptr < end ; ptr+=16) {
307		uint32_t c = k1>>rshift; // --10
308		k2 = READ_UINT32(ptr, 0); // 5432
309		c \|= k2<<lshift; // 3210.
310		k1 = READ_UINT32(ptr, 1); // 9876
311		k2 = k1<<lshift \| k2>>rshift; // 7654.
312		k4 = READ_UINT32(ptr, 2); // dcba
313		k3 = k4<<lshift \| k1>>rshift; // ba98.
314		k1 = READ_UINT32(ptr, 3); // hgfe.
315		k4 = k1<<lshift \| k4>>rshift; // fedc.
316		doblock128x86(h1, h2, h3, h4, c, k2, k3, k4);
317		}
318		}
319		break;
320		case 4: /*
321		k1=[3210] k2=[----] k3=[----] k4=[----] w=[7654 ba98 fedc jihg] b=[3210 7654 ba98 fedc] k1'=[jihg] */
322		for( ; ptr < end ; ptr+=16) {
323		k2 = READ_UINT32(ptr, 0);
324		k3 = READ_UINT32(ptr, 1);
325		k4 = READ_UINT32(ptr, 2);
326		doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
327		k1 = READ_UINT32(ptr, 3);
328		}
329		break;
330		case 5: case 6: case 7: /*
331		k1=[3210] k2=[54--] k3=[----] k4=[----] w=[9876 dcba hgfe lkji] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[lk--] */
332		{
333		const int lshift = n*8-32, rshift = 32-lshift;
334		for( ; ptr < end ; ptr+=16) {
335		uint32_t c = k2>>rshift; // --54
336		k3 = READ_UINT32(ptr, 0); // 9876
337		c \|= k3<<lshift; // 7654.
338		k4 = READ_UINT32(ptr, 1); // dcba
339		k3 = k4<<lshift \| k3>>rshift; // ba98.
340		k2 = READ_UINT32(ptr, 2); // hgfe
341		k4 = k2<<lshift \| k4>>rshift; // fedc.
342		doblock128x86(h1, h2, h3, h4, k1, c, k3, k4);
343		k1 = k2>>rshift; // --hg
344		k2 = READ_UINT32(ptr, 3); // lkji.
345		k1 \|= k2<<lshift; // jihg.
346		}
347		}
348		case 8: /*
349		k1=[3210] k2=[7654] k3=[----] k4=[----] w=[ba98 fedc jihg nmlk] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] */
350		for( ; ptr < end ; ptr+=16) {
351		k3 = READ_UINT32(ptr, 0);
352		k4 = READ_UINT32(ptr, 1);
353		doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
354		k1 = READ_UINT32(ptr, 2);
355		k2 = READ_UINT32(ptr, 3);
356		}
357		break;
358		case 9: case 10: case 11: /*
359		k1=[3210] k2=[7654] k3=[98--] k4=[----] w=[dcba hgfe lkji ponm] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[po--] */
360		{
361		const int lshift = n*8-64, rshift = 32-lshift;
362		for( ; ptr < end ; ptr+=16) {
363		uint32_t c = k3>>rshift; // --98
364		k4 = READ_UINT32(ptr, 0); // dcba
365		c \|= k4<<lshift; // ba98.
366		k3 = READ_UINT32(ptr, 1); // hgfe
367		k4 = k3<<lshift \| k4>>rshift; // fedc.
368		doblock128x86(h1, h2, h3, h4, k1, k2, c, k4);
369		k2 = READ_UINT32(ptr, 2); // lkji
370		k1 = k2<<lshift \| k3>>rshift; // jihg.
371		k3 = READ_UINT32(ptr, 3); // ponm.
372		k2 = k3<<lshift \| k2>>rshift; // nmlk.
373		}
374		}
375		case 12: /*
376		k1=[3210] k2=[7654] k3=[ba98] k4=[----] w=[fedc jihg nmlk rqpo] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] */
377		for( ; ptr < end ; ptr+=16) {
378		k4 = READ_UINT32(ptr, 0);
379		doblock128x86(h1, h2, h3, h4, k1, k2, k3, k4);
380		k1 = READ_UINT32(ptr, 1);
381		k2 = READ_UINT32(ptr, 2);
382		k3 = READ_UINT32(ptr, 3);
383		}
384		break;
385		default: /* 12 < n <= 15
386		k1=[3210] k2=[7654] k3=[ba98] k4=[dc--] w=[hgfe lkji ponm tsrq] b=[3210 7654 ba98 fedc] k1'=[jihg] k2'=[nmlk] k3'=[rqpo] k3'=[ts--] */
387		{
388		const int lshift = n*8-96, rshift = 32-lshift;
389		for( ; ptr < end ; ptr+=16) {
390		uint32_t c = k4>>rshift; // --dc
391		k4 = READ_UINT32(ptr, 0); // hgfe
392		c \|= k4<<lshift; // fedc.
393		doblock128x86(h1, h2, h3, h4, k1, k2, k3, c);
394		k3 = READ_UINT32(ptr, 1); // lkji
395		k1 = k3<<lshift \| k4>>rshift; // jihg.
396		c = READ_UINT32(ptr, 2); // ponm
397		k2 = c<<lshift \| k3>>rshift; // nmlk.
398		k4 = READ_UINT32(ptr, 3); // tsrq.
399		k3 = k4<<lshift \| c>>rshift; // rqpo.
400		}
401		}
402		}
403		#endif /UNALIGNED_SAFE/
404
405		/* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
406	0	len -= len & ~15;
407
408		/* Append any remaining bytes into carry */
409	0	dobytes128x86(len, h1, h2, h3, h4, k1, k2, k3, k4, n, ptr, len);
410
411		/* Copy out new running hash and carry */
412	0	ph[0] = h1;
413	0	ph[1] = h2;
414	0	ph[2] = h3;
415	0	ph[3] = h4;
416	0	pcarry[0] = k1;
417	0	pcarry[1] = k2;
418	0	pcarry[2] = k3;
419	0	pcarry[3] = (k4 & ~0xff) \| n;
420	0	}
421
/*---------------------------------------------------------------------------*/
423
424		/* All in one go */
425
/* MurmurHash3_x86_128 api.
 *
 * Hashes the `len` bytes at `key` in a single call: every one of the four
 * 32-bit state words starts out as `seed`, the carry starts out empty
 * (all zero), the whole buffer is fed through PMurHash128x86_Process() and
 * PMurHash128x86_Result() then writes the digest through `out`, which is
 * accessed as a uint32_t pointer. */
void PMurHash128x86(const void * key, const int len, uint32_t seed, void * out)
{
  uint32_t state[4];
  uint32_t pending[4];
  int lane;

  for (lane = 0; lane < 4; lane++) {
    state[lane] = seed;
    pending[lane] = 0;
  }

  PMurHash128x86_Process(state, pending, key, len);
  PMurHash128x86_Result(state, pending, (uint32_t) len, (uint32_t *) out);
}
434
/*-----------------------------------------------------------------------------
                       PMurHash128x64
-----------------------------------------------------------------------------*/
438		/*-----------------------------------------------------------------------------
439		* Core murmurhash algorithm macros */
440
441		static const uint64_t kC1L = BIG_CONSTANT(0x87c37b91114253d5);
442		static const uint64_t kC2L = BIG_CONSTANT(0x4cf5ad432745937f);
443
/* This is the main processing body of the algorithm. It operates
 * on each full 128-bits of input.
 *
 * h1,h2 = running hash state, updated in place.
 * k1,k2 = the two 64-bit words of the block. They must be modifiable
 *         lvalues: each is multiplied, rotated and multiplied again in
 *         place before being xor-ed into its hash word, so the caller's
 *         variables hold the scrambled values afterwards. */
#define doblock128x64(h1, h2, k1, k2)\
do {\
  /* scramble k1 and fold it into h1 */\
  k1 *= kC1L; k1 = ROTL64(k1,31); k1 *= kC2L; h1 ^= k1;\
\
  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;\
\
  /* scramble k2 (constants in the opposite order) and fold it into h2 */\
  k2 *= kC2L; k2 = ROTL64(k2,33); k2 *= kC1L; h2 ^= k2;\
\
  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;\
} while(0)
456
/* Append unaligned bytes to carry, forcing hash churn if we have 16 bytes */
/* cnt=bytes to process, h1,h2=hash k1,k2=carry, n=bytes in carry, ptr/len=payload
 *
 * Each byte is shifted into the top of the current carry word (k1 while
 * n < 8, k2 afterwards), so the oldest byte ends up lowest. When the 16th
 * byte arrives the completed block is mixed with doblock128x64() and n
 * restarts at 0. ptr is advanced and len decremented once per byte.
 * cnt is read once up front, so passing the same variable as cnt and len
 * is fine. */
#define dobytes128x64(cnt, h1, h2, k1, k2, n, ptr, len) \
do {\
  unsigned pmur_cnt_ = (cnt);\
  for(;pmur_cnt_--; len--) {\
    switch(n) {\
      case  0: case  1: case  2: case  3:\
      case  4: case  5: case  6: case  7:\
        k1 = (k1>>8) | ((uint64_t)*ptr++<<56);\
        n++; break;\
\
      case  8: case  9: case 10: case 11:\
      case 12: case 13: case 14:\
        k2 = (k2>>8) | ((uint64_t)*ptr++<<56);\
        n++; break;\
\
      case 15:\
        k2 = (k2>>8) | ((uint64_t)*ptr++<<56);\
        doblock128x64(h1, h2, k1, k2);\
        n = 0; break;\
    }\
  }\
} while(0)
481
482		/* Finalize a hash. To match the original Murmur3_128x64 the total_length must be provided */
483		void PMurHash128x64_Result(const uint64_t ph[2], const uint64_t pcarry[2],
484		const uint32_t total_length, uint64_t out[2])
485	0	{
486	0	uint64_t h1 = ph[0];
487	0	uint64_t h2 = ph[1];
488
489	0	uint64_t k1;
490	0	uint64_t k2 = pcarry[1];
491
492	0	int n = k2 & 15;
493	0	if (n) {
494	0	k1 = pcarry[0];
495	0	if (n > 8) {
496	0	k2 >>= (16-n)*8;
497	0	k2 = kC2L; k2 = ROTL64(k2,33); k2 = kC1L; h2 ^= k2;
498	0	} else {
499	0	k1 >>= (8-n)*8;
500	0	}
501	0	k1 = kC1L; k1 = ROTL64(k1,31); k1 = kC2L; h1 ^= k1;
502	0	}
503
504		//----------
505		// finalization
506
507	0	h1 ^= total_length; h2 ^= total_length;
508
509	0	h1 += h2;
510	0	h2 += h1;
511
512	0	h1 = fmix64(h1);
513	0	h2 = fmix64(h2);
514
515	0	h1 += h2;
516	0	h2 += h1;
517
518	0	out[0] = h1;
519	0	out[1] = h2;
520	0	}
521
/*---------------------------------------------------------------------------*/
523
/* Main hashing function. Initialise carry[2] to {0,0} and h[2] to an initial {seed,seed}
 * if wanted. Both ph and pcarry are required arguments.
 *
 * ph     = running hash words, read on entry and updated on return
 * pcarry = carry words, read on entry and updated on return; buffered bytes
 *          that do not yet form a full 128-bit block live in the high bytes
 *          and the byte count (0..15) is kept in the low bits of pcarry[1]
 * key    = payload bytes to add to the hash
 * len    = number of payload bytes at key */
void PMurHash128x64_Process(uint64_t ph[2], uint64_t pcarry[2], const void * const key, int len)
{
  uint64_t h1 = ph[0];
  uint64_t h2 = ph[1];

  uint64_t k1 = pcarry[0];
  uint64_t k2 = pcarry[1];

  const uint8_t *ptr = (const uint8_t *)key;
  const uint8_t *end;

  /* Extract carry count from low 4 bits of c value */
  int n = k2 & 15;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */
// #pragma message ( "UNALIGNED_SAFE" )
  /* Consume any carry bytes */
  int i = (16-n) & 15;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }

  /* Process 128-bit chunks */
  end = ptr + (len & ~15);
  for( ; ptr < end ; ptr+=16) {
    k1 = READ_UINT64(ptr, 0);
    k2 = READ_UINT64(ptr, 1);
    doblock128x64(h1, h2, k1, k2);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */
// #pragma message ( "ALIGNED" )
  /* Consume enough so that the next data byte is word aligned */
  int i = -(intptr_t)(const void *)ptr & 7;
  if(i && i <= len) {
    dobytes128x64(i, h1, h2, k1, k2, n, ptr, len);
  }
  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  end = ptr + (len & ~15);

  switch(n) { /* how many bytes in c */
  case 0: /*
    k1=[--------] k2=[--------] w=[76543210 fedcba98] b=[76543210 fedcba98] */
    for( ; ptr < end ; ptr+=16) {
      k1 = READ_UINT64(ptr, 0);
      k2 = READ_UINT64(ptr, 1);
      doblock128x64(h1, h2, k1, k2);
    }
    break;
  case 1: case 2: case 3: case 4: case 5: case 6: case 7: /*
    k1=[10------] k2=[--------] w=[98765432 hgfedcba] b=[76543210 fedcba98] k1'=[hg------] */
    {
      const int lshift = n*8, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        /* c is the block's first word: carried bytes low, new bytes above them */
        uint64_t c = k1>>rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2<<lshift;
        k1 = READ_UINT64(ptr, 1);
        k2 = k2>>rshift | k1<<lshift;
        /* k1 is not passed to doblock, so its top bytes survive as the next carry */
        doblock128x64(h1, h2, c, k2);
      }
    }
    break;
  case 8: /*
    k1=[76543210] k2=[--------] w=[fedcba98 nmlkjihg] b=[76543210 fedcba98] k1'=[nmlkjihg] */
    for( ; ptr < end ; ptr+=16) {
      k2 = READ_UINT64(ptr, 0);
      doblock128x64(h1, h2, k1, k2);
      k1 = READ_UINT64(ptr, 1);
    }
    break;
  default: /* 8 < n <= 15
    k1=[76543210] k2=[98------] w=[hgfedcba ponmlkji] b=[76543210 fedcba98] k1'=[nmlkjihg] k2'=[po------] */
    {
      const int lshift = n*8-64, rshift = 64-lshift;
      for( ; ptr < end ; ptr+=16) {
        uint64_t c = k2 >> rshift;
        k2 = READ_UINT64(ptr, 0);
        c |= k2 << lshift;
        doblock128x64(h1, h2, k1, c);
        k1 = k2 >> rshift;
        k2 = READ_UINT64(ptr, 1);
        k1 |= k2 << lshift;
      }
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 128-bit chunks, possibly leaving 1..15 bytes */
  len -= len & ~15;

  /* Append any remaining bytes into carry */
  dobytes128x64(len, h1, h2, k1, k2, n, ptr, len);

  /* Copy out new running hash and carry */
  ph[0] = h1;
  ph[1] = h2;
  pcarry[0] = k1;
  /* The low byte of the second carry word is replaced by the byte count */
  pcarry[1] = (k2 & ~0xff) | n;
}
628
/*---------------------------------------------------------------------------*/
630
631		/* All in one go */
632
/* MurmurHash3_x64_128 api.
 *
 * One-shot convenience wrapper: both 64-bit state words start out as `seed`,
 * the carry starts out empty (all zero), the `len` bytes at `key` are run
 * through PMurHash128x64_Process() and PMurHash128x64_Result() then writes
 * the digest through `out`, which is accessed as a uint64_t pointer. */
void PMurHash128x64(const void * key, const int len, uint32_t seed, void * out)
{
  uint64_t pending[2] = {0, 0};
  uint64_t state[2];

  state[0] = seed;
  state[1] = seed;

  PMurHash128x64_Process(state, pending, key, len);
  PMurHash128x64_Result(state, pending, (uint32_t) len, (uint64_t *) out);
}