/src/postgres/src/common/hashfn.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 *
 * hashfn.c
 *    Generic hashing functions, and hash functions for use in dynahash.c
 *    hashtables
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/common/hashfn.c
 *
 * NOTES
 *    It is expected that every bit of a hash function's 32-bit result is
 *    as random as every other; failure to ensure this is likely to lead
 *    to poor performance of hash tables.  In most cases a hash
 *    function should use hash_bytes() or its variant hash_bytes_uint32(),
 *    or the wrappers hash_any() and hash_uint32 defined in hashfn.h.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "common/hashfn.h"
#include "port/pg_bitutils.h"


/*
 * This hash function was written by Bob Jenkins
 * (bob_jenkins@burtleburtle.net), and superficially adapted
 * for PostgreSQL by Neil Conway. For more information on this
 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
 *
 * In the current code, we have adopted Bob's 2006 update of his hash
 * function to fetch the data a word at a time when it is suitably aligned.
 * This makes for a useful speedup, at the cost of having to maintain
 * four code paths (aligned vs unaligned, and little-endian vs big-endian).
 * It also uses two separate mixing functions mix() and final(), instead
 * of a slower multi-purpose function.
 */

/* Get a bit mask of the bits set in non-uint32 aligned addresses */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

#define rot(x,k) pg_rotate_left32(x, k)

/*----------
 * mix -- mix 3 32-bit values reversibly.
 *
 * This is reversible, so any information in (a,b,c) before mix() is
 * still in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through
 * mix() in reverse, there are at least 32 bits of the output that
 * are sometimes the same for one pair and different for another pair.
 * This was tested for:
 * * pairs that differed by one bit, by two bits, in any combination
 *   of top bits of (a,b,c), or in any combination of bottom bits of
 *   (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *   is commonly produced by subtraction) look like a single 1-bit
 *   difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *   all zero plus a counter that starts at zero.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c)
 * that fail to affect some output bits of (a,b,c), especially of a.  The
 * most thoroughly mixed value is c, but it doesn't really even achieve
 * avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling
 * the number of bits affected, so the goal of mixing pulls in the opposite
 * direction from the goal of parallelism.  I did what I could.  Rotates
 * seem to cost as much as shifts on every machine I could lay my hands on,
 * and rotates are much kinder to the top and bottom bits, so I used rotates.
 *----------
 */
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4); c += b; \
  b -= a;  b ^= rot(a, 6); a += c; \
  c -= b;  c ^= rot(b, 8); b += a; \
  a -= c;  a ^= rot(c,16); c += b; \
  b -= a;  b ^= rot(a,19); a += c; \
  c -= b;  c ^= rot(b, 4); b += a; \
}

/*----------
 * final -- final mixing of 3 32-bit values (a,b,c) into c
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually
 * produce values of c that look totally different.  This was tested for
 * * pairs that differed by one bit, by two bits, in any combination
 *   of top bits of (a,b,c), or in any combination of bottom bits of
 *   (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *   is commonly produced by subtraction) look like a single 1-bit
 *   difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *   all zero plus a counter that starts at zero.
 *
 * The use of separate functions for mix() and final() allow for a
 * substantial performance increase since final() does not need to
 * do well in reverse, but is does need to affect all output bits.
 * mix(), on the other hand, does not need to affect all output
 * bits (affecting 32 bits is enough).  The original hash function had
 * a single mixing operation that had to satisfy both sets of requirements
 * and was slower as a result.
 *----------
 */
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c, 4); \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

/*
 * hash_bytes() -- hash a variable-length key into a 32-bit value
 *    k   : the key (the unaligned variable-length array of bytes)
 *    len   : the length of the key, counting by bytes
 *
 * Returns a uint32 value.  Every bit of the key affects every bit of
 * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
 * About 6*len+35 instructions. The best hash table sizes are powers
 * of 2.  There is no need to do mod a prime (mod is sooo slow!).
 * If you need less than 32 bits, use a bitmask.
 *
 * This procedure must never throw elog(ERROR); the ResourceOwner code
 * relies on this not to fail.
 *
 * Note: we could easily change this function to return a 64-bit hash value
 * by using the final values of both b and c.  b is perhaps a little less
 * well mixed than c, however.
 */
uint32
hash_bytes(const unsigned char *k, int keylen)
{
  uint32    a,
        b,
        c,
        len;

  /* Set up the internal state */
  len = keylen;
  a = b = c = 0x9e3779b9 + len + 3923095;

  /* If the source pointer is word-aligned, we use word-wide fetches */
  if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
  {
    /* Code path for aligned source data */
    const uint32 *ka = (const uint32 *) k;

    /* handle most of the key */
    while (len >= 12)
    {
      a += ka[0];
      b += ka[1];
      c += ka[2];
      mix(a, b, c);
      ka += 3;
      len -= 12;
    }

    /* handle the last 11 bytes */
    k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 8);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 24);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += ka[1];
        a += ka[0];
        break;
      case 7:
        b += ((uint32) k[6] << 8);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 16);
        /* fall through */
      case 5:
        b += ((uint32) k[4] << 24);
        /* fall through */
      case 4:
        a += ka[0];
        break;
      case 3:
        a += ((uint32) k[2] << 8);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 16);
        /* fall through */
      case 1:
        a += ((uint32) k[0] << 24);
        /* case 0: nothing left to add */
    }
#else             /* !WORDS_BIGENDIAN */
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 24);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 8);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += ka[1];
        a += ka[0];
        break;
      case 7:
        b += ((uint32) k[6] << 16);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 8);
        /* fall through */
      case 5:
        b += k[4];
        /* fall through */
      case 4:
        a += ka[0];
        break;
      case 3:
        a += ((uint32) k[2] << 16);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 8);
        /* fall through */
      case 1:
        a += k[0];
        /* case 0: nothing left to add */
    }
#endif              /* WORDS_BIGENDIAN */
  }
  else
  {
    /* Code path for non-aligned source data */

    /* handle most of the key */
    while (len >= 12)
    {
#ifdef WORDS_BIGENDIAN
      a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
      b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
      c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else             /* !WORDS_BIGENDIAN */
      a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
      b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
      c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif              /* WORDS_BIGENDIAN */
      mix(a, b, c);
      k += 12;
      len -= 12;
    }

    /* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 8);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 24);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += k[7];
        /* fall through */
      case 7:
        b += ((uint32) k[6] << 8);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 16);
        /* fall through */
      case 5:
        b += ((uint32) k[4] << 24);
        /* fall through */
      case 4:
        a += k[3];
        /* fall through */
      case 3:
        a += ((uint32) k[2] << 8);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 16);
        /* fall through */
      case 1:
        a += ((uint32) k[0] << 24);
        /* case 0: nothing left to add */
    }
#else             /* !WORDS_BIGENDIAN */
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 24);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 8);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += ((uint32) k[7] << 24);
        /* fall through */
      case 7:
        b += ((uint32) k[6] << 16);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 8);
        /* fall through */
      case 5:
        b += k[4];
        /* fall through */
      case 4:
        a += ((uint32) k[3] << 24);
        /* fall through */
      case 3:
        a += ((uint32) k[2] << 16);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 8);
        /* fall through */
      case 1:
        a += k[0];
        /* case 0: nothing left to add */
    }
#endif              /* WORDS_BIGENDIAN */
  }

  final(a, b, c);

  /* report the result */
  return c;
}

/*
 * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
 *    k   : the key (the unaligned variable-length array of bytes)
 *    len   : the length of the key, counting by bytes
 *    seed  : a 64-bit seed (0 means no seed)
 *
 * Returns a uint64 value.  Otherwise similar to hash_bytes.
 */
uint64
hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
  uint32    a,
        b,
        c,
        len;

  /* Set up the internal state */
  len = keylen;
  a = b = c = 0x9e3779b9 + len + 3923095;

  /* If the seed is non-zero, use it to perturb the internal state. */
  if (seed != 0)
  {
    /*
     * In essence, the seed is treated as part of the data being hashed,
     * but for simplicity, we pretend that it's padded with four bytes of
     * zeroes so that the seed constitutes a 12-byte chunk.
     */
    a += (uint32) (seed >> 32);
    b += (uint32) seed;
    mix(a, b, c);
  }

  /* If the source pointer is word-aligned, we use word-wide fetches */
  if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
  {
    /* Code path for aligned source data */
    const uint32 *ka = (const uint32 *) k;

    /* handle most of the key */
    while (len >= 12)
    {
      a += ka[0];
      b += ka[1];
      c += ka[2];
      mix(a, b, c);
      ka += 3;
      len -= 12;
    }

    /* handle the last 11 bytes */
    k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 8);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 24);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += ka[1];
        a += ka[0];
        break;
      case 7:
        b += ((uint32) k[6] << 8);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 16);
        /* fall through */
      case 5:
        b += ((uint32) k[4] << 24);
        /* fall through */
      case 4:
        a += ka[0];
        break;
      case 3:
        a += ((uint32) k[2] << 8);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 16);
        /* fall through */
      case 1:
        a += ((uint32) k[0] << 24);
        /* case 0: nothing left to add */
    }
#else             /* !WORDS_BIGENDIAN */
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 24);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 8);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += ka[1];
        a += ka[0];
        break;
      case 7:
        b += ((uint32) k[6] << 16);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 8);
        /* fall through */
      case 5:
        b += k[4];
        /* fall through */
      case 4:
        a += ka[0];
        break;
      case 3:
        a += ((uint32) k[2] << 16);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 8);
        /* fall through */
      case 1:
        a += k[0];
        /* case 0: nothing left to add */
    }
#endif              /* WORDS_BIGENDIAN */
  }
  else
  {
    /* Code path for non-aligned source data */

    /* handle most of the key */
    while (len >= 12)
    {
#ifdef WORDS_BIGENDIAN
      a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
      b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
      c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else             /* !WORDS_BIGENDIAN */
      a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
      b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
      c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif              /* WORDS_BIGENDIAN */
      mix(a, b, c);
      k += 12;
      len -= 12;
    }

    /* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 8);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 24);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += k[7];
        /* fall through */
      case 7:
        b += ((uint32) k[6] << 8);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 16);
        /* fall through */
      case 5:
        b += ((uint32) k[4] << 24);
        /* fall through */
      case 4:
        a += k[3];
        /* fall through */
      case 3:
        a += ((uint32) k[2] << 8);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 16);
        /* fall through */
      case 1:
        a += ((uint32) k[0] << 24);
        /* case 0: nothing left to add */
    }
#else             /* !WORDS_BIGENDIAN */
    switch (len)
    {
      case 11:
        c += ((uint32) k[10] << 24);
        /* fall through */
      case 10:
        c += ((uint32) k[9] << 16);
        /* fall through */
      case 9:
        c += ((uint32) k[8] << 8);
        /* fall through */
      case 8:
        /* the lowest byte of c is reserved for the length */
        b += ((uint32) k[7] << 24);
        /* fall through */
      case 7:
        b += ((uint32) k[6] << 16);
        /* fall through */
      case 6:
        b += ((uint32) k[5] << 8);
        /* fall through */
      case 5:
        b += k[4];
        /* fall through */
      case 4:
        a += ((uint32) k[3] << 24);
        /* fall through */
      case 3:
        a += ((uint32) k[2] << 16);
        /* fall through */
      case 2:
        a += ((uint32) k[1] << 8);
        /* fall through */
      case 1:
        a += k[0];
        /* case 0: nothing left to add */
    }
#endif              /* WORDS_BIGENDIAN */
  }

  final(a, b, c);

  /* report the result */
  return ((uint64) b << 32) | c;
}

/*
 * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
 *
 * This has the same result as
 *    hash_bytes(&k, sizeof(uint32))
 * but is faster and doesn't force the caller to store k into memory.
 */
uint32
hash_bytes_uint32(uint32 k)
{
  uint32    a,
        b,
        c;

  a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
  a += k;

  final(a, b, c);

  /* report the result */
  return c;
}

/*
 * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
 *
 * Like hash_bytes_uint32, this is a convenience function.
 */
uint64
hash_bytes_uint32_extended(uint32 k, uint64 seed)
{
  uint32    a,
        b,
        c;

  a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;

  if (seed != 0)
  {
    a += (uint32) (seed >> 32);
    b += (uint32) seed;
    mix(a, b, c);
  }

  a += k;

  final(a, b, c);

  /* report the result */
  return ((uint64) b << 32) | c;
}

/*
 * string_hash: hash function for keys that are NUL-terminated strings.
 *
 * NOTE: this is the default hash function if none is specified.
 */
uint32
string_hash(const void *key, Size keysize)
{
  /*
   * If the string exceeds keysize-1 bytes, we want to hash only that many,
   * because when it is copied into the hash table it will be truncated at
   * that length.
   */
  Size    s_len = strlen((const char *) key);

  s_len = Min(s_len, keysize - 1);
  return hash_bytes((const unsigned char *) key, (int) s_len);
}

/*
 * tag_hash: hash function for fixed-size tag values
 */
uint32
tag_hash(const void *key, Size keysize)
{
  return hash_bytes((const unsigned char *) key, (int) keysize);
}

/*
 * uint32_hash: hash function for keys that are uint32 or int32
 *
 * (tag_hash works for this case too, but is slower)
 */
uint32
uint32_hash(const void *key, Size keysize)
{
  Assert(keysize == sizeof(uint32));
  return hash_bytes_uint32(*((const uint32 *) key));
}

Coverage Report

Created: 2025-08-12 06:43

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		*
3		* hashfn.c
4		* Generic hashing functions, and hash functions for use in dynahash.c
5		* hashtables
6		*
7		*
8		* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9		* Portions Copyright (c) 1994, Regents of the University of California
10		*
11		*
12		* IDENTIFICATION
13		* src/common/hashfn.c
14		*
15		* NOTES
16		* It is expected that every bit of a hash function's 32-bit result is
17		* as random as every other; failure to ensure this is likely to lead
18		* to poor performance of hash tables. In most cases a hash
19		* function should use hash_bytes() or its variant hash_bytes_uint32(),
20		* or the wrappers hash_any() and hash_uint32 defined in hashfn.h.
21		*
22		*-------------------------------------------------------------------------
23		*/
24		#include "postgres.h"
25
26		#include "common/hashfn.h"
27		#include "port/pg_bitutils.h"
28
29
30		/*
31		* This hash function was written by Bob Jenkins
32		* (bob_jenkins@burtleburtle.net), and superficially adapted
33		* for PostgreSQL by Neil Conway. For more information on this
34		* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
35		* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
36		*
37		* In the current code, we have adopted Bob's 2006 update of his hash
38		* function to fetch the data a word at a time when it is suitably aligned.
39		* This makes for a useful speedup, at the cost of having to maintain
40		* four code paths (aligned vs unaligned, and little-endian vs big-endian).
41		* It also uses two separate mixing functions mix() and final(), instead
42		* of a slower multi-purpose function.
43		*/
44
45		/* Get a bit mask of the bits set in non-uint32 aligned addresses */
46	8	#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
47
48	56	#define rot(x,k) pg_rotate_left32(x, k)
49
50		/*----------
51		* mix -- mix 3 32-bit values reversibly.
52		*
53		* This is reversible, so any information in (a,b,c) before mix() is
54		* still in (a,b,c) after mix().
55		*
56		* If four pairs of (a,b,c) inputs are run through mix(), or through
57		* mix() in reverse, there are at least 32 bits of the output that
58		* are sometimes the same for one pair and different for another pair.
59		* This was tested for:
60		* * pairs that differed by one bit, by two bits, in any combination
61		* of top bits of (a,b,c), or in any combination of bottom bits of
62		* (a,b,c).
63		* * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
64		* the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
65		* is commonly produced by subtraction) look like a single 1-bit
66		* difference.
67		* * the base values were pseudorandom, all zero but one bit set, or
68		* all zero plus a counter that starts at zero.
69		*
70		* This does not achieve avalanche. There are input bits of (a,b,c)
71		* that fail to affect some output bits of (a,b,c), especially of a. The
72		* most thoroughly mixed value is c, but it doesn't really even achieve
73		* avalanche in c.
74		*
75		* This allows some parallelism. Read-after-writes are good at doubling
76		* the number of bits affected, so the goal of mixing pulls in the opposite
77		* direction from the goal of parallelism. I did what I could. Rotates
78		* seem to cost as much as shifts on every machine I could lay my hands on,
79		* and rotates are much kinder to the top and bottom bits, so I used rotates.
80		*----------
81		*/
82	0	#define mix(a,b,c) \
83	0	{ \
84	0	a -= c; a ^= rot(c, 4); c += b; \
85	0	b -= a; b ^= rot(a, 6); a += c; \
86	0	c -= b; c ^= rot(b, 8); b += a; \
87	0	a -= c; a ^= rot(c,16); c += b; \
88	0	b -= a; b ^= rot(a,19); a += c; \
89	0	c -= b; c ^= rot(b, 4); b += a; \
90	0	}
91
92		/*----------
93		* final -- final mixing of 3 32-bit values (a,b,c) into c
94		*
95		* Pairs of (a,b,c) values differing in only a few bits will usually
96		* produce values of c that look totally different. This was tested for
97		* * pairs that differed by one bit, by two bits, in any combination
98		* of top bits of (a,b,c), or in any combination of bottom bits of
99		* (a,b,c).
100		* * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
101		* the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
102		* is commonly produced by subtraction) look like a single 1-bit
103		* difference.
104		* * the base values were pseudorandom, all zero but one bit set, or
105		* all zero plus a counter that starts at zero.
106		*
107		* The use of separate functions for mix() and final() allow for a
108		* substantial performance increase since final() does not need to
109		* do well in reverse, but is does need to affect all output bits.
110		* mix(), on the other hand, does not need to affect all output
111		* bits (affecting 32 bits is enough). The original hash function had
112		* a single mixing operation that had to satisfy both sets of requirements
113		* and was slower as a result.
114		*----------
115		*/
116	8	#define final(a,b,c) \
117	8	{ \
118	8	c ^= b; c -= rot(b,14); \
119	8	a ^= c; a -= rot(c,11); \
120	8	b ^= a; b -= rot(a,25); \
121	8	c ^= b; c -= rot(b,16); \
122	8	a ^= c; a -= rot(c, 4); \
123	8	b ^= a; b -= rot(a,14); \
124	8	c ^= b; c -= rot(b,24); \
125	8	}
126
127		/*
128		* hash_bytes() -- hash a variable-length key into a 32-bit value
129		* k : the key (the unaligned variable-length array of bytes)
130		* len : the length of the key, counting by bytes
131		*
132		* Returns a uint32 value. Every bit of the key affects every bit of
133		* the return value. Every 1-bit and 2-bit delta achieves avalanche.
134		* About 6*len+35 instructions. The best hash table sizes are powers
135		* of 2. There is no need to do mod a prime (mod is sooo slow!).
136		* If you need less than 32 bits, use a bitmask.
137		*
138		* This procedure must never throw elog(ERROR); the ResourceOwner code
139		* relies on this not to fail.
140		*
141		* Note: we could easily change this function to return a 64-bit hash value
142		* by using the final values of both b and c. b is perhaps a little less
143		* well mixed than c, however.
144		*/
145		uint32
146		hash_bytes(const unsigned char *k, int keylen)
147	8	{
148	8	uint32 a,
149	8	b,
150	8	c,
151	8	len;
152
153		/* Set up the internal state */
154	8	len = keylen;
155	8	a = b = c = 0x9e3779b9 + len + 3923095;
156
157		/* If the source pointer is word-aligned, we use word-wide fetches */
158	8	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
159	8	{
160		/* Code path for aligned source data */
161	8	const uint32 ka = (const uint32 ) k;
162
163		/* handle most of the key */
164	8	while (len >= 12)
165	0	{
166	0	a += ka[0];
167	0	b += ka[1];
168	0	c += ka[2];
169	0	mix(a, b, c);
170	0	ka += 3;
171	0	len -= 12;
172	0	}
173
174		/* handle the last 11 bytes */
175	8	k = (const unsigned char *) ka;
176		#ifdef WORDS_BIGENDIAN
177		switch (len)
178		{
179		case 11:
180		c += ((uint32) k[10] << 8);
181		/* fall through */
182		case 10:
183		c += ((uint32) k[9] << 16);
184		/* fall through */
185		case 9:
186		c += ((uint32) k[8] << 24);
187		/* fall through */
188		case 8:
189		/* the lowest byte of c is reserved for the length */
190		b += ka[1];
191		a += ka[0];
192		break;
193		case 7:
194		b += ((uint32) k[6] << 8);
195		/* fall through */
196		case 6:
197		b += ((uint32) k[5] << 16);
198		/* fall through */
199		case 5:
200		b += ((uint32) k[4] << 24);
201		/* fall through */
202		case 4:
203		a += ka[0];
204		break;
205		case 3:
206		a += ((uint32) k[2] << 8);
207		/* fall through */
208		case 2:
209		a += ((uint32) k[1] << 16);
210		/* fall through */
211		case 1:
212		a += ((uint32) k[0] << 24);
213		/* case 0: nothing left to add */
214		}
215		#else /* !WORDS_BIGENDIAN */
216	8	switch (len)
217	8	{
218	0	case 11:
219	0	c += ((uint32) k[10] << 24);
220		/* fall through */
221	0	case 10:
222	0	c += ((uint32) k[9] << 16);
223		/* fall through */
224	0	case 9:
225	0	c += ((uint32) k[8] << 8);
226		/* fall through */
227	0	case 8:
228		/* the lowest byte of c is reserved for the length */
229	0	b += ka[1];
230	0	a += ka[0];
231	0	break;
232	0	case 7:
233	0	b += ((uint32) k[6] << 16);
234		/* fall through */
235	0	case 6:
236	0	b += ((uint32) k[5] << 8);
237		/* fall through */
238	0	case 5:
239	0	b += k[4];
240		/* fall through */
241	0	case 4:
242	0	a += ka[0];
243	0	break;
244	8	case 3:
245	8	a += ((uint32) k[2] << 16);
246		/* fall through */
247	8	case 2:
248	8	a += ((uint32) k[1] << 8);
249		/* fall through */
250	8	case 1:
251	8	a += k[0];
252		/* case 0: nothing left to add */
253	8	}
254	8	#endif /* WORDS_BIGENDIAN */
255	8	}
256	0	else
257	0	{
258		/* Code path for non-aligned source data */
259
260		/* handle most of the key */
261	0	while (len >= 12)
262	0	{
263		#ifdef WORDS_BIGENDIAN
264		a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
265		b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
266		c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
267		#else /* !WORDS_BIGENDIAN */
268	0	a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
269	0	b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
270	0	c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
271	0	#endif /* WORDS_BIGENDIAN */
272	0	mix(a, b, c);
273	0	k += 12;
274	0	len -= 12;
275	0	}
276
277		/* handle the last 11 bytes */
278		#ifdef WORDS_BIGENDIAN
279		switch (len)
280		{
281		case 11:
282		c += ((uint32) k[10] << 8);
283		/* fall through */
284		case 10:
285		c += ((uint32) k[9] << 16);
286		/* fall through */
287		case 9:
288		c += ((uint32) k[8] << 24);
289		/* fall through */
290		case 8:
291		/* the lowest byte of c is reserved for the length */
292		b += k[7];
293		/* fall through */
294		case 7:
295		b += ((uint32) k[6] << 8);
296		/* fall through */
297		case 6:
298		b += ((uint32) k[5] << 16);
299		/* fall through */
300		case 5:
301		b += ((uint32) k[4] << 24);
302		/* fall through */
303		case 4:
304		a += k[3];
305		/* fall through */
306		case 3:
307		a += ((uint32) k[2] << 8);
308		/* fall through */
309		case 2:
310		a += ((uint32) k[1] << 16);
311		/* fall through */
312		case 1:
313		a += ((uint32) k[0] << 24);
314		/* case 0: nothing left to add */
315		}
316		#else /* !WORDS_BIGENDIAN */
317	0	switch (len)
318	0	{
319	0	case 11:
320	0	c += ((uint32) k[10] << 24);
321		/* fall through */
322	0	case 10:
323	0	c += ((uint32) k[9] << 16);
324		/* fall through */
325	0	case 9:
326	0	c += ((uint32) k[8] << 8);
327		/* fall through */
328	0	case 8:
329		/* the lowest byte of c is reserved for the length */
330	0	b += ((uint32) k[7] << 24);
331		/* fall through */
332	0	case 7:
333	0	b += ((uint32) k[6] << 16);
334		/* fall through */
335	0	case 6:
336	0	b += ((uint32) k[5] << 8);
337		/* fall through */
338	0	case 5:
339	0	b += k[4];
340		/* fall through */
341	0	case 4:
342	0	a += ((uint32) k[3] << 24);
343		/* fall through */
344	0	case 3:
345	0	a += ((uint32) k[2] << 16);
346		/* fall through */
347	0	case 2:
348	0	a += ((uint32) k[1] << 8);
349		/* fall through */
350	0	case 1:
351	0	a += k[0];
352		/* case 0: nothing left to add */
353	0	}
354	0	#endif /* WORDS_BIGENDIAN */
355	0	}
356
357	8	final(a, b, c);
358
359		/* report the result */
360	8	return c;
361	8	}
362
363		/*
364		* hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
365		* k : the key (the unaligned variable-length array of bytes)
366		* len : the length of the key, counting by bytes
367		* seed : a 64-bit seed (0 means no seed)
368		*
369		* Returns a uint64 value. Otherwise similar to hash_bytes.
370		*/
371		uint64
372		hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
373	0	{
374	0	uint32 a,
375	0	b,
376	0	c,
377	0	len;
378
379		/* Set up the internal state */
380	0	len = keylen;
381	0	a = b = c = 0x9e3779b9 + len + 3923095;
382
383		/* If the seed is non-zero, use it to perturb the internal state. */
384	0	if (seed != 0)
385	0	{
386		/*
387		* In essence, the seed is treated as part of the data being hashed,
388		* but for simplicity, we pretend that it's padded with four bytes of
389		* zeroes so that the seed constitutes a 12-byte chunk.
390		*/
391	0	a += (uint32) (seed >> 32);
392	0	b += (uint32) seed;
393	0	mix(a, b, c);
394	0	}
395
396		/* If the source pointer is word-aligned, we use word-wide fetches */
397	0	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
398	0	{
399		/* Code path for aligned source data */
400	0	const uint32 ka = (const uint32 ) k;
401
402		/* handle most of the key */
403	0	while (len >= 12)
404	0	{
405	0	a += ka[0];
406	0	b += ka[1];
407	0	c += ka[2];
408	0	mix(a, b, c);
409	0	ka += 3;
410	0	len -= 12;
411	0	}
412
413		/* handle the last 11 bytes */
414	0	k = (const unsigned char *) ka;
415		#ifdef WORDS_BIGENDIAN
416		switch (len)
417		{
418		case 11:
419		c += ((uint32) k[10] << 8);
420		/* fall through */
421		case 10:
422		c += ((uint32) k[9] << 16);
423		/* fall through */
424		case 9:
425		c += ((uint32) k[8] << 24);
426		/* fall through */
427		case 8:
428		/* the lowest byte of c is reserved for the length */
429		b += ka[1];
430		a += ka[0];
431		break;
432		case 7:
433		b += ((uint32) k[6] << 8);
434		/* fall through */
435		case 6:
436		b += ((uint32) k[5] << 16);
437		/* fall through */
438		case 5:
439		b += ((uint32) k[4] << 24);
440		/* fall through */
441		case 4:
442		a += ka[0];
443		break;
444		case 3:
445		a += ((uint32) k[2] << 8);
446		/* fall through */
447		case 2:
448		a += ((uint32) k[1] << 16);
449		/* fall through */
450		case 1:
451		a += ((uint32) k[0] << 24);
452		/* case 0: nothing left to add */
453		}
454		#else /* !WORDS_BIGENDIAN */
455	0	switch (len)
456	0	{
457	0	case 11:
458	0	c += ((uint32) k[10] << 24);
459		/* fall through */
460	0	case 10:
461	0	c += ((uint32) k[9] << 16);
462		/* fall through */
463	0	case 9:
464	0	c += ((uint32) k[8] << 8);
465		/* fall through */
466	0	case 8:
467		/* the lowest byte of c is reserved for the length */
468	0	b += ka[1];
469	0	a += ka[0];
470	0	break;
471	0	case 7:
472	0	b += ((uint32) k[6] << 16);
473		/* fall through */
474	0	case 6:
475	0	b += ((uint32) k[5] << 8);
476		/* fall through */
477	0	case 5:
478	0	b += k[4];
479		/* fall through */
480	0	case 4:
481	0	a += ka[0];
482	0	break;
483	0	case 3:
484	0	a += ((uint32) k[2] << 16);
485		/* fall through */
486	0	case 2:
487	0	a += ((uint32) k[1] << 8);
488		/* fall through */
489	0	case 1:
490	0	a += k[0];
491		/* case 0: nothing left to add */
492	0	}
493	0	#endif /* WORDS_BIGENDIAN */
494	0	}
495	0	else
496	0	{
497		/* Code path for non-aligned source data */
498
499		/* handle most of the key */
500	0	while (len >= 12)
501	0	{
502		#ifdef WORDS_BIGENDIAN
503		a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
504		b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
505		c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
506		#else /* !WORDS_BIGENDIAN */
507	0	a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
508	0	b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
509	0	c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
510	0	#endif /* WORDS_BIGENDIAN */
511	0	mix(a, b, c);
512	0	k += 12;
513	0	len -= 12;
514	0	}
515
516		/* handle the last 11 bytes */
517		#ifdef WORDS_BIGENDIAN
518		switch (len)
519		{
520		case 11:
521		c += ((uint32) k[10] << 8);
522		/* fall through */
523		case 10:
524		c += ((uint32) k[9] << 16);
525		/* fall through */
526		case 9:
527		c += ((uint32) k[8] << 24);
528		/* fall through */
529		case 8:
530		/* the lowest byte of c is reserved for the length */
531		b += k[7];
532		/* fall through */
533		case 7:
534		b += ((uint32) k[6] << 8);
535		/* fall through */
536		case 6:
537		b += ((uint32) k[5] << 16);
538		/* fall through */
539		case 5:
540		b += ((uint32) k[4] << 24);
541		/* fall through */
542		case 4:
543		a += k[3];
544		/* fall through */
545		case 3:
546		a += ((uint32) k[2] << 8);
547		/* fall through */
548		case 2:
549		a += ((uint32) k[1] << 16);
550		/* fall through */
551		case 1:
552		a += ((uint32) k[0] << 24);
553		/* case 0: nothing left to add */
554		}
555		#else /* !WORDS_BIGENDIAN */
556	0	switch (len)
557	0	{
558	0	case 11:
559	0	c += ((uint32) k[10] << 24);
560		/* fall through */
561	0	case 10:
562	0	c += ((uint32) k[9] << 16);
563		/* fall through */
564	0	case 9:
565	0	c += ((uint32) k[8] << 8);
566		/* fall through */
567	0	case 8:
568		/* the lowest byte of c is reserved for the length */
569	0	b += ((uint32) k[7] << 24);
570		/* fall through */
571	0	case 7:
572	0	b += ((uint32) k[6] << 16);
573		/* fall through */
574	0	case 6:
575	0	b += ((uint32) k[5] << 8);
576		/* fall through */
577	0	case 5:
578	0	b += k[4];
579		/* fall through */
580	0	case 4:
581	0	a += ((uint32) k[3] << 24);
582		/* fall through */
583	0	case 3:
584	0	a += ((uint32) k[2] << 16);
585		/* fall through */
586	0	case 2:
587	0	a += ((uint32) k[1] << 8);
588		/* fall through */
589	0	case 1:
590	0	a += k[0];
591		/* case 0: nothing left to add */
592	0	}
593	0	#endif /* WORDS_BIGENDIAN */
594	0	}
595
596	0	final(a, b, c);
597
598		/* report the result */
599	0	return ((uint64) b << 32) \| c;
600	0	}
601
602		/*
603		* hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
604		*
605		* This has the same result as
606		* hash_bytes(&k, sizeof(uint32))
607		* but is faster and doesn't force the caller to store k into memory.
608		*/
609		uint32
610		hash_bytes_uint32(uint32 k)
611	0	{
612	0	uint32 a,
613	0	b,
614	0	c;
615
616	0	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
617	0	a += k;
618
619	0	final(a, b, c);
620
621		/* report the result */
622	0	return c;
623	0	}
624
625		/*
626		* hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
627		*
628		* Like hash_bytes_uint32, this is a convenience function.
629		*/
630		uint64
631		hash_bytes_uint32_extended(uint32 k, uint64 seed)
632	0	{
633	0	uint32 a,
634	0	b,
635	0	c;
636
637	0	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
638
639	0	if (seed != 0)
640	0	{
641	0	a += (uint32) (seed >> 32);
642	0	b += (uint32) seed;
643	0	mix(a, b, c);
644	0	}
645
646	0	a += k;
647
648	0	final(a, b, c);
649
650		/* report the result */
651	0	return ((uint64) b << 32) \| c;
652	0	}
653
654		/*
655		* string_hash: hash function for keys that are NUL-terminated strings.
656		*
657		* NOTE: this is the default hash function if none is specified.
658		*/
659		uint32
660		string_hash(const void *key, Size keysize)
661	8	{
662		/*
663		* If the string exceeds keysize-1 bytes, we want to hash only that many,
664		* because when it is copied into the hash table it will be truncated at
665		* that length.
666		*/
667	8	Size s_len = strlen((const char *) key);
668
669	8	s_len = Min(s_len, keysize - 1);
670	8	return hash_bytes((const unsigned char *) key, (int) s_len);
671	8	}
672
673		/*
674		* tag_hash: hash function for fixed-size tag values
675		*/
676		uint32
677		tag_hash(const void *key, Size keysize)
678	0	{
679	0	return hash_bytes((const unsigned char *) key, (int) keysize);
680	0	}
681
682		/*
683		* uint32_hash: hash function for keys that are uint32 or int32
684		*
685		* (tag_hash works for this case too, but is slower)
686		*/
687		uint32
688		uint32_hash(const void *key, Size keysize)
689	0	{
690	0	Assert(keysize == sizeof(uint32));
691	0	return hash_bytes_uint32(((const uint32 ) key));
692	0	}