/src/cpython/Python/pyhash.c

Source (jump to first uncovered line)
/* Set of hash utility functions to help maintaining the invariant that
    if a==b then hash(a)==hash(b)

   All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
#include "Python.h"
#include "pycore_pyhash.h"        // _Py_HashSecret_t

#ifdef __APPLE__
#  include <libkern/OSByteOrder.h>
#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
#  include <endian.h>
#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
#  include <sys/endian.h>
#endif

_Py_HashSecret_t _Py_HashSecret = {{0}};

#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
extern PyHash_FuncDef PyHash_Func;
#else
static PyHash_FuncDef PyHash_Func;
#endif

/* Count Py_HashBuffer() calls */
#ifdef Py_HASH_STATS
#define Py_HASH_STATS_MAX 32
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
#endif

/* For numeric types, the hash of a number x is based on the reduction
   of x modulo the prime P = 2**_PyHASH_BITS - 1.  It's designed so that
   hash(x) == hash(y) whenever x and y are numerically equal, even if
   x and y have different types.

   A quick summary of the hashing strategy:

   (1) First define the 'reduction of x modulo P' for any rational
   number x; this is a standard extension of the usual notion of
   reduction modulo P for integers.  If x == p/q (written in lowest
   terms), the reduction is interpreted as the reduction of p times
   the inverse of the reduction of q, all modulo P; if q is exactly
   divisible by P then define the reduction to be infinity.  So we've
   got a well-defined map

      reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.

   (2) Now for a rational number x, define hash(x) by:

      reduce(x)   if x >= 0
      -reduce(-x) if x < 0

   If the result of the reduction is infinity (this is impossible for
   integers, floats and Decimals) then use the predefined hash value
   _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
   _PyHASH_INF and -_PyHASH_INF are also used for the
   hashes of float and Decimal infinities.

   NaNs hash with a pointer hash.  Having distinct hash values prevents
   catastrophic pileups from distinct NaN instances which used to always
   have the same hash value but would compare unequal.

   A selling point for the above strategy is that it makes it possible
   to compute hashes of decimal and binary floating-point numbers
   efficiently, even if the exponent of the binary or decimal number
   is large.  The key point is that

      reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)

   provided that {reduce(x), reduce(y)} != {0, infinity}.  The reduction of a
   binary or decimal float is never infinity, since the denominator is a power
   of 2 (for binary) or a divisor of a power of 10 (for decimal).  So we have,
   for nonnegative x,

      reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS

      reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS

   and reduce(10**e) can be computed efficiently by the usual modular
   exponentiation algorithm.  For reduce(2**e) it's even better: since
   P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
   by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.

   */

Py_hash_t
_Py_HashDouble(PyObject *inst, double v)
{
    int e, sign;
    double m;
    Py_uhash_t x, y;

    if (!isfinite(v)) {
        if (isinf(v))
            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
        else
            return PyObject_GenericHash(inst);
    }

    m = frexp(v, &e);

    sign = 1;
    if (m < 0) {
        sign = -1;
        m = -m;
    }

    /* process 28 bits at a time;  this should work well both for binary
       and hexadecimal floating point. */
    x = 0;
    while (m) {
        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
        m *= 268435456.0;  /* 2**28 */
        e -= 28;
        y = (Py_uhash_t)m;  /* pull out integer part */
        m -= y;
        x += y;
        if (x >= _PyHASH_MODULUS)
            x -= _PyHASH_MODULUS;
    }

    /* adjust for the exponent;  first reduce it modulo _PyHASH_BITS */
    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);

    x = x * sign;
    if (x == (Py_uhash_t)-1)
        x = (Py_uhash_t)-2;
    return (Py_hash_t)x;
}

Py_hash_t
Py_HashPointer(const void *ptr)
{
    Py_hash_t hash = _Py_HashPointerRaw(ptr);
    if (hash == -1) {
        hash = -2;
    }
    return hash;
}

Py_hash_t
PyObject_GenericHash(PyObject *obj)
{
    return Py_HashPointer(obj);
}

Py_hash_t
Py_HashBuffer(const void *ptr, Py_ssize_t len)
{
    /*
      We make the hash of the empty string be 0, rather than using
      (prefix ^ suffix), since this slightly obfuscates the hash secret
    */
    if (len == 0) {
        return 0;
    }

#ifdef Py_HASH_STATS
    hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
#endif

    Py_hash_t x;
#if Py_HASH_CUTOFF > 0
    if (len < Py_HASH_CUTOFF) {
        /* Optimize hashing of very small strings with inline DJBX33A. */
        Py_uhash_t hash;
        const unsigned char *p = ptr;
        hash = 5381; /* DJBX33A starts with 5381 */

        switch(len) {
            /* ((hash << 5) + hash) + *p == hash * 33 + *p */
            case 7: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
            case 6: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
            case 5: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
            case 4: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
            case 3: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
            case 2: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
            case 1: hash = ((hash << 5) + hash) + *p++; break;
            default:
                Py_UNREACHABLE();
        }
        hash ^= len;
        hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
        x = (Py_hash_t)hash;
    }
    else
#endif /* Py_HASH_CUTOFF */
    {
        x = PyHash_Func.hash(ptr, len);
    }

    if (x == -1) {
        return -2;
    }
    return x;
}

void
_PyHash_Fini(void)
{
#ifdef Py_HASH_STATS
    fprintf(stderr, "len   calls    total\n");
    Py_ssize_t total = 0;
    for (int i = 1; i <= Py_HASH_STATS_MAX; i++) {
        total += hashstats[i];
        fprintf(stderr, "%2i %8zd %8zd\n", i, hashstats[i], total);
    }
    total += hashstats[0];
    fprintf(stderr, ">  %8zd %8zd\n", hashstats[0], total);
#endif
}

PyHash_FuncDef *
PyHash_GetFuncDef(void)
{
    return &PyHash_Func;
}

/* Optimized memcpy() for Windows */
#ifdef _MSC_VER
#  if SIZEOF_PY_UHASH_T == 4
#    define PY_UHASH_CPY(dst, src) do {                                    \
       dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
       } while(0)
#  elif SIZEOF_PY_UHASH_T == 8
#    define PY_UHASH_CPY(dst, src) do {                                    \
       dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
       dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
       } while(0)
#  else
#    error SIZEOF_PY_UHASH_T must be 4 or 8
#  endif /* SIZEOF_PY_UHASH_T */
#else /* not Windows */
#  define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
#endif /* _MSC_VER */


#if Py_HASH_ALGORITHM == Py_HASH_FNV
/* **************************************************************************
 * Modified Fowler-Noll-Vo (FNV) hash function
 */
static Py_hash_t
fnv(const void *src, Py_ssize_t len)
{
    const unsigned char *p = src;
    Py_uhash_t x;
    Py_ssize_t remainder, blocks;
    union {
        Py_uhash_t value;
        unsigned char bytes[SIZEOF_PY_UHASH_T];
    } block;

#ifdef Py_DEBUG
    assert(_Py_HashSecret_Initialized);
#endif
    remainder = len % SIZEOF_PY_UHASH_T;
    if (remainder == 0) {
        /* Process at least one block byte by byte to reduce hash collisions
         * for strings with common prefixes. */
        remainder = SIZEOF_PY_UHASH_T;
    }
    blocks = (len - remainder) / SIZEOF_PY_UHASH_T;

    x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
    x ^= (Py_uhash_t) *p << 7;
    while (blocks--) {
        PY_UHASH_CPY(block.bytes, p);
        x = (PyHASH_MULTIPLIER * x) ^ block.value;
        p += SIZEOF_PY_UHASH_T;
    }
    /* add remainder */
    for (; remainder > 0; remainder--)
        x = (PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
    x ^= (Py_uhash_t) len;
    x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
    if (x == (Py_uhash_t) -1) {
        x = (Py_uhash_t) -2;
    }
    return x;
}

static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
                                     16 * SIZEOF_PY_HASH_T};

#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */


/* **************************************************************************
 <MIT License>
 Copyright (c) 2013  Marek Majkowski <marek@popcount.org>

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 </MIT License>

 Original location:
    https://github.com/majek/csiphash/

 Solution inspired by code from:
    Samuel Neves (supercop/crypto_auth/siphash24/little)
    djb (supercop/crypto_auth/siphash24/little2)
    Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)

 Modified for Python by Christian Heimes:
    - C89 / MSVC compatibility
    - _rotl64() on Windows
    - letoh64() fallback
*/

/* byte swap little endian to host endian
 * Endian conversion not only ensures that the hash function returns the same
 * value on all platforms. It is also required to for a good dispersion of
 * the hash values' least significant bits.
 */
#if PY_LITTLE_ENDIAN
#  define _le64toh(x) ((uint64_t)(x))
#elif defined(__APPLE__)
#  define _le64toh(x) OSSwapLittleToHostInt64(x)
#elif defined(HAVE_LETOH64)
#  define _le64toh(x) le64toh(x)
#else
#  define _le64toh(x) (((uint64_t)(x) << 56) | \
                      (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
                      (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
                      (((uint64_t)(x) << 8)  & 0xff00000000ULL) | \
                      (((uint64_t)(x) >> 8)  & 0xff000000ULL) | \
                      (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
                      (((uint64_t)(x) >> 40) & 0xff00ULL) | \
                      ((uint64_t)(x)  >> 56))
#endif


#ifdef _MSC_VER
#  define ROTATE(x, b)  _rotl64(x, b)
#else
#  define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
#endif

#define HALF_ROUND(a,b,c,d,s,t)     \
    a += b; c += d;                 \
    b = ROTATE(b, s) ^ a;           \
    d = ROTATE(d, t) ^ c;           \
    a = ROTATE(a, 32);

#define SINGLE_ROUND(v0,v1,v2,v3)   \
    HALF_ROUND(v0,v1,v2,v3,13,16);  \
    HALF_ROUND(v2,v1,v0,v3,17,21);

#define DOUBLE_ROUND(v0,v1,v2,v3)   \
    SINGLE_ROUND(v0,v1,v2,v3);      \
    SINGLE_ROUND(v0,v1,v2,v3);


static uint64_t
siphash13(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
    uint64_t b = (uint64_t)src_sz << 56;
    const uint8_t *in = (const uint8_t*)src;

    uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
    uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
    uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
    uint64_t v3 = k1 ^ 0x7465646279746573ULL;

    uint64_t t;
    uint8_t *pt;

    while (src_sz >= 8) {
        uint64_t mi;
        memcpy(&mi, in, sizeof(mi));
        mi = _le64toh(mi);
        in += sizeof(mi);
        src_sz -= sizeof(mi);
        v3 ^= mi;
        SINGLE_ROUND(v0,v1,v2,v3);
        v0 ^= mi;
    }

    t = 0;
    pt = (uint8_t *)&t;
    switch (src_sz) {
        case 7: pt[6] = in[6]; _Py_FALLTHROUGH;
        case 6: pt[5] = in[5]; _Py_FALLTHROUGH;
        case 5: pt[4] = in[4]; _Py_FALLTHROUGH;
        case 4: memcpy(pt, in, sizeof(uint32_t)); break;
        case 3: pt[2] = in[2]; _Py_FALLTHROUGH;
        case 2: pt[1] = in[1]; _Py_FALLTHROUGH;
        case 1: pt[0] = in[0]; break;
    }
    b |= _le64toh(t);

    v3 ^= b;
    SINGLE_ROUND(v0,v1,v2,v3);
    v0 ^= b;
    v2 ^= 0xff;
    SINGLE_ROUND(v0,v1,v2,v3);
    SINGLE_ROUND(v0,v1,v2,v3);
    SINGLE_ROUND(v0,v1,v2,v3);

    /* modified */
    t = (v0 ^ v1) ^ (v2 ^ v3);
    return t;
}

#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
static uint64_t
siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
    uint64_t b = (uint64_t)src_sz << 56;
    const uint8_t *in = (const uint8_t*)src;

    uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
    uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
    uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
    uint64_t v3 = k1 ^ 0x7465646279746573ULL;

    uint64_t t;
    uint8_t *pt;

    while (src_sz >= 8) {
        uint64_t mi;
        memcpy(&mi, in, sizeof(mi));
        mi = _le64toh(mi);
        in += sizeof(mi);
        src_sz -= sizeof(mi);
        v3 ^= mi;
        DOUBLE_ROUND(v0,v1,v2,v3);
        v0 ^= mi;
    }

    t = 0;
    pt = (uint8_t *)&t;
    switch (src_sz) {
        case 7: pt[6] = in[6]; _Py_FALLTHROUGH;
        case 6: pt[5] = in[5]; _Py_FALLTHROUGH;
        case 5: pt[4] = in[4]; _Py_FALLTHROUGH;
        case 4: memcpy(pt, in, sizeof(uint32_t)); break;
        case 3: pt[2] = in[2]; _Py_FALLTHROUGH;
        case 2: pt[1] = in[1]; _Py_FALLTHROUGH;
        case 1: pt[0] = in[0]; break;
    }
    b |= _le64toh(t);

    v3 ^= b;
    DOUBLE_ROUND(v0,v1,v2,v3);
    v0 ^= b;
    v2 ^= 0xff;
    DOUBLE_ROUND(v0,v1,v2,v3);
    DOUBLE_ROUND(v0,v1,v2,v3);

    /* modified */
    t = (v0 ^ v1) ^ (v2 ^ v3);
    return t;
}
#endif

uint64_t
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
{
    return siphash13(key, 0, src, src_sz);
}


#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH13
static Py_hash_t
pysiphash(const void *src, Py_ssize_t src_sz) {
    return (Py_hash_t)siphash13(
        _le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
        src, src_sz);
}

static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash13", 64, 128};
#endif

#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
static Py_hash_t
pysiphash(const void *src, Py_ssize_t src_sz) {
    return (Py_hash_t)siphash24(
        _le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
        src, src_sz);
}

static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
#endif

Coverage Report

Created: 2025-07-04 06:49

Line	Count	Source (jump to first uncovered line)
1		/* Set of hash utility functions to help maintaining the invariant that
2		if a==b then hash(a)==hash(b)
3
4		All the utility functions (_Py_Hash*()) return "-1" to signify an error.
5		*/
6		#include "Python.h"
7		#include "pycore_pyhash.h" // _Py_HashSecret_t
8
9		#ifdef __APPLE__
10		# include <libkern/OSByteOrder.h>
11		#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
12		# include <endian.h>
13		#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
14		# include <sys/endian.h>
15		#endif
16
17		_Py_HashSecret_t _Py_HashSecret = {{0}};
18
19		#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
20		extern PyHash_FuncDef PyHash_Func;
21		#else
22		static PyHash_FuncDef PyHash_Func;
23		#endif
24
25		/* Count Py_HashBuffer() calls */
26		#ifdef Py_HASH_STATS
27		#define Py_HASH_STATS_MAX 32
28		static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
29		#endif
30
31		/* For numeric types, the hash of a number x is based on the reduction
32		of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
33		hash(x) == hash(y) whenever x and y are numerically equal, even if
34		x and y have different types.
35
36		A quick summary of the hashing strategy:
37
38		(1) First define the 'reduction of x modulo P' for any rational
39		number x; this is a standard extension of the usual notion of
40		reduction modulo P for integers. If x == p/q (written in lowest
41		terms), the reduction is interpreted as the reduction of p times
42		the inverse of the reduction of q, all modulo P; if q is exactly
43		divisible by P then define the reduction to be infinity. So we've
44		got a well-defined map
45
46		reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
47
48		(2) Now for a rational number x, define hash(x) by:
49
50		reduce(x) if x >= 0
51		-reduce(-x) if x < 0
52
53		If the result of the reduction is infinity (this is impossible for
54		integers, floats and Decimals) then use the predefined hash value
55		_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
56		_PyHASH_INF and -_PyHASH_INF are also used for the
57		hashes of float and Decimal infinities.
58
59		NaNs hash with a pointer hash. Having distinct hash values prevents
60		catastrophic pileups from distinct NaN instances which used to always
61		have the same hash value but would compare unequal.
62
63		A selling point for the above strategy is that it makes it possible
64		to compute hashes of decimal and binary floating-point numbers
65		efficiently, even if the exponent of the binary or decimal number
66		is large. The key point is that
67
68		reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
69
70		provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
71		binary or decimal float is never infinity, since the denominator is a power
72		of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
73		for nonnegative x,
74
75		reduce(x * 2*e) == reduce(x) reduce(2**e) % _PyHASH_MODULUS
76
77		reduce(x * 10*e) == reduce(x) reduce(10**e) % _PyHASH_MODULUS
78
79		and reduce(10**e) can be computed efficiently by the usual modular
80		exponentiation algorithm. For reduce(2**e) it's even better: since
81		P is of the form 2n-1, reduce(2e) is 2**(e mod n), and multiplication
82		by 2(e mod n) modulo 2n-1 just amounts to a rotation of bits.
83
84		*/
85
86		Py_hash_t
87		_Py_HashDouble(PyObject *inst, double v)
88	365	{
89	365	int e, sign;
90	365	double m;
91	365	Py_uhash_t x, y;
92
93	365	if (!isfinite(v)) {
94	0	if (isinf(v))
95	0	return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
96	0	else
97	0	return PyObject_GenericHash(inst);
98	0	}
99
100	365	m = frexp(v, &e);
101
102	365	sign = 1;
103	365	if (m < 0) {
104	12	sign = -1;
105	12	m = -m;
106	12	}
107
108		/* process 28 bits at a time; this should work well both for binary
109		and hexadecimal floating point. */
110	365	x = 0;
111	745	while (m) {
112	380	x = ((x << 28) & _PyHASH_MODULUS) \| x >> (_PyHASH_BITS - 28);
113	380	m = 268435456.0; / 2*28 /
114	380	e -= 28;
115	380	y = (Py_uhash_t)m; /* pull out integer part */
116	380	m -= y;
117	380	x += y;
118	380	if (x >= _PyHASH_MODULUS)
119	0	x -= _PyHASH_MODULUS;
120	380	}
121
122		/* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
123	365	e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
124	365	x = ((x << e) & _PyHASH_MODULUS) \| x >> (_PyHASH_BITS - e);
125
126	365	x = x * sign;
127	365	if (x == (Py_uhash_t)-1)
128	3	x = (Py_uhash_t)-2;
129	365	return (Py_hash_t)x;
130	365	}
131
132		Py_hash_t
133		Py_HashPointer(const void *ptr)
134	940k	{
135	940k	Py_hash_t hash = _Py_HashPointerRaw(ptr);
136	940k	if (hash == -1) {
137	0	hash = -2;
138	0	}
139	940k	return hash;
140	940k	}
141
142		Py_hash_t
143		PyObject_GenericHash(PyObject *obj)
144	938k	{
145	938k	return Py_HashPointer(obj);
146	938k	}
147
148		Py_hash_t
149		Py_HashBuffer(const void *ptr, Py_ssize_t len)
150	51.8M	{
151		/*
152		We make the hash of the empty string be 0, rather than using
153		(prefix ^ suffix), since this slightly obfuscates the hash secret
154		*/
155	51.8M	if (len == 0) {
156	32	return 0;
157	32	}
158
159		#ifdef Py_HASH_STATS
160		hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
161		#endif
162
163	51.8M	Py_hash_t x;
164		#if Py_HASH_CUTOFF > 0
165		if (len < Py_HASH_CUTOFF) {
166		/* Optimize hashing of very small strings with inline DJBX33A. */
167		Py_uhash_t hash;
168		const unsigned char *p = ptr;
169		hash = 5381; /* DJBX33A starts with 5381 */
170
171		switch(len) {
172		/* ((hash << 5) + hash) + p == hash 33 + p /
173		case 7: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
174		case 6: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
175		case 5: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
176		case 4: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
177		case 3: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
178		case 2: hash = ((hash << 5) + hash) + *p++; _Py_FALLTHROUGH;
179		case 1: hash = ((hash << 5) + hash) + *p++; break;
180		default:
181		Py_UNREACHABLE();
182		}
183		hash ^= len;
184		hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
185		x = (Py_hash_t)hash;
186		}
187		else
188		#endif /* Py_HASH_CUTOFF */
189	51.8M	{
190	51.8M	x = PyHash_Func.hash(ptr, len);
191	51.8M	}
192
193	51.8M	if (x == -1) {
194	0	return -2;
195	0	}
196	51.8M	return x;
197	51.8M	}
198
199		void
200		_PyHash_Fini(void)
201	0	{
202		#ifdef Py_HASH_STATS
203		fprintf(stderr, "len calls total\n");
204		Py_ssize_t total = 0;
205		for (int i = 1; i <= Py_HASH_STATS_MAX; i++) {
206		total += hashstats[i];
207		fprintf(stderr, "%2i %8zd %8zd\n", i, hashstats[i], total);
208		}
209		total += hashstats[0];
210		fprintf(stderr, "> %8zd %8zd\n", hashstats[0], total);
211		#endif
212	0	}
213
214		PyHash_FuncDef *
215		PyHash_GetFuncDef(void)
216	16	{
217	16	return &PyHash_Func;
218	16	}
219
220		/* Optimized memcpy() for Windows */
221		#ifdef _MSC_VER
222		# if SIZEOF_PY_UHASH_T == 4
223		# define PY_UHASH_CPY(dst, src) do { \
224		dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
225		} while(0)
226		# elif SIZEOF_PY_UHASH_T == 8
227		# define PY_UHASH_CPY(dst, src) do { \
228		dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
229		dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
230		} while(0)
231		# else
232		# error SIZEOF_PY_UHASH_T must be 4 or 8
233		# endif /* SIZEOF_PY_UHASH_T */
234		#else /* not Windows */
235		# define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
236		#endif /* _MSC_VER */
237
238
239		#if Py_HASH_ALGORITHM == Py_HASH_FNV
240		/* **************************************************************************
241		* Modified Fowler-Noll-Vo (FNV) hash function
242		*/
243		static Py_hash_t
244		fnv(const void *src, Py_ssize_t len)
245		{
246		const unsigned char *p = src;
247		Py_uhash_t x;
248		Py_ssize_t remainder, blocks;
249		union {
250		Py_uhash_t value;
251		unsigned char bytes[SIZEOF_PY_UHASH_T];
252		} block;
253
254		#ifdef Py_DEBUG
255		assert(_Py_HashSecret_Initialized);
256		#endif
257		remainder = len % SIZEOF_PY_UHASH_T;
258		if (remainder == 0) {
259		/* Process at least one block byte by byte to reduce hash collisions
260		* for strings with common prefixes. */
261		remainder = SIZEOF_PY_UHASH_T;
262		}
263		blocks = (len - remainder) / SIZEOF_PY_UHASH_T;
264
265		x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
266		x ^= (Py_uhash_t) *p << 7;
267		while (blocks--) {
268		PY_UHASH_CPY(block.bytes, p);
269		x = (PyHASH_MULTIPLIER * x) ^ block.value;
270		p += SIZEOF_PY_UHASH_T;
271		}
272		/* add remainder */
273		for (; remainder > 0; remainder--)
274		x = (PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
275		x ^= (Py_uhash_t) len;
276		x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
277		if (x == (Py_uhash_t) -1) {
278		x = (Py_uhash_t) -2;
279		}
280		return x;
281		}
282
283		static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
284		16 * SIZEOF_PY_HASH_T};
285
286		#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
287
288
289		/* **************************************************************************
290		<MIT License>
291		Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
292
293		Permission is hereby granted, free of charge, to any person obtaining a copy
294		of this software and associated documentation files (the "Software"), to deal
295		in the Software without restriction, including without limitation the rights
296		to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
297		copies of the Software, and to permit persons to whom the Software is
298		furnished to do so, subject to the following conditions:
299
300		The above copyright notice and this permission notice shall be included in
301		all copies or substantial portions of the Software.
302
303		THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
304		IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
305		FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
306		AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
307		LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
308		OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
309		THE SOFTWARE.
310		</MIT License>
311
312		Original location:
313		https://github.com/majek/csiphash/
314
315		Solution inspired by code from:
316		Samuel Neves (supercop/crypto_auth/siphash24/little)
317		djb (supercop/crypto_auth/siphash24/little2)
318		Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
319
320		Modified for Python by Christian Heimes:
321		- C89 / MSVC compatibility
322		- _rotl64() on Windows
323		- letoh64() fallback
324		*/
325
326		/* byte swap little endian to host endian
327		* Endian conversion not only ensures that the hash function returns the same
328		* value on all platforms. It is also required to for a good dispersion of
329		* the hash values' least significant bits.
330		*/
331		#if PY_LITTLE_ENDIAN
332	322M	# define _le64toh(x) ((uint64_t)(x))
333		#elif defined(__APPLE__)
334		# define _le64toh(x) OSSwapLittleToHostInt64(x)
335		#elif defined(HAVE_LETOH64)
336		# define _le64toh(x) le64toh(x)
337		#else
338		# define _le64toh(x) (((uint64_t)(x) << 56) \| \
339		(((uint64_t)(x) << 40) & 0xff000000000000ULL) \| \
340		(((uint64_t)(x) << 24) & 0xff0000000000ULL) \| \
341		(((uint64_t)(x) << 8) & 0xff00000000ULL) \| \
342		(((uint64_t)(x) >> 8) & 0xff000000ULL) \| \
343		(((uint64_t)(x) >> 24) & 0xff0000ULL) \| \
344		(((uint64_t)(x) >> 40) & 0xff00ULL) \| \
345		((uint64_t)(x) >> 56))
346		#endif
347
348
349		#ifdef _MSC_VER
350		# define ROTATE(x, b) _rotl64(x, b)
351		#else
352	2.24G	# define ROTATE(x, b) (uint64_t)( ((x) << (b)) \| ( (x) >> (64 - (b))) )
353		#endif
354
355		#define HALF_ROUND(a,b,c,d,s,t) \
356	748M	a += b; c += d; \
357	748M	b = ROTATE(b, s) ^ a; \
358	748M	d = ROTATE(d, t) ^ c; \
359	748M	a = ROTATE(a, 32);
360
361		#define SINGLE_ROUND(v0,v1,v2,v3) \
362	374M	HALF_ROUND(v0,v1,v2,v3,13,16); \
363	374M	HALF_ROUND(v2,v1,v0,v3,17,21);
364
365		#define DOUBLE_ROUND(v0,v1,v2,v3) \
366		SINGLE_ROUND(v0,v1,v2,v3); \
367		SINGLE_ROUND(v0,v1,v2,v3);
368
369
370		static uint64_t
371	51.8M	siphash13(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
372	51.8M	uint64_t b = (uint64_t)src_sz << 56;
373	51.8M	const uint8_t in = (const uint8_t)src;
374
375	51.8M	uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
376	51.8M	uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
377	51.8M	uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
378	51.8M	uint64_t v3 = k1 ^ 0x7465646279746573ULL;
379
380	51.8M	uint64_t t;
381	51.8M	uint8_t *pt;
382
383	218M	while (src_sz >= 8) {
384	167M	uint64_t mi;
385	167M	memcpy(&mi, in, sizeof(mi));
386	167M	mi = _le64toh(mi);
387	167M	in += sizeof(mi);
388	167M	src_sz -= sizeof(mi);
389	167M	v3 ^= mi;
390	167M	SINGLE_ROUND(v0,v1,v2,v3);
391	167M	v0 ^= mi;
392	167M	}
393
394	51.8M	t = 0;
395	51.8M	pt = (uint8_t *)&t;
396	51.8M	switch (src_sz) {
397	255k	case 7: pt[6] = in[6]; _Py_FALLTHROUGH;
398	3.14M	case 6: pt[5] = in[5]; _Py_FALLTHROUGH;
399	3.63M	case 5: pt[4] = in[4]; _Py_FALLTHROUGH;
400	6.52M	case 4: memcpy(pt, in, sizeof(uint32_t)); break;
401	2.02M	case 3: pt[2] = in[2]; _Py_FALLTHROUGH;
402	41.3M	case 2: pt[1] = in[1]; _Py_FALLTHROUGH;
403	42.7M	case 1: pt[0] = in[0]; break;
404	51.8M	}
405	51.8M	b \|= _le64toh(t);
406
407	51.8M	v3 ^= b;
408	51.8M	SINGLE_ROUND(v0,v1,v2,v3);
409	51.8M	v0 ^= b;
410	51.8M	v2 ^= 0xff;
411	51.8M	SINGLE_ROUND(v0,v1,v2,v3);
412	51.8M	SINGLE_ROUND(v0,v1,v2,v3);
413	51.8M	SINGLE_ROUND(v0,v1,v2,v3);
414
415		/* modified */
416	51.8M	t = (v0 ^ v1) ^ (v2 ^ v3);
417	51.8M	return t;
418	51.8M	}
419
420		#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
421		static uint64_t
422		siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
423		uint64_t b = (uint64_t)src_sz << 56;
424		const uint8_t in = (const uint8_t)src;
425
426		uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
427		uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
428		uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
429		uint64_t v3 = k1 ^ 0x7465646279746573ULL;
430
431		uint64_t t;
432		uint8_t *pt;
433
434		while (src_sz >= 8) {
435		uint64_t mi;
436		memcpy(&mi, in, sizeof(mi));
437		mi = _le64toh(mi);
438		in += sizeof(mi);
439		src_sz -= sizeof(mi);
440		v3 ^= mi;
441		DOUBLE_ROUND(v0,v1,v2,v3);
442		v0 ^= mi;
443		}
444
445		t = 0;
446		pt = (uint8_t *)&t;
447		switch (src_sz) {
448		case 7: pt[6] = in[6]; _Py_FALLTHROUGH;
449		case 6: pt[5] = in[5]; _Py_FALLTHROUGH;
450		case 5: pt[4] = in[4]; _Py_FALLTHROUGH;
451		case 4: memcpy(pt, in, sizeof(uint32_t)); break;
452		case 3: pt[2] = in[2]; _Py_FALLTHROUGH;
453		case 2: pt[1] = in[1]; _Py_FALLTHROUGH;
454		case 1: pt[0] = in[0]; break;
455		}
456		b \|= _le64toh(t);
457
458		v3 ^= b;
459		DOUBLE_ROUND(v0,v1,v2,v3);
460		v0 ^= b;
461		v2 ^= 0xff;
462		DOUBLE_ROUND(v0,v1,v2,v3);
463		DOUBLE_ROUND(v0,v1,v2,v3);
464
465		/* modified */
466		t = (v0 ^ v1) ^ (v2 ^ v3);
467		return t;
468		}
469		#endif
470
471		uint64_t
472		_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
473	0	{
474	0	return siphash13(key, 0, src, src_sz);
475	0	}
476
477
478		#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH13
479		static Py_hash_t
480	51.8M	pysiphash(const void *src, Py_ssize_t src_sz) {
481	51.8M	return (Py_hash_t)siphash13(
482	51.8M	_le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
483	51.8M	src, src_sz);
484	51.8M	}
485
486		static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash13", 64, 128};
487		#endif
488
489		#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
490		static Py_hash_t
491		pysiphash(const void *src, Py_ssize_t src_sz) {
492		return (Py_hash_t)siphash24(
493		_le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
494		src, src_sz);
495		}
496
497		static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
498		#endif