Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Python/pyhash.c
Line
Count
Source (jump to first uncovered line)
1
/* Set of hash utility functions to help maintaining the invariant that
2
    if a==b then hash(a)==hash(b)
3
4
   All the utility functions (_Py_Hash*()) return "-1" to signify an error.
5
*/
6
#include "Python.h"
7
8
#ifdef __APPLE__
9
#  include <libkern/OSByteOrder.h>
10
#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
11
#  include <endian.h>
12
#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
13
#  include <sys/endian.h>
14
#endif
15
16
#ifdef __cplusplus
17
extern "C" {
18
#endif
19
20
_Py_HashSecret_t _Py_HashSecret = {{0}};
21
22
#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
23
extern PyHash_FuncDef PyHash_Func;
24
#else
25
static PyHash_FuncDef PyHash_Func;
26
#endif
27
28
/* Count _Py_HashBytes() calls */
29
#ifdef Py_HASH_STATS
30
#define Py_HASH_STATS_MAX 32
31
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
32
#endif
33
34
/* For numeric types, the hash of a number x is based on the reduction
35
   of x modulo the prime P = 2**_PyHASH_BITS - 1.  It's designed so that
36
   hash(x) == hash(y) whenever x and y are numerically equal, even if
37
   x and y have different types.
38
39
   A quick summary of the hashing strategy:
40
41
   (1) First define the 'reduction of x modulo P' for any rational
42
   number x; this is a standard extension of the usual notion of
43
   reduction modulo P for integers.  If x == p/q (written in lowest
44
   terms), the reduction is interpreted as the reduction of p times
45
   the inverse of the reduction of q, all modulo P; if q is exactly
46
   divisible by P then define the reduction to be infinity.  So we've
47
   got a well-defined map
48
49
      reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
50
51
   (2) Now for a rational number x, define hash(x) by:
52
53
      reduce(x)   if x >= 0
54
      -reduce(-x) if x < 0
55
56
   If the result of the reduction is infinity (this is impossible for
57
   integers, floats and Decimals) then use the predefined hash value
58
   _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
59
   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
60
   hashes of float and Decimal infinities and nans.
61
62
   A selling point for the above strategy is that it makes it possible
63
   to compute hashes of decimal and binary floating-point numbers
64
   efficiently, even if the exponent of the binary or decimal number
65
   is large.  The key point is that
66
67
      reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
68
69
   provided that {reduce(x), reduce(y)} != {0, infinity}.  The reduction of a
70
   binary or decimal float is never infinity, since the denominator is a power
71
   of 2 (for binary) or a divisor of a power of 10 (for decimal).  So we have,
72
   for nonnegative x,
73
74
      reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
75
76
      reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
77
78
   and reduce(10**e) can be computed efficiently by the usual modular
79
   exponentiation algorithm.  For reduce(2**e) it's even better: since
80
   P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
81
   by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
82
83
   */
84
85
Py_hash_t
86
_Py_HashDouble(double v)
87
10
{
88
10
    int e, sign;
89
10
    double m;
90
10
    Py_uhash_t x, y;
91
92
10
    if (!Py_IS_FINITE(v)) {
93
0
        if (Py_IS_INFINITY(v))
94
0
            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
95
0
        else
96
0
            return _PyHASH_NAN;
97
0
    }
98
99
10
    m = frexp(v, &e);
100
101
10
    sign = 1;
102
10
    if (m < 0) {
103
0
        sign = -1;
104
0
        m = -m;
105
0
    }
106
107
    /* process 28 bits at a time;  this should work well both for binary
108
       and hexadecimal floating point. */
109
10
    x = 0;
110
20
    while (m) {
111
10
        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
112
10
        m *= 268435456.0;  /* 2**28 */
113
10
        e -= 28;
114
10
        y = (Py_uhash_t)m;  /* pull out integer part */
115
10
        m -= y;
116
10
        x += y;
117
10
        if (x >= _PyHASH_MODULUS)
118
0
            x -= _PyHASH_MODULUS;
119
10
    }
120
121
    /* adjust for the exponent;  first reduce it modulo _PyHASH_BITS */
122
10
    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
123
10
    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
124
125
10
    x = x * sign;
126
10
    if (x == (Py_uhash_t)-1)
127
0
        x = (Py_uhash_t)-2;
128
10
    return (Py_hash_t)x;
129
10
}
130
131
Py_hash_t
132
_Py_HashPointer(void *p)
133
2.65k
{
134
2.65k
    Py_hash_t x;
135
2.65k
    size_t y = (size_t)p;
136
    /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
137
       excessive hash collisions for dicts and sets */
138
2.65k
    y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
139
2.65k
    x = (Py_hash_t)y;
140
2.65k
    if (x == -1)
141
0
        x = -2;
142
2.65k
    return x;
143
2.65k
}
144
145
Py_hash_t
146
_Py_HashBytes(const void *src, Py_ssize_t len)
147
216k
{
148
216k
    Py_hash_t x;
149
    /*
150
      We make the hash of the empty string be 0, rather than using
151
      (prefix ^ suffix), since this slightly obfuscates the hash secret
152
    */
153
216k
    if (len == 0) {
154
15
        return 0;
155
15
    }
156
157
#ifdef Py_HASH_STATS
158
    hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
159
#endif
160
161
#if Py_HASH_CUTOFF > 0
162
    if (len < Py_HASH_CUTOFF) {
163
        /* Optimize hashing of very small strings with inline DJBX33A. */
164
        Py_uhash_t hash;
165
        const unsigned char *p = src;
166
        hash = 5381; /* DJBX33A starts with 5381 */
167
168
        switch(len) {
169
            /* ((hash << 5) + hash) + *p == hash * 33 + *p */
170
            case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
171
            case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
172
            case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
173
            case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
174
            case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
175
            case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
176
            case 1: hash = ((hash << 5) + hash) + *p++; break;
177
            default:
178
                Py_UNREACHABLE();
179
        }
180
        hash ^= len;
181
        hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
182
        x = (Py_hash_t)hash;
183
    }
184
    else
185
#endif /* Py_HASH_CUTOFF */
186
216k
        x = PyHash_Func.hash(src, len);
187
188
216k
    if (x == -1)
189
0
        return -2;
190
216k
    return x;
191
216k
}
192
193
void
194
_PyHash_Fini(void)
195
0
{
196
#ifdef Py_HASH_STATS
197
    int i;
198
    Py_ssize_t total = 0;
199
    const char *fmt = "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n";
200
201
    fprintf(stderr, "len   calls    total\n");
202
    for (i = 1; i <= Py_HASH_STATS_MAX; i++) {
203
        total += hashstats[i];
204
        fprintf(stderr, fmt, i, hashstats[i], total);
205
    }
206
    total += hashstats[0];
207
    fprintf(stderr, ">  %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
208
            hashstats[0], total);
209
#endif
210
0
}
211
212
PyHash_FuncDef *
213
PyHash_GetFuncDef(void)
214
14
{
215
14
    return &PyHash_Func;
216
14
}
217
218
/* Optimized memcpy() for Windows */
219
#ifdef _MSC_VER
220
#  if SIZEOF_PY_UHASH_T == 4
221
#    define PY_UHASH_CPY(dst, src) do {                                    \
222
       dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
223
       } while(0)
224
#  elif SIZEOF_PY_UHASH_T == 8
225
#    define PY_UHASH_CPY(dst, src) do {                                    \
226
       dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
227
       dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
228
       } while(0)
229
#  else
230
#    error SIZEOF_PY_UHASH_T must be 4 or 8
231
#  endif /* SIZEOF_PY_UHASH_T */
232
#else /* not Windows */
233
#  define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
234
#endif /* _MSC_VER */
235
236
237
#if Py_HASH_ALGORITHM == Py_HASH_FNV
238
/* **************************************************************************
239
 * Modified Fowler-Noll-Vo (FNV) hash function
240
 */
241
static Py_hash_t
242
fnv(const void *src, Py_ssize_t len)
243
{
244
    const unsigned char *p = src;
245
    Py_uhash_t x;
246
    Py_ssize_t remainder, blocks;
247
    union {
248
        Py_uhash_t value;
249
        unsigned char bytes[SIZEOF_PY_UHASH_T];
250
    } block;
251
252
#ifdef Py_DEBUG
253
    assert(_Py_HashSecret_Initialized);
254
#endif
255
    remainder = len % SIZEOF_PY_UHASH_T;
256
    if (remainder == 0) {
257
        /* Process at least one block byte by byte to reduce hash collisions
258
         * for strings with common prefixes. */
259
        remainder = SIZEOF_PY_UHASH_T;
260
    }
261
    blocks = (len - remainder) / SIZEOF_PY_UHASH_T;
262
263
    x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
264
    x ^= (Py_uhash_t) *p << 7;
265
    while (blocks--) {
266
        PY_UHASH_CPY(block.bytes, p);
267
        x = (_PyHASH_MULTIPLIER * x) ^ block.value;
268
        p += SIZEOF_PY_UHASH_T;
269
    }
270
    /* add remainder */
271
    for (; remainder > 0; remainder--)
272
        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
273
    x ^= (Py_uhash_t) len;
274
    x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
275
    if (x == (Py_uhash_t) -1) {
276
        x = (Py_uhash_t) -2;
277
    }
278
    return x;
279
}
280
281
static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
282
                                     16 * SIZEOF_PY_HASH_T};
283
284
#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
285
286
287
/* **************************************************************************
288
 <MIT License>
289
 Copyright (c) 2013  Marek Majkowski <marek@popcount.org>
290
291
 Permission is hereby granted, free of charge, to any person obtaining a copy
292
 of this software and associated documentation files (the "Software"), to deal
293
 in the Software without restriction, including without limitation the rights
294
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
295
 copies of the Software, and to permit persons to whom the Software is
296
 furnished to do so, subject to the following conditions:
297
298
 The above copyright notice and this permission notice shall be included in
299
 all copies or substantial portions of the Software.
300
301
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
302
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
303
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
304
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
305
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
306
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
307
 THE SOFTWARE.
308
 </MIT License>
309
310
 Original location:
311
    https://github.com/majek/csiphash/
312
313
 Solution inspired by code from:
314
    Samuel Neves (supercop/crypto_auth/siphash24/little)
315
    djb (supercop/crypto_auth/siphash24/little2)
316
    Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
317
318
 Modified for Python by Christian Heimes:
319
    - C89 / MSVC compatibility
320
    - _rotl64() on Windows
321
    - letoh64() fallback
322
*/
323
324
/* byte swap little endian to host endian
325
 * Endian conversion not only ensures that the hash function returns the same
326
 * value on all platforms. It is also required to for a good dispersion of
327
 * the hash values' least significant bits.
328
 */
329
#if PY_LITTLE_ENDIAN
330
1.22M
#  define _le64toh(x) ((uint64_t)(x))
331
#elif defined(__APPLE__)
332
#  define _le64toh(x) OSSwapLittleToHostInt64(x)
333
#elif defined(HAVE_LETOH64)
334
#  define _le64toh(x) le64toh(x)
335
#else
336
#  define _le64toh(x) (((uint64_t)(x) << 56) | \
337
                      (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
338
                      (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
339
                      (((uint64_t)(x) << 8)  & 0xff00000000ULL) | \
340
                      (((uint64_t)(x) >> 8)  & 0xff000000ULL) | \
341
                      (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
342
                      (((uint64_t)(x) >> 40) & 0xff00ULL) | \
343
                      ((uint64_t)(x)  >> 56))
344
#endif
345
346
347
#ifdef _MSC_VER
348
#  define ROTATE(x, b)  _rotl64(x, b)
349
#else
350
14.6M
#  define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
351
#endif
352
353
#define HALF_ROUND(a,b,c,d,s,t)         \
354
4.89M
    a += b; c += d;             \
355
4.89M
    b = ROTATE(b, s) ^ a;           \
356
4.89M
    d = ROTATE(d, t) ^ c;           \
357
4.89M
    a = ROTATE(a, 32);
358
359
#define DOUBLE_ROUND(v0,v1,v2,v3)       \
360
1.22M
    HALF_ROUND(v0,v1,v2,v3,13,16);      \
361
1.22M
    HALF_ROUND(v2,v1,v0,v3,17,21);      \
362
1.22M
    HALF_ROUND(v0,v1,v2,v3,13,16);      \
363
1.22M
    HALF_ROUND(v2,v1,v0,v3,17,21);
364
365
366
static uint64_t
367
216k
siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
368
216k
    uint64_t b = (uint64_t)src_sz << 56;
369
216k
    const uint8_t *in = (const uint8_t*)src;
370
371
216k
    uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
372
216k
    uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
373
216k
    uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
374
216k
    uint64_t v3 = k1 ^ 0x7465646279746573ULL;
375
376
216k
    uint64_t t;
377
216k
    uint8_t *pt;
378
379
791k
    while (src_sz >= 8) {
380
574k
        uint64_t mi;
381
574k
        memcpy(&mi, in, sizeof(mi));
382
574k
        mi = _le64toh(mi);
383
574k
        in += sizeof(mi);
384
574k
        src_sz -= sizeof(mi);
385
574k
        v3 ^= mi;
386
574k
        DOUBLE_ROUND(v0,v1,v2,v3);
387
574k
        v0 ^= mi;
388
574k
    }
389
390
216k
    t = 0;
391
216k
    pt = (uint8_t *)&t;
392
216k
    switch (src_sz) {
393
27.8k
        case 7: pt[6] = in[6]; /* fall through */
394
52.3k
        case 6: pt[5] = in[5]; /* fall through */
395
75.5k
        case 5: pt[4] = in[4]; /* fall through */
396
100k
        case 4: memcpy(pt, in, sizeof(uint32_t)); break;
397
19.8k
        case 3: pt[2] = in[2]; /* fall through */
398
43.5k
        case 2: pt[1] = in[1]; /* fall through */
399
72.8k
        case 1: pt[0] = in[0]; /* fall through */
400
216k
    }
401
216k
    b |= _le64toh(t);
402
403
216k
    v3 ^= b;
404
216k
    DOUBLE_ROUND(v0,v1,v2,v3);
405
216k
    v0 ^= b;
406
216k
    v2 ^= 0xff;
407
216k
    DOUBLE_ROUND(v0,v1,v2,v3);
408
216k
    DOUBLE_ROUND(v0,v1,v2,v3);
409
410
    /* modified */
411
216k
    t = (v0 ^ v1) ^ (v2 ^ v3);
412
216k
    return t;
413
216k
}
414
415
uint64_t
416
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
417
0
{
418
0
    return siphash24(key, 0, src, src_sz);
419
0
}
420
421
422
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
423
static Py_hash_t
424
216k
pysiphash(const void *src, Py_ssize_t src_sz) {
425
216k
    return (Py_hash_t)siphash24(
426
216k
        _le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
427
216k
        src, src_sz);
428
216k
}
429
430
static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
431
#endif
432
433
#ifdef __cplusplus
434
}
435
#endif