Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/bootstrap_hash.c
Line
Count
Source (jump to first uncovered line)
1
#include "Python.h"
2
#include "pycore_fileutils.h"     // _Py_fstat_noraise()
3
#include "pycore_initconfig.h"
4
#include "pycore_pyhash.h"        // _Py_HashSecret_t
5
#include "pycore_pylifecycle.h"   // _PyOS_URandomNonblock()
6
#include "pycore_runtime.h"       // _PyRuntime
7
8
#ifdef HAVE_UNISTD_H
9
#  include <unistd.h>             // close()
10
#endif
11
#ifdef MS_WINDOWS
12
#  include <windows.h>
13
#  include <bcrypt.h>
14
#else
15
#  include <fcntl.h>              // O_RDONLY
16
#  ifdef HAVE_SYS_STAT_H
17
#    include <sys/stat.h>
18
#  endif
19
#  ifdef HAVE_LINUX_RANDOM_H
20
#    include <linux/random.h>     // GRND_NONBLOCK
21
#  endif
22
#  if defined(HAVE_SYS_RANDOM_H) && (defined(HAVE_GETRANDOM) || defined(HAVE_GETENTROPY))
23
#    include <sys/random.h>       // getrandom()
24
#  endif
25
#  if !defined(HAVE_GETRANDOM) && defined(HAVE_GETRANDOM_SYSCALL)
26
#    include <sys/syscall.h>      // SYS_getrandom
27
#  endif
28
#endif
29
30
#ifdef _Py_MEMORY_SANITIZER
31
#  include <sanitizer/msan_interface.h>
32
#endif
33
34
/* Runtime availability test guarding the getrandom()/getentropy() calls made
   by pyurandom().

   On Apple platforms, when the compiler provides __builtin_available(), the
   macro expands to a runtime OS version check. The minimum versions match the
   availability attributes declared on py_getentropy() in this file
   (macOS 10.12, iOS 10.0, tvOS 10.0, watchOS 3.0).

   Everywhere else the macro is the constant 1, so the guarded branch is
   always taken.

   The macro must be used directly as the condition of an `if` statement. */
#if defined(__APPLE__) && defined(__has_builtin)
#  if __has_builtin(__builtin_available)
#    define HAVE_GETENTRYPY_GETRANDOM_RUNTIME __builtin_available(macOS 10.12, iOS 10.0, tvOS 10.0, watchOS 3.0, *)
#  endif
#endif
#ifndef HAVE_GETENTRYPY_GETRANDOM_RUNTIME
#  define HAVE_GETENTRYPY_GETRANDOM_RUNTIME 1
#endif
42
43
44
/* Guard flag for the hash secret: _Py_HashRandomization_Init() sets it to 1
   on its first call and returns immediately on later calls when it is
   already set.

   The variable has external linkage only when Py_DEBUG is defined; otherwise
   it is private to this file. */
#ifdef Py_DEBUG
45
int _Py_HashSecret_Initialized = 0;
46
#else
47
static int _Py_HashSecret_Initialized = 0;
48
#endif
49
50
#ifdef MS_WINDOWS
51
52
/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
53
   API. Return 0 on success, or raise an exception and return -1 on error. */
54
static int
55
win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
56
{
57
    while (size > 0)
58
    {
59
        DWORD chunk = (DWORD)Py_MIN(size, PY_DWORD_MAX);
60
        NTSTATUS status = BCryptGenRandom(NULL, buffer, chunk, BCRYPT_USE_SYSTEM_PREFERRED_RNG);
61
        if (!BCRYPT_SUCCESS(status)) {
62
            /* BCryptGenRandom() failed */
63
            if (raise) {
64
                PyErr_SetFromWindowsErr(0);
65
            }
66
            return -1;
67
        }
68
        buffer += chunk;
69
        size -= chunk;
70
    }
71
    return 0;
72
}
73
74
#else /* !MS_WINDOWS */
75
76
#if defined(HAVE_GETRANDOM) || defined(HAVE_GETRANDOM_SYSCALL)
77
#define PY_GETRANDOM 1
78
79
/* Call getrandom() to get random bytes:
80
81
   - Return 1 on success
82
   - Return 0 if getrandom() is not available (failed with ENOSYS or EPERM),
83
     or if getrandom(GRND_NONBLOCK) failed with EAGAIN (system urandom not
84
     initialized yet) and raise=0.
85
   - Raise an exception (if raise is non-zero) and return -1 on error:
86
     if getrandom() failed with EINTR, raise is non-zero and the Python signal
87
     handler raised an exception, or if getrandom() failed with a different
88
     error.
89
90
   getrandom() is retried if it failed with EINTR: interrupted by a signal. */
91
static int
92
py_getrandom(void *buffer, Py_ssize_t size, int blocking, int raise)
93
18
{
94
    /* Is getrandom() supported by the running kernel? Set to 0 if getrandom()
95
       failed with ENOSYS or EPERM. Need Linux kernel 3.17 or newer, or Solaris
96
       11.3 or newer */
97
18
    static int getrandom_works = 1;
98
18
    int flags;
99
18
    char *dest;
100
18
    long n;
101
102
18
    if (!getrandom_works) {
103
0
        return 0;
104
0
    }
105
106
18
    flags = blocking ? 0 : GRND_NONBLOCK;
107
18
    dest = buffer;
108
36
    while (0 < size) {
109
#if defined(__sun) && defined(__SVR4)
110
        /* Issue #26735: On Solaris, getrandom() is limited to returning up
111
           to 1024 bytes. Call it multiple times if more bytes are
112
           requested. */
113
        n = Py_MIN(size, 1024);
114
#else
115
18
        n = Py_MIN(size, LONG_MAX);
116
18
#endif
117
118
18
        errno = 0;
119
18
#ifdef HAVE_GETRANDOM
120
18
        if (raise) {
121
2
            Py_BEGIN_ALLOW_THREADS
122
2
            n = getrandom(dest, n, flags);
123
2
            Py_END_ALLOW_THREADS
124
2
        }
125
16
        else {
126
16
            n = getrandom(dest, n, flags);
127
16
        }
128
#else
129
        /* On Linux, use the syscall() function because the GNU libc doesn't
130
           expose the Linux getrandom() syscall yet. See:
131
           https://sourceware.org/bugzilla/show_bug.cgi?id=17252 */
132
        if (raise) {
133
            Py_BEGIN_ALLOW_THREADS
134
            n = syscall(SYS_getrandom, dest, n, flags);
135
            Py_END_ALLOW_THREADS
136
        }
137
        else {
138
            n = syscall(SYS_getrandom, dest, n, flags);
139
        }
140
#  ifdef _Py_MEMORY_SANITIZER
141
        if (n > 0) {
142
             __msan_unpoison(dest, n);
143
        }
144
#  endif
145
#endif
146
147
18
        if (n < 0) {
148
            /* ENOSYS: the syscall is not supported by the kernel.
149
               EPERM: the syscall is blocked by a security policy (ex: SECCOMP)
150
               or something else. */
151
0
            if (errno == ENOSYS || errno == EPERM) {
152
0
                getrandom_works = 0;
153
0
                return 0;
154
0
            }
155
156
            /* getrandom(GRND_NONBLOCK) fails with EAGAIN if the system urandom
157
               is not initialized yet. For _PyRandom_Init(), we ignore the
158
               error and fall back on reading /dev/urandom which never blocks,
159
               even if the system urandom is not initialized yet:
160
               see the PEP 524. */
161
0
            if (errno == EAGAIN && !raise && !blocking) {
162
0
                return 0;
163
0
            }
164
165
0
            if (errno == EINTR) {
166
0
                if (raise) {
167
0
                    if (PyErr_CheckSignals()) {
168
0
                        return -1;
169
0
                    }
170
0
                }
171
172
                /* retry getrandom() if it was interrupted by a signal */
173
0
                continue;
174
0
            }
175
176
0
            if (raise) {
177
0
                PyErr_SetFromErrno(PyExc_OSError);
178
0
            }
179
0
            return -1;
180
0
        }
181
182
18
        dest += n;
183
18
        size -= n;
184
18
    }
185
18
    return 1;
186
18
}
187
188
#elif defined(HAVE_GETENTROPY)
189
#define PY_GETENTROPY 1
190
191
/* Fill buffer with size pseudo-random bytes generated by getentropy():
192
193
   - Return 1 on success
194
   - Return 0 if getentropy() syscall is not available (failed with ENOSYS or
195
     EPERM).
196
   - Raise an exception (if raise is non-zero) and return -1 on error:
197
     if getentropy() failed with EINTR, raise is non-zero and the Python signal
198
     handler raised an exception, or if getentropy() failed with a different
199
     error.
200
201
   getentropy() is retried if it failed with EINTR: interrupted by a signal. */
202
203
#if defined(__APPLE__) && _Py__has_attribute(availability)
204
static int
205
py_getentropy(char *buffer, Py_ssize_t size, int raise)
206
        __attribute__((availability(macos,introduced=10.12)))
207
        __attribute__((availability(ios,introduced=10.0)))
208
        __attribute__((availability(tvos,introduced=10.0)))
209
        __attribute__((availability(watchos,introduced=3.0)));
210
#endif
211
212
static int
213
py_getentropy(char *buffer, Py_ssize_t size, int raise)
214
{
215
    /* Is getentropy() supported by the running kernel? Set to 0 if
216
       getentropy() failed with ENOSYS or EPERM. */
217
    static int getentropy_works = 1;
218
219
    if (!getentropy_works) {
220
        return 0;
221
    }
222
223
    while (size > 0) {
224
        /* getentropy() is limited to returning up to 256 bytes. Call it
225
           multiple times if more bytes are requested. */
226
        Py_ssize_t len = Py_MIN(size, 256);
227
        int res;
228
229
        if (raise) {
230
            Py_BEGIN_ALLOW_THREADS
231
            res = getentropy(buffer, len);
232
            Py_END_ALLOW_THREADS
233
        }
234
        else {
235
            res = getentropy(buffer, len);
236
        }
237
238
        if (res < 0) {
239
            /* ENOSYS: the syscall is not supported by the running kernel.
240
               EPERM: the syscall is blocked by a security policy (ex: SECCOMP)
241
               or something else. */
242
            if (errno == ENOSYS || errno == EPERM) {
243
                getentropy_works = 0;
244
                return 0;
245
            }
246
247
            if (errno == EINTR) {
248
                if (raise) {
249
                    if (PyErr_CheckSignals()) {
250
                        return -1;
251
                    }
252
                }
253
254
                /* retry getentropy() if it was interrupted by a signal */
255
                continue;
256
            }
257
258
            if (raise) {
259
                PyErr_SetFromErrno(PyExc_OSError);
260
            }
261
            return -1;
262
        }
263
264
        buffer += len;
265
        size -= len;
266
    }
267
    return 1;
268
}
269
#endif /* HAVE_GETRANDOM || HAVE_GETRANDOM_SYSCALL, #elif HAVE_GETENTROPY */
270
271
272
0
#define urandom_cache (_PyRuntime.pyhash_state.urandom_cache)
273
274
/* Read random bytes from the /dev/urandom device:
275
276
   - Return 0 on success
277
   - Raise an exception (if raise is non-zero) and return -1 on error
278
279
   Possible causes of errors:
280
281
   - open() failed with ENOENT, ENXIO, ENODEV, EACCES: the /dev/urandom device
282
     was not found. For example, it was removed manually or not exposed in a
283
     chroot or container.
284
   - open() failed with a different error
285
   - fstat() failed
286
   - read() failed or returned 0
287
288
   read() is retried if it failed with EINTR: interrupted by a signal.
289
290
   The file descriptor of the device is kept open between calls to avoid using
291
   many file descriptors when run in parallel from multiple threads:
292
   see the issue #18756.
293
294
   st_dev and st_ino fields of the file descriptor (from fstat()) are cached to
295
   check if the file descriptor was replaced by a different file (which is
296
   likely a bug in the application): see the issue #21207.
297
298
   If the file descriptor was closed or replaced, open a new file descriptor
299
   but don't close the old file descriptor: it probably points to something
300
   important for some third-party code. */
301
static int
302
dev_urandom(char *buffer, Py_ssize_t size, int raise)
303
0
{
304
0
    int fd;
305
0
    Py_ssize_t n;
306
307
0
    if (raise) {
308
0
        struct _Py_stat_struct st;
309
0
        int fstat_result;
310
311
0
        if (urandom_cache.fd >= 0) {
312
0
            Py_BEGIN_ALLOW_THREADS
313
0
            fstat_result = _Py_fstat_noraise(urandom_cache.fd, &st);
314
0
            Py_END_ALLOW_THREADS
315
316
            /* Does the fd point to the same thing as before? (issue #21207) */
317
0
            if (fstat_result
318
0
                || st.st_dev != urandom_cache.st_dev
319
0
                || st.st_ino != urandom_cache.st_ino) {
320
                /* Something changed: forget the cached fd (but don't close it,
321
                   since it probably points to something important for some
322
                   third-party code). */
323
0
                urandom_cache.fd = -1;
324
0
            }
325
0
        }
326
0
        if (urandom_cache.fd >= 0)
327
0
            fd = urandom_cache.fd;
328
0
        else {
329
0
            fd = _Py_open("/dev/urandom", O_RDONLY);
330
0
            if (fd < 0) {
331
0
                if (errno == ENOENT || errno == ENXIO ||
332
0
                    errno == ENODEV || errno == EACCES) {
333
0
                    PyErr_SetString(PyExc_NotImplementedError,
334
0
                                    "/dev/urandom (or equivalent) not found");
335
0
                }
336
                /* otherwise, keep the OSError exception raised by _Py_open() */
337
0
                return -1;
338
0
            }
339
0
            if (urandom_cache.fd >= 0) {
340
                /* urandom_fd was initialized by another thread while we were
341
                   not holding the GIL, keep it. */
342
0
                close(fd);
343
0
                fd = urandom_cache.fd;
344
0
            }
345
0
            else {
346
0
                if (_Py_fstat(fd, &st)) {
347
0
                    close(fd);
348
0
                    return -1;
349
0
                }
350
0
                else {
351
0
                    urandom_cache.fd = fd;
352
0
                    urandom_cache.st_dev = st.st_dev;
353
0
                    urandom_cache.st_ino = st.st_ino;
354
0
                }
355
0
            }
356
0
        }
357
358
0
        do {
359
0
            n = _Py_read(fd, buffer, (size_t)size);
360
0
            if (n == -1)
361
0
                return -1;
362
0
            if (n == 0) {
363
0
                PyErr_Format(PyExc_RuntimeError,
364
0
                        "Failed to read %zi bytes from /dev/urandom",
365
0
                        size);
366
0
                return -1;
367
0
            }
368
369
0
            buffer += n;
370
0
            size -= n;
371
0
        } while (0 < size);
372
0
    }
373
0
    else {
374
0
        fd = _Py_open_noraise("/dev/urandom", O_RDONLY);
375
0
        if (fd < 0) {
376
0
            return -1;
377
0
        }
378
379
0
        while (0 < size)
380
0
        {
381
0
            do {
382
0
                n = read(fd, buffer, (size_t)size);
383
0
            } while (n < 0 && errno == EINTR);
384
385
0
            if (n <= 0) {
386
                /* stop on error or if read(size) returned 0 */
387
0
                close(fd);
388
0
                return -1;
389
0
            }
390
391
0
            buffer += n;
392
0
            size -= n;
393
0
        }
394
0
        close(fd);
395
0
    }
396
0
    return 0;
397
0
}
398
399
static void
400
dev_urandom_close(void)
401
0
{
402
0
    if (urandom_cache.fd >= 0) {
403
0
        close(urandom_cache.fd);
404
0
        urandom_cache.fd = -1;
405
0
    }
406
0
}
407
408
#undef urandom_cache
409
410
#endif /* !MS_WINDOWS */
411
412
413
/* Fill buffer with size pseudo-random bytes produced by a linear
   congruential generator (LCG) seeded with x0:

       x(n+1) = (x(n) * 214013 + 2531011) % 2^32

   The generator is stepped once before each byte is written, and the byte is
   taken from bits 23..16 of the new state. The output depends only on x0. */
static void
lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
{
    unsigned int state = x0;
    unsigned char *out = buffer;

    for (size_t remaining = size; remaining > 0; remaining--) {
        /* Unsigned arithmetic wraps, which gives the modulo
           2 ^ (8 * sizeof(int)) for free. */
        state = state * 214013 + 2531011;
        *out++ = (state >> 16) & 0xff;
    }
}
433
434
/* Read random bytes:
435
436
   - Return 0 on success
437
   - Raise an exception (if raise is non-zero) and return -1 on error
438
439
   Used sources of entropy ordered by preference, preferred source first:
440
441
   - BCryptGenRandom() on Windows
442
   - getrandom() function (ex: Linux and Solaris): call py_getrandom()
443
   - getentropy() function (ex: OpenBSD): call py_getentropy()
444
   - /dev/urandom device
445
446
   Read from the /dev/urandom device if getrandom() or getentropy() function
447
   is not available or does not work.
448
449
   Prefer getrandom() over getentropy() because getrandom() supports blocking
450
   and non-blocking mode: see the PEP 524. Python requires non-blocking RNG at
451
   startup to initialize its hash secret, but os.urandom() must block until the
452
   system urandom is initialized (at least on Linux 3.17 and newer).
453
454
   Prefer getrandom() and getentropy() over reading directly /dev/urandom
455
   because these functions don't need file descriptors and so avoid ENFILE or
456
   EMFILE errors (too many open files): see the issue #18756.
457
458
   Only the getrandom() function supports non-blocking mode.
459
460
   Only use RNG running in the kernel. They are more secure because it is
461
   harder to get the internal state of a RNG running in the kernel land than a
462
   RNG running in the user land. The kernel has a direct access to the hardware
463
   and has access to hardware RNG, they are used as entropy sources.
464
465
   Note: the OpenSSL RAND_pseudo_bytes() function does not automatically reseed
466
   its RNG on fork(), two child processes (with the same pid) generate the same
467
   random numbers: see issue #18747. Kernel RNGs don't have this issue,
468
   they have access to good quality entropy sources.
469
470
   If raise is zero:
471
472
   - Don't raise an exception on error
473
   - Don't call the Python signal handler (don't call PyErr_CheckSignals()) if
474
     a function fails with EINTR: retry directly the interrupted function
475
   - Don't release the GIL to call functions.
476
*/
477
static int
478
pyurandom(void *buffer, Py_ssize_t size, int blocking, int raise)
479
18
{
480
18
#if defined(PY_GETRANDOM) || defined(PY_GETENTROPY)
481
18
    int res;
482
18
#endif
483
484
18
    if (size < 0) {
485
0
        if (raise) {
486
0
            PyErr_Format(PyExc_ValueError,
487
0
                         "negative argument not allowed");
488
0
        }
489
0
        return -1;
490
0
    }
491
492
18
    if (size == 0) {
493
0
        return 0;
494
0
    }
495
496
#ifdef MS_WINDOWS
497
    return win32_urandom((unsigned char *)buffer, size, raise);
498
#else
499
500
18
#if defined(PY_GETRANDOM) || defined(PY_GETENTROPY)
501
18
    if (HAVE_GETENTRYPY_GETRANDOM_RUNTIME) {
502
18
#ifdef PY_GETRANDOM
503
18
        res = py_getrandom(buffer, size, blocking, raise);
504
#else
505
        res = py_getentropy(buffer, size, raise);
506
#endif
507
18
        if (res < 0) {
508
0
            return -1;
509
0
        }
510
18
        if (res == 1) {
511
18
            return 0;
512
18
        }
513
        /* getrandom() or getentropy() function is not available: failed with
514
           ENOSYS or EPERM. Fall back on reading from /dev/urandom. */
515
18
        } /* end of availability block */
516
0
#endif
517
518
0
    return dev_urandom(buffer, size, raise);
519
18
#endif
520
18
}
521
522
/* Fill buffer with size pseudo-random bytes from the operating system random
523
   number generator (RNG). It is suitable for most cryptographic purposes
524
   except long living private keys for asymmetric encryption.
525
526
   On Linux 3.17 and newer, the getrandom() syscall is used in blocking mode:
527
   block until the system urandom entropy pool is initialized (128 bits are
528
   collected by the kernel).
529
530
   Return 0 on success. Raise an exception and return -1 on error. */
531
int
532
_PyOS_URandom(void *buffer, Py_ssize_t size)
533
0
{
534
0
    return pyurandom(buffer, size, 1, 1);
535
0
}
536
537
/* Fill buffer with size pseudo-random bytes from the operating system random
538
   number generator (RNG). It is not suitable for cryptographic purpose.
539
540
   On Linux 3.17 and newer (when getrandom() syscall is used), if the system
541
   urandom is not initialized yet, the function returns "weak" entropy read
542
   from /dev/urandom.
543
544
   Return 0 on success. Raise an exception and return -1 on error. */
545
int
546
_PyOS_URandomNonblock(void *buffer, Py_ssize_t size)
547
2
{
548
2
    return pyurandom(buffer, size, 0, 1);
549
2
}
550
551
552
PyStatus
553
_Py_HashRandomization_Init(const PyConfig *config)
554
16
{
555
16
    void *secret = &_Py_HashSecret;
556
16
    Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);
557
558
16
    if (_Py_HashSecret_Initialized) {
559
0
        return _PyStatus_OK();
560
0
    }
561
16
    _Py_HashSecret_Initialized = 1;
562
563
16
    if (config->use_hash_seed) {
564
0
        if (config->hash_seed == 0) {
565
            /* disable the randomized hash */
566
0
            memset(secret, 0, secret_size);
567
0
        }
568
0
        else {
569
            /* use the specified hash seed */
570
0
            lcg_urandom(config->hash_seed, secret, secret_size);
571
0
        }
572
0
    }
573
16
    else {
574
        /* use a random hash seed */
575
16
        int res;
576
577
        /* _PyRandom_Init() is called very early in the Python initialization
578
           and so exceptions cannot be used (use raise=0).
579
580
           _PyRandom_Init() must not block Python initialization: call
581
           pyurandom() is non-blocking mode (blocking=0): see the PEP 524. */
582
16
        res = pyurandom(secret, secret_size, 0, 0);
583
16
        if (res < 0) {
584
0
            return _PyStatus_ERR("failed to get random numbers "
585
0
                                 "to initialize Python");
586
0
        }
587
16
    }
588
16
    return _PyStatus_OK();
589
16
}
590
591
592
void
_Py_HashRandomization_Fini(void)
{
#ifdef MS_WINDOWS
    /* Nothing to release: dev_urandom_close() is only called on
       non-Windows builds. */
#else
    /* Close the cached /dev/urandom file descriptor, if one is open. */
    dev_urandom_close();
#endif
}