Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#include <errno.h>                // errno
18
#ifdef WITH_MIMALLOC
19
// Forward declarations of functions used in our mimalloc modifications
20
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
21
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
22
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
23
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
24
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
25
#  include "pycore_mimalloc.h"
26
#  include "mimalloc/static.c"
27
#  include "mimalloc/internal.h"  // for stats
28
#endif
29
30
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
31
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
32
#endif
33
34
#undef  uint
35
1.97G
#define uint pymem_uint
36
37
38
/* Defined in tracemalloc.c */
39
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
40
41
static void _PyObject_DebugDumpAddress(const void *p);
42
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
43
44
45
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
46
static void set_up_debug_hooks_unlocked(void);
47
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
49
50
51
/***************************************/
52
/* low-level allocator implementations */
53
/***************************************/
54
55
/* the default raw allocator (wraps malloc) */
56
57
void *
58
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
59
231M
{
60
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
61
       for malloc(0), which would be treated as an error. Some platforms would
62
       return a pointer with no memory behind it, which would break pymalloc.
63
       To solve these problems, allocate an extra byte. */
64
231M
    if (size == 0)
65
38.9M
        size = 1;
66
231M
    return malloc(size);
67
231M
}
68
69
void *
70
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
71
151k
{
72
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
73
       for calloc(0, 0), which would be treated as an error. Some platforms
74
       would return a pointer with no memory behind it, which would break
75
       pymalloc.  To solve these problems, allocate an extra byte. */
76
151k
    if (nelem == 0 || elsize == 0) {
77
671
        nelem = 1;
78
671
        elsize = 1;
79
671
    }
80
151k
    return calloc(nelem, elsize);
81
151k
}
82
83
void *
84
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
85
8.97M
{
86
8.97M
    if (size == 0)
87
0
        size = 1;
88
8.97M
    return realloc(ptr, size);
89
8.97M
}
90
91
void
92
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
93
231M
{
94
231M
    free(ptr);
95
231M
}
96
97
#ifdef WITH_MIMALLOC
98
99
static void
100
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
101
0
{
102
#ifdef Py_GIL_DISABLED
103
    // Clear the QSBR goal and remove the page from the QSBR linked list.
104
    page->qsbr_goal = 0;
105
    if (page->qsbr_node.next != NULL) {
106
        llist_remove(&page->qsbr_node);
107
    }
108
#endif
109
0
}
110
111
// Check if an empty, newly reclaimed page is safe to free now.
112
static bool
113
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
114
0
{
115
0
    assert(mi_page_all_free(page));
116
#ifdef Py_GIL_DISABLED
117
    assert(page->qsbr_node.next == NULL);
118
    if (page->use_qsbr && page->qsbr_goal != 0) {
119
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
120
        if (tstate == NULL) {
121
            return false;
122
        }
123
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
124
    }
125
#endif
126
0
    return true;
127
128
0
}
129
130
#ifdef Py_GIL_DISABLED
131
132
// If we are deferring collection of more than this amount of memory for
133
// mimalloc pages, advance the write sequence.  Advancing allows these
134
// pages to be re-used in a different thread or for a different size class.
135
#define QSBR_PAGE_MEM_LIMIT 4096*20
136
137
// Return true if the global write sequence should be advanced for a mimalloc
138
// page that is deferred from collection.
139
static bool
140
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
141
{
142
    size_t bsize = mi_page_block_size(page);
143
    size_t page_size = page->capacity*bsize;
144
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
145
        qsbr->deferred_page_memory = 0;
146
        return true;
147
    }
148
    qsbr->deferred_page_memory += page_size;
149
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
150
        qsbr->deferred_page_memory = 0;
151
        return true;
152
    }
153
    return false;
154
}
155
156
static _PyThreadStateImpl *
157
tstate_from_heap(mi_heap_t *heap)
158
{
159
    return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
160
}
161
#endif
162
163
static bool
164
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
165
0
{
166
#ifdef Py_GIL_DISABLED
167
    assert(mi_page_all_free(page));
168
    if (page->use_qsbr) {
169
        struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
170
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
171
            _PyMem_mi_page_clear_qsbr(page);
172
            _mi_page_free(page, pq, force);
173
            return true;
174
        }
175
176
        // gh-145615: since we are not freeing this page yet, we want to
177
        // make it available for allocations. Note that the QSBR goal and
178
        // linked list node remain set even if the page is later used for
179
        // an allocation. We only detect and clear the QSBR goal when the
180
        // page becomes empty again (used == 0).
181
        if (mi_page_is_in_full(page)) {
182
            _mi_page_unfull(page);
183
        }
184
185
        _PyMem_mi_page_clear_qsbr(page);
186
        page->retire_expire = 0;
187
188
        if (should_advance_qsbr_for_page(qsbr, page)) {
189
            page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
190
        }
191
        else {
192
            page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
193
        }
194
195
        // We may be freeing a page belonging to a different thread during a
196
        // stop-the-world event. Find the _PyThreadStateImpl for the page.
197
        _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
198
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
199
        return false;
200
    }
201
#endif
202
0
    _mi_page_free(page, pq, force);
203
0
    return true;
204
0
}
205
206
static void
207
_PyMem_mi_page_reclaimed(mi_page_t *page)
208
0
{
209
#ifdef Py_GIL_DISABLED
210
    assert(page->qsbr_node.next == NULL);
211
    if (page->qsbr_goal != 0) {
212
        if (mi_page_all_free(page)) {
213
            assert(page->qsbr_node.next == NULL);
214
            _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
215
            assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
216
            page->retire_expire = 0;
217
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
218
        }
219
        else {
220
            page->qsbr_goal = 0;
221
        }
222
    }
223
#endif
224
0
}
225
226
static void
227
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
228
0
{
229
#ifdef Py_GIL_DISABLED
230
    if (!heap->page_use_qsbr) {
231
        return;
232
    }
233
234
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
235
    struct llist_node *head = &tstate->mimalloc.page_list;
236
    if (llist_empty(head)) {
237
        return;
238
    }
239
240
    struct llist_node *node;
241
    llist_for_each_safe(node, head) {
242
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
243
        if (!mi_page_all_free(page)) {
244
            // We allocated from this page some point after the delayed free
245
            _PyMem_mi_page_clear_qsbr(page);
246
            continue;
247
        }
248
249
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
250
            return;
251
        }
252
253
        _PyMem_mi_page_clear_qsbr(page);
254
        _mi_page_free(page, mi_page_queue_of(page), false);
255
    }
256
#endif
257
0
}
258
259
void *
260
_PyMem_MiMalloc(void *ctx, size_t size)
261
0
{
262
#ifdef Py_GIL_DISABLED
263
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
264
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
265
    return mi_heap_malloc(heap, size);
266
#else
267
0
    return mi_malloc(size);
268
0
#endif
269
0
}
270
271
void *
272
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
273
0
{
274
#ifdef Py_GIL_DISABLED
275
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
276
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
277
    return mi_heap_calloc(heap, nelem, elsize);
278
#else
279
0
    return mi_calloc(nelem, elsize);
280
0
#endif
281
0
}
282
283
void *
284
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
285
0
{
286
#ifdef Py_GIL_DISABLED
287
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
288
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
289
    return mi_heap_realloc(heap, ptr, size);
290
#else
291
0
    return mi_realloc(ptr, size);
292
0
#endif
293
0
}
294
295
void
296
_PyMem_MiFree(void *ctx, void *ptr)
297
0
{
298
0
    mi_free(ptr);
299
0
}
300
301
void *
302
_PyObject_MiMalloc(void *ctx, size_t nbytes)
303
0
{
304
#ifdef Py_GIL_DISABLED
305
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
306
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
307
    return mi_heap_malloc(heap, nbytes);
308
#else
309
0
    return mi_malloc(nbytes);
310
0
#endif
311
0
}
312
313
void *
314
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
315
0
{
316
#ifdef Py_GIL_DISABLED
317
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
318
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
319
    return mi_heap_calloc(heap, nelem, elsize);
320
#else
321
0
    return mi_calloc(nelem, elsize);
322
0
#endif
323
0
}
324
325
326
void *
327
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
328
0
{
329
#ifdef Py_GIL_DISABLED
330
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
331
    // Implement our own realloc logic so that we can copy PyObject header
332
    // in a thread-safe way.
333
    size_t size = mi_usable_size(ptr);
334
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
335
        return ptr;
336
    }
337
338
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
339
    void* newp = mi_heap_malloc(heap, nbytes);
340
    if (newp == NULL) {
341
        return NULL;
342
    }
343
344
    // Free threaded Python allows access from other threads to the PyObject reference count
345
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
346
    // These fields are typically initialized by PyObject_Init() using relaxed
347
    // atomic stores. We need to copy these fields in a thread-safe way here.
348
    // We use the "debug_offset" to determine how many bytes to copy -- it
349
    // includes the PyObject header and plus any extra pre-headers.
350
    size_t offset = heap->debug_offset;
351
    assert(offset % sizeof(void*) == 0);
352
353
    size_t copy_size = (size < nbytes ? size : nbytes);
354
    if (copy_size >= offset) {
355
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
356
            // Use memcpy to avoid strict-aliasing issues. However, we probably
357
            // still have unavoidable strict-aliasing issues with
358
            // _Py_atomic_store_ptr_relaxed here.
359
            void *word;
360
            memcpy(&word, (char*)ptr + i, sizeof(void*));
361
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
362
        }
363
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
364
    }
365
    else {
366
        _mi_memcpy(newp, ptr, copy_size);
367
    }
368
    mi_free(ptr);
369
    return newp;
370
#else
371
0
    return mi_realloc(ptr, nbytes);
372
0
#endif
373
0
}
374
375
void
376
_PyObject_MiFree(void *ctx, void *ptr)
377
0
{
378
0
    mi_free(ptr);
379
0
}
380
381
void *
382
_PyMem_MiRawMalloc(void *ctx, size_t size)
383
0
{
384
0
    return mi_malloc(size);
385
0
}
386
387
void *
388
_PyMem_MiRawCalloc(void *ctx, size_t nelem, size_t elsize)
389
0
{
390
0
    return mi_calloc(nelem, elsize);
391
0
}
392
393
void *
394
_PyMem_MiRawRealloc(void *ctx, void *ptr, size_t size)
395
0
{
396
0
    return mi_realloc(ptr, size);
397
0
}
398
399
void
400
_PyMem_MiRawFree(void *ctx, void *ptr)
401
0
{
402
0
    mi_free(ptr);
403
0
}
404
#endif // WITH_MIMALLOC
405
406
407
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
408
409
410
#ifdef WITH_MIMALLOC
411
0
#  define MIMALLOC_ALLOC    {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
412
0
#  define MIMALLOC_RAWALLOC {NULL, _PyMem_MiRawMalloc, _PyMem_MiRawCalloc, _PyMem_MiRawRealloc, _PyMem_MiRawFree}
413
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
414
#endif
415
416
/* the pymalloc allocator */
417
418
// The actual implementation is further down.
419
420
#if defined(WITH_PYMALLOC)
421
void* _PyObject_Malloc(void *ctx, size_t size);
422
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
423
void _PyObject_Free(void *ctx, void *p);
424
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
425
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
426
#endif  // WITH_PYMALLOC
427
428
#if defined(Py_GIL_DISABLED)
429
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
430
#  define PYRAW_ALLOC MIMALLOC_RAWALLOC
431
#  define PYMEM_ALLOC MIMALLOC_ALLOC
432
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
433
#elif defined(WITH_PYMALLOC)
434
0
#  define PYRAW_ALLOC MALLOC_ALLOC
435
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
436
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
437
#else
438
#  define PYRAW_ALLOC MALLOC_ALLOC
439
#  define PYMEM_ALLOC MALLOC_ALLOC
440
#  define PYOBJ_ALLOC MALLOC_ALLOC
441
#endif
442
443
444
/* the default debug allocators */
445
446
// The actual implementation is further down.
447
448
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
449
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
450
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
451
void _PyMem_DebugRawFree(void *ctx, void *ptr);
452
453
void* _PyMem_DebugMalloc(void *ctx, size_t size);
454
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
455
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
456
void _PyMem_DebugFree(void *ctx, void *p);
457
458
#define PYDBGRAW_ALLOC \
459
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
460
#define PYDBGMEM_ALLOC \
461
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
462
#define PYDBGOBJ_ALLOC \
463
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
464
465
/* default raw allocator (not swappable) */
466
467
void *
468
_PyMem_DefaultRawMalloc(size_t size)
469
296
{
470
#ifdef Py_DEBUG
471
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
472
#else
473
296
    return _PyMem_RawMalloc(NULL, size);
474
296
#endif
475
296
}
476
477
void *
478
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
479
0
{
480
#ifdef Py_DEBUG
481
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
482
#else
483
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
484
0
#endif
485
0
}
486
487
void *
488
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
489
0
{
490
#ifdef Py_DEBUG
491
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
492
#else
493
0
    return _PyMem_RawRealloc(NULL, ptr, size);
494
0
#endif
495
0
}
496
497
void
498
_PyMem_DefaultRawFree(void *ptr)
499
333
{
500
#ifdef Py_DEBUG
501
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
502
#else
503
333
    _PyMem_RawFree(NULL, ptr);
504
333
#endif
505
333
}
506
507
wchar_t*
508
_PyMem_DefaultRawWcsdup(const wchar_t *str)
509
222
{
510
222
    assert(str != NULL);
511
512
222
    size_t len = wcslen(str);
513
222
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
514
0
        return NULL;
515
0
    }
516
517
222
    size_t size = (len + 1) * sizeof(wchar_t);
518
222
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
519
222
    if (str2 == NULL) {
520
0
        return NULL;
521
0
    }
522
523
222
    memcpy(str2, str, size);
524
222
    return str2;
525
222
}
526
527
/* the low-level virtual memory allocator */
528
529
#ifdef WITH_PYMALLOC
530
#  ifdef MS_WINDOWS
531
#    include <windows.h>
532
#  elif defined(HAVE_MMAP)
533
#    include <sys/mman.h>
534
#    ifdef MAP_ANONYMOUS
535
#      define ARENAS_USE_MMAP
536
#    endif
537
#  endif
538
#endif
539
540
/* Return the system's default huge page size in bytes, or 0 if it
541
 * cannot be determined.  The result is cached after the first call.
542
 *
543
 * This is Linux-only (/proc/meminfo).  On other systems that define
544
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
545
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
546
static size_t
547
_pymalloc_system_hugepage_size(void)
548
{
549
    static size_t hp_size = 0;
550
    static int initialized = 0;
551
552
    if (initialized) {
553
        return hp_size;
554
    }
555
556
#ifdef __linux__
557
    FILE *f = fopen("/proc/meminfo", "r");
558
    if (f != NULL) {
559
        char line[256];
560
        while (fgets(line, sizeof(line), f)) {
561
            unsigned long size_kb;
562
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
563
                hp_size = (size_t)size_kb * 1024;
564
                break;
565
            }
566
        }
567
        fclose(f);
568
    }
569
#endif
570
571
    initialized = 1;
572
    return hp_size;
573
}
574
#endif
575
576
#if (defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)) || \
577
    (defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB))
578
static size_t
579
_pymalloc_round_up_to_multiple(size_t size, size_t multiple)
580
{
581
    if (multiple == 0 || size == 0) {
582
        return size;
583
    }
584
585
    size_t remainder = size % multiple;
586
    if (remainder == 0) {
587
        return size;
588
    }
589
590
    size_t padding = multiple - remainder;
591
    if (size > SIZE_MAX - padding) {
592
        return 0;
593
    }
594
    return size + padding;
595
}
596
#endif
597
598
static size_t
599
_pymalloc_virtual_alloc_size(size_t size)
600
55.2k
{
601
#if defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)
602
    if (_PyRuntime.allocators.use_hugepages) {
603
        SIZE_T large_page_size = GetLargePageMinimum();
604
        if (large_page_size > 0) {
605
            return _pymalloc_round_up_to_multiple(size, (size_t)large_page_size);
606
        }
607
    }
608
#elif defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
609
    if (_PyRuntime.allocators.use_hugepages) {
610
        size_t hp_size = _pymalloc_system_hugepage_size();
611
        if (hp_size > 0) {
612
            return _pymalloc_round_up_to_multiple(size, hp_size);
613
        }
614
    }
615
#endif
616
55.2k
    return size;
617
55.2k
}
618
619
void *
620
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
621
33.2k
{
622
#ifdef MS_WINDOWS
623
#  ifdef PYMALLOC_USE_HUGEPAGES
624
    if (_PyRuntime.allocators.use_hugepages) {
625
        SIZE_T lp_size = GetLargePageMinimum();
626
        if (lp_size > 0 && size % lp_size == 0) {
627
            void *ptr = VirtualAlloc(NULL, size,
628
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
629
                            PAGE_READWRITE);
630
            if (ptr != NULL)
631
                return ptr;
632
        }
633
    }
634
    /* Fall back to regular pages */
635
#  endif
636
    return VirtualAlloc(NULL, size,
637
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
638
#elif defined(ARENAS_USE_MMAP)
639
    void *ptr;
640
#  ifdef PYMALLOC_USE_HUGEPAGES
641
#    ifdef MAP_HUGETLB
642
    if (_PyRuntime.allocators.use_hugepages) {
643
        size_t hp_size = _pymalloc_system_hugepage_size();
644
        /* Only use huge pages if the arena size is a multiple of the
645
         * system's default huge page size.  When the arena is smaller
646
         * than the huge page, mmap still succeeds but silently
647
         * allocates an entire huge page; the subsequent munmap with
648
         * the smaller arena size then fails with EINVAL, leaking
649
         * all of that memory. */
650
        if (hp_size > 0 && size % hp_size == 0) {
651
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
652
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
653
            if (ptr != MAP_FAILED) {
654
                assert(ptr != NULL);
655
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
656
                return ptr;
657
            }
658
        }
659
    }
660
    /* Fall back to regular pages */
661
#    endif
662
#  endif
663
33.2k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
664
33.2k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
665
33.2k
    if (ptr == MAP_FAILED)
666
0
        return NULL;
667
33.2k
    assert(ptr != NULL);
668
33.2k
#ifdef MADV_HUGEPAGE
669
33.2k
    (void)madvise(ptr, size, MADV_HUGEPAGE);
670
33.2k
#endif
671
33.2k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
672
33.2k
    return ptr;
673
#else
674
    return malloc(size);
675
#endif
676
33.2k
}
677
678
void
679
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
680
#if defined(ARENAS_USE_MMAP)
681
    size_t size
682
#else
683
    size_t Py_UNUSED(size)
684
#endif
685
)
686
32.7k
{
687
#ifdef MS_WINDOWS
688
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
689
    if (ptr == NULL) {
690
        return;
691
    }
692
    VirtualFree(ptr, 0, MEM_RELEASE);
693
#elif defined(ARENAS_USE_MMAP)
694
    /* Unlike free(), munmap() does not special-case NULL to noop. */
695
32.7k
    if (ptr == NULL) {
696
0
        return;
697
0
    }
698
32.7k
    if (munmap(ptr, size) < 0) {
699
0
        _Py_FatalErrorFormat(__func__,
700
0
                             "munmap(%p, %zu) failed with errno %d",
701
0
                             ptr, size, errno);
702
0
    }
703
#else
704
    free(ptr);
705
#endif
706
32.7k
}
707
708
/*******************************************/
709
/* end low-level allocator implementations */
710
/*******************************************/
711
712
713
74
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
714
945M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
715
2.20G
#define _PyMem (_PyRuntime.allocators.standard.mem)
716
5.74G
#define _PyObject (_PyRuntime.allocators.standard.obj)
717
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
718
132k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
719
720
721
/***************************/
722
/* managing the allocators */
723
/***************************/
724
725
static int
726
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
727
                               PyMemAllocatorEx *old_alloc)
728
0
{
729
0
    if (old_alloc != NULL) {
730
0
        get_allocator_unlocked(domain, old_alloc);
731
0
    }
732
733
734
0
    PyMemAllocatorEx new_alloc;
735
0
    switch(domain)
736
0
    {
737
0
    case PYMEM_DOMAIN_RAW:
738
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
739
0
        break;
740
0
    case PYMEM_DOMAIN_MEM:
741
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
742
0
        break;
743
0
    case PYMEM_DOMAIN_OBJ:
744
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
745
0
        break;
746
0
    default:
747
        /* unknown domain */
748
0
        return -1;
749
0
    }
750
0
    set_allocator_unlocked(domain, &new_alloc);
751
0
    if (debug) {
752
0
        set_up_debug_hooks_domain_unlocked(domain);
753
0
    }
754
0
    return 0;
755
0
}
756
757
758
#ifdef Py_DEBUG
759
static const int pydebug = 1;
760
#else
761
static const int pydebug = 0;
762
#endif
763
764
int
765
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
766
0
{
767
0
    if (name == NULL || *name == '\0') {
768
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
769
           nameions): use default memory allocators */
770
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
771
0
    }
772
0
    else if (strcmp(name, "default") == 0) {
773
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
774
0
    }
775
0
    else if (strcmp(name, "debug") == 0) {
776
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
777
0
    }
778
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
779
0
    else if (strcmp(name, "pymalloc") == 0) {
780
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
781
0
    }
782
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
783
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
784
0
    }
785
0
#endif
786
0
#ifdef WITH_MIMALLOC
787
0
    else if (strcmp(name, "mimalloc") == 0) {
788
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
789
0
    }
790
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
791
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
792
0
    }
793
0
#endif
794
0
#ifndef Py_GIL_DISABLED
795
0
    else if (strcmp(name, "malloc") == 0) {
796
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
797
0
    }
798
0
    else if (strcmp(name, "malloc_debug") == 0) {
799
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
800
0
    }
801
0
#endif
802
0
    else {
803
        /* unknown allocator */
804
0
        return -1;
805
0
    }
806
0
    return 0;
807
0
}
808
809
810
static int
811
set_up_allocators_unlocked(PyMemAllocatorName allocator)
812
0
{
813
0
    switch (allocator) {
814
0
    case PYMEM_ALLOCATOR_NOT_SET:
815
        /* do nothing */
816
0
        break;
817
818
0
    case PYMEM_ALLOCATOR_DEFAULT:
819
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
820
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
821
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
822
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
823
0
        break;
824
825
0
    case PYMEM_ALLOCATOR_DEBUG:
826
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
827
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
828
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
829
0
        _PyRuntime.allocators.is_debug_enabled = 1;
830
0
        break;
831
832
0
#ifdef WITH_PYMALLOC
833
0
    case PYMEM_ALLOCATOR_PYMALLOC:
834
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
835
0
    {
836
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
837
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
838
839
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
840
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
841
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
842
843
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
844
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
845
0
        if (is_debug) {
846
0
            set_up_debug_hooks_unlocked();
847
0
        }
848
0
        break;
849
0
    }
850
0
#endif
851
0
#ifdef WITH_MIMALLOC
852
0
    case PYMEM_ALLOCATOR_MIMALLOC:
853
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
854
0
    {
855
0
        PyMemAllocatorEx malloc_alloc = MIMALLOC_RAWALLOC;
856
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
857
858
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
859
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
860
861
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
862
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
863
864
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
865
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
866
0
        if (is_debug) {
867
0
            set_up_debug_hooks_unlocked();
868
0
        }
869
870
0
        break;
871
0
    }
872
0
#endif
873
874
0
    case PYMEM_ALLOCATOR_MALLOC:
875
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
876
0
    {
877
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
878
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
879
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
880
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
881
882
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
883
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
884
0
        if (is_debug) {
885
0
            set_up_debug_hooks_unlocked();
886
0
        }
887
0
        break;
888
0
    }
889
890
0
    default:
891
        /* unknown allocator */
892
0
        return -1;
893
0
    }
894
895
0
    return 0;
896
0
}
897
898
int
899
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
900
0
{
901
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
902
0
    int res = set_up_allocators_unlocked(allocator);
903
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
904
0
    return res;
905
0
}
906
907
908
static int
909
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
910
0
{
911
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
912
0
}
913
914
915
static const char*
916
get_current_allocator_name_unlocked(void)
917
0
{
918
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
919
0
#ifdef WITH_PYMALLOC
920
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
921
0
#endif
922
0
#ifdef WITH_MIMALLOC
923
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
924
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
925
0
    PyMemAllocatorEx mimalloc_raw = MIMALLOC_RAWALLOC;
926
0
#endif
927
928
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
929
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
930
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
931
0
    {
932
0
        return "malloc";
933
0
    }
934
0
#ifdef WITH_PYMALLOC
935
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
936
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
937
0
        pymemallocator_eq(&_PyObject, &pymalloc))
938
0
    {
939
0
        return "pymalloc";
940
0
    }
941
0
#endif
942
0
#ifdef WITH_MIMALLOC
943
0
    if (pymemallocator_eq(&_PyMem_Raw, &mimalloc_raw) &&
944
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
945
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
946
0
    {
947
0
        return "mimalloc";
948
0
    }
949
0
#endif
950
951
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
952
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
953
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
954
955
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
956
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
957
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
958
0
    {
959
        /* Debug hooks installed */
960
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
961
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
962
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
963
0
        {
964
0
            return "malloc_debug";
965
0
        }
966
0
#ifdef WITH_PYMALLOC
967
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
968
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
969
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
970
0
        {
971
0
            return "pymalloc_debug";
972
0
        }
973
0
#endif
974
0
#ifdef WITH_MIMALLOC
975
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &mimalloc_raw) &&
976
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
977
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
978
0
        {
979
0
            return "mimalloc_debug";
980
0
        }
981
0
#endif
982
0
    }
983
0
    return NULL;
984
0
}
985
986
const char*
987
_PyMem_GetCurrentAllocatorName(void)
988
0
{
989
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
990
0
    const char *name = get_current_allocator_name_unlocked();
991
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
992
0
    return name;
993
0
}
994
995
996
int
997
_PyMem_DebugEnabled(void)
998
0
{
999
0
    return _PyRuntime.allocators.is_debug_enabled;
1000
0
}
1001
1002
#ifdef WITH_PYMALLOC
1003
static int
1004
_PyMem_PymallocEnabled(void)
1005
0
{
1006
0
    if (_PyMem_DebugEnabled()) {
1007
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
1008
0
    }
1009
0
    else {
1010
0
        return (_PyObject.malloc == _PyObject_Malloc);
1011
0
    }
1012
0
}
1013
1014
#ifdef WITH_MIMALLOC
1015
static int
1016
_PyMem_MimallocEnabled(void)
1017
0
{
1018
#ifdef Py_GIL_DISABLED
1019
    return 1;
1020
#else
1021
0
    if (_PyMem_DebugEnabled()) {
1022
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
1023
0
    }
1024
0
    else {
1025
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
1026
0
    }
1027
0
#endif
1028
0
}
1029
#endif  // WITH_MIMALLOC
1030
1031
#endif  // WITH_PYMALLOC
1032
1033
1034
static void
1035
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
1036
0
{
1037
0
    PyMemAllocatorEx alloc;
1038
1039
0
    if (domain == PYMEM_DOMAIN_RAW) {
1040
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
1041
0
            return;
1042
0
        }
1043
1044
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
1045
0
        alloc.ctx = &_PyMem_Debug.raw;
1046
0
        alloc.malloc = _PyMem_DebugRawMalloc;
1047
0
        alloc.calloc = _PyMem_DebugRawCalloc;
1048
0
        alloc.realloc = _PyMem_DebugRawRealloc;
1049
0
        alloc.free = _PyMem_DebugRawFree;
1050
0
        set_allocator_unlocked(domain, &alloc);
1051
0
    }
1052
0
    else if (domain == PYMEM_DOMAIN_MEM) {
1053
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
1054
0
            return;
1055
0
        }
1056
1057
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
1058
0
        alloc.ctx = &_PyMem_Debug.mem;
1059
0
        alloc.malloc = _PyMem_DebugMalloc;
1060
0
        alloc.calloc = _PyMem_DebugCalloc;
1061
0
        alloc.realloc = _PyMem_DebugRealloc;
1062
0
        alloc.free = _PyMem_DebugFree;
1063
0
        set_allocator_unlocked(domain, &alloc);
1064
0
    }
1065
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
1066
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
1067
0
            return;
1068
0
        }
1069
1070
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
1071
0
        alloc.ctx = &_PyMem_Debug.obj;
1072
0
        alloc.malloc = _PyMem_DebugMalloc;
1073
0
        alloc.calloc = _PyMem_DebugCalloc;
1074
0
        alloc.realloc = _PyMem_DebugRealloc;
1075
0
        alloc.free = _PyMem_DebugFree;
1076
0
        set_allocator_unlocked(domain, &alloc);
1077
0
    }
1078
0
}
1079
1080
1081
static void
1082
set_up_debug_hooks_unlocked(void)
1083
0
{
1084
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
1085
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
1086
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
1087
0
    _PyRuntime.allocators.is_debug_enabled = 1;
1088
0
}
1089
1090
void
1091
PyMem_SetupDebugHooks(void)
1092
0
{
1093
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1094
0
    set_up_debug_hooks_unlocked();
1095
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1096
0
}
1097
1098
static void
1099
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1100
37
{
1101
37
    switch(domain)
1102
37
    {
1103
37
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1104
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1105
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1106
0
    default:
1107
        /* unknown domain: set all attributes to NULL */
1108
0
        allocator->ctx = NULL;
1109
0
        allocator->malloc = NULL;
1110
0
        allocator->calloc = NULL;
1111
0
        allocator->realloc = NULL;
1112
0
        allocator->free = NULL;
1113
37
    }
1114
37
}
1115
1116
static void
1117
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1118
0
{
1119
0
    switch(domain)
1120
0
    {
1121
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1122
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1123
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1124
    /* ignore unknown domain */
1125
0
    }
1126
0
}
1127
1128
void
1129
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1130
37
{
1131
37
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1132
37
    get_allocator_unlocked(domain, allocator);
1133
37
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1134
37
}
1135
1136
void
1137
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1138
0
{
1139
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1140
0
    set_allocator_unlocked(domain, allocator);
1141
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1142
0
}
1143
1144
void
1145
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1146
0
{
1147
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1148
0
    *allocator = _PyObject_Arena;
1149
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1150
0
}
1151
1152
void
1153
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1154
0
{
1155
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1156
0
    _PyObject_Arena = *allocator;
1157
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1158
0
}
1159
1160
1161
/* Note that there is a possible, but very unlikely, race in any place
1162
 * below where we call one of the allocator functions.  We access two
1163
 * fields in each case:  "malloc", etc. and "ctx".
1164
 *
1165
 * It is unlikely that the allocator will be changed while one of those
1166
 * calls is happening, much less in that very narrow window.
1167
 * Furthermore, the likelihood of a race is drastically reduced by the
1168
 * fact that the allocator may not be changed after runtime init
1169
 * (except with a wrapper).
1170
 *
1171
 * With the above in mind, we currently don't worry about locking
1172
 * around these uses of the runtime-global allocators state. */
1173
1174
1175
/*************************/
1176
/* the "arena" allocator */
1177
/*************************/
1178
1179
void *
1180
_PyObject_VirtualAlloc(size_t size)
1181
27.6k
{
1182
27.6k
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1183
27.6k
    if (alloc_size == 0 && size != 0) {
1184
0
        return NULL;
1185
0
    }
1186
27.6k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, alloc_size);
1187
27.6k
}
1188
1189
void
1190
_PyObject_VirtualFree(void *obj, size_t size)
1191
27.6k
{
1192
27.6k
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1193
27.6k
    assert(alloc_size != 0 || size == 0);
1194
27.6k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, alloc_size);
1195
27.6k
}
1196
1197
1198
/***********************/
1199
/* the "raw" allocator */
1200
/***********************/
1201
1202
void *
1203
PyMem_RawMalloc(size_t size)
1204
231M
{
1205
    /*
1206
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1207
     * Most python internals blindly use a signed Py_ssize_t to track
1208
     * things without checking for overflows or negatives.
1209
     * As size_t is unsigned, checking for size < 0 is not required.
1210
     */
1211
231M
    if (size > (size_t)PY_SSIZE_T_MAX)
1212
0
        return NULL;
1213
231M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1214
231M
}
1215
1216
void *
1217
PyMem_RawCalloc(size_t nelem, size_t elsize)
1218
151k
{
1219
    /* see PyMem_RawMalloc() */
1220
151k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1221
0
        return NULL;
1222
151k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1223
151k
}
1224
1225
void*
1226
PyMem_RawRealloc(void *ptr, size_t new_size)
1227
8.97M
{
1228
    /* see PyMem_RawMalloc() */
1229
8.97M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1230
0
        return NULL;
1231
8.97M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1232
8.97M
}
1233
1234
void PyMem_RawFree(void *ptr)
1235
231M
{
1236
231M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1237
231M
}
1238
1239
1240
/***********************/
1241
/* the "mem" allocator */
1242
/***********************/
1243
1244
void *
1245
PyMem_Malloc(size_t size)
1246
264M
{
1247
    /* see PyMem_RawMalloc() */
1248
264M
    if (size > (size_t)PY_SSIZE_T_MAX)
1249
0
        return NULL;
1250
264M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1251
264M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1252
264M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1253
264M
    OBJECT_STAT_INC(allocations);
1254
264M
    return _PyMem.malloc(_PyMem.ctx, size);
1255
264M
}
1256
1257
void *
1258
PyMem_Calloc(size_t nelem, size_t elsize)
1259
42.4M
{
1260
    /* see PyMem_RawMalloc() */
1261
42.4M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1262
0
        return NULL;
1263
42.4M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1264
42.4M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1265
42.4M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1266
42.4M
    OBJECT_STAT_INC(allocations);
1267
42.4M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1268
42.4M
}
1269
1270
void *
1271
PyMem_Realloc(void *ptr, size_t new_size)
1272
259M
{
1273
    /* see PyMem_RawMalloc() */
1274
259M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1275
0
        return NULL;
1276
259M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1277
259M
}
1278
1279
void
1280
PyMem_Free(void *ptr)
1281
535M
{
1282
535M
    OBJECT_STAT_INC(frees);
1283
535M
    _PyMem.free(_PyMem.ctx, ptr);
1284
535M
}
1285
1286
1287
/***************************/
1288
/* pymem utility functions */
1289
/***************************/
1290
1291
wchar_t*
1292
_PyMem_RawWcsdup(const wchar_t *str)
1293
1.48k
{
1294
1.48k
    assert(str != NULL);
1295
1296
1.48k
    size_t len = wcslen(str);
1297
1.48k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1298
0
        return NULL;
1299
0
    }
1300
1301
1.48k
    size_t size = (len + 1) * sizeof(wchar_t);
1302
1.48k
    wchar_t *str2 = PyMem_RawMalloc(size);
1303
1.48k
    if (str2 == NULL) {
1304
0
        return NULL;
1305
0
    }
1306
1307
1.48k
    memcpy(str2, str, size);
1308
1.48k
    return str2;
1309
1.48k
}
1310
1311
char *
1312
_PyMem_RawStrdup(const char *str)
1313
111
{
1314
111
    assert(str != NULL);
1315
111
    size_t size = strlen(str) + 1;
1316
111
    char *copy = PyMem_RawMalloc(size);
1317
111
    if (copy == NULL) {
1318
0
        return NULL;
1319
0
    }
1320
111
    memcpy(copy, str, size);
1321
111
    return copy;
1322
111
}
1323
1324
char *
1325
_PyMem_Strdup(const char *str)
1326
0
{
1327
0
    assert(str != NULL);
1328
0
    size_t size = strlen(str) + 1;
1329
0
    char *copy = PyMem_Malloc(size);
1330
0
    if (copy == NULL) {
1331
0
        return NULL;
1332
0
    }
1333
0
    memcpy(copy, str, size);
1334
0
    return copy;
1335
0
}
1336
1337
/***********************************************/
1338
/* Delayed freeing support for Py_GIL_DISABLED */
1339
/***********************************************/
1340
1341
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1342
#define WORK_ITEMS_PER_CHUNK 254
1343
1344
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1345
struct _mem_work_item {
1346
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1347
    uint64_t qsbr_goal;
1348
};
1349
1350
// A fixed-size buffer of pointers to be freed
1351
struct _mem_work_chunk {
1352
    // Linked list node of chunks in queue
1353
    struct llist_node node;
1354
1355
    Py_ssize_t rd_idx;  // index of next item to read
1356
    Py_ssize_t wr_idx;  // index of next item to write
1357
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1358
};
1359
1360
static int
1361
work_item_should_decref(uintptr_t ptr)
1362
0
{
1363
0
    return ptr & 0x01;
1364
0
}
1365
1366
static void
1367
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1368
0
{
1369
0
    if (work_item_should_decref(ptr)) {
1370
0
        PyObject *obj = (PyObject *)(ptr - 1);
1371
#ifdef Py_GIL_DISABLED
1372
        if (cb == NULL) {
1373
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1374
            Py_DECREF(obj);
1375
            return;
1376
        }
1377
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1378
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1379
        if (refcount == 0) {
1380
            cb(obj, state);
1381
        }
1382
#else
1383
0
        Py_DECREF(obj);
1384
0
#endif
1385
0
    }
1386
0
    else {
1387
0
        PyMem_Free((void *)ptr);
1388
0
    }
1389
0
}
1390
1391
1392
#ifdef Py_GIL_DISABLED
1393
1394
// For deferred advance on free: the number of deferred items before advancing
1395
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1396
// want to process a chunk before it overflows.
1397
#define QSBR_DEFERRED_LIMIT 127
1398
1399
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1400
// helps limit memory usage due to QSBR delaying frees too long.
1401
#define QSBR_FREE_MEM_LIMIT 1024*1024
1402
1403
// Return true if the global write sequence should be advanced for a deferred
1404
// memory free.
1405
static bool
1406
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1407
{
1408
    if (size > QSBR_FREE_MEM_LIMIT) {
1409
        qsbr->deferred_count = 0;
1410
        qsbr->deferred_memory = 0;
1411
        qsbr->should_process = true;
1412
        return true;
1413
    }
1414
    qsbr->deferred_count++;
1415
    qsbr->deferred_memory += size;
1416
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1417
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1418
        qsbr->deferred_count = 0;
1419
        qsbr->deferred_memory = 0;
1420
        qsbr->should_process = true;
1421
        return true;
1422
    }
1423
    return false;
1424
}
1425
#endif
1426
1427
static void
1428
free_delayed(uintptr_t ptr, size_t size)
1429
0
{
1430
0
#ifndef Py_GIL_DISABLED
1431
0
    free_work_item(ptr, NULL, NULL);
1432
#else
1433
    PyInterpreterState *interp = _PyInterpreterState_GET();
1434
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1435
        interp->stoptheworld.world_stopped)
1436
    {
1437
        // Free immediately during interpreter shutdown or if the world is
1438
        // stopped.
1439
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1440
        free_work_item(ptr, NULL, NULL);
1441
        return;
1442
    }
1443
1444
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1445
    struct llist_node *head = &tstate->mem_free_queue;
1446
1447
    struct _mem_work_chunk *buf = NULL;
1448
    if (!llist_empty(head)) {
1449
        // Try to re-use the last buffer
1450
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1451
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1452
            // already full
1453
            buf = NULL;
1454
        }
1455
    }
1456
1457
    if (buf == NULL) {
1458
        buf = PyMem_Calloc(1, sizeof(*buf));
1459
        if (buf != NULL) {
1460
            llist_insert_tail(head, &buf->node);
1461
        }
1462
    }
1463
1464
    if (buf == NULL) {
1465
        // failed to allocate a buffer, free immediately
1466
        PyObject *to_dealloc = NULL;
1467
        _PyEval_StopTheWorld(tstate->base.interp);
1468
        if (work_item_should_decref(ptr)) {
1469
            PyObject *obj = (PyObject *)(ptr - 1);
1470
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1471
            if (refcount == 0) {
1472
                to_dealloc = obj;
1473
            }
1474
        }
1475
        else {
1476
            PyMem_Free((void *)ptr);
1477
        }
1478
        _PyEval_StartTheWorld(tstate->base.interp);
1479
        if (to_dealloc != NULL) {
1480
            _Py_Dealloc(to_dealloc);
1481
        }
1482
        return;
1483
    }
1484
1485
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1486
    uint64_t seq;
1487
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1488
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1489
    }
1490
    else {
1491
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1492
    }
1493
    buf->array[buf->wr_idx].ptr = ptr;
1494
    buf->array[buf->wr_idx].qsbr_goal = seq;
1495
    buf->wr_idx++;
1496
1497
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1498
        // Normally the processing of delayed items is done from the eval
1499
        // breaker.  Processing here is a safety measure to ensure too much
1500
        // work does not accumulate.
1501
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1502
    }
1503
#endif
1504
0
}
1505
1506
void
1507
_PyMem_FreeDelayed(void *ptr, size_t size)
1508
0
{
1509
0
    assert(!((uintptr_t)ptr & 0x01));
1510
0
    if (ptr != NULL) {
1511
0
        free_delayed((uintptr_t)ptr, size);
1512
0
    }
1513
0
}
1514
1515
#ifdef Py_GIL_DISABLED
1516
void
1517
_PyObject_XDecRefDelayed(PyObject *ptr)
1518
{
1519
    assert(!((uintptr_t)ptr & 0x01));
1520
    if (ptr != NULL) {
1521
        // We use 0 as the size since we don't have an easy way to know the
1522
        // actual size.  If we are freeing many objects, the write sequence
1523
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1524
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1525
    }
1526
}
1527
#endif
1528
1529
#ifdef Py_GIL_DISABLED
1530
void
1531
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1532
{
1533
    PyObject *old = *ptr;
1534
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1535
    if (old == NULL) {
1536
        return;
1537
    }
1538
    if (!_Py_IsImmortal(old)) {
1539
         _PyObject_XDecRefDelayed(old);
1540
    }
1541
}
1542
#endif
1543
1544
static struct _mem_work_chunk *
1545
work_queue_first(struct llist_node *head)
1546
0
{
1547
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1548
0
}
1549
1550
static void
1551
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1552
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1553
0
{
1554
0
    while (!llist_empty(head)) {
1555
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1556
1557
0
        if (buf->rd_idx < buf->wr_idx) {
1558
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1559
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1560
0
                return;
1561
0
            }
1562
1563
0
            buf->rd_idx++;
1564
            // NB: free_work_item may re-enter or execute arbitrary code
1565
0
            free_work_item(item->ptr, cb, state);
1566
0
            continue;
1567
0
        }
1568
1569
0
        assert(buf->rd_idx == buf->wr_idx);
1570
0
        if (keep_empty && buf->node.next == head) {
1571
            // Keep the last buffer in the queue to reduce re-allocations
1572
0
            buf->rd_idx = buf->wr_idx = 0;
1573
0
            return;
1574
0
        }
1575
1576
0
        llist_remove(&buf->node);
1577
0
        PyMem_Free(buf);
1578
0
    }
1579
0
}
1580
1581
static void
1582
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1583
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1584
                     void *state)
1585
0
{
1586
0
    assert(PyMutex_IsLocked(&queue->mutex));
1587
0
    process_queue(&queue->head, tstate, false, cb, state);
1588
1589
0
    int more_work = !llist_empty(&queue->head);
1590
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1591
0
}
1592
1593
static void
1594
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1595
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1596
                           void *state)
1597
0
{
1598
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1599
0
        return;
1600
0
    }
1601
1602
    // Try to acquire the lock, but don't block if it's already held.
1603
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1604
0
        process_interp_queue(queue, tstate, cb, state);
1605
0
        PyMutex_Unlock(&queue->mutex);
1606
0
    }
1607
0
}
1608
1609
void
1610
_PyMem_ProcessDelayed(PyThreadState *tstate)
1611
0
{
1612
0
    PyInterpreterState *interp = tstate->interp;
1613
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1614
1615
0
    tstate_impl->qsbr->should_process = false;
1616
1617
    // Process thread-local work
1618
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1619
1620
    // Process shared interpreter work
1621
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1622
0
}
1623
1624
void
1625
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1626
0
{
1627
0
    PyInterpreterState *interp = tstate->interp;
1628
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1629
1630
    // Process thread-local work
1631
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1632
1633
    // Process shared interpreter work
1634
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1635
0
}
1636
1637
void
1638
_PyMem_AbandonDelayed(PyThreadState *tstate)
1639
0
{
1640
0
    PyInterpreterState *interp = tstate->interp;
1641
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1642
1643
0
    if (llist_empty(queue)) {
1644
0
        return;
1645
0
    }
1646
1647
    // Check if the queue contains one empty buffer
1648
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1649
0
    if (buf->rd_idx == buf->wr_idx) {
1650
0
        llist_remove(&buf->node);
1651
0
        PyMem_Free(buf);
1652
0
        assert(llist_empty(queue));
1653
0
        return;
1654
0
    }
1655
1656
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1657
1658
    // Merge the thread's work queue into the interpreter's work queue.
1659
0
    llist_concat(&interp->mem_free_queue.head, queue);
1660
1661
    // Process the merged queue now (see gh-130794).
1662
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1663
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1664
1665
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1666
1667
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1668
0
}
1669
1670
void
1671
_PyMem_FiniDelayed(PyInterpreterState *interp)
1672
0
{
1673
0
    struct llist_node *head = &interp->mem_free_queue.head;
1674
0
    while (!llist_empty(head)) {
1675
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1676
1677
0
        if (buf->rd_idx < buf->wr_idx) {
1678
            // Free the remaining items immediately. There should be no other
1679
            // threads accessing the memory at this point during shutdown.
1680
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1681
0
            buf->rd_idx++;
1682
            // NB: free_work_item may re-enter or execute arbitrary code
1683
0
            free_work_item(item->ptr, NULL, NULL);
1684
0
            continue;
1685
0
        }
1686
1687
0
        llist_remove(&buf->node);
1688
0
        PyMem_Free(buf);
1689
0
    }
1690
0
}
1691
1692
/**************************/
1693
/* the "object" allocator */
1694
/**************************/
1695
1696
void *
1697
PyObject_Malloc(size_t size)
1698
1.40G
{
1699
    /* see PyMem_RawMalloc() */
1700
1.40G
    if (size > (size_t)PY_SSIZE_T_MAX)
1701
0
        return NULL;
1702
1.40G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1703
1.40G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1704
1.40G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1705
1.40G
    OBJECT_STAT_INC(allocations);
1706
1.40G
    return _PyObject.malloc(_PyObject.ctx, size);
1707
1.40G
}
1708
1709
void *
1710
PyObject_Calloc(size_t nelem, size_t elsize)
1711
0
{
1712
    /* see PyMem_RawMalloc() */
1713
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1714
0
        return NULL;
1715
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1716
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1717
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1718
0
    OBJECT_STAT_INC(allocations);
1719
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1720
0
}
1721
1722
void *
1723
PyObject_Realloc(void *ptr, size_t new_size)
1724
62.8M
{
1725
    /* see PyMem_RawMalloc() */
1726
62.8M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1727
0
        return NULL;
1728
62.8M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1729
62.8M
}
1730
1731
void
1732
PyObject_Free(void *ptr)
1733
1.40G
{
1734
1.40G
    OBJECT_STAT_INC(frees);
1735
1.40G
    _PyObject.free(_PyObject.ctx, ptr);
1736
1.40G
}
1737
1738
1739
/* Use __builtin_expect() where available to reduce overhead of
1740
   the valgrind checks */
1741
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1742
11.8G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1743
5.31G
#  define LIKELY(value) __builtin_expect((value), 1)
1744
#else
1745
#  define UNLIKELY(value) (value)
1746
#  define LIKELY(value) (value)
1747
#endif
1748
1749
#ifdef WITH_PYMALLOC
1750
1751
#ifdef WITH_VALGRIND
1752
#include <valgrind/valgrind.h>
1753
1754
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1755
static int running_on_valgrind = -1;
1756
#endif
1757
1758
typedef struct _obmalloc_state OMState;
1759
1760
/* obmalloc state for main interpreter and shared by all interpreters without
1761
 * their own obmalloc state.  By not explicitly initializing this structure, it
1762
 * will be allocated in the BSS which is a small performance win.  The radix
1763
 * tree arrays are fairly large but are sparsely used.  */
1764
static struct _obmalloc_state obmalloc_state_main;
1765
static bool obmalloc_state_initialized;
1766
1767
static inline int
1768
has_own_state(PyInterpreterState *interp)
1769
0
{
1770
0
    return (_Py_IsMainInterpreter(interp) ||
1771
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1772
0
            _Py_IsMainInterpreterFinalizing(interp));
1773
0
}
1774
1775
static inline OMState *
1776
get_state(void)
1777
4.08G
{
1778
4.08G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1779
4.08G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1780
4.08G
    return interp->obmalloc;
1781
4.08G
}
1782
1783
// These macros all rely on a local "state" variable.
1784
1.97G
#define usedpools (state->pools.used)
1785
2.92M
#define allarenas (state->mgmt.arenas)
1786
365
#define maxarenas (state->mgmt.maxarenas)
1787
27.2k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1788
24.8M
#define usable_arenas (state->mgmt.usable_arenas)
1789
17.7M
#define nfp2lasta (state->mgmt.nfp2lasta)
1790
17.4k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1791
5.62k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1792
6.65k
#define narenas_highwater (state->mgmt.narenas_highwater)
1793
462M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1794
1795
#ifdef WITH_MIMALLOC
1796
static bool count_blocks(
1797
    const mi_heap_t* heap, const mi_heap_area_t* area,
1798
    void* block, size_t block_size, void* allocated_blocks)
1799
0
{
1800
0
    *(size_t *)allocated_blocks += area->used;
1801
0
    return 1;
1802
0
}
1803
1804
static Py_ssize_t
1805
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1806
0
{
1807
0
    size_t allocated_blocks = 0;
1808
#ifdef Py_GIL_DISABLED
1809
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1810
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1811
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1812
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1813
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1814
        }
1815
    }
1816
1817
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1818
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1819
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1820
                                        &allocated_blocks);
1821
    }
1822
#else
1823
    // TODO(sgross): this only counts the current thread's blocks.
1824
0
    mi_heap_t *heap = mi_heap_get_default();
1825
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1826
0
#endif
1827
0
    return allocated_blocks;
1828
0
}
1829
#endif
1830
1831
Py_ssize_t
1832
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1833
0
{
1834
0
#ifdef WITH_MIMALLOC
1835
0
    if (_PyMem_MimallocEnabled()) {
1836
0
        return get_mimalloc_allocated_blocks(interp);
1837
0
    }
1838
0
#endif
1839
1840
#ifdef Py_DEBUG
1841
    assert(has_own_state(interp));
1842
#else
1843
0
    if (!has_own_state(interp)) {
1844
0
        _Py_FatalErrorFunc(__func__,
1845
0
                           "the interpreter doesn't have its own allocator");
1846
0
    }
1847
0
#endif
1848
0
    OMState *state = interp->obmalloc;
1849
1850
0
    if (state == NULL) {
1851
0
        return 0;
1852
0
    }
1853
1854
0
    Py_ssize_t n = raw_allocated_blocks;
1855
    /* add up allocated blocks for used pools */
1856
0
    for (uint i = 0; i < maxarenas; ++i) {
1857
        /* Skip arenas which are not allocated. */
1858
0
        if (allarenas[i].address == 0) {
1859
0
            continue;
1860
0
        }
1861
1862
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1863
1864
        /* visit every pool in the arena */
1865
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1866
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1867
0
            poolp p = (poolp)base;
1868
0
            n += p->ref.count;
1869
0
        }
1870
0
    }
1871
0
    return n;
1872
0
}
1873
1874
static void free_obmalloc_arenas(PyInterpreterState *interp);
1875
1876
void
1877
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1878
0
{
1879
0
#ifdef WITH_MIMALLOC
1880
0
    if (_PyMem_MimallocEnabled()) {
1881
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1882
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1883
0
        return;
1884
0
    }
1885
0
#endif
1886
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1887
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1888
0
        assert(has_own_state(interp) || leaked == 0);
1889
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1890
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1891
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1892
            // then some of the memory allocated by obmalloc has not been
1893
            // freed.  It might be safe to free the arenas in that case but
1894
            // it's possible that extension modules are still using that
1895
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1896
            // should be warning when this happens.  It should be possible to
1897
            // use a tool like "-fsanitize=address" to track down these leaks.
1898
0
            free_obmalloc_arenas(interp);
1899
0
        }
1900
0
    }
1901
0
}
1902
1903
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1904
1905
/* We preserve the number of blocks leaked during runtime finalization,
1906
   so they can be reported if the runtime is initialized again. */
1907
// XXX We don't lose any information by dropping this,
1908
// so we should consider doing so.
1909
static Py_ssize_t last_final_leaks = 0;
1910
1911
void
1912
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1913
0
{
1914
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1915
0
    runtime->obmalloc.interpreter_leaks = 0;
1916
0
}
1917
1918
static Py_ssize_t
1919
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1920
0
{
1921
0
    Py_ssize_t total = 0;
1922
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1923
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1924
0
        if (interp == NULL) {
1925
            /* We are at the very end of runtime finalization.
1926
               We can't rely on finalizing->interp since that thread
1927
               state is probably already freed, so we don't worry
1928
               about it. */
1929
0
            assert(PyInterpreterState_Head() == NULL);
1930
0
        }
1931
0
        else {
1932
0
            assert(interp != NULL);
1933
            /* It is probably the last interpreter but not necessarily. */
1934
0
            assert(PyInterpreterState_Next(interp) == NULL);
1935
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1936
0
        }
1937
0
    }
1938
0
    else {
1939
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1940
0
        HEAD_LOCK(runtime);
1941
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1942
0
        assert(interp != NULL);
1943
#ifdef Py_DEBUG
1944
        int got_main = 0;
1945
#endif
1946
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1947
#ifdef Py_DEBUG
1948
            if (_Py_IsMainInterpreter(interp)) {
1949
                assert(!got_main);
1950
                got_main = 1;
1951
                assert(has_own_state(interp));
1952
            }
1953
#endif
1954
0
            if (has_own_state(interp)) {
1955
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1956
0
            }
1957
0
        }
1958
0
        HEAD_UNLOCK(runtime);
1959
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1960
#ifdef Py_DEBUG
1961
        assert(got_main);
1962
#endif
1963
0
    }
1964
0
    total += runtime->obmalloc.interpreter_leaks;
1965
0
    total += last_final_leaks;
1966
0
    return total;
1967
0
}
1968
1969
Py_ssize_t
1970
_Py_GetGlobalAllocatedBlocks(void)
1971
0
{
1972
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1973
0
}
1974
1975
#if WITH_PYMALLOC_RADIX_TREE
1976
/*==========================================================================*/
1977
/* radix tree for tracking arena usage. */
1978
1979
6.03G
#define arena_map_root (state->usage.arena_map_root)
1980
#ifdef USE_INTERIOR_NODES
1981
37
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1982
37
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1983
#endif
1984
1985
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1986
 * it cannot be created */
1987
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1988
arena_map_get(OMState *state, pymem_block *p, int create)
1989
2.09G
{
1990
2.09G
#ifdef USE_INTERIOR_NODES
1991
    /* sanity check that IGNORE_BITS is correct */
1992
2.09G
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1993
2.09G
    int i1 = MAP_TOP_INDEX(p);
1994
2.09G
    if (arena_map_root.ptrs[i1] == NULL) {
1995
37
        if (!create) {
1996
0
            return NULL;
1997
0
        }
1998
37
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1999
37
        if (n == NULL) {
2000
0
            return NULL;
2001
0
        }
2002
37
        arena_map_root.ptrs[i1] = n;
2003
37
        arena_map_mid_count++;
2004
37
    }
2005
2.09G
    int i2 = MAP_MID_INDEX(p);
2006
2.09G
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
2007
240M
        if (!create) {
2008
240M
            return NULL;
2009
240M
        }
2010
37
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
2011
37
        if (n == NULL) {
2012
0
            return NULL;
2013
0
        }
2014
37
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
2015
37
        arena_map_bot_count++;
2016
37
    }
2017
1.85G
    return arena_map_root.ptrs[i1]->ptrs[i2];
2018
#else
2019
    return &arena_map_root;
2020
#endif
2021
2.09G
}
2022
2023
2024
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
2025
 * away 24 bits of the address.  That reduces the space requirement of
2026
 * the tree compared to similar radix tree page-map schemes.  In
2027
 * exchange for slashing the space requirement, it needs more
2028
 * computation to check an address.
2029
 *
2030
 * Tracking coverage is done by "ideal" arena address.  It is easier to
2031
 * explain in decimal so let's say that the arena size is 100 bytes.
2032
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
2033
 * pointer address is inside an actual arena, we have to check two ideal
2034
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
2035
 * 300.  In the rare case that an arena is aligned in the ideal way
2036
 * (e.g. base address of arena is 200) then we only have to check one
2037
 * ideal address.
2038
 *
2039
 * The tree nodes for 200 and 300 both store the address of arena.
2040
 * There are two cases: the arena starts at a lower ideal arena and
2041
 * extends to this one, or the arena starts in this arena and extends to
2042
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
2043
 * these two cases.
2044
 */
2045
2046
2047
/* mark or unmark addresses covered by arena */
2048
static int
2049
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
2050
10.7k
{
2051
    /* sanity check that IGNORE_BITS is correct */
2052
10.7k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
2053
10.7k
    arena_map_bot_t *n_hi = arena_map_get(
2054
10.7k
            state, (pymem_block *)arena_base, is_used);
2055
10.7k
    if (n_hi == NULL) {
2056
0
        assert(is_used); /* otherwise node should already exist */
2057
0
        return 0; /* failed to allocate space for node */
2058
0
    }
2059
10.7k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
2060
10.7k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
2061
10.7k
    if (tail == 0) {
2062
        /* is ideal arena address */
2063
71
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
2064
71
    }
2065
10.7k
    else {
2066
        /* arena_base address is not ideal (aligned to arena size) and
2067
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
2068
         * for the next arena.  Note that it might be in different MAP_TOP
2069
         * and MAP_MID nodes as well so we need to call arena_map_get()
2070
         * again (do the full tree traversal).
2071
         */
2072
10.7k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
2073
10.7k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
2074
        /* If arena_base is a legit arena address, so is arena_base_next - 1
2075
         * (last address in arena).  If arena_base_next overflows then it
2076
         * must overflow to 0.  However, that would mean arena_base was
2077
         * "ideal" and we should not be in this case. */
2078
10.7k
        assert(arena_base < arena_base_next);
2079
10.7k
        arena_map_bot_t *n_lo = arena_map_get(
2080
10.7k
                state, (pymem_block *)arena_base_next, is_used);
2081
10.7k
        if (n_lo == NULL) {
2082
0
            assert(is_used); /* otherwise should already exist */
2083
0
            n_hi->arenas[i3].tail_hi = 0;
2084
0
            return 0; /* failed to allocate space for node */
2085
0
        }
2086
10.7k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
2087
10.7k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
2088
10.7k
    }
2089
10.7k
    return 1;
2090
10.7k
}
2091
2092
/* Return true if 'p' is a pointer inside an obmalloc arena.
2093
 * _PyObject_Free() calls this so it needs to be very fast. */
2094
static int
2095
arena_map_is_used(OMState *state, pymem_block *p)
2096
2.09G
{
2097
2.09G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
2098
2.09G
    if (n == NULL) {
2099
240M
        return 0;
2100
240M
    }
2101
1.85G
    int i3 = MAP_BOT_INDEX(p);
2102
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
2103
1.85G
    int32_t hi = n->arenas[i3].tail_hi;
2104
1.85G
    int32_t lo = n->arenas[i3].tail_lo;
2105
1.85G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
2106
1.85G
    return (tail < lo) || (tail >= hi && hi != 0);
2107
2.09G
}
2108
2109
/* end of radix tree logic */
2110
/*==========================================================================*/
2111
#endif /* WITH_PYMALLOC_RADIX_TREE */
2112
2113
2114
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
2115
 * allocate a new arena, and return the address of an arena_object
2116
 * describing the new arena.  It's expected that the caller will set
2117
 * `usable_arenas` to the return value.
2118
 */
2119
static struct arena_object*
2120
new_arena(OMState *state)
2121
5.62k
{
2122
5.62k
    struct arena_object* arenaobj;
2123
5.62k
    uint excess;        /* number of bytes above pool alignment */
2124
5.62k
    void *address;
2125
2126
5.62k
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
2127
5.62k
    if (debug_stats == -1) {
2128
37
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
2129
37
        debug_stats = (opt != NULL && *opt != '\0');
2130
37
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
2131
37
    }
2132
5.62k
    if (debug_stats) {
2133
0
        _PyObject_DebugMallocStats(stderr);
2134
0
    }
2135
2136
5.62k
    if (unused_arena_objects == NULL) {
2137
67
        uint i;
2138
67
        uint numarenas;
2139
67
        size_t nbytes;
2140
2141
        /* Double the number of arena objects on each allocation.
2142
         * Note that it's possible for `numarenas` to overflow.
2143
         */
2144
67
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
2145
67
        if (numarenas <= maxarenas)
2146
0
            return NULL;                /* overflow */
2147
#if SIZEOF_SIZE_T <= SIZEOF_INT
2148
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
2149
            return NULL;                /* overflow */
2150
#endif
2151
67
        nbytes = numarenas * sizeof(*allarenas);
2152
67
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
2153
67
        if (arenaobj == NULL)
2154
0
            return NULL;
2155
67
        allarenas = arenaobj;
2156
2157
        /* We might need to fix pointers that were copied.  However,
2158
         * new_arena only gets called when all the pages in the
2159
         * previous arenas are full.  Thus, there are *no* pointers
2160
         * into the old array. Thus, we don't have to worry about
2161
         * invalid pointers.  Just to be sure, some asserts:
2162
         */
2163
67
        assert(usable_arenas == NULL);
2164
67
        assert(unused_arena_objects == NULL);
2165
2166
        /* Put the new arenas on the unused_arena_objects list. */
2167
1.77k
        for (i = maxarenas; i < numarenas; ++i) {
2168
1.71k
            allarenas[i].address = 0;              /* mark as unassociated */
2169
1.71k
            allarenas[i].nextarena = i < numarenas - 1 ?
2170
1.71k
                                        &allarenas[i+1] : NULL;
2171
1.71k
        }
2172
2173
        /* Update globals. */
2174
67
        unused_arena_objects = &allarenas[maxarenas];
2175
67
        maxarenas = numarenas;
2176
67
    }
2177
2178
    /* Take the next available arena object off the head of the list. */
2179
5.62k
    assert(unused_arena_objects != NULL);
2180
5.62k
    arenaobj = unused_arena_objects;
2181
5.62k
    unused_arena_objects = arenaobj->nextarena;
2182
5.62k
    assert(arenaobj->address == 0);
2183
5.62k
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
2184
5.62k
#if WITH_PYMALLOC_RADIX_TREE
2185
5.62k
    if (address != NULL) {
2186
5.62k
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
2187
            /* marking arena in radix tree failed, abort */
2188
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
2189
0
            address = NULL;
2190
0
        }
2191
5.62k
    }
2192
5.62k
#endif
2193
5.62k
    if (address == NULL) {
2194
        /* The allocation failed: return NULL after putting the
2195
         * arenaobj back.
2196
         */
2197
0
        arenaobj->nextarena = unused_arena_objects;
2198
0
        unused_arena_objects = arenaobj;
2199
0
        return NULL;
2200
0
    }
2201
5.62k
    arenaobj->address = (uintptr_t)address;
2202
2203
5.62k
    ++narenas_currently_allocated;
2204
5.62k
    ++ntimes_arena_allocated;
2205
5.62k
    if (narenas_currently_allocated > narenas_highwater)
2206
1.02k
        narenas_highwater = narenas_currently_allocated;
2207
5.62k
    arenaobj->freepools = NULL;
2208
    /* pool_address <- first pool-aligned address in the arena
2209
       nfreepools <- number of whole pools that fit after alignment */
2210
5.62k
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
2211
5.62k
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
2212
5.62k
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2213
5.62k
    if (excess != 0) {
2214
4.25k
        --arenaobj->nfreepools;
2215
4.25k
        arenaobj->pool_address += POOL_SIZE - excess;
2216
4.25k
    }
2217
5.62k
    arenaobj->ntotalpools = arenaobj->nfreepools;
2218
2219
5.62k
    return arenaobj;
2220
5.62k
}
2221
2222
2223
2224
#if WITH_PYMALLOC_RADIX_TREE
2225
/* Return true if and only if P is an address that was allocated by
2226
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2227
 */
2228
static bool
2229
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2230
2.09G
{
2231
2.09G
    return arena_map_is_used(state, p);
2232
2.09G
}
2233
#else
2234
/*
2235
address_in_range(P, POOL)
2236
2237
Return true if and only if P is an address that was allocated by pymalloc.
2238
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2239
(the caller is asked to compute this because the macro expands POOL more than
2240
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2241
variable and pass the latter to the macro; because address_in_range is
2242
called on every alloc/realloc/free, micro-efficiency is important here).
2243
2244
Tricky:  Let B be the arena base address associated with the pool, B =
2245
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2246
2247
    B <= P < B + ARENA_SIZE
2248
2249
Subtracting B throughout, this is true iff
2250
2251
    0 <= P-B < ARENA_SIZE
2252
2253
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2254
2255
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2256
before the first arena has been allocated.  `arenas` is still NULL in that
2257
case.  We're relying on that maxarenas is also 0 in that case, so that
2258
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2259
into a NULL arenas.
2260
2261
Details:  given P and POOL, the arena_object corresponding to P is AO =
2262
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2263
stores, etc), POOL is the correct address of P's pool, AO.address is the
2264
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2265
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2266
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2267
controls P.
2268
2269
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2270
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2271
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2272
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2273
control P.
2274
2275
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2276
allocated arena, obmalloc controls all the memory in slice AO.address :
2277
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2278
so P doesn't lie in that slice, so the macro correctly reports that P is not
2279
controlled by obmalloc.
2280
2281
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2282
arena_object (one not currently associated with an allocated arena),
2283
AO.address is 0, and the second test in the macro reduces to:
2284
2285
    P < ARENA_SIZE
2286
2287
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2288
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2289
of the test still passes, and the third clause (AO.address != 0) is necessary
2290
to get the correct result:  AO.address is 0 in this case, so the macro
2291
correctly reports that P is not controlled by obmalloc (despite that P lies in
2292
slice AO.address : AO.address + ARENA_SIZE).
2293
2294
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2295
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2296
corresponded to a currently-allocated arena, so the "P is not controlled by
2297
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2298
was impossible.
2299
2300
Note that the logic is excruciating, and reading up possibly uninitialized
2301
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2302
creates problems for some memory debuggers.  The overwhelming advantage is
2303
that this test determines whether an arbitrary address is controlled by
2304
obmalloc in a small constant time, independent of the number of arenas
2305
obmalloc controls.  Since this test is needed at every entry point, it's
2306
extremely desirable that it be this fast.
2307
*/
2308
2309
static bool _Py_NO_SANITIZE_ADDRESS
2310
            _Py_NO_SANITIZE_THREAD
2311
            _Py_NO_SANITIZE_MEMORY
2312
address_in_range(OMState *state, void *p, poolp pool)
2313
{
2314
    // Since address_in_range may be reading from memory which was not allocated
2315
    // by Python, it is important that pool->arenaindex is read only once, as
2316
    // another thread may be concurrently modifying the value without holding
2317
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2318
    // only once.
2319
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2320
    return arenaindex < maxarenas &&
2321
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2322
        allarenas[arenaindex].address != 0;
2323
}
2324
2325
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2326
2327
/*==========================================================================*/
2328
2329
// Called when freelist is exhausted.  Extend the freelist if there is
2330
// space for a block.  Otherwise, remove this pool from usedpools.
2331
static void
2332
pymalloc_pool_extend(poolp pool, uint size)
2333
415M
{
2334
415M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2335
        /* There is room for another block. */
2336
208M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2337
208M
        pool->nextoffset += INDEX2SIZE(size);
2338
208M
        *(pymem_block **)(pool->freeblock) = NULL;
2339
208M
        return;
2340
208M
    }
2341
2342
    /* Pool is full, unlink from used pools. */
2343
206M
    poolp next;
2344
206M
    next = pool->nextpool;
2345
206M
    pool = pool->prevpool;
2346
206M
    next->prevpool = pool;
2347
206M
    pool->nextpool = next;
2348
206M
}
2349
2350
/* called when pymalloc_alloc can not allocate a block from usedpool.
2351
 * This function takes new pool and allocate a block from it.
2352
 */
2353
static void*
2354
allocate_from_new_pool(OMState *state, uint size)
2355
2.58M
{
2356
    /* There isn't a pool of the right size class immediately
2357
     * available:  use a free pool.
2358
     */
2359
2.58M
    if (UNLIKELY(usable_arenas == NULL)) {
2360
        /* No arena has a free pool:  allocate a new arena. */
2361
#ifdef WITH_MEMORY_LIMITS
2362
        if (narenas_currently_allocated >= MAX_ARENAS) {
2363
            return NULL;
2364
        }
2365
#endif
2366
5.62k
        usable_arenas = new_arena(state);
2367
5.62k
        if (usable_arenas == NULL) {
2368
0
            return NULL;
2369
0
        }
2370
5.62k
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2371
5.62k
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2372
5.62k
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2373
5.62k
    }
2374
2.58M
    assert(usable_arenas->address != 0);
2375
2376
    /* This arena already had the smallest nfreepools value, so decreasing
2377
     * nfreepools doesn't change that, and we don't need to rearrange the
2378
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2379
     * we need to remove its arena_object from usable_arenas.
2380
     */
2381
2.58M
    assert(usable_arenas->nfreepools > 0);
2382
2.58M
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2383
        /* It's the last of this size, so there won't be any. */
2384
2.57M
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2385
2.57M
    }
2386
    /* If any free pools will remain, it will be the new smallest. */
2387
2.58M
    if (usable_arenas->nfreepools > 1) {
2388
2.50M
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2389
2.50M
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2390
2.50M
    }
2391
2392
    /* Try to get a cached free pool. */
2393
2.58M
    poolp pool = usable_arenas->freepools;
2394
2.58M
    if (LIKELY(pool != NULL)) {
2395
        /* Unlink from cached pools. */
2396
2.23M
        usable_arenas->freepools = pool->nextpool;
2397
2.23M
        usable_arenas->nfreepools--;
2398
2.23M
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2399
            /* Wholly allocated:  remove. */
2400
75.7k
            assert(usable_arenas->freepools == NULL);
2401
75.7k
            assert(usable_arenas->nextarena == NULL ||
2402
75.7k
                   usable_arenas->nextarena->prevarena ==
2403
75.7k
                   usable_arenas);
2404
75.7k
            usable_arenas = usable_arenas->nextarena;
2405
75.7k
            if (usable_arenas != NULL) {
2406
70.7k
                usable_arenas->prevarena = NULL;
2407
70.7k
                assert(usable_arenas->address != 0);
2408
70.7k
            }
2409
75.7k
        }
2410
2.16M
        else {
2411
            /* nfreepools > 0:  it must be that freepools
2412
             * isn't NULL, or that we haven't yet carved
2413
             * off all the arena's pools for the first
2414
             * time.
2415
             */
2416
2.16M
            assert(usable_arenas->freepools != NULL ||
2417
2.16M
                   usable_arenas->pool_address <=
2418
2.16M
                   (pymem_block*)usable_arenas->address +
2419
2.16M
                       ARENA_SIZE - POOL_SIZE);
2420
2.16M
        }
2421
2.23M
    }
2422
349k
    else {
2423
        /* Carve off a new pool. */
2424
349k
        assert(usable_arenas->nfreepools > 0);
2425
349k
        assert(usable_arenas->freepools == NULL);
2426
349k
        pool = (poolp)usable_arenas->pool_address;
2427
349k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2428
349k
                                 ARENA_SIZE - POOL_SIZE);
2429
349k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2430
349k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2431
349k
        pool->szidx = DUMMY_SIZE_IDX;
2432
349k
        usable_arenas->pool_address += POOL_SIZE;
2433
349k
        --usable_arenas->nfreepools;
2434
2435
349k
        if (usable_arenas->nfreepools == 0) {
2436
5.43k
            assert(usable_arenas->nextarena == NULL ||
2437
5.43k
                   usable_arenas->nextarena->prevarena ==
2438
5.43k
                   usable_arenas);
2439
            /* Unlink the arena:  it is completely allocated. */
2440
5.43k
            usable_arenas = usable_arenas->nextarena;
2441
5.43k
            if (usable_arenas != NULL) {
2442
356
                usable_arenas->prevarena = NULL;
2443
356
                assert(usable_arenas->address != 0);
2444
356
            }
2445
5.43k
        }
2446
349k
    }
2447
2448
    /* Frontlink to used pools. */
2449
2.58M
    pymem_block *bp;
2450
2.58M
    poolp next = usedpools[size + size]; /* == prev */
2451
2.58M
    pool->nextpool = next;
2452
2.58M
    pool->prevpool = next;
2453
2.58M
    next->nextpool = pool;
2454
2.58M
    next->prevpool = pool;
2455
2.58M
    pool->ref.count = 1;
2456
2.58M
    if (pool->szidx == size) {
2457
        /* Luckily, this pool last contained blocks
2458
         * of the same size class, so its header
2459
         * and free list are already initialized.
2460
         */
2461
1.63M
        bp = pool->freeblock;
2462
1.63M
        assert(bp != NULL);
2463
1.63M
        pool->freeblock = *(pymem_block **)bp;
2464
1.63M
        return bp;
2465
1.63M
    }
2466
    /*
2467
     * Initialize the pool header, set up the free list to
2468
     * contain just the second block, and return the first
2469
     * block.
2470
     */
2471
953k
    pool->szidx = size;
2472
953k
    size = INDEX2SIZE(size);
2473
953k
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2474
953k
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2475
953k
    pool->maxnextoffset = POOL_SIZE - size;
2476
953k
    pool->freeblock = bp + size;
2477
953k
    *(pymem_block **)(pool->freeblock) = NULL;
2478
953k
    return bp;
2479
2.58M
}
2480
2481
/* pymalloc allocator
2482
2483
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2484
2485
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2486
   requests, on error in the code below (as a last chance to serve the request)
2487
   or when the max memory limit has been reached.
2488
*/
2489
static inline void*
2490
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2491
1.99G
{
2492
#ifdef WITH_VALGRIND
2493
    if (UNLIKELY(running_on_valgrind == -1)) {
2494
        running_on_valgrind = RUNNING_ON_VALGRIND;
2495
    }
2496
    if (UNLIKELY(running_on_valgrind)) {
2497
        return NULL;
2498
    }
2499
#endif
2500
2501
1.99G
    if (UNLIKELY(nbytes == 0)) {
2502
38.9M
        return NULL;
2503
38.9M
    }
2504
1.95G
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2505
192M
        return NULL;
2506
192M
    }
2507
2508
1.76G
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2509
1.76G
    poolp pool = usedpools[size + size];
2510
1.76G
    pymem_block *bp;
2511
2512
1.76G
    if (LIKELY(pool != pool->nextpool)) {
2513
        /*
2514
         * There is a used pool for this size class.
2515
         * Pick up the head block of its free list.
2516
         */
2517
1.76G
        ++pool->ref.count;
2518
1.76G
        bp = pool->freeblock;
2519
1.76G
        assert(bp != NULL);
2520
2521
1.76G
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2522
            // Reached the end of the free list, try to extend it.
2523
415M
            pymalloc_pool_extend(pool, size);
2524
415M
        }
2525
1.76G
    }
2526
2.58M
    else {
2527
        /* There isn't a pool of the right size class immediately
2528
         * available:  use a free pool.
2529
         */
2530
2.58M
        bp = allocate_from_new_pool(state, size);
2531
2.58M
    }
2532
2533
1.76G
    return (void *)bp;
2534
1.95G
}
2535
2536
2537
void *
2538
_PyObject_Malloc(void *ctx, size_t nbytes)
2539
1.95G
{
2540
1.95G
    OMState *state = get_state();
2541
1.95G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2542
1.95G
    if (LIKELY(ptr != NULL)) {
2543
1.72G
        return ptr;
2544
1.72G
    }
2545
2546
231M
    ptr = PyMem_RawMalloc(nbytes);
2547
231M
    if (ptr != NULL) {
2548
231M
        raw_allocated_blocks++;
2549
231M
    }
2550
231M
    return ptr;
2551
1.95G
}
2552
2553
2554
void *
2555
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2556
42.4M
{
2557
42.4M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2558
42.4M
    size_t nbytes = nelem * elsize;
2559
2560
42.4M
    OMState *state = get_state();
2561
42.4M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2562
42.4M
    if (LIKELY(ptr != NULL)) {
2563
42.3M
        memset(ptr, 0, nbytes);
2564
42.3M
        return ptr;
2565
42.3M
    }
2566
2567
150k
    ptr = PyMem_RawCalloc(nelem, elsize);
2568
150k
    if (ptr != NULL) {
2569
150k
        raw_allocated_blocks++;
2570
150k
    }
2571
150k
    return ptr;
2572
42.4M
}
2573
2574
2575
static void
2576
insert_to_usedpool(OMState *state, poolp pool)
2577
206M
{
2578
206M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2579
2580
206M
    uint size = pool->szidx;
2581
206M
    poolp next = usedpools[size + size];
2582
206M
    poolp prev = next->prevpool;
2583
2584
    /* insert pool before next:   prev <-> pool <-> next */
2585
206M
    pool->nextpool = next;
2586
206M
    pool->prevpool = prev;
2587
206M
    next->prevpool = pool;
2588
206M
    prev->nextpool = pool;
2589
206M
}
2590
2591
static void
2592
insert_to_freepool(OMState *state, poolp pool)
2593
2.57M
{
2594
2.57M
    poolp next = pool->nextpool;
2595
2.57M
    poolp prev = pool->prevpool;
2596
2.57M
    next->prevpool = prev;
2597
2.57M
    prev->nextpool = next;
2598
2599
    /* Link the pool to freepools.  This is a singly-linked
2600
     * list, and pool->prevpool isn't used there.
2601
     */
2602
2.57M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2603
2.57M
    pool->nextpool = ao->freepools;
2604
2.57M
    ao->freepools = pool;
2605
2.57M
    uint nf = ao->nfreepools;
2606
    /* If this is the rightmost arena with this number of free pools,
2607
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2608
     * are no arenas in usable_arenas with that value.
2609
     */
2610
2.57M
    struct arena_object* lastnf = nfp2lasta[nf];
2611
2.57M
    assert((nf == 0 && lastnf == NULL) ||
2612
2.57M
           (nf > 0 &&
2613
2.57M
            lastnf != NULL &&
2614
2.57M
            lastnf->nfreepools == nf &&
2615
2.57M
            (lastnf->nextarena == NULL ||
2616
2.57M
             nf < lastnf->nextarena->nfreepools)));
2617
2.57M
    if (lastnf == ao) {  /* it is the rightmost */
2618
2.41M
        struct arena_object* p = ao->prevarena;
2619
2.41M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2620
2.41M
    }
2621
2.57M
    ao->nfreepools = ++nf;
2622
2623
    /* All the rest is arena management.  We just freed
2624
     * a pool, and there are 4 cases for arena mgmt:
2625
     * 1. If all the pools are free, return the arena to
2626
     *    the system free().  Except if this is the last
2627
     *    arena in the list, keep it to avoid thrashing:
2628
     *    keeping one wholly free arena in the list avoids
2629
     *    pathological cases where a simple loop would
2630
     *    otherwise provoke needing to allocate and free an
2631
     *    arena on every iteration.  See bpo-37257.
2632
     * 2. If this is the only free pool in the arena,
2633
     *    add the arena back to the `usable_arenas` list.
2634
     * 3. If the "next" arena has a smaller count of free
2635
     *    pools, we have to "slide this arena right" to
2636
     *    restore that usable_arenas is sorted in order of
2637
     *    nfreepools.
2638
     * 4. Else there's nothing more to do.
2639
     */
2640
2.57M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2641
        /* Case 1.  First unlink ao from usable_arenas.
2642
         */
2643
5.17k
        assert(ao->prevarena == NULL ||
2644
5.17k
               ao->prevarena->address != 0);
2645
5.17k
        assert(ao ->nextarena == NULL ||
2646
5.17k
               ao->nextarena->address != 0);
2647
2648
        /* Fix the pointer in the prevarena, or the
2649
         * usable_arenas pointer.
2650
         */
2651
5.17k
        if (ao->prevarena == NULL) {
2652
727
            usable_arenas = ao->nextarena;
2653
727
            assert(usable_arenas == NULL ||
2654
727
                   usable_arenas->address != 0);
2655
727
        }
2656
4.44k
        else {
2657
4.44k
            assert(ao->prevarena->nextarena == ao);
2658
4.44k
            ao->prevarena->nextarena =
2659
4.44k
                ao->nextarena;
2660
4.44k
        }
2661
        /* Fix the pointer in the nextarena. */
2662
5.17k
        if (ao->nextarena != NULL) {
2663
5.17k
            assert(ao->nextarena->prevarena == ao);
2664
5.17k
            ao->nextarena->prevarena =
2665
5.17k
                ao->prevarena;
2666
5.17k
        }
2667
        /* Record that this arena_object slot is
2668
         * available to be reused.
2669
         */
2670
5.17k
        ao->nextarena = unused_arena_objects;
2671
5.17k
        unused_arena_objects = ao;
2672
2673
5.17k
#if WITH_PYMALLOC_RADIX_TREE
2674
        /* mark arena region as not under control of obmalloc */
2675
5.17k
        arena_map_mark_used(state, ao->address, 0);
2676
5.17k
#endif
2677
2678
        /* Free the entire arena. */
2679
5.17k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2680
5.17k
                             (void *)ao->address, ARENA_SIZE);
2681
5.17k
        ao->address = 0;                        /* mark unassociated */
2682
5.17k
        --narenas_currently_allocated;
2683
2684
5.17k
        return;
2685
5.17k
    }
2686
2687
2.56M
    if (nf == 1) {
2688
        /* Case 2.  Put ao at the head of
2689
         * usable_arenas.  Note that because
2690
         * ao->nfreepools was 0 before, ao isn't
2691
         * currently on the usable_arenas list.
2692
         */
2693
81.0k
        ao->nextarena = usable_arenas;
2694
81.0k
        ao->prevarena = NULL;
2695
81.0k
        if (usable_arenas)
2696
76.5k
            usable_arenas->prevarena = ao;
2697
81.0k
        usable_arenas = ao;
2698
81.0k
        assert(usable_arenas->address != 0);
2699
81.0k
        if (nfp2lasta[1] == NULL) {
2700
77.2k
            nfp2lasta[1] = ao;
2701
77.2k
        }
2702
2703
81.0k
        return;
2704
81.0k
    }
2705
2706
    /* If this arena is now out of order, we need to keep
2707
     * the list sorted.  The list is kept sorted so that
2708
     * the "most full" arenas are used first, which allows
2709
     * the nearly empty arenas to be completely freed.  In
2710
     * a few un-scientific tests, it seems like this
2711
     * approach allowed a lot more memory to be freed.
2712
     */
2713
    /* If this is the only arena with nf, record that. */
2714
2.48M
    if (nfp2lasta[nf] == NULL) {
2715
2.39M
        nfp2lasta[nf] = ao;
2716
2.39M
    } /* else the rightmost with nf doesn't change */
2717
    /* If this was the rightmost of the old size, it remains in place. */
2718
2.48M
    if (ao == lastnf) {
2719
        /* Case 4.  Nothing to do. */
2720
2.41M
        return;
2721
2.41M
    }
2722
    /* If ao were the only arena in the list, the last block would have
2723
     * gotten us out.
2724
     */
2725
2.48M
    assert(ao->nextarena != NULL);
2726
2727
    /* Case 3:  We have to move the arena towards the end of the list,
2728
     * because it has more free pools than the arena to its right.  It needs
2729
     * to move to follow lastnf.
2730
     * First unlink ao from usable_arenas.
2731
     */
2732
70.4k
    if (ao->prevarena != NULL) {
2733
        /* ao isn't at the head of the list */
2734
55.2k
        assert(ao->prevarena->nextarena == ao);
2735
55.2k
        ao->prevarena->nextarena = ao->nextarena;
2736
55.2k
    }
2737
15.1k
    else {
2738
        /* ao is at the head of the list */
2739
15.1k
        assert(usable_arenas == ao);
2740
15.1k
        usable_arenas = ao->nextarena;
2741
15.1k
    }
2742
70.4k
    ao->nextarena->prevarena = ao->prevarena;
2743
    /* And insert after lastnf. */
2744
70.4k
    ao->prevarena = lastnf;
2745
70.4k
    ao->nextarena = lastnf->nextarena;
2746
70.4k
    if (ao->nextarena != NULL) {
2747
68.7k
        ao->nextarena->prevarena = ao;
2748
68.7k
    }
2749
70.4k
    lastnf->nextarena = ao;
2750
    /* Verify that the swaps worked. */
2751
70.4k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2752
70.4k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2753
70.4k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2754
70.4k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2755
70.4k
           || ao->prevarena->nextarena == ao);
2756
70.4k
}
2757
2758
/* Free a memory block allocated by pymalloc_alloc().
2759
   Return 1 if it was freed.
2760
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2761
static inline int
2762
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2763
1.99G
{
2764
1.99G
    assert(p != NULL);
2765
2766
#ifdef WITH_VALGRIND
2767
    if (UNLIKELY(running_on_valgrind > 0)) {
2768
        return 0;
2769
    }
2770
#endif
2771
2772
1.99G
    poolp pool = POOL_ADDR(p);
2773
1.99G
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2774
231M
        return 0;
2775
231M
    }
2776
    /* We allocated this address. */
2777
2778
    /* Link p to the start of the pool's freeblock list.  Since
2779
     * the pool had at least the p block outstanding, the pool
2780
     * wasn't empty (so it's already in a usedpools[] list, or
2781
     * was full and is in no list -- it's not in the freeblocks
2782
     * list in any case).
2783
     */
2784
1.99G
    assert(pool->ref.count > 0);            /* else it was empty */
2785
1.76G
    pymem_block *lastfree = pool->freeblock;
2786
1.76G
    *(pymem_block **)p = lastfree;
2787
1.76G
    pool->freeblock = (pymem_block *)p;
2788
1.76G
    pool->ref.count--;
2789
2790
1.76G
    if (UNLIKELY(lastfree == NULL)) {
2791
        /* Pool was full, so doesn't currently live in any list:
2792
         * link it to the front of the appropriate usedpools[] list.
2793
         * This mimics LRU pool usage for new allocations and
2794
         * targets optimal filling when several pools contain
2795
         * blocks of the same size class.
2796
         */
2797
206M
        insert_to_usedpool(state, pool);
2798
206M
        return 1;
2799
206M
    }
2800
2801
    /* freeblock wasn't NULL, so the pool wasn't full,
2802
     * and the pool is in a usedpools[] list.
2803
     */
2804
1.55G
    if (LIKELY(pool->ref.count != 0)) {
2805
        /* pool isn't empty:  leave it in usedpools */
2806
1.55G
        return 1;
2807
1.55G
    }
2808
2809
    /* Pool is now empty:  unlink from usedpools, and
2810
     * link to the front of freepools.  This ensures that
2811
     * previously freed pools will be allocated later
2812
     * (being not referenced, they are perhaps paged out).
2813
     */
2814
2.57M
    insert_to_freepool(state, pool);
2815
2.57M
    return 1;
2816
1.55G
}
2817
2818
2819
void
2820
_PyObject_Free(void *ctx, void *p)
2821
1.99G
{
2822
    /* PyObject_Free(NULL) has no effect */
2823
1.99G
    if (p == NULL) {
2824
5.30M
        return;
2825
5.30M
    }
2826
2827
1.99G
    OMState *state = get_state();
2828
1.99G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2829
        /* pymalloc didn't allocate this address */
2830
231M
        PyMem_RawFree(p);
2831
231M
        raw_allocated_blocks--;
2832
231M
    }
2833
1.99G
}
2834
2835
2836
/* pymalloc realloc.
2837
2838
   If nbytes==0, then as the Python docs promise, we do not treat this like
2839
   free(p), and return a non-NULL result.
2840
2841
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2842
   newptr_p.
2843
2844
   Return 0 if pymalloc didn't allocated p. */
2845
static int
2846
pymalloc_realloc(OMState *state, void *ctx,
2847
                 void **newptr_p, void *p, size_t nbytes)
2848
99.6M
{
2849
99.6M
    void *bp;
2850
99.6M
    poolp pool;
2851
99.6M
    size_t size;
2852
2853
99.6M
    assert(p != NULL);
2854
2855
#ifdef WITH_VALGRIND
2856
    /* Treat running_on_valgrind == -1 the same as 0 */
2857
    if (UNLIKELY(running_on_valgrind > 0)) {
2858
        return 0;
2859
    }
2860
#endif
2861
2862
99.6M
    pool = POOL_ADDR(p);
2863
99.6M
    if (!address_in_range(state, p, pool)) {
2864
        /* pymalloc is not managing this block.
2865
2866
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2867
           over this block.  However, if we do, we need to copy the valid data
2868
           from the C-managed block to one of our blocks, and there's no
2869
           portable way to know how much of the memory space starting at p is
2870
           valid.
2871
2872
           As bug 1185883 pointed out the hard way, it's possible that the
2873
           C-managed block is "at the end" of allocated VM space, so that a
2874
           memory fault can occur if we try to copy nbytes bytes starting at p.
2875
           Instead we punt: let C continue to manage this block. */
2876
8.97M
        return 0;
2877
8.97M
    }
2878
2879
    /* pymalloc is in charge of this block */
2880
90.6M
    size = INDEX2SIZE(pool->szidx);
2881
90.6M
    if (nbytes <= size) {
2882
        /* The block is staying the same or shrinking.
2883
2884
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2885
           block to a smaller size class, but it wastes memory not to copy it.
2886
2887
           The compromise here is to copy on shrink only if at least 25% of
2888
           size can be shaved off. */
2889
65.1M
        if (4 * nbytes > 3 * size) {
2890
            /* It's the same, or shrinking and new/old > 3/4. */
2891
31.4M
            *newptr_p = p;
2892
31.4M
            return 1;
2893
31.4M
        }
2894
33.7M
        size = nbytes;
2895
33.7M
    }
2896
2897
59.2M
    bp = _PyObject_Malloc(ctx, nbytes);
2898
59.2M
    if (bp != NULL) {
2899
59.2M
        memcpy(bp, p, size);
2900
59.2M
        _PyObject_Free(ctx, p);
2901
59.2M
    }
2902
59.2M
    *newptr_p = bp;
2903
59.2M
    return 1;
2904
90.6M
}
2905
2906
2907
void *
2908
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2909
322M
{
2910
322M
    void *ptr2;
2911
2912
322M
    if (ptr == NULL) {
2913
223M
        return _PyObject_Malloc(ctx, nbytes);
2914
223M
    }
2915
2916
99.6M
    OMState *state = get_state();
2917
99.6M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2918
90.6M
        return ptr2;
2919
90.6M
    }
2920
2921
8.97M
    return PyMem_RawRealloc(ptr, nbytes);
2922
99.6M
}
2923
2924
#else   /* ! WITH_PYMALLOC */
2925
2926
/*==========================================================================*/
2927
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2928
 * only be used by extensions that are compiled with pymalloc enabled. */
2929
2930
Py_ssize_t
2931
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2932
{
2933
    return 0;
2934
}
2935
2936
Py_ssize_t
2937
_Py_GetGlobalAllocatedBlocks(void)
2938
{
2939
    return 0;
2940
}
2941
2942
void
2943
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2944
{
2945
    return;
2946
}
2947
2948
void
2949
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2950
{
2951
    return;
2952
}
2953
2954
#endif /* WITH_PYMALLOC */
2955
2956
2957
/*==========================================================================*/
2958
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2959
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2960
 */
2961
2962
/* Uncomment this define to add the "serialno" field */
2963
/* #define PYMEM_DEBUG_SERIALNO */
2964
2965
#ifdef PYMEM_DEBUG_SERIALNO
2966
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2967
2968
/* serialno is always incremented via calling this routine.  The point is
2969
 * to supply a single place to set a breakpoint.
2970
 */
2971
static void
2972
bumpserialno(void)
2973
{
2974
    ++serialno;
2975
}
2976
#endif
2977
2978
0
#define SST SIZEOF_SIZE_T
2979
2980
#ifdef PYMEM_DEBUG_SERIALNO
2981
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2982
#else
2983
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2984
#endif
2985
2986
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2987
static size_t
2988
read_size_t(const void *p)
2989
0
{
2990
0
    const uint8_t *q = (const uint8_t *)p;
2991
0
    size_t result = *q++;
2992
0
    int i;
2993
2994
0
    for (i = SST; --i > 0; ++q)
2995
0
        result = (result << 8) | *q;
2996
0
    return result;
2997
0
}
2998
2999
/* Write n as a big-endian size_t, MSB at address p, LSB at
3000
 * p + sizeof(size_t) - 1.
3001
 */
3002
static void
3003
write_size_t(void *p, size_t n)
3004
0
{
3005
0
    uint8_t *q = (uint8_t *)p + SST - 1;
3006
0
    int i;
3007
3008
0
    for (i = SST; --i >= 0; --q) {
3009
0
        *q = (uint8_t)(n & 0xff);
3010
0
        n >>= 8;
3011
0
    }
3012
0
}
3013
3014
static void
3015
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
3016
               bool is_alloc)
3017
0
{
3018
#ifdef Py_GIL_DISABLED
3019
    if (api->api_id == 'o') {
3020
        // Don't overwrite the first few bytes of a PyObject allocation in the
3021
        // free-threaded build
3022
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
3023
        size_t debug_offset;
3024
        if (is_alloc) {
3025
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
3026
        }
3027
        else {
3028
            char *alloc = (char *)data - 2*SST;  // start of the allocation
3029
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
3030
        }
3031
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
3032
        if (debug_offset < nbytes) {
3033
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
3034
        }
3035
        return;
3036
    }
3037
#endif
3038
0
    memset(data, c, nbytes);
3039
0
}
3040
3041
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
3042
   fills them with useful stuff, here calling the underlying malloc's result p:
3043
3044
p[0: S]
3045
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
3046
    to read in a memory dump).
3047
p[S]
3048
    API ID.  See PEP 445.  This is a character, but seems undocumented.
3049
p[S+1: 2*S]
3050
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
3051
p[2*S: 2*S+n]
3052
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
3053
    Used to catch reference to uninitialized memory.
3054
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
3055
    handled the request itself.
3056
p[2*S+n: 2*S+n+S]
3057
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
3058
p[2*S+n+S: 2*S+n+2*S]
3059
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
3060
    and _PyMem_DebugRealloc.
3061
    This is a big-endian size_t.
3062
    If "bad memory" is detected later, the serial number gives an
3063
    excellent way to set a breakpoint on the next run, to capture the
3064
    instant at which this block was passed out.
3065
3066
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
3067
for 3 * S extra bytes, and omits the last serialno field.
3068
*/
3069
3070
static void *
3071
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
3072
0
{
3073
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3074
0
    uint8_t *p;           /* base address of malloc'ed pad block */
3075
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
3076
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3077
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
3078
3079
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3080
        /* integer overflow: can't represent total as a Py_ssize_t */
3081
0
        return NULL;
3082
0
    }
3083
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3084
3085
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
3086
                ^--- p    ^--- data   ^--- tail
3087
       S: nbytes stored as size_t
3088
       I: API identifier (1 byte)
3089
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
3090
       C: Clean bytes used later to store actual data
3091
       N: Serial number stored as size_t
3092
3093
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
3094
       is omitted. */
3095
3096
0
    if (use_calloc) {
3097
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
3098
0
    }
3099
0
    else {
3100
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
3101
0
    }
3102
0
    if (p == NULL) {
3103
0
        return NULL;
3104
0
    }
3105
0
    data = p + 2*SST;
3106
3107
#ifdef PYMEM_DEBUG_SERIALNO
3108
    bumpserialno();
3109
#endif
3110
3111
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
3112
0
    write_size_t(p, nbytes);
3113
0
    p[SST] = (uint8_t)api->api_id;
3114
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3115
3116
0
    if (nbytes > 0 && !use_calloc) {
3117
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
3118
0
    }
3119
3120
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
3121
0
    tail = data + nbytes;
3122
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3123
#ifdef PYMEM_DEBUG_SERIALNO
3124
    write_size_t(tail + SST, serialno);
3125
#endif
3126
3127
0
    return data;
3128
0
}
3129
3130
void *
3131
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
3132
0
{
3133
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3134
0
}
3135
3136
void *
3137
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3138
0
{
3139
0
    size_t nbytes;
3140
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3141
0
    nbytes = nelem * elsize;
3142
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3143
0
}
3144
3145
3146
/* The debug free first checks the 2*SST bytes on each end for sanity (in
3147
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
3148
   Then fills the original bytes with PYMEM_DEADBYTE.
3149
   Then calls the underlying free.
3150
*/
3151
void
3152
_PyMem_DebugRawFree(void *ctx, void *p)
3153
0
{
3154
    /* PyMem_Free(NULL) has no effect */
3155
0
    if (p == NULL) {
3156
0
        return;
3157
0
    }
3158
3159
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3160
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
3161
0
    size_t nbytes;
3162
3163
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3164
0
    nbytes = read_size_t(q);
3165
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
3166
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
3167
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
3168
0
    api->alloc.free(api->alloc.ctx, q);
3169
0
}
3170
3171
3172
void *
3173
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
3174
0
{
3175
0
    if (p == NULL) {
3176
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3177
0
    }
3178
3179
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3180
0
    uint8_t *head;        /* base address of malloc'ed pad block */
3181
0
    uint8_t *data;        /* pointer to data bytes */
3182
0
    uint8_t *r;
3183
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3184
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
3185
0
    size_t original_nbytes;
3186
0
#define ERASED_SIZE 64
3187
3188
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3189
3190
0
    data = (uint8_t *)p;
3191
0
    head = data - 2*SST;
3192
0
    original_nbytes = read_size_t(head);
3193
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3194
        /* integer overflow: can't represent total as a Py_ssize_t */
3195
0
        return NULL;
3196
0
    }
3197
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3198
3199
0
    tail = data + original_nbytes;
3200
#ifdef PYMEM_DEBUG_SERIALNO
3201
    size_t block_serialno = read_size_t(tail + SST);
3202
#endif
3203
0
#ifndef Py_GIL_DISABLED
3204
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
3205
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
3206
     */
3207
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
3208
0
    if (original_nbytes <= sizeof(save)) {
3209
0
        memcpy(save, data, original_nbytes);
3210
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
3211
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
3212
0
    }
3213
0
    else {
3214
0
        memcpy(save, data, ERASED_SIZE);
3215
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3216
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3217
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3218
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3219
0
    }
3220
0
#endif
3221
3222
    /* Resize and add decorations. */
3223
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3224
0
    if (r == NULL) {
3225
        /* if realloc() failed: rewrite header and footer which have
3226
           just been erased */
3227
0
        nbytes = original_nbytes;
3228
0
    }
3229
0
    else {
3230
0
        head = r;
3231
#ifdef PYMEM_DEBUG_SERIALNO
3232
        bumpserialno();
3233
        block_serialno = serialno;
3234
#endif
3235
0
    }
3236
0
    data = head + 2*SST;
3237
3238
0
    write_size_t(head, nbytes);
3239
0
    head[SST] = (uint8_t)api->api_id;
3240
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3241
3242
0
    tail = data + nbytes;
3243
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3244
#ifdef PYMEM_DEBUG_SERIALNO
3245
    write_size_t(tail + SST, block_serialno);
3246
#endif
3247
3248
0
#ifndef Py_GIL_DISABLED
3249
    /* Restore saved bytes. */
3250
0
    if (original_nbytes <= sizeof(save)) {
3251
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3252
0
    }
3253
0
    else {
3254
0
        size_t i = original_nbytes - ERASED_SIZE;
3255
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3256
0
        if (nbytes > i) {
3257
0
            memcpy(data + i, &save[ERASED_SIZE],
3258
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3259
0
        }
3260
0
    }
3261
0
#endif
3262
3263
0
    if (r == NULL) {
3264
0
        return NULL;
3265
0
    }
3266
3267
0
    if (nbytes > original_nbytes) {
3268
        /* growing: mark new extra memory clean */
3269
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3270
0
               nbytes - original_nbytes);
3271
0
    }
3272
3273
0
    return data;
3274
0
}
3275
3276
static inline void
3277
_PyMem_DebugCheckGIL(const char *func)
3278
0
{
3279
0
    PyThreadState *tstate = _PyThreadState_GET();
3280
0
    if (tstate == NULL) {
3281
0
#ifndef Py_GIL_DISABLED
3282
0
        _Py_FatalErrorFunc(func,
3283
0
                           "Python memory allocator called "
3284
0
                           "without holding the GIL");
3285
#else
3286
        _Py_FatalErrorFunc(func,
3287
                           "Python memory allocator called "
3288
                           "without an active thread state. "
3289
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3290
#endif
3291
0
    }
3292
0
}
3293
3294
void *
3295
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3296
0
{
3297
0
    _PyMem_DebugCheckGIL(__func__);
3298
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3299
0
}
3300
3301
void *
3302
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3303
0
{
3304
0
    _PyMem_DebugCheckGIL(__func__);
3305
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3306
0
}
3307
3308
3309
void
3310
_PyMem_DebugFree(void *ctx, void *ptr)
3311
0
{
3312
0
    _PyMem_DebugCheckGIL(__func__);
3313
0
    _PyMem_DebugRawFree(ctx, ptr);
3314
0
}
3315
3316
3317
void *
3318
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3319
0
{
3320
0
    _PyMem_DebugCheckGIL(__func__);
3321
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3322
0
}
3323
3324
/* Check the forbidden bytes on both ends of the memory allocated for p.
3325
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3326
 * and call Py_FatalError to kill the program.
3327
 * The API id, is also checked.
3328
 */
3329
static void
3330
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3331
0
{
3332
0
    assert(p != NULL);
3333
3334
0
    const uint8_t *q = (const uint8_t *)p;
3335
0
    size_t nbytes;
3336
0
    const uint8_t *tail;
3337
0
    int i;
3338
0
    char id;
3339
3340
    /* Check the API id */
3341
0
    id = (char)q[-SST];
3342
0
    if (id != api) {
3343
0
        _PyObject_DebugDumpAddress(p);
3344
0
        _Py_FatalErrorFormat(func,
3345
0
                             "bad ID: Allocated using API '%c', "
3346
0
                             "verified using API '%c'",
3347
0
                             id, api);
3348
0
    }
3349
3350
    /* Check the stuff at the start of p first:  if there's underwrite
3351
     * corruption, the number-of-bytes field may be nuts, and checking
3352
     * the tail could lead to a segfault then.
3353
     */
3354
0
    for (i = SST-1; i >= 1; --i) {
3355
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3356
0
            _PyObject_DebugDumpAddress(p);
3357
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3358
0
        }
3359
0
    }
3360
3361
0
    nbytes = read_size_t(q - 2*SST);
3362
0
    tail = q + nbytes;
3363
0
    for (i = 0; i < SST; ++i) {
3364
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3365
0
            _PyObject_DebugDumpAddress(p);
3366
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3367
0
        }
3368
0
    }
3369
0
}
3370
3371
/* Display info to stderr about the memory block at p. */
3372
static void
3373
_PyObject_DebugDumpAddress(const void *p)
3374
0
{
3375
0
    const uint8_t *q = (const uint8_t *)p;
3376
0
    const uint8_t *tail;
3377
0
    size_t nbytes;
3378
0
    int i;
3379
0
    int ok;
3380
0
    char id;
3381
3382
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3383
0
    if (p == NULL) {
3384
0
        fprintf(stderr, "\n");
3385
0
        return;
3386
0
    }
3387
0
    id = (char)q[-SST];
3388
0
    fprintf(stderr, " API '%c'\n", id);
3389
3390
0
    nbytes = read_size_t(q - 2*SST);
3391
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3392
3393
    /* In case this is nuts, check the leading pad bytes first. */
3394
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3395
0
    ok = 1;
3396
0
    for (i = 1; i <= SST-1; ++i) {
3397
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3398
0
            ok = 0;
3399
0
            break;
3400
0
        }
3401
0
    }
3402
0
    if (ok)
3403
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3404
0
    else {
3405
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3406
0
            PYMEM_FORBIDDENBYTE);
3407
0
        for (i = SST-1; i >= 1; --i) {
3408
0
            const uint8_t byte = *(q-i);
3409
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3410
0
            if (byte != PYMEM_FORBIDDENBYTE)
3411
0
                fputs(" *** OUCH", stderr);
3412
0
            fputc('\n', stderr);
3413
0
        }
3414
3415
0
        fputs("    Because memory is corrupted at the start, the "
3416
0
              "count of bytes requested\n"
3417
0
              "       may be bogus, and checking the trailing pad "
3418
0
              "bytes may segfault.\n", stderr);
3419
0
    }
3420
3421
0
    tail = q + nbytes;
3422
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3423
0
    ok = 1;
3424
0
    for (i = 0; i < SST; ++i) {
3425
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3426
0
            ok = 0;
3427
0
            break;
3428
0
        }
3429
0
    }
3430
0
    if (ok)
3431
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3432
0
    else {
3433
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3434
0
                PYMEM_FORBIDDENBYTE);
3435
0
        for (i = 0; i < SST; ++i) {
3436
0
            const uint8_t byte = tail[i];
3437
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3438
0
                    i, byte);
3439
0
            if (byte != PYMEM_FORBIDDENBYTE)
3440
0
                fputs(" *** OUCH", stderr);
3441
0
            fputc('\n', stderr);
3442
0
        }
3443
0
    }
3444
3445
#ifdef PYMEM_DEBUG_SERIALNO
3446
    size_t serial = read_size_t(tail + SST);
3447
    fprintf(stderr,
3448
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3449
            serial);
3450
#endif
3451
3452
0
    if (nbytes > 0) {
3453
0
        i = 0;
3454
0
        fputs("    Data at p:", stderr);
3455
        /* print up to 8 bytes at the start */
3456
0
        while (q < tail && i < 8) {
3457
0
            fprintf(stderr, " %02x", *q);
3458
0
            ++i;
3459
0
            ++q;
3460
0
        }
3461
        /* and up to 8 at the end */
3462
0
        if (q < tail) {
3463
0
            if (tail - q > 8) {
3464
0
                fputs(" ...", stderr);
3465
0
                q = tail - 8;
3466
0
            }
3467
0
            while (q < tail) {
3468
0
                fprintf(stderr, " %02x", *q);
3469
0
                ++q;
3470
0
            }
3471
0
        }
3472
0
        fputc('\n', stderr);
3473
0
    }
3474
0
    fputc('\n', stderr);
3475
3476
0
    fflush(stderr);
3477
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3478
0
}
3479
3480
3481
static size_t
3482
printone(FILE *out, const char* msg, size_t value)
3483
0
{
3484
0
    int i, k;
3485
0
    char buf[100];
3486
0
    size_t origvalue = value;
3487
3488
0
    fputs(msg, out);
3489
0
    for (i = (int)strlen(msg); i < 35; ++i)
3490
0
        fputc(' ', out);
3491
0
    fputc('=', out);
3492
3493
    /* Write the value with commas. */
3494
0
    i = 22;
3495
0
    buf[i--] = '\0';
3496
0
    buf[i--] = '\n';
3497
0
    k = 3;
3498
0
    do {
3499
0
        size_t nextvalue = value / 10;
3500
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3501
0
        value = nextvalue;
3502
0
        buf[i--] = (char)(digit + '0');
3503
0
        --k;
3504
0
        if (k == 0 && value && i >= 0) {
3505
0
            k = 3;
3506
0
            buf[i--] = ',';
3507
0
        }
3508
0
    } while (value && i >= 0);
3509
3510
0
    while (i >= 0)
3511
0
        buf[i--] = ' ';
3512
0
    fputs(buf, out);
3513
3514
0
    return origvalue;
3515
0
}
3516
3517
void
3518
_PyDebugAllocatorStats(FILE *out,
3519
                       const char *block_name, int num_blocks, size_t sizeof_block)
3520
0
{
3521
0
    char buf1[128];
3522
0
    char buf2[128];
3523
0
    PyOS_snprintf(buf1, sizeof(buf1),
3524
0
                  "%d %ss * %zd bytes each",
3525
0
                  num_blocks, block_name, sizeof_block);
3526
0
    PyOS_snprintf(buf2, sizeof(buf2),
3527
0
                  "%48s ", buf1);
3528
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3529
0
}
3530
3531
// Return true if the obmalloc state structure is heap allocated,
3532
// by PyMem_RawCalloc().  For the main interpreter, this structure
3533
// allocated in the BSS.  Allocating that way gives some memory savings
3534
// and a small performance win (at least on a demand paged OS).  On
3535
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3536
// memory is for the arena_map_top array.  Since normally only one entry
3537
// of that array is used, only one page of resident memory is actually
3538
// used, rather than the full 256 kB.
3539
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3540
0
{
3541
0
#if WITH_PYMALLOC
3542
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3543
#else
3544
    return false;
3545
#endif
3546
0
}
3547
3548
#ifdef WITH_PYMALLOC
3549
static void
3550
init_obmalloc_pools(PyInterpreterState *interp)
3551
37
{
3552
    // initialize the obmalloc->pools structure.  This must be done
3553
    // before the obmalloc alloc/free functions can be called.
3554
37
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3555
37
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3556
37
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3557
37
}
3558
#endif /* WITH_PYMALLOC */
3559
3560
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3561
37
{
3562
37
#ifdef WITH_PYMALLOC
3563
    /* Initialize obmalloc, but only for subinterpreters,
3564
       since the main interpreter is initialized statically. */
3565
37
    if (_Py_IsMainInterpreter(interp)
3566
0
            || _PyInterpreterState_HasFeature(interp,
3567
37
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3568
37
        interp->obmalloc = &obmalloc_state_main;
3569
37
        if (!obmalloc_state_initialized) {
3570
37
            init_obmalloc_pools(interp);
3571
37
            obmalloc_state_initialized = true;
3572
37
        }
3573
37
    } else {
3574
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3575
0
        if (interp->obmalloc == NULL) {
3576
0
            return -1;
3577
0
        }
3578
0
        init_obmalloc_pools(interp);
3579
0
    }
3580
37
#endif /* WITH_PYMALLOC */
3581
37
    return 0; // success
3582
37
}
3583
3584
3585
#ifdef WITH_PYMALLOC
3586
3587
static void
3588
free_obmalloc_arenas(PyInterpreterState *interp)
3589
0
{
3590
0
    OMState *state = interp->obmalloc;
3591
0
    for (uint i = 0; i < maxarenas; ++i) {
3592
        // free each obmalloc memory arena
3593
0
        struct arena_object *ao = &allarenas[i];
3594
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3595
0
                             (void *)ao->address, ARENA_SIZE);
3596
0
    }
3597
    // free the array containing pointers to all arenas
3598
0
    PyMem_RawFree(allarenas);
3599
0
#if WITH_PYMALLOC_RADIX_TREE
3600
0
#ifdef USE_INTERIOR_NODES
3601
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3602
    // by arena_map_mark_used() but not freed when arenas are freed.
3603
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3604
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3605
0
         if (mid == NULL) {
3606
0
             continue;
3607
0
         }
3608
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3609
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3610
0
            if (bot == NULL) {
3611
0
                continue;
3612
0
            }
3613
0
            PyMem_RawFree(bot);
3614
0
         }
3615
0
         PyMem_RawFree(mid);
3616
0
    }
3617
0
#endif
3618
0
#endif
3619
0
}
3620
3621
#ifdef Py_DEBUG
3622
/* Is target in the list?  The list is traversed via the nextpool pointers.
3623
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3624
 * list, else 0.
3625
 */
3626
static int
3627
pool_is_in_list(const poolp target, poolp list)
3628
{
3629
    poolp origlist = list;
3630
    assert(target != NULL);
3631
    if (list == NULL)
3632
        return 0;
3633
    do {
3634
        if (target == list)
3635
            return 1;
3636
        list = list->nextpool;
3637
    } while (list != NULL && list != origlist);
3638
    return 0;
3639
}
3640
#endif
3641
3642
#ifdef WITH_MIMALLOC
3643
struct _alloc_stats {
3644
    size_t allocated_blocks;
3645
    size_t allocated_bytes;
3646
    size_t allocated_with_overhead;
3647
    size_t bytes_reserved;
3648
    size_t bytes_committed;
3649
};
3650
3651
static bool _collect_alloc_stats(
3652
    const mi_heap_t* heap, const mi_heap_area_t* area,
3653
    void* block, size_t block_size, void* arg)
3654
0
{
3655
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3656
0
    stats->allocated_blocks += area->used;
3657
0
    stats->allocated_bytes += area->used * area->block_size;
3658
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3659
0
    stats->bytes_reserved += area->reserved;
3660
0
    stats->bytes_committed += area->committed;
3661
0
    return 1;
3662
0
}
3663
3664
static void
3665
py_mimalloc_print_stats(FILE *out)
3666
0
{
3667
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3668
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3669
0
    fprintf(out, "Medium block threshold = %zu\n",
3670
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3671
0
    fprintf(out, "Large object max size = %zu\n",
3672
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3673
3674
0
    mi_heap_t *heap = mi_heap_get_default();
3675
0
    struct _alloc_stats stats;
3676
0
    memset(&stats, 0, sizeof(stats));
3677
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3678
3679
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3680
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3681
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3682
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3683
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3684
0
}
3685
#endif
3686
3687
3688
static void
3689
pymalloc_print_stats(FILE *out)
3690
0
{
3691
0
    OMState *state = get_state();
3692
3693
0
    uint i;
3694
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3695
    /* # of pools, allocated blocks, and free blocks per class index */
3696
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3697
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3698
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3699
    /* total # of allocated bytes in used and full pools */
3700
0
    size_t allocated_bytes = 0;
3701
    /* total # of available bytes in used pools */
3702
0
    size_t available_bytes = 0;
3703
    /* # of free pools + pools not yet carved out of current arena */
3704
0
    uint numfreepools = 0;
3705
    /* # of bytes for arena alignment padding */
3706
0
    size_t arena_alignment = 0;
3707
    /* # of bytes in used and full pools used for pool_headers */
3708
0
    size_t pool_header_bytes = 0;
3709
    /* # of bytes in used and full pools wasted due to quantization,
3710
     * i.e. the necessarily leftover space at the ends of used and
3711
     * full pools.
3712
     */
3713
0
    size_t quantization = 0;
3714
    /* # of arenas actually allocated. */
3715
0
    size_t narenas = 0;
3716
    /* running total -- should equal narenas * ARENA_SIZE */
3717
0
    size_t total;
3718
0
    char buf[128];
3719
3720
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3721
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3722
3723
0
    for (i = 0; i < numclasses; ++i)
3724
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3725
3726
    /* Because full pools aren't linked to from anything, it's easiest
3727
     * to march over all the arenas.  If we're lucky, most of the memory
3728
     * will be living in full pools -- would be a shame to miss them.
3729
     */
3730
0
    for (i = 0; i < maxarenas; ++i) {
3731
0
        uintptr_t base = allarenas[i].address;
3732
3733
        /* Skip arenas which are not allocated. */
3734
0
        if (allarenas[i].address == (uintptr_t)NULL)
3735
0
            continue;
3736
0
        narenas += 1;
3737
3738
0
        numfreepools += allarenas[i].nfreepools;
3739
3740
        /* round up to pool alignment */
3741
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3742
0
            arena_alignment += POOL_SIZE;
3743
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3744
0
            base += POOL_SIZE;
3745
0
        }
3746
3747
        /* visit every pool in the arena */
3748
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3749
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3750
0
            poolp p = (poolp)base;
3751
0
            const uint sz = p->szidx;
3752
0
            uint freeblocks;
3753
3754
0
            if (p->ref.count == 0) {
3755
                /* currently unused */
3756
#ifdef Py_DEBUG
3757
                assert(pool_is_in_list(p, allarenas[i].freepools));
3758
#endif
3759
0
                continue;
3760
0
            }
3761
0
            ++numpools[sz];
3762
0
            numblocks[sz] += p->ref.count;
3763
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3764
0
            numfreeblocks[sz] += freeblocks;
3765
#ifdef Py_DEBUG
3766
            if (freeblocks > 0)
3767
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3768
#endif
3769
0
        }
3770
0
    }
3771
0
    assert(narenas == narenas_currently_allocated);
3772
3773
0
    fputc('\n', out);
3774
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3775
0
          "-----   ----   ---------   -------------  ------------\n",
3776
0
          out);
3777
3778
0
    for (i = 0; i < numclasses; ++i) {
3779
0
        size_t p = numpools[i];
3780
0
        size_t b = numblocks[i];
3781
0
        size_t f = numfreeblocks[i];
3782
0
        uint size = INDEX2SIZE(i);
3783
0
        if (p == 0) {
3784
0
            assert(b == 0 && f == 0);
3785
0
            continue;
3786
0
        }
3787
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3788
0
                i, size, p, b, f);
3789
0
        allocated_bytes += b * size;
3790
0
        available_bytes += f * size;
3791
0
        pool_header_bytes += p * POOL_OVERHEAD;
3792
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3793
0
    }
3794
0
    fputc('\n', out);
3795
#ifdef PYMEM_DEBUG_SERIALNO
3796
    if (_PyMem_DebugEnabled()) {
3797
        (void)printone(out, "# times object malloc called", serialno);
3798
    }
3799
#endif
3800
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3801
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3802
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3803
0
    (void)printone(out, "# arenas allocated current", narenas);
3804
3805
0
    PyOS_snprintf(buf, sizeof(buf),
3806
0
                  "%zu arenas * %d bytes/arena",
3807
0
                  narenas, ARENA_SIZE);
3808
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3809
3810
0
    fputc('\n', out);
3811
3812
    /* Account for what all of those arena bytes are being used for. */
3813
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3814
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3815
3816
0
    PyOS_snprintf(buf, sizeof(buf),
3817
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3818
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3819
3820
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3821
0
    total += printone(out, "# bytes lost to quantization", quantization);
3822
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3823
0
    (void)printone(out, "Total", total);
3824
0
    assert(narenas * ARENA_SIZE == total);
3825
3826
0
#if WITH_PYMALLOC_RADIX_TREE
3827
0
    fputs("\narena map counts\n", out);
3828
0
#ifdef USE_INTERIOR_NODES
3829
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3830
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3831
0
    fputc('\n', out);
3832
0
#endif
3833
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3834
0
#ifdef USE_INTERIOR_NODES
3835
0
    total += printone(out, "# bytes lost to arena map mid",
3836
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3837
0
    total += printone(out, "# bytes lost to arena map bot",
3838
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3839
0
    (void)printone(out, "Total", total);
3840
0
#endif
3841
0
#endif
3842
3843
0
}
3844
3845
/* Print summary info to "out" about the state of pymalloc's structures.
3846
 * In Py_DEBUG mode, also perform some expensive internal consistency
3847
 * checks.
3848
 *
3849
 * Return 0 if the memory debug hooks are not installed or no statistics was
3850
 * written into out, return 1 otherwise.
3851
 */
3852
int
3853
_PyObject_DebugMallocStats(FILE *out)
3854
0
{
3855
0
#ifdef WITH_MIMALLOC
3856
0
    if (_PyMem_MimallocEnabled()) {
3857
0
        py_mimalloc_print_stats(out);
3858
0
        return 1;
3859
0
    }
3860
0
    else
3861
0
#endif
3862
0
    if (_PyMem_PymallocEnabled()) {
3863
0
        pymalloc_print_stats(out);
3864
0
        return 1;
3865
0
    }
3866
0
    else {
3867
0
        return 0;
3868
0
    }
3869
0
}
3870
3871
#endif /* #ifdef WITH_PYMALLOC */